From dd29257dd83ef526f7efb8efb95701312b774ce4 Mon Sep 17 00:00:00 2001 From: "A. F. Dudley" Date: Fri, 6 Mar 2026 20:45:32 +0000 Subject: [PATCH 01/62] chore: snapshot mia-sw01 and was-sw01 running configs Captured via ansible `show running-config` before applying mia-sw01 outbound validator redirect changes. Co-Authored-By: Claude Opus 4.6 --- docs/switch-configs/mia-sw01-running.cfg | 2588 ++++++++++++++++++++++ docs/switch-configs/was-sw01-running.cfg | 2404 ++++++++++++++++++++ 2 files changed, 4992 insertions(+) create mode 100644 docs/switch-configs/mia-sw01-running.cfg create mode 100644 docs/switch-configs/was-sw01-running.cfg diff --git a/docs/switch-configs/mia-sw01-running.cfg b/docs/switch-configs/mia-sw01-running.cfg new file mode 100644 index 00000000..a6debdd0 --- /dev/null +++ b/docs/switch-configs/mia-sw01-running.cfg @@ -0,0 +1,2588 @@ +! Command: show running-config +! device: laconic-mia-sw01 (DCS-7280CR3A-32S, EOS-4.34.0F) +! +! boot system flash:/EOS-4.34.0F-x86_64.swi +! +no aaa root +! +username install privilege 15 role network-admin secret sha512 $6$JD3NnDhb.UETkCQM$a.I0rQskHVQlgFlU/wjYi56EA/jDqYsuPjJs9I.eo.MDC2OH9KzxFK1KjkNm11Pvy0k0Qbo5ZNuKit64.t2t.1 +username install ssh-key ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIFfXOvzoBAtK2IcqbjTWH8PWwqBNhXkRCZ2xR40EQ7at rix@bitwise3.localhost +! +hardware counter feature gre tunnel interface out +hardware counter feature gre tunnel interface in +! +daemon doublezero-agent + exec /usr/local/bin/doublezero-agent -pubkey WTngs9GF7PDyWuVPkRg3KRj8E8sJCJ2zLc75DAn9DHT -controller 35.85.74.71:7000 -verbose + no shutdown +! +daemon doublezero-telemetry + exec /usr/local/bin/doublezero-telemetry --local-device-pubkey WTngs9GF7PDyWuVPkRg3KRj8E8sJCJ2zLc75DAn9DHT --env mainnet --keypair /mnt/flash/metrics-publisher.json + no shutdown +! +daemon eapilocal + exec /usr/bin/EosSdkRpcAgent --daemon-name eapilocal + no shutdown +! +hardware access-list update default-result permit +! 
+no service interface inactive port-id allocation disabled +! +service routing protocols model multi-agent +! +logging buffered 128000 +no logging console +logging facility local7 +! +management api eos-sdk-rpc + transport grpc eapilocal + localhost loopback + service all + no disabled +! +hostname laconic-mia-sw01 +ip name-server vrf default 1.1.1.1 +ip name-server vrf default 4.4.4.4 +ip name-server vrf default 8.8.8.8 +ip name-server vrf default 9.9.9.9 +! +spanning-tree mode mstp +! +system l1 + unsupported speed action error + unsupported error-correction action error +! +vrf instance management +! +vrf instance vrf1 +! +management api netconf + transport ssh netconf +! +monitor telemetry influx + destination influxdb DZ_INFLUX + url https://us-east-1-1.aws.cloud2.influxdata.com + database name doublezero-mainnet-beta + retention policy autogen + vrf management + username DZ password 7 0317530A535E730C0A5F5D31052831063E101270063430324B370E4718046F5045021C133B25160E59444751590908113E0D7A50032C15020C295A254F5E061320060A5D58071A7E1D7B12300B202E235B5960784D624E07351127765343617B5976286B0C0C50590B3D150D5F5D01350503 + tag global dzd_pubkey WTngs9GF7PDyWuVPkRg3KRj8E8sJCJ2zLc75DAn9DHT + tag global location mia +! +interface Ethernet1/1 + mtu 2048 + speed forced 10000full + no switchport + ip address 209.42.167.133/31 + ip verify unicast source reachable-via rx allow-default + ip access-group SEC-DIA-IN in +! +interface Ethernet1/2 +! +interface Ethernet1/3 +! +interface Ethernet1/4 +! +interface Ethernet3/1 +! +interface Ethernet4/1 + mtu 2048 + no switchport + ip address 172.16.1.189/31 + pim ipv4 sparse-mode + isis enable 1 + isis circuit-type level-2 + isis hello-interval 1 + isis metric 25550 + isis hello padding + isis network point-to-point +! +interface Ethernet5/1 +! +interface Ethernet6/1 +! +interface Ethernet7/1 +! 
+interface Ethernet8/1 + mtu 2048 + no switchport + ip address 172.16.1.192/31 + pim ipv4 sparse-mode + isis enable 1 + isis circuit-type level-2 + isis hello-interval 1 + isis metric 27608 + isis hello padding + isis network point-to-point +! +interface Ethernet9/1 +! +interface Ethernet10/1 +! +interface Ethernet11/1 +! +interface Ethernet12/1 +! +interface Ethernet13/1 +! +interface Ethernet14/1 +! +interface Ethernet15/1 +! +interface Ethernet16/1 +! +interface Ethernet17/1 +! +interface Ethernet18/1 +! +interface Ethernet19/1 +! +interface Ethernet20/1 +! +interface Ethernet21/1 +! +interface Ethernet22/1 +! +interface Ethernet23/1 +! +interface Ethernet24/1 +! +interface Ethernet25/1 +! +interface Ethernet26/1 +! +interface Ethernet27/1 +! +interface Ethernet28/1 +! +interface Ethernet29/1 +! +interface Ethernet30/1 +! +interface Ethernet31/1 +! +interface Ethernet32/1 +! +interface Loopback100 + ip address 209.42.167.136/32 + isis enable 1 +! +interface Loopback255 + ip address 172.16.1.110/32 + node-segment ipv4 index 77 + isis enable 1 +! +interface Loopback256 + ip address 172.16.1.157/32 + isis enable 1 +! +interface Loopback1000 + description RP Address + ip address 10.0.0.0/32 +! +interface Management1 + vrf management + ip address 209.42.167.130/30 +! +interface Tunnel500 + description USER-UCAST-500 + mtu 9216 + vrf vrf1 + ip address 169.254.7.6/31 + ip access-group SEC-USER-500-IN in + tunnel mode gre + tunnel source 209.42.167.133 + tunnel destination 186.233.184.235 + tunnel path-mtu-discovery + tunnel ttl 32 +! +interface Tunnel501 + description USER-MCAST-501 + mtu 9216 + ip address 169.254.2.200/31 + ip access-group SEC-USER-SUB-MCAST-IN in + multicast ipv4 boundary SEC-USER-MCAST-BOUNDARY-501-OUT out + pim ipv4 sparse-mode + pim ipv4 dr-priority 4294967295 + tunnel mode gre + tunnel source 209.42.167.133 + tunnel destination 186.233.185.50 + tunnel path-mtu-discovery + tunnel ttl 32 +! 
+interface Tunnel502 + description USER-UCAST-502 + mtu 9216 + vrf vrf1 + ip address 169.254.4.154/31 + ip access-group SEC-USER-502-IN in + tunnel mode gre + tunnel source 209.42.167.133 + tunnel destination 155.138.213.71 + tunnel path-mtu-discovery + tunnel ttl 32 +! +interface Tunnel503 + description USER-MCAST-503 + mtu 9216 + ip address 169.254.7.162/31 + ip access-group SEC-USER-PUB-MCAST-IN in + multicast ipv4 boundary SEC-USER-MCAST-BOUNDARY-503-OUT out + pim ipv4 sparse-mode + pim ipv4 border-router + pim ipv4 dr-priority 4294967295 + tunnel mode gre + tunnel source 209.42.167.136 + tunnel destination 155.138.213.71 + tunnel path-mtu-discovery + tunnel ttl 32 +! +interface Tunnel504 +! +interface Tunnel505 + description USER-UCAST-505 + mtu 9216 + vrf vrf1 + ip address 169.254.4.68/31 + ip access-group SEC-USER-505-IN in + tunnel mode gre + tunnel source 209.42.167.136 + tunnel destination 186.233.185.50 + tunnel path-mtu-discovery + tunnel ttl 32 +! +interface Tunnel506 +! +hardware tcam + profile tunnel-interface-acl + feature acl port ip + sequence 45 + key size limit 160 + key field dscp dst-ip ip-frag ip-protocol l4-dst-port l4-ops l4-src-port src-ip tcp-control ttl + action count drop mirror + packet ipv4 forwarding bridged + packet ipv4 forwarding routed + packet ipv4 forwarding routed multicast + packet ipv4 mpls ipv4 forwarding mpls decap + packet ipv4 mpls ipv6 forwarding mpls decap + packet ipv4 non-vxlan forwarding routed decap + packet ipv4 vxlan eth ipv4 forwarding routed decap + packet ipv4 vxlan forwarding bridged decap + ! + feature acl port ip egress mpls-tunnelled-match + sequence 95 + ! + feature acl port ipv6 + sequence 25 + key field dst-ipv6 ipv6-next-header ipv6-traffic-class l4-dst-port l4-ops-3b l4-src-port src-ipv6-high src-ipv6-low tcp-control + action count drop mirror + packet ipv6 forwarding bridged + packet ipv6 forwarding routed + packet ipv6 forwarding routed multicast + packet ipv6 ipv6 forwarding routed decap + ! 
+ feature acl port ipv6 egress + sequence 105 + key field dst-ipv6 ipv6-next-header ipv6-traffic-class l4-dst-port l4-src-port src-ipv6-high src-ipv6-low tcp-control + action count drop mirror + packet ipv6 forwarding bridged + packet ipv6 forwarding routed + ! + feature acl port mac + sequence 55 + key size limit 160 + key field dst-mac ether-type src-mac + action count drop mirror + packet ipv4 forwarding bridged + packet ipv4 forwarding routed + packet ipv4 forwarding routed multicast + packet ipv4 mpls ipv4 forwarding mpls decap + packet ipv4 mpls ipv6 forwarding mpls decap + packet ipv4 non-vxlan forwarding routed decap + packet ipv4 vxlan forwarding bridged decap + packet ipv6 forwarding bridged + packet ipv6 forwarding routed + packet ipv6 forwarding routed decap + packet ipv6 forwarding routed multicast + packet ipv6 ipv6 forwarding routed decap + packet mpls forwarding bridged decap + packet mpls ipv4 forwarding mpls + packet mpls ipv6 forwarding mpls + packet mpls non-ip forwarding mpls + packet non-ip forwarding bridged + ! + feature acl subintf ip + sequence 40 + key size limit 160 + key field dscp dst-ip ip-frag ip-protocol l4-dst-port l4-ops-18b l4-src-port src-ip tcp-control ttl + action count drop + packet ipv4 forwarding routed + ! + feature acl subintf ipv6 + sequence 15 + key field dst-ipv6 ipv6-next-header l4-dst-port l4-src-port src-ipv6-high src-ipv6-low tcp-control + action count drop + packet ipv6 forwarding routed + ! + feature acl tunnel interface ip + key size limit 160 + key field inner-dst-ip inner-ip-frag inner-ip-protocol inner-l4-dst-port inner-l4-src-port inner-src-ip inner-tcp-control inner-tos inner-ttl l4-ops-7b + action count drop + packet ipv4 non-vxlan forwarding routed decap + ! 
+ feature acl vlan ip + sequence 35 + key size limit 160 + key field dscp dst-ip ip-frag ip-protocol l4-dst-port l4-ops-18b l4-src-port src-ip tcp-control ttl + action count drop + packet ipv4 forwarding routed + packet ipv4 mpls ipv4 forwarding mpls decap + packet ipv4 mpls ipv6 forwarding mpls decap + packet ipv4 non-vxlan forwarding routed decap + packet ipv4 vxlan eth ipv4 forwarding routed decap + ! + feature acl vlan ipv6 + sequence 10 + key field dst-ipv6 ipv6-next-header l4-dst-port l4-src-port src-ipv6-high src-ipv6-low tcp-control + action count drop + packet ipv6 forwarding routed + packet ipv6 ipv6 forwarding routed decap + ! + feature acl vlan ipv6 egress + sequence 20 + key field dst-ipv6 ipv6-next-header ipv6-traffic-class l4-dst-port l4-src-port src-ipv6-high src-ipv6-low tcp-control + action count drop mirror + packet ipv6 forwarding bridged + packet ipv6 forwarding routed + ! + feature counter lfib + sequence 85 + ! + feature forwarding-destination mpls + sequence 100 + ! + feature mirror ip + sequence 80 + key size limit 160 + key field dscp dst-ip ip-frag ip-protocol l4-dst-port l4-ops l4-src-port src-ip tcp-control + action count mirror set-policer + packet ipv4 forwarding bridged + packet ipv4 forwarding routed + packet ipv4 forwarding routed multicast + packet ipv4 non-vxlan forwarding routed decap + ! + feature mpls + sequence 5 + key size limit 160 + action drop redirect set-ecn + packet ipv4 mpls ipv4 forwarding mpls decap + packet ipv4 mpls ipv6 forwarding mpls decap + packet mpls ipv4 forwarding mpls + packet mpls ipv6 forwarding mpls + packet mpls non-ip forwarding mpls + ! + feature mpls pop ingress + sequence 90 + ! 
+ feature pbr ip + sequence 60 + key size limit 160 + key field dscp dst-ip ip-frag ip-protocol l4-dst-port l4-ops-18b l4-src-port src-ip tcp-control + action count redirect + packet ipv4 forwarding routed + packet ipv4 mpls ipv4 forwarding mpls decap + packet ipv4 mpls ipv6 forwarding mpls decap + packet ipv4 non-vxlan forwarding routed decap + packet ipv4 vxlan forwarding bridged decap + ! + feature pbr ipv6 + sequence 30 + key field dst-ipv6 ipv6-next-header l4-dst-port l4-src-port src-ipv6-high src-ipv6-low tcp-control + action count redirect + packet ipv6 forwarding routed + ! + feature pbr mpls + sequence 65 + key size limit 160 + key field mpls-inner-ip-tos + action count drop redirect + packet mpls ipv4 forwarding mpls + packet mpls ipv6 forwarding mpls + packet mpls non-ip forwarding mpls + ! + feature qos ip + sequence 75 + key size limit 160 + key field dscp dst-ip ip-frag ip-protocol l4-dst-port l4-ops l4-src-port src-ip tcp-control + action set-dscp set-policer set-tc + packet ipv4 forwarding routed + packet ipv4 forwarding routed multicast + packet ipv4 mpls ipv4 forwarding mpls decap + packet ipv4 mpls ipv6 forwarding mpls decap + packet ipv4 non-vxlan forwarding routed decap + ! + feature qos ipv6 + sequence 70 + key field dst-ipv6 ipv6-next-header ipv6-traffic-class l4-dst-port l4-src-port src-ipv6-high src-ipv6-low + action set-dscp set-policer set-tc + packet ipv6 forwarding routed + ! + feature tunnel vxlan + sequence 50 + key size limit 160 + packet ipv4 vxlan eth ipv4 forwarding routed decap + packet ipv4 vxlan forwarding bridged decap + system profile tunnel-interface-acl +! 
+ip access-list MAIN-CONTROL-PLANE-ACL + counters per-entry + 10 permit icmp any any + 20 permit ip any any tracked + 30 permit udp any any eq bfd ttl eq 255 + 40 permit udp any any eq bfd-echo ttl eq 254 + 50 permit udp any any eq multihop-bfd micro-bfd sbfd + 60 permit udp any eq sbfd any eq sbfd-initiator + 70 permit ospf any any + 80 permit tcp any any eq ssh telnet www snmp bgp https msdp ldp netconf-ssh gnmi + 90 permit udp any any eq bootps bootpc ntp snmp ptp-event ptp-general rip ldp + 100 permit tcp any any eq mlag ttl eq 255 + 110 permit udp any any eq mlag ttl eq 255 + 120 permit vrrp any any + 130 permit ahp any any + 140 permit pim any any + 150 permit igmp any any + 160 permit tcp any any range 5900 5910 + 170 permit tcp any any range 50000 50100 + 180 permit udp any any range 51000 51100 + 190 permit tcp any any eq 3333 + 200 permit tcp any any eq nat ttl eq 255 + 210 permit tcp any eq bgp any + 220 permit rsvp any any + 230 permit tcp any any eq 9340 + 240 permit tcp any any eq 9559 + 250 permit udp any any eq 8503 + 260 permit udp any any eq lsp-ping + 270 permit udp any eq lsp-ping any + 280 remark Permit TWAMP (UDP 862) + 290 permit udp any any eq 862 +! +ip access-list SEC-DIA-IN + counters per-entry + 10 deny ip 0.0.0.0/8 any + 20 deny ip 10.0.0.0/8 any + 30 deny ip 100.64.0.0/10 any + 40 deny ip 127.0.0.0/8 any + 50 deny ip 169.254.0.0/16 any + 60 deny ip 172.16.0.0/12 any + 70 deny ip 192.0.0.0/24 any + 80 deny ip 192.0.2.0/24 any + 90 deny ip 192.168.0.0/16 any + 100 deny ip 198.18.0.0/15 any + 110 deny ip 198.51.100.0/24 any + 120 deny ip 203.0.113.0/24 any + 130 deny ip 224.0.0.0/3 any + 140 permit ip any any +! +ip access-list SEC-USER-500-IN + counters per-entry + 10 permit icmp host 169.254.7.7 any + 20 permit udp host 169.254.7.7 any range 33434 33534 + 30 permit icmp host 169.254.7.7 any time-exceeded + 40 permit tcp host 169.254.7.7 host 169.254.7.6 eq bgp + 50 permit ip host 186.233.184.235 any + 60 deny ip any any +! 
+ip access-list SEC-USER-502-IN + counters per-entry + 10 permit icmp host 169.254.4.155 any + 20 permit udp host 169.254.4.155 any range 33434 33534 + 30 permit icmp host 169.254.4.155 any time-exceeded + 40 permit tcp host 169.254.4.155 host 169.254.4.154 eq bgp + 50 permit ip host 155.138.213.71 any + 60 deny ip any any +! +ip access-list SEC-USER-505-IN + counters per-entry + 10 permit icmp host 169.254.4.69 any + 20 permit udp host 169.254.4.69 any range 33434 33534 + 30 permit icmp host 169.254.4.69 any time-exceeded + 40 permit tcp host 169.254.4.69 host 169.254.4.68 eq bgp + 50 permit ip host 186.233.185.50 any + 60 deny ip any any +! +ip access-list SEC-USER-PUB-MCAST-IN + counters per-entry + 10 permit icmp any any + 20 permit tcp any any eq bgp + 30 permit ip any host 224.0.0.13 + 40 permit ip any 233.84.178.0/24 + 50 deny ip any any +! +ip access-list SEC-USER-SUB-MCAST-IN + counters per-entry + 10 permit icmp any any + 20 permit tcp any any eq bgp + 30 permit ip any host 224.0.0.13 + 40 deny ip any any +! +ip access-list standard SEC-USER-MCAST-BOUNDARY-501-OUT + counters per-entry + 10 permit host 233.84.178.1 + 20 deny 224.0.0.0/4 +! +ip access-list standard SEC-USER-MCAST-BOUNDARY-503-OUT + counters per-entry + 10 permit host 233.84.178.1 + 20 deny 224.0.0.0/4 +! +ip routing +ip routing vrf management +ip routing vrf vrf1 +! +ip prefix-list PL-USER-500 seq 10 permit 186.233.184.235/32 +ip prefix-list PL-USER-501 seq 10 deny 0.0.0.0/0 le 32 +ip prefix-list PL-USER-502 seq 10 permit 155.138.213.71/32 +ip prefix-list PL-USER-503 seq 10 permit 148.51.120.125/32 +ip prefix-list PL-USER-505 seq 10 permit 186.233.185.50/32 +ip community-list COMM-ALL_MCAST_USERS permit 21682:1300 +ip community-list COMM-ALL_USERS permit 21682:1200 +ip community-list COMM-MIA_USERS permit 21682:10028 +! +system control-plane + ip access-group MAIN-CONTROL-PLANE-ACL in +! 
+ip route 0.0.0.0/0 209.42.167.132 +ip route 0.0.0.0/0 209.43.167.132 +ip route vrf management 0.0.0.0/0 209.42.167.129 +! +mpls ip +! +mpls icmp ttl-exceeded tunneling +mpls icmp ip source-interface Loopback255 +! +ntp server 0.pool.ntp.org +ntp server 1.pool.ntp.org +ntp server 2.pool.ntp.org +! +policy-map type copp copp-system-policy + class copp-system-ipunicast + shape kbps 500000 +! +route-map RM-USER-500-IN permit 10 + match ip address prefix-list PL-USER-500 + match as-path length = 1 + set community 21682:1200 21682:10028 +! +route-map RM-USER-500-OUT deny 10 + match community COMM-MIA_USERS +! +route-map RM-USER-500-OUT permit 20 + match community COMM-ALL_USERS +! +route-map RM-USER-501-IN permit 10 + match ip address prefix-list PL-USER-501 + match as-path length = 1 + set community 21682:1300 21682:10028 +! +route-map RM-USER-501-OUT deny 10 + match community COMM-MIA_USERS +! +route-map RM-USER-501-OUT permit 20 + match community COMM-ALL_MCAST_USERS +! +route-map RM-USER-502-IN permit 10 + match ip address prefix-list PL-USER-502 + match as-path length = 1 + set community 21682:1200 21682:10028 +! +route-map RM-USER-502-OUT deny 10 + match community COMM-MIA_USERS +! +route-map RM-USER-502-OUT permit 20 + match community COMM-ALL_USERS +! +route-map RM-USER-503-IN permit 10 + match ip address prefix-list PL-USER-503 + match as-path length = 1 + set community 21682:1300 21682:10028 +! +route-map RM-USER-503-OUT deny 10 + match community COMM-MIA_USERS +! +route-map RM-USER-503-OUT permit 20 + match community COMM-ALL_MCAST_USERS +! +route-map RM-USER-505-IN permit 10 + match ip address prefix-list PL-USER-505 + match as-path length = 1 + set community 21682:1200 21682:10028 +! +route-map RM-USER-505-OUT deny 10 + match community COMM-MIA_USERS +! +route-map RM-USER-505-OUT permit 20 + match community COMM-ALL_USERS +! 
+router bgp 65342 + router-id 172.16.1.110 + timers bgp 1 3 + distance bgp 20 200 200 + neighbor 169.254.2.201 remote-as 65000 + neighbor 169.254.2.201 local-as 209321 no-prepend replace-as + neighbor 169.254.2.201 passive + neighbor 169.254.2.201 description USER-501 + neighbor 169.254.2.201 route-map RM-USER-501-IN in + neighbor 169.254.2.201 route-map RM-USER-501-OUT out + neighbor 169.254.2.201 maximum-routes 1 + neighbor 169.254.2.201 maximum-accepted-routes 1 + neighbor 169.254.7.163 remote-as 65000 + neighbor 169.254.7.163 local-as 209321 no-prepend replace-as + neighbor 169.254.7.163 passive + neighbor 169.254.7.163 description USER-503 + neighbor 169.254.7.163 route-map RM-USER-503-IN in + neighbor 169.254.7.163 route-map RM-USER-503-OUT out + neighbor 169.254.7.163 maximum-routes 1 + neighbor 169.254.7.163 maximum-accepted-routes 1 + neighbor 172.16.0.1 remote-as 65342 + neighbor 172.16.0.1 next-hop-self + neighbor 172.16.0.1 update-source Loopback255 + neighbor 172.16.0.1 description dz-ny7-sw01-vpnv4 + neighbor 172.16.0.1 timers 3 9 + neighbor 172.16.0.1 send-community + neighbor 172.16.0.2 remote-as 65342 + neighbor 172.16.0.2 next-hop-self + neighbor 172.16.0.2 update-source Loopback256 + neighbor 172.16.0.2 description dz-ny7-sw01-ipv4 + neighbor 172.16.0.2 timers 3 9 + neighbor 172.16.0.2 send-community + neighbor 172.16.0.3 remote-as 65342 + neighbor 172.16.0.3 next-hop-self + neighbor 172.16.0.3 update-source Loopback255 + neighbor 172.16.0.3 description sea001-dz001-vpnv4 + neighbor 172.16.0.3 timers 3 9 + neighbor 172.16.0.3 send-community + neighbor 172.16.0.4 remote-as 65342 + neighbor 172.16.0.4 next-hop-self + neighbor 172.16.0.4 update-source Loopback256 + neighbor 172.16.0.4 description sea001-dz001-ipv4 + neighbor 172.16.0.4 timers 3 9 + neighbor 172.16.0.4 send-community + neighbor 172.16.0.5 remote-as 65342 + neighbor 172.16.0.5 next-hop-self + neighbor 172.16.0.5 update-source Loopback255 + neighbor 172.16.0.5 description 
dz-ld4-sw01-vpnv4 + neighbor 172.16.0.5 timers 3 9 + neighbor 172.16.0.5 send-community + neighbor 172.16.0.6 remote-as 65342 + neighbor 172.16.0.6 next-hop-self + neighbor 172.16.0.6 update-source Loopback256 + neighbor 172.16.0.6 description dz-ld4-sw01-ipv4 + neighbor 172.16.0.6 timers 3 9 + neighbor 172.16.0.6 send-community + neighbor 172.16.0.7 remote-as 65342 + neighbor 172.16.0.7 next-hop-self + neighbor 172.16.0.7 update-source Loopback255 + neighbor 172.16.0.7 description sea001-dz002-vpnv4 + neighbor 172.16.0.7 timers 3 9 + neighbor 172.16.0.7 send-community + neighbor 172.16.0.8 remote-as 65342 + neighbor 172.16.0.8 next-hop-self + neighbor 172.16.0.8 update-source Loopback256 + neighbor 172.16.0.8 description sea001-dz002-ipv4 + neighbor 172.16.0.8 timers 3 9 + neighbor 172.16.0.8 send-community + neighbor 172.16.0.9 remote-as 65342 + neighbor 172.16.0.9 next-hop-self + neighbor 172.16.0.9 update-source Loopback255 + neighbor 172.16.0.9 description tyo001-dz002-vpnv4 + neighbor 172.16.0.9 timers 3 9 + neighbor 172.16.0.9 send-community + neighbor 172.16.0.14 remote-as 65342 + neighbor 172.16.0.14 next-hop-self + neighbor 172.16.0.14 update-source Loopback256 + neighbor 172.16.0.14 description tyo001-dz002-ipv4 + neighbor 172.16.0.14 timers 3 9 + neighbor 172.16.0.14 send-community + neighbor 172.16.0.15 remote-as 65342 + neighbor 172.16.0.15 next-hop-self + neighbor 172.16.0.15 update-source Loopback255 + neighbor 172.16.0.15 description dz-ny5-sw01-vpnv4 + neighbor 172.16.0.15 timers 3 9 + neighbor 172.16.0.15 send-community + neighbor 172.16.0.16 remote-as 65342 + neighbor 172.16.0.16 next-hop-self + neighbor 172.16.0.16 update-source Loopback255 + neighbor 172.16.0.16 description fra001-dz002-vpnv4 + neighbor 172.16.0.16 timers 3 9 + neighbor 172.16.0.16 send-community + neighbor 172.16.0.17 remote-as 65342 + neighbor 172.16.0.17 next-hop-self + neighbor 172.16.0.17 update-source Loopback256 + neighbor 172.16.0.17 description fra001-dz002-ipv4 + 
neighbor 172.16.0.17 timers 3 9 + neighbor 172.16.0.17 send-community + neighbor 172.16.0.20 remote-as 65342 + neighbor 172.16.0.20 next-hop-self + neighbor 172.16.0.20 update-source Loopback256 + neighbor 172.16.0.20 description dz-ny5-sw01-ipv4 + neighbor 172.16.0.20 timers 3 9 + neighbor 172.16.0.20 send-community + neighbor 172.16.0.21 remote-as 65342 + neighbor 172.16.0.21 next-hop-self + neighbor 172.16.0.21 update-source Loopback255 + neighbor 172.16.0.21 description mrs001-dz001-vpnv4 + neighbor 172.16.0.21 timers 3 9 + neighbor 172.16.0.21 send-community + neighbor 172.16.0.22 remote-as 65342 + neighbor 172.16.0.22 next-hop-self + neighbor 172.16.0.22 update-source Loopback256 + neighbor 172.16.0.22 description mrs001-dz001-ipv4 + neighbor 172.16.0.22 timers 3 9 + neighbor 172.16.0.22 send-community + neighbor 172.16.0.23 remote-as 65342 + neighbor 172.16.0.23 next-hop-self + neighbor 172.16.0.23 update-source Loopback255 + neighbor 172.16.0.23 description fr2-dzx-001-vpnv4 + neighbor 172.16.0.23 timers 3 9 + neighbor 172.16.0.23 send-community + neighbor 172.16.0.24 remote-as 65342 + neighbor 172.16.0.24 next-hop-self + neighbor 172.16.0.24 update-source Loopback255 + neighbor 172.16.0.24 description dub001-dz001-vpnv4 + neighbor 172.16.0.24 timers 3 9 + neighbor 172.16.0.24 send-community + neighbor 172.16.0.25 remote-as 65342 + neighbor 172.16.0.25 next-hop-self + neighbor 172.16.0.25 update-source Loopback256 + neighbor 172.16.0.25 description dub001-dz001-ipv4 + neighbor 172.16.0.25 timers 3 9 + neighbor 172.16.0.25 send-community + neighbor 172.16.0.26 remote-as 65342 + neighbor 172.16.0.26 next-hop-self + neighbor 172.16.0.26 update-source Loopback255 + neighbor 172.16.0.26 description dub001-dz002-vpnv4 + neighbor 172.16.0.26 timers 3 9 + neighbor 172.16.0.26 send-community + neighbor 172.16.0.27 remote-as 65342 + neighbor 172.16.0.27 next-hop-self + neighbor 172.16.0.27 update-source Loopback255 + neighbor 172.16.0.27 description 
mrs001-dz002-vpnv4 + neighbor 172.16.0.27 timers 3 9 + neighbor 172.16.0.27 send-community + neighbor 172.16.0.28 remote-as 65342 + neighbor 172.16.0.28 next-hop-self + neighbor 172.16.0.28 update-source Loopback256 + neighbor 172.16.0.28 description mrs001-dz002-ipv4 + neighbor 172.16.0.28 timers 3 9 + neighbor 172.16.0.28 send-community + neighbor 172.16.0.29 remote-as 65342 + neighbor 172.16.0.29 next-hop-self + neighbor 172.16.0.29 update-source Loopback256 + neighbor 172.16.0.29 description fr2-dzx-001-ipv4 + neighbor 172.16.0.29 timers 3 9 + neighbor 172.16.0.29 send-community + neighbor 172.16.0.30 remote-as 65342 + neighbor 172.16.0.30 next-hop-self + neighbor 172.16.0.30 update-source Loopback255 + neighbor 172.16.0.30 description lon001-dz002-vpnv4 + neighbor 172.16.0.30 timers 3 9 + neighbor 172.16.0.30 send-community + neighbor 172.16.0.31 remote-as 65342 + neighbor 172.16.0.31 next-hop-self + neighbor 172.16.0.31 update-source Loopback256 + neighbor 172.16.0.31 description lon001-dz002-ipv4 + neighbor 172.16.0.31 timers 3 9 + neighbor 172.16.0.31 send-community + neighbor 172.16.0.32 remote-as 65342 + neighbor 172.16.0.32 next-hop-self + neighbor 172.16.0.32 update-source Loopback255 + neighbor 172.16.0.32 description was001-dz001-vpnv4 + neighbor 172.16.0.32 timers 3 9 + neighbor 172.16.0.32 send-community + neighbor 172.16.0.33 remote-as 65342 + neighbor 172.16.0.33 next-hop-self + neighbor 172.16.0.33 update-source Loopback255 + neighbor 172.16.0.33 description dz-mrs-01-vpnv4 + neighbor 172.16.0.33 timers 3 9 + neighbor 172.16.0.33 send-community + neighbor 172.16.0.34 remote-as 65342 + neighbor 172.16.0.34 next-hop-self + neighbor 172.16.0.34 update-source Loopback255 + neighbor 172.16.0.34 description was001-dz002-vpnv4 + neighbor 172.16.0.34 timers 3 9 + neighbor 172.16.0.34 send-community + neighbor 172.16.0.35 remote-as 65342 + neighbor 172.16.0.35 next-hop-self + neighbor 172.16.0.35 update-source Loopback256 + neighbor 172.16.0.35 
description was001-dz002-ipv4 + neighbor 172.16.0.35 timers 3 9 + neighbor 172.16.0.35 send-community + neighbor 172.16.0.36 remote-as 65342 + neighbor 172.16.0.36 next-hop-self + neighbor 172.16.0.36 update-source Loopback255 + neighbor 172.16.0.36 description nyc001-dz001-vpnv4 + neighbor 172.16.0.36 timers 3 9 + neighbor 172.16.0.36 send-community + neighbor 172.16.0.37 remote-as 65342 + neighbor 172.16.0.37 next-hop-self + neighbor 172.16.0.37 update-source Loopback256 + neighbor 172.16.0.37 description nyc001-dz001-ipv4 + neighbor 172.16.0.37 timers 3 9 + neighbor 172.16.0.37 send-community + neighbor 172.16.0.38 remote-as 65342 + neighbor 172.16.0.38 next-hop-self + neighbor 172.16.0.38 update-source Loopback255 + neighbor 172.16.0.38 description nyc001-dz002-vpnv4 + neighbor 172.16.0.38 timers 3 9 + neighbor 172.16.0.38 send-community + neighbor 172.16.0.39 remote-as 65342 + neighbor 172.16.0.39 next-hop-self + neighbor 172.16.0.39 update-source Loopback256 + neighbor 172.16.0.39 description dz-mrs-01-ipv4 + neighbor 172.16.0.39 timers 3 9 + neighbor 172.16.0.39 send-community + neighbor 172.16.0.42 remote-as 65342 + neighbor 172.16.0.42 next-hop-self + neighbor 172.16.0.42 update-source Loopback255 + neighbor 172.16.0.42 description tyo001-dz001-vpnv4 + neighbor 172.16.0.42 timers 3 9 + neighbor 172.16.0.42 send-community + neighbor 172.16.0.43 remote-as 65342 + neighbor 172.16.0.43 next-hop-self + neighbor 172.16.0.43 update-source Loopback256 + neighbor 172.16.0.43 description tyo001-dz001-ipv4 + neighbor 172.16.0.43 timers 3 9 + neighbor 172.16.0.43 send-community + neighbor 172.16.0.46 remote-as 65342 + neighbor 172.16.0.46 next-hop-self + neighbor 172.16.0.46 update-source Loopback255 + neighbor 172.16.0.46 description dz-mad-01-vpnv4 + neighbor 172.16.0.46 timers 3 9 + neighbor 172.16.0.46 send-community + neighbor 172.16.0.47 remote-as 65342 + neighbor 172.16.0.47 next-hop-self + neighbor 172.16.0.47 update-source Loopback255 + neighbor 172.16.0.47 
description sjc001-dz001-vpnv4 + neighbor 172.16.0.47 timers 3 9 + neighbor 172.16.0.47 send-community + neighbor 172.16.0.48 remote-as 65342 + neighbor 172.16.0.48 next-hop-self + neighbor 172.16.0.48 update-source Loopback256 + neighbor 172.16.0.48 description sjc001-dz001-ipv4 + neighbor 172.16.0.48 timers 3 9 + neighbor 172.16.0.48 send-community + neighbor 172.16.0.49 remote-as 65342 + neighbor 172.16.0.49 next-hop-self + neighbor 172.16.0.49 update-source Loopback255 + neighbor 172.16.0.49 description sjc001-dz002-vpnv4 + neighbor 172.16.0.49 timers 3 9 + neighbor 172.16.0.49 send-community + neighbor 172.16.0.50 remote-as 65342 + neighbor 172.16.0.50 next-hop-self + neighbor 172.16.0.50 update-source Loopback256 + neighbor 172.16.0.50 description dz-mad-01-ipv4 + neighbor 172.16.0.50 timers 3 9 + neighbor 172.16.0.50 send-community + neighbor 172.16.0.51 remote-as 65342 + neighbor 172.16.0.51 next-hop-self + neighbor 172.16.0.51 update-source Loopback255 + neighbor 172.16.0.51 description dz-chi-sw01-vpnv4 + neighbor 172.16.0.51 timers 3 9 + neighbor 172.16.0.51 send-community + neighbor 172.16.0.56 remote-as 65342 + neighbor 172.16.0.56 next-hop-self + neighbor 172.16.0.56 update-source Loopback255 + neighbor 172.16.0.56 description nyc002-dz002-vpnv4 + neighbor 172.16.0.56 timers 3 9 + neighbor 172.16.0.56 send-community + neighbor 172.16.0.57 remote-as 65342 + neighbor 172.16.0.57 next-hop-self + neighbor 172.16.0.57 update-source Loopback256 + neighbor 172.16.0.57 description nyc002-dz002-ipv4 + neighbor 172.16.0.57 timers 3 9 + neighbor 172.16.0.57 send-community + neighbor 172.16.0.62 remote-as 65342 + neighbor 172.16.0.62 next-hop-self + neighbor 172.16.0.62 update-source Loopback255 + neighbor 172.16.0.62 description lax001-dz002-vpnv4 + neighbor 172.16.0.62 timers 3 9 + neighbor 172.16.0.62 send-community + neighbor 172.16.0.63 remote-as 65342 + neighbor 172.16.0.63 next-hop-self + neighbor 172.16.0.63 update-source Loopback256 + neighbor 
172.16.0.63 description lax001-dz002-ipv4 + neighbor 172.16.0.63 timers 3 9 + neighbor 172.16.0.63 send-community + neighbor 172.16.0.68 remote-as 65342 + neighbor 172.16.0.68 next-hop-self + neighbor 172.16.0.68 update-source Loopback255 + neighbor 172.16.0.68 description sin001-dz002-vpnv4 + neighbor 172.16.0.68 timers 3 9 + neighbor 172.16.0.68 send-community + neighbor 172.16.0.69 remote-as 65342 + neighbor 172.16.0.69 next-hop-self + neighbor 172.16.0.69 update-source Loopback256 + neighbor 172.16.0.69 description sin001-dz002-ipv4 + neighbor 172.16.0.69 timers 3 9 + neighbor 172.16.0.69 send-community + neighbor 172.16.0.70 remote-as 65342 + neighbor 172.16.0.70 next-hop-self + neighbor 172.16.0.70 update-source Loopback255 + neighbor 172.16.0.70 description tyo002-dz002-vpnv4 + neighbor 172.16.0.70 timers 3 9 + neighbor 172.16.0.70 send-community + neighbor 172.16.0.71 remote-as 65342 + neighbor 172.16.0.71 next-hop-self + neighbor 172.16.0.71 update-source Loopback256 + neighbor 172.16.0.71 description tyo002-dz002-ipv4 + neighbor 172.16.0.71 timers 3 9 + neighbor 172.16.0.71 send-community + neighbor 172.16.0.72 remote-as 65342 + neighbor 172.16.0.72 next-hop-self + neighbor 172.16.0.72 update-source Loopback256 + neighbor 172.16.0.72 description dz-chi-sw01-ipv4 + neighbor 172.16.0.72 timers 3 9 + neighbor 172.16.0.72 send-community + neighbor 172.16.0.73 remote-as 65342 + neighbor 172.16.0.73 next-hop-self + neighbor 172.16.0.73 update-source Loopback255 + neighbor 172.16.0.73 description hkg001-dz001-vpnv4 + neighbor 172.16.0.73 timers 3 9 + neighbor 172.16.0.73 send-community + neighbor 172.16.0.74 remote-as 65342 + neighbor 172.16.0.74 next-hop-self + neighbor 172.16.0.74 update-source Loopback256 + neighbor 172.16.0.74 description hkg001-dz001-ipv4 + neighbor 172.16.0.74 timers 3 9 + neighbor 172.16.0.74 send-community + neighbor 172.16.0.75 remote-as 65342 + neighbor 172.16.0.75 next-hop-self + neighbor 172.16.0.75 update-source Loopback255 + 
neighbor 172.16.0.75 description hkg001-dz002-vpnv4 + neighbor 172.16.0.75 timers 3 9 + neighbor 172.16.0.75 send-community + neighbor 172.16.0.76 remote-as 65342 + neighbor 172.16.0.76 next-hop-self + neighbor 172.16.0.76 update-source Loopback255 + neighbor 172.16.0.76 description chi001-dz001-vpnv4 + neighbor 172.16.0.76 timers 3 9 + neighbor 172.16.0.76 send-community + neighbor 172.16.0.77 remote-as 65342 + neighbor 172.16.0.77 next-hop-self + neighbor 172.16.0.77 update-source Loopback255 + neighbor 172.16.0.77 description chi001-dz002-vpnv4 + neighbor 172.16.0.77 timers 3 9 + neighbor 172.16.0.77 send-community + neighbor 172.16.0.78 remote-as 65342 + neighbor 172.16.0.78 next-hop-self + neighbor 172.16.0.78 update-source Loopback255 + neighbor 172.16.0.78 description dz-lax-sw01-vpnv4 + neighbor 172.16.0.78 timers 3 9 + neighbor 172.16.0.78 send-community + neighbor 172.16.0.79 remote-as 65342 + neighbor 172.16.0.79 next-hop-self + neighbor 172.16.0.79 update-source Loopback255 + neighbor 172.16.0.79 description bom001-dz001-vpnv4 + neighbor 172.16.0.79 timers 3 9 + neighbor 172.16.0.79 send-community + neighbor 172.16.0.80 remote-as 65342 + neighbor 172.16.0.80 next-hop-self + neighbor 172.16.0.80 update-source Loopback256 + neighbor 172.16.0.80 description bom001-dz001-ipv4 + neighbor 172.16.0.80 timers 3 9 + neighbor 172.16.0.80 send-community + neighbor 172.16.0.81 remote-as 65342 + neighbor 172.16.0.81 next-hop-self + neighbor 172.16.0.81 update-source Loopback256 + neighbor 172.16.0.81 description dz-lax-sw01-ipv4 + neighbor 172.16.0.81 timers 3 9 + neighbor 172.16.0.81 send-community + neighbor 172.16.0.82 remote-as 65342 + neighbor 172.16.0.82 next-hop-self + neighbor 172.16.0.82 update-source Loopback255 + neighbor 172.16.0.82 description bom001-dz002-vpnv4 + neighbor 172.16.0.82 timers 3 9 + neighbor 172.16.0.82 send-community + neighbor 172.16.0.83 remote-as 65342 + neighbor 172.16.0.83 next-hop-self + neighbor 172.16.0.83 update-source 
Loopback256 + neighbor 172.16.0.83 description bom001-dz002-ipv4 + neighbor 172.16.0.83 timers 3 9 + neighbor 172.16.0.83 send-community + neighbor 172.16.0.86 remote-as 65342 + neighbor 172.16.0.86 next-hop-self + neighbor 172.16.0.86 update-source Loopback255 + neighbor 172.16.0.86 description dz-ams-01-vpnv4 + neighbor 172.16.0.86 timers 3 9 + neighbor 172.16.0.86 send-community + neighbor 172.16.0.87 remote-as 65342 + neighbor 172.16.0.87 next-hop-self + neighbor 172.16.0.87 update-source Loopback255 + neighbor 172.16.0.87 description dfw001-dz001-vpnv4 + neighbor 172.16.0.87 timers 3 9 + neighbor 172.16.0.87 send-community + neighbor 172.16.0.88 remote-as 65342 + neighbor 172.16.0.88 next-hop-self + neighbor 172.16.0.88 update-source Loopback256 + neighbor 172.16.0.88 description dfw001-dz001-ipv4 + neighbor 172.16.0.88 timers 3 9 + neighbor 172.16.0.88 send-community + neighbor 172.16.0.89 remote-as 65342 + neighbor 172.16.0.89 next-hop-self + neighbor 172.16.0.89 update-source Loopback255 + neighbor 172.16.0.89 description dz-fra-01-vpnv4 + neighbor 172.16.0.89 timers 3 9 + neighbor 172.16.0.89 send-community + neighbor 172.16.0.92 remote-as 65342 + neighbor 172.16.0.92 next-hop-self + neighbor 172.16.0.92 update-source Loopback255 + neighbor 172.16.0.92 description dfw001-dz002-vpnv4 + neighbor 172.16.0.92 timers 3 9 + neighbor 172.16.0.92 send-community + neighbor 172.16.0.93 remote-as 65342 + neighbor 172.16.0.93 next-hop-self + neighbor 172.16.0.93 update-source Loopback256 + neighbor 172.16.0.93 description dfw001-dz002-ipv4 + neighbor 172.16.0.93 timers 3 9 + neighbor 172.16.0.93 send-community + neighbor 172.16.0.94 remote-as 65342 + neighbor 172.16.0.94 next-hop-self + neighbor 172.16.0.94 update-source Loopback255 + neighbor 172.16.0.94 description dz-sxb-01-vpnv4 + neighbor 172.16.0.94 timers 3 9 + neighbor 172.16.0.94 send-community + neighbor 172.16.0.95 remote-as 65342 + neighbor 172.16.0.95 next-hop-self + neighbor 172.16.0.95 update-source 
Loopback255 + neighbor 172.16.0.95 description sao001-dz001-vpnv4 + neighbor 172.16.0.95 timers 3 9 + neighbor 172.16.0.95 send-community + neighbor 172.16.0.96 remote-as 65342 + neighbor 172.16.0.96 next-hop-self + neighbor 172.16.0.96 update-source Loopback256 + neighbor 172.16.0.96 description sao001-dz001-ipv4 + neighbor 172.16.0.96 timers 3 9 + neighbor 172.16.0.96 send-community + neighbor 172.16.0.97 remote-as 65342 + neighbor 172.16.0.97 next-hop-self + neighbor 172.16.0.97 update-source Loopback255 + neighbor 172.16.0.97 description dz-waw-01-vpnv4 + neighbor 172.16.0.97 timers 3 9 + neighbor 172.16.0.97 send-community + neighbor 172.16.0.100 remote-as 65342 + neighbor 172.16.0.100 next-hop-self + neighbor 172.16.0.100 update-source Loopback255 + neighbor 172.16.0.100 description sao001-dz002-vpnv4 + neighbor 172.16.0.100 timers 3 9 + neighbor 172.16.0.100 send-community + neighbor 172.16.0.101 remote-as 65342 + neighbor 172.16.0.101 next-hop-self + neighbor 172.16.0.101 update-source Loopback256 + neighbor 172.16.0.101 description sao001-dz002-ipv4 + neighbor 172.16.0.101 timers 3 9 + neighbor 172.16.0.101 send-community + neighbor 172.16.0.106 remote-as 65342 + neighbor 172.16.0.106 next-hop-self + neighbor 172.16.0.106 update-source Loopback256 + neighbor 172.16.0.106 description dz-ams-01-ipv4 + neighbor 172.16.0.106 timers 3 9 + neighbor 172.16.0.106 send-community + neighbor 172.16.0.107 remote-as 65342 + neighbor 172.16.0.107 next-hop-self + neighbor 172.16.0.107 update-source Loopback255 + neighbor 172.16.0.107 description ams001-dz002-vpnv4 + neighbor 172.16.0.107 timers 3 9 + neighbor 172.16.0.107 send-community + neighbor 172.16.0.108 remote-as 65342 + neighbor 172.16.0.108 next-hop-self + neighbor 172.16.0.108 update-source Loopback256 + neighbor 172.16.0.108 description ams001-dz002-ipv4 + neighbor 172.16.0.108 timers 3 9 + neighbor 172.16.0.108 send-community + neighbor 172.16.0.109 remote-as 65342 + neighbor 172.16.0.109 next-hop-self + 
neighbor 172.16.0.109 update-source Loopback256 + neighbor 172.16.0.109 description dub001-dz002-ipv4 + neighbor 172.16.0.109 timers 3 9 + neighbor 172.16.0.109 send-community + neighbor 172.16.0.110 remote-as 65342 + neighbor 172.16.0.110 next-hop-self + neighbor 172.16.0.110 update-source Loopback256 + neighbor 172.16.0.110 description was001-dz001-ipv4 + neighbor 172.16.0.110 timers 3 9 + neighbor 172.16.0.110 send-community + neighbor 172.16.0.111 remote-as 65342 + neighbor 172.16.0.111 next-hop-self + neighbor 172.16.0.111 update-source Loopback256 + neighbor 172.16.0.111 description nyc001-dz002-ipv4 + neighbor 172.16.0.111 timers 3 9 + neighbor 172.16.0.111 send-community + neighbor 172.16.0.112 remote-as 65342 + neighbor 172.16.0.112 next-hop-self + neighbor 172.16.0.112 update-source Loopback256 + neighbor 172.16.0.112 description sjc001-dz002-ipv4 + neighbor 172.16.0.112 timers 3 9 + neighbor 172.16.0.112 send-community + neighbor 172.16.0.113 remote-as 65342 + neighbor 172.16.0.113 next-hop-self + neighbor 172.16.0.113 update-source Loopback256 + neighbor 172.16.0.113 description hkg001-dz002-ipv4 + neighbor 172.16.0.113 timers 3 9 + neighbor 172.16.0.113 send-community + neighbor 172.16.0.114 remote-as 65342 + neighbor 172.16.0.114 next-hop-self + neighbor 172.16.0.114 update-source Loopback256 + neighbor 172.16.0.114 description chi001-dz001-ipv4 + neighbor 172.16.0.114 timers 3 9 + neighbor 172.16.0.114 send-community + neighbor 172.16.0.115 remote-as 65342 + neighbor 172.16.0.115 next-hop-self + neighbor 172.16.0.115 update-source Loopback256 + neighbor 172.16.0.115 description chi001-dz002-ipv4 + neighbor 172.16.0.115 timers 3 9 + neighbor 172.16.0.115 send-community + neighbor 172.16.0.140 remote-as 65342 + neighbor 172.16.0.140 next-hop-self + neighbor 172.16.0.140 update-source Loopback255 + neighbor 172.16.0.140 description dz-tor1-sw01-vpnv4 + neighbor 172.16.0.140 timers 3 9 + neighbor 172.16.0.140 send-community + neighbor 172.16.0.141 
remote-as 65342 + neighbor 172.16.0.141 next-hop-self + neighbor 172.16.0.141 update-source Loopback256 + neighbor 172.16.0.141 description dz-tor1-sw01-ipv4 + neighbor 172.16.0.141 timers 3 9 + neighbor 172.16.0.141 send-community + neighbor 172.16.0.142 remote-as 65342 + neighbor 172.16.0.142 next-hop-self + neighbor 172.16.0.142 update-source Loopback255 + neighbor 172.16.0.142 description dz-mtl11-sw01-vpnv4 + neighbor 172.16.0.142 timers 3 9 + neighbor 172.16.0.142 send-community + neighbor 172.16.0.143 remote-as 65342 + neighbor 172.16.0.143 next-hop-self + neighbor 172.16.0.143 update-source Loopback256 + neighbor 172.16.0.143 description dz-mtl11-sw01-ipv4 + neighbor 172.16.0.143 timers 3 9 + neighbor 172.16.0.143 send-community + neighbor 172.16.0.148 remote-as 65342 + neighbor 172.16.0.148 next-hop-self + neighbor 172.16.0.148 update-source Loopback255 + neighbor 172.16.0.148 description dz-ny7-sw02-vpnv4 + neighbor 172.16.0.148 timers 3 9 + neighbor 172.16.0.148 send-community + neighbor 172.16.0.149 remote-as 65342 + neighbor 172.16.0.149 next-hop-self + neighbor 172.16.0.149 update-source Loopback256 + neighbor 172.16.0.149 description dz-ny7-sw02-ipv4 + neighbor 172.16.0.149 timers 3 9 + neighbor 172.16.0.149 send-community + neighbor 172.16.0.152 remote-as 65342 + neighbor 172.16.0.152 next-hop-self + neighbor 172.16.0.152 update-source Loopback255 + neighbor 172.16.0.152 description dz-dc10-sw01-vpnv4 + neighbor 172.16.0.152 timers 3 9 + neighbor 172.16.0.152 send-community + neighbor 172.16.0.153 remote-as 65342 + neighbor 172.16.0.153 next-hop-self + neighbor 172.16.0.153 update-source Loopback256 + neighbor 172.16.0.153 description dz-dc10-sw01-ipv4 + neighbor 172.16.0.153 timers 3 9 + neighbor 172.16.0.153 send-community + neighbor 172.16.0.156 remote-as 65342 + neighbor 172.16.0.156 next-hop-self + neighbor 172.16.0.156 update-source Loopback255 + neighbor 172.16.0.156 description dz-ch2-sw01-vpnv4 + neighbor 172.16.0.156 timers 3 9 + neighbor 
172.16.0.156 send-community + neighbor 172.16.0.157 remote-as 65342 + neighbor 172.16.0.157 next-hop-self + neighbor 172.16.0.157 update-source Loopback256 + neighbor 172.16.0.157 description dz-ch2-sw01-ipv4 + neighbor 172.16.0.157 timers 3 9 + neighbor 172.16.0.157 send-community + neighbor 172.16.0.158 remote-as 65342 + neighbor 172.16.0.158 next-hop-self + neighbor 172.16.0.158 update-source Loopback255 + neighbor 172.16.0.158 description dz-fr5-sw01-vpnv4 + neighbor 172.16.0.158 timers 3 9 + neighbor 172.16.0.158 send-community + neighbor 172.16.0.159 remote-as 65342 + neighbor 172.16.0.159 next-hop-self + neighbor 172.16.0.159 update-source Loopback256 + neighbor 172.16.0.159 description dz-fr5-sw01-ipv4 + neighbor 172.16.0.159 timers 3 9 + neighbor 172.16.0.159 send-community + neighbor 172.16.0.160 remote-as 65342 + neighbor 172.16.0.160 next-hop-self + neighbor 172.16.0.160 update-source Loopback255 + neighbor 172.16.0.160 description dz-sea10-sw01-vpnv4 + neighbor 172.16.0.160 timers 3 9 + neighbor 172.16.0.160 send-community + neighbor 172.16.0.161 remote-as 65342 + neighbor 172.16.0.161 next-hop-self + neighbor 172.16.0.161 update-source Loopback256 + neighbor 172.16.0.161 description dz-sea10-sw01-ipv4 + neighbor 172.16.0.161 timers 3 9 + neighbor 172.16.0.161 send-community + neighbor 172.16.0.162 remote-as 65342 + neighbor 172.16.0.162 next-hop-self + neighbor 172.16.0.162 update-source Loopback255 + neighbor 172.16.0.162 description dz-sg1-sw01-vpnv4 + neighbor 172.16.0.162 timers 3 9 + neighbor 172.16.0.162 send-community + neighbor 172.16.0.163 remote-as 65342 + neighbor 172.16.0.163 next-hop-self + neighbor 172.16.0.163 update-source Loopback256 + neighbor 172.16.0.163 description dz-sg1-sw01-ipv4 + neighbor 172.16.0.163 timers 3 9 + neighbor 172.16.0.163 send-community + neighbor 172.16.0.164 remote-as 65342 + neighbor 172.16.0.164 next-hop-self + neighbor 172.16.0.164 update-source Loopback255 + neighbor 172.16.0.164 description 
dz-ty9-sw01-vpnv4 + neighbor 172.16.0.164 timers 3 9 + neighbor 172.16.0.164 send-community + neighbor 172.16.0.165 remote-as 65342 + neighbor 172.16.0.165 next-hop-self + neighbor 172.16.0.165 update-source Loopback256 + neighbor 172.16.0.165 description dz-ty9-sw01-ipv4 + neighbor 172.16.0.165 timers 3 9 + neighbor 172.16.0.165 send-community + neighbor 172.16.0.238 remote-as 65342 + neighbor 172.16.0.238 next-hop-self + neighbor 172.16.0.238 update-source Loopback255 + neighbor 172.16.0.238 description dgt-dzd-sin-sg3-vpnv4 + neighbor 172.16.0.238 timers 3 9 + neighbor 172.16.0.238 send-community + neighbor 172.16.0.239 remote-as 65342 + neighbor 172.16.0.239 next-hop-self + neighbor 172.16.0.239 update-source Loopback256 + neighbor 172.16.0.239 description dgt-dzd-sin-sg3-ipv4 + neighbor 172.16.0.239 timers 3 9 + neighbor 172.16.0.239 send-community + neighbor 172.16.0.242 remote-as 65342 + neighbor 172.16.0.242 next-hop-self + neighbor 172.16.0.242 update-source Loopback256 + neighbor 172.16.0.242 description dz-fra-01-ipv4 + neighbor 172.16.0.242 timers 3 9 + neighbor 172.16.0.242 send-community + neighbor 172.16.0.243 remote-as 65342 + neighbor 172.16.0.243 next-hop-self + neighbor 172.16.0.243 update-source Loopback256 + neighbor 172.16.0.243 description dz-sxb-01-ipv4 + neighbor 172.16.0.243 timers 3 9 + neighbor 172.16.0.243 send-community + neighbor 172.16.0.244 remote-as 65342 + neighbor 172.16.0.244 next-hop-self + neighbor 172.16.0.244 update-source Loopback256 + neighbor 172.16.0.244 description dz-waw-01-ipv4 + neighbor 172.16.0.244 timers 3 9 + neighbor 172.16.0.244 send-community + neighbor 172.16.0.245 remote-as 65342 + neighbor 172.16.0.245 next-hop-self + neighbor 172.16.0.245 update-source Loopback255 + neighbor 172.16.0.245 description dz-slc-sw01-vpnv4 + neighbor 172.16.0.245 timers 3 9 + neighbor 172.16.0.245 send-community + neighbor 172.16.0.254 remote-as 65342 + neighbor 172.16.0.254 next-hop-self + neighbor 172.16.0.254 update-source 
Loopback256 + neighbor 172.16.0.254 description dz-slc-sw01-ipv4 + neighbor 172.16.0.254 timers 3 9 + neighbor 172.16.0.254 send-community + neighbor 172.16.0.255 remote-as 65342 + neighbor 172.16.0.255 next-hop-self + neighbor 172.16.0.255 update-source Loopback256 + neighbor 172.16.0.255 description cherlita-ipv4 + neighbor 172.16.0.255 timers 3 9 + neighbor 172.16.0.255 send-community + neighbor 172.16.1.6 remote-as 65342 + neighbor 172.16.1.6 next-hop-self + neighbor 172.16.1.6 update-source Loopback256 + neighbor 172.16.1.6 description frankry-ipv4 + neighbor 172.16.1.6 timers 3 9 + neighbor 172.16.1.6 send-community + neighbor 172.16.1.7 remote-as 65342 + neighbor 172.16.1.7 next-hop-self + neighbor 172.16.1.7 update-source Loopback255 + neighbor 172.16.1.7 description cherlita-vpnv4 + neighbor 172.16.1.7 timers 3 9 + neighbor 172.16.1.7 send-community + neighbor 172.16.1.10 remote-as 65342 + neighbor 172.16.1.10 next-hop-self + neighbor 172.16.1.10 update-source Loopback255 + neighbor 172.16.1.10 description frankry-vpnv4 + neighbor 172.16.1.10 timers 3 9 + neighbor 172.16.1.10 send-community + neighbor 172.16.1.11 remote-as 65342 + neighbor 172.16.1.11 next-hop-self + neighbor 172.16.1.11 update-source Loopback255 + neighbor 172.16.1.11 description dz-muc-01-vpnv4 + neighbor 172.16.1.11 timers 3 9 + neighbor 172.16.1.11 send-community + neighbor 172.16.1.14 remote-as 65342 + neighbor 172.16.1.14 next-hop-self + neighbor 172.16.1.14 update-source Loopback256 + neighbor 172.16.1.14 description dz-muc-01-ipv4 + neighbor 172.16.1.14 timers 3 9 + neighbor 172.16.1.14 send-community + neighbor 172.16.1.15 remote-as 65342 + neighbor 172.16.1.15 next-hop-self + neighbor 172.16.1.15 update-source Loopback255 + neighbor 172.16.1.15 description ce2-dzd-001-vpnv4 + neighbor 172.16.1.15 timers 3 9 + neighbor 172.16.1.15 send-community + neighbor 172.16.1.26 remote-as 65342 + neighbor 172.16.1.26 next-hop-self + neighbor 172.16.1.26 update-source Loopback256 + neighbor 
172.16.1.26 description ce2-dzd-001-ipv4 + neighbor 172.16.1.26 timers 3 9 + neighbor 172.16.1.26 send-community + neighbor 172.16.1.27 remote-as 65342 + neighbor 172.16.1.27 next-hop-self + neighbor 172.16.1.27 update-source Loopback255 + neighbor 172.16.1.27 description dgt-dzd-nyc-ny2-vpnv4 + neighbor 172.16.1.27 timers 3 9 + neighbor 172.16.1.27 send-community + neighbor 172.16.1.36 remote-as 65342 + neighbor 172.16.1.36 next-hop-self + neighbor 172.16.1.36 update-source Loopback256 + neighbor 172.16.1.36 description dgt-dzd-nyc-ny2-ipv4 + neighbor 172.16.1.36 timers 3 9 + neighbor 172.16.1.36 send-community + neighbor 172.16.1.37 remote-as 65342 + neighbor 172.16.1.37 next-hop-self + neighbor 172.16.1.37 update-source Loopback256 + neighbor 172.16.1.37 description swerry-ipv4 + neighbor 172.16.1.37 timers 3 9 + neighbor 172.16.1.37 send-community + neighbor 172.16.1.40 remote-as 65342 + neighbor 172.16.1.40 next-hop-self + neighbor 172.16.1.40 update-source Loopback256 + neighbor 172.16.1.40 description cherydam-ipv4 + neighbor 172.16.1.40 timers 3 9 + neighbor 172.16.1.40 send-community + neighbor 172.16.1.41 remote-as 65342 + neighbor 172.16.1.41 next-hop-self + neighbor 172.16.1.41 update-source Loopback255 + neighbor 172.16.1.41 description swerry-vpnv4 + neighbor 172.16.1.41 timers 3 9 + neighbor 172.16.1.41 send-community + neighbor 172.16.1.44 remote-as 65342 + neighbor 172.16.1.44 next-hop-self + neighbor 172.16.1.44 update-source Loopback255 + neighbor 172.16.1.44 description cherydam-vpnv4 + neighbor 172.16.1.44 timers 3 9 + neighbor 172.16.1.44 send-community + neighbor 172.16.1.45 remote-as 65342 + neighbor 172.16.1.45 next-hop-self + neighbor 172.16.1.45 update-source Loopback255 + neighbor 172.16.1.45 description dgt-dzd-ash-dc3-vpnv4 + neighbor 172.16.1.45 timers 3 9 + neighbor 172.16.1.45 send-community + neighbor 172.16.1.52 remote-as 65342 + neighbor 172.16.1.52 next-hop-self + neighbor 172.16.1.52 update-source Loopback256 + neighbor 
172.16.1.52 description dgt-dzd-ash-dc3-ipv4 + neighbor 172.16.1.52 timers 3 9 + neighbor 172.16.1.52 send-community + neighbor 172.16.1.53 remote-as 65342 + neighbor 172.16.1.53 next-hop-self + neighbor 172.16.1.53 update-source Loopback255 + neighbor 172.16.1.53 description bdc-dzd-001-vpnv4 + neighbor 172.16.1.53 timers 3 9 + neighbor 172.16.1.53 send-community + neighbor 172.16.1.58 remote-as 65342 + neighbor 172.16.1.58 next-hop-self + neighbor 172.16.1.58 update-source Loopback255 + neighbor 172.16.1.58 description dgt-dzd-dal-da3-vpnv4 + neighbor 172.16.1.58 timers 3 9 + neighbor 172.16.1.58 send-community + neighbor 172.16.1.59 remote-as 65342 + neighbor 172.16.1.59 next-hop-self + neighbor 172.16.1.59 update-source Loopback256 + neighbor 172.16.1.59 description dgt-dzd-dal-da3-ipv4 + neighbor 172.16.1.59 timers 3 9 + neighbor 172.16.1.59 send-community + neighbor 172.16.1.64 remote-as 65342 + neighbor 172.16.1.64 next-hop-self + neighbor 172.16.1.64 update-source Loopback256 + neighbor 172.16.1.64 description bdc-dzd-001-ipv4 + neighbor 172.16.1.64 timers 3 9 + neighbor 172.16.1.64 send-community + neighbor 172.16.1.65 remote-as 65342 + neighbor 172.16.1.65 next-hop-self + neighbor 172.16.1.65 update-source Loopback255 + neighbor 172.16.1.65 description dgt-dzd-lon-ths-vpnv4 + neighbor 172.16.1.65 timers 3 9 + neighbor 172.16.1.65 send-community + neighbor 172.16.1.70 remote-as 65342 + neighbor 172.16.1.70 next-hop-self + neighbor 172.16.1.70 update-source Loopback255 + neighbor 172.16.1.70 description dgt-dzd-lax-la2-vpnv4 + neighbor 172.16.1.70 timers 3 9 + neighbor 172.16.1.70 send-community + neighbor 172.16.1.71 remote-as 65342 + neighbor 172.16.1.71 next-hop-self + neighbor 172.16.1.71 update-source Loopback256 + neighbor 172.16.1.71 description dgt-dzd-lon-ths-ipv4 + neighbor 172.16.1.71 timers 3 9 + neighbor 172.16.1.71 send-community + neighbor 172.16.1.72 remote-as 65342 + neighbor 172.16.1.72 next-hop-self + neighbor 172.16.1.72 update-source 
Loopback255 + neighbor 172.16.1.72 description dgt-dzd-fra-fr5-vpnv4 + neighbor 172.16.1.72 timers 3 9 + neighbor 172.16.1.72 send-community + neighbor 172.16.1.73 remote-as 65342 + neighbor 172.16.1.73 next-hop-self + neighbor 172.16.1.73 update-source Loopback256 + neighbor 172.16.1.73 description dgt-dzd-lax-la2-ipv4 + neighbor 172.16.1.73 timers 3 9 + neighbor 172.16.1.73 send-community + neighbor 172.16.1.86 remote-as 65342 + neighbor 172.16.1.86 next-hop-self + neighbor 172.16.1.86 update-source Loopback256 + neighbor 172.16.1.86 description dgt-dzd-fra-fr5-ipv4 + neighbor 172.16.1.86 timers 3 9 + neighbor 172.16.1.86 send-community + neighbor 172.16.1.87 remote-as 65342 + neighbor 172.16.1.87 next-hop-self + neighbor 172.16.1.87 update-source Loopback255 + neighbor 172.16.1.87 description dgt-dzd-ams-ams1-vpnv4 + neighbor 172.16.1.87 timers 3 9 + neighbor 172.16.1.87 send-community + neighbor 172.16.1.90 remote-as 65342 + neighbor 172.16.1.90 next-hop-self + neighbor 172.16.1.90 update-source Loopback256 + neighbor 172.16.1.90 description dgt-dzd-ams-ams1-ipv4 + neighbor 172.16.1.90 timers 3 9 + neighbor 172.16.1.90 send-community + neighbor 172.16.1.91 remote-as 65342 + neighbor 172.16.1.91 next-hop-self + neighbor 172.16.1.91 update-source Loopback255 + neighbor 172.16.1.91 description dz100a-slc1-tsw-vpnv4 + neighbor 172.16.1.91 timers 3 9 + neighbor 172.16.1.91 send-community + neighbor 172.16.1.98 remote-as 65342 + neighbor 172.16.1.98 next-hop-self + neighbor 172.16.1.98 update-source Loopback255 + neighbor 172.16.1.98 description dgt-dzd-dub-db2-vpnv4 + neighbor 172.16.1.98 timers 3 9 + neighbor 172.16.1.98 send-community + neighbor 172.16.1.99 remote-as 65342 + neighbor 172.16.1.99 next-hop-self + neighbor 172.16.1.99 update-source Loopback255 + neighbor 172.16.1.99 description swdzd01-lon2-vpnv4 + neighbor 172.16.1.99 timers 3 9 + neighbor 172.16.1.99 send-community + neighbor 172.16.1.100 remote-as 65342 + neighbor 172.16.1.100 next-hop-self + 
neighbor 172.16.1.100 update-source Loopback256 + neighbor 172.16.1.100 description dz100a-slc1-tsw-ipv4 + neighbor 172.16.1.100 timers 3 9 + neighbor 172.16.1.100 send-community + neighbor 172.16.1.101 remote-as 65342 + neighbor 172.16.1.101 next-hop-self + neighbor 172.16.1.101 update-source Loopback256 + neighbor 172.16.1.101 description dgt-dzd-dub-db2-ipv4 + neighbor 172.16.1.101 timers 3 9 + neighbor 172.16.1.101 send-community + neighbor 172.16.1.111 remote-as 65342 + neighbor 172.16.1.111 next-hop-self + neighbor 172.16.1.111 update-source Loopback255 + neighbor 172.16.1.111 description dgt-dzd-mrs-mrs1-vpnv4 + neighbor 172.16.1.111 timers 3 9 + neighbor 172.16.1.111 send-community + neighbor 172.16.1.114 remote-as 65342 + neighbor 172.16.1.114 next-hop-self + neighbor 172.16.1.114 update-source Loopback256 + neighbor 172.16.1.114 description dgt-dzd-mrs-mrs1-ipv4 + neighbor 172.16.1.114 timers 3 9 + neighbor 172.16.1.114 send-community + neighbor 172.16.1.115 remote-as 65342 + neighbor 172.16.1.115 next-hop-self + neighbor 172.16.1.115 update-source Loopback256 + neighbor 172.16.1.115 description swdzd01-lon2-ipv4 + neighbor 172.16.1.115 timers 3 9 + neighbor 172.16.1.115 send-community + neighbor 172.16.1.130 remote-as 65342 + neighbor 172.16.1.130 next-hop-self + neighbor 172.16.1.130 update-source Loopback255 + neighbor 172.16.1.130 description lts-dzd-001-vpnv4 + neighbor 172.16.1.130 timers 3 9 + neighbor 172.16.1.130 send-community + neighbor 172.16.1.131 remote-as 65342 + neighbor 172.16.1.131 next-hop-self + neighbor 172.16.1.131 update-source Loopback256 + neighbor 172.16.1.131 description lts-dzd-001-ipv4 + neighbor 172.16.1.131 timers 3 9 + neighbor 172.16.1.131 send-community + neighbor 172.16.1.138 remote-as 65342 + neighbor 172.16.1.138 next-hop-self + neighbor 172.16.1.138 update-source Loopback256 + neighbor 172.16.1.138 description dz-slc2-sw01-ipv4 + neighbor 172.16.1.138 timers 3 9 + neighbor 172.16.1.138 send-community + neighbor 
172.16.1.139 remote-as 65342 + neighbor 172.16.1.139 next-hop-self + neighbor 172.16.1.139 update-source Loopback255 + neighbor 172.16.1.139 description dz-slc2-sw01-vpnv4 + neighbor 172.16.1.139 timers 3 9 + neighbor 172.16.1.139 send-community + neighbor 172.16.1.144 remote-as 65342 + neighbor 172.16.1.144 next-hop-self + neighbor 172.16.1.144 update-source Loopback255 + neighbor 172.16.1.144 description au1c-dz01-vpnv4 + neighbor 172.16.1.144 timers 3 9 + neighbor 172.16.1.144 send-community + neighbor 172.16.1.145 remote-as 65342 + neighbor 172.16.1.145 next-hop-self + neighbor 172.16.1.145 update-source Loopback256 + neighbor 172.16.1.145 description au1c-dz01-ipv4 + neighbor 172.16.1.145 timers 3 9 + neighbor 172.16.1.145 send-community + neighbor 172.16.1.146 remote-as 65342 + neighbor 172.16.1.146 next-hop-self + neighbor 172.16.1.146 update-source Loopback255 + neighbor 172.16.1.146 description la2r-dz01-vpnv4 + neighbor 172.16.1.146 timers 3 9 + neighbor 172.16.1.146 send-community + neighbor 172.16.1.147 remote-as 65342 + neighbor 172.16.1.147 next-hop-self + neighbor 172.16.1.147 update-source Loopback256 + neighbor 172.16.1.147 description la2r-dz01-ipv4 + neighbor 172.16.1.147 timers 3 9 + neighbor 172.16.1.147 send-community + neighbor 172.16.1.156 remote-as 65342 + neighbor 172.16.1.156 next-hop-self + neighbor 172.16.1.156 update-source Loopback255 + neighbor 172.16.1.156 description dz100a-lax1-tsw-vpnv4 + neighbor 172.16.1.156 timers 3 9 + neighbor 172.16.1.156 send-community + neighbor 172.16.1.158 remote-as 65342 + neighbor 172.16.1.158 next-hop-self + neighbor 172.16.1.158 update-source Loopback255 + neighbor 172.16.1.158 description laconic-was-sw01-vpnv4 + neighbor 172.16.1.158 timers 3 9 + neighbor 172.16.1.158 send-community + neighbor 172.16.1.159 remote-as 65342 + neighbor 172.16.1.159 next-hop-self + neighbor 172.16.1.159 update-source Loopback256 + neighbor 172.16.1.159 description dz100a-lax1-tsw-ipv4 + neighbor 172.16.1.159 timers 3 
9 + neighbor 172.16.1.159 send-community + neighbor 172.16.1.162 remote-as 65342 + neighbor 172.16.1.162 next-hop-self + neighbor 172.16.1.162 update-source Loopback256 + neighbor 172.16.1.162 description laconic-was-sw01-ipv4 + neighbor 172.16.1.162 timers 3 9 + neighbor 172.16.1.162 send-community + neighbor 172.16.1.163 remote-as 65342 + neighbor 172.16.1.163 next-hop-self + neighbor 172.16.1.163 update-source Loopback255 + neighbor 172.16.1.163 description dz100a-sea1-tsw-vpnv4 + neighbor 172.16.1.163 timers 3 9 + neighbor 172.16.1.163 send-community + neighbor 172.16.1.166 remote-as 65342 + neighbor 172.16.1.166 next-hop-self + neighbor 172.16.1.166 update-source Loopback256 + neighbor 172.16.1.166 description dz100a-sea1-tsw-ipv4 + neighbor 172.16.1.166 timers 3 9 + neighbor 172.16.1.166 send-community + neighbor 172.16.1.167 remote-as 65342 + neighbor 172.16.1.167 next-hop-self + neighbor 172.16.1.167 update-source Loopback255 + neighbor 172.16.1.167 description laconic-dfw-sw01-vpnv4 + neighbor 172.16.1.167 timers 3 9 + neighbor 172.16.1.167 send-community + neighbor 172.16.1.170 remote-as 65342 + neighbor 172.16.1.170 next-hop-self + neighbor 172.16.1.170 update-source Loopback255 + neighbor 172.16.1.170 description dz100a-ewr1-tsw-vpnv4 + neighbor 172.16.1.170 timers 3 9 + neighbor 172.16.1.170 send-community + neighbor 172.16.1.171 remote-as 65342 + neighbor 172.16.1.171 next-hop-self + neighbor 172.16.1.171 update-source Loopback256 + neighbor 172.16.1.171 description dz100a-dal1-tsw-ipv4 + neighbor 172.16.1.171 timers 3 9 + neighbor 172.16.1.171 send-community + neighbor 172.16.1.172 remote-as 65342 + neighbor 172.16.1.172 next-hop-self + neighbor 172.16.1.172 update-source Loopback256 + neighbor 172.16.1.172 description laconic-dfw-sw01-ipv4 + neighbor 172.16.1.172 timers 3 9 + neighbor 172.16.1.172 send-community + neighbor 172.16.1.173 remote-as 65342 + neighbor 172.16.1.173 next-hop-self + neighbor 172.16.1.173 update-source Loopback255 + neighbor 
172.16.1.173 description dz100a-iad1-tsw-vpnv4 + neighbor 172.16.1.173 timers 3 9 + neighbor 172.16.1.173 send-community + neighbor 172.16.1.176 remote-as 65342 + neighbor 172.16.1.176 next-hop-self + neighbor 172.16.1.176 update-source Loopback256 + neighbor 172.16.1.176 description dz100a-ewr1-tsw-ipv4 + neighbor 172.16.1.176 timers 3 9 + neighbor 172.16.1.176 send-community + neighbor 172.16.1.177 remote-as 65342 + neighbor 172.16.1.177 next-hop-self + neighbor 172.16.1.177 update-source Loopback255 + neighbor 172.16.1.177 description dz100a-dal1-tsw-vpnv4 + neighbor 172.16.1.177 timers 3 9 + neighbor 172.16.1.177 send-community + neighbor 172.16.1.178 remote-as 65342 + neighbor 172.16.1.178 next-hop-self + neighbor 172.16.1.178 update-source Loopback256 + neighbor 172.16.1.178 description dz100a-iad1-tsw-ipv4 + neighbor 172.16.1.178 timers 3 9 + neighbor 172.16.1.178 send-community + neighbor 172.16.1.179 remote-as 65342 + neighbor 172.16.1.179 next-hop-self + neighbor 172.16.1.179 update-source Loopback255 + neighbor 172.16.1.179 description fra-velia-vpnv4 + neighbor 172.16.1.179 timers 3 9 + neighbor 172.16.1.179 send-community + neighbor 172.16.1.182 remote-as 65342 + neighbor 172.16.1.182 next-hop-self + neighbor 172.16.1.182 update-source Loopback255 + neighbor 172.16.1.182 description dz103a-lon1-tsw-vpnv4 + neighbor 172.16.1.182 timers 3 9 + neighbor 172.16.1.182 send-community + neighbor 172.16.1.183 remote-as 65342 + neighbor 172.16.1.183 next-hop-self + neighbor 172.16.1.183 update-source Loopback256 + neighbor 172.16.1.183 description fra-velia-ipv4 + neighbor 172.16.1.183 timers 3 9 + neighbor 172.16.1.183 send-community + neighbor 172.16.1.184 remote-as 65342 + neighbor 172.16.1.184 next-hop-self + neighbor 172.16.1.184 update-source Loopback256 + neighbor 172.16.1.184 description dz100a-chi1-tsw-ipv4 + neighbor 172.16.1.184 timers 3 9 + neighbor 172.16.1.184 send-community + neighbor 172.16.1.185 remote-as 65342 + neighbor 172.16.1.185 
next-hop-self + neighbor 172.16.1.185 update-source Loopback256 + neighbor 172.16.1.185 description dz103a-lon1-tsw-ipv4 + neighbor 172.16.1.185 timers 3 9 + neighbor 172.16.1.185 send-community + neighbor 172.16.1.186 remote-as 65342 + neighbor 172.16.1.186 next-hop-self + neighbor 172.16.1.186 update-source Loopback255 + neighbor 172.16.1.186 description allnodes-fra1-vpnv4 + neighbor 172.16.1.186 timers 3 9 + neighbor 172.16.1.186 send-community + neighbor 172.16.1.187 remote-as 65342 + neighbor 172.16.1.187 next-hop-self + neighbor 172.16.1.187 update-source Loopback255 + neighbor 172.16.1.187 description dz100a-fra2-tsw-vpnv4 + neighbor 172.16.1.187 timers 3 9 + neighbor 172.16.1.187 send-community + neighbor 172.16.1.190 remote-as 65342 + neighbor 172.16.1.190 next-hop-self + neighbor 172.16.1.190 update-source Loopback256 + neighbor 172.16.1.190 description dz100a-fra2-tsw-ipv4 + neighbor 172.16.1.190 timers 3 9 + neighbor 172.16.1.190 send-community + neighbor 172.16.1.191 remote-as 65342 + neighbor 172.16.1.191 next-hop-self + neighbor 172.16.1.191 update-source Loopback255 + neighbor 172.16.1.191 description dz100a-chi1-tsw-vpnv4 + neighbor 172.16.1.191 timers 3 9 + neighbor 172.16.1.191 send-community + neighbor 172.16.1.194 remote-as 65342 + neighbor 172.16.1.194 next-hop-self + neighbor 172.16.1.194 update-source Loopback255 + neighbor 172.16.1.194 description dz100a-ams2-tsw-vpnv4 + neighbor 172.16.1.194 timers 3 9 + neighbor 172.16.1.194 send-community + neighbor 172.16.1.195 remote-as 65342 + neighbor 172.16.1.195 next-hop-self + neighbor 172.16.1.195 update-source Loopback256 + neighbor 172.16.1.195 description allnodes-fra1-ipv4 + neighbor 172.16.1.195 timers 3 9 + neighbor 172.16.1.195 send-community + neighbor 172.16.1.196 remote-as 65342 + neighbor 172.16.1.196 next-hop-self + neighbor 172.16.1.196 update-source Loopback255 + neighbor 172.16.1.196 description dzd-fra-01-vpnv4 + neighbor 172.16.1.196 timers 3 9 + neighbor 172.16.1.196 
send-community + neighbor 172.16.1.197 remote-as 65342 + neighbor 172.16.1.197 next-hop-self + neighbor 172.16.1.197 update-source Loopback256 + neighbor 172.16.1.197 description dz100a-ams2-tsw-ipv4 + neighbor 172.16.1.197 timers 3 9 + neighbor 172.16.1.197 send-community + neighbor 172.16.1.200 remote-as 65342 + neighbor 172.16.1.200 next-hop-self + neighbor 172.16.1.200 update-source Loopback255 + neighbor 172.16.1.200 description dzd-tok-01-vpnv4 + neighbor 172.16.1.200 timers 3 9 + neighbor 172.16.1.200 send-community + neighbor 172.16.1.201 remote-as 65342 + neighbor 172.16.1.201 next-hop-self + neighbor 172.16.1.201 update-source Loopback255 + neighbor 172.16.1.201 description dz115a-tyo2-tsw-vpnv4 + neighbor 172.16.1.201 timers 3 9 + neighbor 172.16.1.201 send-community + neighbor 172.16.1.204 remote-as 65342 + neighbor 172.16.1.204 next-hop-self + neighbor 172.16.1.204 update-source Loopback256 + neighbor 172.16.1.204 description dz115a-tyo2-tsw-ipv4 + neighbor 172.16.1.204 timers 3 9 + neighbor 172.16.1.204 send-community + neighbor 172.16.1.205 remote-as 65342 + neighbor 172.16.1.205 next-hop-self + neighbor 172.16.1.205 update-source Loopback256 + neighbor 172.16.1.205 description dzd-tok-01-ipv4 + neighbor 172.16.1.205 timers 3 9 + neighbor 172.16.1.205 send-community + neighbor 172.16.1.216 remote-as 65342 + neighbor 172.16.1.216 next-hop-self + neighbor 172.16.1.216 update-source Loopback255 + neighbor 172.16.1.216 description dgt-dzd-tyo-ty8-vpnv4 + neighbor 172.16.1.216 timers 3 9 + neighbor 172.16.1.216 send-community + neighbor 172.16.1.217 remote-as 65342 + neighbor 172.16.1.217 next-hop-self + neighbor 172.16.1.217 update-source Loopback256 + neighbor 172.16.1.217 description dzd-fra-01-ipv4 + neighbor 172.16.1.217 timers 3 9 + neighbor 172.16.1.217 send-community + neighbor 172.16.1.219 remote-as 65342 + neighbor 172.16.1.219 next-hop-self + neighbor 172.16.1.219 update-source Loopback256 + neighbor 172.16.1.219 description 
dgt-dzd-tyo-ty8-ipv4 + neighbor 172.16.1.219 timers 3 9 + neighbor 172.16.1.219 send-community + neighbor 172.16.1.222 remote-as 65342 + neighbor 172.16.1.222 next-hop-self + neighbor 172.16.1.222 update-source Loopback255 + neighbor 172.16.1.222 description dz100a-sgp1-tsw-vpnv4 + neighbor 172.16.1.222 timers 3 9 + neighbor 172.16.1.222 send-community + neighbor 172.16.1.223 remote-as 65342 + neighbor 172.16.1.223 next-hop-self + neighbor 172.16.1.223 update-source Loopback256 + neighbor 172.16.1.223 description dz100a-sgp1-tsw-ipv4 + neighbor 172.16.1.223 timers 3 9 + neighbor 172.16.1.223 send-community + ! + address-family ipv4 + neighbor 169.254.2.201 activate + neighbor 169.254.7.163 activate + no neighbor 172.16.0.1 activate + neighbor 172.16.0.2 activate + no neighbor 172.16.0.3 activate + neighbor 172.16.0.4 activate + no neighbor 172.16.0.5 activate + neighbor 172.16.0.6 activate + no neighbor 172.16.0.7 activate + neighbor 172.16.0.8 activate + no neighbor 172.16.0.9 activate + neighbor 172.16.0.14 activate + no neighbor 172.16.0.15 activate + no neighbor 172.16.0.16 activate + neighbor 172.16.0.17 activate + neighbor 172.16.0.20 activate + no neighbor 172.16.0.21 activate + neighbor 172.16.0.22 activate + no neighbor 172.16.0.23 activate + no neighbor 172.16.0.24 activate + neighbor 172.16.0.25 activate + no neighbor 172.16.0.26 activate + no neighbor 172.16.0.27 activate + neighbor 172.16.0.28 activate + neighbor 172.16.0.29 activate + no neighbor 172.16.0.30 activate + neighbor 172.16.0.31 activate + no neighbor 172.16.0.32 activate + no neighbor 172.16.0.33 activate + no neighbor 172.16.0.34 activate + neighbor 172.16.0.35 activate + no neighbor 172.16.0.36 activate + neighbor 172.16.0.37 activate + no neighbor 172.16.0.38 activate + neighbor 172.16.0.39 activate + no neighbor 172.16.0.42 activate + neighbor 172.16.0.43 activate + no neighbor 172.16.0.46 activate + no neighbor 172.16.0.47 activate + neighbor 172.16.0.48 activate + no neighbor 
172.16.0.49 activate + neighbor 172.16.0.50 activate + no neighbor 172.16.0.51 activate + no neighbor 172.16.0.56 activate + neighbor 172.16.0.57 activate + no neighbor 172.16.0.62 activate + neighbor 172.16.0.63 activate + no neighbor 172.16.0.68 activate + neighbor 172.16.0.69 activate + no neighbor 172.16.0.70 activate + neighbor 172.16.0.71 activate + neighbor 172.16.0.72 activate + no neighbor 172.16.0.73 activate + neighbor 172.16.0.74 activate + no neighbor 172.16.0.75 activate + no neighbor 172.16.0.76 activate + no neighbor 172.16.0.77 activate + no neighbor 172.16.0.78 activate + no neighbor 172.16.0.79 activate + neighbor 172.16.0.80 activate + neighbor 172.16.0.81 activate + no neighbor 172.16.0.82 activate + neighbor 172.16.0.83 activate + no neighbor 172.16.0.86 activate + no neighbor 172.16.0.87 activate + neighbor 172.16.0.88 activate + no neighbor 172.16.0.89 activate + no neighbor 172.16.0.92 activate + neighbor 172.16.0.93 activate + no neighbor 172.16.0.94 activate + no neighbor 172.16.0.95 activate + neighbor 172.16.0.96 activate + no neighbor 172.16.0.97 activate + no neighbor 172.16.0.100 activate + neighbor 172.16.0.101 activate + neighbor 172.16.0.106 activate + no neighbor 172.16.0.107 activate + neighbor 172.16.0.108 activate + neighbor 172.16.0.109 activate + neighbor 172.16.0.110 activate + neighbor 172.16.0.111 activate + neighbor 172.16.0.112 activate + neighbor 172.16.0.113 activate + neighbor 172.16.0.114 activate + neighbor 172.16.0.115 activate + no neighbor 172.16.0.140 activate + neighbor 172.16.0.141 activate + no neighbor 172.16.0.142 activate + neighbor 172.16.0.143 activate + no neighbor 172.16.0.148 activate + neighbor 172.16.0.149 activate + no neighbor 172.16.0.152 activate + neighbor 172.16.0.153 activate + no neighbor 172.16.0.156 activate + neighbor 172.16.0.157 activate + no neighbor 172.16.0.158 activate + neighbor 172.16.0.159 activate + no neighbor 172.16.0.160 activate + neighbor 172.16.0.161 activate + no 
neighbor 172.16.0.162 activate + neighbor 172.16.0.163 activate + no neighbor 172.16.0.164 activate + neighbor 172.16.0.165 activate + no neighbor 172.16.0.238 activate + neighbor 172.16.0.239 activate + neighbor 172.16.0.242 activate + neighbor 172.16.0.243 activate + neighbor 172.16.0.244 activate + no neighbor 172.16.0.245 activate + neighbor 172.16.0.254 activate + neighbor 172.16.0.255 activate + neighbor 172.16.1.6 activate + no neighbor 172.16.1.7 activate + no neighbor 172.16.1.10 activate + no neighbor 172.16.1.11 activate + neighbor 172.16.1.14 activate + no neighbor 172.16.1.15 activate + neighbor 172.16.1.26 activate + no neighbor 172.16.1.27 activate + neighbor 172.16.1.36 activate + neighbor 172.16.1.37 activate + neighbor 172.16.1.40 activate + no neighbor 172.16.1.41 activate + no neighbor 172.16.1.44 activate + no neighbor 172.16.1.45 activate + neighbor 172.16.1.52 activate + no neighbor 172.16.1.53 activate + no neighbor 172.16.1.58 activate + neighbor 172.16.1.59 activate + neighbor 172.16.1.64 activate + no neighbor 172.16.1.65 activate + no neighbor 172.16.1.70 activate + neighbor 172.16.1.71 activate + no neighbor 172.16.1.72 activate + neighbor 172.16.1.73 activate + neighbor 172.16.1.86 activate + no neighbor 172.16.1.87 activate + neighbor 172.16.1.90 activate + no neighbor 172.16.1.91 activate + no neighbor 172.16.1.98 activate + no neighbor 172.16.1.99 activate + neighbor 172.16.1.100 activate + neighbor 172.16.1.101 activate + no neighbor 172.16.1.111 activate + neighbor 172.16.1.114 activate + neighbor 172.16.1.115 activate + no neighbor 172.16.1.130 activate + neighbor 172.16.1.131 activate + neighbor 172.16.1.138 activate + no neighbor 172.16.1.139 activate + no neighbor 172.16.1.144 activate + neighbor 172.16.1.145 activate + no neighbor 172.16.1.146 activate + neighbor 172.16.1.147 activate + no neighbor 172.16.1.156 activate + no neighbor 172.16.1.158 activate + neighbor 172.16.1.159 activate + neighbor 172.16.1.162 activate + no 
neighbor 172.16.1.163 activate + neighbor 172.16.1.166 activate + no neighbor 172.16.1.167 activate + no neighbor 172.16.1.170 activate + neighbor 172.16.1.171 activate + neighbor 172.16.1.172 activate + no neighbor 172.16.1.173 activate + neighbor 172.16.1.176 activate + no neighbor 172.16.1.177 activate + neighbor 172.16.1.178 activate + no neighbor 172.16.1.179 activate + no neighbor 172.16.1.182 activate + neighbor 172.16.1.183 activate + neighbor 172.16.1.184 activate + neighbor 172.16.1.185 activate + no neighbor 172.16.1.186 activate + no neighbor 172.16.1.187 activate + neighbor 172.16.1.190 activate + no neighbor 172.16.1.191 activate + no neighbor 172.16.1.194 activate + neighbor 172.16.1.195 activate + no neighbor 172.16.1.196 activate + neighbor 172.16.1.197 activate + no neighbor 172.16.1.200 activate + no neighbor 172.16.1.201 activate + neighbor 172.16.1.204 activate + neighbor 172.16.1.205 activate + no neighbor 172.16.1.216 activate + neighbor 172.16.1.217 activate + neighbor 172.16.1.219 activate + no neighbor 172.16.1.222 activate + neighbor 172.16.1.223 activate + ! 
+ address-family vpn-ipv4 + neighbor 172.16.0.1 activate + neighbor 172.16.0.3 activate + neighbor 172.16.0.5 activate + neighbor 172.16.0.7 activate + neighbor 172.16.0.9 activate + neighbor 172.16.0.15 activate + neighbor 172.16.0.16 activate + neighbor 172.16.0.21 activate + neighbor 172.16.0.23 activate + neighbor 172.16.0.24 activate + neighbor 172.16.0.26 activate + neighbor 172.16.0.27 activate + neighbor 172.16.0.30 activate + neighbor 172.16.0.32 activate + neighbor 172.16.0.33 activate + neighbor 172.16.0.34 activate + neighbor 172.16.0.36 activate + neighbor 172.16.0.38 activate + neighbor 172.16.0.42 activate + neighbor 172.16.0.46 activate + neighbor 172.16.0.47 activate + neighbor 172.16.0.49 activate + neighbor 172.16.0.51 activate + neighbor 172.16.0.56 activate + neighbor 172.16.0.62 activate + neighbor 172.16.0.68 activate + neighbor 172.16.0.70 activate + neighbor 172.16.0.73 activate + neighbor 172.16.0.75 activate + neighbor 172.16.0.76 activate + neighbor 172.16.0.77 activate + neighbor 172.16.0.78 activate + neighbor 172.16.0.79 activate + neighbor 172.16.0.82 activate + neighbor 172.16.0.86 activate + neighbor 172.16.0.87 activate + neighbor 172.16.0.89 activate + neighbor 172.16.0.92 activate + neighbor 172.16.0.94 activate + neighbor 172.16.0.95 activate + neighbor 172.16.0.97 activate + neighbor 172.16.0.100 activate + neighbor 172.16.0.107 activate + neighbor 172.16.0.140 activate + neighbor 172.16.0.142 activate + neighbor 172.16.0.148 activate + neighbor 172.16.0.152 activate + neighbor 172.16.0.156 activate + neighbor 172.16.0.158 activate + neighbor 172.16.0.160 activate + neighbor 172.16.0.162 activate + neighbor 172.16.0.164 activate + neighbor 172.16.0.238 activate + neighbor 172.16.0.245 activate + neighbor 172.16.1.7 activate + neighbor 172.16.1.10 activate + neighbor 172.16.1.11 activate + neighbor 172.16.1.15 activate + neighbor 172.16.1.27 activate + neighbor 172.16.1.41 activate + neighbor 172.16.1.44 activate + neighbor 
172.16.1.45 activate + neighbor 172.16.1.53 activate + neighbor 172.16.1.58 activate + neighbor 172.16.1.65 activate + neighbor 172.16.1.70 activate + neighbor 172.16.1.72 activate + neighbor 172.16.1.87 activate + neighbor 172.16.1.91 activate + neighbor 172.16.1.98 activate + neighbor 172.16.1.99 activate + neighbor 172.16.1.111 activate + neighbor 172.16.1.130 activate + neighbor 172.16.1.139 activate + neighbor 172.16.1.144 activate + neighbor 172.16.1.146 activate + neighbor 172.16.1.156 activate + neighbor 172.16.1.158 activate + neighbor 172.16.1.163 activate + neighbor 172.16.1.167 activate + neighbor 172.16.1.170 activate + neighbor 172.16.1.173 activate + neighbor 172.16.1.177 activate + neighbor 172.16.1.179 activate + neighbor 172.16.1.182 activate + neighbor 172.16.1.186 activate + neighbor 172.16.1.187 activate + neighbor 172.16.1.191 activate + neighbor 172.16.1.194 activate + neighbor 172.16.1.196 activate + neighbor 172.16.1.200 activate + neighbor 172.16.1.201 activate + neighbor 172.16.1.216 activate + neighbor 172.16.1.222 activate + ! 
+ vrf vrf1 + rd 65342:1 + route-target import vpn-ipv4 65342:1 + route-target export vpn-ipv4 65342:1 + router-id 209.42.167.133 + neighbor 169.254.4.69 remote-as 65000 + neighbor 169.254.4.69 local-as 209321 no-prepend replace-as + neighbor 169.254.4.69 passive + neighbor 169.254.4.69 description USER-505 + neighbor 169.254.4.69 route-map RM-USER-505-IN in + neighbor 169.254.4.69 route-map RM-USER-505-OUT out + neighbor 169.254.4.69 maximum-routes 1 + neighbor 169.254.4.69 maximum-accepted-routes 1 + neighbor 169.254.4.155 remote-as 65000 + neighbor 169.254.4.155 local-as 209321 no-prepend replace-as + neighbor 169.254.4.155 passive + neighbor 169.254.4.155 description USER-502 + neighbor 169.254.4.155 route-map RM-USER-502-IN in + neighbor 169.254.4.155 route-map RM-USER-502-OUT out + neighbor 169.254.4.155 maximum-routes 1 + neighbor 169.254.4.155 maximum-accepted-routes 1 + neighbor 169.254.7.7 remote-as 65000 + neighbor 169.254.7.7 local-as 209321 no-prepend replace-as + neighbor 169.254.7.7 passive + neighbor 169.254.7.7 description USER-500 + neighbor 169.254.7.7 route-map RM-USER-500-IN in + neighbor 169.254.7.7 route-map RM-USER-500-OUT out + neighbor 169.254.7.7 maximum-routes 1 + neighbor 169.254.7.7 maximum-accepted-routes 1 +! +router isis 1 + net 49.0000.ac10.016e.0000.00 + router-id ipv4 172.16.1.110 + log-adjacency-changes + ! + address-family ipv4 unicast + ! + segment-routing mpls + no shutdown +! +router multicast + ipv4 + routing + software-forwarding kernel + ! + ipv6 + software-forwarding kernel +! +router pim sparse-mode + ipv4 + rp address 10.0.0.0 233.84.178.0/24 override +! +router msdp + peer 172.16.0.101 + mesh-group DZ-1 + local-interface Loopback256 + description sao001-dz002 + ! + peer 172.16.0.106 + mesh-group DZ-1 + local-interface Loopback256 + description dz-ams-01 + ! + peer 172.16.0.108 + mesh-group DZ-1 + local-interface Loopback256 + description ams001-dz002 + ! 
+ peer 172.16.0.109 + mesh-group DZ-1 + local-interface Loopback256 + description dub001-dz002 + ! + peer 172.16.0.110 + mesh-group DZ-1 + local-interface Loopback256 + description was001-dz001 + ! + peer 172.16.0.111 + mesh-group DZ-1 + local-interface Loopback256 + description nyc001-dz002 + ! + peer 172.16.0.112 + mesh-group DZ-1 + local-interface Loopback256 + description sjc001-dz002 + ! + peer 172.16.0.113 + mesh-group DZ-1 + local-interface Loopback256 + description hkg001-dz002 + ! + peer 172.16.0.114 + mesh-group DZ-1 + local-interface Loopback256 + description chi001-dz001 + ! + peer 172.16.0.115 + mesh-group DZ-1 + local-interface Loopback256 + description chi001-dz002 + ! + peer 172.16.0.14 + mesh-group DZ-1 + local-interface Loopback256 + description tyo001-dz002 + ! + peer 172.16.0.141 + mesh-group DZ-1 + local-interface Loopback256 + description dz-tor1-sw01 + ! + peer 172.16.0.143 + mesh-group DZ-1 + local-interface Loopback256 + description dz-mtl11-sw01 + ! + peer 172.16.0.149 + mesh-group DZ-1 + local-interface Loopback256 + description dz-ny7-sw02 + ! + peer 172.16.0.153 + mesh-group DZ-1 + local-interface Loopback256 + description dz-dc10-sw01 + ! + peer 172.16.0.157 + mesh-group DZ-1 + local-interface Loopback256 + description dz-ch2-sw01 + ! + peer 172.16.0.159 + mesh-group DZ-1 + local-interface Loopback256 + description dz-fr5-sw01 + ! + peer 172.16.0.161 + mesh-group DZ-1 + local-interface Loopback256 + description dz-sea10-sw01 + ! + peer 172.16.0.163 + mesh-group DZ-1 + local-interface Loopback256 + description dz-sg1-sw01 + ! + peer 172.16.0.165 + mesh-group DZ-1 + local-interface Loopback256 + description dz-ty9-sw01 + ! + peer 172.16.0.17 + mesh-group DZ-1 + local-interface Loopback256 + description fra001-dz002 + ! + peer 172.16.0.2 + mesh-group DZ-1 + local-interface Loopback256 + description dz-ny7-sw01 + ! + peer 172.16.0.20 + mesh-group DZ-1 + local-interface Loopback256 + description dz-ny5-sw01 + ! 
+ peer 172.16.0.22 + mesh-group DZ-1 + local-interface Loopback256 + description mrs001-dz001 + ! + peer 172.16.0.239 + mesh-group DZ-1 + local-interface Loopback256 + description dgt-dzd-sin-sg3 + ! + peer 172.16.0.242 + mesh-group DZ-1 + local-interface Loopback256 + description dz-fra-01 + ! + peer 172.16.0.243 + mesh-group DZ-1 + local-interface Loopback256 + description dz-sxb-01 + ! + peer 172.16.0.244 + mesh-group DZ-1 + local-interface Loopback256 + description dz-waw-01 + ! + peer 172.16.0.25 + mesh-group DZ-1 + local-interface Loopback256 + description dub001-dz001 + ! + peer 172.16.0.254 + mesh-group DZ-1 + local-interface Loopback256 + description dz-slc-sw01 + ! + peer 172.16.0.255 + mesh-group DZ-1 + local-interface Loopback256 + description cherlita + ! + peer 172.16.0.28 + mesh-group DZ-1 + local-interface Loopback256 + description mrs001-dz002 + ! + peer 172.16.0.29 + mesh-group DZ-1 + local-interface Loopback256 + description fr2-dzx-001 + ! + peer 172.16.0.31 + mesh-group DZ-1 + local-interface Loopback256 + description lon001-dz002 + ! + peer 172.16.0.35 + mesh-group DZ-1 + local-interface Loopback256 + description was001-dz002 + ! + peer 172.16.0.37 + mesh-group DZ-1 + local-interface Loopback256 + description nyc001-dz001 + ! + peer 172.16.0.39 + mesh-group DZ-1 + local-interface Loopback256 + description dz-mrs-01 + ! + peer 172.16.0.4 + mesh-group DZ-1 + local-interface Loopback256 + description sea001-dz001 + ! + peer 172.16.0.43 + mesh-group DZ-1 + local-interface Loopback256 + description tyo001-dz001 + ! + peer 172.16.0.48 + mesh-group DZ-1 + local-interface Loopback256 + description sjc001-dz001 + ! + peer 172.16.0.50 + mesh-group DZ-1 + local-interface Loopback256 + description dz-mad-01 + ! + peer 172.16.0.57 + mesh-group DZ-1 + local-interface Loopback256 + description nyc002-dz002 + ! + peer 172.16.0.6 + mesh-group DZ-1 + local-interface Loopback256 + description dz-ld4-sw01 + ! 
+ peer 172.16.0.63 + mesh-group DZ-1 + local-interface Loopback256 + description lax001-dz002 + ! + peer 172.16.0.69 + mesh-group DZ-1 + local-interface Loopback256 + description sin001-dz002 + ! + peer 172.16.0.71 + mesh-group DZ-1 + local-interface Loopback256 + description tyo002-dz002 + ! + peer 172.16.0.72 + mesh-group DZ-1 + local-interface Loopback256 + description dz-chi-sw01 + ! + peer 172.16.0.74 + mesh-group DZ-1 + local-interface Loopback256 + description hkg001-dz001 + ! + peer 172.16.0.8 + mesh-group DZ-1 + local-interface Loopback256 + description sea001-dz002 + ! + peer 172.16.0.80 + mesh-group DZ-1 + local-interface Loopback256 + description bom001-dz001 + ! + peer 172.16.0.81 + mesh-group DZ-1 + local-interface Loopback256 + description dz-lax-sw01 + ! + peer 172.16.0.83 + mesh-group DZ-1 + local-interface Loopback256 + description bom001-dz002 + ! + peer 172.16.0.88 + mesh-group DZ-1 + local-interface Loopback256 + description dfw001-dz001 + ! + peer 172.16.0.93 + mesh-group DZ-1 + local-interface Loopback256 + description dfw001-dz002 + ! + peer 172.16.0.96 + mesh-group DZ-1 + local-interface Loopback256 + description sao001-dz001 + ! + peer 172.16.1.100 + mesh-group DZ-1 + local-interface Loopback256 + description dz100a-slc1-tsw + ! + peer 172.16.1.101 + mesh-group DZ-1 + local-interface Loopback256 + description dgt-dzd-dub-db2 + ! + peer 172.16.1.114 + mesh-group DZ-1 + local-interface Loopback256 + description dgt-dzd-mrs-mrs1 + ! + peer 172.16.1.115 + mesh-group DZ-1 + local-interface Loopback256 + description swdzd01-lon2 + ! + peer 172.16.1.131 + mesh-group DZ-1 + local-interface Loopback256 + description lts-dzd-001 + ! + peer 172.16.1.138 + mesh-group DZ-1 + local-interface Loopback256 + description dz-slc2-sw01 + ! + peer 172.16.1.14 + mesh-group DZ-1 + local-interface Loopback256 + description dz-muc-01 + ! + peer 172.16.1.145 + mesh-group DZ-1 + local-interface Loopback256 + description au1c-dz01 + ! 
+ peer 172.16.1.147 + mesh-group DZ-1 + local-interface Loopback256 + description la2r-dz01 + ! + peer 172.16.1.159 + mesh-group DZ-1 + local-interface Loopback256 + description dz100a-lax1-tsw + ! + peer 172.16.1.162 + mesh-group DZ-1 + local-interface Loopback256 + description laconic-was-sw01 + ! + peer 172.16.1.166 + mesh-group DZ-1 + local-interface Loopback256 + description dz100a-sea1-tsw + ! + peer 172.16.1.171 + mesh-group DZ-1 + local-interface Loopback256 + description dz100a-dal1-tsw + ! + peer 172.16.1.172 + mesh-group DZ-1 + local-interface Loopback256 + description laconic-dfw-sw01 + ! + peer 172.16.1.176 + mesh-group DZ-1 + local-interface Loopback256 + description dz100a-ewr1-tsw + ! + peer 172.16.1.178 + mesh-group DZ-1 + local-interface Loopback256 + description dz100a-iad1-tsw + ! + peer 172.16.1.183 + mesh-group DZ-1 + local-interface Loopback256 + description fra-velia + ! + peer 172.16.1.184 + mesh-group DZ-1 + local-interface Loopback256 + description dz100a-chi1-tsw + ! + peer 172.16.1.185 + mesh-group DZ-1 + local-interface Loopback256 + description dz103a-lon1-tsw + ! + peer 172.16.1.190 + mesh-group DZ-1 + local-interface Loopback256 + description dz100a-fra2-tsw + ! + peer 172.16.1.195 + mesh-group DZ-1 + local-interface Loopback256 + description allnodes-fra1 + ! + peer 172.16.1.197 + mesh-group DZ-1 + local-interface Loopback256 + description dz100a-ams2-tsw + ! + peer 172.16.1.204 + mesh-group DZ-1 + local-interface Loopback256 + description dz115a-tyo2-tsw + ! + peer 172.16.1.205 + mesh-group DZ-1 + local-interface Loopback256 + description dzd-tok-01 + ! + peer 172.16.1.217 + mesh-group DZ-1 + local-interface Loopback256 + description dzd-fra-01 + ! + peer 172.16.1.219 + mesh-group DZ-1 + local-interface Loopback256 + description dgt-dzd-tyo-ty8 + ! + peer 172.16.1.223 + mesh-group DZ-1 + local-interface Loopback256 + description dz100a-sgp1-tsw + ! 
+ peer 172.16.1.26 + mesh-group DZ-1 + local-interface Loopback256 + description ce2-dzd-001 + ! + peer 172.16.1.36 + mesh-group DZ-1 + local-interface Loopback256 + description dgt-dzd-nyc-ny2 + ! + peer 172.16.1.37 + mesh-group DZ-1 + local-interface Loopback256 + description swerry + ! + peer 172.16.1.40 + mesh-group DZ-1 + local-interface Loopback256 + description cherydam + ! + peer 172.16.1.52 + mesh-group DZ-1 + local-interface Loopback256 + description dgt-dzd-ash-dc3 + ! + peer 172.16.1.59 + mesh-group DZ-1 + local-interface Loopback256 + description dgt-dzd-dal-da3 + ! + peer 172.16.1.6 + mesh-group DZ-1 + local-interface Loopback256 + description frankry + ! + peer 172.16.1.64 + mesh-group DZ-1 + local-interface Loopback256 + description bdc-dzd-001 + ! + peer 172.16.1.71 + mesh-group DZ-1 + local-interface Loopback256 + description dgt-dzd-lon-ths + ! + peer 172.16.1.73 + mesh-group DZ-1 + local-interface Loopback256 + description dgt-dzd-lax-la2 + ! + peer 172.16.1.86 + mesh-group DZ-1 + local-interface Loopback256 + description dgt-dzd-fra-fr5 + ! + peer 172.16.1.90 + mesh-group DZ-1 + local-interface Loopback256 + description dgt-dzd-ams-ams1 +! +management ssh + authentication protocol public-key +! +end \ No newline at end of file diff --git a/docs/switch-configs/was-sw01-running.cfg b/docs/switch-configs/was-sw01-running.cfg new file mode 100644 index 00000000..07b95139 --- /dev/null +++ b/docs/switch-configs/was-sw01-running.cfg @@ -0,0 +1,2404 @@ +! Command: show running-config +! device: laconic-was-sw01 (DCS-7280CR3A-32S, EOS-4.34.0F) +! +! boot system flash:/EOS-4.34.0F-x86_64.swi +! +no aaa root +! +username install privilege 15 role network-admin secret sha512 $6$HoBlLIZZ3TgjgrIv$XHGTGSilu6ZS3X8VnPG4RZ.lUYVoX5pVOO70WKToBv.K3URG6P32qLdR0iBM2oaKVfq8KEYHlUrdEs5Ky.Att0 +username install ssh-key ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIFfXOvzoBAtK2IcqbjTWH8PWwqBNhXkRCZ2xR40EQ7at rix@bitwise3.localhost +! 
+hardware counter feature gre tunnel interface out +hardware counter feature gre tunnel interface in +! +daemon doublezero-agent + exec /usr/local/bin/doublezero-agent -pubkey DLajvcrHuZpbrJKY31Bgdd7oymCADDUPN1N77Rvd2QxN -controller 35.85.74.71:7000 -verbose + no shutdown +! +daemon doublezero-telemetry + exec /usr/local/bin/doublezero-telemetry --local-device-pubkey DLajvcrHuZpbrJKY31Bgdd7oymCADDUPN1N77Rvd2QxN --env mainnet --keypair /mnt/flash/metrics-publisher.json + no shutdown +! +daemon eapilocal + exec /usr/bin/EosSdkRpcAgent --daemon-name eapilocal + no shutdown +! +hardware access-list update default-result permit +! +no service interface inactive port-id allocation disabled +! +service routing protocols model multi-agent +! +logging buffered 128000 +no logging console +logging facility local7 +! +management api eos-sdk-rpc + transport grpc eapilocal + localhost loopback + service all + no disabled +! +hostname laconic-was-sw01 +ip name-server vrf default 1.1.1.1 +ip name-server vrf default 8.8.8.8 +ip name-server vrf default 9.9.9.9 +! +spanning-tree mode mstp +! +system l1 + unsupported speed action error + unsupported error-correction action error +! +vrf instance management +! +vrf instance vrf1 +! +management api netconf + transport ssh netconf + vrf management +! +monitor telemetry influx + destination influxdb DZ_INFLUX + url https://us-east-1-1.aws.cloud2.influxdata.com + database name doublezero-mainnet-beta + retention policy autogen + vrf management + username DZ password 7 095F46084C5445524F5A401E391E12390F182A533804044601684B1017512E1E4F0E454371661B5D5511125C5A0640113D0E26521F0B5D170E365013111B030E0F3A3C7E673612463E4522047F6A71660C5635390368425E655B647B004F342E547527230C0F5305092132454A5F1E3F335D + tag global dzd_pubkey DLajvcrHuZpbrJKY31Bgdd7oymCADDUPN1N77Rvd2QxN + tag global location was +! 
+interface Ethernet1/1 + mtu 2048 + speed forced 10000full + no switchport + ip address 64.92.84.81/31 + ip verify unicast source reachable-via rx allow-default + ip access-group SEC-DIA-IN in + service-policy type pbr input VALIDATOR-RELAY +! +interface Ethernet1/2 + shutdown +! +interface Ethernet1/3 + shutdown +! +interface Ethernet1/4 + shutdown +! +interface Ethernet3/1 +! +interface Ethernet4/1 + mtu 2048 + no switchport + ip address 172.16.1.188/31 + pim ipv4 sparse-mode + isis enable 1 + isis circuit-type level-2 + isis hello-interval 1 + isis metric 25550 + isis hello padding + isis network point-to-point +! +interface Ethernet5/1 + mtu 2048 + no switchport + ip address 172.16.1.198/31 + pim ipv4 sparse-mode + isis enable 1 + isis circuit-type level-2 + isis hello-interval 1 + isis metric 147 + isis hello padding + isis network point-to-point +! +interface Ethernet6/1 +! +interface Ethernet7/1 +! +interface Ethernet8/1 +! +interface Ethernet9/1 +! +interface Ethernet10/1 +! +interface Ethernet11/1 +! +interface Ethernet12/1 +! +interface Ethernet13/1 +! +interface Ethernet14/1 +! +interface Ethernet15/1 +! +interface Ethernet16/1 +! +interface Ethernet17/1 + shutdown +! +interface Ethernet18/1 +! +interface Ethernet19/1 +! +interface Ethernet20/1 +! +interface Ethernet21/1 +! +interface Ethernet22/1 +! +interface Ethernet23/1 +! +interface Ethernet24/1 +! +interface Ethernet25/1 +! +interface Ethernet26/1 +! +interface Ethernet27/1 +! +interface Ethernet28/1 +! +interface Ethernet29/1 +! +interface Ethernet30/1 +! +interface Ethernet31/1 +! +interface Ethernet32/1 +! +interface Loopback100 + ip address 137.239.194.64/32 + isis enable 1 +! +interface Loopback101 + ip address 137.239.194.65/32 +! +interface Loopback255 + ip address 172.16.1.158/32 + node-segment ipv4 index 79 + isis enable 1 +! +interface Loopback256 + ip address 172.16.1.162/32 + isis enable 1 +! +interface Loopback1000 + description RP Address + ip address 10.0.0.0/32 +! 
+interface Management1 + vrf management + ip address 137.239.200.198/30 +! +interface Tunnel500 +! +hardware tcam + profile tunnel-interface-acl + feature acl port ip + sequence 45 + key size limit 160 + key field dscp dst-ip ip-frag ip-protocol l4-dst-port l4-ops l4-src-port src-ip tcp-control ttl + action count drop mirror + packet ipv4 forwarding bridged + packet ipv4 forwarding routed + packet ipv4 forwarding routed multicast + packet ipv4 mpls ipv4 forwarding mpls decap + packet ipv4 mpls ipv6 forwarding mpls decap + packet ipv4 non-vxlan forwarding routed decap + packet ipv4 vxlan eth ipv4 forwarding routed decap + packet ipv4 vxlan forwarding bridged decap + ! + feature acl port ip egress mpls-tunnelled-match + sequence 95 + ! + feature acl port ipv6 + sequence 25 + key field dst-ipv6 ipv6-next-header ipv6-traffic-class l4-dst-port l4-ops-3b l4-src-port src-ipv6-high src-ipv6-low tcp-control + action count drop mirror + packet ipv6 forwarding bridged + packet ipv6 forwarding routed + packet ipv6 forwarding routed multicast + packet ipv6 ipv6 forwarding routed decap + ! + feature acl port ipv6 egress + sequence 105 + key field dst-ipv6 ipv6-next-header ipv6-traffic-class l4-dst-port l4-src-port src-ipv6-high src-ipv6-low tcp-control + action count drop mirror + packet ipv6 forwarding bridged + packet ipv6 forwarding routed + ! 
+ feature acl port mac + sequence 55 + key size limit 160 + key field dst-mac ether-type src-mac + action count drop mirror + packet ipv4 forwarding bridged + packet ipv4 forwarding routed + packet ipv4 forwarding routed multicast + packet ipv4 mpls ipv4 forwarding mpls decap + packet ipv4 mpls ipv6 forwarding mpls decap + packet ipv4 non-vxlan forwarding routed decap + packet ipv4 vxlan forwarding bridged decap + packet ipv6 forwarding bridged + packet ipv6 forwarding routed + packet ipv6 forwarding routed decap + packet ipv6 forwarding routed multicast + packet ipv6 ipv6 forwarding routed decap + packet mpls forwarding bridged decap + packet mpls ipv4 forwarding mpls + packet mpls ipv6 forwarding mpls + packet mpls non-ip forwarding mpls + packet non-ip forwarding bridged + ! + feature acl subintf ip + sequence 40 + key size limit 160 + key field dscp dst-ip ip-frag ip-protocol l4-dst-port l4-ops-18b l4-src-port src-ip tcp-control ttl + action count drop + packet ipv4 forwarding routed + ! + feature acl subintf ipv6 + sequence 15 + key field dst-ipv6 ipv6-next-header l4-dst-port l4-src-port src-ipv6-high src-ipv6-low tcp-control + action count drop + packet ipv6 forwarding routed + ! + feature acl tunnel interface ip + key size limit 160 + key field inner-dst-ip inner-ip-frag inner-ip-protocol inner-l4-dst-port inner-l4-src-port inner-src-ip inner-tcp-control inner-tos inner-ttl l4-ops-7b + action count drop + packet ipv4 non-vxlan forwarding routed decap + ! + feature acl vlan ip + sequence 35 + key size limit 160 + key field dscp dst-ip ip-frag ip-protocol l4-dst-port l4-ops-18b l4-src-port src-ip tcp-control ttl + action count drop + packet ipv4 forwarding routed + packet ipv4 mpls ipv4 forwarding mpls decap + packet ipv4 mpls ipv6 forwarding mpls decap + packet ipv4 non-vxlan forwarding routed decap + packet ipv4 vxlan eth ipv4 forwarding routed decap + ! 
+ feature acl vlan ipv6 + sequence 10 + key field dst-ipv6 ipv6-next-header l4-dst-port l4-src-port src-ipv6-high src-ipv6-low tcp-control + action count drop + packet ipv6 forwarding routed + packet ipv6 ipv6 forwarding routed decap + ! + feature acl vlan ipv6 egress + sequence 20 + key field dst-ipv6 ipv6-next-header ipv6-traffic-class l4-dst-port l4-src-port src-ipv6-high src-ipv6-low tcp-control + action count drop mirror + packet ipv6 forwarding bridged + packet ipv6 forwarding routed + ! + feature counter lfib + sequence 85 + ! + feature forwarding-destination mpls + sequence 100 + ! + feature mirror ip + sequence 80 + key size limit 160 + key field dscp dst-ip ip-frag ip-protocol l4-dst-port l4-ops l4-src-port src-ip tcp-control + action count mirror set-policer + packet ipv4 forwarding bridged + packet ipv4 forwarding routed + packet ipv4 forwarding routed multicast + packet ipv4 non-vxlan forwarding routed decap + ! + feature mpls + sequence 5 + key size limit 160 + action drop redirect set-ecn + packet ipv4 mpls ipv4 forwarding mpls decap + packet ipv4 mpls ipv6 forwarding mpls decap + packet mpls ipv4 forwarding mpls + packet mpls ipv6 forwarding mpls + packet mpls non-ip forwarding mpls + ! + feature mpls pop ingress + sequence 90 + ! + feature pbr ip + sequence 60 + key size limit 160 + key field dscp dst-ip ip-frag ip-protocol l4-dst-port l4-ops-18b l4-src-port src-ip tcp-control + action count redirect + packet ipv4 forwarding routed + packet ipv4 mpls ipv4 forwarding mpls decap + packet ipv4 mpls ipv6 forwarding mpls decap + packet ipv4 non-vxlan forwarding routed decap + packet ipv4 vxlan forwarding bridged decap + ! + feature pbr ipv6 + sequence 30 + key field dst-ipv6 ipv6-next-header l4-dst-port l4-src-port src-ipv6-high src-ipv6-low tcp-control + action count redirect + packet ipv6 forwarding routed + ! 
+ feature pbr mpls + sequence 65 + key size limit 160 + key field mpls-inner-ip-tos + action count drop redirect + packet mpls ipv4 forwarding mpls + packet mpls ipv6 forwarding mpls + packet mpls non-ip forwarding mpls + ! + feature qos ip + sequence 75 + key size limit 160 + key field dscp dst-ip ip-frag ip-protocol l4-dst-port l4-ops l4-src-port src-ip tcp-control + action set-dscp set-policer set-tc + packet ipv4 forwarding routed + packet ipv4 forwarding routed multicast + packet ipv4 mpls ipv4 forwarding mpls decap + packet ipv4 mpls ipv6 forwarding mpls decap + packet ipv4 non-vxlan forwarding routed decap + ! + feature qos ipv6 + sequence 70 + key field dst-ipv6 ipv6-next-header ipv6-traffic-class l4-dst-port l4-src-port src-ipv6-high src-ipv6-low + action set-dscp set-policer set-tc + packet ipv6 forwarding routed + ! + feature tunnel vxlan + sequence 50 + key size limit 160 + packet ipv4 vxlan eth ipv4 forwarding routed decap + packet ipv4 vxlan forwarding bridged decap + system profile tunnel-interface-acl +! 
+ip access-list MAIN-CONTROL-PLANE-ACL + counters per-entry + 10 permit icmp any any + 20 permit ip any any tracked + 30 permit udp any any eq bfd ttl eq 255 + 40 permit udp any any eq bfd-echo ttl eq 254 + 50 permit udp any any eq multihop-bfd micro-bfd sbfd + 60 permit udp any eq sbfd any eq sbfd-initiator + 70 permit ospf any any + 80 permit tcp any any eq ssh telnet www snmp bgp https msdp ldp netconf-ssh gnmi + 90 permit udp any any eq bootps bootpc ntp snmp ptp-event ptp-general rip ldp + 100 permit tcp any any eq mlag ttl eq 255 + 110 permit udp any any eq mlag ttl eq 255 + 120 permit vrrp any any + 130 permit ahp any any + 140 permit pim any any + 150 permit igmp any any + 160 permit tcp any any range 5900 5910 + 170 permit tcp any any range 50000 50100 + 180 permit udp any any range 51000 51100 + 190 permit tcp any any eq 3333 + 200 permit tcp any any eq nat ttl eq 255 + 210 permit tcp any eq bgp any + 220 permit rsvp any any + 230 permit tcp any any eq 9340 + 240 permit tcp any any eq 9559 + 250 permit udp any any eq 8503 + 260 permit udp any any eq lsp-ping + 270 permit udp any eq lsp-ping any + 280 remark Permit TWAMP (UDP 862) + 290 permit udp any any eq 862 +! +ip access-list SEC-DIA-IN + counters per-entry + 10 deny ip 0.0.0.0/8 any + 20 deny ip 10.0.0.0/8 any + 30 deny ip 100.64.0.0/10 any + 40 deny ip 127.0.0.0/8 any + 50 deny ip 169.254.0.0/16 any + 60 deny ip 172.16.0.0/12 any + 70 deny ip 192.0.0.0/24 any + 80 deny ip 192.0.2.0/24 any + 90 deny ip 192.168.0.0/16 any + 100 deny ip 198.18.0.0/15 any + 110 deny ip 198.51.100.0/24 any + 120 deny ip 203.0.113.0/24 any + 130 deny ip 224.0.0.0/3 any + 140 permit ip any any +! +ip access-list SEC-USER-PUB-MCAST-IN + counters per-entry + 10 permit icmp any any + 20 permit tcp any any eq bgp + 30 permit ip any host 224.0.0.13 + 40 permit ip any 233.84.178.0/24 + 50 deny ip any any +! 
+ip access-list SEC-USER-SUB-MCAST-IN + counters per-entry + 10 permit icmp any any + 20 permit tcp any any eq bgp + 30 permit ip any host 224.0.0.13 + 40 deny ip any any +! +ip access-list SHRED-RELAY + 10 permit udp any any eq 20000 +! +ip access-list VALIDATOR-RELAY-ACL + 10 permit udp any any eq 8001 + 20 permit udp any any range 9000 9025 + 30 permit tcp any any eq 8001 +! +class-map type pbr match-any VALIDATOR-RELAY-CLASS + 10 match ip access-group VALIDATOR-RELAY-ACL +! +ip routing +no ip routing vrf management +ip routing vrf vrf1 +! +ip community-list COMM-ALL_MCAST_USERS permit 21682:1300 +ip community-list COMM-ALL_USERS permit 21682:1200 +ip community-list COMM-WAS_USERS permit 21682:10024 +! +system control-plane + ip access-group MAIN-CONTROL-PLANE-ACL in +! +monitor session 1 ip access-group SHRED-RELAY +monitor session 1 source Ethernet1/1 rx +monitor session 1 destination Cpu +! +ip route 0.0.0.0/0 64.92.84.80 +ip route vrf management 0.0.0.0/0 137.239.200.197 +! +mpls ip +! +mpls icmp ttl-exceeded tunneling +mpls icmp ip source-interface Loopback255 +! +ntp server 0.pool.ntp.org +ntp server 1.pool.ntp.org +ntp server 2.pool.ntp.org +! +policy-map type pbr VALIDATOR-RELAY + 10 class VALIDATOR-RELAY-CLASS + set nexthop 172.16.1.189 +! 
+router bgp 65342 + router-id 172.16.1.158 + timers bgp 1 3 + distance bgp 20 200 200 + neighbor 172.16.0.1 remote-as 65342 + neighbor 172.16.0.1 next-hop-self + neighbor 172.16.0.1 update-source Loopback255 + neighbor 172.16.0.1 description dz-ny7-sw01-vpnv4 + neighbor 172.16.0.1 timers 3 9 + neighbor 172.16.0.1 send-community + neighbor 172.16.0.2 remote-as 65342 + neighbor 172.16.0.2 next-hop-self + neighbor 172.16.0.2 update-source Loopback256 + neighbor 172.16.0.2 description dz-ny7-sw01-ipv4 + neighbor 172.16.0.2 timers 3 9 + neighbor 172.16.0.2 send-community + neighbor 172.16.0.3 remote-as 65342 + neighbor 172.16.0.3 next-hop-self + neighbor 172.16.0.3 update-source Loopback255 + neighbor 172.16.0.3 description sea001-dz001-vpnv4 + neighbor 172.16.0.3 timers 3 9 + neighbor 172.16.0.3 send-community + neighbor 172.16.0.4 remote-as 65342 + neighbor 172.16.0.4 next-hop-self + neighbor 172.16.0.4 update-source Loopback256 + neighbor 172.16.0.4 description sea001-dz001-ipv4 + neighbor 172.16.0.4 timers 3 9 + neighbor 172.16.0.4 send-community + neighbor 172.16.0.5 remote-as 65342 + neighbor 172.16.0.5 next-hop-self + neighbor 172.16.0.5 update-source Loopback255 + neighbor 172.16.0.5 description dz-ld4-sw01-vpnv4 + neighbor 172.16.0.5 timers 3 9 + neighbor 172.16.0.5 send-community + neighbor 172.16.0.6 remote-as 65342 + neighbor 172.16.0.6 next-hop-self + neighbor 172.16.0.6 update-source Loopback256 + neighbor 172.16.0.6 description dz-ld4-sw01-ipv4 + neighbor 172.16.0.6 timers 3 9 + neighbor 172.16.0.6 send-community + neighbor 172.16.0.7 remote-as 65342 + neighbor 172.16.0.7 next-hop-self + neighbor 172.16.0.7 update-source Loopback255 + neighbor 172.16.0.7 description sea001-dz002-vpnv4 + neighbor 172.16.0.7 timers 3 9 + neighbor 172.16.0.7 send-community + neighbor 172.16.0.8 remote-as 65342 + neighbor 172.16.0.8 next-hop-self + neighbor 172.16.0.8 update-source Loopback256 + neighbor 172.16.0.8 description sea001-dz002-ipv4 + neighbor 172.16.0.8 timers 3 
9 + neighbor 172.16.0.8 send-community + neighbor 172.16.0.9 remote-as 65342 + neighbor 172.16.0.9 next-hop-self + neighbor 172.16.0.9 update-source Loopback255 + neighbor 172.16.0.9 description tyo001-dz002-vpnv4 + neighbor 172.16.0.9 timers 3 9 + neighbor 172.16.0.9 send-community + neighbor 172.16.0.14 remote-as 65342 + neighbor 172.16.0.14 next-hop-self + neighbor 172.16.0.14 update-source Loopback256 + neighbor 172.16.0.14 description tyo001-dz002-ipv4 + neighbor 172.16.0.14 timers 3 9 + neighbor 172.16.0.14 send-community + neighbor 172.16.0.15 remote-as 65342 + neighbor 172.16.0.15 next-hop-self + neighbor 172.16.0.15 update-source Loopback255 + neighbor 172.16.0.15 description dz-ny5-sw01-vpnv4 + neighbor 172.16.0.15 timers 3 9 + neighbor 172.16.0.15 send-community + neighbor 172.16.0.16 remote-as 65342 + neighbor 172.16.0.16 next-hop-self + neighbor 172.16.0.16 update-source Loopback255 + neighbor 172.16.0.16 description fra001-dz002-vpnv4 + neighbor 172.16.0.16 timers 3 9 + neighbor 172.16.0.16 send-community + neighbor 172.16.0.17 remote-as 65342 + neighbor 172.16.0.17 next-hop-self + neighbor 172.16.0.17 update-source Loopback256 + neighbor 172.16.0.17 description fra001-dz002-ipv4 + neighbor 172.16.0.17 timers 3 9 + neighbor 172.16.0.17 send-community + neighbor 172.16.0.20 remote-as 65342 + neighbor 172.16.0.20 next-hop-self + neighbor 172.16.0.20 update-source Loopback256 + neighbor 172.16.0.20 description dz-ny5-sw01-ipv4 + neighbor 172.16.0.20 timers 3 9 + neighbor 172.16.0.20 send-community + neighbor 172.16.0.21 remote-as 65342 + neighbor 172.16.0.21 next-hop-self + neighbor 172.16.0.21 update-source Loopback255 + neighbor 172.16.0.21 description mrs001-dz001-vpnv4 + neighbor 172.16.0.21 timers 3 9 + neighbor 172.16.0.21 send-community + neighbor 172.16.0.22 remote-as 65342 + neighbor 172.16.0.22 next-hop-self + neighbor 172.16.0.22 update-source Loopback256 + neighbor 172.16.0.22 description mrs001-dz001-ipv4 + neighbor 172.16.0.22 timers 3 9 + 
neighbor 172.16.0.22 send-community + neighbor 172.16.0.23 remote-as 65342 + neighbor 172.16.0.23 next-hop-self + neighbor 172.16.0.23 update-source Loopback255 + neighbor 172.16.0.23 description fr2-dzx-001-vpnv4 + neighbor 172.16.0.23 timers 3 9 + neighbor 172.16.0.23 send-community + neighbor 172.16.0.24 remote-as 65342 + neighbor 172.16.0.24 next-hop-self + neighbor 172.16.0.24 update-source Loopback255 + neighbor 172.16.0.24 description dub001-dz001-vpnv4 + neighbor 172.16.0.24 timers 3 9 + neighbor 172.16.0.24 send-community + neighbor 172.16.0.25 remote-as 65342 + neighbor 172.16.0.25 next-hop-self + neighbor 172.16.0.25 update-source Loopback256 + neighbor 172.16.0.25 description dub001-dz001-ipv4 + neighbor 172.16.0.25 timers 3 9 + neighbor 172.16.0.25 send-community + neighbor 172.16.0.26 remote-as 65342 + neighbor 172.16.0.26 next-hop-self + neighbor 172.16.0.26 update-source Loopback255 + neighbor 172.16.0.26 description dub001-dz002-vpnv4 + neighbor 172.16.0.26 timers 3 9 + neighbor 172.16.0.26 send-community + neighbor 172.16.0.27 remote-as 65342 + neighbor 172.16.0.27 next-hop-self + neighbor 172.16.0.27 update-source Loopback255 + neighbor 172.16.0.27 description mrs001-dz002-vpnv4 + neighbor 172.16.0.27 timers 3 9 + neighbor 172.16.0.27 send-community + neighbor 172.16.0.28 remote-as 65342 + neighbor 172.16.0.28 next-hop-self + neighbor 172.16.0.28 update-source Loopback256 + neighbor 172.16.0.28 description mrs001-dz002-ipv4 + neighbor 172.16.0.28 timers 3 9 + neighbor 172.16.0.28 send-community + neighbor 172.16.0.29 remote-as 65342 + neighbor 172.16.0.29 next-hop-self + neighbor 172.16.0.29 update-source Loopback256 + neighbor 172.16.0.29 description fr2-dzx-001-ipv4 + neighbor 172.16.0.29 timers 3 9 + neighbor 172.16.0.29 send-community + neighbor 172.16.0.30 remote-as 65342 + neighbor 172.16.0.30 next-hop-self + neighbor 172.16.0.30 update-source Loopback255 + neighbor 172.16.0.30 description lon001-dz002-vpnv4 + neighbor 172.16.0.30 timers 3 
9 + neighbor 172.16.0.30 send-community + neighbor 172.16.0.31 remote-as 65342 + neighbor 172.16.0.31 next-hop-self + neighbor 172.16.0.31 update-source Loopback256 + neighbor 172.16.0.31 description lon001-dz002-ipv4 + neighbor 172.16.0.31 timers 3 9 + neighbor 172.16.0.31 send-community + neighbor 172.16.0.32 remote-as 65342 + neighbor 172.16.0.32 next-hop-self + neighbor 172.16.0.32 update-source Loopback255 + neighbor 172.16.0.32 description was001-dz001-vpnv4 + neighbor 172.16.0.32 timers 3 9 + neighbor 172.16.0.32 send-community + neighbor 172.16.0.33 remote-as 65342 + neighbor 172.16.0.33 next-hop-self + neighbor 172.16.0.33 update-source Loopback255 + neighbor 172.16.0.33 description dz-mrs-01-vpnv4 + neighbor 172.16.0.33 timers 3 9 + neighbor 172.16.0.33 send-community + neighbor 172.16.0.34 remote-as 65342 + neighbor 172.16.0.34 next-hop-self + neighbor 172.16.0.34 update-source Loopback255 + neighbor 172.16.0.34 description was001-dz002-vpnv4 + neighbor 172.16.0.34 timers 3 9 + neighbor 172.16.0.34 send-community + neighbor 172.16.0.35 remote-as 65342 + neighbor 172.16.0.35 next-hop-self + neighbor 172.16.0.35 update-source Loopback256 + neighbor 172.16.0.35 description was001-dz002-ipv4 + neighbor 172.16.0.35 timers 3 9 + neighbor 172.16.0.35 send-community + neighbor 172.16.0.36 remote-as 65342 + neighbor 172.16.0.36 next-hop-self + neighbor 172.16.0.36 update-source Loopback255 + neighbor 172.16.0.36 description nyc001-dz001-vpnv4 + neighbor 172.16.0.36 timers 3 9 + neighbor 172.16.0.36 send-community + neighbor 172.16.0.37 remote-as 65342 + neighbor 172.16.0.37 next-hop-self + neighbor 172.16.0.37 update-source Loopback256 + neighbor 172.16.0.37 description nyc001-dz001-ipv4 + neighbor 172.16.0.37 timers 3 9 + neighbor 172.16.0.37 send-community + neighbor 172.16.0.38 remote-as 65342 + neighbor 172.16.0.38 next-hop-self + neighbor 172.16.0.38 update-source Loopback255 + neighbor 172.16.0.38 description nyc001-dz002-vpnv4 + neighbor 172.16.0.38 timers 
3 9 + neighbor 172.16.0.38 send-community + neighbor 172.16.0.39 remote-as 65342 + neighbor 172.16.0.39 next-hop-self + neighbor 172.16.0.39 update-source Loopback256 + neighbor 172.16.0.39 description dz-mrs-01-ipv4 + neighbor 172.16.0.39 timers 3 9 + neighbor 172.16.0.39 send-community + neighbor 172.16.0.42 remote-as 65342 + neighbor 172.16.0.42 next-hop-self + neighbor 172.16.0.42 update-source Loopback255 + neighbor 172.16.0.42 description tyo001-dz001-vpnv4 + neighbor 172.16.0.42 timers 3 9 + neighbor 172.16.0.42 send-community + neighbor 172.16.0.43 remote-as 65342 + neighbor 172.16.0.43 next-hop-self + neighbor 172.16.0.43 update-source Loopback256 + neighbor 172.16.0.43 description tyo001-dz001-ipv4 + neighbor 172.16.0.43 timers 3 9 + neighbor 172.16.0.43 send-community + neighbor 172.16.0.46 remote-as 65342 + neighbor 172.16.0.46 next-hop-self + neighbor 172.16.0.46 update-source Loopback255 + neighbor 172.16.0.46 description dz-mad-01-vpnv4 + neighbor 172.16.0.46 timers 3 9 + neighbor 172.16.0.46 send-community + neighbor 172.16.0.47 remote-as 65342 + neighbor 172.16.0.47 next-hop-self + neighbor 172.16.0.47 update-source Loopback255 + neighbor 172.16.0.47 description sjc001-dz001-vpnv4 + neighbor 172.16.0.47 timers 3 9 + neighbor 172.16.0.47 send-community + neighbor 172.16.0.48 remote-as 65342 + neighbor 172.16.0.48 next-hop-self + neighbor 172.16.0.48 update-source Loopback256 + neighbor 172.16.0.48 description sjc001-dz001-ipv4 + neighbor 172.16.0.48 timers 3 9 + neighbor 172.16.0.48 send-community + neighbor 172.16.0.49 remote-as 65342 + neighbor 172.16.0.49 next-hop-self + neighbor 172.16.0.49 update-source Loopback255 + neighbor 172.16.0.49 description sjc001-dz002-vpnv4 + neighbor 172.16.0.49 timers 3 9 + neighbor 172.16.0.49 send-community + neighbor 172.16.0.50 remote-as 65342 + neighbor 172.16.0.50 next-hop-self + neighbor 172.16.0.50 update-source Loopback256 + neighbor 172.16.0.50 description dz-mad-01-ipv4 + neighbor 172.16.0.50 timers 3 9 
+ neighbor 172.16.0.50 send-community + neighbor 172.16.0.51 remote-as 65342 + neighbor 172.16.0.51 next-hop-self + neighbor 172.16.0.51 update-source Loopback255 + neighbor 172.16.0.51 description dz-chi-sw01-vpnv4 + neighbor 172.16.0.51 timers 3 9 + neighbor 172.16.0.51 send-community + neighbor 172.16.0.56 remote-as 65342 + neighbor 172.16.0.56 next-hop-self + neighbor 172.16.0.56 update-source Loopback255 + neighbor 172.16.0.56 description nyc002-dz002-vpnv4 + neighbor 172.16.0.56 timers 3 9 + neighbor 172.16.0.56 send-community + neighbor 172.16.0.57 remote-as 65342 + neighbor 172.16.0.57 next-hop-self + neighbor 172.16.0.57 update-source Loopback256 + neighbor 172.16.0.57 description nyc002-dz002-ipv4 + neighbor 172.16.0.57 timers 3 9 + neighbor 172.16.0.57 send-community + neighbor 172.16.0.62 remote-as 65342 + neighbor 172.16.0.62 next-hop-self + neighbor 172.16.0.62 update-source Loopback255 + neighbor 172.16.0.62 description lax001-dz002-vpnv4 + neighbor 172.16.0.62 timers 3 9 + neighbor 172.16.0.62 send-community + neighbor 172.16.0.63 remote-as 65342 + neighbor 172.16.0.63 next-hop-self + neighbor 172.16.0.63 update-source Loopback256 + neighbor 172.16.0.63 description lax001-dz002-ipv4 + neighbor 172.16.0.63 timers 3 9 + neighbor 172.16.0.63 send-community + neighbor 172.16.0.68 remote-as 65342 + neighbor 172.16.0.68 next-hop-self + neighbor 172.16.0.68 update-source Loopback255 + neighbor 172.16.0.68 description sin001-dz002-vpnv4 + neighbor 172.16.0.68 timers 3 9 + neighbor 172.16.0.68 send-community + neighbor 172.16.0.69 remote-as 65342 + neighbor 172.16.0.69 next-hop-self + neighbor 172.16.0.69 update-source Loopback256 + neighbor 172.16.0.69 description sin001-dz002-ipv4 + neighbor 172.16.0.69 timers 3 9 + neighbor 172.16.0.69 send-community + neighbor 172.16.0.70 remote-as 65342 + neighbor 172.16.0.70 next-hop-self + neighbor 172.16.0.70 update-source Loopback255 + neighbor 172.16.0.70 description tyo002-dz002-vpnv4 + neighbor 172.16.0.70 timers 
3 9 + neighbor 172.16.0.70 send-community + neighbor 172.16.0.71 remote-as 65342 + neighbor 172.16.0.71 next-hop-self + neighbor 172.16.0.71 update-source Loopback256 + neighbor 172.16.0.71 description tyo002-dz002-ipv4 + neighbor 172.16.0.71 timers 3 9 + neighbor 172.16.0.71 send-community + neighbor 172.16.0.72 remote-as 65342 + neighbor 172.16.0.72 next-hop-self + neighbor 172.16.0.72 update-source Loopback256 + neighbor 172.16.0.72 description dz-chi-sw01-ipv4 + neighbor 172.16.0.72 timers 3 9 + neighbor 172.16.0.72 send-community + neighbor 172.16.0.73 remote-as 65342 + neighbor 172.16.0.73 next-hop-self + neighbor 172.16.0.73 update-source Loopback255 + neighbor 172.16.0.73 description hkg001-dz001-vpnv4 + neighbor 172.16.0.73 timers 3 9 + neighbor 172.16.0.73 send-community + neighbor 172.16.0.74 remote-as 65342 + neighbor 172.16.0.74 next-hop-self + neighbor 172.16.0.74 update-source Loopback256 + neighbor 172.16.0.74 description hkg001-dz001-ipv4 + neighbor 172.16.0.74 timers 3 9 + neighbor 172.16.0.74 send-community + neighbor 172.16.0.75 remote-as 65342 + neighbor 172.16.0.75 next-hop-self + neighbor 172.16.0.75 update-source Loopback255 + neighbor 172.16.0.75 description hkg001-dz002-vpnv4 + neighbor 172.16.0.75 timers 3 9 + neighbor 172.16.0.75 send-community + neighbor 172.16.0.76 remote-as 65342 + neighbor 172.16.0.76 next-hop-self + neighbor 172.16.0.76 update-source Loopback255 + neighbor 172.16.0.76 description chi001-dz001-vpnv4 + neighbor 172.16.0.76 timers 3 9 + neighbor 172.16.0.76 send-community + neighbor 172.16.0.77 remote-as 65342 + neighbor 172.16.0.77 next-hop-self + neighbor 172.16.0.77 update-source Loopback255 + neighbor 172.16.0.77 description chi001-dz002-vpnv4 + neighbor 172.16.0.77 timers 3 9 + neighbor 172.16.0.77 send-community + neighbor 172.16.0.78 remote-as 65342 + neighbor 172.16.0.78 next-hop-self + neighbor 172.16.0.78 update-source Loopback255 + neighbor 172.16.0.78 description dz-lax-sw01-vpnv4 + neighbor 172.16.0.78 
timers 3 9 + neighbor 172.16.0.78 send-community + neighbor 172.16.0.79 remote-as 65342 + neighbor 172.16.0.79 next-hop-self + neighbor 172.16.0.79 update-source Loopback255 + neighbor 172.16.0.79 description bom001-dz001-vpnv4 + neighbor 172.16.0.79 timers 3 9 + neighbor 172.16.0.79 send-community + neighbor 172.16.0.80 remote-as 65342 + neighbor 172.16.0.80 next-hop-self + neighbor 172.16.0.80 update-source Loopback256 + neighbor 172.16.0.80 description bom001-dz001-ipv4 + neighbor 172.16.0.80 timers 3 9 + neighbor 172.16.0.80 send-community + neighbor 172.16.0.81 remote-as 65342 + neighbor 172.16.0.81 next-hop-self + neighbor 172.16.0.81 update-source Loopback256 + neighbor 172.16.0.81 description dz-lax-sw01-ipv4 + neighbor 172.16.0.81 timers 3 9 + neighbor 172.16.0.81 send-community + neighbor 172.16.0.82 remote-as 65342 + neighbor 172.16.0.82 next-hop-self + neighbor 172.16.0.82 update-source Loopback255 + neighbor 172.16.0.82 description bom001-dz002-vpnv4 + neighbor 172.16.0.82 timers 3 9 + neighbor 172.16.0.82 send-community + neighbor 172.16.0.83 remote-as 65342 + neighbor 172.16.0.83 next-hop-self + neighbor 172.16.0.83 update-source Loopback256 + neighbor 172.16.0.83 description bom001-dz002-ipv4 + neighbor 172.16.0.83 timers 3 9 + neighbor 172.16.0.83 send-community + neighbor 172.16.0.86 remote-as 65342 + neighbor 172.16.0.86 next-hop-self + neighbor 172.16.0.86 update-source Loopback255 + neighbor 172.16.0.86 description dz-ams-01-vpnv4 + neighbor 172.16.0.86 timers 3 9 + neighbor 172.16.0.86 send-community + neighbor 172.16.0.87 remote-as 65342 + neighbor 172.16.0.87 next-hop-self + neighbor 172.16.0.87 update-source Loopback255 + neighbor 172.16.0.87 description dfw001-dz001-vpnv4 + neighbor 172.16.0.87 timers 3 9 + neighbor 172.16.0.87 send-community + neighbor 172.16.0.88 remote-as 65342 + neighbor 172.16.0.88 next-hop-self + neighbor 172.16.0.88 update-source Loopback256 + neighbor 172.16.0.88 description dfw001-dz001-ipv4 + neighbor 172.16.0.88 
timers 3 9 + neighbor 172.16.0.88 send-community + neighbor 172.16.0.89 remote-as 65342 + neighbor 172.16.0.89 next-hop-self + neighbor 172.16.0.89 update-source Loopback255 + neighbor 172.16.0.89 description dz-fra-01-vpnv4 + neighbor 172.16.0.89 timers 3 9 + neighbor 172.16.0.89 send-community + neighbor 172.16.0.92 remote-as 65342 + neighbor 172.16.0.92 next-hop-self + neighbor 172.16.0.92 update-source Loopback255 + neighbor 172.16.0.92 description dfw001-dz002-vpnv4 + neighbor 172.16.0.92 timers 3 9 + neighbor 172.16.0.92 send-community + neighbor 172.16.0.93 remote-as 65342 + neighbor 172.16.0.93 next-hop-self + neighbor 172.16.0.93 update-source Loopback256 + neighbor 172.16.0.93 description dfw001-dz002-ipv4 + neighbor 172.16.0.93 timers 3 9 + neighbor 172.16.0.93 send-community + neighbor 172.16.0.94 remote-as 65342 + neighbor 172.16.0.94 next-hop-self + neighbor 172.16.0.94 update-source Loopback255 + neighbor 172.16.0.94 description dz-sxb-01-vpnv4 + neighbor 172.16.0.94 timers 3 9 + neighbor 172.16.0.94 send-community + neighbor 172.16.0.95 remote-as 65342 + neighbor 172.16.0.95 next-hop-self + neighbor 172.16.0.95 update-source Loopback255 + neighbor 172.16.0.95 description sao001-dz001-vpnv4 + neighbor 172.16.0.95 timers 3 9 + neighbor 172.16.0.95 send-community + neighbor 172.16.0.96 remote-as 65342 + neighbor 172.16.0.96 next-hop-self + neighbor 172.16.0.96 update-source Loopback256 + neighbor 172.16.0.96 description sao001-dz001-ipv4 + neighbor 172.16.0.96 timers 3 9 + neighbor 172.16.0.96 send-community + neighbor 172.16.0.97 remote-as 65342 + neighbor 172.16.0.97 next-hop-self + neighbor 172.16.0.97 update-source Loopback255 + neighbor 172.16.0.97 description dz-waw-01-vpnv4 + neighbor 172.16.0.97 timers 3 9 + neighbor 172.16.0.97 send-community + neighbor 172.16.0.100 remote-as 65342 + neighbor 172.16.0.100 next-hop-self + neighbor 172.16.0.100 update-source Loopback255 + neighbor 172.16.0.100 description sao001-dz002-vpnv4 + neighbor 
172.16.0.100 timers 3 9 + neighbor 172.16.0.100 send-community + neighbor 172.16.0.101 remote-as 65342 + neighbor 172.16.0.101 next-hop-self + neighbor 172.16.0.101 update-source Loopback256 + neighbor 172.16.0.101 description sao001-dz002-ipv4 + neighbor 172.16.0.101 timers 3 9 + neighbor 172.16.0.101 send-community + neighbor 172.16.0.106 remote-as 65342 + neighbor 172.16.0.106 next-hop-self + neighbor 172.16.0.106 update-source Loopback256 + neighbor 172.16.0.106 description dz-ams-01-ipv4 + neighbor 172.16.0.106 timers 3 9 + neighbor 172.16.0.106 send-community + neighbor 172.16.0.107 remote-as 65342 + neighbor 172.16.0.107 next-hop-self + neighbor 172.16.0.107 update-source Loopback255 + neighbor 172.16.0.107 description ams001-dz002-vpnv4 + neighbor 172.16.0.107 timers 3 9 + neighbor 172.16.0.107 send-community + neighbor 172.16.0.108 remote-as 65342 + neighbor 172.16.0.108 next-hop-self + neighbor 172.16.0.108 update-source Loopback256 + neighbor 172.16.0.108 description ams001-dz002-ipv4 + neighbor 172.16.0.108 timers 3 9 + neighbor 172.16.0.108 send-community + neighbor 172.16.0.109 remote-as 65342 + neighbor 172.16.0.109 next-hop-self + neighbor 172.16.0.109 update-source Loopback256 + neighbor 172.16.0.109 description dub001-dz002-ipv4 + neighbor 172.16.0.109 timers 3 9 + neighbor 172.16.0.109 send-community + neighbor 172.16.0.110 remote-as 65342 + neighbor 172.16.0.110 next-hop-self + neighbor 172.16.0.110 update-source Loopback256 + neighbor 172.16.0.110 description was001-dz001-ipv4 + neighbor 172.16.0.110 timers 3 9 + neighbor 172.16.0.110 send-community + neighbor 172.16.0.111 remote-as 65342 + neighbor 172.16.0.111 next-hop-self + neighbor 172.16.0.111 update-source Loopback256 + neighbor 172.16.0.111 description nyc001-dz002-ipv4 + neighbor 172.16.0.111 timers 3 9 + neighbor 172.16.0.111 send-community + neighbor 172.16.0.112 remote-as 65342 + neighbor 172.16.0.112 next-hop-self + neighbor 172.16.0.112 update-source Loopback256 + neighbor 
172.16.0.112 description sjc001-dz002-ipv4 + neighbor 172.16.0.112 timers 3 9 + neighbor 172.16.0.112 send-community + neighbor 172.16.0.113 remote-as 65342 + neighbor 172.16.0.113 next-hop-self + neighbor 172.16.0.113 update-source Loopback256 + neighbor 172.16.0.113 description hkg001-dz002-ipv4 + neighbor 172.16.0.113 timers 3 9 + neighbor 172.16.0.113 send-community + neighbor 172.16.0.114 remote-as 65342 + neighbor 172.16.0.114 next-hop-self + neighbor 172.16.0.114 update-source Loopback256 + neighbor 172.16.0.114 description chi001-dz001-ipv4 + neighbor 172.16.0.114 timers 3 9 + neighbor 172.16.0.114 send-community + neighbor 172.16.0.115 remote-as 65342 + neighbor 172.16.0.115 next-hop-self + neighbor 172.16.0.115 update-source Loopback256 + neighbor 172.16.0.115 description chi001-dz002-ipv4 + neighbor 172.16.0.115 timers 3 9 + neighbor 172.16.0.115 send-community + neighbor 172.16.0.140 remote-as 65342 + neighbor 172.16.0.140 next-hop-self + neighbor 172.16.0.140 update-source Loopback255 + neighbor 172.16.0.140 description dz-tor1-sw01-vpnv4 + neighbor 172.16.0.140 timers 3 9 + neighbor 172.16.0.140 send-community + neighbor 172.16.0.141 remote-as 65342 + neighbor 172.16.0.141 next-hop-self + neighbor 172.16.0.141 update-source Loopback256 + neighbor 172.16.0.141 description dz-tor1-sw01-ipv4 + neighbor 172.16.0.141 timers 3 9 + neighbor 172.16.0.141 send-community + neighbor 172.16.0.142 remote-as 65342 + neighbor 172.16.0.142 next-hop-self + neighbor 172.16.0.142 update-source Loopback255 + neighbor 172.16.0.142 description dz-mtl11-sw01-vpnv4 + neighbor 172.16.0.142 timers 3 9 + neighbor 172.16.0.142 send-community + neighbor 172.16.0.143 remote-as 65342 + neighbor 172.16.0.143 next-hop-self + neighbor 172.16.0.143 update-source Loopback256 + neighbor 172.16.0.143 description dz-mtl11-sw01-ipv4 + neighbor 172.16.0.143 timers 3 9 + neighbor 172.16.0.143 send-community + neighbor 172.16.0.148 remote-as 65342 + neighbor 172.16.0.148 next-hop-self + 
neighbor 172.16.0.148 update-source Loopback255 + neighbor 172.16.0.148 description dz-ny7-sw02-vpnv4 + neighbor 172.16.0.148 timers 3 9 + neighbor 172.16.0.148 send-community + neighbor 172.16.0.149 remote-as 65342 + neighbor 172.16.0.149 next-hop-self + neighbor 172.16.0.149 update-source Loopback256 + neighbor 172.16.0.149 description dz-ny7-sw02-ipv4 + neighbor 172.16.0.149 timers 3 9 + neighbor 172.16.0.149 send-community + neighbor 172.16.0.152 remote-as 65342 + neighbor 172.16.0.152 next-hop-self + neighbor 172.16.0.152 update-source Loopback255 + neighbor 172.16.0.152 description dz-dc10-sw01-vpnv4 + neighbor 172.16.0.152 timers 3 9 + neighbor 172.16.0.152 send-community + neighbor 172.16.0.153 remote-as 65342 + neighbor 172.16.0.153 next-hop-self + neighbor 172.16.0.153 update-source Loopback256 + neighbor 172.16.0.153 description dz-dc10-sw01-ipv4 + neighbor 172.16.0.153 timers 3 9 + neighbor 172.16.0.153 send-community + neighbor 172.16.0.156 remote-as 65342 + neighbor 172.16.0.156 next-hop-self + neighbor 172.16.0.156 update-source Loopback255 + neighbor 172.16.0.156 description dz-ch2-sw01-vpnv4 + neighbor 172.16.0.156 timers 3 9 + neighbor 172.16.0.156 send-community + neighbor 172.16.0.157 remote-as 65342 + neighbor 172.16.0.157 next-hop-self + neighbor 172.16.0.157 update-source Loopback256 + neighbor 172.16.0.157 description dz-ch2-sw01-ipv4 + neighbor 172.16.0.157 timers 3 9 + neighbor 172.16.0.157 send-community + neighbor 172.16.0.158 remote-as 65342 + neighbor 172.16.0.158 next-hop-self + neighbor 172.16.0.158 update-source Loopback255 + neighbor 172.16.0.158 description dz-fr5-sw01-vpnv4 + neighbor 172.16.0.158 timers 3 9 + neighbor 172.16.0.158 send-community + neighbor 172.16.0.159 remote-as 65342 + neighbor 172.16.0.159 next-hop-self + neighbor 172.16.0.159 update-source Loopback256 + neighbor 172.16.0.159 description dz-fr5-sw01-ipv4 + neighbor 172.16.0.159 timers 3 9 + neighbor 172.16.0.159 send-community + neighbor 172.16.0.160 remote-as 
65342 + neighbor 172.16.0.160 next-hop-self + neighbor 172.16.0.160 update-source Loopback255 + neighbor 172.16.0.160 description dz-sea10-sw01-vpnv4 + neighbor 172.16.0.160 timers 3 9 + neighbor 172.16.0.160 send-community + neighbor 172.16.0.161 remote-as 65342 + neighbor 172.16.0.161 next-hop-self + neighbor 172.16.0.161 update-source Loopback256 + neighbor 172.16.0.161 description dz-sea10-sw01-ipv4 + neighbor 172.16.0.161 timers 3 9 + neighbor 172.16.0.161 send-community + neighbor 172.16.0.162 remote-as 65342 + neighbor 172.16.0.162 next-hop-self + neighbor 172.16.0.162 update-source Loopback255 + neighbor 172.16.0.162 description dz-sg1-sw01-vpnv4 + neighbor 172.16.0.162 timers 3 9 + neighbor 172.16.0.162 send-community + neighbor 172.16.0.163 remote-as 65342 + neighbor 172.16.0.163 next-hop-self + neighbor 172.16.0.163 update-source Loopback256 + neighbor 172.16.0.163 description dz-sg1-sw01-ipv4 + neighbor 172.16.0.163 timers 3 9 + neighbor 172.16.0.163 send-community + neighbor 172.16.0.164 remote-as 65342 + neighbor 172.16.0.164 next-hop-self + neighbor 172.16.0.164 update-source Loopback255 + neighbor 172.16.0.164 description dz-ty9-sw01-vpnv4 + neighbor 172.16.0.164 timers 3 9 + neighbor 172.16.0.164 send-community + neighbor 172.16.0.165 remote-as 65342 + neighbor 172.16.0.165 next-hop-self + neighbor 172.16.0.165 update-source Loopback256 + neighbor 172.16.0.165 description dz-ty9-sw01-ipv4 + neighbor 172.16.0.165 timers 3 9 + neighbor 172.16.0.165 send-community + neighbor 172.16.0.238 remote-as 65342 + neighbor 172.16.0.238 next-hop-self + neighbor 172.16.0.238 update-source Loopback255 + neighbor 172.16.0.238 description dgt-dzd-sin-sg3-vpnv4 + neighbor 172.16.0.238 timers 3 9 + neighbor 172.16.0.238 send-community + neighbor 172.16.0.239 remote-as 65342 + neighbor 172.16.0.239 next-hop-self + neighbor 172.16.0.239 update-source Loopback256 + neighbor 172.16.0.239 description dgt-dzd-sin-sg3-ipv4 + neighbor 172.16.0.239 timers 3 9 + neighbor 
172.16.0.239 send-community + neighbor 172.16.0.242 remote-as 65342 + neighbor 172.16.0.242 next-hop-self + neighbor 172.16.0.242 update-source Loopback256 + neighbor 172.16.0.242 description dz-fra-01-ipv4 + neighbor 172.16.0.242 timers 3 9 + neighbor 172.16.0.242 send-community + neighbor 172.16.0.243 remote-as 65342 + neighbor 172.16.0.243 next-hop-self + neighbor 172.16.0.243 update-source Loopback256 + neighbor 172.16.0.243 description dz-sxb-01-ipv4 + neighbor 172.16.0.243 timers 3 9 + neighbor 172.16.0.243 send-community + neighbor 172.16.0.244 remote-as 65342 + neighbor 172.16.0.244 next-hop-self + neighbor 172.16.0.244 update-source Loopback256 + neighbor 172.16.0.244 description dz-waw-01-ipv4 + neighbor 172.16.0.244 timers 3 9 + neighbor 172.16.0.244 send-community + neighbor 172.16.0.245 remote-as 65342 + neighbor 172.16.0.245 next-hop-self + neighbor 172.16.0.245 update-source Loopback255 + neighbor 172.16.0.245 description dz-slc-sw01-vpnv4 + neighbor 172.16.0.245 timers 3 9 + neighbor 172.16.0.245 send-community + neighbor 172.16.0.254 remote-as 65342 + neighbor 172.16.0.254 next-hop-self + neighbor 172.16.0.254 update-source Loopback256 + neighbor 172.16.0.254 description dz-slc-sw01-ipv4 + neighbor 172.16.0.254 timers 3 9 + neighbor 172.16.0.254 send-community + neighbor 172.16.0.255 remote-as 65342 + neighbor 172.16.0.255 next-hop-self + neighbor 172.16.0.255 update-source Loopback256 + neighbor 172.16.0.255 description cherlita-ipv4 + neighbor 172.16.0.255 timers 3 9 + neighbor 172.16.0.255 send-community + neighbor 172.16.1.6 remote-as 65342 + neighbor 172.16.1.6 next-hop-self + neighbor 172.16.1.6 update-source Loopback256 + neighbor 172.16.1.6 description frankry-ipv4 + neighbor 172.16.1.6 timers 3 9 + neighbor 172.16.1.6 send-community + neighbor 172.16.1.7 remote-as 65342 + neighbor 172.16.1.7 next-hop-self + neighbor 172.16.1.7 update-source Loopback255 + neighbor 172.16.1.7 description cherlita-vpnv4 + neighbor 172.16.1.7 timers 3 9 + 
neighbor 172.16.1.7 send-community + neighbor 172.16.1.10 remote-as 65342 + neighbor 172.16.1.10 next-hop-self + neighbor 172.16.1.10 update-source Loopback255 + neighbor 172.16.1.10 description frankry-vpnv4 + neighbor 172.16.1.10 timers 3 9 + neighbor 172.16.1.10 send-community + neighbor 172.16.1.11 remote-as 65342 + neighbor 172.16.1.11 next-hop-self + neighbor 172.16.1.11 update-source Loopback255 + neighbor 172.16.1.11 description dz-muc-01-vpnv4 + neighbor 172.16.1.11 timers 3 9 + neighbor 172.16.1.11 send-community + neighbor 172.16.1.14 remote-as 65342 + neighbor 172.16.1.14 next-hop-self + neighbor 172.16.1.14 update-source Loopback256 + neighbor 172.16.1.14 description dz-muc-01-ipv4 + neighbor 172.16.1.14 timers 3 9 + neighbor 172.16.1.14 send-community + neighbor 172.16.1.15 remote-as 65342 + neighbor 172.16.1.15 next-hop-self + neighbor 172.16.1.15 update-source Loopback255 + neighbor 172.16.1.15 description ce2-dzd-001-vpnv4 + neighbor 172.16.1.15 timers 3 9 + neighbor 172.16.1.15 send-community + neighbor 172.16.1.26 remote-as 65342 + neighbor 172.16.1.26 next-hop-self + neighbor 172.16.1.26 update-source Loopback256 + neighbor 172.16.1.26 description ce2-dzd-001-ipv4 + neighbor 172.16.1.26 timers 3 9 + neighbor 172.16.1.26 send-community + neighbor 172.16.1.27 remote-as 65342 + neighbor 172.16.1.27 next-hop-self + neighbor 172.16.1.27 update-source Loopback255 + neighbor 172.16.1.27 description dgt-dzd-nyc-ny2-vpnv4 + neighbor 172.16.1.27 timers 3 9 + neighbor 172.16.1.27 send-community + neighbor 172.16.1.36 remote-as 65342 + neighbor 172.16.1.36 next-hop-self + neighbor 172.16.1.36 update-source Loopback256 + neighbor 172.16.1.36 description dgt-dzd-nyc-ny2-ipv4 + neighbor 172.16.1.36 timers 3 9 + neighbor 172.16.1.36 send-community + neighbor 172.16.1.37 remote-as 65342 + neighbor 172.16.1.37 next-hop-self + neighbor 172.16.1.37 update-source Loopback256 + neighbor 172.16.1.37 description swerry-ipv4 + neighbor 172.16.1.37 timers 3 9 + neighbor 
172.16.1.37 send-community + neighbor 172.16.1.40 remote-as 65342 + neighbor 172.16.1.40 next-hop-self + neighbor 172.16.1.40 update-source Loopback256 + neighbor 172.16.1.40 description cherydam-ipv4 + neighbor 172.16.1.40 timers 3 9 + neighbor 172.16.1.40 send-community + neighbor 172.16.1.41 remote-as 65342 + neighbor 172.16.1.41 next-hop-self + neighbor 172.16.1.41 update-source Loopback255 + neighbor 172.16.1.41 description swerry-vpnv4 + neighbor 172.16.1.41 timers 3 9 + neighbor 172.16.1.41 send-community + neighbor 172.16.1.44 remote-as 65342 + neighbor 172.16.1.44 next-hop-self + neighbor 172.16.1.44 update-source Loopback255 + neighbor 172.16.1.44 description cherydam-vpnv4 + neighbor 172.16.1.44 timers 3 9 + neighbor 172.16.1.44 send-community + neighbor 172.16.1.45 remote-as 65342 + neighbor 172.16.1.45 next-hop-self + neighbor 172.16.1.45 update-source Loopback255 + neighbor 172.16.1.45 description dgt-dzd-ash-dc3-vpnv4 + neighbor 172.16.1.45 timers 3 9 + neighbor 172.16.1.45 send-community + neighbor 172.16.1.52 remote-as 65342 + neighbor 172.16.1.52 next-hop-self + neighbor 172.16.1.52 update-source Loopback256 + neighbor 172.16.1.52 description dgt-dzd-ash-dc3-ipv4 + neighbor 172.16.1.52 timers 3 9 + neighbor 172.16.1.52 send-community + neighbor 172.16.1.53 remote-as 65342 + neighbor 172.16.1.53 next-hop-self + neighbor 172.16.1.53 update-source Loopback255 + neighbor 172.16.1.53 description bdc-dzd-001-vpnv4 + neighbor 172.16.1.53 timers 3 9 + neighbor 172.16.1.53 send-community + neighbor 172.16.1.58 remote-as 65342 + neighbor 172.16.1.58 next-hop-self + neighbor 172.16.1.58 update-source Loopback255 + neighbor 172.16.1.58 description dgt-dzd-dal-da3-vpnv4 + neighbor 172.16.1.58 timers 3 9 + neighbor 172.16.1.58 send-community + neighbor 172.16.1.59 remote-as 65342 + neighbor 172.16.1.59 next-hop-self + neighbor 172.16.1.59 update-source Loopback256 + neighbor 172.16.1.59 description dgt-dzd-dal-da3-ipv4 + neighbor 172.16.1.59 timers 3 9 + 
neighbor 172.16.1.59 send-community + neighbor 172.16.1.64 remote-as 65342 + neighbor 172.16.1.64 next-hop-self + neighbor 172.16.1.64 update-source Loopback256 + neighbor 172.16.1.64 description bdc-dzd-001-ipv4 + neighbor 172.16.1.64 timers 3 9 + neighbor 172.16.1.64 send-community + neighbor 172.16.1.65 remote-as 65342 + neighbor 172.16.1.65 next-hop-self + neighbor 172.16.1.65 update-source Loopback255 + neighbor 172.16.1.65 description dgt-dzd-lon-ths-vpnv4 + neighbor 172.16.1.65 timers 3 9 + neighbor 172.16.1.65 send-community + neighbor 172.16.1.70 remote-as 65342 + neighbor 172.16.1.70 next-hop-self + neighbor 172.16.1.70 update-source Loopback255 + neighbor 172.16.1.70 description dgt-dzd-lax-la2-vpnv4 + neighbor 172.16.1.70 timers 3 9 + neighbor 172.16.1.70 send-community + neighbor 172.16.1.71 remote-as 65342 + neighbor 172.16.1.71 next-hop-self + neighbor 172.16.1.71 update-source Loopback256 + neighbor 172.16.1.71 description dgt-dzd-lon-ths-ipv4 + neighbor 172.16.1.71 timers 3 9 + neighbor 172.16.1.71 send-community + neighbor 172.16.1.72 remote-as 65342 + neighbor 172.16.1.72 next-hop-self + neighbor 172.16.1.72 update-source Loopback255 + neighbor 172.16.1.72 description dgt-dzd-fra-fr5-vpnv4 + neighbor 172.16.1.72 timers 3 9 + neighbor 172.16.1.72 send-community + neighbor 172.16.1.73 remote-as 65342 + neighbor 172.16.1.73 next-hop-self + neighbor 172.16.1.73 update-source Loopback256 + neighbor 172.16.1.73 description dgt-dzd-lax-la2-ipv4 + neighbor 172.16.1.73 timers 3 9 + neighbor 172.16.1.73 send-community + neighbor 172.16.1.86 remote-as 65342 + neighbor 172.16.1.86 next-hop-self + neighbor 172.16.1.86 update-source Loopback256 + neighbor 172.16.1.86 description dgt-dzd-fra-fr5-ipv4 + neighbor 172.16.1.86 timers 3 9 + neighbor 172.16.1.86 send-community + neighbor 172.16.1.87 remote-as 65342 + neighbor 172.16.1.87 next-hop-self + neighbor 172.16.1.87 update-source Loopback255 + neighbor 172.16.1.87 description dgt-dzd-ams-ams1-vpnv4 + neighbor 
172.16.1.87 timers 3 9 + neighbor 172.16.1.87 send-community + neighbor 172.16.1.90 remote-as 65342 + neighbor 172.16.1.90 next-hop-self + neighbor 172.16.1.90 update-source Loopback256 + neighbor 172.16.1.90 description dgt-dzd-ams-ams1-ipv4 + neighbor 172.16.1.90 timers 3 9 + neighbor 172.16.1.90 send-community + neighbor 172.16.1.91 remote-as 65342 + neighbor 172.16.1.91 next-hop-self + neighbor 172.16.1.91 update-source Loopback255 + neighbor 172.16.1.91 description dz100a-slc1-tsw-vpnv4 + neighbor 172.16.1.91 timers 3 9 + neighbor 172.16.1.91 send-community + neighbor 172.16.1.98 remote-as 65342 + neighbor 172.16.1.98 next-hop-self + neighbor 172.16.1.98 update-source Loopback255 + neighbor 172.16.1.98 description dgt-dzd-dub-db2-vpnv4 + neighbor 172.16.1.98 timers 3 9 + neighbor 172.16.1.98 send-community + neighbor 172.16.1.99 remote-as 65342 + neighbor 172.16.1.99 next-hop-self + neighbor 172.16.1.99 update-source Loopback255 + neighbor 172.16.1.99 description swdzd01-lon2-vpnv4 + neighbor 172.16.1.99 timers 3 9 + neighbor 172.16.1.99 send-community + neighbor 172.16.1.100 remote-as 65342 + neighbor 172.16.1.100 next-hop-self + neighbor 172.16.1.100 update-source Loopback256 + neighbor 172.16.1.100 description dz100a-slc1-tsw-ipv4 + neighbor 172.16.1.100 timers 3 9 + neighbor 172.16.1.100 send-community + neighbor 172.16.1.101 remote-as 65342 + neighbor 172.16.1.101 next-hop-self + neighbor 172.16.1.101 update-source Loopback256 + neighbor 172.16.1.101 description dgt-dzd-dub-db2-ipv4 + neighbor 172.16.1.101 timers 3 9 + neighbor 172.16.1.101 send-community + neighbor 172.16.1.110 remote-as 65342 + neighbor 172.16.1.110 next-hop-self + neighbor 172.16.1.110 update-source Loopback255 + neighbor 172.16.1.110 description laconic-mia-sw01-vpnv4 + neighbor 172.16.1.110 timers 3 9 + neighbor 172.16.1.110 send-community + neighbor 172.16.1.111 remote-as 65342 + neighbor 172.16.1.111 next-hop-self + neighbor 172.16.1.111 update-source Loopback255 + neighbor 
172.16.1.111 description dgt-dzd-mrs-mrs1-vpnv4 + neighbor 172.16.1.111 timers 3 9 + neighbor 172.16.1.111 send-community + neighbor 172.16.1.114 remote-as 65342 + neighbor 172.16.1.114 next-hop-self + neighbor 172.16.1.114 update-source Loopback256 + neighbor 172.16.1.114 description dgt-dzd-mrs-mrs1-ipv4 + neighbor 172.16.1.114 timers 3 9 + neighbor 172.16.1.114 send-community + neighbor 172.16.1.115 remote-as 65342 + neighbor 172.16.1.115 next-hop-self + neighbor 172.16.1.115 update-source Loopback256 + neighbor 172.16.1.115 description swdzd01-lon2-ipv4 + neighbor 172.16.1.115 timers 3 9 + neighbor 172.16.1.115 send-community + neighbor 172.16.1.130 remote-as 65342 + neighbor 172.16.1.130 next-hop-self + neighbor 172.16.1.130 update-source Loopback255 + neighbor 172.16.1.130 description lts-dzd-001-vpnv4 + neighbor 172.16.1.130 timers 3 9 + neighbor 172.16.1.130 send-community + neighbor 172.16.1.131 remote-as 65342 + neighbor 172.16.1.131 next-hop-self + neighbor 172.16.1.131 update-source Loopback256 + neighbor 172.16.1.131 description lts-dzd-001-ipv4 + neighbor 172.16.1.131 timers 3 9 + neighbor 172.16.1.131 send-community + neighbor 172.16.1.138 remote-as 65342 + neighbor 172.16.1.138 next-hop-self + neighbor 172.16.1.138 update-source Loopback256 + neighbor 172.16.1.138 description dz-slc2-sw01-ipv4 + neighbor 172.16.1.138 timers 3 9 + neighbor 172.16.1.138 send-community + neighbor 172.16.1.139 remote-as 65342 + neighbor 172.16.1.139 next-hop-self + neighbor 172.16.1.139 update-source Loopback255 + neighbor 172.16.1.139 description dz-slc2-sw01-vpnv4 + neighbor 172.16.1.139 timers 3 9 + neighbor 172.16.1.139 send-community + neighbor 172.16.1.144 remote-as 65342 + neighbor 172.16.1.144 next-hop-self + neighbor 172.16.1.144 update-source Loopback255 + neighbor 172.16.1.144 description au1c-dz01-vpnv4 + neighbor 172.16.1.144 timers 3 9 + neighbor 172.16.1.144 send-community + neighbor 172.16.1.145 remote-as 65342 + neighbor 172.16.1.145 next-hop-self + 
neighbor 172.16.1.145 update-source Loopback256 + neighbor 172.16.1.145 description au1c-dz01-ipv4 + neighbor 172.16.1.145 timers 3 9 + neighbor 172.16.1.145 send-community + neighbor 172.16.1.146 remote-as 65342 + neighbor 172.16.1.146 next-hop-self + neighbor 172.16.1.146 update-source Loopback255 + neighbor 172.16.1.146 description la2r-dz01-vpnv4 + neighbor 172.16.1.146 timers 3 9 + neighbor 172.16.1.146 send-community + neighbor 172.16.1.147 remote-as 65342 + neighbor 172.16.1.147 next-hop-self + neighbor 172.16.1.147 update-source Loopback256 + neighbor 172.16.1.147 description la2r-dz01-ipv4 + neighbor 172.16.1.147 timers 3 9 + neighbor 172.16.1.147 send-community + neighbor 172.16.1.156 remote-as 65342 + neighbor 172.16.1.156 next-hop-self + neighbor 172.16.1.156 update-source Loopback255 + neighbor 172.16.1.156 description dz100a-lax1-tsw-vpnv4 + neighbor 172.16.1.156 timers 3 9 + neighbor 172.16.1.156 send-community + neighbor 172.16.1.157 remote-as 65342 + neighbor 172.16.1.157 next-hop-self + neighbor 172.16.1.157 update-source Loopback256 + neighbor 172.16.1.157 description laconic-mia-sw01-ipv4 + neighbor 172.16.1.157 timers 3 9 + neighbor 172.16.1.157 send-community + neighbor 172.16.1.159 remote-as 65342 + neighbor 172.16.1.159 next-hop-self + neighbor 172.16.1.159 update-source Loopback256 + neighbor 172.16.1.159 description dz100a-lax1-tsw-ipv4 + neighbor 172.16.1.159 timers 3 9 + neighbor 172.16.1.159 send-community + neighbor 172.16.1.163 remote-as 65342 + neighbor 172.16.1.163 next-hop-self + neighbor 172.16.1.163 update-source Loopback255 + neighbor 172.16.1.163 description dz100a-sea1-tsw-vpnv4 + neighbor 172.16.1.163 timers 3 9 + neighbor 172.16.1.163 send-community + neighbor 172.16.1.166 remote-as 65342 + neighbor 172.16.1.166 next-hop-self + neighbor 172.16.1.166 update-source Loopback256 + neighbor 172.16.1.166 description dz100a-sea1-tsw-ipv4 + neighbor 172.16.1.166 timers 3 9 + neighbor 172.16.1.166 send-community + neighbor 
172.16.1.167 remote-as 65342 + neighbor 172.16.1.167 next-hop-self + neighbor 172.16.1.167 update-source Loopback255 + neighbor 172.16.1.167 description laconic-dfw-sw01-vpnv4 + neighbor 172.16.1.167 timers 3 9 + neighbor 172.16.1.167 send-community + neighbor 172.16.1.170 remote-as 65342 + neighbor 172.16.1.170 next-hop-self + neighbor 172.16.1.170 update-source Loopback255 + neighbor 172.16.1.170 description dz100a-ewr1-tsw-vpnv4 + neighbor 172.16.1.170 timers 3 9 + neighbor 172.16.1.170 send-community + neighbor 172.16.1.171 remote-as 65342 + neighbor 172.16.1.171 next-hop-self + neighbor 172.16.1.171 update-source Loopback256 + neighbor 172.16.1.171 description dz100a-dal1-tsw-ipv4 + neighbor 172.16.1.171 timers 3 9 + neighbor 172.16.1.171 send-community + neighbor 172.16.1.172 remote-as 65342 + neighbor 172.16.1.172 next-hop-self + neighbor 172.16.1.172 update-source Loopback256 + neighbor 172.16.1.172 description laconic-dfw-sw01-ipv4 + neighbor 172.16.1.172 timers 3 9 + neighbor 172.16.1.172 send-community + neighbor 172.16.1.173 remote-as 65342 + neighbor 172.16.1.173 next-hop-self + neighbor 172.16.1.173 update-source Loopback255 + neighbor 172.16.1.173 description dz100a-iad1-tsw-vpnv4 + neighbor 172.16.1.173 timers 3 9 + neighbor 172.16.1.173 send-community + neighbor 172.16.1.176 remote-as 65342 + neighbor 172.16.1.176 next-hop-self + neighbor 172.16.1.176 update-source Loopback256 + neighbor 172.16.1.176 description dz100a-ewr1-tsw-ipv4 + neighbor 172.16.1.176 timers 3 9 + neighbor 172.16.1.176 send-community + neighbor 172.16.1.177 remote-as 65342 + neighbor 172.16.1.177 next-hop-self + neighbor 172.16.1.177 update-source Loopback255 + neighbor 172.16.1.177 description dz100a-dal1-tsw-vpnv4 + neighbor 172.16.1.177 timers 3 9 + neighbor 172.16.1.177 send-community + neighbor 172.16.1.178 remote-as 65342 + neighbor 172.16.1.178 next-hop-self + neighbor 172.16.1.178 update-source Loopback256 + neighbor 172.16.1.178 description dz100a-iad1-tsw-ipv4 + 
neighbor 172.16.1.178 timers 3 9 + neighbor 172.16.1.178 send-community + neighbor 172.16.1.179 remote-as 65342 + neighbor 172.16.1.179 next-hop-self + neighbor 172.16.1.179 update-source Loopback255 + neighbor 172.16.1.179 description fra-velia-vpnv4 + neighbor 172.16.1.179 timers 3 9 + neighbor 172.16.1.179 send-community + neighbor 172.16.1.182 remote-as 65342 + neighbor 172.16.1.182 next-hop-self + neighbor 172.16.1.182 update-source Loopback255 + neighbor 172.16.1.182 description dz103a-lon1-tsw-vpnv4 + neighbor 172.16.1.182 timers 3 9 + neighbor 172.16.1.182 send-community + neighbor 172.16.1.183 remote-as 65342 + neighbor 172.16.1.183 next-hop-self + neighbor 172.16.1.183 update-source Loopback256 + neighbor 172.16.1.183 description fra-velia-ipv4 + neighbor 172.16.1.183 timers 3 9 + neighbor 172.16.1.183 send-community + neighbor 172.16.1.184 remote-as 65342 + neighbor 172.16.1.184 next-hop-self + neighbor 172.16.1.184 update-source Loopback256 + neighbor 172.16.1.184 description dz100a-chi1-tsw-ipv4 + neighbor 172.16.1.184 timers 3 9 + neighbor 172.16.1.184 send-community + neighbor 172.16.1.185 remote-as 65342 + neighbor 172.16.1.185 next-hop-self + neighbor 172.16.1.185 update-source Loopback256 + neighbor 172.16.1.185 description dz103a-lon1-tsw-ipv4 + neighbor 172.16.1.185 timers 3 9 + neighbor 172.16.1.185 send-community + neighbor 172.16.1.186 remote-as 65342 + neighbor 172.16.1.186 next-hop-self + neighbor 172.16.1.186 update-source Loopback255 + neighbor 172.16.1.186 description allnodes-fra1-vpnv4 + neighbor 172.16.1.186 timers 3 9 + neighbor 172.16.1.186 send-community + neighbor 172.16.1.187 remote-as 65342 + neighbor 172.16.1.187 next-hop-self + neighbor 172.16.1.187 update-source Loopback255 + neighbor 172.16.1.187 description dz100a-fra2-tsw-vpnv4 + neighbor 172.16.1.187 timers 3 9 + neighbor 172.16.1.187 send-community + neighbor 172.16.1.190 remote-as 65342 + neighbor 172.16.1.190 next-hop-self + neighbor 172.16.1.190 update-source 
Loopback256 + neighbor 172.16.1.190 description dz100a-fra2-tsw-ipv4 + neighbor 172.16.1.190 timers 3 9 + neighbor 172.16.1.190 send-community + neighbor 172.16.1.191 remote-as 65342 + neighbor 172.16.1.191 next-hop-self + neighbor 172.16.1.191 update-source Loopback255 + neighbor 172.16.1.191 description dz100a-chi1-tsw-vpnv4 + neighbor 172.16.1.191 timers 3 9 + neighbor 172.16.1.191 send-community + neighbor 172.16.1.194 remote-as 65342 + neighbor 172.16.1.194 next-hop-self + neighbor 172.16.1.194 update-source Loopback255 + neighbor 172.16.1.194 description dz100a-ams2-tsw-vpnv4 + neighbor 172.16.1.194 timers 3 9 + neighbor 172.16.1.194 send-community + neighbor 172.16.1.195 remote-as 65342 + neighbor 172.16.1.195 next-hop-self + neighbor 172.16.1.195 update-source Loopback256 + neighbor 172.16.1.195 description allnodes-fra1-ipv4 + neighbor 172.16.1.195 timers 3 9 + neighbor 172.16.1.195 send-community + neighbor 172.16.1.196 remote-as 65342 + neighbor 172.16.1.196 next-hop-self + neighbor 172.16.1.196 update-source Loopback255 + neighbor 172.16.1.196 description dzd-fra-01-vpnv4 + neighbor 172.16.1.196 timers 3 9 + neighbor 172.16.1.196 send-community + neighbor 172.16.1.197 remote-as 65342 + neighbor 172.16.1.197 next-hop-self + neighbor 172.16.1.197 update-source Loopback256 + neighbor 172.16.1.197 description dz100a-ams2-tsw-ipv4 + neighbor 172.16.1.197 timers 3 9 + neighbor 172.16.1.197 send-community + neighbor 172.16.1.200 remote-as 65342 + neighbor 172.16.1.200 next-hop-self + neighbor 172.16.1.200 update-source Loopback255 + neighbor 172.16.1.200 description dzd-tok-01-vpnv4 + neighbor 172.16.1.200 timers 3 9 + neighbor 172.16.1.200 send-community + neighbor 172.16.1.201 remote-as 65342 + neighbor 172.16.1.201 next-hop-self + neighbor 172.16.1.201 update-source Loopback255 + neighbor 172.16.1.201 description dz115a-tyo2-tsw-vpnv4 + neighbor 172.16.1.201 timers 3 9 + neighbor 172.16.1.201 send-community + neighbor 172.16.1.204 remote-as 65342 + neighbor 
172.16.1.204 next-hop-self + neighbor 172.16.1.204 update-source Loopback256 + neighbor 172.16.1.204 description dz115a-tyo2-tsw-ipv4 + neighbor 172.16.1.204 timers 3 9 + neighbor 172.16.1.204 send-community + neighbor 172.16.1.205 remote-as 65342 + neighbor 172.16.1.205 next-hop-self + neighbor 172.16.1.205 update-source Loopback256 + neighbor 172.16.1.205 description dzd-tok-01-ipv4 + neighbor 172.16.1.205 timers 3 9 + neighbor 172.16.1.205 send-community + neighbor 172.16.1.216 remote-as 65342 + neighbor 172.16.1.216 next-hop-self + neighbor 172.16.1.216 update-source Loopback255 + neighbor 172.16.1.216 description dgt-dzd-tyo-ty8-vpnv4 + neighbor 172.16.1.216 timers 3 9 + neighbor 172.16.1.216 send-community + neighbor 172.16.1.217 remote-as 65342 + neighbor 172.16.1.217 next-hop-self + neighbor 172.16.1.217 update-source Loopback256 + neighbor 172.16.1.217 description dzd-fra-01-ipv4 + neighbor 172.16.1.217 timers 3 9 + neighbor 172.16.1.217 send-community + neighbor 172.16.1.219 remote-as 65342 + neighbor 172.16.1.219 next-hop-self + neighbor 172.16.1.219 update-source Loopback256 + neighbor 172.16.1.219 description dgt-dzd-tyo-ty8-ipv4 + neighbor 172.16.1.219 timers 3 9 + neighbor 172.16.1.219 send-community + neighbor 172.16.1.222 remote-as 65342 + neighbor 172.16.1.222 next-hop-self + neighbor 172.16.1.222 update-source Loopback255 + neighbor 172.16.1.222 description dz100a-sgp1-tsw-vpnv4 + neighbor 172.16.1.222 timers 3 9 + neighbor 172.16.1.222 send-community + neighbor 172.16.1.223 remote-as 65342 + neighbor 172.16.1.223 next-hop-self + neighbor 172.16.1.223 update-source Loopback256 + neighbor 172.16.1.223 description dz100a-sgp1-tsw-ipv4 + neighbor 172.16.1.223 timers 3 9 + neighbor 172.16.1.223 send-community + ! 
+ address-family ipv4 + no neighbor 172.16.0.1 activate + neighbor 172.16.0.2 activate + no neighbor 172.16.0.3 activate + neighbor 172.16.0.4 activate + no neighbor 172.16.0.5 activate + neighbor 172.16.0.6 activate + no neighbor 172.16.0.7 activate + neighbor 172.16.0.8 activate + no neighbor 172.16.0.9 activate + neighbor 172.16.0.14 activate + no neighbor 172.16.0.15 activate + no neighbor 172.16.0.16 activate + neighbor 172.16.0.17 activate + neighbor 172.16.0.20 activate + no neighbor 172.16.0.21 activate + neighbor 172.16.0.22 activate + no neighbor 172.16.0.23 activate + no neighbor 172.16.0.24 activate + neighbor 172.16.0.25 activate + no neighbor 172.16.0.26 activate + no neighbor 172.16.0.27 activate + neighbor 172.16.0.28 activate + neighbor 172.16.0.29 activate + no neighbor 172.16.0.30 activate + neighbor 172.16.0.31 activate + no neighbor 172.16.0.32 activate + no neighbor 172.16.0.33 activate + no neighbor 172.16.0.34 activate + neighbor 172.16.0.35 activate + no neighbor 172.16.0.36 activate + neighbor 172.16.0.37 activate + no neighbor 172.16.0.38 activate + neighbor 172.16.0.39 activate + no neighbor 172.16.0.42 activate + neighbor 172.16.0.43 activate + no neighbor 172.16.0.46 activate + no neighbor 172.16.0.47 activate + neighbor 172.16.0.48 activate + no neighbor 172.16.0.49 activate + neighbor 172.16.0.50 activate + no neighbor 172.16.0.51 activate + no neighbor 172.16.0.56 activate + neighbor 172.16.0.57 activate + no neighbor 172.16.0.62 activate + neighbor 172.16.0.63 activate + no neighbor 172.16.0.68 activate + neighbor 172.16.0.69 activate + no neighbor 172.16.0.70 activate + neighbor 172.16.0.71 activate + neighbor 172.16.0.72 activate + no neighbor 172.16.0.73 activate + neighbor 172.16.0.74 activate + no neighbor 172.16.0.75 activate + no neighbor 172.16.0.76 activate + no neighbor 172.16.0.77 activate + no neighbor 172.16.0.78 activate + no neighbor 172.16.0.79 activate + neighbor 172.16.0.80 activate + neighbor 172.16.0.81 activate 
+ no neighbor 172.16.0.82 activate + neighbor 172.16.0.83 activate + no neighbor 172.16.0.86 activate + no neighbor 172.16.0.87 activate + neighbor 172.16.0.88 activate + no neighbor 172.16.0.89 activate + no neighbor 172.16.0.92 activate + neighbor 172.16.0.93 activate + no neighbor 172.16.0.94 activate + no neighbor 172.16.0.95 activate + neighbor 172.16.0.96 activate + no neighbor 172.16.0.97 activate + no neighbor 172.16.0.100 activate + neighbor 172.16.0.101 activate + neighbor 172.16.0.106 activate + no neighbor 172.16.0.107 activate + neighbor 172.16.0.108 activate + neighbor 172.16.0.109 activate + neighbor 172.16.0.110 activate + neighbor 172.16.0.111 activate + neighbor 172.16.0.112 activate + neighbor 172.16.0.113 activate + neighbor 172.16.0.114 activate + neighbor 172.16.0.115 activate + no neighbor 172.16.0.140 activate + neighbor 172.16.0.141 activate + no neighbor 172.16.0.142 activate + neighbor 172.16.0.143 activate + no neighbor 172.16.0.148 activate + neighbor 172.16.0.149 activate + no neighbor 172.16.0.152 activate + neighbor 172.16.0.153 activate + no neighbor 172.16.0.156 activate + neighbor 172.16.0.157 activate + no neighbor 172.16.0.158 activate + neighbor 172.16.0.159 activate + no neighbor 172.16.0.160 activate + neighbor 172.16.0.161 activate + no neighbor 172.16.0.162 activate + neighbor 172.16.0.163 activate + no neighbor 172.16.0.164 activate + neighbor 172.16.0.165 activate + no neighbor 172.16.0.238 activate + neighbor 172.16.0.239 activate + neighbor 172.16.0.242 activate + neighbor 172.16.0.243 activate + neighbor 172.16.0.244 activate + no neighbor 172.16.0.245 activate + neighbor 172.16.0.254 activate + neighbor 172.16.0.255 activate + neighbor 172.16.1.6 activate + no neighbor 172.16.1.7 activate + no neighbor 172.16.1.10 activate + no neighbor 172.16.1.11 activate + neighbor 172.16.1.14 activate + no neighbor 172.16.1.15 activate + neighbor 172.16.1.26 activate + no neighbor 172.16.1.27 activate + neighbor 172.16.1.36 
activate + neighbor 172.16.1.37 activate + neighbor 172.16.1.40 activate + no neighbor 172.16.1.41 activate + no neighbor 172.16.1.44 activate + no neighbor 172.16.1.45 activate + neighbor 172.16.1.52 activate + no neighbor 172.16.1.53 activate + no neighbor 172.16.1.58 activate + neighbor 172.16.1.59 activate + neighbor 172.16.1.64 activate + no neighbor 172.16.1.65 activate + no neighbor 172.16.1.70 activate + neighbor 172.16.1.71 activate + no neighbor 172.16.1.72 activate + neighbor 172.16.1.73 activate + neighbor 172.16.1.86 activate + no neighbor 172.16.1.87 activate + neighbor 172.16.1.90 activate + no neighbor 172.16.1.91 activate + no neighbor 172.16.1.98 activate + no neighbor 172.16.1.99 activate + neighbor 172.16.1.100 activate + neighbor 172.16.1.101 activate + no neighbor 172.16.1.110 activate + no neighbor 172.16.1.111 activate + neighbor 172.16.1.114 activate + neighbor 172.16.1.115 activate + no neighbor 172.16.1.130 activate + neighbor 172.16.1.131 activate + neighbor 172.16.1.138 activate + no neighbor 172.16.1.139 activate + no neighbor 172.16.1.144 activate + neighbor 172.16.1.145 activate + no neighbor 172.16.1.146 activate + neighbor 172.16.1.147 activate + no neighbor 172.16.1.156 activate + neighbor 172.16.1.157 activate + neighbor 172.16.1.159 activate + no neighbor 172.16.1.163 activate + neighbor 172.16.1.166 activate + no neighbor 172.16.1.167 activate + no neighbor 172.16.1.170 activate + neighbor 172.16.1.171 activate + neighbor 172.16.1.172 activate + no neighbor 172.16.1.173 activate + neighbor 172.16.1.176 activate + no neighbor 172.16.1.177 activate + neighbor 172.16.1.178 activate + no neighbor 172.16.1.179 activate + no neighbor 172.16.1.182 activate + neighbor 172.16.1.183 activate + neighbor 172.16.1.184 activate + neighbor 172.16.1.185 activate + no neighbor 172.16.1.186 activate + no neighbor 172.16.1.187 activate + neighbor 172.16.1.190 activate + no neighbor 172.16.1.191 activate + no neighbor 172.16.1.194 activate + 
neighbor 172.16.1.195 activate + no neighbor 172.16.1.196 activate + neighbor 172.16.1.197 activate + no neighbor 172.16.1.200 activate + no neighbor 172.16.1.201 activate + neighbor 172.16.1.204 activate + neighbor 172.16.1.205 activate + no neighbor 172.16.1.216 activate + neighbor 172.16.1.217 activate + neighbor 172.16.1.219 activate + no neighbor 172.16.1.222 activate + neighbor 172.16.1.223 activate + ! + address-family vpn-ipv4 + neighbor 172.16.0.1 activate + neighbor 172.16.0.3 activate + neighbor 172.16.0.5 activate + neighbor 172.16.0.7 activate + neighbor 172.16.0.9 activate + neighbor 172.16.0.15 activate + neighbor 172.16.0.16 activate + neighbor 172.16.0.21 activate + neighbor 172.16.0.23 activate + neighbor 172.16.0.24 activate + neighbor 172.16.0.26 activate + neighbor 172.16.0.27 activate + neighbor 172.16.0.30 activate + neighbor 172.16.0.32 activate + neighbor 172.16.0.33 activate + neighbor 172.16.0.34 activate + neighbor 172.16.0.36 activate + neighbor 172.16.0.38 activate + neighbor 172.16.0.42 activate + neighbor 172.16.0.46 activate + neighbor 172.16.0.47 activate + neighbor 172.16.0.49 activate + neighbor 172.16.0.51 activate + neighbor 172.16.0.56 activate + neighbor 172.16.0.62 activate + neighbor 172.16.0.68 activate + neighbor 172.16.0.70 activate + neighbor 172.16.0.73 activate + neighbor 172.16.0.75 activate + neighbor 172.16.0.76 activate + neighbor 172.16.0.77 activate + neighbor 172.16.0.78 activate + neighbor 172.16.0.79 activate + neighbor 172.16.0.82 activate + neighbor 172.16.0.86 activate + neighbor 172.16.0.87 activate + neighbor 172.16.0.89 activate + neighbor 172.16.0.92 activate + neighbor 172.16.0.94 activate + neighbor 172.16.0.95 activate + neighbor 172.16.0.97 activate + neighbor 172.16.0.100 activate + neighbor 172.16.0.107 activate + neighbor 172.16.0.140 activate + neighbor 172.16.0.142 activate + neighbor 172.16.0.148 activate + neighbor 172.16.0.152 activate + neighbor 172.16.0.156 activate + neighbor 
172.16.0.158 activate + neighbor 172.16.0.160 activate + neighbor 172.16.0.162 activate + neighbor 172.16.0.164 activate + neighbor 172.16.0.238 activate + neighbor 172.16.0.245 activate + neighbor 172.16.1.7 activate + neighbor 172.16.1.10 activate + neighbor 172.16.1.11 activate + neighbor 172.16.1.15 activate + neighbor 172.16.1.27 activate + neighbor 172.16.1.41 activate + neighbor 172.16.1.44 activate + neighbor 172.16.1.45 activate + neighbor 172.16.1.53 activate + neighbor 172.16.1.58 activate + neighbor 172.16.1.65 activate + neighbor 172.16.1.70 activate + neighbor 172.16.1.72 activate + neighbor 172.16.1.87 activate + neighbor 172.16.1.91 activate + neighbor 172.16.1.98 activate + neighbor 172.16.1.99 activate + neighbor 172.16.1.110 activate + neighbor 172.16.1.111 activate + neighbor 172.16.1.130 activate + neighbor 172.16.1.139 activate + neighbor 172.16.1.144 activate + neighbor 172.16.1.146 activate + neighbor 172.16.1.156 activate + neighbor 172.16.1.163 activate + neighbor 172.16.1.167 activate + neighbor 172.16.1.170 activate + neighbor 172.16.1.173 activate + neighbor 172.16.1.177 activate + neighbor 172.16.1.179 activate + neighbor 172.16.1.182 activate + neighbor 172.16.1.186 activate + neighbor 172.16.1.187 activate + neighbor 172.16.1.191 activate + neighbor 172.16.1.194 activate + neighbor 172.16.1.196 activate + neighbor 172.16.1.200 activate + neighbor 172.16.1.201 activate + neighbor 172.16.1.216 activate + neighbor 172.16.1.222 activate + ! + vrf vrf1 + rd 65342:1 + route-target import vpn-ipv4 65342:1 + route-target export vpn-ipv4 65342:1 + router-id 64.92.84.81 +! +router isis 1 + net 49.0000.ac10.019e.0000.00 + router-id ipv4 172.16.1.158 + log-adjacency-changes + ! + address-family ipv4 unicast + ! + segment-routing mpls + no shutdown +! +router multicast + ipv4 + routing + software-forwarding kernel + ! + ipv6 + software-forwarding kernel +! +router pim sparse-mode + ipv4 + rp address 10.0.0.0 233.84.178.0/24 override +! 
+router msdp + peer 172.16.0.101 + mesh-group DZ-1 + local-interface Loopback256 + description sao001-dz002 + ! + peer 172.16.0.106 + mesh-group DZ-1 + local-interface Loopback256 + description dz-ams-01 + ! + peer 172.16.0.108 + mesh-group DZ-1 + local-interface Loopback256 + description ams001-dz002 + ! + peer 172.16.0.109 + mesh-group DZ-1 + local-interface Loopback256 + description dub001-dz002 + ! + peer 172.16.0.110 + mesh-group DZ-1 + local-interface Loopback256 + description was001-dz001 + ! + peer 172.16.0.111 + mesh-group DZ-1 + local-interface Loopback256 + description nyc001-dz002 + ! + peer 172.16.0.112 + mesh-group DZ-1 + local-interface Loopback256 + description sjc001-dz002 + ! + peer 172.16.0.113 + mesh-group DZ-1 + local-interface Loopback256 + description hkg001-dz002 + ! + peer 172.16.0.114 + mesh-group DZ-1 + local-interface Loopback256 + description chi001-dz001 + ! + peer 172.16.0.115 + mesh-group DZ-1 + local-interface Loopback256 + description chi001-dz002 + ! + peer 172.16.0.14 + mesh-group DZ-1 + local-interface Loopback256 + description tyo001-dz002 + ! + peer 172.16.0.141 + mesh-group DZ-1 + local-interface Loopback256 + description dz-tor1-sw01 + ! + peer 172.16.0.143 + mesh-group DZ-1 + local-interface Loopback256 + description dz-mtl11-sw01 + ! + peer 172.16.0.149 + mesh-group DZ-1 + local-interface Loopback256 + description dz-ny7-sw02 + ! + peer 172.16.0.153 + mesh-group DZ-1 + local-interface Loopback256 + description dz-dc10-sw01 + ! + peer 172.16.0.157 + mesh-group DZ-1 + local-interface Loopback256 + description dz-ch2-sw01 + ! + peer 172.16.0.159 + mesh-group DZ-1 + local-interface Loopback256 + description dz-fr5-sw01 + ! + peer 172.16.0.161 + mesh-group DZ-1 + local-interface Loopback256 + description dz-sea10-sw01 + ! + peer 172.16.0.163 + mesh-group DZ-1 + local-interface Loopback256 + description dz-sg1-sw01 + ! + peer 172.16.0.165 + mesh-group DZ-1 + local-interface Loopback256 + description dz-ty9-sw01 + ! 
+ peer 172.16.0.17 + mesh-group DZ-1 + local-interface Loopback256 + description fra001-dz002 + ! + peer 172.16.0.2 + mesh-group DZ-1 + local-interface Loopback256 + description dz-ny7-sw01 + ! + peer 172.16.0.20 + mesh-group DZ-1 + local-interface Loopback256 + description dz-ny5-sw01 + ! + peer 172.16.0.22 + mesh-group DZ-1 + local-interface Loopback256 + description mrs001-dz001 + ! + peer 172.16.0.239 + mesh-group DZ-1 + local-interface Loopback256 + description dgt-dzd-sin-sg3 + ! + peer 172.16.0.242 + mesh-group DZ-1 + local-interface Loopback256 + description dz-fra-01 + ! + peer 172.16.0.243 + mesh-group DZ-1 + local-interface Loopback256 + description dz-sxb-01 + ! + peer 172.16.0.244 + mesh-group DZ-1 + local-interface Loopback256 + description dz-waw-01 + ! + peer 172.16.0.25 + mesh-group DZ-1 + local-interface Loopback256 + description dub001-dz001 + ! + peer 172.16.0.254 + mesh-group DZ-1 + local-interface Loopback256 + description dz-slc-sw01 + ! + peer 172.16.0.255 + mesh-group DZ-1 + local-interface Loopback256 + description cherlita + ! + peer 172.16.0.28 + mesh-group DZ-1 + local-interface Loopback256 + description mrs001-dz002 + ! + peer 172.16.0.29 + mesh-group DZ-1 + local-interface Loopback256 + description fr2-dzx-001 + ! + peer 172.16.0.31 + mesh-group DZ-1 + local-interface Loopback256 + description lon001-dz002 + ! + peer 172.16.0.35 + mesh-group DZ-1 + local-interface Loopback256 + description was001-dz002 + ! + peer 172.16.0.37 + mesh-group DZ-1 + local-interface Loopback256 + description nyc001-dz001 + ! + peer 172.16.0.39 + mesh-group DZ-1 + local-interface Loopback256 + description dz-mrs-01 + ! + peer 172.16.0.4 + mesh-group DZ-1 + local-interface Loopback256 + description sea001-dz001 + ! + peer 172.16.0.43 + mesh-group DZ-1 + local-interface Loopback256 + description tyo001-dz001 + ! + peer 172.16.0.48 + mesh-group DZ-1 + local-interface Loopback256 + description sjc001-dz001 + ! 
+ peer 172.16.0.50 + mesh-group DZ-1 + local-interface Loopback256 + description dz-mad-01 + ! + peer 172.16.0.57 + mesh-group DZ-1 + local-interface Loopback256 + description nyc002-dz002 + ! + peer 172.16.0.6 + mesh-group DZ-1 + local-interface Loopback256 + description dz-ld4-sw01 + ! + peer 172.16.0.63 + mesh-group DZ-1 + local-interface Loopback256 + description lax001-dz002 + ! + peer 172.16.0.69 + mesh-group DZ-1 + local-interface Loopback256 + description sin001-dz002 + ! + peer 172.16.0.71 + mesh-group DZ-1 + local-interface Loopback256 + description tyo002-dz002 + ! + peer 172.16.0.72 + mesh-group DZ-1 + local-interface Loopback256 + description dz-chi-sw01 + ! + peer 172.16.0.74 + mesh-group DZ-1 + local-interface Loopback256 + description hkg001-dz001 + ! + peer 172.16.0.8 + mesh-group DZ-1 + local-interface Loopback256 + description sea001-dz002 + ! + peer 172.16.0.80 + mesh-group DZ-1 + local-interface Loopback256 + description bom001-dz001 + ! + peer 172.16.0.81 + mesh-group DZ-1 + local-interface Loopback256 + description dz-lax-sw01 + ! + peer 172.16.0.83 + mesh-group DZ-1 + local-interface Loopback256 + description bom001-dz002 + ! + peer 172.16.0.88 + mesh-group DZ-1 + local-interface Loopback256 + description dfw001-dz001 + ! + peer 172.16.0.93 + mesh-group DZ-1 + local-interface Loopback256 + description dfw001-dz002 + ! + peer 172.16.0.96 + mesh-group DZ-1 + local-interface Loopback256 + description sao001-dz001 + ! + peer 172.16.1.100 + mesh-group DZ-1 + local-interface Loopback256 + description dz100a-slc1-tsw + ! + peer 172.16.1.101 + mesh-group DZ-1 + local-interface Loopback256 + description dgt-dzd-dub-db2 + ! + peer 172.16.1.114 + mesh-group DZ-1 + local-interface Loopback256 + description dgt-dzd-mrs-mrs1 + ! + peer 172.16.1.115 + mesh-group DZ-1 + local-interface Loopback256 + description swdzd01-lon2 + ! + peer 172.16.1.131 + mesh-group DZ-1 + local-interface Loopback256 + description lts-dzd-001 + ! 
+ peer 172.16.1.138 + mesh-group DZ-1 + local-interface Loopback256 + description dz-slc2-sw01 + ! + peer 172.16.1.14 + mesh-group DZ-1 + local-interface Loopback256 + description dz-muc-01 + ! + peer 172.16.1.145 + mesh-group DZ-1 + local-interface Loopback256 + description au1c-dz01 + ! + peer 172.16.1.147 + mesh-group DZ-1 + local-interface Loopback256 + description la2r-dz01 + ! + peer 172.16.1.157 + mesh-group DZ-1 + local-interface Loopback256 + description laconic-mia-sw01 + ! + peer 172.16.1.159 + mesh-group DZ-1 + local-interface Loopback256 + description dz100a-lax1-tsw + ! + peer 172.16.1.166 + mesh-group DZ-1 + local-interface Loopback256 + description dz100a-sea1-tsw + ! + peer 172.16.1.171 + mesh-group DZ-1 + local-interface Loopback256 + description dz100a-dal1-tsw + ! + peer 172.16.1.172 + mesh-group DZ-1 + local-interface Loopback256 + description laconic-dfw-sw01 + ! + peer 172.16.1.176 + mesh-group DZ-1 + local-interface Loopback256 + description dz100a-ewr1-tsw + ! + peer 172.16.1.178 + mesh-group DZ-1 + local-interface Loopback256 + description dz100a-iad1-tsw + ! + peer 172.16.1.183 + mesh-group DZ-1 + local-interface Loopback256 + description fra-velia + ! + peer 172.16.1.184 + mesh-group DZ-1 + local-interface Loopback256 + description dz100a-chi1-tsw + ! + peer 172.16.1.185 + mesh-group DZ-1 + local-interface Loopback256 + description dz103a-lon1-tsw + ! + peer 172.16.1.190 + mesh-group DZ-1 + local-interface Loopback256 + description dz100a-fra2-tsw + ! + peer 172.16.1.195 + mesh-group DZ-1 + local-interface Loopback256 + description allnodes-fra1 + ! + peer 172.16.1.197 + mesh-group DZ-1 + local-interface Loopback256 + description dz100a-ams2-tsw + ! + peer 172.16.1.204 + mesh-group DZ-1 + local-interface Loopback256 + description dz115a-tyo2-tsw + ! + peer 172.16.1.205 + mesh-group DZ-1 + local-interface Loopback256 + description dzd-tok-01 + ! 
+ peer 172.16.1.217 + mesh-group DZ-1 + local-interface Loopback256 + description dzd-fra-01 + ! + peer 172.16.1.219 + mesh-group DZ-1 + local-interface Loopback256 + description dgt-dzd-tyo-ty8 + ! + peer 172.16.1.223 + mesh-group DZ-1 + local-interface Loopback256 + description dz100a-sgp1-tsw + ! + peer 172.16.1.26 + mesh-group DZ-1 + local-interface Loopback256 + description ce2-dzd-001 + ! + peer 172.16.1.36 + mesh-group DZ-1 + local-interface Loopback256 + description dgt-dzd-nyc-ny2 + ! + peer 172.16.1.37 + mesh-group DZ-1 + local-interface Loopback256 + description swerry + ! + peer 172.16.1.40 + mesh-group DZ-1 + local-interface Loopback256 + description cherydam + ! + peer 172.16.1.52 + mesh-group DZ-1 + local-interface Loopback256 + description dgt-dzd-ash-dc3 + ! + peer 172.16.1.59 + mesh-group DZ-1 + local-interface Loopback256 + description dgt-dzd-dal-da3 + ! + peer 172.16.1.6 + mesh-group DZ-1 + local-interface Loopback256 + description frankry + ! + peer 172.16.1.64 + mesh-group DZ-1 + local-interface Loopback256 + description bdc-dzd-001 + ! + peer 172.16.1.71 + mesh-group DZ-1 + local-interface Loopback256 + description dgt-dzd-lon-ths + ! + peer 172.16.1.73 + mesh-group DZ-1 + local-interface Loopback256 + description dgt-dzd-lax-la2 + ! + peer 172.16.1.86 + mesh-group DZ-1 + local-interface Loopback256 + description dgt-dzd-fra-fr5 + ! + peer 172.16.1.90 + mesh-group DZ-1 + local-interface Loopback256 + description dgt-dzd-ams-ams1 +! +management ssh + authentication protocol public-key +! +end \ No newline at end of file From 6841d5e3c3a8e0b70639a2ab6503b25de7b27310 Mon Sep 17 00:00:00 2001 From: "A. F. 
Dudley" Date: Fri, 6 Mar 2026 21:08:48 +0000 Subject: [PATCH 02/62] feat: ashburn validator relay playbooks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three playbooks for routing all validator traffic through 137.239.194.65: - was-sw01: Loopback101 + PBR redirect on Et1/1 (already applied/committed) Will be simplified to a static route in next iteration. - mia-sw01: ACL permit for src 137.239.194.65 on Tunnel500 + default route in vrf1 via egress-vrf default to was-sw01 backbone. No PBR needed — per-tunnel ACLs already scope what enters vrf1. - biscayne: DNAT inbound (137.239.194.65 → kind node), SNAT + policy routing outbound (validator sport 8001,9000-9025 → doublezero0 GRE). Inbound already applied. Co-Authored-By: Claude Opus 4.6 --- playbooks/ashburn-relay-biscayne.yml | 356 +++++++++++++++++++++++++++ playbooks/ashburn-relay-mia-sw01.yml | 215 ++++++++++++++++ playbooks/ashburn-relay-was-sw01.yml | 197 +++++++++++++++ 3 files changed, 768 insertions(+) create mode 100644 playbooks/ashburn-relay-biscayne.yml create mode 100644 playbooks/ashburn-relay-mia-sw01.yml create mode 100644 playbooks/ashburn-relay-was-sw01.yml diff --git a/playbooks/ashburn-relay-biscayne.yml b/playbooks/ashburn-relay-biscayne.yml new file mode 100644 index 00000000..75053483 --- /dev/null +++ b/playbooks/ashburn-relay-biscayne.yml @@ -0,0 +1,356 @@ +--- +# Configure biscayne for Ashburn validator relay +# +# Sets up inbound DNAT (137.239.194.65 → kind node) and outbound SNAT + +# policy routing (validator traffic → doublezero0 → mia-sw01 → was-sw01). 
+# +# Usage: +# # Full setup (inbound + outbound) +# ansible-playbook playbooks/ashburn-relay-biscayne.yml +# +# # Inbound only (DNAT rules) +# ansible-playbook playbooks/ashburn-relay-biscayne.yml -t inbound +# +# # Outbound only (SNAT + policy routing) +# ansible-playbook playbooks/ashburn-relay-biscayne.yml -t outbound +# +# # Pre-flight checks only +# ansible-playbook playbooks/ashburn-relay-biscayne.yml -t preflight +# +# # Rollback +# ansible-playbook playbooks/ashburn-relay-biscayne.yml -e rollback=true + +- name: Configure biscayne Ashburn validator relay + hosts: biscayne + gather_facts: false + + vars: + ashburn_ip: 137.239.194.65 + kind_node_ip: 172.20.0.2 + kind_network: 172.20.0.0/16 + tunnel_gateway: 169.254.7.6 + tunnel_device: doublezero0 + fwmark: 100 + rt_table_name: ashburn + rt_table_id: 100 + gossip_port: 8001 + dynamic_port_range_start: 9000 + dynamic_port_range_end: 9025 + rollback: false + + tasks: + # ------------------------------------------------------------------ + # Rollback + # ------------------------------------------------------------------ + - name: Rollback all Ashburn relay rules + when: rollback | bool + block: + - name: Remove Ashburn IP from loopback + ansible.builtin.command: + cmd: ip addr del {{ ashburn_ip }}/32 dev lo + failed_when: false + + - name: Remove inbound DNAT rules + ansible.builtin.shell: + cmd: | + set -o pipefail + iptables -t nat -D PREROUTING -p udp -d {{ ashburn_ip }} --dport {{ gossip_port }} -j DNAT --to-destination {{ kind_node_ip }}:{{ gossip_port }} 2>/dev/null || true + iptables -t nat -D PREROUTING -p tcp -d {{ ashburn_ip }} --dport {{ gossip_port }} -j DNAT --to-destination {{ kind_node_ip }}:{{ gossip_port }} 2>/dev/null || true + iptables -t nat -D PREROUTING -p udp -d {{ ashburn_ip }} --dport {{ dynamic_port_range_start }}:{{ dynamic_port_range_end }} -j DNAT --to-destination {{ kind_node_ip }} 2>/dev/null || true + executable: /bin/bash + + - name: Remove outbound mangle rules + 
ansible.builtin.shell: + cmd: | + set -o pipefail + iptables -t mangle -D PREROUTING -s {{ kind_network }} -p udp --sport {{ gossip_port }} -j MARK --set-mark {{ fwmark }} 2>/dev/null || true + iptables -t mangle -D PREROUTING -s {{ kind_network }} -p udp --sport {{ dynamic_port_range_start }}:{{ dynamic_port_range_end }} -j MARK --set-mark {{ fwmark }} 2>/dev/null || true + iptables -t mangle -D PREROUTING -s {{ kind_network }} -p tcp --sport {{ gossip_port }} -j MARK --set-mark {{ fwmark }} 2>/dev/null || true + executable: /bin/bash + + - name: Remove outbound SNAT rule + ansible.builtin.shell: + cmd: iptables -t nat -D POSTROUTING -m mark --mark {{ fwmark }} -j SNAT --to-source {{ ashburn_ip }} 2>/dev/null || true + executable: /bin/bash + + - name: Remove policy routing + ansible.builtin.shell: + cmd: | + ip rule del fwmark {{ fwmark }} table {{ rt_table_name }} 2>/dev/null || true + ip route del default table {{ rt_table_name }} 2>/dev/null || true + executable: /bin/bash + + - name: Persist cleaned iptables + ansible.builtin.command: + cmd: netfilter-persistent save + + - name: Remove if-up.d script + ansible.builtin.file: + path: /etc/network/if-up.d/ashburn-routing + state: absent + + - name: Rollback complete + ansible.builtin.debug: + msg: "Ashburn relay rules removed. Old SHRED-RELAY DNAT (64.92.84.81:20000) is still in place." 
+ + - name: End play after rollback + ansible.builtin.meta: end_play + + # ------------------------------------------------------------------ + # Pre-flight checks + # ------------------------------------------------------------------ + - name: Check doublezero0 tunnel is up + ansible.builtin.command: + cmd: ip link show {{ tunnel_device }} + register: tunnel_status + changed_when: false + failed_when: "'UP' not in tunnel_status.stdout" + tags: [preflight, inbound, outbound] + + - name: Check kind node is reachable + ansible.builtin.command: + cmd: ping -c 1 -W 2 {{ kind_node_ip }} + register: kind_ping + changed_when: false + failed_when: kind_ping.rc != 0 + tags: [preflight, inbound] + + - name: Verify Docker preserves source ports (5 sec sample) + ansible.builtin.shell: + cmd: | + set -o pipefail + # Check if any validator traffic is flowing with original sport + timeout 5 tcpdump -i br-cf46a62ab5b2 -nn -c 5 'udp src port 8001 or udp src portrange 9000-9025' 2>&1 | tail -5 || echo "No validator traffic captured in 5s (validator may not be running)" + executable: /bin/bash + register: sport_check + changed_when: false + failed_when: false + tags: [preflight] + + - name: Show sport preservation check + ansible.builtin.debug: + var: sport_check.stdout_lines + tags: [preflight] + + - name: Show existing iptables nat rules + ansible.builtin.shell: + cmd: iptables -t nat -L -v -n --line-numbers | head -60 + executable: /bin/bash + register: existing_nat + changed_when: false + tags: [preflight] + + - name: Display existing NAT rules + ansible.builtin.debug: + var: existing_nat.stdout_lines + tags: [preflight] + + # ------------------------------------------------------------------ + # Inbound: DNAT for 137.239.194.65 → kind node + # ------------------------------------------------------------------ + - name: Add Ashburn IP to loopback + ansible.builtin.command: + cmd: ip addr add {{ ashburn_ip }}/32 dev lo + register: add_ip + changed_when: add_ip.rc == 0 + 
failed_when: "add_ip.rc != 0 and 'RTNETLINK answers: File exists' not in add_ip.stderr" + tags: [inbound] + + - name: Add DNAT for gossip UDP + ansible.builtin.iptables: + table: nat + chain: PREROUTING + protocol: udp + destination: "{{ ashburn_ip }}" + destination_port: "{{ gossip_port }}" + jump: DNAT + to_destination: "{{ kind_node_ip }}:{{ gossip_port }}" + tags: [inbound] + + - name: Add DNAT for gossip TCP + ansible.builtin.iptables: + table: nat + chain: PREROUTING + protocol: tcp + destination: "{{ ashburn_ip }}" + destination_port: "{{ gossip_port }}" + jump: DNAT + to_destination: "{{ kind_node_ip }}:{{ gossip_port }}" + tags: [inbound] + + - name: Add DNAT for dynamic ports (UDP 9000-9025) + ansible.builtin.iptables: + table: nat + chain: PREROUTING + protocol: udp + destination: "{{ ashburn_ip }}" + destination_port: "{{ dynamic_port_range_start }}:{{ dynamic_port_range_end }}" + jump: DNAT + to_destination: "{{ kind_node_ip }}" + tags: [inbound] + + # ------------------------------------------------------------------ + # Outbound: fwmark + SNAT + policy routing + # ------------------------------------------------------------------ + - name: Mark outbound validator UDP gossip traffic + ansible.builtin.iptables: + table: mangle + chain: PREROUTING + protocol: udp + source: "{{ kind_network }}" + source_port: "{{ gossip_port }}" + jump: MARK + set_mark: "{{ fwmark }}" + tags: [outbound] + + - name: Mark outbound validator UDP dynamic port traffic + ansible.builtin.iptables: + table: mangle + chain: PREROUTING + protocol: udp + source: "{{ kind_network }}" + source_port: "{{ dynamic_port_range_start }}:{{ dynamic_port_range_end }}" + jump: MARK + set_mark: "{{ fwmark }}" + tags: [outbound] + + - name: Mark outbound validator TCP gossip traffic + ansible.builtin.iptables: + table: mangle + chain: PREROUTING + protocol: tcp + source: "{{ kind_network }}" + source_port: "{{ gossip_port }}" + jump: MARK + set_mark: "{{ fwmark }}" + tags: [outbound] + + - 
name: SNAT marked traffic to Ashburn IP (before Docker MASQUERADE) + ansible.builtin.shell: + cmd: | + set -o pipefail + # Check if rule already exists + if iptables -t nat -C POSTROUTING -m mark --mark {{ fwmark }} -j SNAT --to-source {{ ashburn_ip }} 2>/dev/null; then + echo "SNAT rule already exists" + else + iptables -t nat -I POSTROUTING 1 -m mark --mark {{ fwmark }} -j SNAT --to-source {{ ashburn_ip }} + echo "SNAT rule inserted at position 1" + fi + executable: /bin/bash + register: snat_result + changed_when: "'inserted' in snat_result.stdout" + tags: [outbound] + + - name: Show SNAT result + ansible.builtin.debug: + var: snat_result.stdout + tags: [outbound] + + - name: Ensure rt_tables entry exists + ansible.builtin.lineinfile: + path: /etc/iproute2/rt_tables + line: "{{ rt_table_id }} {{ rt_table_name }}" + regexp: "^{{ rt_table_id }}\\s" + tags: [outbound] + + - name: Add policy routing rule for fwmark + ansible.builtin.shell: + cmd: | + if ip rule show | grep -q "fwmark {{ '0x%x' | format(fwmark | int) }} lookup {{ rt_table_name }}"; then # ip rule show prints the mark in hex; derive the pattern from the vars + echo "rule already exists" + else + ip rule add fwmark {{ fwmark }} table {{ rt_table_name }} + echo "rule added" + fi + executable: /bin/bash + register: rule_result + changed_when: "'added' in rule_result.stdout" + tags: [outbound] + + - name: Add default route via doublezero0 in ashburn table + ansible.builtin.shell: + cmd: ip route replace default via {{ tunnel_gateway }} dev {{ tunnel_device }} table {{ rt_table_name }} + executable: /bin/bash + changed_when: true + tags: [outbound] + + # ------------------------------------------------------------------ + # Persistence + # ------------------------------------------------------------------ + - name: Save iptables rules + ansible.builtin.command: + cmd: netfilter-persistent save + tags: [inbound, outbound] + + - name: Install if-up.d persistence script + ansible.builtin.copy: + src: files/ashburn-routing-ifup.sh + dest: /etc/network/if-up.d/ashburn-routing + mode: '0755' + owner: root + group: root + 
tags: [outbound] + + # ------------------------------------------------------------------ + # Verification + # ------------------------------------------------------------------ + - name: Show NAT rules + ansible.builtin.shell: + cmd: iptables -t nat -L -v -n --line-numbers 2>&1 | head -40 + executable: /bin/bash + register: nat_rules + changed_when: false + tags: [inbound, outbound] + + - name: Show mangle rules + ansible.builtin.shell: + cmd: iptables -t mangle -L -v -n 2>&1 + executable: /bin/bash + register: mangle_rules + changed_when: false + tags: [outbound] + + - name: Show policy routing + ansible.builtin.shell: + cmd: | + echo "=== ip rule ===" + ip rule show + echo "" + echo "=== ashburn routing table ===" + ip route show table {{ rt_table_name }} 2>/dev/null || echo "table empty" + executable: /bin/bash + register: routing_info + changed_when: false + tags: [outbound] + + - name: Show loopback addresses + ansible.builtin.shell: + cmd: ip addr show lo | grep inet + executable: /bin/bash + register: lo_addrs + changed_when: false + tags: [inbound] + + - name: Display verification + ansible.builtin.debug: + msg: + nat_rules: "{{ nat_rules.stdout_lines }}" + mangle_rules: "{{ mangle_rules.stdout_lines | default([]) }}" + routing: "{{ routing_info.stdout_lines | default([]) }}" + loopback: "{{ lo_addrs.stdout_lines | default([]) }}" # lo_addrs is only registered under the inbound tag + tags: [inbound, outbound] + + - name: Summary + ansible.builtin.debug: + msg: | + === Ashburn Relay Setup Complete === + Ashburn IP: {{ ashburn_ip }} (on lo) + Inbound DNAT: {{ ashburn_ip }}:8001,9000-9025 → {{ kind_node_ip }} + Outbound SNAT: {{ kind_network }} sport 8001,9000-9025 → {{ ashburn_ip }} + Policy route: fwmark {{ fwmark }} → table {{ rt_table_name }} → via {{ tunnel_gateway }} dev {{ tunnel_device }} + Persisted: iptables-persistent + /etc/network/if-up.d/ashburn-routing + + Next steps: + 1. Verify inbound: ping {{ ashburn_ip }} from external host + 2. Verify outbound: tcpdump on was-sw01 for src {{ ashburn_ip }} + 3. 
Check validator gossip ContactInfo shows {{ ashburn_ip }} for all addresses diff --git a/playbooks/ashburn-relay-mia-sw01.yml b/playbooks/ashburn-relay-mia-sw01.yml new file mode 100644 index 00000000..6af443ad --- /dev/null +++ b/playbooks/ashburn-relay-mia-sw01.yml @@ -0,0 +1,215 @@ +--- +# Configure laconic-mia-sw01 for outbound validator traffic redirect +# +# Redirects outbound traffic from biscayne (src 137.239.194.65) arriving +# via the doublezero0 GRE tunnel to was-sw01 via the backbone, preventing +# BCP38 drops at mia-sw01's ISP uplink. +# +# Approach: The existing per-tunnel ACL (SEC-USER-500-IN) controls what +# traffic enters vrf1 from Tunnel500. We add 137.239.194.65 to the ACL +# and add a default route in vrf1 via egress-vrf default pointing to +# was-sw01's backbone IP. No PBR needed — the ACL is the filter. +# +# The other vrf1 tunnels (502, 504, 505) have their own ACLs that only +# permit their specific source IPs, so the default route won't affect them. +# +# Usage: +# # Pre-flight checks only (safe, read-only) +# ansible-playbook -i inventory/switches.yml playbooks/ashburn-relay-mia-sw01.yml +# +# # Apply config (after reviewing pre-flight output) +# ansible-playbook -i inventory/switches.yml playbooks/ashburn-relay-mia-sw01.yml \ +# -e apply=true +# +# # Commit persisted config +# ansible-playbook -i inventory/switches.yml playbooks/ashburn-relay-mia-sw01.yml -e commit=true +# +# # Rollback +# ansible-playbook -i inventory/switches.yml playbooks/ashburn-relay-mia-sw01.yml -e rollback=true + +- name: Configure mia-sw01 outbound validator redirect + hosts: mia-sw01 + gather_facts: false + + vars: + ashburn_ip: 137.239.194.65 + apply: false + commit: false + rollback: false + tunnel_interface: Tunnel500 + tunnel_vrf: vrf1 + tunnel_acl: SEC-USER-500-IN + backbone_interface: Ethernet4/1 + session_name: validator-outbound + checkpoint_name: pre-validator-outbound + + tasks: + # ------------------------------------------------------------------ + # 
Rollback path + # ------------------------------------------------------------------ + - name: Rollback to checkpoint + when: rollback | bool + block: + - name: Execute rollback + arista.eos.eos_command: + commands: + - "configure checkpoint restore {{ checkpoint_name }}" # EOS counterpart of `configure checkpoint save`; `rollback running-config checkpoint` is NX-OS syntax + - write memory + register: rollback_result + + - name: Show rollback result + ansible.builtin.debug: + var: rollback_result.stdout_lines + + - name: End play after rollback + ansible.builtin.meta: end_play + + # ------------------------------------------------------------------ + # Commit finalization + # ------------------------------------------------------------------ + - name: Finalize pending session + when: commit | bool + block: + - name: Commit session and write memory + arista.eos.eos_command: + commands: + - "configure session {{ session_name }} commit" + - write memory + register: commit_result + + - name: Show commit result + ansible.builtin.debug: + var: commit_result.stdout_lines + + - name: End play after commit + ansible.builtin.meta: end_play + + # ------------------------------------------------------------------ + # Pre-flight checks (always run unless commit/rollback) + # ------------------------------------------------------------------ + - name: Show tunnel interface config + arista.eos.eos_command: + commands: + - "show running-config interfaces {{ tunnel_interface }}" + register: tunnel_config + tags: [preflight] + + - name: Display tunnel config + ansible.builtin.debug: + var: tunnel_config.stdout_lines + tags: [preflight] + + - name: Show tunnel ACL + arista.eos.eos_command: + commands: + - "show running-config | section ip access-list {{ tunnel_acl }}" + register: acl_config + tags: [preflight] + + - name: Display tunnel ACL + ansible.builtin.debug: + var: acl_config.stdout_lines + tags: [preflight] + + - name: Check VRF routing + arista.eos.eos_command: + commands: + - "show ip route vrf {{ tunnel_vrf }} 0.0.0.0/0" + - "show ip route vrf {{ tunnel_vrf }} {{ 
backbone_peer }}" + - "show ip route {{ backbone_peer }}" + register: vrf_routing + tags: [preflight] + + - name: Display VRF routing check + ansible.builtin.debug: + var: vrf_routing.stdout_lines + tags: [preflight] + + - name: Pre-flight summary + when: not (apply | bool) + ansible.builtin.debug: + msg: | + === Pre-flight complete === + Review the output above: + 1. {{ tunnel_interface }} ACL ({{ tunnel_acl }}): does it permit src {{ ashburn_ip }}? + 2. {{ tunnel_vrf }} default route: does one exist? + 3. Backbone nexthop {{ backbone_peer }}: reachable in default VRF? + + To apply config: + ansible-playbook -i inventory/switches.yml playbooks/ashburn-relay-mia-sw01.yml \ + -e apply=true + tags: [preflight] + + - name: End play if not applying + when: not (apply | bool) + ansible.builtin.meta: end_play + + # ------------------------------------------------------------------ + # Apply config via session with 5-minute auto-revert + # ------------------------------------------------------------------ + - name: Save checkpoint + arista.eos.eos_command: + commands: + - "configure checkpoint save {{ checkpoint_name }}" + + - name: Apply config session + arista.eos.eos_command: + commands: + - command: "configure session {{ session_name }}" + # Permit Ashburn IP through the tunnel ACL (insert before deny) + - command: "ip access-list {{ tunnel_acl }}" + - command: "45 permit ip host {{ ashburn_ip }} any" + - command: exit + # Default route in vrf1 via backbone to was-sw01 (egress-vrf default) + # Safe because per-tunnel ACLs already restrict what enters vrf1 + - command: "ip route vrf {{ tunnel_vrf }} 0.0.0.0/0 egress-vrf default {{ backbone_interface }} {{ backbone_peer }}" + + - name: Show session diff + arista.eos.eos_command: + commands: + - "configure session {{ session_name }}" + - show session-config diffs + - exit + register: session_diff + + - name: Display session diff + ansible.builtin.debug: + var: session_diff.stdout_lines + + - name: Commit with 5-minute 
auto-revert + arista.eos.eos_command: + commands: + - "configure session {{ session_name }} commit timer 00:05:00" + + # ------------------------------------------------------------------ + # Verify + # ------------------------------------------------------------------ + - name: Verify config + arista.eos.eos_command: + commands: + - "show running-config | section ip access-list {{ tunnel_acl }}" + - "show ip route vrf {{ tunnel_vrf }} 0.0.0.0/0" + register: verify + + - name: Display verification + ansible.builtin.debug: + var: verify.stdout_lines + + - name: Reminder + ansible.builtin.debug: + msg: | + === Config applied with 5-minute auto-revert === + Session: {{ session_name }} + Checkpoint: {{ checkpoint_name }} + + Changes applied: + 1. ACL {{ tunnel_acl }}: added "45 permit ip host {{ ashburn_ip }} any" + 2. Default route in {{ tunnel_vrf }}: 0.0.0.0/0 egress-vrf default {{ backbone_interface }} {{ backbone_peer }} + + The config will auto-revert in 5 minutes unless committed. + Verify on the switch, then commit: + configure session {{ session_name }} commit + write memory + + To revert immediately: + ansible-playbook ... -e rollback=true diff --git a/playbooks/ashburn-relay-was-sw01.yml b/playbooks/ashburn-relay-was-sw01.yml new file mode 100644 index 00000000..1566fb0a --- /dev/null +++ b/playbooks/ashburn-relay-was-sw01.yml @@ -0,0 +1,197 @@ +--- +# Configure laconic-was-sw01 for full validator traffic relay +# +# Replaces the old SHRED-RELAY (TVU-only, port 20000) with VALIDATOR-RELAY +# covering all validator ports (8001, 9000-9025). Adds Loopback101 for +# 137.239.194.65. +# +# Uses EOS config session with 5-minute auto-revert for safety. +# After verification, run with -e commit=true to finalize. 
+# +# Usage: +# ansible-playbook -i inventory/switches.yml playbooks/ashburn-relay-was-sw01.yml +# ansible-playbook -i inventory/switches.yml playbooks/ashburn-relay-was-sw01.yml -e commit=true +# ansible-playbook -i inventory/switches.yml playbooks/ashburn-relay-was-sw01.yml -e rollback=true + +- name: Configure was-sw01 inbound validator relay + hosts: was-sw01 + gather_facts: false + + vars: + ashburn_ip: 137.239.194.65 + commit: false + rollback: false + session_name: validator-relay + checkpoint_name: pre-validator-relay + + tasks: + # ------------------------------------------------------------------ + # Rollback path + # ------------------------------------------------------------------ + - name: Rollback to checkpoint + when: rollback | bool + block: + - name: Execute rollback + arista.eos.eos_command: + commands: + - "configure checkpoint restore {{ checkpoint_name }}" # EOS counterpart of `configure checkpoint save`; `rollback running-config checkpoint` is NX-OS syntax + - write memory + register: rollback_result + + - name: Show rollback result + ansible.builtin.debug: + var: rollback_result.stdout_lines + + - name: End play after rollback + ansible.builtin.meta: end_play + + # ------------------------------------------------------------------ + # Commit finalization + # ------------------------------------------------------------------ + - name: Finalize pending session + when: commit | bool + block: + - name: Commit session and write memory + arista.eos.eos_command: + commands: + - "configure session {{ session_name }} commit" + - write memory + register: commit_result + + - name: Show commit result + ansible.builtin.debug: + var: commit_result.stdout_lines + + - name: End play after commit + ansible.builtin.meta: end_play + + # ------------------------------------------------------------------ + # Pre-checks + # ------------------------------------------------------------------ + - name: Show current traffic-policy on Et1/1 + arista.eos.eos_command: + commands: + - show running-config interfaces Ethernet1/1 + register: et1_config + + - name: Show 
current config + ansible.builtin.debug: + var: et1_config.stdout_lines + + - name: Show existing PBR policy on Et1/1 + arista.eos.eos_command: + commands: + - "show running-config | include service-policy" + register: existing_pbr + + - name: Show existing PBR config + ansible.builtin.debug: + var: existing_pbr.stdout_lines + + # ------------------------------------------------------------------ + # Save checkpoint + # ------------------------------------------------------------------ + - name: Save checkpoint for rollback + arista.eos.eos_command: + commands: + - "configure checkpoint save {{ checkpoint_name }}" + register: checkpoint_result + + - name: Show checkpoint result + ansible.builtin.debug: + var: checkpoint_result.stdout_lines + + # ------------------------------------------------------------------ + # Apply via config session with 5-minute auto-revert + # + # eos_config writes directly to running-config, bypassing sessions. + # Use eos_command with raw CLI to get the safety net. 
+ # ------------------------------------------------------------------ + - name: Apply config session with auto-revert + arista.eos.eos_command: + commands: + # Enter named config session + - command: "configure session {{ session_name }}" + # Loopback101 for Ashburn IP + - command: interface Loopback101 + - command: "ip address {{ ashburn_ip }}/32" + - command: exit + # ACL covering all validator ports + - command: ip access-list VALIDATOR-RELAY-ACL + - command: 10 permit udp any any eq 8001 + - command: 20 permit udp any any range 9000 9025 + - command: 30 permit tcp any any eq 8001 + - command: exit + # PBR class-map referencing the ACL + - command: class-map type pbr match-any VALIDATOR-RELAY-CLASS + - command: match ip access-group VALIDATOR-RELAY-ACL + - command: exit + # PBR policy-map with nexthop redirect + - command: policy-map type pbr VALIDATOR-RELAY + - command: class VALIDATOR-RELAY-CLASS + - command: "set nexthop {{ backbone_peer }}" + - command: exit + - command: exit + # Apply PBR policy on Et1/1 + - command: interface Ethernet1/1 + - command: service-policy type pbr input VALIDATOR-RELAY + - command: exit + tags: [config] + + - name: Show session diff + arista.eos.eos_command: + commands: + - "configure session {{ session_name }}" + - show session-config diffs + - exit + register: session_diff + + - name: Display session diff + ansible.builtin.debug: + var: session_diff.stdout_lines + + - name: Commit with 5-minute auto-revert + arista.eos.eos_command: + commands: + - "configure session {{ session_name }} commit timer 00:05:00" + tags: [config] + + # ------------------------------------------------------------------ + # Verify + # ------------------------------------------------------------------ + - name: Show PBR policy on Et1/1 + arista.eos.eos_command: + commands: + - show running-config interfaces Ethernet1/1 + - show running-config section policy-map + - show ip interface Loopback101 + register: pbr_interface + + - name: Display verification 
+ ansible.builtin.debug: + var: pbr_interface.stdout_lines + + - name: Show Loopback101 + arista.eos.eos_command: + commands: + - show ip interface Loopback101 + register: lo101 + + - name: Display Loopback101 + ansible.builtin.debug: + var: lo101.stdout_lines + + - name: Reminder + ansible.builtin.debug: + msg: | + === Config applied with 5-minute auto-revert === + Session: {{ session_name }} + Checkpoint: {{ checkpoint_name }} + + The config will auto-revert in 5 minutes unless committed. + Verify PBR policy is applied, then commit from the switch CLI: + configure session {{ session_name }} commit + write memory + + To revert immediately: + ansible-playbook ... -e rollback=true From 0b52fc99d72e726000eadb3677be0cf2d5f0e445 Mon Sep 17 00:00:00 2001 From: "A. F. Dudley" Date: Sat, 7 Mar 2026 01:44:25 +0000 Subject: [PATCH 03/62] fix: ashburn relay playbooks and document DZ tunnel ACL root cause MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Playbook fixes from testing: - ashburn-relay-biscayne: insert DNAT rules at position 1 before Docker's ADDRTYPE LOCAL rule (was being swallowed at position 3+) - ashburn-relay-mia-sw01: add inbound route for 137.239.194.65 via egress-vrf vrf1 (nexthop only, no interface — EOS silently drops cross-VRF routes that specify a tunnel interface) - ashburn-relay-was-sw01: replace PBR with static route, remove Loopback101 Bug doc (bug-ashburn-tunnel-port-filtering.md): root cause is the DoubleZero agent on mia-sw01 overwrites SEC-USER-500-IN ACL, dropping outbound gossip with src 137.239.194.65. The DZ agent controls Tunnel500's lifecycle. Fix requires a separate GRE tunnel using mia-sw01's free LAN IP (209.42.167.137) to bypass DZ infrastructure. Also adds all repo docs, scripts, inventory, and remaining playbooks. 
Co-Authored-By: Claude Opus 4.6 --- .gitignore | 3 + CLAUDE.md | 204 + README.md | 3 + ansible.cfg | 13 + docs/arista-eos-reference.md | 114 + docs/arista-scraped/acls-and-route-maps.md | 8439 +++++++++ ...ess-per-port-for-ipv4-and-ipv6-counters.md | 181 + .../inter-vrf-local-route-leaking.md | 305 + docs/arista-scraped/ipv4.md | 15434 ++++++++++++++++ docs/arista-scraped/nexthop-groups.md | 1167 ++ docs/arista-scraped/static-inter-vrf-route.md | 82 + docs/arista-scraped/traffic-management.md | 10551 +++++++++++ docs/ashburn-validator-relay.md | 275 + docs/blue-green-upgrades.md | 416 + docs/bug-ashburn-tunnel-port-filtering.md | 85 + docs/bug-laconic-so-etcd-cleanup.md | 51 + docs/bug-laconic-so-ingress-conflict.md | 75 + docs/doublezero-multicast-access.md | 121 + docs/doublezero-status.md | 121 + docs/feature-kind-local-registry.md | 65 + docs/known-issues.md | 78 + docs/shred-collector-relay.md | 191 + docs/tvu-shred-relay.md | 161 + inventory/biscayne.yml | 14 + inventory/switches.yml | 23 + playbooks/ashburn-relay-biscayne.yml | 107 +- playbooks/ashburn-relay-mia-sw01.yml | 24 +- playbooks/ashburn-relay-was-sw01.yml | 139 +- playbooks/biscayne-boot.yml | 107 + playbooks/biscayne-recover.yml | 220 + playbooks/biscayne-redeploy.yml | 321 + playbooks/biscayne-stop.yml | 106 + playbooks/connect-doublezero-multicast.yml | 134 + playbooks/files/ashburn-routing-ifup.sh | 18 + playbooks/fix-pv-mounts.yml | 166 + playbooks/health-check.yml | 340 + scripts/check-shred-completeness.sh | 98 + scripts/pane-exec.sh | 38 + scripts/scrape-arista-docs.mjs | 151 + scripts/shred-unwrap.py | 34 + scripts/snapshot-download.py | 546 + 41 files changed, 40587 insertions(+), 134 deletions(-) create mode 100644 .gitignore create mode 100644 CLAUDE.md create mode 100644 README.md create mode 100644 ansible.cfg create mode 100644 docs/arista-eos-reference.md create mode 100644 docs/arista-scraped/acls-and-route-maps.md create mode 100644 
docs/arista-scraped/ingress-and-egress-per-port-for-ipv4-and-ipv6-counters.md create mode 100644 docs/arista-scraped/inter-vrf-local-route-leaking.md create mode 100644 docs/arista-scraped/ipv4.md create mode 100644 docs/arista-scraped/nexthop-groups.md create mode 100644 docs/arista-scraped/static-inter-vrf-route.md create mode 100644 docs/arista-scraped/traffic-management.md create mode 100644 docs/ashburn-validator-relay.md create mode 100644 docs/blue-green-upgrades.md create mode 100644 docs/bug-ashburn-tunnel-port-filtering.md create mode 100644 docs/bug-laconic-so-etcd-cleanup.md create mode 100644 docs/bug-laconic-so-ingress-conflict.md create mode 100644 docs/doublezero-multicast-access.md create mode 100644 docs/doublezero-status.md create mode 100644 docs/feature-kind-local-registry.md create mode 100644 docs/known-issues.md create mode 100644 docs/shred-collector-relay.md create mode 100644 docs/tvu-shred-relay.md create mode 100644 inventory/biscayne.yml create mode 100644 inventory/switches.yml create mode 100644 playbooks/biscayne-boot.yml create mode 100644 playbooks/biscayne-recover.yml create mode 100644 playbooks/biscayne-redeploy.yml create mode 100644 playbooks/biscayne-stop.yml create mode 100644 playbooks/connect-doublezero-multicast.yml create mode 100644 playbooks/files/ashburn-routing-ifup.sh create mode 100644 playbooks/fix-pv-mounts.yml create mode 100644 playbooks/health-check.yml create mode 100755 scripts/check-shred-completeness.sh create mode 100755 scripts/pane-exec.sh create mode 100644 scripts/scrape-arista-docs.mjs create mode 100644 scripts/shred-unwrap.py create mode 100755 scripts/snapshot-download.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..06aea24a --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +.venv/ +sessions.duckdb +sessions.duckdb.wal diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 00000000..138d8d75 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,204 @@ +# Biscayne Agave Runbook 
+ +## Cluster Operations + +### Shutdown Order + +The agave validator runs inside a kind-based k8s cluster managed by `laconic-so`. +The kind node is a Docker container. **Never restart or kill the kind node container +while the validator is running.** Agave uses `io_uring` for async I/O, and on ZFS, +killing the process can produce unkillable kernel threads (D-state in +`io_wq_put_and_exit` blocked on ZFS transaction commits). This deadlocks the +container's PID namespace, making `docker stop`, `docker restart`, `docker exec`, +and even `reboot` hang. + +Correct shutdown sequence: + +1. Scale the deployment to 0 and wait for the pod to terminate: + ``` + kubectl scale deployment laconic-70ce4c4b47e23b85-deployment \ + -n laconic-laconic-70ce4c4b47e23b85 --replicas=0 + kubectl wait --for=delete pod -l app=laconic-70ce4c4b47e23b85-deployment \ + -n laconic-laconic-70ce4c4b47e23b85 --timeout=120s + ``` +2. Only then restart the kind node if needed: + ``` + docker restart laconic-70ce4c4b47e23b85-control-plane + ``` +3. Scale back up: + ``` + kubectl scale deployment laconic-70ce4c4b47e23b85-deployment \ + -n laconic-laconic-70ce4c4b47e23b85 --replicas=1 + ``` + +### Ramdisk + +The accounts directory must be on a ramdisk for performance. `/dev/ram0` loses its +filesystem on reboot and must be reformatted before mounting. + +**Boot ordering is handled by systemd units** (installed by `biscayne-boot.yml`): +- `format-ramdisk.service`: runs `mkfs.xfs -f /dev/ram0` before `local-fs.target` +- fstab entry: mounts `/dev/ram0` at `/srv/solana/ramdisk` with + `x-systemd.requires=format-ramdisk.service` +- `ramdisk-accounts.service`: creates `/srv/solana/ramdisk/accounts` and sets + ownership after the mount + +These units run before docker, so the kind node's bind mounts always see the +ramdisk. **No manual intervention is needed after reboot.** + +**Mount propagation**: The kind node bind-mounts `/srv/kind` → `/mnt`. 
Because +the ramdisk is mounted at `/srv/solana/ramdisk` and symlinked/overlaid through +`/srv/kind/solana/ramdisk`, mount propagation makes it visible inside the kind +node at `/mnt/solana/ramdisk` without restarting the kind node. **Do NOT restart +the kind node just to pick up a ramdisk mount.** + +### KUBECONFIG + +kubectl must be told where the kubeconfig is when running as root or via ansible: +``` +KUBECONFIG=/home/rix/.kube/config kubectl ... +``` + +The ansible playbooks set `environment: KUBECONFIG: /home/rix/.kube/config`. + +### SSH Agent + +SSH to biscayne goes through a ProxyCommand jump host (abernathy.ch2.vaasl.io). +The SSH agent socket rotates when the user reconnects. Find the current one: +``` +ls -t /tmp/ssh-*/agent.* | head -1 +``` +Then export it: +``` +export SSH_AUTH_SOCK=/tmp/ssh-XXXX/agent.NNNN +``` + +### io_uring/ZFS Deadlock — Root Cause + +When agave-validator is killed while performing I/O against ZFS-backed paths (not +the ramdisk), io_uring worker threads get stuck in D-state: +``` +io_wq_put_and_exit → dsl_dir_tempreserve_space (ZFS module) +``` +These threads are unkillable (SIGKILL has no effect on D-state processes). They +prevent the container's PID namespace from being reaped (`zap_pid_ns_processes` +waits forever), which breaks `docker stop`, `docker restart`, `docker exec`, and +even `reboot`. The only fix is a hard power cycle. + +**Prevention**: Always scale the deployment to 0 and wait for the pod to terminate +before any destructive operation (namespace delete, kind restart, host reboot). +The `biscayne-stop.yml` playbook enforces this. + +### laconic-so Architecture + +`laconic-so` manages kind clusters atomically — `deployment start` creates the +kind cluster, namespace, PVs, PVCs, and deployment in one shot. There is no way +to create the cluster without deploying the pod. 
+ +Key code paths in stack-orchestrator: +- `deploy_k8s.py:up()` — creates everything atomically +- `cluster_info.py:get_pvs()` — translates host paths using `kind-mount-root` +- `helpers_k8s.py:get_kind_pv_bind_mount_path()` — strips `kind-mount-root` + prefix and prepends `/mnt/` +- `helpers_k8s.py:_generate_kind_mounts()` — when `kind-mount-root` is set, + emits a single `/srv/kind` → `/mnt` mount instead of individual mounts + +The `kind-mount-root: /srv/kind` setting in `spec.yml` means all data volumes +whose host paths start with `/srv/kind` get translated to `/mnt/...` inside the +kind node via a single bind mount. + +### Key Identifiers + +- Kind cluster: `laconic-70ce4c4b47e23b85` +- Namespace: `laconic-laconic-70ce4c4b47e23b85` +- Deployment: `laconic-70ce4c4b47e23b85-deployment` +- Kind node container: `laconic-70ce4c4b47e23b85-control-plane` +- Deployment dir: `/srv/deployments/agave` +- Snapshot dir: `/srv/solana/snapshots` +- Ledger dir: `/srv/solana/ledger` +- Accounts dir: `/srv/solana/ramdisk/accounts` +- Log dir: `/srv/solana/log` +- Host bind mount root: `/srv/kind` -> kind node `/mnt` +- laconic-so: `/home/rix/.local/bin/laconic-so` (editable install) + +### PV Mount Paths (inside kind node) + +| PV Name | hostPath | +|----------------------|-------------------------------| +| validator-snapshots | /mnt/solana/snapshots | +| validator-ledger | /mnt/solana/ledger | +| validator-accounts | /mnt/solana/ramdisk/accounts | +| validator-log | /mnt/solana/log | + +### Snapshot Freshness + +If the snapshot is more than **20,000 slots behind** the current mainnet tip, it is +too old. Stop the validator, download a fresh snapshot, and restart. Do NOT let it +try to catch up from an old snapshot — it will take too long and may never converge. 
+ +Check with: +``` +# Snapshot slot (from filename) +ls /srv/solana/snapshots/snapshot-*.tar.* + +# Current mainnet slot +curl -s -X POST -H "Content-Type: application/json" \ + -d '{"jsonrpc":"2.0","id":1,"method":"getSlot","params":[{"commitment":"finalized"}]}' \ + https://api.mainnet-beta.solana.com +``` + +### Snapshot Leapfrog Recovery + +When the validator is stuck in a repair-dependent gap (incomplete shreds from a +relay outage or insufficient turbine coverage), "grinding through" doesn't work. +At 0.4 slots/sec replay through incomplete blocks vs 2.5 slots/sec chain +production, the gap grows faster than it shrinks. + +**Strategy**: Download a fresh snapshot whose slot lands *past* the incomplete zone, +into the range where turbine+relay shreds are accumulating in the blockstore. +**Keep the existing ledger** — it has those shreds. The validator replays from +local blockstore data instead of waiting on repair. + +**Steps**: +1. Let the validator run — turbine+relay accumulate shreds at the tip +2. Monitor shred completeness at the tip: + `scripts/check-shred-completeness.sh 500` +3. When there's a contiguous run of complete blocks (>100 slots), note the + starting slot of that run +4. Scale to 0, wipe accounts (ramdisk), wipe old snapshots +5. **Do NOT wipe ledger** — it has the turbine shreds +6. Download a fresh snapshot (its slot should be within the complete run) +7. Scale to 1 — validator replays from local blockstore at 3-5 slots/sec + +**Why this works**: Turbine delivers ~60% of shreds in real-time. Repair fills +the rest for recent slots quickly (peers prioritize recent data). The only +problem is repair for *old* slots (minutes/hours behind) which peers deprioritize. +By snapshotting past the gap, we skip the old-slot repair bottleneck entirely. + +### Shred Relay (Ashburn) + +The TVU shred relay from laconic-was-sw01 provides ~4,000 additional shreds/sec. +Without it, turbine alone delivers ~60% of blocks. 
With it, completeness improves +but still requires repair for full coverage. + +**Current state**: Old pipeline (monitor session + socat + shred-unwrap.py). +The traffic-policy redirect was never committed (auto-revert after 5 min timer). +See `docs/tvu-shred-relay.md` for the traffic-policy config that needs to be +properly applied. + +**Boot dependency**: `shred-unwrap.py` must be running on biscayne for the old +pipeline to work. It is NOT persistent across reboots. The iptables DNAT rule +for the new pipeline IS persistent (iptables-persistent installed). + +### Redeploy Flow + +See `playbooks/biscayne-redeploy.yml`. The scale-to-0 pattern is required because +`laconic-so` creates the cluster and deploys the pod atomically: + +1. Delete namespace (teardown) +2. Optionally wipe data +3. `laconic-so deployment start` (creates cluster + pod) +4. Immediately scale to 0 +5. Download snapshot via aria2c +6. Scale to 1 +7. Verify diff --git a/README.md b/README.md new file mode 100644 index 00000000..f8a5d480 --- /dev/null +++ b/README.md @@ -0,0 +1,3 @@ +# biscayne-agave-runbook + +Ansible playbooks for operating the kind-based agave-stack deployment on biscayne.vaasl.io. diff --git a/ansible.cfg b/ansible.cfg new file mode 100644 index 00000000..1ef6ab0d --- /dev/null +++ b/ansible.cfg @@ -0,0 +1,13 @@ +[defaults] +inventory = inventory/ +stdout_callback = ansible.builtin.default +result_format = yaml +callbacks_enabled = profile_tasks +retry_files_enabled = false + +[privilege_escalation] +become = true +become_method = sudo + +[ssh_connection] +pipelining = true diff --git a/docs/arista-eos-reference.md b/docs/arista-eos-reference.md new file mode 100644 index 00000000..dc6c3e68 --- /dev/null +++ b/docs/arista-eos-reference.md @@ -0,0 +1,114 @@ +# Arista EOS Reference Notes + +Collected from live switch CLI (`?` help) and Arista documentation search +results. Switch platform: 7280CR3A, EOS 4.34.0F. 
+ +## PBR (Policy-Based Routing) + +EOS uses `policy-map type pbr` — NOT `traffic-policy` (which is a different +feature for ASIC-level traffic policies, not available on all platforms/modes). + +### Syntax + +``` +! ACL to match traffic +ip access-list <ACL-NAME> + 10 permit <proto> <src> <dst> [ports] + +! Class-map referencing the ACL +class-map type pbr match-any <CLASS-NAME> + match ip access-group <ACL-NAME> + +! Policy-map with nexthop redirect +policy-map type pbr <POLICY-NAME> + class <CLASS-NAME> + set nexthop <A.B.C.D> ! direct nexthop IP + set nexthop recursive <A.B.C.D> ! recursive resolution + ! set nexthop-group <GROUP-NAME> ! nexthop group + ! set ttl <value> ! TTL override + +! Apply on interface +interface <INTERFACE> + service-policy type pbr input <POLICY-NAME> +``` + +### PBR `set` options (from CLI `?`) + +``` +set ? + nexthop Next hop IP address for forwarding + nexthop-group next hop group name + ttl TTL effective with nexthop/nexthop-group +``` + +``` +set nexthop ? + A.B.C.D next hop IP address + A:B:C:D:E:F:G:H next hop IPv6 address + recursive Enable Recursive Next hop resolution +``` + +**No VRF qualifier on `set nexthop`.** The nexthop must be reachable in the +VRF where the policy is applied. For cross-VRF PBR, use a static inter-VRF +route to make the nexthop reachable (see below). + +## Static Inter-VRF Routes + +Source: [EOS 4.34.0F - Static Inter-VRF Route](https://www.arista.com/en/um-eos/eos-static-inter-vrf-route) + +Allows configuring a static route in one VRF with a nexthop evaluated in a +different VRF. Uses the `egress-vrf` keyword. + +### Syntax + +``` +ip route vrf <vrf-name> <prefix>/<length> egress-vrf <egress-vrf-name> <nexthop-ip> +ip route vrf <vrf-name> <prefix>/<length> egress-vrf <egress-vrf-name> <egress-interface> +``` + +### Examples (from Arista docs) + +``` +! Route in vrf1 with nexthop resolved in default VRF +ip route vrf vrf1 1.0.1.0/24 egress-vrf default 1.0.0.2 + +! show ip route vrf vrf1 output: +! S 1.0.1.0/24 [1/0] via 1.0.0.2, Vlan2180 (egress VRF default) +``` + +### Key points + +- For bidirectional traffic, static inter-VRF routes must be configured in + both VRFs. +- ECMP next-hop sets across same or heterogeneous egress VRFs are supported.
+- The `show ip route vrf` output displays the egress VRF name when it differs + from the source VRF. + +## Inter-VRF Local Route Leaking + +Source: [EOS 4.35.1F - Inter-VRF Local Route Leaking](https://www.arista.com/en/um-eos/eos-inter-vrf-local-route-leaking) + +An alternative to static inter-VRF routes that leaks routes dynamically from +one VRF (source) to another VRF (destination) on the same router. + +## Config Sessions + +``` +configure session <name> ! enter named session +show session-config diffs ! MUST be run from inside the session +commit timer HH:MM:SS ! commit with auto-revert timer +abort ! discard session +``` + +From enable mode: +``` +configure session <name> commit ! finalize a pending session +``` + +## Checkpoints and Rollback + +``` +configure checkpoint save <name> +rollback running-config checkpoint <name> +write memory +``` diff --git a/docs/arista-scraped/acls-and-route-maps.md b/docs/arista-scraped/acls-and-route-maps.md new file mode 100644 index 00000000..378a275b --- /dev/null +++ b/docs/arista-scraped/acls-and-route-maps.md @@ -0,0 +1,8439 @@ + + + +# ACLs and Route Maps + + +The switch uses rule-based lists to control packet access to ports and to select routes for redistribution to routing domains defined by dynamic routing protocols. + + +This section describes the construction of Access Control Lists (ACLs), prefix lists, and route maps and includes the following topics: + + +- Introduction + +- Access Control Lists + +- Service ACLs + +- Sub-interface ACLs + +- RACL Sharing on SVIs + +- Route Maps + +- Prefix Lists + +- Port ACLs with User-Defined Fields + +- ACL, Route Map, and Prefix List Commands + + +## Introduction + + +The following provides an introduction to Access Control Lists (ACL), Service ACLs, Route Maps, Prefix Lists, and Router Access Control List (RACL) Divergence: + + +The switch processes ACLs, Service ACLs, route maps, and prefix lists in order, beginning with the first rule and continuing until a match is found.
+ + +An ACL contains a list of rules that control the inbound and outbound flow of packets into Ethernet interfaces, subinterfaces, port-channel interfaces, or the switch control plane. The switch supports implementing various filtering criteria, including IP and MAC addresses and TCP/UDP ports, with include/exclude options without compromising its performance or feature set. Filtering syntax is the industry standard. + + +Note: EOS supports egress IPv4 and IPv6 Port Access Control Lists (PACLs) by default. To enable egress MAC PACLs, add the configuration to the current TCAM profile. + + +A Service ACL is an ACL applied by a control-plane agent process to control connections to, or packets processed by, that agent process. + + +A route map contains a list of rules that control the redistribution of IP routes into a protocol domain based on criteria such as route metrics, access control lists, next-hop addresses, and route tags. Additionally, route maps can modify route parameters during redistribution. + + +A prefix list contains a list of rules that defines route redistribution access for a specified IP address space. Route maps often use prefix lists to filter routes. + + +The RACL divergence optimizes hardware resource usage on each forwarding ASIC. EOS installs ACLs only on the hardware components corresponding to the member interfaces of the SVIs with an applied ACL, which saves hardware resources and allows RACLs to scale to larger configurations. +Tip: Use the **show** commands to display the interface mapping, Ternary Content Addressable Memory (TCAM) entries, and TCAM utilization information. + + +## Access Control Lists + + +These sections describe access control lists: + + +- ACL Types + +- ACL Configuration + +- Applying ACLs + + +### ACL Types + + +The switch supports the following ACL types: + + +- **IPv4** matches on IPv4 source or destination addresses, with L4 modifiers including protocol, port number, IPsec tunnel interfaces, and DSCP value.
+ +- **IPv6** matches on IPv6 source or destination addresses, with L4 modifiers including protocol, port number, or GRE tunnel interface. + +- **Standard IPv4** matches only on source IPv4 addresses. + +- **Standard IPv6** matches only on source IPv6 addresses. + +- **MAC** matches on L2 source and destination addresses. + + +ACLs can also be made dynamic using **payload**, turning them into a User-Defined Field (UDF) alias for use in other ACLs. + + +#### ACL Structure + + +An ACL is an ordered list of rules that defines access restrictions for the entities (the control plane or an interface) to which it is applied. Route maps also use ACLs to select routes for redistribution into specified routing domains. + + +ACL rules specify the data to which packet contents are compared when filtering data. + + +- The interface forwards packets that match all commands in a permit rule. + +- The interface drops packets that match all commands in a deny rule. + +- The interface drops packets that do not match at least one rule. + + +Upon its arrival at an interface, the switch compares a packet’s fields to the first rule of the ACL applied to the interface. Packets that match the rule are forwarded (permit rule) or dropped (deny rule). The process continues whereby the switch compares packets that do not match the rule to the next rule in the list and continues until the packet either matches a rule or the rule list is exhausted. The interface drops packets that do not match a rule. + + +The sequence number designates the rule's placement in the ACL. + + +#### ACL Rules + + +The switch compares an ACL rule's command list to inbound and outbound packet fields. When all of a rule’s criteria match a packet’s contents, the interface performs the action specified by the rule. + + +The set of available commands depends on the ACL type and the specified protocol within the rule. 
The following is a list of commands available for supported ACL types: + + +##### IPv4 ACL Rule Parameters + + +All rules in IPv4 ACLs include the following criteria: + + +- **Protocol**: The packet’s IP protocol. Valid rule inputs include: + + +- Protocol name for a limited set of common protocols. + +- Assigned protocol number for all IP protocols. + +- **Source Address**: The packet’s source IPv4 address. Valid rule inputs include: + + +- A subnet address (CIDR or address mask). Discontiguous masks are supported. + +- A host IP address (dotted decimal notation.) + +- Using ***any*** to denote that the rule matches all source addresses. + +- **Destination Address**: The packet’s destination IP address. Valid rule inputs include: + + +- A subnet address (CIDR or address mask). Discontiguous masks are supported. + +- A host IP address (dotted decimal notation.) + +- Using ***any*** to denote that the rule matches all destination addresses. + + +All rules in IPv4 ACLs ***may*** include the following criteria: + + +- **Fragment**: Rules filter on the fragment bit. + +- **Time-to-live**: Compares the packet TTL (time-to-live) value to a specified value and is valid in ACLs applied to the control plane. The validity of ACLs applied to the data plane varies by switch platform. Comparison options include: + + +- **Equal:** Packets match if the packet value equals the statement value. + +- **Greater than:** Packets match if the packet value is greater than the statement value. + +- **Less than:** Packets match if the packet value is less than the statement value. + +- **Not equal:** Packets match if the packet value does not equal the statement value. + + +The availability of the following optional criteria depends on the specified protocol: + + +- **Source Ports / Destination Ports**: A rule filters on ports when the specified protocol supports IP address-port combinations. 
Rules provide one of these port filtering values: + + +- Using ***any*** denotes that the rule matches all ports. + +- A list of ports that matches the packet port. The maximum list size is 10 ports. + +- Negative port list. The rule matches any port not in the list. The maximum list size is 10 ports. + +- Integer (lower bound): The rule matches any port with a number larger than the integer. + +- Integer (upper bound): The rule matches any port with a number smaller than the integer. + +- Range integers: The rule matches any port whose number is between the integers. + +- **Flag bits**: Rules filter TCP packets on flag bits. + +- **Message type**: Rules filter ICMP type or code. + +- **Tracked**: Matches packets in existing ICMP, UDP, or TCP connections and is valid in ACLs applied to the control plane. The validity of ACLs applied to the data plane varies by switch platform. + + +##### IPv6 ACL Rule Parameters + + +Note: When calculating the size of ACLs, be aware that Arista switches install four rules in every IPv6 ACL so that ICMPv6 neighbor discovery packets bypass the default drop rule. + + +All rules in IPv6 ACLs include the following criteria: + + +- **Protocol**: All rules filter on the packet’s IP protocol field. Rule input options include: + + +- Protocol name for a limited set of common protocols. + +- Assigned protocol number for all IP protocols. + +- **Source Address**: The packet’s source IPv6 address. Valid rule inputs include: + + +- An IPv6 prefix (CIDR). Discontiguous masks are supported. + +- A host IPv6 address (colon-separated hexadecimal notation). + +- Using ***any*** to denote that the rule matches all addresses. + +- **Destination Address**: The packet’s destination IPv6 address. Valid rule inputs include: + + +- An IPv6 prefix (CIDR). Discontiguous masks are supported. + +- A host IPv6 address (colon-separated hexadecimal notation). + +- Using ***any*** to denote that the rule matches all addresses.
+ + +All rules in IPv6 ACLs ***may*** include the following criteria: + + +- **Fragment**: Rules filter on the fragment bit. + +- **Hop limit**: Compares the packet’s hop-limit value to a specified value. Comparison options include: + + +- **eq**: Packets match if the hop-limit value equals the statement value. + +- **gt**: Packets match if the hop-limit value is greater than the statement value. + +- **lt**: Packets match if the hop-limit value is less than the statement value. + +- **neq**: Packets match if the hop-limit value is not equal to the statement value. + + +The availability of the following optional criteria depends on the specified protocol: + + +- **Source Ports / Destination Ports**: A rule filters on ports when the specified protocol supports IP address-port combinations. Rules provide one of these port filtering values: + + +- Using ***any*** denotes that the rule matches all ports. + +- A list of ports that matches the packet port. The maximum list size is 10 ports. + +- Negative port list. The rule matches any port not in the list. The maximum list size is 10 ports. + +- Integer (lower bound): The rule matches any port with a number larger than the integer. + +- Integer (upper bound): The rule matches any port with a number smaller than the integer. + +- Range integers: The rule matches any port whose number is between the integers. + +- **Flag bits**: Rules filter TCP packets on flag bits. + +- **Message type**: Rules filter ICMP type or code. + +- **Tracked**: Matches packets in existing ICMP, UDP, or TCP connections and is valid in ACLs applied to the control plane. The validity of ACLs applied to the data plane varies by switch platform. + + +##### Standard IPv4 and IPv6 ACL Rule Parameters + + +Note: When calculating the size of ACLs, be aware that Arista switches install four rules in every IPv6 ACL so that ICMPv6 neighbor discovery packets bypass the default drop rule. + + +Standard ACLs filter only on the source address.
+ + +##### MAC ACL Rule Parameters + + +MAC ACLs filter traffic on a packet’s layer 2 header. Criteria that MAC ACLs use to filter packets include: + + +- **Source Address** and **Mask**: The packet’s source MAC address. Valid rule inputs include: + + +- MAC address range (address mask in 3x4 dotted hexadecimal notation). + +- Using ***any*** to denote that the rule matches all source addresses. + +- **Destination Address** and **Mask**: The packet’s destination MAC address. Valid rule inputs include: + + +- MAC address range (address mask in 3x4 dotted hexadecimal notation). + +- Using ***any*** to denote that the rule matches all destination addresses. + +- **Protocol**: The packet’s protocol as specified by its EtherType field contents. Valid inputs include: + + +- Protocol name for a limited set of common protocols. + +- Assigned protocol number for all protocols. + + +#### Creating and Modifying Lists + + +The switch provides configuration modes for creating and modifying ACLs. The command that enters an ACL configuration mode specifies the name of the list that the mode modifies. When the configuration mode is exited, the switch saves the list to the running configuration. + + +- ACLs are created and modified in ACL configuration mode. + +- Standard ACLs are created and modified in Standard-ACL-configuration mode. + +- MAC ACLs are created and modified in MAC-ACL-configuration mode. + + +Lists created in one mode cannot be modified in any other mode. + + +A sequence number determines a rule's position within a list. New rules are inserted into a list based on their sequence numbers. You can reference a rule's sequence number to delete it from a list. + + +ACL Configuration describes procedures for configuring ACLs. + + +#### Implementing Access Control Lists + + +Implement an Access Control List (ACL) by assigning the list to an Ethernet interface, subinterface, port channel interface, or control plane. 
The switch assigns a default ACL to the control plane unless the configuration contains a valid control-plane ACL assignment statement. Ethernet and port-channel interfaces are not assigned an ACL by default. Apply standard ACLs to interfaces in the same manner as other ACLs. + + +IPv4 and MAC ACLs are separately applied for inbound and outbound packets. An interface or subinterface can be assigned multiple ACLs, with a limit of one ACL per packet direction per ACL type. A subset of all available switches supports Egress ACLs. The control plane does not support egress ACLs. + + +Applying ACLs describes procedures for applying ACLs to interfaces or the control plane. + + +#### ACL Rule Tracking + + +ACL rule tracking determines how ACL rules impact traffic on the interfaces where those rules are applied. ACLs provide two tracking mechanisms: + + +- **ACL logging**: Logs a syslog entry when a packet matches specified ACL rules. + +- **ACL counters**: ACL counters increment when a packet matches a rule in specified ACLs. + + +##### ACL Logging + + +ACL rules provide a **log** option that produces a log message when a packet matches the rule. ACL logging creates a syslog entry when a packet matches an ACL rule where logging is enabled. Packets that match a logging-enabled ACL rule are copied to the CPU by the hardware. These packets trigger the creation of a syslog entry. The information provided in the entry depends on the ACL type or the protocol specified by the ACL. The system applies hardware rate limiting to packets written to the CPU, which prevents potential Denial-of-Service attacks. The logging rate is also limited in software to avoid creating syslog lists that are too large for human operators to use in practical ways. + + +ACL Rule Tracking Configuration describes procedures for configuring and enabling ACL logging. + + +##### ACL Counters + + +The system assigns an ACL counter to each ACL rule. 
The activity of the ACL counters for rules within a list depends on the list’s counter state. When the list is in a counting state, the ACL counter of a rule increments when the rule matches a packet. When the list is in a non-counting state, the counter does not increment. A list’s counter state applies to all rules in the ACL. The default state for new ACLs is non-counting. + + +The system maintains the values of the counters for all rules in the list when an ACL changes from a counting state to a non-counting state or is no longer applied to any interfaces that increment counters. The counters do not reset. When the ACL returns to counting mode or is applied to an interface that increments counters, the counter operation continues from its last value. + + +Counters never decrement and are reset only through CLI commands. + + +ACL Rule Tracking Configuration describes procedures for configuring and enabling ACL counters. + + +#### Egress ACL Counters + + +Egress ACL counters count the number of packets matching rules associated with egress ACLs applied to various interfaces in a switch. 7050 and 7060 series switches maintain these counters for every TCAM rule. On these platforms, commands such as **show platform trident tcam**, **show platform trident counters**, and **show ip access-list** always display packet counters greater than zero. + + +Other switches do not enable counters by default. You must configure counters for each ACL. The **show hardware counter** and **show ip access-list** commands display the counters. + + +##### Configuring Egress ACL Counters + + +7050 and 7060 series switches enable egress ACL counters and do not require configuration. + + +For other platforms, to enable egress ACL counters for a specific ACL, use the **counters per-entry** command in the ACL's configuration mode. + + +**Example** + + +As shown in the following example, configure the **counters per-entry** command in the ACL configuration mode. 
+ + +``` +`switch(config)# **ip access-list acl1** +switch(config-acl-acl1)# **counters per-entry**` +``` + + +Enabling Egress Counters Globally + +7050 and 7060 series switches enable egress counters. + + +For other switches, enable IPv4 and IPv6 egress ACL counters in the global configuration mode using the **hardware counter feature acl out** command. + + +**Example** + + +The following examples show how to enable IPv4 and IPv6 egress ACL counters: + +``` +`switch(config)# **hardware counter feature acl out ipv4** +switch(config)#` +``` + + +``` +`switch(config)# **hardware counter feature acl out ipv6** +switch(config)#` +``` + + +Disabling Egress Counters Globally + +For 7050 and 7060 series switches, egress counters cannot be disabled. + + +For other switches, disable IPv4 and IPv6 egress ACL counters in the global configuration mode by using the **hardware counter feature acl out** command. + + +The following examples show how to disable IPv4 and IPv6 egress ACL counters: + + +``` +`switch(config)# **no hardware counter feature acl out ipv4** +switch(config)#` +``` + + +``` +`switch(config)# **no hardware counter feature acl out ipv6** +switch(config)#` +``` + + +Egress Counter Roll Over in the Global Mode + +The counters roll over when the counter value for an ACL rule exceeds **2^64** (2 to the power of 64). + + +**Example** + + +In the following example, the **hardware counter feature acl ipv6 out** command is configured using units and packets. + + +``` +`switch(config)# **hardware counter feature acl ipv6 out units packets** +switch(config)#` +``` + + +The **clear ip access-lists counters** command clears the counters for all of the IPv4 ACLs or a specific IPv4 ACL, either globally or per CLI session. + + +**Example** + + +In the following example the ACL list named **red** is selected. + + +``` +`switch(config)# **clear ip access-list counters red session** +switch(config)#` +``` + + +The IPv6 egress ACL counters do not work in unshared mode. 
+ + +**Example** + + +Use the **hardware access-list resource sharing vlan ipv6 out** command to enable egress IPv6 ACL sharing. + + +``` +`switch(config)# **hardware access-list resource sharing vlan ipv6 out** +switch(config)#` +``` + + +The **clear ipv6 access-list counters** command clears the counters for all of the IPv6 ACLs or a specific IPv6 ACL, either globally or per CLI session. + + +**Example** + + +In the following example the ACL list named **green** is selected. + + +``` +`switch(config)# **clear ipv6 access-list counters green session** +switch(config)#` +``` + + +##### Displaying Egress ACL Counters + + +Use the following **show** commands to display information on Egress ACL Counters. + + +Use the **show ip access-lists** command to display all the IPv4 ACLs, or a specific IPv4 ACL configured in a switch. The output contains details such as ACL rules and counter values for each rule. + +``` +`switch(config)# **show ip access-list acl1** +IP Access List acl1 + counter per-entry + 10 deny ip 11.1.1.0/24 any dscp af11 + 20 deny ip any any [match 39080716, 0:00:00 ago] + + Total rules configured: 2 + Configured on Ingress: Et2/1 + Active on Ingress: Et2/1` +``` + + +Use the **show ipv6 access-lists** command to display all the IPv6 ACLs or a specific IPv6 ACL configured in a switch. The output contains details such as rules in an ACL and respective counter values with each rule. + +``` +`switch(config)# **show ipv6 access-list acl1** +IPV6 Access List acl1 + counter per-entry + 10 permit ipv6 any any [match 3450000, 0:00:10 ago] + 20 deny ipv6 any any + + Total rules configured: 2 + Configured on Ingress: Et2/1 + Active on Ingress: Et2/1` +``` + + +The counter name **EgressAclDropCounter** in the output of the following **show hardware counter drop** command is the aggregate counter for the remaining egress IPv4 ACL deny rules, that is, the deny rules that have no per-rule counter allocated.
No per-rule counters are allocated if you do not configure the **counter per-entry** parameter for the respective ACL. + +``` +`switch(config)# **show hardware counter drop** +Summary: +Total Adverse (A) Drops: 0 +Total Congestion (C) Drops: 0 +Total Packet Processor (P) Drops: 250 +Type Chip CounterName : Count : First Occurrence : Last Occurrence +------------------------------------------------------------------------------- +P Fap0 **EgressAclDropCounter** : 250 : 2015-11-11 22:39:02 : 2015-11-11 22:51:44` +``` + + +### ACL Configuration + + +You create and modify Access Control Lists (ACLs) in an ACL-configuration mode. You can edit a list only in the mode where you created it. The switch provides five configuration modes for creating and modifying access control lists: + + +- **ACL configuration mode** for IPv4 access control lists. + +- **IPv6-ACL configuration mode** for IPv6 access control lists. + +- **Std-ACL configuration mode** for Standard IPv4 access control lists. + +- **Std-IPv6-ACL configuration mode** for Standard IPv6 access control lists. + +- **MAC-ACL configuration mode** for MAC access control lists. + + +These sections describe the creation and modification of ACLs: + + +- Managing ACLs + +- Modifying an ACL + +- ACL Rule Tracking Configuration + +- Displaying ACLs + +- Configuring Per-Port Per-VLAN QoS + +- Displaying Per-Port Per-VLAN QoS + +- Configuring Mirror Access Control Lists + + +#### Managing ACLs + + +##### Creating and Opening a List + + +To create an ACL, enter one of the following commands, followed by the name of the list: + + +- **ip access-list** for IPv4 ACLs. + +- **ipv6 access-list** for IPv6 ACLs. + +- **ip access-list standard** for standard IPv4 ACLs. + +- **ipv6 access-list standard** for standard IPv6 ACLs. + +- **mac access-list** for MAC ACLs. + + +The switch enters the appropriate ACL Configuration Mode for the list. 
When adding the name of an existing ACL to the command, subsequent commands edit that list (see Modifying an ACL for additional information). + + +##### Examples + + +- This command places the switch in ACL Configuration Mode to create an ACL named **test1**. + +``` +`switch(config)# **ip access-list test1** +switch(config-acl-test1)#` +``` + +- This command places the switch in ACL Configuration Mode to create a Standard ACL named **stest1**. + +``` +`switch(config)# **ip access-list standard stest1** +switch(config-std-acl-stest1)#` +``` + +- This command places the switch in ACL Configuration Mode to create an MAC ACL named **mtest1**. + +``` +`switch(config)# **mac access-list mtest1** +switch(config-mac-acl-mtest1)#` +``` + + +##### Saving List Modifications + + +ACL Configuration Modes are group-change modes. Changes made in a group-change mode are saved when exiting the mode. To discard changes, use the **abort** command instead of **exit**. + + +##### Examples + + +- Use the following commands to configure the first three rules into a new ACL. + +``` +`switch(config-acl-test1)# **permit ip 10.10.10.0/24 any** +switch(config-acl-test1)# **permit ip any host 10.20.10.1** +switch(config-acl-test1)# **deny ip host 10.10.10.1 host 10.20.10.1**` +``` + +- To view the edited list, use the **show** command. + + +``` +`switch(config-acl-test1)# **show** +IP Access List test1 +        10 permit ip 10.10.10.0/24 any +        20 permit ip 10.30.10.0/24 host 10.20.10.1 +        30 deny ip host 10.10.10.1 host 10.20.10.1 +        40 permit ip any any` +``` + + +Because EOS has not saved the changes, the ACL remains empty, as displayed by **show ip access-lists**. + + +``` +`switch(config-acl-test1)# **show ip access-lists test1** +switch(config-acl-test1)#` +``` + + +Use the **exit** command to save all current changes to the ACL and exit the ACL configuration mode. 
+ + +``` +`switch(config-acl-test1)# **exit** +switch(config)# **show ip access-lists test1** +IP Access List test1 +        10 permit ip 10.10.10.0/24 any +        20 permit ip 10.30.10.0/24 host 10.20.10.1 +        30 deny ip host 10.10.10.1 host 10.20.10.1 +        40 permit ip any any + +Total rules configured: 4 + Configured on Ingress: Et1/1 + Active on Ingress: Et1/1` +``` + + +To apply the ACL **test1** on an interface, **Ethernet1/1**, for example, and on the ingress direction, use the following command: + +``` +`switch(config)# **int et1/1** +switch(config-if-Et1/1)# **ip access-group test1 in**` +``` + + +Use the **exit** command to save all changes to the Ethernet interface and exit the interface configuration mode. + +``` +`switch(config-if-Et1/1)# **exit** +switch(config)# +switch(config)# **show ip access-lists test1** +IP Access List test1 + 10 permit ip 10.10.10.0/24 any + 20 permit ip 10.30.10.0/24 host 10.20.10.1 + 30 deny ip host 10.10.10.1 host 10.20.10.1 + 40 permit ip any any + +Total rules configured: 4 +Configured on Ingress: Et1/1 +Active on Ingress: Et1/1` +``` + + +##### Discarding List Changes + + +The **abort** command exits ACL Configuration mode without saving pending changes. + + +##### Examples + + +- The following commands enter the first three rules into a new ACL. + +``` +`switch(config-acl-test1)# **permit ip 10.10.10.0/24 any** +switch(config-acl-test1)# **permit ip any host 10.20.10.1** +switch(config-acl-test1)# **deny ip host 10.10.10.1 host 10.20.10.1**` +``` + +- To view the edited list, use the **show** command. + + +``` +`switch(config-acl-test1)# **show** +IP Access List test1 +        10 permit ip 10.10.10.0/24 any +        20 permit ip 10.30.10.0/24 host 10.20.10.1 +        30 deny ip host 10.10.10.1 host 10.20.10.1 +        40 permit ip any any` +``` + + +To discard the changes, use the **abort** command. 
If the ACL existed before entering the ACL Configuration Mode, the **abort** command restores the version that existed before entering the ACL Configuration Mode. Otherwise, the **show ip access-lists** command displays no output. + + +``` +`switch(config-acl-test1)# **abort** +switch(config)#` +``` + + +#### Modifying an ACL + + +An existing ACL, including those applied to interfaces, can be modified by entering the appropriate configuration mode for the ACL as described in Creating and Opening a List. By default, while modifying an ACL, all traffic is blocked on any interface using the ACL. + + +##### Permit All Traffic During ACL Update + + +To avoid packet loss and interference with features like routing and dynamic NAT, you can configure the following switches to permit **all** traffic on Ethernet and VLAN interfaces during ACL modifications: + + +- 7050X + +- 7060X + +- 7150 + +- 7250X + +- 7280 + +- 7280R + +- 7300X + +- 7320X + +- 7500 series switches + + +Use the **hardware access-list update default-result permit** command to configure the preceding switches. + + +The following commands add **`deny`** rules to the appropriate ACL: + + +- **deny (IPv4 ACL)** adds a deny rule to an IPv4 ACL. + +- **deny (IPv6 ACL)** adds a deny rule to an IPv6 ACL. + +- **deny (Standard IPv4 ACL)** adds a deny rule to an IPv4 standard ACL. + +- **deny (Standard IPv6 ACL)** adds a deny rule to an IPv6 standard ACL. + +- **deny (MAC ACL)** adds a deny rule to a MAC ACL. + + +The following commands add **`permit`** rules to the appropriate ACL: + + +- **permit (IPv4 ACL)** adds a permit rule to an IPv4 ACL. + +- **permit (IPv6 ACL)** adds a permit rule to an IPv6 ACL. + +- **permit (Standard IPv4 ACL)** adds a permit rule to an IPv4 standard ACL. + +- **permit (Standard IPv6 ACL)** adds a permit rule to an IPv6 standard ACL. + +- **permit (MAC ACL)** adds a permit rule to a MAC ACL. 
+ + +##### Adding a Rule + + +To append a rule to the end of a list, enter the rule without a sequence number while in ACL configuration mode for the list. The switch computes the new rule’s sequence number by adding **10** to the last rule’s sequence number. + + +##### Examples + + +- The following command configures the switch to permit all traffic during ACL modifications on interfaces using the ACL. The rules in modified ACLs go into effect after exiting ACL configuration mode and after populating the ACL rules in hardware. + +``` +`switch(config)# **hardware access-list update default-result permit**` +``` + +- The following commands enter the first three rules into a new ACL. + +``` +`switch(config-acl-test1)# **permit ip 10.10.10.0/24 any** +switch(config-acl-test1)# **permit ip any host 10.20.10.1** +switch(config-acl-test1)# **deny ip host 10.10.10.1 host 10.20.10.1**` +``` + +- To view the edited list, use the **show** command. + + +``` +`switch(config-acl-test1)# **show** +IP Access List test1 +        10 permit ip 10.10.10.0/24 any +        20 permit ip any host 10.20.10.1 +        30 deny ip host 10.10.10.1 host 10.20.10.1` +``` + +- The following command appends a rule to the ACL. The new rule’s sequence number is **40**. + +``` +`switch(config-acl-test1)# **permit ip any any** +switch(config-acl-test1)# **show** +IP Access List test1 +        10 permit ip 10.10.10.0/24 any +        20 permit ip any host 10.20.10.1 +        30 deny ip host 10.10.10.1 host 10.20.10.1 +        40 permit ip any any` +``` + + +##### Inserting a Rule + + +To insert a rule into an ACL, enter the rule with a sequence number between the existing rules’ numbers. + + +##### Example + + +The following command inserts a rule between the first two by assigning the sequence number **15**. 
+ +``` +`Switch(config-acl-test1)# **15 permit ip 10.30.10.0/24 host 10.20.10.1** +Switch(config-acl-test1)# **show** +IP Access List test1 +        10 permit ip 10.10.10.0/24 any +        15 permit ip 10.30.10.0/24 host 10.20.10.1 +        20 permit ip any host 10.20.10.1 +        30 deny ip host 10.10.10.1 host 10.20.10.1 +        40 permit ip any any` +``` + + +##### Deleting a Rule + + +To remove a rule from the current ACL, perform one of these commands: + + +- Enter **no**, followed by the sequence number to delete a rule. + +- Enter **no**, followed by the actual rule to delete it. + +- Enter **default**, followed by the actual rule to delete it. + + +##### Examples + + +- These equivalent commands remove rule **20** from the list. + +``` +`switch(config-acl-test1)# **no 20** +switch(config-acl-test1)# **no permit ip any host 10.20.10.1** +switch(config-acl-test1)# **default permit ip any host 10.20.10.1**` +``` + +- This ACL results from entering one of the preceding commands. + + +``` +`switch(config-acl-test1)# **show** +ip access list test1 +        10 permit ip 10.10.10.0/24 any +        15 permit ip 10.30.10.0/24 host 10.20.10.1 +        30 deny ip host 10.10.10.1 host 10.20.10.1 +        40 permit ip any any` +``` + + +##### Resequencing Rule Numbers + + +Sequence numbers determine the order of the rules in an ACL. After editing a list and deleting existing rules while inserting new rules between existing rules, the sequence number distribution may not be uniform. Resequencing rule numbers changes the sequence number of rules to provide a constant difference between adjacent rules. The **resequence (ACLs)** command adjusts the sequence numbers of ACL rules. + + +##### Example + + +The **resequence (ACLs)** command renumbers rules in the test1 ACL. The sequence number of the first rule is **100**; subsequent rule numbers are incremented by **20**. 
+ +``` +`switch(config-acl-test1)# **show** +IP Access List test1 +        10 permit ip 10.10.10.0/24 any +        25 permit ip any host 10.20.10.1 +        30 deny ip host 10.10.10.1 host 10.20.10.1 +        50 permit ip any any +        90 remark end of list +switch(config-acl-test1)# **resequence 100 20** +switch(config-acl-test1)# **show** +IP Access List test1 +        100 permit ip 10.10.10.0/24 any +        120 permit ip any host 10.20.10.1 +        140 deny ip host 10.10.10.1 host 10.20.10.1 +        160 permit ip any any +        180 remark end of list` +``` + + +#### ACL Rule Tracking Configuration + + +ACL Rules provide a **log** option that produces a syslog message about the packets matching a desired packet. ACL Logging creates a syslog entry when a packet matches an ACL rule with logging enabled. + + +##### Example + + +The following command creates an ACL Rule with logging enabled. + +``` +`switch(config-acl-test1)# **15 permit ip 10.30.10.0/24 host 10.20.10.1 log** +switch(config-acl-test1)#` +``` + + +The format of the generated Syslog message depends on the ACL type and the specified protocol: + + +- Messages generated by a TCP or UDP packet matching an IP ACL: + +**IPACCESS: list** *acl   intf*  *filter* *protocol* *src-ip*(*src_port*)  **->**   *dst-ip*(*dst_port*) + +- Messages generated by ICMP packets matching an IP ACL: + +**IPACCESS: list** *acl   intf* *filter* **icmp** *src-ip*(*src-port*)   **->**   *dst-ip*(*dst-port*) **type=** *n* **code=** *m* + +- Messages generated by all other IP packets matching an IP ACL: + +**IPACCESS: list** *acl   intf   filter* *protocol* *src-ip* **->** *dst-ip* + +- Messages generated by packets matching a MAC ACL: + +**MACACCESS: list** *acl   intf* *filter* *vlan* *ether* *src_mac* **->**   *dst_mac* + +- Messages generated by a TCP or UDP packet matching a MAC ACL: + +**MACACCESS: list** *acl  intf* *filter* *vlan* *ether* *ip-prt   src-mac* *src-ip* **:** *src-prt* **->** *dst-mac* *dst-ip* **:** 
*dst-prt* + +- Messages generated by any other IP packet matching a MAC ACL: + +**MACACCESS: list** *acl  intf* *filter* *vlan* *ether* *src_mac* *src_ip* **->** *dst_mac* *dst_ip* + + +Variables in the Syslog messages display the following values: + + +- **acl** - Specifies the name of the ACL. + +- **intf** - Specifies the name of the interface receiving the packet. + +- **filter** - Specifies the action triggered by the ACL as **denied** or **permitted**. + +- **protocol** - Specifies the IP protocol specified by the packet. + +- **vlan** - Specifies the number of the VLAN receiving the packet. + +- **ether** - Specifies the EtherType protocol specified by the packet. + +- **src-ip** and **dst-ip** - Specifies the source and destination IP addresses. + +- **src-prt** and **dst-prt** - Specifies the source and destination ports. + +- **src-mac** and **dst-mac** - Specifies the source and destination MAC addresses. + + +Each ACL has a counter state, which a command sets to either counting or non-counting. The counter state applies to all rules in the ACL. The initial state for new ACLs defaults to non-counting. + + +The **counters per-entry (ACL configuration modes)** command places the ACL in counting mode. + + +The following command places the configuration mode ACL in counting mode. + +``` +`switch(config-acl-test1)# **counters per-entry** +switch(config-acl-test1)# **exit** +switch(config)# **show ip access-list test1** +IP Access List test1 +        counters per-entry +        10 permit ip 10.10.10.0/24 any +        20 permit ip any host 10.20.10.1 +        30 deny ip host 10.10.10.1 host 10.20.10.1 +        40 permit ip any any +        50 remark end of list +Total rules configured: 5 + Configured on Ingress: Et1 + Active on Ingress: Et1` +``` + + +The **clear ip access-lists counters** and **clear ipv6 access-lists counters** commands set the IP access list counters to zero for the specified IP access list.
+ + +The following command clears the ACL counter for the **test1** ACL. + +``` +`switch(config)# **clear ip access-lists counters test1** +switch(config)#` +``` + + +#### Displaying ACLs + + +Display ACLs using the **show running-config** command. The **show ip access-lists** command also displays ACL rosters and contents as specified by command parameters. + + +When editing an ACL, the **show (ACL configuration modes)** command displays the current or pending list as specified by command parameters. + + +##### Displaying a List of ACLs + + +To display the roster of ACLs on the switch, use the **show [ip | ipv6 | mac] access-lists** command with the **summary** option. + + +##### Example + + +The following command lists the available IPv4 access control lists. + +``` +`switch(config)# **show ip access-lists summary** +IPV4 ACL default-control-plane-acl + Total rules configured: 12 + Configured on: control-plane + Active on : control-plane + +IPV4 ACL list2 + Total rules configured: 3 + +IPV4 ACL test1 + Total rules configured: 6 + +IPV4 ACL test_1 + Total rules configured: 1 + +IPV4 ACL test_3 + Total rules configured: 0 +switch(config)#` +``` + + +##### Displaying Contents of an ACL + + +These commands display ACL contents. + + +- **show access-lists** + +- **show ip access-lists** + +- **show ipv6 access-lists** + +- **show mac access-lists** + + +Each command can display the contents of one ACL or of all ACLs of the type specified by the command: + + +- To display the contents of one ACL, enter **show** **acl_type** **access-lists** followed by the name of the ACL. The **acl_type** can be **ip, ipv6, mac** or null. + +- To display the contents of all ACLs on the switch, enter the command without any options. + + +ACLs in counting mode display the number of inbound packets matching each rule in the list and the elapsed time since the last match. 
+ + +##### Examples + + +- The following command displays the rules in the **default-control-plane-acl IP ACL**, configuration, and status. + +``` +`switch# **show ip access-lists default-control-plane-acl** +IP Access List default-control-plane-acl [readonly] +        counters per-entry +        10 permit icmp any any +        20 permit ip any any tracked [match 1725, 0:00:00 ago] +        30 permit ospf any any +        40 permit tcp any any eq ssh telnet www snmp bgp https +        50 permit udp any any eq bootps bootpc snmp [match 993, 0:00:29 ago] +        60 permit tcp any any eq mlag ttl eq 255 +        70 permit udp any any eq mlag ttl eq 255 +        80 permit vrrp any any +        90 permit ahp any any +        100 permit pim any any +        110 permit igmp any any [match 1316, 0:00:23 ago] +        120 permit tcp any any range 5900 5910 +Total rules configured: 12 + Configured on Ingress: control-plane(default VRF) + Active on Ingress: control-plane(default VRF)` +``` + +- The following command displays the rules, configuration, and status of all the IP ACLs on the switch. 
+ +``` +`switch# **show ip access-lists** +IP Access List default-control-plane-acl [readonly] + counters per-entry + 10 permit icmp any any + 20 permit ip any any tracked [match 1371, 0:00:00 ago] + 30 permit ospf any any + 40 permit tcp any any eq ssh telnet www snmp bgp https + 50 permit udp any any eq bootps bootpc snmp + 60 permit tcp any any eq mlag ttl eq 255 + 70 permit udp any any eq mlag ttl eq 255 + 80 permit vrrp any any + 90 permit ahp any any + 100 permit pim any any + 110 permit igmp any any [match 1316, 0:00:23 ago] + 120 permit tcp any any range 5900 5910 + + Total rules configured: 12 + Configured on Ingress: control-plane(default VRF) + Active on Ingress: control-plane(default VRF) + +IP Access List list2 + 10 permit ip 10.10.10.0/24 any + 20 permit ip 10.30.10.0/24 host 10.20.10.1 + 30 permit ip any host 10.20.10.1 + 40 deny ip host 10.10.10.1 host 10.20.10.1 + 50 permit ip any any + + Total rules configured: 5 + Configured on Ingress: Et1/1 + Active on Ingress: Et1/1 + + +IP Access List test1 +switch(config)#` +``` + + +##### Displaying ACL Modifications + + +While editing an ACL in ACL Configuration Mode, the show command provides options for displaying ACL contents. + + +- To display the list, as modified in ACL configuration mode, enter **show** or **show pending**. + +- To display the list, as stored in ***running-config***, enter **show active**. + +- To display differences between the pending list and the stored list, enter **show diff**. + + +##### Examples + + +The examples in this section display previously configured ACL commands. 
+ + +The configuration stores these parameters: + + +``` +`10 permit ip 10.10.10.0/24 any +20 permit ip any host 10.21.10.1 +30 deny ip host 10.10.10.1 host 10.20.10.1 +40 permit ip any any +50 remark end of list` +``` + + +The current edit session removed this command, and the change is not yet stored to the ***running-config***: + + +``` +`20 permit ip any host 10.21.10.1` +``` + + +The current edit session added these commands to the ACL, and the changes are not yet stored to the ***running-config***: + + +``` +`20 permit ip 10.10.0.0/16 any +25 permit tcp 10.10.20.0/24 any +45 deny pim 239.24.124.0/24 10.5.8.4/30` +``` + + +The following command displays the pending ACL as modified in the ACL Configuration Mode. + +``` +`switch(config-acl-test_1)# **show pending** +IP Access List test_1 +        10 permit ip 10.10.10.0/24 any +        20 permit ip 10.10.0.0/16 any +        25 permit tcp 10.10.20.0/24 any +        30 deny ip host 10.10.10.1 host 10.20.10.1 +        40 permit ip any any +        45 deny pim 239.24.124.0/24 10.5.8.4/30 +        50 remark end of list` +``` + + +The following command displays the ACL as stored in the configuration. + +``` +`switch(config-acl-test_1)# **show active** +IP Access List test_1 +        10 permit ip 10.10.10.0/24 any +        20 permit ip any host 10.21.10.1 +        30 deny ip host 10.10.10.1 host 10.20.10.1 +        40 permit ip any any +        50 remark end of list` +``` + + +The following command displays the difference between the saved and modified ACLs. + + +- A plus sign (**+**) denotes rules added to the pending list. + +- A minus sign (**-**) denotes rules removed from the saved list.
+ +``` +`switch(config-acl-test_1)# **show diff** +--- ++++ +@@ -1,7 +1,9 @@ + IP Access List test_1 +         10 permit ip 10.10.10.0/24 any +-        20 permit ip any host 10.21.10.1 ++        20 permit ip 10.10.0.0/16 any ++        25 permit tcp 10.10.20.0/24 any +         30 deny ip host 10.10.10.1 host 10.20.10.1 +         40 permit ip any any ++        45 deny pim 239.24.124.0/24 10.5.8.4/30` +``` + + +##### Displaying Egress ACL Counters + + +The following **show** commands display Egress ACL Counters information. + + +Use the **show ip access-lists** command to display all the IPv4 ACLs, or a specific IPv4 ACL configured in a switch. The output contains details such as rules in an ACL as well as the respective counter values with each rule, configuration, and status. + +``` +`switch(config)# **show ip access-list acl1** +IP Access List acl1 + counter per-entry + 10 deny ip 11.1.1.0/24 any dscp af11 + 20 deny ip any any [match 39080716, 0:00:00 ago] + + Total rules configured: 2 + Configured on Ingress: Et2/1 + Active on Ingress: Et2/1` +``` + + +Use the **show ipv6 access-lists** command to display all the IPv6 ACLs or a specific IPv6 ACL configured in a switch. The output contains details such as rules in an ACL and the respective counter values with each rule along with the configuration and status. + +``` +`switch(config)# **show ipv6 access-list acl1** +IPV6 Access List acl1 + counter per-entry + 10 permit ipv6 any any [match 3450000, 0:00:10 ago] + 20 deny ipv6 any any + + Total rules configured: 2 + Configured on Ingress: Et1/1 + Active on Ingress: Et1/1` +``` + + +The counter name **EgressAclDropCounter** in the output of this show command signifies the aggregate counter value for the remaining egress IPv4 ACL. In this example, the deny rules, with per rule counters, are not allocated. The per-rule counters are not allocated when the user does not configure the counter per-entry parameter for the respective ACL. 
+ +``` +`switch(config)# **show hardware counter drop** +Summary: +Total Adverse (A) Drops: 0 +Total Congestion (C) Drops: 0 +Total Packet Processor (P) Drops: 250 +Type Chip CounterName : Count : First Occurrence : Last Occurrence +------------------------------------------------------------------------------- +P Fap0 EgressAclDropCounter : 250 : 2015-11-11 22:39:02 : 2015-11-11 22:51:44` +``` + + +#### Configuring Per-Port Per-VLAN QoS + + +To configure per-port per-VLAN Quality of Service (QoS), first configure the ACL policing for QoS and then apply the policy map on a single Ethernet or port-channel interface on a per-port per-VLAN basis. The per port per VLAN QoS allows a class map to match traffic for a single VLAN or for a range of VLANs separated by commas. Per-port per-VLAN only works with QoS-based class maps. + + +To configure per-port per-VLAN QoS on DCS-7280(R) and DCS-7500(R), change the TCAM profile to QoS as shown in the following steps: + + +- Change the TCAM profile to QoS. + +``` +`switch# **config** +switch(config)# **hardware tcam profile qos**` +``` + +- Create an ACL and then match the traffic packets based on the VLAN value and the VLAN mask configured in the ACL. + +``` +`switch(config)# **ip access-list acl1** +switch(config-acl-acl1)# **permit vlan 100 0xfff ip any any** +switch(config-acl-acl1)# **exit**` +``` + +- Similarly, create a class map and then match the traffic packets based on the range of VLAN values configured in the class map. + +``` +`switch(config)# **class-map match-any class1** +switch(config-cmap-qos-class1)# **match vlan 20-40, 1000-1250, 2000** +switch(config-cmap-qos-class1)# **exit**` +``` + + +#### Displaying Per-Port Per-VLAN QoS + + +The following **show** commands display the status, traffic hit counts, TCAM profile information, and policy maps configured on an interface. + + +**Examples** + + +- The **show policy-map** command displays the policy-map information of the configured policy-map. 
+ + +``` +`switch# **show policy-map policy1** +Service-policy policy1 +Class-map: class1 (match-any) +Match: ip access-group name acl1 +Police cir 512000 bps bc 96000 bytes +Class-map: class-default (match-any)` +``` + +- The **show policy-map interface** command displays the policy-map configured on an interface. + + +``` +`switch# **show policy-map interface ethernet 1** +Service-policy input: p1 +Hardware programming status: Successful +Class-map: c2001 (match-any) +Match: vlan 2001 0xfff +set dscp 4 +Class-map: c2002 (match-any) +Match: vlan 2002 0xfff +set dscp 8 +Class-map: c2003 (match-any) +Match: vlan 2003 0xfff +set dscp 12` +``` + + +#### Configuring Mirror Access Control Lists + + +Access Control Lists (ACLs) are configured to permit or deny traffic between source and destination ports on Strata-based platforms. Mirror ACLs are used in mirroring traffic by matching the VLAN ID of the configured ACLs. Mirror ACLs are applied for IPv4, IPv6, and MAC ACLs. + + +Note: Mirror ACLs work only in the receiving direction. + + +**Examples** + + +- The following commands configure an ACL to permit VLAN traffic between any source and destination host. + +``` +`switch(config)# **ip access-list acl1** +switch(config-acl-acl1)# **permit vlan 1234 0x0 ip any any**` +``` + +- The following commands configure monitor session **sess1** with **Ethernet 1** as the source port and **Ethernet 2** as the destination port for the ingress IP ACL **acl1**. + +``` +`switch(config)# **monitor session sess1 source ethernet 1 rx ip access-group acl1** +switch(config)# **monitor session sess1 destination ethernet 2**` +``` + + +### Applying ACLs + + +Access Control Lists (ACLs) become active when assigned to an interface, subinterface, or control plane. This section describes the process of adding and removing ACL interface assignments. + + +#### Applying an ACL to an Interface + + +The switch must be in interface configuration mode to assign an ACL to an interface or subinterface.
+ + +- The **ip access-group** command applies the specified IP or standard IP ACL to the configuration mode interface or subinterface. + +- The **ip access-group** command applies the specified IP or standard IP ACL to the control plane traffic. + +- The **mac access-group** command applies the specified MAC ACL to the configuration mode interface. + + +IPv4, IPv6, and MAC ACLs are separately applied for inbound and outbound packets. You can assign an interface or subinterface with multiple ACLs, with a limit of one ACL per packet direction per ACL type. A subset of all available switches support Egress ACLs. IPv6 egress ACLs have limited availability, and IPv6 egress ACLs applied to routed interfaces or subinterfaces across the same chip on the DCS-7500E and the DCS-7280E series can be shared. In addition to that, the DSCP value can match on IPv6 egress ACLs. This ability results in more efficient utilization of system resources and is particularly useful for environments with few, potentially large, IPv6 egress ACLs applied across multiple routed interfaces. + + +#### Examples + + +- These commands assign **test1** ACL to **interface ethernet 3**, and verify the assignment. + +``` +`switch(config)# **interface ethernet 3** +switch(config-if-Et3)# **ip access-group test1 in** +switch(config-if-Et3)# **show running-config interfaces ethernet 3** +interface Ethernet3 +   ip access-group test1 in +switch(config-if-Et3)#` +``` + +- The following commands place the switch in control plane configuration mode and applies the ACL assignment to the control plane traffic. + +``` +`switch(config)# **control-plane** +switch(config-cp)# **ip access-group test_cp in**` +``` + +- The following command enables shared ACLs. + +``` +`switch(config)# **hardware access-list resource sharing vlan ipv6 out** +switch(config)#` +``` + +- The following command disables shared ACLs. 
+ +``` +`switch(config)# **no hardware access-list resource sharing vlan ipv6 out** +switch(config)#` +``` + +- The following commands apply an IPv4 ACL named **test_ACL** to ingress traffic on **interface ethernet 5.1**. + +``` +`switch(config)# **interface ethernet 5.1** +switch(config-if-Et5.1)# **ip access-group test_ACL in** +switch(config-if-Et5.1)#` +``` + + +#### Removing an ACL from an Interface + + +The **no ip access-group** command removes an IP ACL assignment statement from ***running-config*** for the configuration mode interface. After removing an ACL, the interface is no longer associated with an IP ACL. + + +The **no mac access-group** command removes a MAC ACL assignment statement from ***running-config*** for the configuration mode interface. After removing a MAC ACL, the interface is no longer associated with a MAC ACL. + + +To remove an ACL from the control plane, enter the **no ip access-group** command in control plane configuration mode. Removing the control plane ACL command from ***running-config*** reinstates **default-control-plane-acl** as the control plane ACL. + + +#### Examples + + +- The following commands remove the assigned IPv4 ACL from **interface ethernet 3**. + +``` +`switch(config)# **interface ethernet 3** +switch(config-if-Et3)# **no ip access-group test in** +switch(config-if-Et3)#` +``` + +- The following commands place the switch in control plane configuration mode and remove the ACL assignment from ***running-config***, restoring **default-control-plane-acl** as the control plane ACL.
+ +``` +`switch(config)# **control-plane** +switch(config-cp)# **no ip access-group test_cp in** +switch(config-cp)#` +``` + + +## Service ACLs + + +These sections describe Service ACLs: + + +- Service Access Control List Description + +- Configuring Service ACLs and Displaying Status and Counters + + +### Service Access Control List Description + + +Service ACL enforcement is a feature added to a control plane service (the SSH server, the SNMP server, routing protocols, etc.) that allows the switch administrator to restrict the processing of packets and connections by the control plane processes that implement that service. The control plane program run by the control plane process checks already received packets and connections against a user-configurable Access Control List (ACL), a Service ACL. + + +The Service ACL contains permit and deny rules matching any source address, destination address, and TCP or UDP ports of received packets or connections. After receiving a packet or connection, the control plane process evaluates the packet or connection against the rules of the Service ACL configured for the control plane process. If the received packet or connection matches a deny rule, the control plane process drops or closes it without further processing. + + +Control Plane Process Enforced Access Control enables the system administrator to restrict which systems on the network can access the services provided by the switch. Each service has its own access control list, giving the system administrator fine-grained control over access to the switch's control plane services. The CLI for this uses the familiar pattern of access control lists assigned for a specific purpose, in this case, for each control plane service. 
+ + +### Configuring Service ACLs and Displaying Status and Counters + + +#### SSH Server + + +To apply the SSH Server Service ACLs for IPv4 and IPv6 traffic, use the **ip access-group (Service ACLs)** and **ipv6 access-group (Service ACLs)** commands in **`config-mgt-ssh`** configuration mode: + + +``` +`switch(config)# **management ssh** +switch(config-mgmt-ssh)# **ip access-group [vrf ] in** +switch(config-mgmt-ssh)# **ipv6 access-group [vrf ] in**` +``` + + +In Release EOS-4.19.0, all VRFs are required to use the same SSH Server Service ACL. The Service ACL assigned without the **vrf** keyword is applied to all VRFs where the SSH Server is enabled. + + +Use the following commands to display the status and counters of the SSH Server Service ACLs: + + +``` +`switch# **show management ssh ip access-list** +switch# **show management ssh ipv6 access-list**` +``` + + +#### SNMP Server + + +Use the [**snmp-server community**](/um-eos/eos-snmp#xx1154277) command to apply the SNMP Server Service ACLs to restrict which hosts can access SNMP services on the switch: + + +**Example** + + +``` +`switch(config)# **snmp-server community** **community-name** [view **viewname**] [ro | rw] **acl_name**` +``` + + +``` +`switch(config)# **snmp-server community** **community-name** [view **viewname**] [ro | rw] ipv6 **ipv6_acl_name**` +``` + + +#### EAPI + + +Use the **ip access-group (Service ACLs)** and **ipv6 access-group (Service ACLs)** commands to apply Service ACLs to the EOS Application Programming Interface (EAPI) Server: + + +``` +`switch(config)# **management api http-commands** +switch(config-mgmt-api-http-cmds)# **vrf ** +switch(config-mgmt-api-http-cmds-vrf-)# **ip access-group ** +switch(config-mgmt-api-http-cmds-vrf-)# **ipv6 access-group **` +``` + + +Use the following commands to display the status and counters of the EAPI server Service ACLs: + + +``` +`switch# **show management api http-commands ip access-list** +switch# **show management api http-commands ipv6 
access-list**` +``` + + +#### BGP + + +Use the **ip access-group (Service ACLs)** and **ipv6 access-group (Service ACLs)** commands to apply Service ACLs for controlling connections to the BGP routing protocol agent: + + +``` +`switch(config)# **router bgp ** +switch(config-router-bgp)# **ip access-group ** +switch(config-router-bgp)# **ipv6 access-group ** +switch(config-router-bgp)# **vrf ** +switch(config-router-bgp-vrf-)# **ip access-group ** +switch(config-router-bgp-vrf-)# **ipv6 access-group **` +``` + + +Use the following commands to display the status and counters of the BGP routing protocol Service ACLs: + + +``` +`switch# **show bgp ipv4 access-list** +switch# **show bgp ipv6 access-list**` +``` + + +#### UCMP Auto Adjust for BGP + + +Unequal Cost MultiPath (UCMP) for BGP forwards traffic based on weight assignments for next hops of Equal Cost MultiPath (ECMP) routes. The system programs the weights in the Forwarding Information Base (FIB). + + +Devices that receive BGP routes disseminate BGP link-bandwidth extended community attribute information. These devices then program the next hops in the FIB using the received link-bandwidth values. The system appends the percentage of interface speed to the received link bandwidth extended community value of the route. It adjusts the weight ratio of the traffic sent over egress ports to forward more traffic toward the peer with a higher interface speed. + + +##### Configuring UCMP Auto Adjust for BGP + + +The following command enables the weight adjustment and configures the adjust auto to **62.3** percent. + + +``` +`switch(config-router-bgp)# **neighbor group1 link-bandwidth adjust auto percent 62.3**` +``` + + +PERCENT is a float value between **0.0** and **100.0** and is optional. 
+ + +#### OSPF + + +Use the **ip access-group (Service ACLs)** and **ipv6 access-group (Service ACLs)** commands to apply Service ACLs to control packets processed by the OSPF routing protocol agent: + + +**Example** + + +``` +`switch(config)# **router ospf ** +switch(config-router-ospf)# **ip access-group ** +switch(config-router-ospf)# **ipv6 access-group **` +``` + + +When using VRFs, each per VRF OSPF instance must be explicitly assigned its Service ACL. + + +Use the following commands to display the OSPF routing protocol Service ACLs' status and counters: + + +``` +`switch# **show ospf ipv4 access-list** +switch# **show ospf ipv6 access-list**` +``` + + +#### PIM + + +Use the **access-group** command to apply Service ACLs for controlling packets processed by the PIM routing protocol agent: + + +``` +`switch(config)# **router pim** +switch(config-router-pim)# **ipv4** +switch(config-router-pim-ipv4)# **access-group ** +switch(config-router-pim-ipv4)#**vrf ** +switch(config-router-pim-vrf-)# **ipv4** +switch(config-router-pim-vrf--ipv4)# **access-group **` +``` + + +Use the following command to display the status and counters of the PIM routing protocol Service ACLs. + + +``` +`switch# **show ip pim access-list**` +``` + + +#### IGMP + + +Use the **ip igmp access-group** command to apply Service ACLs for controlling packets processed by the IGMP management protocol agent: + + +``` +`switch(config)# **router igmp** +switch(config-router-igmp)# **ip igmp access-group ** +switch(config-router-igmp)# **vrf ** +switch(config-router-igmp-vrf-)# **ip igmp access-group **` +``` + + +Use the following command to display the status and counters of the IGMP management protocol Service ACLs. 
+ + +``` +`switch# **show ip igmp access-list**` +``` + + +#### DHCP Relay + + +Use the **ip dhcp relay access-group** and **ipv6 dhcp relay access-group** commands to apply Service ACLs for controlling packets processed by the DHCP relay agent: + + +``` +`switch(config)# **ip dhcp relay access-group [vrf ]** +switch(config)# **ipv6 dhcp relay access-group [vrf ]**` +``` + + +Use the following commands to display the status and counters of the DHCP relay agent Service ACLs: + + +``` +`switch# **show ip dhcp relay access-list** +switch# **show ipv6 dhcp relay access-list**` +``` + + +#### LDP + + +Use the **ip access-group (Service ACLs)** command to apply Service ACLs for controlling packets and connections processed by the LDP MPLS label distribution protocol: + + +``` +`switch(config)# **mpls ldp** +switch(config-mpls-ldp)# **ip access-group **` +``` + + +Use the following command to display the status and counters of the LDP Service ACLs. + + +``` +`switch# **show mpls ldp access-list**` +``` + + +#### LANZ + + +Use the **ip access-group (Service ACLs)** and **ipv6 access-group (Service ACLs)** commands to apply Service ACLs for controlling connections accepted by the LANZ agent: + + +``` +`switch(config)# **queue-monitor streaming** +switch(config-qm-streaming)# **ip access-group ** +switch(config-qm-streaming)# **ipv6 access-group **` +``` + + +Use the following command to display the status and counters of the LANZ Service ACLs.
+ + +``` +`switch# **show queue-monitor streaming access-lists**` +``` + + +#### MPLS Ping and Traceroute + + +Use the **ip access-group (Service ACLs)** and **ipv6 access-group (Service ACLs)** commands to apply Service ACLs for controlling connections accepted by the MPLS Ping agent: + + +``` +`switch(config)# **mpls ping** +switch(config-mpls-ping)# **ip access-group [vrf ]** +switch(config-mpls-ping)# **ipv6 access-group [vrf ]**` +``` + + +#### Telnet Server + + +Use the **ip access-group (Service ACLs)** and **ipv6 access-group (Service ACLs)** commands to apply Service ACLs to the Telnet server: + + +``` +`switch(config)# **management telnet** +switch(config-mgmt-telnet)# **ip access-group [vrf ] in** +switch(config-mgmt-telnet)# **ipv6 access-group [vrf ] in**` +``` + + +In EOS 4.19.0, all VRFs are required to use the same Telnet server Service ACL. The Service ACL assigned without the **vrf** keyword is applied to all VRFs where the Telnet server is enabled. + + +Use the following commands to display the status and counters of the Telnet server Service ACLs: + + +``` +`switch# **show management telnet ip access-list** +switch# **show management telnet ipv6 access-list**` +``` + + +## Sub-interface ACLs + + +The Sub-interface ACLs feature enables ACL functionality on subinterfaces. + + +### **Configuring Sub-interface ACLs** + + +Configure the ACLs on subinterfaces using the following command. + + +``` +`**ip|ipv6 access-group** **acl-name** in | out` +``` + + +Use the following command to unconfigure the ACLs on subinterfaces. + + +``` +`**no ip|ipv6 access-group** in | out` +``` + + +### Configuring ACL Mirroring on a Subinterface Source + + +Configure a mirror session using subinterface sources and apply explicit ACLs to each source in the session. EOS only supports ingress mirroring from the Rx direction.
+ + +Use the following commands to configure a session, *ACLMirror1*, on *Ethernet5/1.1*, *Ethernet5/1.2*, *Ethernet6/1* as the source, *acl1* as the ACL group, and *Ethernet 14/1* as the destination: + + +``` +`switch(config)# **monitor session ACLMirror1 source Ethernet 5/1.1 rx** +switch(config)# **monitor session ACLMirror1 source Ethernet 5/1.2 rx ip access-group acl1** +switch(config)# **monitor session ACLMirror1 source Ethernet 6/1 rx** +switch(config)# **monitor session ACLMirror1 destination Ethernet 14/1**` +``` + + +#### Displaying the ACL Mirroring Information + + +Use the [**show monitor session**](/um-eos/eos-data-transfer#xx1136306) command to display the session information: + + +``` +`switch(config)# **show monitor session** +Session ACLMirror1 +------------------------ + +Programmed in HW: Yes + +Source Ports: + + Rx Only: Et5/1.2(IP ACL: acl1), Et5/1.1 + Et6/1 + +Destination Ports: + + Et14/1 : active` +``` + + +### **Sub-interface ACLs Limitations** + + +The sub-interface ACLs feature contains the following limitations: + + +- Egress IPv4 ACLs on subinterfaces are not supported when sharing mode is disabled for Egress IPv4 RACLs. + +- Egress IPv6 ACL deny logging is not supported on subinterfaces. + +- Blocking traffic while modifying ACLs is not supported on Egress IPv4 ACLs on subinterfaces. + + +### **Sub-interface ACLs Show Commands** + + +The **show ip access-lists** and **show ipv6 access-lists** commands display the summary of a configured ACL including the subinterface on which the ACL is configured and active. 
+ + +**show ip|ipv6 access-lists** **acl-name** summary + + +**Examples** + + +``` +`switch(config)# **show ip access-lists acl1 summary** +IPV4 ACL acl1 + Total rules configured: 1 + Configured on Ingress: Et5.1 + Active on Ingress: Et5.1` +``` + + +``` +`switch(config)# **show ipv6 access-lists acl1 summary** +IPV6 ACL acl1 + Total rules configured: 1 + Configured on Egress: Et5.1 + Active on Egress: Et5.1` +``` + + +## RACL Sharing on SVIs + + +### IPv4 Ingress Sharing + + +IPv4 ingress sharing optimizes the utilization of hardware resources by sharing them between different VLAN interfaces when they have the same ACL attached. + + +Larger deployments benefit from this function, where IPv4 ingress sharing is applied on multiple SVIs with member interfaces on the same forwarding ASIC. For example, when ingress sharing is applied on all VLANs of a trunk port carrying multiple VLANs, it occupies fewer hardware resources irrespective of the number of VLANs. By default, IPv4 ingress sharing is disabled on the switches. + + +To enable IPv4 Ingress Sharing, use the **hardware access-list resource sharing vlan in** command. +Note: Enabling or disabling the IPv4 ingress sharing requires the restart of software agents on the switches which is a disruptive process and will impact the traffic forwarding. +The **no** form of the command disables the IPv4 ingress sharing on the switch. To display the IPv4 ingress sharing information, use the **show platform trident** command on the switch. + + +### IPv4 Egress Sharing + + +IPv4 Egress Sharing optimizes the utilization of hardware resources by sharing TCAM entries for a group of SVIs on which IPv4 ACLs are shared. The TCAM entries are shared for all the SVIs per chip, saving a lot of hardware resources and enabling ACLs to scale to larger configurations. + + +Larger deployments benefit from IPv4 Egress Sharing, which is applied on multiple SVIs with member interfaces on the same forwarding ASIC.
For example, when Egress Sharing is applied on all VLANs of a trunk port carrying multiple VLANs, it occupies fewer hardware resources irrespective of the number of VLANs. + + +By default, the system enables IPv4 Egress Sharing on the switches. However, IPv4 Egress Sharing and uRPF cannot be enabled at the same time. Disabling IPv4 RACL sharing will allow uRPF configuration and ensure the simultaneous configuration of the RACL in non-shared mode. + + +To enable unicast Reverse Path Forwarding (uRPF) on the switch, the IPv4 Egress Sharing must be disabled using the **no hardware access-list resource sharing vlan ipv4 out** command. + + +If IPv4 Egress Sharing was previously disabled from the default configuration, use the **hardware access-list resource sharing vlan ipv4 out** command to enable it. +Note: Enabling or disabling IPv4 Egress Sharing requires restarting software agents on the switches, which is a disruptive process and will impact the traffic forwarding. + +Use the following **show** commands to verify the IPv4 Egress Sharing information on the switch. + + +- show ip access-lists + +- [show vlan](/um-eos/eos-virtual-lans-vlans#xx1153070) + +- show platform arad acl tcam + +- [show ip route](/um-eos/eos-ipv4#xx1145358) + +- [show platform arad ip route](/um-eos/eos-ipv4#xx1173125) + + +### Configuring IPv4 Egress Sharing + + +The **hardware access-list resource sharing vlan ipv4 out** command enables IPv4 Egress Sharing on the switch. +Note: IPv4 Egress Sharing is enabled by default. + + +The **no** form of the command disables the switch's IPv4 Egress Sharing, allowing you to configure the uRPF. + + +### Displaying IPv4 Egress Sharing Information + + +**Examples** + + +- The **show ip access-lists** command displays the list of all the configured IPv4 ACLs.
+ +``` +`switch# **show ip access-lists summary** +IPV4 ACL default-control-plane-acl [readonly] + Total rules configured: 17 + Configured on Ingress: control-plane(default VRF) + Active on Ingress: control-plane(default VRF) + +IPV4 ACL ipAclLimitTest + Total rules configured: 0 + Configured on Egress: Vl2148,2700 + Active on Egress: Vl2148,2700` +``` + +- The **show vlan** command displays the list of all the member interfaces under each SVI. + +``` +`switch# **show vlan** +VLAN Name Status Ports +----- -------------- --------- ----------------- +1 default active +2148 VLAN2148 active Cpu, Et1, Et26 +2700 VLAN2700 active Cpu, Et18` +``` + +- The **show platform arad acl tcam** command displays the number of TCAM entries (hardware resources) occupied by the ACL on each forwarding ASIC and the percentage of TCAM utilization per forwarding ASIC. + +``` +`switch# **show platform arad acl tcam detail** +ip access-list ipAclLimitTest (Shared RACL, 0 rules, 1 entries, direction out, +state success, Acl Label 2) +Fap: Arad0, Shared: true, Interfaces: Vl2148, Vl2700 +Bank Offset Entries +0 0 1 +Fap: Arad1, Shared: true, Interfaces: Vl2148 +Bank Offset Entries +0 0 1 + +switch# **show platform arad acl tcam summary** +The total number of TCAM lines per bank is 1024. +======================================================== +Arad0: +======================================================== + Bank Used Used % Used By + 0 1 0 IP Egress PACLs/RACLs +Total Number of TCAM lines used is: 1 +======================================================== +Arad1: +======================================================== + Bank Used Used % Used By + 0 1 0 IP Egress PACLs/RACLs +Total Number of TCAM lines used is: 1` +``` + +- The **show ip route** command displays the unicast ip routes installed in the system. 
+ +``` +`switch# **show ip route** +VRF name: default +Codes: C - connected, S - static, K - kernel, + O - OSPF, IA - OSPF inter area, E1 - OSPF external type 1, + E2 - OSPF external type 2, N1 - OSPF NSSA external type 1, + N2 - OSPF NSSA external type2, B I - iBGP, B E - eBGP, + R - RIP, I - ISIS, A B - BGP Aggregate, A O - OSPF Summary, + NG - Nexthop Group Static Route + +Gateway of last resort is not set + C 10.1.0.0/16 is directly connected, Vlan2659 + C 10.2.0.0/16 is directly connected, Vlan2148 + C 10.3.0.0/16 is directly connected, Vlan2700 + S 172.17.0.0/16 [1/0] via 172.24.0.1, Management1 + S 172.18.0.0/16 [1/0] via 172.24.0.1, Management1 + S 172.19.0.0/16 [1/0] via 172.24.0.1, Management1 + S 172.20.0.0/16 [1/0] via 172.24.0.1, Management1 + S 172.22.0.0/16 [1/0] via 172.24.0.1, Management1 + C 172.24.0.0/18 is directly connected, Management1` +``` + +- The **show platform arad ip route** command displays the platform unicast forwarding routes. + +``` +`switch# **show platform arad ip route** +Tunnel Type: M(mpls), G(gre) + ------------------------------------------------------------------------------- +| Routing Table | | +|------------------------------------------------------------------------------ +|VRF| Destination | | | | Acl | | +ECMP| FEC | Tunnel +| ID| Subnet | Cmd | Destination | VID | Label | MAC / CPU Code +|Index|Index|T Value + +-------------------------------------------------------------------------------- +|0 |0.0.0.0/8 |TRAP | CoppSystemL3DstMiss|0 | - | ArpTrap | - |1031 | - +|0 |10.1.0.0/16 |TRAP | CoppSystemL3DstMiss|2659 | - | ArpTrap | - |1030 | - +|0 |10.2.0.0/16 |TRAP | CoppSystemL3DstMiss|2148 | - | ArpTrap | - |1026 | - +|0 |10.3.0.0/16 |TRAP | CoppSystemL3DstMiss|2700 | - | ArpTrap | - |1034 | - +|0 |127.0.0.0/8 |TRAP | CoppSystemL3DstMiss|0 | - | ArpTrap | - |1031 | - +|0 |172.17.0.0/16 |TRAP | CoppSystemL3DstMiss|0 | - | ArpTrap | - |1025 | - +|0 |172.18.0.0/16 |TRAP | CoppSystemL3DstMiss|0 | - | ArpTrap | - |1025 | - 
+|0 |172.19.0.0/16 |TRAP | CoppSystemL3DstMiss|0 | - | ArpTrap | - |1025 | - +|0 |172.20.0.0/16 |TRAP | CoppSystemL3DstMiss|0 | - | ArpTrap | - |1025 | - +|0 |172.22.0.0/16 |TRAP | CoppSystemL3DstMiss|0 | - | ArpTrap | - |1025 | - +|0 |172.24.0.0/18 |TRAP | CoppSystemL3DstMiss|0 | - | ArpTrap | - |1032 | - +|0 |0.0.0.0/0 |TRAP | CoppSystemL3LpmOver|0 | - | SlowReceive | - +|1024 | - +|0 |10.1.0.0/32* |TRAP | CoppSystemIpBcast |0 | - | BcastReceive | - +|1027 | - +|0 |10.1.0.1/32* |TRAP | CoppSystemIpUcast |0 | - | Receive | - |32766| - +|0 |10.1.255.1/32* |ROUTE| Po1 |2659 |4094 | 00:1f:5d:6b:ce:45 +| - |1035 | - +|0 |10.1.255.255/32* |TRAP | CoppSystemIpBcast |0 | - | BcastReceive | - +|1027 | - +|0 |10.2.0.0/32* |TRAP | CoppSystemIpBcast |0 | - | BcastReceive | - +|1027 | - +|0 |10.2.0.1/32* |TRAP | CoppSystemIpUcast |0 | - | Receive | - |32766| - +|0 |10.2.255.1/32* |ROUTE| Et1 |2148 |2 | 00:1f:5d:6d:54:dc | +- |1036 | - +|0 |10.2.255.255/32* |TRAP | CoppSystemIpBcast |0 | - | BcastReceive | - +|1027 | - +|0 |10.3.0.0/32* |TRAP | CoppSystemIpBcast |0 | - | BcastReceive | - +|1027 | - +|0 |10.3.0.1/32* |TRAP | CoppSystemIpUcast |0 | - | Receive | - |32766| - +|0 |10.3.255.1/32* |ROUTE| Et18 |2700 |2 | 00:1f:5d:6b:00:01 | +- |1038 | -` +``` + + +## Route Maps + + +A route map is an ordered set of rules that controls the redistribution of IP routes into a protocol domain based on criteria such as route metrics, access control lists, next-hop addresses, and route tags. Route maps can also alter route parameters as they are redistributed. + + +### Route Map Description + + +Route maps are composed of route map statements, each consisting of a list of match and set commands. + + +#### Route Map Statements + + +Route map statements are categorized by the resolution of routes that the statement filters. + + +- Permit statements facilitate the redistribution of matched routes. + +- Deny statements prevent the redistribution of matched routes. 
+ + +Route map statement elements include name, sequence number, filter type, match commands, set commands, and continue commands. + + +- The **name** identifies the route map to which the statement belongs. + +- The **sequence number** designates the statement’s placement within the route map. + +- A **filter type** specifies the route resolution. Valid types are **permit** and **deny**. + +- The **match commands** specify criteria that select the routes the statement evaluates for redistribution. + +- The **set commands** modify route parameters for redistributed routes. + +- The **continue commands** prolong the route map evaluation of routes that match a statement. + + +Statements filter routes for redistribution. Routes that statements pass are redistributed (permit statements) or rejected (deny statements). The next statement in the route map then filters routes that statements fail. + + +- When a statement does not contain a **match** command, the statement passes all routes. + +- When a statement contains a single **match** command that lists a single object, the statement passes routes whose parameters match the object. + +- When a statement contains a single **match** command that lists multiple objects, the statement passes routes whose parameters match at least one object. + +- When a statement contains multiple **match** commands, the statement passes routes whose parameters match all match commands. + + +The **Set** commands modify parameters for redistributed routes and are valid in permit statements. + + +#### Example + + +The following route map statement is named **MAP_1** with sequence number **10**. The statement matches all routes from BGP Autonomous System 10 and redistributes them with a local preference set to **100**. Routes that do not match the statement are evaluated against the next statement in the route map. 
+ +``` +`switch# **route-map MAP_1 permit 10** + match as 10 + set local-preference 100` +``` + + +#### Route Maps with Multiple Statements + + +A route map consists of statements with the same name and different sequence numbers. Statements filter routes by ascending order based on their sequence numbers. When a statement passes a route, the redistribution action is performed as the filter type specifies, ignoring all subsequent statements. When the statement fails the route, the statement with the smallest sequence number larger than the current one filters the route. + + +All route maps contain an implied final statement containing a single deny statement without a match command. This statement denies the redistribution of any routes that no other statement passes. + + +#### Example + + +The following route map is named **MAP_1** and has two permit statements. Routes that do not match either statement are denied redistribution into the target protocol domain. + +``` +`switch# **route-map MAP_1 permit 10** + match as 10 + set local-preference 100 +! +switch# **route-map MAP_1 permit 20** + match metric-type type-1 + match as 100` +``` + + +Route Map Configuration describes route map configuration procedures. + + +#### Route Maps with Multiple Statements and Continue Commands + + +Route map statements that contain a continue (route map) command support additional route map evaluation of routes whose parameters meet the statement’s match commands. Routes that match a statement containing a **continue** command are evaluated against the statement specified by the **continue** command. + + +When a route matches multiple route-map statements, the filter action (deny or permit) is determined by the last statement that the route matches. The **set** commands in all statements matching the route are applied to the route after the route map evaluation is complete. 
Multiple set commands are applied in the same order by which the route was evaluated against the statements containing them. + + +#### Example + + +The following route map is named **MAP_2** with a **permit** and a **deny** statement. The permit statement contains a continue command. Routes that match statement 10 are evaluated against statement 20. + +``` +`switch# **route-map MAP_2 permit 10** + match as 10 +   continue 20 + set local-preference 100 +! +switch# **route-map MAP_2 deny 20** + match metric-type type-1 + match as 100` +``` + + +The route is redistributed if it passes statement 10 and is rejected by statement 20. The route is denied redistribution in all other instances. The **continue** command guarantees the evaluation of all routes against both statements. + + +### Route Map Configuration + + +Route maps are created and modified in route-map configuration mode. These sections describe the configuration mode and its commands. + + +- Route Map Creation and Editing + +- Modifying Route Map Components + + +#### Route Map Creation and Editing + + +##### Creating a Route Map Statement + + +To create a route map, use the **route-map** command, including the map name and filter type (**deny** or **permit**). If the command does not specify a number, the system assigns a default sequence number to the statement. + + +##### Example + + +The following command places the switch in the ***route map*** configuration mode and creates a route map statement named **map1** with a sequence number of **50**. + +``` +`switch(config)# **route-map map1 permit 50** +switch(config-route-map-map1)#` +``` + + +##### Editing a Route Map Statement + + +To edit an existing route map statement, use the **route-map**, including the map’s name and the statement’s number. The switch enters the route map configuration mode for the statement. Subsequent **match (route-map)** and **set (route-map)** commands add the corresponding commands to the statement. 
+ + +The **show** command displays the contents of the existing route map. + + +##### Example + + +The following command places the switch in the route map configuration mode to edit an existing route map statement. The **show** command displays the contents of all statements in the route map. + +``` +`switch(config)# **route-map MAP2** +switch(config-route-map-MAP2)#show + Match clauses: + match as 10 + match tag 333 + Set clauses: + set local-preference 100 +switch(config-route-map-MAP2)#` +``` + + +##### Saving Route Map Modifications + + +Route map configuration mode is a group-change mode. You can save changes by exiting the mode, either with an explicit **exit** command or by switching directly to another configuration mode. This includes switching to the configuration mode for a different route map. + + +##### Example + + +The first command creates the **map1** statement with a sequence number of 10. The second command is not yet saved to the route map, as displayed by the **show** command. + + +``` +`switch(config)# **route-map map1 permit** +switch(config-route-map-map1)# **match as 100** +switch(config-route-map-map1)# **show** + +switch(config-route-map-map1)#` +``` + + +The **exit** command saves the **match** command. + + +``` +`switch(config-route-map-map1)# **exit** +switch(config)# **show route-map map1** +route-map map1 permit 10 + Match clauses: + match as 100 + Set clauses: +switch(config)#` +``` + + +##### Discarding Route Map Modifications + + +The **abort** command discards all pending changes and exits route-map configuration mode. + + +##### Example + + +The **abort** command discards the pending **match** command and restores the original route map. 
+ +``` +`switch(config)# **route-map map1 permit** +switch(config-route-map-map1)# **match as 100** +switch(config-route-map-map1)# **abort** +switch(config)# **show route-map map1** +switch(config)#` +``` + + +#### Modifying Route Map Components + + +The following commands add rules to the configuration mode route map: + + +- **match (route-map)** adds a match rule to a route map. + +- **set (route-map)** adds a set rule to a route map. + + +##### Inserting a Statement + + +To insert a new statement into an existing route map, create a new statement with a sequence number that differs from any existing statement in the map. + + +##### Example + + +The following commands add statement **50** to the **Map1** route map and a match statement of **150**. They save the configuration using **exit** then display the new route map using **show route-map Map1**. + +``` +`switch(config)# **route-map Map1 permit 50** +switch(config-route-map-Map1)# **match as 150** +switch(config-route-map-Map1)# **exit** +switch(config)# **show route-map Map1** +route-map Map1 deny 10 + Match clauses: + match as 10 + match tag 333 + Set clauses: + set local-preference 100 +route-map Map1 permit 50 + Match clauses: + match as 150 + Set clauses: +switch(config)#` +``` + + +##### Deleting Route Map Components + + +To remove a component from a route map, perform one of the following: + + +- To remove a command from a statement, enter **no**, followed by the command you want to remove. + +- To remove a statement, enter **no**, followed by the route map with the filter type and the sequence number of the statement you want to remove. + +- To remove a route map, enter **no** followed by the route map without a sequence number. + + +### Using Route Maps + + +Protocol redistribution commands include a route map parameter determining the routes to be redistributed into the specified protocol domain. 
+ + +#### Example + + +The following commands use the **Map1** route map to select OSPFv2 routes for redistribution into BGP AS1. + +``` +`switch(config)# **router bgp 1** +switch(config-router-bgp)# **redistribute ospf route-map Map1** +switch(config-router-bgp)# **exit** +switch(config)#` +``` + + +## Prefix Lists + + +A prefix list is an ordered set of rules that defines route redistribution access for a specified IP address space. It consists of a filter action (**`deny`** or **`permit`**), an address space identifier (IPv4 **`subnet + address`** or IPv6 **`prefix`**), and a **`sequence`** number. + + +Prefix lists are referenced by route map match commands when filtering routes for redistribution. + + +- Prefix List Configuration describes the prefix list configuration process. + +- Using Prefix Lists describes the use of prefix lists. + +- Static Routes Redistribution into IGPs describes the redistribution of routes whose configured next-hops satisfy the route-map policy. + + +### Prefix List Configuration + + +A prefix list is an ordered set of rules that defines route redistribution access for a specified IP address space. A prefix list rule consists of a filter action (deny or permit), a network address (IPv4 subnet or IPv6 prefix), and a sequence number. A rule may also include an alternate mask size. + + +The switch supports IPv4 and IPv6 prefix lists. The switch is placed in a Prefix-list configuration mode to create and edit IPv4 or IPv6 prefix lists. + + +#### IPv4 Prefix Lists + + +IPv4 prefix lists are created or modified by adding an IPv4 prefix list rule in the Prefix-list configuration mode. Each rule includes the name of a prefix list and the sequence number, network address, and filter action. A list consists of all rules that have the same prefix-list name. + + +The **ip prefix-list** command creates a prefix list or adds a rule to an existing list. 
Route map match commands use prefix lists to filter routes for redistribution into OSPF, RIP, or BGP domains. + + +##### Creating an IPv4 Prefix List + + +To create an IPv4 prefix list, enter the **ip prefix-list** command, followed by the list's name. The switch enters the ***IPv4 prefix-list*** configuration mode for the list. If the name of an existing prefix list follows the command, subsequent commands edit that list. + + +##### Examples + + +- The following command places the switch in ***IPv4 prefix list*** configuration mode to create an IPv4 prefix list named **route-one**. + +``` +`switch(config)# **ip prefix-list route-one** +switch(config-ip-pfx)#` +``` + +- This series of commands creates four different rules for the prefix-list named **route-one**. + +``` +`switch(config)# **ip prefix-list route-one** +switch(config-ip-pfx)# **seq 10 deny 10.1.1.0/24** +switch(config-ip-pfx)# **seq 20 deny 10.1.0.0/16** +switch(config-ip-pfx)# **seq 30 permit 12.15.4.9/32** +switch(config-ip-pfx)# **seq 40 deny 1.1.1.0/24**` +``` + + +To view the list, save the rules by exiting the ***Prefix-list*** command mode using the **exit** command, then re-enter the configuration mode and use the **show active** command. + + +``` +`switch(config-ip-pfx)# **exit** +switch(config)# **ip prefix-list route-one** +switch(config-ip-pfx)# **show active** +ip prefix-list route-one + seq 10 deny 10.1.1.0/24 + seq 20 deny 10.1.0.0/16 + seq 30 permit 12.15.4.9/32 + seq 40 deny 1.1.1.0/24 +switch(config-ip-pfx)# **ip prefix-list route-one**` +``` + + +IPv4 prefix lists are referenced in the **match (route-map)** command. + + +#### IPv6 Prefix Lists + + +##### Creating an IPv6 Prefix List + + +The switch provides an ***IPv6 prefix-list*** configuration mode for creating and modifying IPv6 prefix lists. A list can be edited only in the mode where it was created. + + +To create an IPv6 prefix list, enter the **ipv6 prefix-list** command and the list's name.
The switch enters the list's ***IPv6 prefix-list*** configuration mode. If the name of an existing prefix list follows the command, subsequent commands edit that list. + + +##### Example + + +This command places the switch in the ***IPv6 prefix list*** configuration mode to create an IPv6 prefix list named **map1**. + +``` +`switch(config)# **ipv6 prefix-list map1** +switch(config-ipv6-pfx)#` +``` + + +##### Adding a Rule + + +To append a rule to the end of a list, enter the rule without a sequence number while in ***Prefix-List*** configuration mode for the list. The system derives the new rule’s sequence number by adding **10** to the last rule’s sequence number. + + +##### Example + + +These commands enter the first two rules into a new prefix list. + +``` +`switch(config-ipv6-pfx)# **permit 3:4e96:8ca1:33cf::/64** +switch(config-ipv6-pfx)# **permit 3:11b1:8fe4:1aac::/64**` +``` + + +To view the list, save the rules by exiting the ***prefix-list*** command mode using the **exit** command, then re-enter the configuration mode and use the **show active** command. + + +``` +`switch(config-ipv6-pfx)# **exit** +switch(config)# **ipv6 prefix-list map1** +switch(config-ipv6-pfx)# **show active** +ipv6 prefix-list map1 + seq 10 permit 3:4e96:8ca1:33cf::/64 + seq 20 permit 3:11b1:8fe4:1aac::/64 +switch(config-ipv6-pfx)#` +``` + + +The following command appends a rule to the end of the prefix list. The new rule’s sequence number is **30**. 
+ + +``` +`switch(config-ipv6-pfx)# **permit 3:1bca:1141:ab34::/64** +switch(config-ipv6-pfx)# **exit** +switch(config)# **ipv6 prefix-list map1** +switch(config-ipv6-pfx)# **show active** +ipv6 prefix-list map1 + seq 10 permit 3:4e96:8ca1:33cf::/64 + seq 20 permit 3:11b1:8fe4:1aac::/64 +   seq 30 permit 3:1bca:1141:ab34::/64 +switch(config-ipv6-pfx)#` +``` + + +##### Inserting a Rule + + +To insert a rule into a prefix list, use the **seq (IPv6 Prefix Lists)** command to enter a rule with a sequence number between the numbers of two existing rules. + + +##### Example + + +This command inserts a rule between the first two by assigning sequence number **15**. + +``` +`switch(config-ipv6-pfx)# **seq 15 deny 3:4400::/64** +switch(config-ipv6-pfx)# **exit** +switch(config)# **show ipv6 prefix-list map1** +ipv6 prefix-list map1 +seq 10 permit 3:4e96:8ca1:33cf::/64 +seq 15 deny 3:4400::/64 +seq 20 permit 3:11b1:8fe4:1aac::/64 +seq 30 permit 3:1bca:3ff2:634a::/64 +switch(config)#` +``` + + +##### Deleting a Rule + + +To remove a rule from the configuration mode prefix list, enter **no seq** (see **seq (IPv6 Prefix Lists)**), followed by the rule's sequence number. + + +##### Example + + +These commands remove rule **20** from the prefix list and display the resultant prefix list. + +``` +`switch(config-ipv6-pfx)# **no seq 20** +switch(config-ipv6-pfx)# **exit** +switch(config)# **show ipv6 prefix-list map1** +ipv6 prefix-list map1 +seq 10 permit 3:4e96:8ca1:33cf::/64 +seq 15 deny 3:4400::/64 +seq 30 permit 3:1bca:3ff2:634a::/64 +switch(config)#` +``` + + +### Using Prefix Lists + + +Route map match commands include an option that matches a specified prefix list. + + +**Example** + + +The **MAP_1** route map uses a match command that references the **PL_1** prefix list. 
+ +``` +`switch(config)# **route-map MAP_1 permit** +switch(config-route-map-MAP_1)# **match ip address prefix-list PL_1** +switch(config-route-map-MAP_1)# **set community 500** +switch(config-route-map-MAP_1)# **exit**` +``` + + +### Static Routes Redistribution into IGPs + + +Use **match ip next-hop** to match against next-hops in a route-map. This can be used to redistribute matching static routes into an IGP (IS-IS, OSPF, etc.). + + +The following example applies the **match ip next-hop** clause for static routes redistributed into IGPs for multi-agent mode. The following configures a static route. + + +``` +`switch(config)# **ip route 10.20.30.0/24 1.2.3.4**` +``` + + +The following commands configure a prefix-list: + + +``` +`switch (config)# **ip prefix-list prefixListName** +switch(config-ip-pfx)# **permit 1.2.3.4/32**` +``` + + +**1.2.3.4** is a **configured** next-hop for static route **10.20.30.0/24**. + + +The following commands configure a route map: + + +``` +`switch(config)# **route-map routeMapName** +switch(config-route-map-routeMapName)# **match ip next-hop prefix-list prefixListName**` +``` + + +For example, based on the route-map mentioned in the preceding command, to redistribute matching static routes into an IGP, use the following command for IS-IS: + + +``` +`switch(config-router-isis)# **redistribute static route-map routeMapName**` +``` + + +View redistributed routes using the following **show** commands. + + +The **show ip route** command displays the IP route. 
+ + +``` +`switch# **show ip route** + +VRF: default +Codes: C - connected, S - static, K - kernel, + O - OSPF, IA - OSPF inter area, E1 - OSPF external type 1, + E2 - OSPF external type 2, N1 - OSPF NSSA external type 1, + N2 - OSPF NSSA external type2, B - BGP, B I - iBGP, B E - eBGP, + R - RIP, I L1 - IS-IS level 1, I L2 - IS-IS level 2, + O3 - OSPFv3, A B - BGP Aggregate, A O - OSPF Summary, + NG - Nexthop Group Static Route, V - VXLAN Control Service, + DH - DHCP client installed default route, M - Martian, + DP - Dynamic Policy Route, L - VRF Leaked + +Gateway of last resort is not set + + ... + I L2 10.20.30.0/24 [115/10] via 1.2.3.4, Ethernet1` +``` + + +Use the **show isis database detail** command to view routes redistributed into IS-IS. + + +``` +`switch# **show isis database detail** + +IS-IS Instance: B VRF: default + IS-IS Level 1 Link State Database + LSPID Seq Num Cksum Life IS Flags + ... + IS-IS Level 2 Link State Database + LSPID Seq Num Cksum Life IS Flags + 0000.0000.0001.00-00 6 10364 840 L2 <> + ... + Reachability : 10.20.30.0/24 Metric: 0 Type: 1 Up + ...` +``` + + +While the preceding example applies to IS-IS, a similar approach may be taken for other IGPs, such as OSPF. + + +## Port ACLs with User-Defined Fields + + +Describes the support for specifying User-Defined Fields (UDF) in Port ACLs, including IPv4, IPv6, and MAC ACLs. The purpose of the User-Defined Fields feature is to permit or deny packets based on custom offset pattern matching. + + +User-Defined Fields, or UDFs, are part of an access-list filter and comprise an offset, length, pattern match and mask. This describes a single portion of any incoming packet that matches the provided value. + + +UDFs may also be defined via aliases. Aliases can save a UDF configuration for reuse in multiple access lists or access list rules. An alias may substitute for a fully defined UDF, including the offset, pattern, and mask. 
The pattern or mask may be overridden when the alias is used in an access list rule. + + +The behavior, CLI syntax, and configuration of UDFs are identical to Traffic Steering UDF and Mirroring ACL UDF. + + +This section describes port ACLs with user-defined fields, including configuration instructions. Topics covered by this section include: + + +- Configuring Port ACLs with User-Defined Fields + +- Port ACLs with User-Defined Fields Limitations + + +### Configuring Port ACLs with User-Defined Fields + + +User-Defined Fields (UDFs) are specified as part of an access list. However, the type of access list dictates the base position of the UDF and the options available. In addition, you must configure a TCAM profile to include UDFs as part of the Port ACL feature’s key. + + +#### TCAM Profile + + +User-Defined Fields are defined as additional fields in the Port ACL feature’s key. By default, UDFs are not included in the keys for the Port ACL features. Adding a UDF to the key requires removing different key fields to fit within the TCAM width restrictions. + + +Note: Each UDF is either 16 bits wide or 32 bits wide. + + +The following are example configurations of the TCAM profile. + + +##### IPv4 Port ACL + + +The following configurations create a new profile based on the default profile. This new profile replaces the Layer 4 port key fields with one 16-bit UDF and one 32-bit UDF. 
+ + +``` +`switch(config)# **hardware tcam** +switch(config-hw-tcam)# **profile ipv4Udf copy default** +switch(config-hw-tcam-profile-ipv4Udf)# **feature acl port ip** +switch(config-hw-tcam-profile-ipv4Udf-feature-acl-port-ip)# **no key field l4-ops** +switch(config-hw-tcam-profile-ipv4Udf-feature-acl-port-ip)# **no key field l4-src-port** +switch(config-hw-tcam-profile-ipv4Udf-feature-acl-port-ip)# **no key field l4-dst-port** +switch(config-hw-tcam-profile-ipv4Udf-feature-acl-port-ip)# **key field udf-16b-1** +switch(config-hw-tcam-profile-ipv4Udf-feature-acl-port-ip)# **key field udf-32b-1** +switch(config-hw-tcam-profile-ipv4Udf-feature-acl-port-ip)# **exit** +switch(config-hw-tcam-profile-ipv4Udf)# **exit** +switch(config-hw-tcam)# **system profile ipv4Udf**` +``` + + +16-bit IPv4 Header Match + +**Example** + + +The following configurations match IPv4 packets based on the Identification (ID) field. + + +Packets ingressing into **interface ethernet 7** with an ID equal to **1000** (`0x03E80000`) are forwarded, while packets with an ID different from **1000** are dropped. + + +``` +`(config)# **ip access-list udfAcl** +(config-acl-udfAcl)# **permit ip any any payload header start offset 1 pattern 0x03E80000 mask 0x0000FFFF** +(config-acl-udfAcl)# **deny ip any any** +(config-acl-udfAcl)# **exit** +(config)# **interface ethernet 7** +(config-if-Et7)#` +``` + + +##### IPv6 Port ACL + + +The following configurations create a new profile based on the default profile. This new profile replaces the destination IPv6 address key field with two 32-bit UDFs. 
+ + +``` +`switch(config)# **hardware tcam** +switch(config-hw-tcam)# **profile ipv6Udf copy default** +switch(config-hw-tcam-profile-ipv6Udf)# **feature acl port ipv6** +switch(config-hw-tcam-profile-ipv6Udf-feature-acl-port-ipv6)# **no key field dst-ipv6** +switch(config-hw-tcam-profile-ipv6Udf-feature-acl-port-ipv6)# **key field udf-32b-1** +switch(config-hw-tcam-profile-ipv6Udf-feature-acl-port-ipv6)# **key field udf-32b-2** +switch(config-hw-tcam-profile-ipv6Udf-feature-acl-port-ipv6)# **exit** +switch(config-hw-tcam-profile-ipv6Udf)# **exit** +switch(config-hw-tcam)# **system profile ipv6Udf**` +``` + + +32-bit IPv6 Payload Match + +**Example** + + +The following configurations match IPv6 UDP packets based on the first 32 bits of the packet payload. + + +UDP packets ingressing into **interface ethernet 7** that starts with **0x1234567X** (where **X** can be any valid hexadecimal) in the payload are forwarded while dropping any other packets. The offset is set to **2** (2 x 4-byte words) to skip the UDP header. + + +``` +`(config)# **ipv6 access-list udfAcl** +(config-ipv6-acl-udfAcl)# **permit udp any any payload offset 2 pattern 0x12345670 mask 0x0000000f** +(config-ipv6-acl-udfAcl)# **deny ipv6 any any** +(config-ipv6-acl-udfAcl)# **exit** +(config)# **interface ethernet 7** +(config-if-Et7)# **ipv6 access-group udfAcl in**` +``` + + +### Port ACLs with User-Defined Fields Limitations + + +User-defined fields consume a limited set of copy resources. For each unique offset, if a pattern is specified masked to be > 16 bits wide, then a 32-bit resource is used. If no 32-bit resource is available, then two 16-bit resources are used if available. Copy resources depend on the number of UDF key fields added to the feature key. Each UDF key field maps to one copy resource. Using the above TCAM profile configurations: + + +- IPv4: 1 × 16-bit pattern + 1 × 32-bit pattern. + +- IPv6: 2 × 32-bit pattern. + +- MAC: 1 × 16-bit pattern + 1 × 32-bit pattern. 
+ + +Other limitations include: + + +- The maximum offset value is **31**, which is 31 4-byte words, or 124 bytes. + +- UDFs only work on ingress Port ACLs. + + +## ACL, Route Map, and Prefix List Commands + + +This section describes CLI commands that this chapter references. + + +### ACL Creation and Access Commands + + +- hardware access-list resource sharing vlan in + +- hardware access-list resource sharing vlan ipv4 out + +- ip access-list + +- ip access-list standard + +- ipv6 access-list + +- ipv6 access-list standard + +- mac access-list + +- system profile + + +### ACL Implementation Commands + + +- ip access-group + +- ipv6 access-group + +- mac access-group + + +### Service ACL Implementation Commands + + +- ip access-group (Service ACLs) + +- ipv6 access-group (Service ACLs) + + +### ACL Edit Commands + + +- counters per-entry (ACL configuration modes) + +- hardware access-list update default-result permit + +- no sequence number (ACLs) + +- resequence (ACLs) + +- show (ACL configuration modes) + + +### ACL Rule Commands + + +- deny (IPv4 ACL) + +- deny (IPv6 ACL) + +- deny (MAC ACL) + +- deny (Standard IPv4 ACL) + +- deny (Standard IPv6 ACL) + +- permit (IPv4 ACL) + +- permit (IPv6 ACL) + +- permit (MAC ACL) + +- permit (Standard IPv4 ACL) + +- permit (Standard IPv6 ACL) + +- remark + + +### ACL List Counter Commands + + +- clear ip access-lists counters + +- clear ipv6 access-lists counters + +- hardware counter feature acl out + + +### ACL Display Commands + + +- show access-lists + +- show ip access-lists + +- show ipv6 access-lists + +- show mac access-lists + + +### Prefix List Creation and Access Commands + + +- ip prefix-list + +- ipv6 prefix-list + + +### Prefix List Edit Commands + + +- deny (IPv6 Prefix List) + +- permit (IPv6 Prefix List) + +- seq (IPv6 Prefix Lists) + + +### Prefix List Display Commands + + +- show hardware tcam profile + +- show ip prefix-list + +- show ipv6 prefix-list + +- show platform arad acl tcam + +- show platform 
arad acl tcam summary + +- show platform arad mapping + +- show platform fap acl + +- show platform fap acl tcam + +- show platform fap acl tcam hw + +- show platform fap acl tcam summary + +- show platform trident tcam + + +### Route Map Creation and Access Command + + +- route-map + + +### Route Map Edit Commands + + +- continue (route map) + +- description (route map) + +- match (route-map) + +- set (route-map) + +- set as-path prepend + +- set as-path match + +- set community (route-map) + +- set extcommunity (route-map) + + +### Route Map Display Commands + + +- show route-map + + +### clear ip access-lists counters + + +The **clear ip access-lists counters** command sets ACL counters to zero for the specified IPv4 Access Control List (ACL). The **session** parameter limits ACL counter clearing to the current CLI session. + + +**Command Mode** + + +Privileged EXEC + + +**Command Syntax** + + +clear ip access-lists counters acl_name scope + + +**Parameters** + + +- **acl_name** - Specify the name of ACL list. Options include the following: + + +- **no parameter** - Specifies all ACLs. + +- **access_list** - Specifies the name of ACL. + +- **scope** - Specify the session affected by command. Options include the following: + + +- **no parameter** - Clears all counters on all CLI sessions. + +- **session** - Clears counters only on the current CLI session. + + +**Example** + + +This command resets all IPv4 ACL counters. + +``` +`switch(config)# **clear ip access-lists counters** +switch(config)#` +``` + + +### clear ipv6 access-lists counters + + +The **clear ipv6 access-lists counters** command sets ACL counters to zero for the specified IPv6 Access Control List (ACL). The **session** parameter limits ACL counter clearing to the current CLI session. + + +**Command Mode** + + +Privileged EXEC + + +**Command Syntax** + + +clear ipv6 access-lists counters [acl_name][scope] + + +**Parameters** + + +- **acl_name** - Specify the name of ACL. 
Options include the following: + + +- **no parameter** - Clears all IPv6 ACLs. + +- **access_list** - Clears the access list of the IPv6 ACL. + +- **scope** - Specify the session affected by command. Options include the following: + + +- **no parameter** - The command affects counters on all CLI sessions. + +- **session** - Affects only current CLI session. + + +**Example** + + +This command resets all IPv6 ACL counters. + +``` +`switch(config)# **clear ipv6 access-lists counters** +switch(config)#` +``` + + +### continue (route map) + + +The **continue** command creates a route map statement entry that enables additional route map evaluation of routes with parameters meeting the statement matching criteria. + + +A statement typically contains a match (route-map) and a set (route-map) command. The evaluation of routes with settings the same as **match** command parameters normally ends and the statement's **set** commands apply to the route. Routes that match a statement containing a **continue** command evaluate against the statement specified by the **continue** command. + + +When a route matches multiple route map commands, the last statement that the route matches determines the filter action (**deny** or**permit**) . The **set** commands in all statements matching the route apply to the route after completing the route map evaluation. Multiple set commands apply in the same order by the route evaluation against the statement containing them. + + +The **no continue** and **default continue** commands remove the corresponding **continue** command from the configuration mode **route map** statement by deleting the corresponding command from ***running-config***. + + +**Command Mode** + + +Route-Map Configuration + + +**Command Syntax** + + +**continue next_seq** + + +**no continue next_seq** + + +**default continue next_seq** + + +**Parameters** + + +next_seq - Specifies next statement for evaluating matching routes. 
Options include the following: + + +- **no parameter** - The next statement in the route map, as determined by sequence number. + +- **seq_number** - Specifies the number of the next statement. Values range from **1** to **16777215**. + + +**Restrictions** + + +A **continue** command cannot specify a sequence number smaller than the sequence number of the route map statement. + + +**Related Command** + + +route-map command enters the Route-Map Configuration Mode. + + +**Example** + + +This command creates route map **map1**, statement **40** with a match command, a set command, and a continue command. Routes that match the statement subsequently evaluate against statement **100**. The **set local-preference** command applies to matching routes regardless of subsequent matching operations. + +``` +`switch(config)# **route-map map1 deny 40** +switch(config-route-map-map1)# **match as 15** +switch(config-route-map-map1)# **continue 100** +switch(config-route-map-map1)# **set local-preference 50** +switch(config-route-map-map1)#` +``` + + +### counters per-entry + + +The **counters per-entry** command places the ACL in counting mode. In counting mode, the feature generally displays the number of instances in which each rule in the list matches an inbound packet and the elapsed time since the last match. However, for certain select platforms, in addition to the packet counter, ACL counters can also be enabled for byte counts when applied to data plane ACLs. Review the complete list of platforms that support byte count for data plan ACLslisted below: + + +Note: Only dataplane ACLs support byte counting on the switch. + + +The following platforms support ACL byte counting: + + +- CCS-710/720/722/755/758 series + +- DCS-7010TX + +- DCS-7050SX3/CX3/TX3/CX4/DX4/PX4 + +- DCS-7060 Series + +- DCS-7300X3/7304X3/7308X3/7316/7320X/7324/7328/7358X4/7368/7388 + + +On the FM6000 platform, this command has no effect when used in an ACL for a PBR class map. 
+ + +The **no counters per-entry** and **default counters per-entry** commands place the ACL in non-counting mode. + + +**Command Mode** + + +ACL Configuration + + +IPv6-ACL Configuration + + +Std-ACL Configuration + + +Std-IPv6-ACL Configuration + + +MAC-ACL Configuration + + +**Command Syntax** + + +counters per-entry + + +no counters per-entry + + +default counters per-entry + + +**Examples** + + +- This command places the **test1** ACL in counting mode. + +``` +`switch(config)# **ip access-list test1** +switch(config-acl-test1)# **counters per-entry** +switch(config-acl-test1)#` +``` + +- This command displays the ACL, with counter information, for an ACL in counting mode. + +``` +`switch# **show ip access-lists** +IP Access List default-control-plane-acl [readonly] + counters per-entry + 10 permit icmp any any + 20 permit ip any any tracked [match 12041 packets, 0:00:00 ago] + 30 permit ospf any any + 40 permit tcp any any eq ssh telnet www snmp bgp https [match 11 packets, 1:41:07 ago] + 50 permit udp any any eq bootps bootpc snmp rip [match 78 packets, 0:00:27 ago] + 60 permit tcp any any eq mlag ttl eq 255 + 70 permit udp any any eq mlag ttl eq 255 + 80 permit vrrp any any + 90 permit ahp any any + 100 permit pim any any + 110 permit igmp any any [match 14 packets, 0:23:27 ago] + 120 permit tcp any any range 5900 5910 + 130 permit tcp any any range 50000 50100 + 140 permit udp any any range 51000 51100 +Total rules configured: 14 + Configured on Ingress: control-plane(default VRF) + Active on Ingress: control-plane(default VRF)` +``` + +- On platforms that support byte counting, Counter information displays as shown below: + +``` +`switch# **show ip access-lists** +IP Access List default-control-plane-acl [readonly] + counters per-entry + 10 permit icmp any any [match 30 packets, 0:02:08 ago] + 20 permit ip any any tracked [match 97777 packets, 0:00:00 ago] + 30 permit udp any any eq bfd ttl eq 255 + 40 permit udp any any eq bfd-echo ttl eq 254 + 50 permit 
udp any any eq multihop-bfd micro-bfd sbfd + 60 permit udp any eq sbfd any eq sbfd-initiator + 70 permit ospf any any + 80 permit tcp any any eq ssh telnet www snmp bgp https msdp ldp netconf-ssh gnmi [match 72 packets, 0:00:00 ago] + 90 permit udp any any eq bootps bootpc snmp rip ntp ldp ptp-event ptp-general + 100 permit tcp any any eq mlag ttl eq 255 + 110 permit udp any any eq mlag ttl eq 255 + 120 permit vrrp any any + 130 permit ahp any any + 140 permit pim any any +Total rules configured: 14 + Configured on Ingress: control-plane(default VRF) + Active on Ingress: control-plane(default VRF) + +IP Access List ipCountersTest:*The **ipCountersTest ACL** is applied to the data plane. Hence, it displays the byte count information as shown below:* + counters per-entry + 10 permit tcp host 10.1.1.1 range 2000 4000 host 10.2.1.1 [match 486 bytes in 3 packets, 0:00:26 ago] + 20 permit tcp host 10.1.1.1 range 14000 16000 host 10.2.1.1 [match 486 bytes in 3 packets, 0:00:18 ago] + 30 permit udp host 10.1.1.1 range 62000 64000 host 10.2.1.1 [match 450 bytes in 3 packets, 0:00:00 ago] + 40 permit tcp host 10.1.1.1 range 50000 52000 host 10.2.1.1 [match 486 bytes in 3 packets, 0:00:02 ago] + 50 permit tcp host 10.1.1.1 range 38000 40000 host 10.2.1.1 [match 486 bytes in 3 packets, 0:00:10 ago] + 60 permit tcp host 10.1.1.1 range 26000 28000 host 10.2.1.1 [match 486 bytes in 3 packets, 0:00:18 ago] +Total rules configured: 6` +``` + + +**ipCountersTest** ACL applies to the data plane and displays the byte count information. + + +### deny (IPv4 ACL) + + +The **deny**command adds a deny rule to the configuration mode IPv4 Access Control List (ACL). Interfaces with the ACL drop packets filtered by a **deny** rule. Sequence numbers determine rule placement in the ACL. Sequence numbers for commands without numbers derive from adding **10** to the number of the ACL's last rule. 
+ + +The **no deny** and **default deny** commands remove the specified rule from the configuration mode ACL. The no sequence number (ACLs) command also removes the specified rule from the ACL. + + +**Command Mode** + + +ACL Configuration + + +**Command Syntax** + + +[seq_num] deny protocol source_addr source_port dest_addr dest_port flags message fragments tracked dscp_filter ttl_filter log + + +no deny protocol source_addr source_port dest_addr dest_port flags message fragments tracked dscp_filter ttl_filter log + + +default deny protocol source_addr source_port dest_addr dest_port flags message fragments tracked dscp_filter ttl_filter log + + +Note: Commands use a subset of the listed fields. Available parameters depend on specified protocol. + + +**Parameters** + + +- **seq_num** - The sequence number assigned to the rule. Options include the following: + + +- **no parameter** - The number derived from adding **10** to the number of the ACL's last rule. + +- **1 - 4294967295** - The number assigned to the entry. + +- **protocol** - Specify the protocol field filter. Values include the following: + + +- **ahp** - Authentication Header Protocol (51) + +- **icmp** - Internet Control Message Protocol (1) + +- **igmp** - Internet Group Management Protocol (2) + +- **ip** - Internet Protocol v4 (4) + +- **ospf** - Open Shortest Path First (89) + +- **pim** - Protocol Independent Multicast (103) + +- **tcp** - Transmission Control Protocol (6) + +- **udp** - User Datagram Protocol (17) + +- **vrrp** - Virtual Router Redundancy Protocol (112) + +- **protocol_num** - An integer corresponding to an IP protocol. Values range from **0** to **255**. + +- **source_addr** and **dest_addr** - Specify the source and destination address filters. Values include the following: + + +- **network_addr** - Specify the subnet address as a CIDR or address-mask. + +- **any** - Filter packets from all addresses. + +- **host** **ip_addr** - Specify an IP address in dotted decimal notation. 
+ +Subnet addresses support discontiguous masks. + +- **source_port** and **dest_port** - Specify the source and destination port filters. Values include the following: + + +- **any** - Specify all ports. + +- **eq** **port-1** **port-2** ... **port-n** - Specify a list of ports. Maximum list size is 10 ports. + +- **neq** **port-1** **port-2** ... **port-n** - Specify the set of all ports not listed. Maximum list size is 10 ports. + +- **gt** **port** - Specify the set of ports with larger numbers than the listed port. + +- **lt** **port** - Specify the set of ports with smaller numbers than the listed port. + +- **range** **port_1** **port_2** - Specify a range of ports. + +- **fragments** Filters packets with FO bit set that indicates a non-initial fragment packet. + +- **flags** Flag bit filters (TCP packets). + +- **message** Message type filters (ICMP packets). + +- **tracked** Rule filters packets in existing ICMP, UDP, or TCP connections. + + +- Valid in ACLs applied to the control plane. + +- Validity in ACLs applied to data plane varies by switch platform. + +- **dscp_filter** - Filters a packet by the DSCP value. Values include the following: + + +- **no parameter** -Specify that the rule does not use DSCP to filter packets. + +- **dscp** **dscp_value** - Specify to match packets match if the DSCP field in packet equals the **dscp_value**. + +- **TTL_FILTER** - Filters a packet by the TTL (time-to-live) value. Values include the following: + + +- **ttl eq** **ttl_value** - Match packets if **ttl** in packet is equal to **ttl_value**. + +- **ttl gt** **ttl_value** - Match packets if **ttl** in packet is greater than **ttl_value**. + +- **ttl lt** **ttl_value** - Match packets if **ttl** in packet is less than **ttl_value**. + +- **ttl neq** **ttl_value** - Match packets if **ttl** in packet is not equal to **ttl_value**. + + +- Valid in ACLs applied to the control plane. + +- Validity in ACLs applied to data plane varies by switch platform. 
+ +- **log** - Triggers an informational log message to the console about the matching packet. + + +- Valid in ACLs applied to the control plane. + +- Validity in ACLs applied to data plane varies by switch platform. + + +**Examples** + + +- This command appends a **deny** statement at the end of the ACL. The **deny** statement drops OSPF packets from **10.1.1.0/24** to any host. + +``` +`switch(config)# **ip access-list text1** +switch(config-acl-text1)# **deny ospf 10.1.1.0/24 any** +switch(config-acl-text1)#` +``` + +- This command inserts a **deny** statement with the sequence number 65. The **deny** statement drops all PIM packets. + +``` +`switch(config-acl-text1)# **65 deny pim any any** +switch(config-acl-text1)#` +``` + + +### deny (IPv6 ACL) + + +The **deny** command adds a deny rule to an IPv6 Access Control List (ACL). Interfaces with the ACL drop packets filtered by a **deny** rule. Sequence numbers determine rule placement in the ACL. Sequence numbers for commands without numbers derive from adding **10** to the number of the ACL's last rule. + + +The **no deny** and **default deny** commands remove the specified rule from the configuration mode ACL. The no sequence number (ACLs) command also removes the specified rule from the ACL. + + +**Command Mode** + + +IPv6-ACL Configuration + + +**Command Syntax** + + +**seq_num deny protocol src_addr source_pt dest_addr dest_pt flag msg hop tracked dscp_filter log** + + +**no deny protocol src_addr source_pt dest_addr dest_pt flag msg hop tracked dscp_filter log** + + +**default deny protocol src_addr source_pt dest_addr dest_pt flag msg hop tracked dscp_filter log** + + +Note: Commands use a subset of the listed fields. Available parameters depend on specified protocol. Use CLI syntax assistance to view parameters for specific protocols when creating a deny rule. + + +**Parameters** + + +- **seq_num** - The sequence number assigned to the rule. 
Options include the following: + + +- **no parameter** - The number derived from adding **10** to the number of the ACL's last rule. + +- **1 - 4294967295** - A number assigned to an entry. + +- **protocol** - Specify the protocol field filter. Values include the following: + + +- **icmpv6** - Internet Control Message Protocol for version 6 (58) + +- **ipv6** - Internet Protocol IPv6 (41) + +- **ospf** - Open Shortest Path First (89) + +- **tcp** - Transmission Control Protocol (6) + +- **udp** - User Datagram Protocol (17) + +- **protocol_num** - An integer corresponding to an IP protocol. Values range from **0** to **255**. + +- **SRC_ADDR** and **DEST_ADDR** - Specify source and destination address filters. Options include the following: + + +- **ipv6_prefix** - Specify an IPv6 address with prefix length (CIDR notation). + +- **any** - Filter packets from all addresses. + +- **host** **ipv6_addr** - Specify an IPv6 host address. + +- **SRC_PT** and **DEST_PT** - Specify the source and destination port filters. Options include the following: + + +- **any** - Specify all ports. + +- **eq** **port-1** **port-2** ... **port-n** - Specify a list of ports. Maximum list size is 10 ports. + +- **neq** **port-1** **port-2** ... **port-n** - Specify the set of all ports not listed. Maximum list size is 10 ports. + +- **gt** **port** - Specify the set of ports with larger numbers than the listed port. + +- **lt** **port** - Specify the set of ports with smaller numbers than the listed port. + +- **range** **port_1** **port_2** - Specify a range of ports. + +- **HOP** - Filters by packet hop-limit value. Options include the following: + + +- **no parameter** - The rule does not use hop limit to filter packets. + +- **hop-limit eq** **hop_value** - Match packets if **hop-limit** value in packet equals **hop_value**. + +- **hop-limit gt** **hop_value** - Match packets if **hop-limit** in packet is greater than **hop_value**. 
+ +- **hop-limit lt** **hop_value** - Match packets if **hop-limit** in packet is less than **hop_value**. + +- **hop-limit neq** **hop_value** - Match packets if **hop-limit** in packet is not equal to **hop_value**. + +- **FLAG** - Specify flag bit filters (TCP packets). + +- **MSG** - Specify message type filters (ICMPv6 packets). + +- **tracked** - Specify rule filters packets in existing ICMP, UDP, or TCP connections. + + +- Valid in ACLs applied to the control plane. + +- Validity in ACLs applied to data plane varies by switch platform. + +- **DSCP_FILTER** - Filters packets by the DSCP value. Values include the following: + + +- **no parameter** - The rule does not use DSCP to filter packets. + +- **dscp** **dscp_value** - Match packets if DSCP field in packet equals the **dscp_value**. + +- **log** - Triggers an informational log message to the console about the matching packet. + + +- Valid in ACLs applied to the control plane. + +- Validity in ACLs applied to data plane varies by switch platform. + + +**Example** + + +This command appends a **deny** statement at the end of the ACL. The **deny** statement drops IPv6 packets from **3710:249a:c643:ef11::/64** to any host. + +``` +`switch(config)# **ipv6 access-list text1** +switch(config-acl-text1)# **deny ipv6 3710:249a:c643:ef11::/64 any** +switch(config-acl-text1)#` +``` + + +### deny (IPv6 Prefix List) + + +The **deny** command adds a deny rule in the IPv6 Prefix List Configuration Mode. Route map match commands use prefix lists to filter routes for redistribution into OSPF, RIP, or BGP domains. Routes are denied access when they match the prefix in a **deny** statement. + + +The **no deny** and **default deny** commands remove the specified rule from the IPv6 prefix list. The **no seq** command (see seq (IPv6 Prefix Lists)) also removes the specified rule from the prefix list. 
+ + +**Command Mode** + + +IPv6-pfx Configuration + + +**Command Syntax** + + +sequence deny ipv6_prefix mask + + +**Parameters** + + +- **sequence** - A sequence number assigned to the rule. Options include the following: + + +- **no parameter** - A number derived by adding **10** to the number of the list's last rule. + +- **seq** **seq_num** - A number specified by **seq_num**. Value ranges from **0 to 65535**. + +- **ipv6_prefix** - Specify the IPv6 prefix to filter routes (CIDR notation). + +- **mask** - Specify the range of the prefix to match. + + +- **no parameter** - Requires an exact match with the subnet mask. + +- **eq** **mask_e** - Specify a prefix length equal to **mask_e**. + +- **ge** **mask_g** - Specify a range from **mask_g** to **128**. + +- **le** **mask_l** - Specify a range from **subnet** mask length to **mask_l**. + +- **ge** **mask_g** **le** **mask_l** - Specify a range from **mask_g** to **mask_l**. + +- **mask_e**, **mask_l**, and **mask_g** - Values range from **1 to 128**. + + +**Example** + + +This command appends a **deny** statement at the end of the **route-five** prefix list. The **deny** statement denies redistribution of routes with the specified prefix. + +``` +`switch(config)# **ipv6 prefix-list route-five** +switch(config-ipv6-pfx)# **deny 3100::/64** +switch(config-ipv6-pfx)#` +``` + + +### deny (MAC ACL) + + +The **deny** command adds a deny rule to the MAC Access Control List (ACL) Configuration Mode. + + +Interfaces with an applied ACL drop packets filtered by a **deny** rule. Sequence numbers determine rule placement in the ACL. Sequence numbers for commands without numbers derive from adding **10** to the number of the ACL last rule. + + +The **no deny** and **default deny** commands remove the specified rule from the MAC Access Control List (ACL) Configuration Mode. The no (ACLs) command also removes the specified rule from the ACL. 
+ + +**Command Mode** + + +MAC-ACL Configuration Mode + + +**Command Syntax** + + +seq_num deny source_addr dest_addr [protocol][log] + + +no deny source_addr dest_addr [protocol][log] + + +default deny source_addr dest_addr [protocol][log] + + +**Parameters** + + +- **seq_num** - Sequence number assigned to the rule. Options include the following: + + +- **no parameter** - A number derived by adding **10** to the number of the ACL's last rule. + +- **1 - 4294967295** - A number assigned to entry. + +- **source_addr** and **dest_addr** - Configure source and destination address filters. Options include the following: + + +- **mac_address mac_mask** - Specify the MAC address and mask. + +- **any** - Filters packets from all addresses. + +- **mac_address** - Specifies a MAC address in 3x4 dotted hexadecimal notation (hhhh.hhhh.hhhh). + +- **mac_mask** - Specifies a MAC address mask in 3x4 dotted hexadecimal notation (hhhh.hhhh.hhhh). + +- **0** - Requires an exact match to filter. + +- **1** - Filters on any value. + +- **protocol** - Configure a protocol field filter. Values include the following: + + +- **aarp** - Appletalk Address Resolution Protocol (0x80f3). + +- **appletalk** - Appletalk (0x809b). + +- **arp** - Address Resolution Protocol (0x806). + +- **ip** - Internet Protocol Version 4 (0x800). + +- **ipx** - Internet Packet Exchange (0x8137). + +- **lldp** - LLDP (0x88cc). + +- **novell** - Novell (0x8138). + +- **rarp** - Reverse Address Resolution Protocol (0x8035). + +- **protocol_num** - An integer corresponding to a MAC protocol. Values range from **0 to 65535**. + +- **log** - Triggers an informational log message to the console about the matching packet. + + +**Examples** + + +- This command appends a deny statement at the end of the ACL. The deny statement drops all **aarp** packets from **10.1000.0000** through **10.1000.FFFF** to any host. 
+ +``` +`switch(config)# **mac access-list text1** +switch(config-mac-acl-text1)# **deny 10.1000.0000 0.0.FFFF any aarp**` +``` + +- This command inserts a deny statement with the sequence number **25**. The deny statement drops all packets through the interface. + +``` +`switch(config-mac-acl-text1)# **25 deny any any**` +``` + + +### deny (Standard IPv4 ACL) + + +The **deny** command adds a deny rule to the Standard IPv4 Access Control List (ACL) Configuration Mode. Standard ACL rules filter on the source field. + + +Interfaces with an applied ACL drop packets filtered by a **deny** rule. Sequence numbers determine rule placement in the ACL. Sequence numbers for commands without numbers derive from adding **10** to the number of the ACL last rule. + + +The **no deny** and **default deny** commands remove the specified rule from the Standard IPv4 Access Control List (ACL) Configuration Mode. The no sequence number (ACLs) command also removes the specified rule from the ACL. + + +**Command Mode** + + +Std-ACL Configuration + + +**Command Syntax** + + +[seq_num] deny source_addr log + + +no deny source_addr log + + +default deny source_addr log + + +**Parameters** + + +- **seq_num** - Specify the sequence number assigned to the rule. Options include the following: + + +- **no parameter** - A number derived by adding **10** to the number of the ACL last rule. + +- **1 - 4294967295** - A number assigned to entry. + +- **source_addr** - Specify a source address filter. Options include the following: + + +- **network_addr** - Specify a subnet address as a CIDR or address-mask. + +- **any** - Filter packets from all addresses. + +- **host** **ip_addr** - Specify an IP address in dotted decimal notation. + +Subnet addresses support noncontiguous masks. + +- **log** - Triggers an informational log message to the console about the matching packet. + + +- Valid in ACLs applied to the control plane. + +- Validity in ACLs applied to data plane varies by switch platform. 
+ + +**Example** + + +This command appends a **deny** statement at the end of the ACL. The **deny** statement drops packets from **10.1.1.1/24**. + +``` +`switch(config)# **ip access-list standard text1** +switch(config-std-acl-text1)# **deny 10.1.1.1/24** +switch(config-std-acl-text1)#` +``` + + +### deny (Standard IPv6 ACL) + + +The **deny** command adds a deny rule to the Standard IPv6 Access Control List (ACL) Configuration Mode. Standard ACL rules filter on the source field. + + +Interfaces with an applied ACL drop packets filtered by a **deny** rule. Sequence numbers determine rule placement in the ACL. Sequence numbers for commands without numbers derive from adding **10** to the number of the ACL's last rule. + + +The **no deny** and **default deny** commands remove the specified rule from the Standard IPv6 Access Control List (ACL) Configuration Mode. The no (ACLs) command also removes the specified rule from the ACL. + + +**Command Mode** + + +Std-IPv6-ACL Configuration + + +**Command Syntax** + + +seq_num deny source_addr + + +no deny source_addr + + +default deny source_addr + + +**Parameters** + + +- **seq_num** - Sequence number assigned to the rule. Options include: + + +- **no parameter** - A number derived by adding **10** to the number of the ACL's last rule. + +- **1 - 4294967295** - The number assigned to entry. + +- **source_addr** - The source address filter configured for the ACL. Options include: + + +- **ipv6_prefix** - IPv6 address with prefix length (CIDR notation). + +- **any** - Filter all packets from all addresses. + +- **host** **ipv6_addr** - Specify the IPv6 host address. + + +**Example** + + +This command appends a **deny** statement at the end of the ACL. The **deny** statement drops packets from **2103::/64**. 
+ +``` +`switch(config)# **ipv6 access-list standard text1** +switch(config-std-acl-ipv6-text1)# **deny 2103::/64** +switch(config-std-acl-ipv6-text1)#` +``` + + +### description (route map) + + +The **description** command adds a text string to the configuration mode route map. The string has no functional impact on the route map. + + +The **no description** and **default description** commands remove the text string from the configuration mode route map by deleting the corresponding **description** command from ***running-config***. + + +**Command Mode** + + +Route-Map Configuration + + +**Command Syntax** + + +description label_text + + +no description + + +default description + + +**Parameter** + + +**label_text** - Character string assigned to the route map configuration. + + +**Related Command** + + +route-map + + +**Example** + + +These commands add description text to the **XYZ-1** route map. + +``` +`switch(config)# **route-map XYZ-1** +switch(config-route-map-XYZ-1)# **description This is the first map.** +switch(config-route-map-XYZ-1)# **exit** +switch(config)# **show route-map XYZ-1** +route-map XYZ-1 permit 10 + Description: + description This is the first map. + Match clauses: + Set clauses: +switch(config)#` +``` + + +### hardware access-list resource sharing vlan in + + +The **hardware access-list resource sharing vlan in** command enables the IPv4 Ingress Sharing of hardware resources on the switch when the same ACL applies to different VLANs. + + +The **no hardware access-list resource sharing vlan in** command disables the IPv4 Ingress Sharing of hardware resources on the switch. + + +**Command Mode** + + +Global Configuration + + +**Command Syntax** + + +hardware access-list resource sharing vlan [ipv4 | ipv6] in + + +no hardware access-list resource sharing vlan in + + +**Guidelines** + + +- Compatible only with the DCS-7010 and DCS-7050x series switches. 
+ +- Enabling IPv4 Ingress Sharing requires the restart of software agents on the platform. This is a disruptive process and impacts traffic forwarding. + + +Use the **show platform trident** command to verify the Ingress IPv4 Sharing information. + + +### hardware access-list resource sharing vlan ipv4 out + + +The **hardware access-list resource sharing vlan ipv4 out** command enables IPv4 Egress RACL TCAM sharing on the switch. + + +The **no hardware access-list resource sharing vlan ipv4 out** command disables the IPv4 Egress RACL TCAM sharing on the switch. By default, the switch enables IPv4 Egress RACL sharing. + + +**Command Mode** + + +Global Configuration + + +**Command Syntax** + + +hardware access-list resource sharing vlan ipv4 out + + +no hardware access-list resource sharing vlan ipv4 out + + +**Guidelines** + + +- Compatible only with the DCS-7280E and DCS-7500E series switches. + +- Disabling IPv4 RACL sharing requires the restart of software agents on the platform. This is a disruptive process and impacts traffic forwarding. + +- Enabling IPv4 RACL sharing, if previously disabled from the default configuration, requires the restart of software agents on the platform. This is a disruptive process and impacts traffic forwarding. Enabling IPv4 RACL sharing if uRPF is configured disables uRPF. + +- Use the **show running-config all | include sharing** command to verify whether or not sharing for egress IPv4 RACLs is enabled. + + +**Example** + + +This command verifies if IPv4 RACL sharing is enabled or disabled. 
+ +``` +`switch# **show running-config all | include sharing** + +hardware access-list resource sharing vlan ipv4 out + ---->It returns the following output if IPv4 RACL sharing is enabled.` +``` + + +### hardware access-list update default-result permit + + +The **hardware access-list update default-result permit** command configures the switch to permit all traffic on Ethernet and VLAN interfaces with ACLs applied to them while modifying the ACLs. Permits traffic when modifying the ACL using one of the **ip access-list** commands, and ends when exiting the ACL Configuration Mode and rules populated in hardware. EOS disables this by default. + + +The **no hardware access-list update default-result permit** and **default hardware access-list update default-result permit** commands restore the switch to the default state and blocks traffic during ACL modifications by removing the corresponding **hardware access-list update default-result permit** command from the ***running-config***. + + +**Command Mode** + + +Global Configuration + + +**Command Syntax** + + +hardware access-list update default-result permit + + +no hardware access-list update default-result permit + + +default hardware access-list update default-result permit + + +**Restrictions** + + +This command is available on the Arista 7050X, 7060X, 7150, 7250X, 7280, 7280R, 7300X, 7320X, and 7500 series switches. + + +When enabled, static NAT, and ACL-based mirroring are affected during ACL updates. + + +**Example** + + +This command configures a 7150 series switch to permit all traffic on Ethernet and VLAN interfaces with applied ACLs while modifying the ACLs. 
+ +``` +`switch(config)# **hardware access-list update default-result permit** +switch(config)#` +``` + + +### hardware counter feature acl out + + +The **hardware counter feature acl out** command enables egress ACL hardware counters for IPv4 or IPv6 and counts the number of packets matching rules associated with egress ACLs applied to various interfaces on a switch. + + +The **no hardware counter feature acl out** and **default hardware counter feature acl out** commands disable or return the egress ACL hardware counters to the default state. + + +**Command Mode** + + +Global Configuration + + +**Command Syntax** + + +hardware counter feature acl out [options [ipv4 | ipv6]] + + +no hardware counter feature acl out [options [ipv4 | ipv6]] + + +default hardware counter feature acl out [options [ipv4 | ipv6]] + + +**Parameters** + + +- **options** - ACL hardware counter options include the following: + + +- **ipv4** - Specify an IPv4 address. + +- **ipv6** - Specify an IPv6 address. + + +**Examples** + + +- This command enables IPv4 egress ACL hardware counters. + +``` +`switch(config)# **hardware counter feature acl out ipv4** +switch(config)#` +``` + +- This command disables IPv4 egress ACL hardware counters. + +``` +`switch(config)# **no hardware counter feature acl out ipv4** +switch(config)#` +``` + + +### ip access-group (Service ACLs) + + +The **ip access-group** (Service ACLs) command configures a Service ACL to apply to a control-plane service. Specify the service by the command mode used to apply the Service ACL. + + +The **no ip access-group** and **default ip access-group** commands remove the corresponding **ip access-group** (Service ACLs) command from ***running-config***. 
+ + +**Command Mode** + + +Mgmt-SSH Configuration + + +Mgmt-API Configuration + + +Router-BGP Configuration + + +Router-OSPF Configuration + + +Router-IGMP Configuration + + +MPLS-LDP Configuration + + +Queue-Monitor-Streaming Configuration + + +MPLS-Ping Configuration + + +Mgmt-Telnet Configuration + + +**Command Syntax** + + +ip access-group acl_name [vrf vrf_name][in] + + +no ip access-group acl_name [vrf vrf_name][in] + + +default ip access-group acl_name [vrf vrf_name][in] + + +**Parameters** + + +Parameters vary by process. + + +- **acl_name** - Specify the name of the Service ACL assigned to control-plane service. + +- **vrf** **vrf_name** - Specifies the VRF to apply the Service ACL. + +- **in** - Specifies inbound connections or packets only. Requires a keyword for SSH and Telnet services. + + +**Example** + + +These commands apply the Service ACL **bgpacl** to the BGP routing protocol in VRF **purple**. + +``` +`(config)# **router bgp 5** +(config-router-bgp)# **vrf purple** +(config-router-bgp-vrf-purple)# **ip access-group bgpacl**` +``` + + +For additional configuration examples, see Configuring Service ACLs and Displaying Status and Counters. + + +### ip access-group + + +The **ip access-group** command applies an IPv4 or standard IPv4 Access Control List (ACL) to an interface or subinterface in the Interface Configuration Mode. + + +The **no ip access-group** and **default ip access-group** commands remove the corresponding **ip access-group** command from ***running-config***. + + +**Command Mode** + + +Interface-Ethernet Configuration + + +Interface-Port-Channel Configuration + + +Interface-VLAN Configuration + + +**Command Syntax** + + +ip access-group list_name [direction [in | out]] + + +no ip access-group list_name [direction [in | out]] + + +default ip access-group list_name [direction [in | out]] + + +**Parameters** + + +- **list_name** - Specify the name of ACL assigned to interface. 
+ +- direction Transmission direction of packets, relative to interface. Valid options include the following: + + +- **in** - Inbound packets. + +- **out** - Outbound packets. + + +**Considerations** + + +Filtering of outbound packets by ACLs not supported on Petra platform switches. + + +Filtering of outbound packets by ACLs on FM6000 switches supported on physical interfaces only (Ethernet and port channels). + + +ACLs on sub-interfaces are supported on DCS-7280E, DCS-7500E, DCS-7280R, and DCS-7500R. + + +**Example** + + +These commands apply the IPv4 ACL named **test2** to **interface ethernet 3**. + +``` +`switch(config)# **interface ethernet 3** +switch(config-if-Et3)# **ip access-group test2 in** +switch(config-if-Et3)#` +``` + + +### ip access-list + + +The **ip access-list** command places the switch in ACL Configuration Mode, a group change mode that modifies an IPv4 access control list. The command specifies the name of the IPv4 ACL that subsequent commands modify and creates an ACL if it references a nonexistent list. All changes in a group change mode edit session are pending until the end of the session. + + +The **exit** command saves pending ACL changes to ***running-config***, then returns the switch to Global Configuration Mode. ACL changes are also saved by entering a different configuration mode. + + +The **abort** command discards pending ACL changes, returning the switch to Global Configuration Mode. + + +The **no ip access-list** and **default ip access-list** commands delete the specified IPv4 ACL. + + +**Command Mode** + + +Global Configuration + + +**Command Syntax** + + +ip access-list list_name + + +no ip access-list list_name + + +default ip access-list list_name + + +**Parameter** + + +**list_name** - Specify the name of the ACL. Must begin with an alphabetic character. Cannot contain spaces or quotation marks. 
+ + +**Commands Available in ACL configuration mode:** + + +- deny (IPv4 ACL) + +- no sequence number + +- permit (IPv4 ACL) + +- remark + +- resequence (ACLs) + +- show (ACL configuration modes) + + +**Related Commands:** + + +- ip access-list standard - Enters ***std-acl*** configuration mode for editing standard IP ACLs. + +- show ip access-lists - Displays IP and standard ACLs. + + +**Examples** + + +- This command places the switch in ACL configuration mode to modify the **filter1** IPv4 ACL. + +``` +`switch(config)# **ip access-list filter1** +switch(config-acl-filter1)#` +``` + +- This command saves changes to **filter1** ACL, then returns the switch to Global Configuration Mode. + +``` +`switch(config-acl-filter1)# **exit** +switch(config)#` +``` + +- This command discards changes to **filter1**, then returns the switch to Global Configuration Mode. + +``` +`switch(config-acl-filter1)# **abort** +switch(config)#` +``` + + +### ip access-list standard + + +The **ip access-list standard** command places the switch in STD-ACL Configuration Mode, a group change mode that modifies a standard IPv4 access control list. The command specifies the name of the standard IPv4 ACL that subsequent commands modify, and creates an ACL if it references a nonexistent list. All group change mode edit session changes are pending until the session ends. + + +The **exit** command saves pending ACL changes to ***running-config***, then returns the switch to Global Configuration Mode. Pending changes are also saved by entering a different configuration mode. + + +The **abort** command discards pending ACL changes, returning the switch to global configuration mode. + + +The **no ip access-list standard** and **default ip access-list standard** commands delete the specified ACL. 
+ + +**Command Mode** + + +Global Configuration + + +**Command Syntax** + + +ip access-list standard list_name + + +no ip access-list standard list_name + + +default ip access-list standard list_name + + +**Parameter** + + +**list_name** - Specify the name of standard ACL. Must begin with an alphabetic character. Cannot contain spaces or quotation marks. + + +**Commands Available in std-ACL configuration mode:** + + +- deny (Standard IPv4 ACL) + +- no sequence number + +- permit (Standard IPv4 ACL) + +- remark + +- resequence (ACLs) + +- show (ACL configuration modes) + + +**Related Commands** + + +- ip access-list - Enters ACL configuration mode for editing IPv4 ACLs. + +- show ip access-lists - Displays IPv4 and standard IPv4 ACLs. + + +**Examples** + + +- This command places the switch in std-ACL configuration mode to modify the **filter2** IPv4 ACL. + +``` +`switch(config)# **ip access-list standard filter2** +switch(config-std-acl-filter2)#` +``` + +- This command saves changes to **filter2** ACL, then returns the switch to the Global Configuration Mode. + +``` +`switch(config-std-acl-filter2)# **exit** +switch(config)#` +``` + +- This command discards changes to **filter2**, then returns the switch to the Global Configuration Mode. + +``` +`switch(config-std-acl-filter2)# **abort** +switch(config)#` +``` + + +### ip prefix-list + + +The **ip prefix-list** command creates a prefix list or adds an entry to an existing list. Route map match commands use prefix lists to filter routes for redistribution into OSPF, RIP, or BGP domains. + + +A prefix list comprises all prefix list entries with the same label. The sequence numbers of the rules in a prefix list specify the order for applying rules to a route evaluated by the match command. + + +The **no ip prefix-list** and **default ip prefix-list**commands delete the specified prefix list entry by removing the corresponding **ip prefix-list** statement from ***running-config***. 
If the **no** or **default ip prefix-list** command does not list a sequence number, the command deletes all entries of the prefix list. + + +**Command Mode** + + +Global Configuration + + +**Command Syntax** + + +ip prefix-list list_name [deny | permit] [seq index] network_addr [mask] resequence seq_number remark comment + + +no ip prefix-list list_name seq [index] + + +default ip prefix-list list_name seq [index] + + +**Parameters** + + +- **list_name** - Specify a name for the prefix list. + +- **seq** **seq_num**- Specify the sequence number for the prefix list entry Value ranges from **0** to **65535**. + +- **permit | deny** - Specifies route access when a route matches IP prefix list. Options include: + + +- **permit** - Allows access when matching the specified subnet. + +- **deny** - Denies access when matching the specified subnet. + +- **network_addr** - Specify the subnet to filter routes. Use either a CIDR or address-mask format. + +- **MASK** - Specifies the range of the prefix to be matched. + + +- **no parameter** Exact match with the subnet mask is required. + +- **eq** **mask_e** Prefix length is equal to **mask_e**. + +- **ge** **mask_g** Range is from **1** to **32**. + +- **le** **mask_l** Range is from **subnet** mask length to **mask_l**. + +- **ge** **mask_l** **le** **mask_g** Range is from **mask_g** to **mask_l**. + +- **mask_e**, **mask_l**, and **mask_g** range from **1** to **32**. When **le** and **ge** are specified, **subnet** **mask** **mask_g**>**mask_l**. + +- **remark comment** - Add a comment to the prefix list configuration. + + +**Example** + + +- This command places the switch in IPv4 prefix list configuration mode to create an IPv4 prefix list named **route-one**. + +``` +`switch(config)# **ip prefix-list route-one** +switch(config-ip-pfx)#` +``` + +- These commands create four different rules for the prefix-list named **route-one**. 
+ +``` +`switch(config)# **ip prefix-list route-one** +switch(config-ip-pfx)# **seq 10 deny 10.1.1.0/24** +switch(config-ip-pfx)# **seq 20 deny 10.1.0.0/16** +switch(config-ip-pfx)# **seq 30 permit 12.15.4.9/32** +switch(config-ip-pfx)# **seq 40 deny 1.1.1.0/24**` +``` + + +### ipv6 access-group + + +The **ipv6 access-group**command applies an IPv6 or standard IPv6 Access Control List (ACL) to the configuration mode interface. + + +The **no ipv6 access-group** and **default ipv6 access-group** commands remove the corresponding **ipv6 access-group** command from ***running-config***. + + +**Command Mode** + + +Interface-Ethernet Configuration + + +Interface-Port-Channel Configuration + + +Interface-VLAN Configuration + + +**Command Syntax** + + +**ipv6 access-group list_name [in | out]** + + +**no ipv6 access-group list_name [in | out]** + + +**default ipv6 access-group list_name [in | out]** + + +**Parameters** + + +- **list_name** - Specify the name of the ACL assigned to interface. + +- **[in | out]** - Specify the transmission direction of packets, relative to interface. Valid options include the following: + + +- **in** Inbound packets. + +- **out** Outbound packets. + + +**Examples** + + +These commands assign the IPv6 ACL named **test2** to the **interface ethernet 3**. + +``` +`switch(config)# **interface ethernet 3** +switch(config-if-Et3)# **ipv6 access-group test2 in** +switch(config-if-Et3)#` +``` + + +### ipv6 access-group (Service ACLs) + + +The **ipv6 access-group** (Service ACLs) command configures an IPv6 or standard IPv6 Service ACL to be applied by a control-plane service. Specify the service with the command mode to apply the Service ACL. + + +The **no ipv6 access-group** (Service ACLs) and **default ipv6 access-group** (Service ACLs) commands remove the corresponding **ipv6 access-group**(Service ACLs) command from ***running-config***. 
+ + +**Command Mode** + + +Mgmt-SSH Configuration + + +Mgmt-API Configuration + + +Router-BGP Configuration + + +Router-OSPF Configuration + + +MPLS-LDP Configuration + + +Queue-Monitor-Streaming Configuration + + +MPLS-Ping Configuration + + +Mgmt-Telnet Configuration + + +**Command Syntax** + + +ipv6 access-group ipv6_acl_name [vrf vrf_name][in] + + +no ipv6 access-group [ipv6_acl_name][vrfvrf_name][in] + + +default ipv6 access-group ipv6_acl_name [vrf vrf_name][in] + + +**Parameters** + + +Parameters vary by process. + + +- **ipv6_acl_name** - Specify the name of the IPv6 Service ACL assigned to control-plane service. + +- **vrf** **vrf_name** - Specifies the VRF to apply the Service ACL. + +- **in** - Specifies inbound connections or packets only and requires a keyword for SSH and Telnet services. + + +**Example** + + +These commands apply the IPv6 Service ACL **bgpacl** to the BGP routing protocol in VRF **purple**. + +``` +`(config)# **router bgp 5** +(config-router-bgp)# **vrf purple** +(config-router-bgp-vrf-purple)# **ipv6 access-group bgpacl**` +``` + + +For additional configuration examples, see Configuring Service ACLs and Displaying Status and Counters. + + +### ipv6 access-list + + +The **ipv6 access-list** command places the switch in ***IPv6-ACL*** Configuration Mode, a group change mode that modifies an IPv6 access control list. The command specifies the name of the IPv6 ACL that subsequent commands modify and creates an ACL if it references a nonexistent list. All changes in a group change mode edit session pend until the end of the session. + + +The **exit** command saves pending ACL changes to ***running-config***, then returns the switch to global configuration mode. ACL changes are also saved by entering a different configuration mode. + + +The **abort** command discards pending ACL changes, returning the switch to Global Configuration Mode. 
+ + +The **no ipv6 access-list** and **default ipv6 access-list** commands delete the specified IPv6 ACL. + + +**Command Mode** + + +Global Configuration + + +**Command Syntax** + + +ipv6 access-list list_name + + +no ipv6 access-list **list_name** + + +default ipv6 access-list list_name + + +**Parameters** + + +**list_name** - Specify a name for the ACL. Must begin with an alphabetic character and cannot contain spaces or quotation marks. + + +**Commands Available in IPv6-ACL configuration mode:** + + +- deny (IPv6 ACL) + +- no (ACLs) + +- permit (IPv6 ACL) + +- remark + +- resequence (ACLs) + +- show (ACL configuration modes) + + +**Related Commands** + + +- ipv6 access-list standard Enters ***std-ipv6-acl*** configuration mode for editing standard IPv6 ACLs. + +- show ipv6 access-lists Displays IPv6 and standard IPv6 ACLs. + + +**Examples** + + +- This command places the switch in IPv6-ACL configuration mode to modify the **filter1** IPv6 ACL. + +``` +`switch(config)# **ipv6 access-list filter1** +switch(config-ipv6-acl-filter1)#` +``` + +- This command saves changes to **filter1** ACL, then returns the switch to global configuration mode. + +``` +`switch(config-ipv6-acl-filter1)# **exit** +switch(config)#` +``` + +- This command discards changes to **filter1**, then returns the switch to global configuration mode. + +``` +`switch(config-ipv6-acl-filter1)# **abort** +switch(config)#` +``` + + +### ipv6 access-list standard + + +The **ipv6 access-list standard** command places the switch in std-IPv6-ACL-configuration mode, a group change mode that modifies a standard IPv6 access control list. The command specifies the name of the standard IPv6 ACL that subsequent commands modify and creates an ACL if it references a nonexistent list. All group change mode edit session changes are pending until the session ends. + + +The **exit** command saves pending ACL changes to ***running-config***, then returns the switch to Global Configuration Mode. 
Pending changes are also saved by entering a different configuration mode. + + +The **abort** command discards pending ACL changes, returning the switch to global configuration mode. + + +The **no ipv6 access-list standard** and **default ipv6 access-list standard** commands delete the specified ACL. + + +**Command Mode** + + +Global Configuration + + +**Command Syntax** + + +ipv6 access-list standard list_name + + +no ipv6 access-list standard list_name + + +default ipv6 access-list standard list_name + + +**Parameters** + + +**list_name** - Specify a name for the ACL. Must begin with an alphabetic character and cannot contain spaces or quotation marks. + + +**Commands Available in std-IPv6-ACL configuration mode:** + + +- deny (Standard IPv6 ACL) + +- no (ACLs) + +- permit (Standard IPv6 ACL) + +- remark + +- resequence (ACLs) + +- show (ACL configuration modes) + + +**Related Commands** + + +- ipv6 access-list - Enters IPv6-ACL configuration mode for editing IPv6 ACLs. + +- show ipv6 access-lists - Displays IPv6 and standard IPv6 ACLs. + + +**Examples** + + +- This command places the switch in Std-IPv6 ACL configuration mode to modify the **filter2** ACL. + +``` +`switch(config)# **ipv6 access-list standard filter2** +switch(config-std-ipv6-acl-filter2)#` +``` + +- This command saves changes to **filter2** ACL, then returns the switch to global configuration mode. + +``` +`switch(config-std-ipv6-acl-filter2)# **exit** +switch(config)#` +``` + +- This command discards changes to **filter2**, then returns the switch to global configuration mode. + +``` +`switch(config-std-ipv6-acl-filter2)# **abort** +switch(config)#` +``` + + +### ipv6 prefix-list + + +The **ipv6 prefix-list** command places the switch in ***IPv6 prefix-list*** configuration mode, which is a group change mode that modifies an IPv6 prefix list. The command specifies the name of the IPv6 prefix list that subsequent commands modify and creates a prefix list if it references a nonexistent list. 
All changes in a group change mode edit session are pending until the end of the session. + + +The **exit** command saves pending prefix list changes to ***running-config***, then returns the switch to global configuration mode. ACL changes are also saved by entering a different configuration mode. + + +The **abort** command discards pending changes, returning the switch to global configuration mode. + + +The **no ipv6 prefix-list** and **default ipv6 prefix-list** commands delete the specified IPv6 prefix list. + + +**Command Mode** + + +Global Configuration + + +**Command Syntax** + + +ipv6 prefix-list list_name + + +no ipv6 prefix-list list_name + + +default ipv6 prefix-list list_name + + +**Parameter** + + +**list_name** Name of prefix list. Must begin with an alphabetic character. Cannot contain spaces or quotation marks. + + +**Commands Available in IPv6-pfx configuration mode:** + + +- deny (IPv6 Prefix List) + +- permit (IPv6 Prefix List) + +- seq (IPv6 Prefix Lists) + + +**Examples** + + +- This command places the switch in ***IPv6 prefix-list*** configuration mode to modify the **route-five** prefix list. + +``` +`switch(config)# **ipv6 prefix-list route-five** +switch(config-ipv6-pfx)#` +``` + +- This command saves changes to the prefix list, then returns the switch to global configuration mode. + +``` +`switch(config-ipv6-pfx)# **exit** +switch(config)#` +``` + +- This command saves changes to the prefix list, then places the switch in ***interface-ethernet*** mode. + +``` +`**switch(config-ipv6-pfx)# interface ethernet 3 +switch(config-if-Et3)#**` +``` + +- This command discards changes to the prefix list, then returns the switch to global configuration mode. + +``` +`switch(config-ipv6-pfx)# **abort** +switch(config)#` +``` + + +### mac access-group + + +The **mac access-group** command applies a MAC Access Control List (MAC ACL) when in the Interface Configuration Mode. 
+ + +The **no mac access-group** and **default mac access-group** commands remove the specified **mac access-group** command from ***running-config***. + + +**Command Mode** + + +Interface-Ethernet Configuration + + +Interface-Port-Channel Configuration + + +**Command Syntax** + + +mac access-group list_name [direction [in | out]] + + +no mac access-group list_name[direction [in | out]] + + +default mac access-group list_name [direction [in | out]] + + +**Parameters** + + +- **list_name** - Specify the name of MAC ACL. + +- **direction** - Specify the transmission direction of packets, relative to interface. Valid options include: + + +- **in** Inbound packets. + +- **out** Outbound packets. + + +**Restrictions** + + +Only Helix, Trident, and Trident II platform switches support filtering of outbound packets by MAC ACLs. + + +**Example** + + +These commands assign the MAC ACL named **mtest2** to **interface ethernet 3** to filter inbound packets. + +``` +`switch(config)# **interface ethernet 3** +switch(config-if-Et3)# **mac access-group mtest2 in** +switch(config-if-Et3)#` +``` + + +### mac access-list + + +The **mac access-list** command places the switch in ***MAC-ACL*** Configuration Mode, a group change mode that modifies a MAC access control list. The command specifies the name of the MAC ACL that subsequent commands modify and creates an ACL if it references a nonexistent list. All changes in a group change mode edit session are pending until the end of the session. + + +The **exit** command saves pending ACL changes to ***running-config***, then returns the switch to Global Configuration Mode. ACL changes are also saved by entering a different configuration mode. + + +The **abort** command discards pending ACL changes, returning the switch to Global Configuration Mode. + + +The **no mac access-list** and **default mac access-list**commands delete the specified list. 
+ + +**Command Mode** + + +Global Configuration + + +**Command Syntax** + + +mac access-list list_name + + +no mac access-list list_name + + +default mac access-list list_name + + +**Parameter** + + +**list_name** - Specify the name of the MAC ACL. Names must begin with an alphabetic character and cannot contain a space or quotation mark. + + +**Commands Available in MAC-ACL Configuration Mode:** + + +- deny (MAC ACL) + +- no (ACLs) + +- permit (MAC ACL) + +- remark + +- resequence (ACLs) + +- show (ACL configuration modes) + + +**Examples** + + +- This command places the switch in ***MAC-ACL*** configuration mode to modify the **mfilter1** MAC ACL. + +``` +`switch(config)# **mac access-list mfilter1** +switch(config-mac-acl-mfilter1)#` +``` + +- This command saves changes to **mfilter1** ACL, then returns the switch to global configuration mode. + +``` +`switch(config-mac-acl-mfilter1)# **exit** +switch(config)#` +``` + +- This command saves changes to **mfilter1** ACL, then places the switch in ***interface-ethernet*** configuration mode. + +``` +`switch(config-mac-acl-mfilter1)# **interface ethernet 3** +switch(config-if-Et3)#` +``` + +- This command discards changes to **mfilter1**, then returns the switch to global configuration mode. + +``` +`switch(config-mac-acl-mfilter1)# **abort** +switch(config)#` +``` + + +### match (route-map) + + +The **match** command creates a route map statement entry that specifies one route filtering command. When a statement contains multiple match commands, the permit or deny filter applies to a route only if the properties equal the corresponding parameters in each **match** command. When a route properties do not equal the command parameters, the route is evaluated against the next statement in the route map, as determined by sequence number. If all statements fail to permit or deny the route, the route is denied. 
+ + +The **no match** and **default match** commands remove the **match**command from the configuration mode route map statement by deleting the corresponding command from ***running-config***. + + +Note: The route map configuration supports only standard ACL. + + +**Command Mode** + + +Route-Map Configuration + + +**Command Syntax** + + +match condition + + +no match condition + + +default match condition + + +**Parameters** + + +- **condition** - Specifies criteria for evaluating a route. Options include the following: + + +- **aggregate-role** - Specify the role in BGP contributor-aggregate relation. Options include the following: + + +- **contributor** - Specify BGP aggregate contributor. + +- **aggregate-attributes** - Specify the Route map to apply against the aggregate route. + +- **as** **1** to **4294967295** - Specify the BGP Autonomous System number. + +- **as-path** **path_name** - Specify the BGP Autonomous System path access list. + +- as-path length { <= | = | => } **length** + + +- <= - Length of AS path must be less than or equal to specified value. + +- = - Length of AS path must be equal to specified value. + +- => Length of AS path must be equal to or greater than specified value. + +- **length** - Value for AS path length comparison (0-4000). + +- **community** **name** BGP community. Options include the following: + + +- **listname** - Specify the BGP community. + +- **listname** - Specify the **exact-match** BGP community. The list must match the present set. + +- **extcommunity** **listname** - Specify the BGP extended community. Options include the following: + + +- **listname** - Specify the BGP community. + +- **listname** - Specify the **exact-match** BGP community. The list must match the present set. + +- **interface** **intf_name** - Specifies an interface. Options include the following:: + + +- **ethernet** **e_num** - Specify the Ethernet interface. + +- **loopback** **l_num** - Specify the Loopback interface. 
+ +- **port-channel** **p_num** - Specify the Port channel interface. + +- **vlan** **v_num** - Specify the VLAN interface. + + +**invert-result** - Specify the Invert sub route map result. + +- **ip address** **LIST** - Specify the IPv4 address filtered by an ACL or prefix list. Options include the following: + + +- **access-list** **acl_name** - Specify the IPv4 address filtered by access control list (ACL). + +- **prefix-list** **plv4_name**- Specify the IPv4 address filtered by IP prefix list. + +- **ip next-hop prefix-list** **plv4_name** - Specify the IPv4 next-hop filtered by IP prefix list. + +- **ip resolved-next-hop prefix-list** **plv4_name** - Specify the IPv4 resolved next-hop filtered by IP prefix list. + +- **ipv6 address prefix-list** **plv6_name** - Specify the IPv6 address filtered by IPv6 prefix list. + +- **ipv6 next-hop prefix-list** **plv6_name** - Specify the IPv6 next-hop filtered by IPv6 prefix list. + +- **ipv6 resolved-next-hop prefix-list** **plv6_name** - Specify the IPv6 resolved nexthop filtered by IPv6 prefix list. + +- **local-preference** **1** to **4294967295** - Specify the BGP local preference metric. + +- **metric** **1** to **4294967295** - Specify the route metric. + +- **metric-type** **OSPF_TYPE** - Specify the OSPF metric type. Options include the following: + + +- **type-1** OSPF type 1 metric. + +- **type-2** OSPF type 2 metric. + +- **source-protocol** **protocol_type** - Specify the Routing protocol of route source. Options include the following: + + +- **bgp** + +- **connected** + +- **ospf** + +- **rip** + +- **static** + +- **tag** **1** to **4294967295** Route tag. + + +**Related Command** + + +route-map + + +**Examples** + + +- This command creates a ***route map*** match rule that filters routes from BGP **as 15**. 
+ +``` +`switch(config)# **route-map map1** +switch(config-route-map-map1)# **match as 15** +switch(config-route-map-map1)#` +``` + +- This command adds a **route-map** match rule that the AS path be less than or equal to 300. + +``` +`switch(config-route-map-map1)# **match as-path length <= 300** +switch(config-route-map-map1)#` +``` + + +### no sequence number + + +The **no sequence number** command removes the rule with the specified sequence number from the ACL. The **default ** command also removes the specified rule. + + +**Command Mode** + + +ACL Configuration + + +IPv6-ACL Configuration + + +Std-ACL Configuration + + +Std-IPv6-ACL Configuration + + +MAC-ACL Configuration + + +**Command Syntax** + + +no line_num + + +default line_num + + +**Parameter** + + +**line_num** - Specify the sequence number of rule to delete. Values range from **1 - 4294967295**. + + +**Example** + + +This command removes statement **30** from the list. + +``` +`switch(config-acl-test1)# **show IP Access Lists test1** + 10 permit ip 10.10.10.0/24 any + 20 permit ip any host 10.20.10.1 + 30 deny ip host 10.10.10.1 host 10.20.10.1 + 40 permit ip any any + 50 remark end of list +Total rules configured: 5 + Configured on Ingress: Et1/1 + Active on Ingress: Et1/1 + +switch(config-acl-test1)# **no 30** +switch(config-acl-test1)# **show IP Access Lists** + 20 permit ip any host 10.20.10.1 + 40 permit ip any any + 50 remark end of list +Total rules configured: 4 + Configured on Ingress: Et1/1 + Active on Ingress: Et1/1` +``` + + +### permit (IPv4 ACL) + + +The **permit** command adds a permit rule to the configuration mode IPv4 Access Control List (ACL). Interfaces with the applied ACL accept packets filtered by a permit rule the ACL is applied. Sequence numbers determine rule placement in the ACL. Sequence numbers for commands without numbers derive from adding 10 to the number of the ACL last rule. 
+ + +The **no permit** and **default permit** commands remove the specified rule from the configuration mode ACL. The no (ACLs) command also removes a specified rule from the ACL. + + +**Command Mode** + + +ACL Configuration + + +**Command Syntax** + + +seq_num permit protocol src_addr source_pt dest_addr dest_pt flags msg fragments tracked dscp_filter ttl_filter log + + +no permit protocol src_addr source_pt dest_addr dest_pt flags msg fragments tracked dscp_filter ttl_filter log + + +default permit protocol src_addr source_pt dest_addr dest_pt flags msg fragments tracked dscp_filter ttl_filter log + + +Commands use a subset of the listed fields and available parameters depend on specified protocol. + + +**Parameters** + + +- **seq_num**- Specify the sequence number assigned to the rule. Options include the following: + + +- **no parameter** - A number derived from adding **10** to the number of the ACL's last rule. + +- **1 - 4294967295** - Specify the number assigned to entry. + +- **protocol** - Specify the protocol field filter. Options include the following: + + +- **ahp**- Authentication Header Protocol (51) + +- **gre** - Generic Routing Encapsulation + +- **gtp** - GPRS Tunneling Protocol + +- **icmp** - Internet Control Message Protocol (1) + +- **igmp** -Internet Group Management Protocol (2) + +- **ip** -Any Internet Protocol v4 (4) + +- **ospf** -Open Shortest Path First (89) + +- **pim** -Protocol Independent Multicast (103) + +- **tcp** -Transmission Control Protocol (6) + +- **udp** -User datagram protocol (17) + +- **vlan** - Enter VLAN number and mask. VLAN value range from 1 to 4094, and mask value range from 0x000-0xFFF . + +- **vrrp** - Virtual Router Redundancy Protocol (112). + +- **protocol_num** -An integer corresponding to an IP protocol. Values range from **0 to 255**. + +- **src_addr** and **dest_addr** - Specify the source and destination address filters. 
Options include the following: + + +- **network_addr** - Specify the subnet address (CIDR or address-mask). + +- **any** - Filter packets from all addresses. + +- **host** **ip_addr** - Specify the IP address in dotted decimal notation. + +Source and destination subnet addresses support discontiguous masks. + +- **source_port** and **dest_port** Source and destination port filters. varnames include: + + +- **any** - Specify all ports. + +- **eq** **port-1** **port-2** ... **port-n** - Specify a list of ports. Maximum list size is 10 ports. + +- **neq** **port-1** **port-2** ... **port-n** - Specify the set of all ports not listed. Maximum list size is 10 ports. + +- **gt** **port** - Specify the set of ports with larger numbers than the listed port. + +- **lt** **port** - Specify the set of ports with smaller numbers than the listed port. + +- **range** **port_1** **port_2** - Specify the set of ports within a range. + +- **fragments** -Filters packets with FO bit set (indicates a non-initial fragment packet). + +- **flags** -Specify the flags bit filters (TCP packets). Use CLI syntax assistance (?) to display varnames. + +- **msg** - Specify the message type filters (ICMP packets). Use CLI syntax assistance (?) to display varnames. + +- **tracked** - Specify the rule filters packets in existing ICMP, UDP, or TCP connections. + + +- Valid in ACLs applied to the control plane. + +- Validity in ACLs applied to data plane varies by switch platform. + +- **dscp_filter** -Specify the rule filters packet by its DSCP value. Values include: + + +- **no parameter** - The rule does not use DSCP to filter packets. + +- **dscp** **dscp_value** - Packets match if DSCP field in packet is equal to **dscp_value**. + +- **ttl_filter** - Rule filters packet by its TTL (time-to-live) value. Values include: + + +- **ttl eq** **ttl_value** - Match packets if **ttl** in packet is equal to **ttl_value**. 
+ +- **ttl gt** **ttl_value** - Match packets if **ttl** in packet is greater than **ttl_value**. + +- **ttl lt** **ttl_value** - Match packets if **ttl** in packet is less than **ttl_value**. + +- **ttl neq** **ttl_value** - Match packets if **ttl** in packet is not equal to **ttl_value**. + + +- Valid in ACLs applied to the control plane. + +- Validity in ACLs applied to data plane varies by switch platform. + +- **log** - Specify to trigger an informational log message to the console about the matching packet. + + +- Valid in ACLs applied to the control plane. + +- Validity in ACLs applied to data plane varies by switch platform. + + +**Examples** + + +- This command appends a **permit** statement at the end of the ACL. The **permit** statement passes all OSPF packets from **10.1.1.0/24** to any host. + +``` +`switch(config)# **ip access-list text1** +switch(config-acl-text1)# **permit ospf 10.1.1.0/24 any** +switch(config-acl-text1)#` +``` + +- This command inserts a **permit** statement with the sequence number **25**. The **permit** statement passes all PIM packets through the interface. + +``` +`switch(config-acl-text1)# **25 permit pim any any** +switch(config-acl-text1)#` +``` + +- These commands configure ACL to permit VLAN traffic between any source and destination host. + +``` +`switch(config)# **ip access-list acl1** +switch(config-acl-acl1)# **permit vlan 1234 0x0 ip any any**` +``` + + +### permit (IPv6 ACL) + + +The **permit** command adds a permit rule to the configuration mode IPv6 Access Control List (ACL). Interfaces to which the ACL is applied accept packets filtered by a permit rule. Sequence numbers determine rule placement in the ACL. Sequence numbers for commands without numbers derive from adding 10 to the number of the ACL's last rule. + + +The **no permit** and **default permit** commands remove the specified rule from the configuration mode ACL. The no (ACLs) command also removes a specified rule from the ACL.
+ + +**Command Mode** + + +IPv6-ACL Configuration + + +**Command Syntax** + + +seq_num permit protocol src_addr source_pt dest_addr dest_pt flag msg hop tracked dscp_filter log + + +no permit protocol src_addr source_pt dest_addr dest_pt flag msg hop tracked dscp_filter log + + +default permit protocol src_addr source_pt dest_addr dest_pt flag msg hop tracked dscp_filter log + + +Note: Commands use a subset of the listed fields and available parameters depend on specified protocol. + + +**Parameters** + + +- **seq_num** - The sequence number assigned to the rule. Options include the following: + + +- **no parameter** - Number is derived by adding 10 to the number of the ACL’s last rule. + +- **1 - 4294967295** Number assigned to entry. + +- **protocol** Specify the protocol field filter. Options include the following: + + +- **icmpv6** - Internet Control Message Protocol for IPv6 (58). + +- **ipv6** - Internet Protocol IPv6 (41). + +- **ospf** - Open Shortest Path First (89). + +- **tcp** - Transmission Control Protocol (6). + +- **udp** - User Datagram Protocol (17). + +- **vlan** - Enter VLAN number. Value ranges from 1 to 4094. + +- **protocol_num** - Integer corresponding to an IP protocol. Values range from 0 to 255. + +- **src_addr** and **dest_addr** - Specify the source and destination address filters. Options include the following: + + +- **ipv6_prefix** - Specify the IPv6 address with prefix length (CIDR notation). + +- **any** - Packets from all addresses are filtered. + +- **host** **ipv6_addr** - Specify the IPv6 host address. + +- **source_pt** and **dest_pt** - Specify the source and destination port filters. Options include the following: + + +- **any** All ports. + +- **eq** **port-1** **port-2** ... **port-n** - Specify the list of ports. Maximum list size is 10 ports. + +- **neq** **port-1** **port-2** ... **port-n** Specify the set of all ports not listed. Maximum list size is 10 ports.
+ +- **gt** **port** - Specify the set of ports with larger numbers than the listed port. + +- **lt** **port** - Specify the set of ports with smaller numbers than the listed port. + +- **range** **port_1** **port_2** - Specify the set of ports whose numbers are in the range. + +- **hop** - Filter using the packet’s hop-limit value. Options include the following: + + +- **no parameter** - The rule does not use hop limit to filter packets. + +- **hop-limit eq** **hop_value** - Match packets if **hop-limit** value in packet equals **hop_value**. + +- **hop-limit gt** **hop_value** - Match packets if **hop-limit** in packet is greater than **hop_value**. + +- **hop-limit lt** **hop_value** - Match packets if **hop-limit** in packet is less than **hop_value**. + +- **hop-limit neq** **hop_value** - Match packets if **hop-limit** in packet is not equal to **hop_value**. + +- **flag** - Flag bit filters (TCP packets). + +- **msg** - Message type filters (ICMPv6 packets). + +- **tracked** The rule filters packets in existing ICMP, UDP, or TCP connections. + + +- Valid in ACLs applied to the control plane. + +- Validity in ACLs applied to data plane varies by switch platform. + +- **dscp_filter** The rule filters packet by its DSCP value. Options include the following: + + +- **no parameter** - The rule does not use DSCP to filter packets. + +- **dscp** **dscp_value** - Match packets if DSCP field in packet is equal to **dscp_value**. + + +- **flow_label** - The rule permits packets with IPv6 flow labels matching an exact value or a pattern based on a mask. Options include the following: + + +- **no parameter** - The rule does not use IPv6 flow labels to filter packets. + +- **flow-label eq** **ipv6_flow_label** - The IPv6 flow label must exactly match **ipv6_flow_label**. Flow labels can range from 0 to 1048575. + +- **flow-label** **ipv6_flow_label** **flow_label_mask** The IPv6 flow label must match a pattern defined by **ipv6_flow_label** and **flow_label_mask**.
The mask is an inverse mask. Where the mask has a 0 bit, the flow label must match the **ipv6_flow_label** value, and where the mask has a 1 bit, the corresponding bit in the flow label is ignored. For example, if **ipv6_flow_label** is 10 (0b01010 in binary) and **flow_label_mask** is 0x14 (0b10100 in binary), the rule matches flow labels described by 0b.1.10 where “.” is a wildcard and can be either 0 or 1. The flow labels that match include 10 (0b01010), 14 (0b01110), 26 (0b11010), and 30 (0b11110). Flow labels can range from 0 to 1048575 and flow label masks can range from 0x00000 to 0xfffff. + +- **log** - Send an informational log message to the console when a packet matches. + + +- Valid in ACLs applied to the control plane. + +- Validity in ACLs applied to data plane varies by switch platform. + + +**Examples** + + +- This command appends a **permit** statement at the end of the ACL. The **permit** statement passes all IPv6 packets with the source address 3710:249a:c643:ef11::/64 and with any destination address. + +``` +`switch(config)# **ipv6 access-list acl1** +switch(config-acl-acl1)# **permit ipv6 3710:249a:c643:ef11::/64 any** +switch(config-acl-acl1)# **exit** +switch(config)#` +``` + +- These commands configure ACL to permit VLAN traffic between any source and destination host. + +``` +`switch(config)# **ip access-list acl2** +switch(config-acl-acl2)# **permit ipv6 vlan 1234 0x0 ip any any** +switch(config-acl-acl2)# **exit** +switch(config)#` +``` + +- These commands add a rule to permit all IPv6 packets with flow label 23. + +``` +`switch(config)# **ipv6 access-list acl3** +switch(config-acl-acl3)# **permit ipv6 any any flow-label eq 23** +switch(config-acl-acl3)# **exit** +switch(config)#` +``` + +- These commands create a rule to permit all IPv6 packets matched by the flow label 23 and the mask 0x5678.
+ +``` +`switch(config)# **ipv6 access-list acl4** +switch(config-acl-acl4)# **permit ipv6 any any flow-label 23 0x5678** +switch(config-acl-acl4)# **exit** +switch(config)#` +``` + + +### permit (IPv6 Prefix List) + + +The **permit** command adds a rule to the configuration mode IPv6 prefix list. Route map match commands use prefix lists to filter routes for redistribution into OSPF, RIP, or BGP domains. Routes are redistributed into the specified domain when they match the prefix that a **permit** statement specifies. + + +The **no permit** and **default permit** commands remove the specified rule from the prefix list. The **no** seq (IPv6 Prefix Lists) command also removes the specified rule from the prefix list. + + +**Command Mode** + + +IPv6-pfx Configuration + + +**Command Syntax** + + +seq_num permit ipv6_prefix mask + + +**Parameters** + + +- **seq_num** - Specify the sequence number assigned to the rule. Options include the following: + + +- **no parameter** - Number derived from adding 10 to the number of the list's last rule. + +- **seq** **seq_num** - Specify the number from the **seq_num**. Value ranges from **0 to 65535**. + +- **ipv6_prefix** - Specify the IPv6 prefix that filters the routes in CIDR notation. + +- **mask** - Specify the range of the prefix to match. + + +- **no parameter** - Requires an exact match with the subnet mask. + +- **eq** **mask_e** - Specify the prefix length equal to **mask_e**. + +- **ge** **mask_g** - Specify the range from the **mask_g** to **128**. + +- **le** **mask_l** - Specify the range from the **subnet** mask length to **mask_l**. + +- **ge** **mask_g** **le** **mask_l** - Range is from **mask_g** to **mask_l**. + +- **mask_e**, **mask_l** and **mask_g** range from **1 to 128**. + +- When **le** and **ge** are both specified, they must satisfy: prefix length ≤ **mask_g** ≤ **mask_l**. + + +**Example** + + +This command appends a **permit** statement at the end of the **route-five** prefix list.
The **permit** statement allows redistribution of routes with the specified prefix. + +``` +`switch(config)# **ipv6 prefix-list route-five** +switch(config-ipv6-pfx)# **permit 3100::/64** +switch(config-ipv6-pfx)#` +``` + + +### permit (MAC ACL) + + +The **permit** command adds a permit rule to the configuration mode MAC access control list. Permit rules pass matching packets through the interface to which the list is applied. Rule filters include protocol, source, and destination. + + +The **no permit** and **default permit** commands remove the specified rule from the configuration mode ACL. The no (ACLs) command also removes the specified rule from the ACL. + + +**Command Mode** + + +MAC-ACL Configuration + + +**Command Syntax** + + +seq_num permit source_addr dest_addr protocol log + + +no permit source_addr dest_addr protocol log + + +default permit source_addr dest_addr protocol log + + +**Parameters** + + +- **seq_num** - Specify the sequence number assigned to the rule. Options include the following: + + +- **no parameter** - Specify the number derived by adding **10** to the number of the ACL's last rule. + +- **1 - 4294967295** - Specify the number assigned to entry. + +- **source_addr** and **dest_addr** - Specify the source and destination address filters. Options include the following: + + +- **mac_address** **mac_mask** - Specify the MAC address and mask. + +- **any** - Filter packets from all addresses. + +- **mac_address** - Specifies a MAC address in 3x4 dotted hexadecimal notation (hhhh.hhhh.hhhh). + +- **mac_mask** - Specifies a MAC address mask in 3x4 dotted hexadecimal notation (hhhh.hhhh.hhhh). + +- **0** - Require an exact match to filter. + +- **1** - Filter on any value. + +- **protocol** - Specify the protocol field filter. Options include the following: + + +- **aarp** Appletalk Address Resolution Protocol (0x80f3). + +- **appletalk** Appletalk (0x809b). + +- **arp** Address Resolution Protocol (0x806). + +- **ip** Internet Protocol Version 4 (0x800).
+ +- **ipx** Internet Packet Exchange (0x8137). + +- **lldp** LLDP (0x88cc). + +- **novell** Novell (0x8138). + +- **rarp** Reverse Address Resolution Protocol (0x8035). + +- **protocol_num** Integer corresponding to a MAC protocol. Values range from **0 to 65535**. + +- **log** - Specify to trigger an informational log message to the console about the matching packet. + + +**Examples** + + +- This command appends a **permit** statement at the end of the ACL. The **permit** statement passes all ***aarp*** packets from **10.1000.0000** through **10.1000.FFFF** to any host. + +``` +`switch(config)# **mac access-list text1** +switch(config-mac-acl-text1)# **permit 10.1000.0000 0.0.FFFF any aarp** +switch(config-mac-acl-text1)#` +``` + +- This command inserts a **permit** statement with the sequence number **25**. The **permit** statement passes all packets through the interface. + +``` +`switch(config-mac-acl-text1)# **25 permit any any** +switch(config-mac-acl-text1)#` +``` + + +### permit (Standard IPv4 ACL) + + +The **permit** command adds a permit rule to a standard IPv4 Access Control List (ACL). Standard ACL rules filter on the source field. + + +Interfaces with the applied ACL accept packets filtered by a permit rule. Sequence numbers determine rule placement in the ACL. Sequence numbers for commands without numbers derive from adding ***10*** to the number of the ACL's last rule.. + + +The **no permit** and **default permit** commands remove the specified rule from the ACL. The no (ACLs) command also removes the specified rule from the ACL. + + +**Command Mode** + + +Std-ACL Configuration + + +**Command Syntax** + + +[seq_num] permit [ source_addr network_addr [any | host] [log] + + +no permit [ source_addr network_addr [any | host] [log] + + +default permit [ source_addr network_addr [any | host][log] + + +**Parameters** + + +- **seq_num** - Specify the sequence number assigned to the rule. 
Options include the following: + + +- **no parameter** - A number derived from adding **10** to the number of the ACL's last rule. + +- **1 - 4294967295** Number assigned to entry. + +- **source_addr** - Specify the source address filter. Options include the following: + + +- **network_addr** - Specify the subnet address in CIDR or as an address-mask. + +- **any** - Filter packets from all addresses. + +- **host** **ip_addr** - Specify the IP address in dotted decimal notation. + +Subnet addresses support discontiguous masks. + +- **log** - Specify to trigger an informational log message to the console about the matching packet. + + +- Valid in ACLs applied to the control plane. + +- Validity in ACLs applied to data plane varies by switch platform. + + +**Example** + + +This command appends a **permit** statement at the end of the ACL. The **permit** statement passes all packets with a source address of **10.1.1.1/24**. + +``` +`switch(config)# **ip access-list standard text1** +switch(config-std-acl-text1)# **permit 10.1.1.1/24** +switch(config-std-acl-text1)#` +``` + + +### permit (Standard IPv6 ACL) + + +The **permit** command adds a permit rule to the standard IPv6 access control list. Standard ACL rules filter on the source field. + + +Interfaces with the applied ACL accept packets filtered by a permit rule. Sequence numbers determine rule placement in the ACL. Sequence numbers for commands without numbers derive from adding 10 to the number of the ACL's last rule. + + +The **no permit** and **default permit** commands remove the specified rule from the configuration mode ACL. The no (ACLs) command also removes the specified rule from the ACL. + + +**Command Mode** + + +Std-IPv6-ACL Configuration + + +**Command Syntax** + + +[seq_num] permit source_addr + + +no permit source_addr + + +default permit source_addr + + +**Parameters** + + +- **seq_num** - Specify the sequence number assigned to the rule.
Options include the following: + + +- **no parameter** - The number derived by adding **10** to the number of the ACL's last rule. + +- **1 - 4294967295** - The number assigned to entry. + +- **source_addr** - Specify the source address filter. Options include the following: + + +- **ipv6_prefix** - Specify the IPv6 address with prefix length (CIDR notation). + +- **any** - Filter packets from all addresses. + +- **host** **ipv6_addr** - Specify the IPv6 host address. + + +**Example** + + +This command appends a **permit** statement at the end of the ACL. The **permit** statement passes packets with a source address of **2103::/64**. + +``` +`switch(config)# **ipv6 access-list standard text1** +switch(config-std-acl-ipv6-text1)# **permit 2103::/64** +switch(config-std-acl-ipv6-text1)#` +``` + + +### remark + + +The **remark** command adds a non-executable comment statement into the pending ACL. Remarks entered without a sequence number are appended to the end of the list. Remarks with a sequence number insert into the list as specified by the sequence number. + + +The **default remark** command removes the comment statement from the ACL. + + +The **no remark** command removes the comment statement from the ACL. The command can specify the remark by content or by sequence number. + + +**Command Mode** + + +ACL Configuration + + +IPv6-ACL Configuration + + +Std-ACL Configuration + + +Std-IPv6-ACL Configuration + + +MAC-ACL Configuration + + +**Command Syntax** + + +remark text + + +line_num remark [text] + + +no remark text + + +default remark text + + +**Parameters** + + +- **text** The comment text. + +- **line_num** Sequence number assigned to the remark statement. Value ranges from **1 - 4294967295**. + + +**Example** + + +This command appends a comment to the list.
+ +``` +`switch(config-acl-test1)# **remark end of list** +switch(config-acl-test1)# **show** +IP Access List test1 + 10 permit ip 10.10.10.0/24 any + 20 permit ip any host 10.20.10.1 + 30 deny ip host 10.10.10.1 host 10.20.10.1 + 40 permit ip any any + 50 remark end of list` +``` + + +### resequence (ACLs) + + +The **resequence** command assigns sequence numbers to rules in the configuration mode ACL. Command parameters specify the number of the first rule and the numeric interval between consecutive rules. + + +Maximum rule sequence number is **4294967295**. + + +**Command Mode** + + +ACL Configuration + + +IPv6-ACL Configuration + + +Std-ACL Configuration + + +Std-IPv6-ACL Configuration + + +MAC-ACL Configuration + + +**Command Syntax** + + +resequence [start_num [inc_num]] + + +**Parameters** + + +- **start_num** Sequence number assigned to the first rule. Default is **10**. + +- **inc_num** Numeric interval between consecutive rules. Default is **10**. + + +**Example** + + +The **resequence** command re-numbers the list, starting the first command at number **100** and incrementing subsequent lines by **20**. + +``` +`switch(config-acl-test1)# **show** +IP Access List test1 + 10 permit ip 10.10.10.0/24 any + 20 permit ip any host 10.20.10.1 + 30 deny ip host 10.10.10.1 host 10.20.10.1 + 40 permit ip any any + 50 remark end of list +switch(config-acl-test1)# **resequence 100 20** +switch(config-acl-test1)# **show** +IP Access List test1 + 100 permit ip 10.10.10.0/24 any + 120 permit ip any host 10.20.10.1 + 140 deny ip host 10.10.10.1 host 10.20.10.1 + 160 permit ip any any + 180 remark end of list` +``` + + +### route-map + + +The **route-map** command places the switch in Route-Map Configuration Mode, a group change mode that modifies a route map statement. The command specifies the name and number of the route map statement that subsequent commands modify and creates a route map statement if it references a nonexistent statement. 
All changes in a group change mode edit session pend until the end of the session. + + +Route maps define commands for redistributing routes between routing protocols. Use names, filter type (**permit** or **deny**), and sequence number to identify a route map statement. Statements with the same name are components of a single route map, and the sequence number determines the order in which the statements compare to a route. + + +The **exit** command saves pending route map statement changes to ***running-config***, then returns the switch to global configuration mode. Also, save ACL changes by entering a different configuration mode. + + +The **abort** command discards pending changes, returning the switch to global configuration mode. + + +The **no route-map** and **default route-map** commands delete the specified route map statement from ***running-config***. + + +Note: The route map configuration supports only standard ACL. + + +**Command Mode** + + +Global Configuration + + +**Command Syntax** + + +route-map map_name [filter_type] [sequence_number] + + +no route-map map_name [filter_type] [sequence_number] + + +default route-map map_name [filter_type][sequence_number] + + +**Parameters** + + +- **map_name** - Assign a label to the route map. Protocols reference this label to access the route map. + +- **filter_type** - Specify the disposition of routes matching commands specified by route map statement. + + +- **permit** - Redistribute routes when they match route map statement. + +- **deny** - Do not redistribute routes when they match route map statement. + +- **no parameter** Assigns **permit** as the **filter_type**. + + +When a route does not match the route map criteria, EOS evaluates the next statement within the route map to determine the redistribution action for the route. + + +- **sequence_number** - Specify the route map position relative to other statements with the same name. 
+ + +- **no parameter** - Assign the sequence number of 10 (default) to the route map. + +- **1-16777215** - Specifies sequence number assigned to route map. + + +**Commands Available in Route-Map Configuration Mode:** + + +- continue (route map) + +- match (route-map) + +- set (route-map) + + +**Examples** + + +- This command creates the route map named **map1** and places the switch in route map configuration mode. This configures the route map as a permit map. + +``` +`switch(config)# **route-map map1 permit 20** +switch(config-route-map-map1)#` +``` + +- This command saves changes to **map1** route map, then returns the switch to Global Configuration Mode. + +``` +`switch(config-route-map-map1)# **exit** +switch(config)#` +``` + +- This command saves changes to **map1** route map, then places the switch in Interface-Ethernet Configuration Mode. + +``` +`switch(config-route-map-map1)# **interface ethernet 3** +switch(config-if-Et3)#` +``` + +- This command discards changes to **map1** route map, then returns the switch to Global Configuration Mode. + +``` +`switch(config-route-map-map1)# **abort** +switch(config)#` +``` + + +### no seq (IPv6 Prefix Lists) + + +The **no seq** command removes the rule with the specified sequence number from the IPv6 prefix list. The **default seq** command also removes the specified rule. + + +The **seq** keyword provides a command option used at the beginning of deny (IPv6 Prefix List) and permit (IPv6 Prefix List) commands that places a new rule between two existing rules. + + +**Command Mode** + + +IPv6-pfx Configuration + + +**Command Syntax** + + +no seq line_num + + +default seq line_num + + +**Parameter** + + +**line_num** - Specify the sequence number of the rule to delete. Valid rule numbers range from **0** to **65535**. + + +**Example** + + +These commands remove rule **20** from the **map1** prefix list, then display the resultant list.
+ +``` +`switch(config)# **ipv6 prefix-list map1** +switch(config-ipv6-pfx)# **no seq 20** +switch(config-ipv6-pfx)# **exit** +switch(config)# **show ipv6 prefix-list map1** +ipv6 prefix-list map1 +seq 10 permit 3:4e96:8ca1:33cf::/64 +seq 15 deny 3:4400::/64 +seq 30 permit 3:1bca:3ff2:634a::/64 +seq 40 permit 3:1bca:1141:ab34::/64 +switch(config)#` +``` + + +### set (route-map) + + +The **set** command specifies modifications to routes selected for redistribution by the Route-Map Configuration Mode. + + +The **no set** and **default set** commands remove the specified **set** command from the Route-Map Configuration Mode statement by deleting the corresponding **set** command from ***running-config***. + + +**Command Mode** + + +Route-Map Configuration + + +**Command Syntax** + + +set condition [as-path prepend [num | auto]] + + +no set condition [as-path prepend [num | auto]] + + +default set condition[as-path prepend [num | auto]] + + +**Parameters** + + +- **condition** - Specifies the route modification parameter and value. Options include the following: + + +- **as-path prepend** - Specifies the BGP AS number prepended to as-path. For details, see the set as-path prepend command. + + +- **1 - 4294967295** - Specifies the BGP AS number to prepend. + +- **auto** - Specifies to use the peer AS number for inbound and local AS for outbound to prepend. + +- **distance** **1 - 255** - Specifies the protocol independent administrative distance. + +- **ip next-hop** **ipv4_address** - Specifies the next-hop IPv4 address. + + +- peer-address - Specifies using BGP peering address as next hop IPv4 address. + +- **ipv6 next-hop** **ipv6_address** - Specifies the next-hop IPv6 address. + + +- peer-address - Specifies using the BGP peering address as next hop IPv6 address. + +- **local-preference** **1 - 4294967295** - Specifies the BGP local preference metric. + +- **metric** **1 - 4294967295** - Specifies the route metric. 
+ +- **metric +** **1 - 4294967295** - Specifies adding specified value to current route metric. + +- **metric -** **1 - 4294967295** - Specifies subtracting specified value from current route metric. + +- **metric-type** **OSPF_TYPE** OSPF metric type. Options include the following: + + +- **type-1** - OSPF type 1 metric. + +- **type-2** - OSPF type 2 metric. + +- **origin** **O_TYPE** BGP origin attribute. Options include the following: + + +- **egp** - Exterior BGP route. + +- **igp** - Interior BGP route. + +- **incomplete** - BGP route of unknown origin. + +- **tag** **1 - 4294967295** - Route tag. + +- **weight** **1 - 65535** - BGP weight parameter. + + +**Related Commands** + + +- route-map enters the Route-Map Configuration Mode. + +- set community (route-map) specifies community modifications for the redistributed routes. + +- set extcommunity (route-map) specifies extended community modifications for the redistributed routes. + + +**Example** + + +This command creates a route map entry that sets the local preference metric to **100** on redistributed routes. + +``` +`switch(config)# **route-map map1** +switch(config-route-map-map1)# **set local-preference 100** +switch(config-route-map-map1)#` +``` + + +### set as-path match + + +The **set as-path match** command configures the **as_path** attribute for prefixes either received from a BGP neighbor or advertised to a BGP neighbor in the Route-Map Configuration Mode. + + +The **no set as-path match** command removes the AS path specified for the BGP prefix. + + +**Command Mode** + + +Route-Map Configuration + + +**Command Syntax** + + +set as-path match [all replacement [none | auto]] as_path + + +no set as-path match [all replacement [none | auto]] as_path + + +**Parameters** + + +- **none** - Replaces the **as_path** of the matching routes with a null or an empty **as_path**.
+ +- **auto** - Applying the specific route map as an inbound policy to a corresponding BGP neighbor statement, then replace the **as_path** of the prefixes received from this neighbor with the neighbor AS number. If applying this route map as an outbound policy to a corresponding neighbor statement, then replace the **as_path** of the prefixes advertised to this neighbor with the locally configured AS number. + +- **as_path** - Replaces the AS-Path of the matching routes with an arbitrary **as_path**. + + +**Examples** + + +- This command replaces the AS-Path with the **none** option. + +``` +`switch# **show ip bgp neighbors 80.80.1.2 advertised-routes** +BGP routing table information for VRF default +Router identifier 202.202.1.1, local AS number 200 +Route status codes: s - suppressed, * - valid, > - active, # - not installed, E +- ECMP head, e - ECMP +S - Stale, c - Contributing to ECMP, b - backup, L - labeled-unicast, q - Queued +for advertisement +Origin codes: i - IGP, e - EGP, ? 
- incomplete +AS Path Attributes: Or-ID - Originator ID, C-LST - Cluster List, LL Nexthop - +Link Local Nexthop + +Network Next Hop Metric LocPref Weight Path +* > 101.101.1.0/24 80.80.1.1 - - - 200 i +* > 102.102.1.0/24 80.80.1.1 - - - 200 i +* > 103.103.1.0/24 80.80.1.1 - - - 200 302 i +* > 202.202.1.0/24 80.80.1.1 - - - 200 i + +switch# **configure terminal** +switch(config)# **route-map foo permit 10** +switch(config-route-map-foo)# **set as-path match all replacement none** +switch(config-route-map-foo)# **exit** +switch(config)# **router bgp 200** +switch(config-router-bgp)# **neighbor 80.80.1.2 route-map foo out** +switch(config-router-bgp)# **end** + +switch# **show ip bgp neighbors 80.80.1.2 advertised-routes** +BGP routing table information for VRF default +Router identifier 202.202.1.1, local AS number 200 +Route status codes: s - suppressed, * - valid, > - active, # - not installed, E +- ECMP head, e - ECMP +S - Stale, c - Contributing to ECMP, b - backup, L - labeled-unicast, q - Queued +for advertisement +Origin codes: i - IGP, e - EGP, ? - incomplete +AS Path Attributes: Or-ID - Originator ID, C-LST - Cluster List, LL Nexthop - +Link Local Nexthop + +Network Next Hop Metric LocPref Weight Path +* > 101.101.1.0/24 80.80.1.1 - - - 200 i +* > 102.102.1.0/24 80.80.1.1 - - - 200 i +* > 103.103.1.0/24 80.80.1.1 - - - 200 i +* > 202.202.1.0/24 80.80.1.1 - - - 200 i` +``` + +- Replace the AS-Path of matching prefixes with an empty or a null AS-Path. Remove AS **302** from prefix **103.103.1.0/24** as shown in the above output. + + +- This command replaces the AS-Path with the **auto** option. 
+ +``` +`switch(config)# **route-map foo permit 10** +switch(config-route-map-foo)# **set as-path match all replacement auto** +switch(config-route-map-foo)# **end** + +switch# **show ip bgp neighbors 80.80.1.2 advertised-routes** +BGP routing table information for VRF default +Router identifier 202.202.1.1, local AS number 200 +Route status codes: s - suppressed, * - valid, > - active, # - not installed, E +- ECMP head, e - ECMP +S - Stale, c - Contributing to ECMP, b - backup, L - labeled-unicast, q - Queued +for advertisement +Origin codes: i - IGP, e - EGP, ? - incomplete +AS Path Attributes: Or-ID - Originator ID, C-LST - Cluster List, LL Nexthop - +Link Local Nexthop + +Network Next Hop Metric LocPref Weight Path +* > 101.101.1.0/24 80.80.1.1 - - - 200 200 i +* > 102.102.1.0/24 80.80.1.1 - - - 200 200 i +* > 103.103.1.0/24 80.80.1.1 - - - 200 200 i +* > 202.202.1.0/24 80.80.1.1 - - - 200 200 i` +``` + + +Replaces the AS-Path of matching prefixes with the locally configured AS **200**. + +- This command replaces the AS-Path with another AS-Path. + +``` +`switch(config)# **route-map foo permit 10** +switch(config-route-map-foo)# **set as-path match all replacement 500 600** +switch(config-route-map-foo)# **end** + +switch# **show ip bgp neighbors 80.80.1.2 advertised-routes** +BGP routing table information for VRF default +Router identifier 202.202.1.1, local AS number 200 +Route status codes: s - suppressed, * - valid, > - active, # - not installed, E +- ECMP head, e - ECMP +S - Stale, c - Contributing to ECMP, b - backup, L - labeled-unicast, q - Queued +for advertisement +Origin codes: i - IGP, e - EGP, ? 
- incomplete +AS Path Attributes: Or-ID - Originator ID, C-LST - Cluster List, LL Nexthop - +Link Local Nexthop + +Network Next Hop Metric LocPref Weight Path +* > 101.101.1.0/24 80.80.1.1 - - - 200 500 600 i +* > 102.102.1.0/24 80.80.1.1 - - - 200 500 600 i +* > 103.103.1.0/24 80.80.1.1 - - - 200 500 600 i +* > 202.202.1.0/24 80.80.1.1 - - - 200 500 600 i` +``` + + +Replaces the AS-Path of matching prefixes with **500 600** as configured. + +- Replaces the AS-Path with a combination of **auto** and an AS-Path. + +``` +`switch(config)# **route-map foo permit 10** +switch(config-route-map-foo)# **set as-path match all replacement auto 500 600** +switch(config-route-map-foo)# **end** + +switch# **show ip bgp neighbors 80.80.1.2 advertised-routes** +BGP routing table information for VRF default +Router identifier 202.202.1.1, local AS number 200 +Route status codes: s - suppressed, * - valid, > - active, # - not installed, E +- ECMP head, e - ECMP + S - Stale, c - Contributing to ECMP, b - backup, L - labeled-unicast, q - Queued +for advertisement +Origin codes: i - IGP, e - EGP, ? - incomplete +AS Path Attributes: Or-ID - Originator ID, C-LST - Cluster List, LL Nexthop - +Link Local Nexthop + + Network Next Hop Metric LocPref Weight Path + * > 101.101.1.0/24 80.80.1.1 - - - 200 200 500 600 i + * > 102.102.1.0/24 80.80.1.1 - - - 200 200 500 600 i + * > 103.103.1.0/24 80.80.1.1 - - - 200 200 500 600 i + * > 202.202.1.0/24 80.80.1.1 - - - 200 200 500 600 i` +``` + + +Replaces the AS-Path of matching prefixes with the locally configured AS **200** and **500 600**. + + +### set as-path prepend + + +The **set as-path prepend** command adds a **set** statement to a route map to prepend one or more Autonomous System (AS) numbers to the **as_path** attribute of a BGP route. + + +The **no set as-path prepend** and **default set as-path prepend** commands remove the specified set statements from the route map and update all corresponding routes. 
+ + +**Command Mode** + + +Route-Map Configuration + + +**Command Syntax** + + +set as-path prepend auto | as_number [auto | as_number | last-as count] + + +no set as-path prepend auto | as_number [auto | as_number | last-as count] + + +default set as-path prepend auto | as_number [auto | as_number | last-as count] + + +**Parameters** + + +- **auto** - Prepends the peer AS number for peer inbound route maps and the local AS number for peer outbound route maps. + +- **as_number** - Prepends the specified AS number. Enter in plain notation (values range from **1-4294967295**) or in asdot notation as described in RFC 5396. In asdot notation, enter AS numbers from **1-65535** in plain notation, and enter AS numbers from **65536 to 4294967295** as two values separated by a dot. The first value is high-order and represents a multiple of **65536**, and the second value is low-order and represents a decimal integer. For example, AS number **65552** can be entered as either **65552** or 1.16 (i.e., 1*65536+16). However entered, EOS stores the AS numbers internally in plain decimal notation and appear that way in **show** outputs. + +- **last-as** **count** - Prepends the last AS number in the AS path *count* times. Values range from **1 to 15**. Mutually exclusive with the use of the **auto** cmdname or the entry of one or more specified AS numbers, and not supported in multi-agent mode. + + +**Examples** + + +- These commands create a route-map entry that prepends AS number **64496** and prepends either the peer or local AS number twice. 
+ +``` +`switch(config)# **route-map map1** +switch(config-route-map-map1)# **set as-path prepend 64496 auto auto** +switch(config-route-map-map1)# **exit** + +switch(config)# **show route-map map1** +route-map map1 permit 10 + Description: + Match clauses: + SubRouteMap: + Set clauses: + set as-path prepend 64496 auto auto +switch(config)#` +``` + +- These commands create a route-map entry that prepends AS numbers **64496**, **64498**, and **65552**. + +``` +`switch(config)# **route-map map2** +switch(config-route-map-map2)# **set as-path prepend 64496 64498 1.16** +switch(config-route-map-map2)# **exit** + +switch(config)# **show route-map map2** +route-map map2 permit 10 + Description: + Match clauses: + SubRouteMap: + Set clauses: + set as-path prepend 64496 64498 65552 +switch(config)#` +``` + +- These commands create a route map entry that prepends the last AS number **12** times. + +``` +`switch(config)# **route-map map3** +switch(config-route-map-map3)# **set as-path prepend last-as 12** +switch(config-route-map-map3)# **exit** + +switch(config)# **show route-map map3** +route-map map3 permit 10 + Description: + Match clauses: + SubRouteMap: + Set clauses: + set as-path prepend last-as 12 +switch(config)#` +``` + + +### set community (route-map) + + +The **set community** command specifies community attribute modifications to routes selected for redistribution. The **set community none** command removes community attributes from the route. + + +The **no set community** and **default set community** commands remove the specified community from the Route-Map Configuration Mode statement by deleting the corresponding statement from ***running-config***.
+ + +**Command Mode** + + +Route-Map Configuration + + +**Command Syntax** + + +set community [gshut | aa:nn | community-list | internet | local-as | no-advertise | no-export | none | number] + + +no set community [gshut | aa:nn | additive | community-list | delete | internet | local-as | no-advertise | no-export | none | number] + + +default set community [gshut | aa:nn | additive | community-list | delete | internet | local-as | no-advertise | no-export | none | number] + + +**Parameters** + + +- **gshut** - Configures a graceful shutdown in BGP. + +- **aa:nn** - Configures the community AS and network number, separated by colon. Value ranges from **0:0 to 65535:65535**. + +- **community-list** - A label for community list. + +- **internet** - Advertises route to the Internet community. + +- **local-as** - Advertises route only to local peers. + +- **no-advertise** - Does not advertise route to any peer. + +- **no-export** - Advertises route only within BGP AS boundary. + +- **none** - Does not provide any community attributes. + +- **number** - Configures the community number. Value ranges from **1** to **4294967040**. + +- **additive** - Adds specified attributes to the current community. + +- **delete** - Removes specified attributes from the current community. + + +**Related Commands** + + +- [ip community-list](/um-eos/eos-border-gateway-protocol-bgp#xx1116784) + +- route-map + +- set (route-map) + +- set community (route-map) + + +**Guideline** + + +EOS does not support disabling the process of graceful shutdown community. + + +**Example** + + +This command advertises routes only to local peers. 
+ +``` +`switch(config-route-map-map1)# **show active** +route-map map1 permit 10 + match community instances <= 50 + set community 0:456 0:2345 +switch(config-route-map-map1)# **set community local-as** +switch(config-route-map-map1)# **ip community-list 345 permit 23** +switch(config)# **route-map map1** +switch(config-route-map-map1)# **show active** +route-map map1 permit 10 + match community instances <= 50 + set community 0:456 0:2345 local-as +switch(config-route-map-map1)#` +``` + + +### set extcommunity (route-map) + + +The **set extcommunity** command specifies extended community attribute modifications to routes selected for redistribution. The **set extcommunity none** command removes extended community attributes from the route. + + +The **no set extcommunity** and **default set extcommunity** commands remove the specified **set extcommunity** command from the Route-Map Configuration Mode statement by deleting the corresponding statement from ***running-config***. + + +**Command Mode** + + +Route-Map Configuration Mode + + +**Command Syntax** + + +set extcommunity cond_x [cond_2][cond_n][mod_type] + + +set extcommunity none + + +no set extcommunity cond_x [cond_2][cond_n][mod_type] + + +default set extcommunity cond_x [cond_2][cond_n][mod_type] + + +default set extcommunity none + + +**Parameters** + + +- **cond_x** - Specifies extended community route map modification. Command may contain multiple attributes. Options include the following: + + +- **rt** **asn:nn** - Specifies the route target attribute (AS:network number). + +- **rt** **ip-address:nn** - Specifies the route target attribute (IP address: network number). + +- **soo** **ASN:nn** - Specifies the site of origin attribute (AS:network number). + +- **soo** **IP-address:nn** - Specifies the site of origin attribute (IP address: network number). + +- **mod_type** - Specifies the route map modification method.
Options include the following: + + +- **no parameter** - Specifies the command to replace an existing route map with specified parameters. + +- **additive** - Specifies the command to add specified parameters to the existing route map. + +- **delete** - Specifies the command to remove specified parameters from the existing route map. + + +**Related Commands** + + +- route-map enters route map configuration mode. + +- set (route-map) specifies attribute modifications for the redistributed routes. + + +**Example** + + +This command creates a route map entry in **map1** that sets the route target extended community attribute. + +``` +`switch(config)# **route-map map1** +switch(config-route-map-map1)# **set extcommunity rt 10.13.2.4:100** +switch(config-route-map-map1)#` +``` + + +### show (ACL configuration modes) + + +The **show** command displays the contents of an Access Control List (ACL). + + +- **show** or **show pending** displays the list as modified in ACL configuration mode. + +- **show active** displays the list as stored in ***running-config***. + +- **show comment** displays the comment stored with the list. + +- **show diff** displays the modified and stored lists, with flags denoting the modified rules. + + +Exiting the ACL configuration mode stores all pending ACL changes to ***running-config***. + + +**Command Mode** + + +ACL Configuration + + +IPv6-ACL Configuration + + +Std-ACL Configuration + + +Std-IPv6-ACL Configuration + + +MAC-ACL Configuration + + +**Command Syntax** + + +show + + +show active + + +show comment + + +show diff + + +show pending + + +**Examples** + + +The examples in this section assume these ACL commands are entered as specified. + + +- These commands are stored in ***running-config***: + + +``` +`10 permit ip 10.10.10.0/24 any +20 permit ip any host 10.21.10.1 +30 deny ip host 10.10.10.1 host 10.20.10.1 +40 permit ip any any +50 remark end of list` +``` + +- The current edit session removed this command.
This change is not yet stored to ***running-config***: + + +``` +`20 permit ip any host 10.21.10.1` +``` + +- The current edit session added these commands to the ACL. They are not yet stored to ***running-config***: + + +``` +`20 permit ip 10.10.0.0/16 any +25 permit tcp 10.10.20.0/24 any +45 deny pim 239.24.124.0/24 10.5.8.4/30` +``` + +- This command displays the ACL, as stored in the configuration. + +``` +`switch(config-acl-test_1)# **show active** +IP Access List test_1 + 10 permit ip 10.10.10.0/24 any + 20 permit ip any host 10.21.10.1 + 30 deny ip host 10.10.10.1 host 10.20.10.1 + 40 permit ip any any + 50 remark end of list` +``` + +- This command displays the pending ACL, as modified in ACL configuration mode. + +``` +`switch(config-acl-test_1)# **show pending** +IP Access List test_1 + 10 permit ip 10.10.10.0/24 any + 20 permit ip 10.10.0.0/16 any + 25 permit tcp 10.10.20.0/24 any + 30 deny ip host 10.10.10.1 host 10.20.10.1 + 40 permit ip any any + 45 deny pim 239.24.124.0/24 10.5.8.4/30 + 50 remark end of list` +``` + +- This command displays the difference between the saved and modified ACLs. + + +- Rules added to the pending list are denoted with a plus sign (+). + +- Rules removed from the saved list are denoted with a minus sign (-). + +``` +`switch(config-acl-test_1)# **show diff** +--- ++++ +@@ -1,7 +1,9 @@ + IP Access List test_1 + 10 permit ip 10.10.10.0/24 any + 20 permit ip any host 10.21.10.1 + 20 permit ip 10.10.0.0/16 any + 25 permit tcp 10.10.20.0/24 any + 30 deny ip host 10.10.10.1 host 10.20.10.1 + 40 permit ip any any + 45 deny pim 239.24.124.0/24 10.5.8.4/30` +``` + + +### show hardware tcam profile + + +The **show hardware tcam profile** command displays the hardware-specific information for the current operational TCAM profile in the running configuration. + + +This command is applicable to DCS-7280(E/R) and DCS-7500(E/R) series switches only.
+ + +**Command Mode** + + +EXEC + + +**Command Syntax** + + +show hardware tcam profile [[profileName [[feature featureName] detail]]|[detail] + + +**Parameters** + + +- **profileName** Selects the named profile. + +- **feature** **featureName** Selects the specific feature by name. + +- **detail** Displays the content of the TCAM profile. + + +**Guidelines** + + +If the profile cannot be programmed, the Status column will print ‘ERROR’. Any features that use TCAM functionality will not work properly. Do not expect any features to work if the profile is in the ‘ERROR’ state. If there are warnings or errors, a summary message will display warnings or errors found in programming the profile in addition to the system log messages. + + +**Examples** + + +- The **show hardware tcam profile** command lists the TCAM profile status on each line card. In case of successful programming it is as shown below. + +``` +`switch(config)# **show hardware tcam profile** + Configuration Status +FixedSystem testprofile testprofile` +``` + +- If the profile cannot be programmed, the Status column will print ‘ERROR’. + +``` +`(config)# **show hardware tcam profile** + Configuration Status +Linecard3 newprofile1 **ERROR** +Linecard4 newprofile1 **ERROR** +Linecard5 newprofile1 ERROR +Linecard6 newprofile1 **ERROR** +Linecard7 newprofile1 **WARNING** + +Detailed Programming Status +Linecard3, Linecard4, Linecard5 +[Error] feature flow is not supported on this hardware platform +Linecard7 +[Warning] the key size of feature flow exceeds the configured key size limit` +``` + +- The **show hardware tcam profile detail** command displays further information about the TCAM profile features. +Note: The profile contains all the features that are untouched after copying from the base profile.
+ + +``` +`switch(config-hw-tcam)# **show hardware tcam profile myprofile detail** +Profile myprofile [ FixedSystem ] + Feature: acl port ip egress + Key size: 320 + Key Fields: dscp, dst-ip, ip-frag, ip-protocol, l4-dst-port, + l4-src-port, src-ip + + Feature: acl port ip ingress + Key size: 320 + Key Fields: dscp, dst-ip, ip-frag, ip-protocol, l4-dst-port, l4-ops, + l4-src-port, src-ip, tcp-control, ttl + + Feature: acl port ipv6 egress + Key size: 320 + Key Fields: dst-ipv6, ip-protocol, ipv6-next-header, + ipv6-traffic-class, l4-dst-port, l4-src-port, src-ipv6, + tcp-control, ttl + + Feature: acl port ipv6 ingress + Key size: 320 + Key Fields: dst-ipv6, ip-protocol, ipv6-next-header, + ipv6-traffic-class, l4-dst-port, l4-ops, l4-src-port, + src-ipv6, tcp-control, ttl + + Feature: acl port ipv6 source-only egress + Key size: 320 + Key Fields: ip-protocol, src-ipv6 + + Feature: acl port mac egress + Key size: 320 + Key Fields: dst-mac, ether-type, src-mac +...` +``` + +- You can use the **show hardware tcam profile** command without the **detail** keyword to see all of the features configured in a profile without seeing how the features are defined. + +``` +`(config-hw-tcam-profile-newfeature)# **show hardware tcam profile default** + +Features enabled in TCAM profile default: [ Linecard3, Linecard4, Linecard6, Linecard +7, Linecard8, Linecard9, Linecard10 ] + +mpls +acl vlan ipv6 +acl subintf ipv6 +acl vlan ipv6 egress +acl port ipv6 +pbr ipv6 +acl vlan ip +acl subintf ip +acl port ip +tunnel vxlan +acl port mac +pbr ip +pbr mpls +qos ipv6 +qos ip +mirror ip +counter lfib +mpls pop ingress` +``` + + +### show access-lists + + +The **show access-lists** command displays the contents of all IPv4, IPv6, and MAC Access Control Lists (ACLs) on the switch in addition to the configuration and status. 
Use the **summary** option to display only the configuration and status, which contains details such as the name of the ACL, total rules configured, configured and active status containing interface information, and supplicant information as in the case of dynamic ACLs from dot1x sessions. + + +**Command Mode** + + +Privileged EXEC + + +**Command Syntax** + + +show access-lists[interface interface_acl] | [acl_name acl_name] acl_name | [scope summary] + + +**Parameters** + + +- **interface** - Filter by interfaces such as Ethernet, VLANs, and Port Channels. Selection options include the following: + + +- **no parameter** - Display all ACLs. + +- **interface_acl** - Display ACLs attached to the interface if present. + +- **acl_name** - Display the list name. Selection options include the following: + + +- **no parameter** - Display all ACLs. + +- **acl_name** - Display a specific ACL. + +- **scope** - Display detailed or summarized information. Selection options include the following: + + +- **no parameter** - Display all rules in the specified lists including the configuration and status. + +- **summary** - Display only the configuration and status of the ACL. + + +**Examples** + + +- This command displays all rules in all the ACLs including IPv4, IPv6, and MAC and the configuration and status. 
+ +``` +`switch# **show access-lists** +Phone ACL bypass: disabled +IP Access List default-control-plane-acl [readonly] + counters per-entry + 10 permit icmp any any + 20 permit ip any any tracked [match 149061 bytes in 1721 packets, 0:00:00 ago] + 30 permit udp any any eq bfd ttl eq 255 + 40 permit udp any any eq bfd-echo ttl eq 254 + 50 permit udp any any eq multihop-bfd micro-bfd sbfd + 60 permit udp any eq sbfd any eq sbfd-initiator + 70 permit ospf any any + 80 permit tcp any any eq ssh telnet www snmp bgp https msdp ldp netconf-ssh gnmi [match 180 bytes in 3 packets, 0:03:08 ago] + 90 permit udp any any eq bootps bootpc snmp rip ntp ldp ptp-event ptp-general [match 984 bytes in 3 packets, 1 day, 9:02:21 ago] + 100 permit tcp any any eq mlag ttl eq 255 + 110 permit udp any any eq mlag ttl eq 255 + 120 permit vrrp any any + 130 permit ahp any any + 140 permit pim any any + 150 permit igmp any any + 160 permit tcp any any range 5900 5910 + 170 permit tcp any any range 50000 50100 + 180 permit udp any any range 51000 51100 + 190 permit tcp any any eq 3333 + 200 permit tcp any any eq nat ttl eq 255 + 210 permit tcp any eq bgp any + 220 permit rsvp any any + 230 permit tcp any any eq 9340 + 240 permit tcp any any eq 9559 + 250 permit udp any any eq 8503 + 260 permit udp any any eq lsp-ping + 270 permit udp any eq lsp-ping any + + Total rules configured: 27 + Configured on Ingress: control-plane(default VRF) + Active on Ingress: control-plane(default VRF) + +IP Access List v4Acl + 10 permit ip any any + + Total rules configured: 1 + Configured on Ingress: Et1/1 + Active on Ingress: Et1/1 + +Standard IP Access List stAcl + 10 permit any + + Total rules configured: 1 + +IP Access List noRulesAcl + + Total rules configured: 0 + Configured on Ingress: Et2/1 + Active on Ingress: Et2/1 + +IPV6 Access List default-control-plane-acl [readonly] + counters per-entry + 10 permit icmpv6 any any [match 335448 bytes in 4424 packets, 0:01:13 ago] + 20 permit ipv6 any any tracked + 
30 permit udp any any eq bfd hop-limit eq 255 + 40 permit udp any any eq bfd-echo hop-limit eq 254 + 50 permit udp any any eq multihop-bfd micro-bfd sbfd + 60 permit udp any eq sbfd any eq sbfd-initiator + 70 permit ospf any any + 80 permit 51 any any + 90 permit 50 any any + 100 permit tcp any any eq ssh telnet www snmp bgp https netconf-ssh gnmi + 110 permit udp any any eq bootps bootpc snmp ntp ptp-event ptp-general + 120 permit tcp any any eq mlag hop-limit eq 255 + 130 permit udp any any eq mlag hop-limit eq 255 + 140 permit tcp any any range 5900 5910 + 150 permit tcp any any range 50000 50100 + 160 permit udp any any range 51000 51100 + 170 permit udp any any eq dhcpv6-client dhcpv6-server + 180 permit tcp any eq bgp any + 190 permit tcp any any eq nat hop-limit eq 255 + 200 permit udp any any eq nat hop-limit eq 255 + 210 permit rsvp any any + 220 permit pim any any + 230 permit tcp any any eq 9340 + 240 permit tcp any any eq 9559 + 250 permit udp any any eq 8503 + 260 permit udp any any eq lsp-ping + 270 permit udp any eq lsp-ping any + + Total rules configured: 27 + Configured on Ingress: control-plane(default VRF) + Active on Ingress: control-plane(default VRF) + +IPV6 Access List v6Acl + 10 permit ipv6 3891:3c58:6300::/64 any + 20 permit ipv6 any host 2fe1:b468:24a:: + 30 deny ipv6 host 3411:91c1:: host 4210:cc23:d2de:: + + Total rules configured: 3 + Configured on Ingress: Et1/1 + Active on Ingress: Et1/1 + +MAC Access List mlist + 10 deny any any + + Total rules configured: 1 + Configured on Ingress: Et11/1 + Configured on Egress: Et11/1 + Active on Ingress: Et11/1 + Active on Egress: Et11/1` +``` + +- This command displays only the configuration and status of each ACL on the switch. 
+ +``` +`switch# **show access-lists summary** +Phone ACL bypass: disabled +IPV4 ACL default-control-plane-acl [readonly] + Total rules configured: 27 + Configured on Ingress: control-plane(default VRF) + Active on Ingress: control-plane(default VRF) + +IPV4 ACL v4Acl + Total rules configured: 1 + Configured on Ingress: Et1/1 + Active on Ingress: Et1/1 + +Standard IPV4 ACL stAcl + Total rules configured: 1 + +IPV4 ACL noRulesAcl + Total rules configured: 0 + Configured on Ingress: Et2/1 + Active on Ingress: Et2/1 + +IPV6 ACL default-control-plane-acl [readonly] + Total rules configured: 27 + Configured on Ingress: control-plane(default VRF) + Active on Ingress: control-plane(default VRF) + +IPV6 ACL v6Acl + Total rules configured: 3 + Configured on Ingress: Et1/1 + Active on Ingress: Et1/1 + +MAC ACL mlist + Total rules configured: 1 + Configured on Ingress: Et11/1 + Configured on Egress: Et11/1 + Active on Ingress: Et11/1 + Active on Egress: Et11/1` +``` + +- This command displays all rules in list2 ACL and the configuration and status. + + +``` +`switch# **show access-list list2** +IP Access List list2 + 10 permit ip 10.10.10.0/24 any + 20 permit ip any host 10.20.10.1 + 30 deny ip host 10.10.10.1 host 10.20.10.1 + + Total rules configured: 3 + Configured on Ingress: Et1/1 + Active on Ingress: Et1/1 + +IPV6 Access List list2 + 10 permit ipv6 3891:3c58:6300::/64 any + 20 permit ipv6 any host 2fe1:b468:24a:: + 30 deny ipv6 host 3411:91c1:: host 4210:cc23:d2de:: + + Total rules configured: 3 + Configured on Ingress: Et2/1 + Active on Ingress: Et2/1 +switch#` +``` + + +The above output displayed two ACLs as the switch had an IPv4 ACL and an IPv6 ACL with the same name. + +- This command displays all rules in list2 ACL on Ethernet 1/1 with the configuration and status. 
+ +``` +`switch# **show access-list list2 interface Ethernet 1/1** +IP Access List list2 + 10 permit ip 10.10.10.0/24 any + 20 permit ip any host 10.20.10.1 + 30 deny ip host 10.10.10.1 host 10.20.10.1 + + Total rules configured: 3 + Configured on Ingress: Et1/1 + Active on Ingress: Et1/1 +switch#` +``` + + +### show ip access-lists + + +The **show ip access-lists** command displays the contents of IPv4 and standard IPv4 Access Control Lists (ACLs) on the switch with the configuration and status. Use the **summary** option to display only the configuration and status with details such as the name of the ACL, total rules configured, configured and active status containing interface information, and supplicant information as in the case of dynamic ACLs from dot1x sessions. + + +**Command Mode** + + +Privileged EXEC + + +**Command Syntax** + + +show ip access-lists [interface interface_ipv4]|[acl_name acl_name] | [scope summary] + + +**Parameters** + + +- **interface** Filter on interfaces such as Ethernet, VLANs, and Port Channels. Selection options include the following: + + +- **no parameter** - Displays all IPv4 ACLs. + +- **interface_ipv4** - Display the ACLs on a specified interface. + + +- acl_name - Specify the name of a list to display. Selection options include the following: + + +- **no parameter** - Displays all IPv4 ACLs. + +- **acl_name** - Specify an IPv4 ACL to display. + + +- **scope** - Displays detailed or summarized information. Selection options include the following: + + +- **no parameter** - Display all rules in the specified lists with the configuration and status. + +- **summary** - Display only the configuration and status. + + +**Examples** + + +- This command displays all rules in list2 IPv4 ACL, configuration, and status.
+ +``` +`switch# **show ip access-lists list2** +IP Access List list2 + 10 permit ip 10.10.10.0/24 any + 20 permit ip any host 10.20.10.1 + 30 deny ip host 10.10.10.1 host 10.20.10.1 +Total rules configured: 3 + Configured on Ingress: Et1/1 + Active on Ingress: Et1/1 +switch#` +``` + + +The above output can also be displayed with the help of the **show ip access-lists interface Ethernet 1/1** command since the ACL list2 applies to the Ethernet 1/1 interface. + +- This command displays the name and number of rules in each list on the switch. + +``` +`switch# **show ip access-lists summary** +IPV4 ACL default-control-plane-acl + Total rules configured: 12 + Configured on: control-plane + Active on : control-plane + +IPV4 ACL list2 + Total rules configured: 3 +IPV4 ACL test1 + Total rules configured: 6 +Standard IPV4 ACL test_1 + Total rules configured: 1 +IPV4 ACL test_3 + Total rules configured: 0 +switch#` +``` + +- This command displays the summary and lists all the configured IPv4 ACLs. + +``` +`switch # **show ip access-lists summary** +IPV4 ACL default-control-plane-acl [readonly] + Total rules configured: 17 + Configured on Ingress: control-plane(default VRF) + Active on Ingress: control-plane(default VRF) + +IPV4 ACL ipAclLimitTest + Total rules configured: 0 + Configured on Egress: Vl2148,2700 + Active on Egress: Vl2148,2700` +``` + + +### show ip prefix-list + + +The **show ip prefix-list** command displays all rules for the specified IPv4 prefix list. The command displays all IPv4 prefix list rules if a prefix list name is not specified. + + +**Command Mode** + + +EXEC + + +**Command Syntax** + + +**show ip prefix-list [display_items list_name]** + + +**Parameters** + + +**display_items** - Specifies the name of prefix lists to display rules. Options include: + + +- **no parameter** - Display all IPv4 prefix list rules. + +- **list_name** Specifies the IPv4 prefix list to display rules. 
+ + +**Example** + + +This command displays all rules in the route-one IPv4 prefix list. + +``` +`switch(config-ip-pfx)# **show ip prefix-list** +ip prefix-list route-one + seq 10 deny 10.1.1.0/24 + seq 20 deny 10.1.0.0/16 + seq 30 permit 12.15.4.9/32 + seq 40 deny 1.1.1.0/24 +switch(config-ip-pfx)#` +``` + + +### show ipv6 access-lists + + +The **show ipv6 access-lists** command displays the contents of all IPv6 Access Control Lists (ACLs) on the switch with the configuration and status. Use the **summary** option to display only the configuration and status, which contain details such as the name of the ACL, total rules configured, configured and active status with interface information, and supplicant information in case of dynamic ACLs from dot1x sessions. + + +**Command Mode** + + +Privileged EXEC + + +**Command Syntax** + + +show ipv6 access-lists [ interface interface_ipv6] [supplicant supplicant][acl_name acl_name][scope summary] + + +**Parameters** + + +- **interface** Filter on interfaces such as Ethernet, VLANs, and Port Channels. Selection options include the following: + + +- **no parameter** - Displays all IPv6 ACLs. + +- **interface_ipv6** - Display the ACLs on a specified interface. + + +- acl_name - Specify the name of a list to display. Selection options include the following: + + +- **no parameter** - Displays all IPv6 ACLs. + +- **acl_name** - Specify an IPv6 ACL to display. + + +- scope - Displays detailed or summarized information. Selection options include the following: + + +- **no parameter** - Display all rules in the specified lists with the configuration and status. + +- **summary** Display only the configuration and status. + + +**Examples** + + +- This command displays all rules in list2 IPv6 ACL.
+ +``` +`switch# **show ipv6 access-lists list2** +IPV6 Access List list2 + 10 permit ipv6 3891:3c58:6300::/64 any + 20 permit ipv6 any host 2fe1:b468:024a:: + 30 deny ipv6 host 3411:91c1:: host 4210:cc23:d2de:: +Total rules configured: 3 + Configured on Ingress: Et1/1 + Active on Ingress: Et1/1 +switch#` +``` + + +The above output can also be displayed using the **show ipv6 access-lists interface Ethernet 1/1** command since the ACL list2 applies to the Ethernet 1/1 interface. + +- This command displays the name and number of rules in each list on the switch. + +``` +`switch# **show ipv6 access-lists summary** +IPV6 ACL list2 + Total rules configured: 3 + Configured on Ingress: Et1/1 + Active on Ingress: Et1/1 + +IPV6 ACL test1 + Total rules configured: 6 + +IPV6 ACL test_1 + Total rules configured: 1 + +Standard IPV6 ACL test_3 + Total rules configured: 0 +switch#` +``` + + +### show ipv6 prefix-list + + +The **show ipv6 prefix-list** command displays all rules for the specified IPv6 prefix list. The command displays all IPv6 prefix lists if a prefix list name is not specified. + + +**Command Mode** + + +EXEC + + +**Command Syntax** + + +show ipv6 prefix-list [display_items list_name] + + +**Parameters** + + +**display_items** Specifies the name of prefix lists for which rules are displayed. Options include: + + +- **no parameter** All IPv6 prefix lists are displayed. + +- **list_name** Specifies the IPv6 prefix list for which rules are displayed.
+ + +**Examples** + + +- This command displays all rules in the map1 IPv6 prefix list: + +``` +`switch> **show ipv6 prefix-list map1** +ipv6 prefix-list map1 +seq 10 permit 3:4e96:8ca1:33cf::/64 +seq 15 deny 3:4400::/64 +seq 20 permit 3:11b1:8fe4:1aac::/64 +seq 30 permit 3:1bca:3ff2:634a::/64 +seq 40 permit 3:1bca:1141:ab34::/64` +``` + +- This command displays all prefix lists: + +``` +`switch> **show ipv6 prefix-list** +ipv6 prefix-list map1 +seq 10 permit 3:4e96:8ca1:33cf::/64 +seq 15 deny 3:4400::/64 +seq 20 permit 3:11b1:8fe4:1aac::/64 +seq 30 permit 3:1bca:3ff2:634a::/64 +seq 40 permit 3:1bca:1141:ab34::/64 +ipv6 prefix-list FREDD +ipv6 prefix-list route-five +ipv6 prefix-list map2 +seq 10 deny 10:1:1:1::/64 ge 72 le 80 +seq 20 deny 10:1::/32` +``` + + +### show mac access-lists + + +The show mac access-lists command displays the contents of all MAC Access Control Lists (ACLs) on the switch, along with their configuration and status. Use the summary option to display only the configuration and status, which contain details such as the name of the ACL, the total rules configured, and where the ACL is configured/active with a status containing specific interface information. + + +**Command Mode** + + +Privileged EXEC + + +**Command Syntax** + + +show mac access-lists [interface interface_acl] [acl_name acl_name] [scope summary] + + +**Parameters** + + +- **interface** - Filter by interfaces such as Ethernet, VLANs, and Port Channels. Selection options include the following: + + +- **no parameter** - Display all MAC ACLs. + +- **interface_acl** - Display MAC ACLs attached to the interface if present. + +- **acl_name** Display the list name. Selection options include the following: + + +- **no parameter** - Display all MAC ACLs. + +- **acl_name** - Display a specific MAC ACL. + +- **scope** - Display detailed or summarized information. 
Selection options include the following: + + +- **no parameter** - Display all rules in the specified lists including the configuration and status. + +- **summary** - Display only the configuration and status of the MAC ACL. + + +**Examples** + + +- This command displays all rules in **mlist2** MAC ACL. + +``` +`switch# **show mac access-list mlist2** +MAC Access List mlist2 + 10 permit 1024.4510.F125 0.0.0 any aarp + 20 permit any 4100.4500.0000 0.FF.FFFF novell + 30 deny any any + + Total rules configured: 3 + Configured on Ingress: Et1/1 + Active on Ingress: Et1/1` +``` + + +The above output can also be displayed with the help of the **show mac access-lists interface Ethernet 1/1** command since the ACL mlist2 applies to the Ethernet 1/1 interface. + +- This command displays the number of rules in each MAC ACL on the switch. + +``` +`switch# **show mac access-list summary** +MAC ACL mlist1 + Total rules configured: 6 + +MAC ACL mlist2 + Total rules configured: 3 + Configured on Ingress: Et1/1 + Active on Ingress: Et1/1 + +MAC ACL mlist3 + Total rules configured: 1 + +MAC ACL mlist4 + Total rules configured: 0` +``` + + +### show platform arad acl tcam summary + + +The **show platform arad acl tcam summary** command displays the percentage of TCAM utilization per forwarding ASIC. + + +**Command Mode** + + +EXEC + + +**Command Syntax** + + +show platform arad acl tcam summary + + +**Parameter** + + +**summary** - Displays the ACL TCAM summary. + + +**Example** + + +This command displays the percentage of TCAM utilization per forwarding ASIC. + +``` +`switch# **show platform arad acl tcam summary** +The total number of TCAM lines per bank is 1024.
+ +======================================================== +Arad3/0: +======================================================== + Bank Used Used % Used By + 1 4 0 IP RACLs +Total Number of TCAM lines used is: 4 + +======================================================== +Arad3/4: +======================================================== + Bank Used Used % Used By + 1 2 0 IP RACLs +Total Number of TCAM lines used is: 2` +``` + + +### show platform arad acl tcam + + +The **show platform arad acl tcam** command displays the number of TCAM entries (hardware resources) occupied by the ACL on each forwarding ASIC. + + +This command applies only to DCS-7500E and DCS-7280E series switches. + + +**Command Mode** + + +EXEC + + +**Command Syntax** + + +show platform arad acl tcam [scope [detail | diff | hw | shadow | summary] + + +**Parameters** + + +**scope** Specifies the information displayed. Options include: + + +- **detail** - Displays the ACL TCAM details. + +- **diff** - Displays the difference between hardware and shadow. + +- **hw** - Displays the ACL entries from hardware. + +- **shadow** - Displays the ACL entries from shadow. + +- **summary** - Displays the ACL TCAM summary. + + +**Examples** + + +- This command displays the number of TCAM entries used by the Arad0 ASIC. In this example, the ACL is applied to two VLANs (**Vl2148** and **Vl2700**), but the number of TCAM entries occupied is only one. + +``` +`switch# **show platform arad acl tcam detail** +ip access-list ipAclLimitTest (Shared RACL, 0 rules, 1 entries, direction out, +state success, Acl Label 2) +Fap: Arad0, Shared: true, Interfaces: Vl2148, Vl2700 +Bank Offset Entries +0 0 1 +Fap: Arad1, Shared: true, Interfaces: Vl2148 +Bank Offset Entries +0 0 1` +``` + +- This command displays the percentage of TCAM utilization per forwarding ASIC. + +``` +`switch# **show platform arad acl tcam summary** +The total number of TCAM lines per bank is 1024.
+======================================================== +Arad0: +======================================================== + Bank Used Used % Used By + 0 1 0 IP Egress PACLs/RACLs +Total Number of TCAM lines used is: 1 +======================================================== +Arad1: +======================================================== + Bank Used Used % Used By + 0 1 0 IP Egress PACLs/RACLs +Total Number of TCAM lines used is: 1` +``` + + +### show platform arad mapping + + +The **show platform arad mapping** command displays the mapping between the interfaces and the forwarding ASICs. + + +**Command Mode** + + +EXEC + + +**Command Syntax** + + +**show platform arad chip_name mapping** + + +**Parameter** + + +**chip_name** Specifies the Arad chip name. + + +**Example** + + +This command displays the mapping between the interfaces and the forwarding ASICs on the Arad3/0 chip. + +``` +`switch# **show platform arad arad3/0 mapping** +Arad3/0 Port SysPhyPort Voq ( Fap,FapPort) Xlge Serdes +------------------------------------------------------------------------------- + Ethernet3/1/1 34 288 (0 , 2) n/a (20) +...............................................................................` +``` + + +### show platform fap acl + + +The **show platform fap acl** command displays the ACL information of Sand platform devices. + + +**Command Mode** + + +Privileged EXEC + + +**Command Syntax** + + +**show platform fap acl [ipkgv | l4ops | mirroring | opkgv | pmf | tcam | udf | vsicfg**] + + +**Parameters** + + +- **ipkgv** - Displays the ACL Ingress Interface Specification (IPKGV) information. + +- **l4ops** - Displays the ACL Layer 4 Options (L4OPS) information. + +- **mirroring** - Displays the mirroring ACL information. + +- **opkgv** - Displays the ACL Egress Interface Specification (OPKGV) information. + +- **pmf** - Displays the Pmf. + +- **tcam** - Displays the ACL TCAM information. + +- **udf** - Displays the ACL UDF information. 
+ +- **vsicfg** - Displays the ACL Virtual Switch Instance (VSI) CONFIG information. + + +**Guidelines** + + +Supported on DCS-7280SE and DCS-7500E series platforms only. + + +**Example** + + +This command displays brief information about all installed mirroring ACLs. + +``` +`switch(config)# **show platform fap acl mirroring** + +============== + Aggregate ACLs +============== + + (list2:0->2) type=2; version=0 + - list2 [ prio 0 ] => session 2 + + (list1:10->1,list3:20->3) type=0; version=13 + - list3 [ prio 20 ] => session 3 + - list1 [ prio 10 ] => session 1 + +====================== + Interface-ACL Mapping +====================== + + Ethernet1 => (list1:10->1,list3:20->3) [ ipv4 ] + Ethernet33 => (list2:0->2) [ mac ]` +``` + + +### show platform fap acl tcam + + +The **show platform fap acl tcam** command displays the number of TCAM entries (hardware resources) occupied by the ACL on each forwarding ASIC of Sand platform devices. + + +**Command Mode** + + +Privileged EXEC + + +**Command Syntax** + + +**show platform fap acl tcam [detail | diff | hw | shadow | summary]** + + +**Parameter** + + +- **detail** - Displays the number of TCAM entries (hardware resources) occupied by the ACL on each forwarding ASIC. + +- **diff** - Displays the difference between hardware and shadow. + +- **hw** - Displays ACL entries from hardware. + +- **shadow** - Displays ACL entries from shadow. + +- **summary** - Displays the percentage of TCAM utilization per forwarding ASIC. + + +**Example** + + +This command displays the number of TCAM entries and other ACL TCAM details.
+ +``` +`switch# **show platform fap acl tcam detail** +ip access-list ipAcl0000 (RACL, 1 rules, 2 entries, direction in, state success) + Shared: false + Interface: Vlan0002 + ------------------- + Fap: Arad3/0 + Bank Offset Entries + 1 0 2 + Interface: Vlan0003 + ------------------- + Fap: Arad3/0 + Bank Offset Entries + 1 2 2 + Fap: Arad3/4 + Bank Offset Entries + 1 0 2` +``` + + +### show platform fap acl tcam hw + + +The **show platform fap acl tcam hw** command displays the TCAM entries configured for each TCAM bank including policy-maps and corresponding traffic match. + + +This command applies only to DCS-7280(E/R), DCS-7500(E/R) series switches. + + +**Command Mode** + + +EXEC + + +**Command Syntax** + + +show platform fap fap_name acl tcam hw + + +**Parameters** + + +- **fap_name** - Specifies the switch chip-set name. + + +**Example** + + +This command displays the TCAM entries configured for each TCAM bank including policy maps and corresponding traffic matches. + +``` +`switch# **show platform fap Arad1 acl tcam hw** +================================================================================ +Arad1 Bank 0 Type: dbPdpIp, dbPdpIp6, dbPdpMpls, dbPdpNonIp, dbPdpTunnel +================================================================================ +---------------------------------------------------- +|Offs|X|PR|TT|R|QI|V6MC|DPRT|SPRT|F|DEST |V|ACT |H| +---------------------------------------------------- +|29 |4|59| | |01| | | | | |3|0008f|0| +| |4|59| | |01| | | | | |0|00000|0| +|30 |4|33| | |01| | | | | |3|0008f|0| +| |4|33| | |01| | | | | |0|00000|0| +|31 |4|32| | |01| | | | | |3|0008f|0| +| |4|32| | |01| | | | | |0|00000|0| +|32 |4| | | |01|ff02| | | | |3|00097|0| +| |4| | | |01|ff02| | | | |0|00000|0| +|33 |4|06| | |01| | |00b3| |26ffd|3|0009b|0| +| |4|06| | |01| | |00b3| |26ffd|0|00000|0| +|34 |4|06| | |01| |00b3| | |26ffd|3|0009b|0| +---------------------------------------------- +|Offs|X|R|QI|DAHI|PT|DALO |DEST |V|ACT |H| 
+---------------------------------------------- +----------------------------------------------------------------------------- +|Offs|X|TT0|QI|FOI|TT1|DEST |TT1P |PT|VX_DP|PN|F|MC|O|V|HDR OFFSETS |ACT |H| +================================================================================ +Arad1 Bank 1 Type: dbIpQos +================================================================================ +---------------------------------------------------------------------- +|Offs|X|TC|CL|DPRT|SPRT|VQ|L4OPS |PP|PR|F|V4_DIP |V4_SIP |V|ACT |H| +---------------------------------------------------------------------- +|0 |0| | | | | | |01| | | | |3|00000|0| +| |0| | | | | | |01| | | | |0|00000|0| +---------------------------------------------------------------------- +<-------OUTPUT OMITTED FROM EXAMPLE-------->` +``` + + +### show platform fap acl tcam summary + + +The **show platform fap acl tcam summary** command displays for each forwarding ASIC, the number of TCAM entries consumed per ACL type, and in which TCAM bank the entries are installed. A mirroring ACL does not consume TCAM resources unless attached to a mirroring source interface, and a mirroring destination is configured. If the mirroring destination is a GRE tunnel, at least one nexthop entry for the tunnel destination must be resolved before a TCAM entry is installed. + + +**Command Mode** + + +EXEC + + +**Command Syntax** + + +show platform fap acl tcam summary + + +**Example** + + +This command displays the number of TCAM entries consumed per ACL type, the bank installed, and ASIC. Three TCAM entries are consumed across two forwarding ASICs, two for IP ACLs, and one for MAC ACLs. 
+ +``` +`switch# **show platform fap acl tcam summary** +======================================================== + Arad0: +======================================================== + Bank Used Used % Used By + 0, 1 2 0 IP Mirroring + Total Number of TCAM lines used is: 4 +======================================================== + Arad1: +======================================================== + Bank Used Used % Used By + 2 1 0 Mac Mirroring` +``` + + +### show platform trident tcam + + +The **show platform trident tcam** command displays the TCAM entries configured for each TCAM group including policy maps and corresponding hits. + + +**Command Mode** + + +EXEC + + +**Command Syntax** + + +show platform trident tcam [acl | cpu-bound | detail | directed-broadcast | entry | mirror | pbr | pipe | qos | shared | summary] + + +**Parameters** + + +- **no parameters** - Displays TCAM entries for each TCAM group. + +- **acl** - Displays the trident ACL information. + +- **cpu-bound** - Displays the trident cpu-bound information. + +- **detail** - Lists all TCAM entries. + +- **directed-broadcast** - Allows inbound broadcast IP packets with a source IP address that is one of the permitted broadcast hosts. + +- **entry** - Displays the TCAM entry information. + +- **mirror** - Displays the trident Mirroring ACL information. + +- **pbr** - Displays the trident PBR ACL information. + +- **pipe** - Specifies a pipe for filtering. + +- **qos** - Displays the trident QOS information. + +- **shared** - Displays the ACL Sharing information. + +- **summary** - Displays the TCAM allocation information. + + +**Guidelines** + + +Applies only to DCS-7010, DCS-7050/DCS-7050X, DCS-7250X, and DCS-7300X series switches. + + +**Examples** + + +- This command displays the Trident mirroring ACL information.
+ +``` +`switch(config)# **show platform trident tcam mirror** +=== Mirroring ACLs on switch Linecard0/0 === + +Session: mir-sess2 + +INGRESS ACL mirAcl2* uses 2 entries + Assigned to ports: Ethernet32/1` +``` + +- This command displays the allowed IP destination addresses from the incoming packets. + +``` +`switch# **show platform trident tcam directed-broadcast** +DirectedBroadcast Feature Tuples. +Src Ip Dst Ip Action Hits +--------------- --------------- ------- ------------ +10.1.1.1 192.164.2.15 Permit 0 +20.1.1.1 192.164.2.15 Permit 0 +30.1.1.1 192.164.2.15 Permit 0 +10.1.1.1 192.166.2.15 Permit 0 +20.1.1.1 192.166.2.15 Permit 0 +30.1.1.1 192.166.2.15 Permit 0 +10.1.1.1 192.168.2.255 Permit 0 +20.1.1.1 192.168.2.255 Permit 0 +30.1.1.1 192.168.2.255 Permit 0 +* 192.164.2.15 Deny 0 +* 192.166.2.15 Deny 0 +* 192.168.2.255 Deny 0` +``` + +- This command displays detailed information for the TCAM group. + +``` +`switch# **show platform trident tcam detail** +=== TCAM detail for switch Linecard0/0 === +TCAM group 9 uses 42 entries and can use up to 1238 more. + Mlag control traffic uses 4 entries. + 589826 0 hits - MLAG - SrcPort UDP Entry + 589827 0 hits - MLAG - DstPort UDP Entry + 589828 0 hits - MLAG - SrcPort TCP Entry + 589829 0 hits - MLAG - DstPort TCP Entry + CVX traffic reserves 6 entries (0 used). + L3 Control Priority uses 23 entries. + 589836 0 hits - URM - SelfIp UDP Entry + 589837 0 hits - URM - SelfIp TCP Entry +589848 0 hits - OSPF - unicast + 589849 71196 hits - OSPFv2 - Multicast + 589850 0 hits - OSPFv3 - Multicast + 589851 0 hits - OSPF Auth ESP - Multicast + 589852 0 hits - OSPF Auth ESP - Unicast + 589853 0 hits - IP packets with GRE type and ISIS protocol + 589854 0 hits - RouterL3 Vlan Priority 6,7 Elevator + 589855 0 hits - RouterL3 DSCP 48-63 Elevator + 589856 0 hits - RouterL3 Priority Elevator + 589857 0 hits - NextHopToCpu, Glean + 589858 0 hits - L3MC Cpu OIF + IGMP Snooping Flooding reserves 8 entries (6 used).
+589864 0 hits - IGMP Snooping Restricted Flooding L3 from local +mlag peer + 589865 0 hits - IGMP Snooping Restricted Flooding L3 + L4 MicroBfd traffic reserves 1 entries (0 used). +TCAM group 13 uses 99 entries and can use up to 1181 more. + Dot1x MAB traffic uses 1 entries. + 851968 0 hits - Dot1xMab Rule + +<-------OUTPUT OMITTED FROM EXAMPLE--------> + +ck338.22:14:38(config-pmap-qos-policy1)#` +``` + + +### show route-map + + +The **show route-map** command displays the contents of configured route maps. + + +**Command Mode** + + +EXEC + + +**Command Syntax** + + +**show route-map [map_name]** + + +**Parameters** + + +- **no parameter** Displays the content of all configured route maps. + +- **map_name** Displays the content of the specified route map. + + +**Examples** + + +- This command displays the **map1** route map. + +``` +`switch(config)# **show route-map map1** +route-map map1 permit 10 + Description: + Match clauses: + SubRouteMap: + Set clauses: + set as-path prepend last-as 12 + set as-path prepend auto auto` +``` + +- This command displays the **map** route map. + +``` +`switch> **show route-map map** +route-map map permit 5 + Match clauses: + match as 456 +Set clauses: +route-map map permit 10 + Match clauses: +match ip next-hop 2.3.4.5 + match as-path path_2 + Set clauses: + set local-preference 100` +``` + + +### system profile + + +The **system profile** command creates a new Ternary Content-Addressable Memory (TCAM) profile in the running configuration. + + +The **default system profile** and **no system profile** commands delete non-default TCAM profiles from the running configuration. 
+ + +**Command Mode** + + +Hardware TCAM + + +**Command Syntax** + + +system profile [profile_name | default | mirroring-acl | pbr-match-nexthop-group | qos | tap-aggregation-default | tap-aggregation-extended | tc-counters] + + +**default system profile** + + +**no system profile** + + +**Parameters** + + +- **profile_name** - Creates a profile with the specified name. + +- **default** - Creates a default profile. + +- **mirroring-acl** - Creates a mirroring-ACL profile. + +- **pbr-match-nexthop-group** - Creates a pbr-match-nexthop-group profile. + +- **qos** - Creates a Quality of Service (QoS) profile. + +- **tap-aggregation-default** - Creates a tap-aggregation-default profile. + +- **tap-aggregation-extended** - Creates a tap-aggregation-extended profile. + +- **tc-counters** - Creates a tc-counters profile. + + +**Guideline** + + +Compatible with the DCS-7280SE and DCS-7500E series switches only. + + +**Examples** + + +- These commands create a mirroring-ACL profile. + +``` +`switch(config)# **hardware tcam** +switch(config-hw-tcam)# **system profile mirroring-acl** +switch(config-hw-tcam)# **show hardware tcam profile** + Configuration Status +FixedSystem mirroring-acl mirroring-acl +switch(config-hw-tcam)#` +``` + +- These commands delete non-default TCAM profiles. + +``` +`switch(config)# **hardware tcam** +switch(config-hw-tcam)#show hardware tcam profile + Configuration Status +Linecard9 mirroring-acl mirroring-acl +Linecard8 mirroring-acl mirroring-acl +Linecard3 mirroring-acl mirroring-acl +Linecard4 mirroring-acl mirroring-acl +Linecard6 mirroring-acl mirroring-acl +switch(config-hw-tcam)# **default system profile** +switch(config-hw-tcam)# **show hardware tcam profile** + Configuration Status +Linecard9 default default +Linecard8 default default +Linecard3 default default +Linecard4 default default +Linecard6 default default +switch(config-hw-tcam)#` +``` + +- These commands delete TCAM profiles. 
+ +``` +`switch(config-hw-tcam)# **show hardware tcam profile** + Configuration Status +Linecard9 tc-counters tc-counters +Linecard8 tc-counters tc-counters +Linecard3 tc-counters tc-counters +Linecard4 tc-counters tc-counters +Linecard6 tc-counters tc-counters +switch(config-hw-tcam)# **no system profile** +switch(config-hw-tcam)# **show hardware tcam profile** + Configuration Status +Linecard9 default default +Linecard8 default default +Linecard3 default default +Linecard4 default default +Linecard6 default default +switch(config-hw-tcam)#` +``` diff --git a/docs/arista-scraped/ingress-and-egress-per-port-for-ipv4-and-ipv6-counters.md b/docs/arista-scraped/ingress-and-egress-per-port-for-ipv4-and-ipv6-counters.md new file mode 100644 index 00000000..6f6c76ef --- /dev/null +++ b/docs/arista-scraped/ingress-and-egress-per-port-for-ipv4-and-ipv6-counters.md @@ -0,0 +1,181 @@ + + + +# Ingress and Egress Per-Port for IPv4 and IPv6 Counters + + +This feature supports per-interface ingress and egress packet and byte counters for IPv4 +and IPv6. + + +This section describes Ingress and Egress per-port for IPv4 and IPv6 counters, including +configuration instructions and command descriptions. 
+ + +Topics covered by this chapter include: + + +- Configuration + +- Show commands + +- Dedicated ARP Entry for TX IPv4 and IPv6 Counters + +- Considerations + + +## Configuration + + +IPv4 and IPv6 ingress counters (count **bridged and routed** +traffic, supported only on front-panel ports) can be enabled and disabled using the +**hardware counter feature ip in** +command: + + +``` +`**[no] hardware counter feature ip in**` +``` + + +For IPv4 and IPv6 ingress and egress counters that include only +**routed** traffic (supported on Layer3 interfaces such as +routed ports and L3 subinterfaces only), use the following commands: + + +Note: The DCS-7300X, DCS-7250X, DCS-7050X, and DCS-7060X platforms +do not require configuration for IPv4 and IPv6 packet counters for only routed +traffic. They are collected by default. Other platforms (DCS-7280SR, DCS-7280CR, and +DCS-7500-R) need the feature enabled. + + +``` +`**[no] hardware counter feature ip in layer3**` +``` + + +``` +`**[no] hardware counter feature ip out layer3**` +``` + + +### hardware counter feature ip + + +Use the **hardware counter feature ip** command to enable ingress +and egress counters at Layer 3. The **no** and **default** forms of the command +disable the feature. The feature is enabled by default. + + +**Command Mode** + + +Configuration mode + + +**Command Syntax** + + +**hardware counter feature ip in|out layer3** + + +**no hardware counter feature ip in|out layer3** + + +**default hardware counter feature ip in|out layer3** + + +**Example** + + +This example enables ingress and egress IP counters for Layer 3. +``` +`**switch(config)# hardware counter feature ip in layer3**` +``` + + +``` +`**switch(config)# hardware counter feature ip out layer3**` +``` + + +## Show commands + + +Use the [**show interfaces counters ip**](/um-eos/eos-ethernet-ports#xzx_RbdvgrfI6B) command to +display IPv4, IPv6 packets, and octets.
+ + +**Example** + + +``` +`switch# **show interfaces counters ip** +Interface IPv4InOctets IPv4InPkts IPv6InOctets IPv6InPkts +Et1/1 0 0 0 0 +Et1/2 0 0 0 0 +Et1/3 0 0 0 0 +Et1/4 0 0 0 0 +... +Interface IPv4OutOctets IPv4OutPkts IPv6OutOctets IPv6OutPkts +Et1/1 0 0 0 0 +Et1/2 0 0 0 0 +Et1/3 0 0 0 0 +Et1/4 0 0 0 0 +...` +``` + + +You can also query the output from the **show interfaces counters +ip** command through SNMP via the ARISTA-IP-MIB. + + +To clear the IPv4 or IPv6 counters, use the [**clear +counters**](/um-eos/eos-ethernet-ports#topic_dnd_1nm_vnb) command. + + +**Example** +``` +`switch# **clear counters**` +``` + + +## Dedicated ARP Entry for TX IPv4 and IPv6 Counters + + +IPv4/IPv6 egress Layer 3 (**hardware counter feature ip out layer3**) +counting on DCS-7280SR, DCS-7280CR, and DCS-7500-R platforms works based on the ARP entry of +the next hop. By default, an IPv4 next hop and an IPv6 next hop that resolve to the same MAC +address and interface share the ARP entry. + + +To differentiate the counters between IPv4 and IPv6, disable +**arp** entry sharing with the following command: + + +``` +`**ip hardware fib next-hop arp dedicated**` +``` + + + + + Note: This command is required for IPv4 and IPv6 egress counters + to operate on the DCS-7280SR, DCS-7280CR, and DCS-7500-R platforms. + + + + +## Considerations + + + + + + + - Packet sizes greater than 9236 bytes are not counted by per-port IPv4 and IPv6 counters. + + - Only the DCS-7260X3, DCS-7368, DCS-7300, DCS-7050SX3, DCS-7050CX3, DCS-7280SR, + DCS-7280CR and DCS-7500-R platforms support the **hardware counter feature ip in** command. + + - Only the DCS-7280SR, DCS-7280CR and DCS-7500-R platforms support the **hardware counter feature ip [in|out] layer3** command.
diff --git a/docs/arista-scraped/inter-vrf-local-route-leaking.md b/docs/arista-scraped/inter-vrf-local-route-leaking.md new file mode 100644 index 00000000..7c5b358d --- /dev/null +++ b/docs/arista-scraped/inter-vrf-local-route-leaking.md @@ -0,0 +1,305 @@ + + + +# Inter-VRF Local Route Leaking + + +Inter-VRF local route leaking allows the leaking of routes from one VRF (the source VRF) to +another VRF (the destination VRF) on the same router. +Inter-VRF routes can exist in any VRF (including the +default VRF) on the system. Routes can be leaked using the +following methods: + +- Inter-VRF Local Route Leaking using BGP +VPN + +- Inter-VRF Local Route Leaking using VRF-leak +Agent + + +## Inter-VRF Local Route Leaking using BGP VPN + + +Inter-VRF local route leaking allows the user to export and import routes from one VRF to another +on the same device. This is implemented by exporting routes from a VRF to the local VPN table +using the route target extended community list and importing the same route target extended +community lists from the local VPN table into the target VRF. VRF route leaking is supported +on VPN-IPv4, VPN-IPv6, and EVPN types. + + +Figure 1. Inter-VRF Local Route Leaking using Local VPN Table + + +### Accessing Shared Resources Across VPNs + + +To access shared resources across VPNs, all the routes from the shared services VRF must be +leaked into each of the VPN VRFs, and customer routes must be leaked into the shared +services VRF for return traffic. Accessing shared resources allows the route target of the +shared services VRF to be exported into all customer VRFs, and allows the shared services +VRF to import route targets from customers A and B. The following figure shows how to +provide customers, corresponding to multiple VPN domains, access to services like DHCP +available in the shared VRF. + + +Route leaking across the VRFs is supported +on VPN-IPv4, VPN-IPv6, and EVPN. + + +Figure 2. 
Accessing Shared Resources Across VPNs + + +### Configuring Inter-VRF Local Route Leaking + + +Inter-VRF local route leaking is configured using VPN-IPv4, VPN-IPv6, and EVPN. Prefixes can be +exported and imported using any of the configured VPN types. Ensure that the same VPN +type that is exported is used while importing. + + +Leaking unicast IPv4 or IPv6 prefixes is supported and achieved by exporting prefixes locally to +the VPN table and importing locally from the VPN table into the target VRF on the same +device as shown in the figure titled **Inter-VRF Local Route Leaking using Local VPN +Table** using the **route-target** command. + + +Exporting or importing the routes to or from the EVPN table is accomplished with the following +two methods: + +- Using VXLAN for encapsulation + +- Using MPLS for encapsulation + + +#### Using VXLAN for Encapsulation + + +To use VXLAN encapsulation type, make sure that VRF to VNI mapping is present and the interface +status for the VXLAN interface is up. This is the default encapsulation type for +EVPN. 
+ + +**Example** + + +The configuration for VXLAN encapsulation type is as +follows: +``` +`switch(config)# **router bgp 65001** +switch(config-router-bgp)# **address-family evpn** +switch(config-router-bgp-af)# **neighbor default encapsulation VXLAN next-hop-self source-interface Loopback0** +switch(config)# **hardware tcam** +switch(config-hw-tcam)# **system profile VXLAN-routing** +switch(config-hw-tcam)# **interface VXLAN1** +switch(config-hw-tcam-if-Vx1)# **VXLAN source-interface Loopback0** +switch(config-hw-tcam-if-Vx1)# **VXLAN udp-port 4789** +switch(config-hw-tcam-if-Vx1)# **VXLAN vrf vrf-blue vni 20001** +switch(config-hw-tcam-if-Vx1)# **VXLAN vrf vrf-red vni 10001**` +``` + + +#### Using MPLS for Encapsulation + + +To use MPLS encapsulation type to export +to the EVPN table, MPLS needs to be enabled globally on the device and +the encapsulation method needs to be changed from default type, that +is VXLAN to MPLS under the EVPN address-family sub-mode. + + +**Example** +``` +`switch(config)# **router bgp 65001** +switch(config-router-bgp)# **address-family evpn** +switch(config-router-bgp-af)# **neighbor default encapsulation mpls next-hop-self source-interface Loopback0**` +``` + + +### Route-Distinguisher + + +Route-Distinguisher (RD) uniquely identifies routes from a particular VRF. +Route-Distinguisher is configured for every VRF from which routes are exported from or +imported into. + + +The following commands are used to configure Route-Distinguisher for a VRF. + + +``` +`switch(config-router-bgp)# **vrf vrf-services** +switch(config-router-bgp-vrf-vrf-services)# **rd 1.0.0.1:1** + +switch(config-router-bgp)# **vrf vrf-blue** +switch(config-router-bgp-vrf-vrf-blue)# **rd 2.0.0.1:2**` +``` + + +### Exporting Routes from a VRF + + +Use the **route-target export** command to export routes from a VRF to the +local VPN or EVPN table using the route target +extended community list. 
+ + +**Examples** + +- These commands export routes from +**vrf-red** to the local VPN +table. +``` +`switch(config)# **service routing protocols model multi-agent** +switch(config)# **mpls ip** +switch(config)# **router bgp 65001** +switch(config-router-bgp)# **vrf vrf-red** +switch(config-router-bgp-vrf-vrf-red)# **rd 1:1** +switch(config-router-bgp-vrf-vrf-red)# **route-target export vpn-ipv4 10:10** +switch(config-router-bgp-vrf-vrf-red)# **route-target export vpn-ipv6 10:20**` +``` + +- These commands export routes from +**vrf-red** to the EVPN +table. +``` +`switch(config)# **router bgp 65001** +switch(config-router-bgp)# **vrf vrf-red** +switch(config-router-bgp-vrf-vrf-red)# **rd 1:1** +switch(config-router-bgp-vrf-vrf-red)# **route-target export evpn 10:1**` +``` + + +### Importing Routes into a VRF + + +Use the **route-target import** command to import the exported routes from +the local VPN or EVPN table to the target VRF +using the route target extended community +list. + + +**Examples** + +- These commands import routes from the VPN +table to +**vrf-blue**. +``` +`switch(config)# **service routing protocols model multi-agent** +switch(config)# **mpls ip** +switch(config)# **router bgp 65001** +switch(config-router-bgp)# **vrf vrf-blue** +switch(config-router-bgp-vrf-vrf-blue)# **rd 2:2** +switch(config-router-bgp-vrf-vrf-blue)# **route-target import vpn-ipv4 10:10** +switch(config-router-bgp-vrf-vrf-blue)# **route-target import vpn-ipv6 10:20**` +``` + +- These commands import routes from the EVPN +table to +**vrf-blue**. +``` +`switch(config)# **router bgp 65001** +switch(config-router-bgp)# **vrf vrf-blue** +switch(config-router-bgp-vrf-vrf-blue)# **rd 2:2** +switch(config-router-bgp-vrf-vrf-blue)# **route-target import evpn 10:1**` +``` + + +### Exporting and Importing Routes using Route +Map + + +To manage VRF route leaking, control the export and import prefixes with route-map export or +import commands. 
The route map is effective only if the VRF or the VPN +paths are already candidates for export or import. The route-target +export or import command must be configured first. Setting BGP +attributes using route maps is effective only on the export end. + + +Note: Prefixes that are leaked are not re-exported to the VPN table from the target VRF. + +**Examples** + +- These commands export routes from +**vrf-red** to the local VPN +table. +``` +`switch(config)# **service routing protocols model multi-agent** +switch(config)# **mpls ip** +switch(config)# **router bgp 65001** +switch(config-router-bgp)# **vrf vrf-red** +switch(config-router-bgp-vrf-vrf-red)# **rd 1:1** +switch(config-router-bgp-vrf-vrf-red)# **route-target export vpn-ipv4 10:10** +switch(config-router-bgp-vrf-vrf-red)# **route-target export vpn-ipv6 10:20** +switch(config-router-bgp-vrf-vrf-red)# **route-target export vpn-ipv4 route-map EXPORT_V4_ROUTES_T0_VPN_TABLE** +switch(config-router-bgp-vrf-vrf-red)# **route-target export vpn-ipv6 route-map EXPORT_V6_ROUTES_T0_VPN_TABLE**` +``` + +- These commands export routes from +**vrf-red** to the EVPN +table. +``` +`switch(config)# **router bgp 65001** +switch(config-router-bgp)# **vrf vrf-red** +switch(config-router-bgp-vrf-vrf-red)# **rd 1:1** +switch(config-router-bgp-vrf-vrf-red)# **route-target export evpn 10:1** +switch(config-router-bgp-vrf-vrf-red)# **route-target export evpn route-map EXPORT_ROUTES_T0_EVPN_TABLE**` +``` + +- These commands import routes from the VPN table to +**vrf-blue**. 
+``` +`switch(config)# **service routing protocols model multi-agent** +switch(config)# **mpls ip** +switch(config)# **router bgp 65001** +switch(config-router-bgp)# **vrf vrf-blue** +switch(config-router-bgp-vrf-vrf-blue)# **rd 1:1** +switch(config-router-bgp-vrf-vrf-blue)# **route-target import vpn-ipv4 10:10** +switch(config-router-bgp-vrf-vrf-blue)# **route-target import vpn-ipv6 10:20** +switch(config-router-bgp-vrf-vrf-blue)# **route-target import vpn-ipv4 route-map IMPORT_V4_ROUTES_VPN_TABLE** +switch(config-router-bgp-vrf-vrf-blue)# **route-target import vpn-ipv6 route-map IMPORT_V6_ROUTES_VPN_TABLE**` +``` + +- These commands import routes from the EVPN table to +**vrf-blue**. +``` +`switch(config)# **router bgp 65001** +switch(config-router-bgp)# **vrf vrf-blue** +switch(config-router-bgp-vrf-vrf-blue)# **rd 2:2** +switch(config-router-bgp-vrf-vrf-blue)# **route-target import evpn 10:1** +switch(config-router-bgp-vrf-vrf-blue)# **route-target import evpn route-map IMPORT_ROUTES_FROM_EVPN_TABLE**` +``` + + +## Inter-VRF Local Route Leaking using VRF-leak +Agent + + +Inter-VRF local route leaking allows routes to leak from one VRF to another using a route +map as a VRF-leak agent. VRFs are leaked based on the preferences assigned to each +VRF. + + +### Configuring Route Maps + + +To leak routes from one VRF to another using a route map, use the [router general](/um-eos/eos-evpn-and-vcs-commands#xx1351777) command to enter Router-General +Configuration Mode, then enter the VRF submode for the destination VRF, and use the +[leak routes](/um-eos/eos-evpn-and-vcs-commands#reference_g2h_2z3_hwb) command to specify the source +VRF and the route map to be used. Routes in the source VRF that match the policy in the +route map will then be considered for leaking into the configuration-mode VRF. If two or +more policies specify leaking the same prefix to the same destination VRF, the route +with a higher (post-set-clause) distance and preference is chosen. 
+ + +**Example** + + +These commands configure a route map to leak routes from **VRF1** +to **VRF2** using route map +**RM1**. +``` +`switch(config)# **router general** +switch(config-router-general)# **vrf VRF2** +switch(config-router-general-vrf-VRF2)# **leak routes source-vrf VRF1 subscribe-policy RM1** +switch(config-router-general-vrf-VRF2)#` +``` diff --git a/docs/arista-scraped/ipv4.md b/docs/arista-scraped/ipv4.md new file mode 100644 index 00000000..63622c09 --- /dev/null +++ b/docs/arista-scraped/ipv4.md @@ -0,0 +1,15434 @@ + + + +# IPv4 + + +Arista switches support Internet Protocol version 4 (IPv4) and Internet Protocol version 6 (IPv6) +for routing packets across network boundaries. This section describes +Arista’s implementation of IPv4 and includes these topics: + +- IPv4 Addressing + +- IPv4 Routing + +- IPv4 Multicast Counters + +- Route Management + +- IPv4 Route Scale + +- IP Source Guard + +- DHCP Server + +- DHCP Relay Global Configuration Mode + +- DHCP Relay Across VRF + +- DHCP Relay in VXLAN EVPN + +- DHCP +Snooping with Bridging + +- TCP MSS Clamping + +- IPv4 GRE Tunneling + +- GRE Tunneling Support + +- BfRuntime to Use Non-default VRFs + +- IPv4 Commands + + +## IPv4 Addressing + + +Each IPv4 network device is assigned a 32-bit IP address that identifies its network location. +These sections describe IPv4 address formats, data structures, configuration tasks, and +display options: + +- IPv4 Address Formats + +- IPv4 Address Configuration + +- Address Resolution Protocol (ARP) + +- Displaying ARP Entries + + +### IPv4 Address Formats + + +IPv4 addresses are composed of 32 bits, expressed in dotted decimal notation by four decimal +numbers, each ranging from **0** to +**255**. A subnet is identified by an IP address and an address +space defined by a routing prefix. 
The switch supports the following subnet formats: + +- **IP address and subnet mask:** The subnet mask is a 32-bit number (dotted decimal +notation) that specifies the subnet address space. The subnet address space is calculated +by performing an AND operation between the IP address and subnet mask. + +- **IP address and wildcard mask:** The wildcard mask is a 32-bit number (dotted +decimal notation) that specifies the subnet address space. Wildcard masks differ from +subnet masks in that the bits are inverted. Some commands use wildcard masks instead of +subnet masks. + +- **CIDR notation:** CIDR notation specifies the scope of the subnet space by using a +decimal number to identify the number of leading ones in the routing prefix. When +referring to wildcard notation, CIDR notation specifies the number of leading zeros in the +routing prefix. + + +**Examples** + +- These subnets (subnet mask and CIDR notation) are calculated +identically: +``` +`10.24.154.13 255.255.255.0 +10.24.154.13/24` +``` + +- The defined space includes all addresses between **10.24.154.0** +and **10.24.154.255**. These subnets (wildcard mask and CIDR +notation) are calculated +identically: +``` +`124.17.3.142 0.0.0.15 +124.17.3.142/28` +``` + + +The defined space includes all addresses between +**124.17.3.128** and +**124.17.3.143**. + + +### IPv4 Address Configuration + + +#### Assigning an IPv4 Address to an +Interface + + +The [ip +address](/um-eos/eos-data-plane-security#xx1144036) command specifies the +IPv4 address of an interface and the mask for the subnet to +which the interface is +connected. + +**Example**These commands configure +an IPv4 address with subnet mask for **VLAN +200**: +``` +`switch(config)# **interface vlan 200** +switch(config-if-Vl200)# **ip address 10.0.0.1/24** +switch(config-if-Vl200)#` +``` + + +#### Assigning an IPv4 Class E Address to an Interface + + +The ipvr +routable 240.0.0.0/4command +assigns a class E addresses to an interface. 
When +configured, the class E address traffic are routed through +BGP, OSPF, ISIS, RIP, static routes and programmed to the +FIB and kernel. By default, this command is disabled. + + +**Example** + +- These commands configure an IPv4 Class E +(**240/4**) address to an +interface. +``` +`switch(config)# **router general** +switch(config-router-general)# **ipv4 routable 240.0.0.0/4**` +``` + + +#### Assigning a Secondary IPv4 Address to an Interface + + +The [**ip +address secondary**](/um-eos/eos-data-plane-security#xx1144036) command +assigns a secondary IPv4 address to an interface. Each +interface can have multiple secondary IPv4 addresses +assigned to it. + + +**Example** + + +- Use the following commands to enter Ethernet Interface +Configuration Mode and add a secondary IP address, +192.168.168.25/32, to Ethernet interface, +Ethernet7/30/2: + +``` +`switch(config)# **interface Ethernet7/30/2** +switch(config-if-Et7/30/2)# **ip address 192.168.168.25/32 secondary** +switch(config-if-Et7/30/2)#` +``` + + +#### Detecting Duplicate IP Addresses on an Interface + + +The **ip address duplicate detection +disabled** command detects any +duplicate IP address on the interface. When the switch +detects the duplicate IP address, EOS generates a syslog +message. It helps the network operator to identify IP +addresses misconfiguration. By default, this feature is +enabled. + + +Note: This feature supports detecting +duplicate virtual IP, VARP, and VRRP addresses. + + +**Examples** + +- This command disables the feature on the +switch. +``` +`switch(config)# **ip address duplicate detection disabled**` +``` + +- This command enables the +feature. +``` +`switch(config)# **ip address duplicate detection logging**` +``` + +Note: Use +the commands in global configuration mode and not +per VRF. + + +This is an example of a Syslog message, when a duplicate IP address +is detected. 
+ + +``` +`Mar 24 16:41:57 cd290 Arp: %INTF-4-DUPLICATE_ADDRESS_WITH_HOST: IP address 100.1.1.2 +configured on interface Ethernet1/1 is in use by a host with +MAC address 00:00:01:01:00:00 on interface Ethernet1/1 in VRF default` +``` + + +### Address Resolution Protocol +(ARP) + + +Address Resolution Protocol (ARP) maps IP addresses to MAC addresses recognized by +the local network devices. The ARP cache consists of a table that stores the +correlated addresses of the devices that the router facilitates data +transmissions. + + +After receiving a packet, routers use ARP to find the device MAC address assigned to +the packet destination IP address. If the ARP cache contains both addresses, the +router sends the packet to the specified port. If the ARP cache does not contain +the addresses, ARP broadcasts a request packet to all devices in the subnet. The +device at the requested IP address responds and provides its MAC address. ARP +updates the ARP cache with a dynamic entry and forwards the packet to the +responding device. Add static ARP entries to the cache using the CLI. + + +#### Proxy ARP + + + +Proxy ARP enables a network device (proxy) to respond to ARP requests for network addresses on a +different network with its MAC address. Traffic to the destination directs +to the proxy device which then routes the traffic toward the ultimate +destination. + + +#### Configuring ARP + + +The switch uses ARP cache entries to correlate 32-bit IP addresses to 48-bit hardware addresses. +The arp aging +timeout command specifies the duration of +dynamic address entries in the Address Resolution Protocol (ARP) cache for +addresses learned through the Layer 3 interface. The default duration is +**14400** seconds (four hours). + + + +Entries refresh and expire at a random time within the range of +**80%-100%** of the cache expiry time. The +refresh attempts three times at an interval of **2%** +of the configured timeout. 
+ + +Static ARP entries never time out and +must be removed from the table manually. + + +**Example** + + +This command specifies an ARP cache duration of **7200** +seconds (two hours) for dynamic addresses added to the ARP cache learned +through **VLAN +200**. +``` +`switch(config)# **interface vlan 200** +switch(config-if-Vl200)# **arp aging timeout 7200** +switch(config-if-Vl200)# **show active** +interface Vlan200 + arp aging timeout 7200 +switch(config-if-Vl200)#` +``` + + +The **arp** command adds a static entry to an +Address Resolution Protocol (ARP) cache. + + +**Example** + + +This command adds a static entry to the ARP cache in the default +VRF. +``` +`switch(config)# **arp 172.22.30.52 0025.900e.c63c arpa** +switch(config)#` +``` + + +The arp proxy +max-delay command enables delaying proxy ARP +requests on the configuration mode interface. EOS disables Proxy ARP by +default. When enabled, the switch responds to all ARP requests, including +gratuitous ARP requests, with target IP addresses that match a route in the +routing table. When a switch receives a proxy ARP request, EOS performs a +check to send the response immediately or delay the response based on the +configured maximum delay in milliseconds (ms). + + +**Example** + + +Use the following command to set a delay of *500ms* before returning a +response to a proxy ARP +request. +``` +`switch(config)# **arp proxy max-delay 500ms**` +``` + + +#### Gratuitous ARP + + +EOS broadcasts gratuitous ARP packets using a device in response to an internal change rather +than as a response to an ARP request. The gratuitous ARP packet consists of +a request packet (no reply expected) that supplies an unrequested update of +ARP information. In a gratuitous ARP packet, both the source and destination +IP addresses use the IP of the sender, and the destination MAC address uses +the broadcast address (**ff:ff:ff:ff:ff:ff**). 
+ + +Gratuitous ARP packets generate to update ARP tables after an IPv4 address or a MAC address +change occurs. + + +##### Configuring Gratuitous ARP + + +By default, Arista switch interfaces reject gratuitous ARP request packets. The arp gratuitous +accept command configures an L3 +interface to accept the gratuitous ARP request packets sent from a +different device in the network and add the mappings to the ARP +table. Gratuitous ARP can be configured on Ethernet interfaces, +VLANs/SVI, or L3 port channels, but has no effect on L2 +interfaces. + + +**Example** + + +These commands enable gratuitous ARP packet acceptance on +**interface ethernet +2/1**. +``` +`switch (config)# **interface ethernet 2/1** +switch (config-if-Et2/1)# **arp gratuitous accept**` +``` + + +### Displaying ARP Entries + + +The show ip arp command displays ARP cache entries that map an IP address +to a corresponding MAC address. The table displays addresses by their +host names when the command includes the +**resolve** argument. + + +**Examples** + +- This command displays ARP cache entries that map MAC +addresses to IPv4 +addresses. +``` +`switch> **show ip arp** + +Address Age (min) Hardware Addr Interface +172.25.0.2 0  004c.6211.021e Vlan101, Port-Channel2 +172.22.0.1 0  004c.6214.3699 Vlan1000, Port-Channel1 +172.22.0.2 0  004c.6219.a0f3 Vlan1000, Port-Channel1 +172.22.0.3 0  0045.4942.a32c Vlan1000, Ethernet33 +172.22.0.5 0  f012.3118.c09d Vlan1000, Port-Channel1 +172.22.0.6 0  00e1.d11a.a1eb Vlan1000, Ethernet5 +172.22.0.7 0  004f.e320.cd23 Vlan1000, Ethernet6 +172.22.0.8 0  0032.48da.f9d9 Vlan1000, Ethernet37 +172.22.0.9 0  0018.910a.1fc5 Vlan1000, Ethernet29 +172.22.0.11 0  0056.cbe9.8510 Vlan1000, Ethernet26 + +switch>` +``` + +- This command displays ARP cache entries that map MAC +addresses to IPv4 addresses. The output displays +host names assigned to IP addresses in place of +the +address. 
+``` +`switch> **show ip arp resolve** + +Address Age (min) Hardware Addr Interface +green-vl101.new         0  004c.6211.021e Vlan101, Port-Channel2 +172.22.0.1 0  004c.6214.3699 Vlan1000, Port-Channel1 +orange-vl1000.n         0  004c.6219.a0f3 Vlan1000, Port-Channel1 +172.22.0.3 0  0045.4942.a32c Vlan1000, Ethernet33 +purple.newcompa         0  f012.3118.c09d Vlan1000, Port-Channel1 +pink.newcompany         0  00e1.d11a.a1eb Vlan1000, Ethernet5 +yellow.newcompa         0  004f.e320.cd23 Vlan1000, Ethernet6 +172.22.0.8 0  0032.48da.f9d9 Vlan1000, Ethernet37 +royalblue.newco         0  0018.910a.1fc5 Vlan1000, Ethernet29 +172.22.0.11 0  0056.cbe9.8510 Vlan1000, Ethernet26 + +switch>` +``` + + +#### ARP Inspection + + +The Address Resolution Protocol (ARP) inspection command ip arp +inspection vlan activates a +security feature that protects the network from ARP spoofing. EOS +intercepts ARP requests and responses on untrusted interfaces on +specified VLANs and verifies intercepted packets to ensure valid +IP-MAC address bindings. On trusted interfaces, all incoming ARP +packets process and forward without verification, and all invalid ARP +packets are dropped. + + +##### Enabling and Disabling ARP Inspection + + +By default, EOS disables ARP inspection on all VLANs. + + +**Examples** + +- This command enables ARP inspection on VLANs +**1** through +**150**. +``` +`switch(config)# **ip arp inspection vlan 1 - 150** +switch(config)#` +``` + +- This command disables ARP inspection on VLANs +**1** through +**150**. +``` +`switch(config)# **no ip arp inspection vlan 1 - 150** +switch(config)#` +``` + +- This command sets the ARP inspection default +to VLANs **1** through +**150**. +``` +`switch(config)# **default ip arp inspection vlan 1 - 150** +switch(config)#` +``` + +- This command enable ARP inspection on multiple +VLANs **1** through +**150** and +**200** through +**250**. 
+``` +`switch(config)# **ip arp inspection vlan 1-150,200-250** +switch(config)#` +``` + + +##### Syslog for Invalid ARP Packets +Dropped + + +After dropping an invalid ARP packet, EOS +displays the following syslog message. The log +severity level can be set higher if required. + + +``` +`%SECURITY-4-ARP_PACKET_DROPPED: Dropped ARP packet on interface Ethernet28/1 Vlan +2121 because invalid mac and ip binding. Received: 00:0a:00:bc:00:de/1.1.1.1.` +``` + + +##### Displaying ARP Inspection States + + +The command show ip arp inspection vlan displays the configuration and +operation state of ARP inspection. For a VLAN range +specified by **show ip arp inspection +vlan**, the output displays only VLANs with ARP +inspection enabled. If you do not specify a VLAN, the output +displays all VLANs with ARP inspection enabled. The +operation state turns to **Active** when the hardware +traps ARP packets for inspection. + + +**Example** + + +This command displays the configuration and operation state of ARP +inspection for VLANs **1** through +**150**. +``` +`switch(config)# **show ip arp inspection vlan 1 - 150** + +VLAN 1 +---------- +Configuration +: Enabled +Operation State : Active +VLAN 2 +---------- +Configuration +: Enabled +Operation State : Active +{...} +VLAN 150 +---------- +Configuration +: Enabled +Operation State : Active + +switch(config)#` +``` + + +##### Displaying ARP Inspection Statistics + + +The command show ip arp inspection statistics displays the statistics +of inspected ARP packets. For a VLAN specified by +**show ip arp inspection +vlan**, the output displays only VLANs +with ARP inspection enabled. If you do not specify a VLAN, the +output displays all VLANs with ARP inspection enabled. + + +The command clear arp inspection statistics clears ARP inspection statistics. + + +**Examples** + +- This command displays ARP inspection +statistics for **VLAN +2**. 
+``` +`switch(config)# **show ip arp inspection statistics vlan 2** + +Vlan : 2 +------------ +ARP Req Forwarded = 20 +ARP Res Forwarded = 20 +ARP Req Dropped = 1 +ARP Res Dropped = 1 + +Last invalid ARP: +Time: 10:20:30 ( 5 minutes ago ) +Reason: Bad IP/Mac match +Received on: Ethernet 3/1 +Packet: +  Source MAC: 00:01:00:01:00:01 +  Dest MAC: 00:02:00:02:00:02 +  ARP Type: Request +  ARP Sender MAC: 00:01:00:01:00:01 +  ARP Sender IP: 1.1.1 + +switch(config)#` +``` + +- This command displays ARP inspection +statistics for **ethernet interface +3/1**. +``` +`switch(config)# **show ip arp inspection statistics ethernet interface 3/1** + +Interface : 3/1 +-------- +ARP Req Forwarded = 10 +ARP Res Forwarded = 10 +ARP Req Dropped = 1 +ARP Res Dropped = 1 + +Last invalid ARP: +Time: 10:20:30 ( 5 minutes ago ) +Reason: Bad IP/Mac match +Received on: VLAN 10 +Packet: +  Source MAC: 00:01:00:01:00:01 +  Dest MAC: 00:02:00:02:00:02 +  ARP Type: Request +  ARP Sender MAC: 00:01:00:01:00:01 +  ARP Sender IP: 1.1.1 + +switch(config)#` +``` + +- This command clears ARP inspection +statistics. +``` +`switch(config)# **clear arp inspection statistics** +switch(config)#` +``` + + +##### Configuring Trust Interface + + +By default, all interfaces are untrusted. The command ip arp inspection trust +configures the trust state of an interface. + + +**Examples** + +- This command configures the trust state of an +interface. +``` +`switch(config)# **ip arp inspection trust** +switch(config)#` +``` + +- This command configures the trust state of an +interface to +untrusted. +``` +`switch(config)# **no ip arp inspection trust** +switch(config)#` +``` + +- This command configures the trust state of an +interface to the +default. +``` +`switch(config)# **default ip arp inspection trust** +switch(config)#` +``` + + +##### Configuring Rate Limit + + +After enabling ARP inspection, EOS traps ARP packets to the CPU. When the incoming ARP rate +exceeds expectations, two actions can be taken. 
For +notification purposes, the command ip arp inspection logging +enables logging of incoming ARP packets. The command ip arp inspection limit +disables the interfaces and prevents a denial-of-service +attack.. + + +**Examples** + +- This command enables logging of incoming ARP +packets when the rate exceeds the configured value +and sets the rate to +**2048**, the upper limit +for the number of invalid ARP packets allowed per +second. Then, it sets the burst consecutive +interval to monitor interface for a high ARP rate +to **15** seconds. + +``` +`switch(config)# **ip arp inspection logging rate 2048 burst interval 15** +switch(config)#` +``` + +- This command configures the rate limit of +incoming ARP packets to disable the interface when +the incoming ARP rate exceeds the configured +value, and sets the rate to +**512**, the upper limit for +the number of invalid ARP packets allowed per +second. Then sets the burst consecutive interval +to monitor the interface for a high ARP rate to +**11** seconds. + +``` +`switch(config)# **ip arp inspection limit rate 512 burst interval 11** +switch(config)#` +``` + +- This command displays verification of the +interface specific configuration. + +``` +`switch(config)# **interface ethernet 3/1** +switch(config)# **ip arp inspection limit rate 20 burst interval 5** +switch(config)# **interface Ethernet 3/3** +switch(config)# **ip arp inspection trust** +switch(config)# **show ip arp inspection interfaces** + + Interface      Trust State  Rate (pps) Burst Interval + -------------  -----------  ---------- -------------- + Et3/1          Untrusted    20         5 + Et3/3          Trusted      None       N/A + +switch(config)#` +``` + + +##### Disabling Errors Caused by ARP Inspection + + +If the incoming ARP packet rate on an interface exceeds the configured rate limit in burst +interval, EOS disables the interface by default. 
If +errdisabled, the interface remains in this state until you +intervene with the command **errdisable detect +cause arp-inspection**. For example, +after you perform a **shutdown** or +**no shutdown** of the +interface or it automatically recovers after a certain time +period. The command **errdisable recovery cause +arp-inspection** enables auto +recovery. The command **errdisable recovery +interval** enables sharing the auto +recovery interval among all disabled interfaces. See the +chapter [Data Transfer Introduction](/um-eos/eos-data-transfer#xx1133499) for information on all +**errdisable** commands. + + +**Examples** + +- This command enables errdisable caused by an +ARP inspection +violation. +``` +`switch(config)# **errdisable detect cause arp-inspection** +switch(config)#` +``` + +- This command disables errdisable caused by an +ARP inspection +violation. +``` +`switch(config)# **no errdisable detect cause arp-inspection** +switch(config)#` +``` + +- This command enables auto +recovery. +``` +`switch(config)# **errdisable recovery cause arp-inspection** +switch(config)#` +``` + +- This command disables auto +recovery. +``` +`switch(config)# **no errdisable recovery cause arp-inspection** +switch(config)#` +``` + +- This command enables sharing the auto recovery +interval of **10** seconds +among all errdisable +interfaces. +``` +`switch(config)# **errdisable recovery interval 10** +switch(config)#` +``` + +- This command disables sharing the auto +recovery interval of **10** +seconds among all errdisable +interfaces. +``` +`switch(config)# **no errdisable recovery interval 10** +switch(config)#` +``` + +- This command displays the reason for a port +entering the errdisable +state. 
+``` +`switch(config)# **show interfaces status errdisabled** + +Port         Name         Status       Reason +------------ ------------ ------------ --------------- +Et3/2                    errdisabled  arp-inspection + +switch(config)#` +``` + + +##### Configuring Static IP MAC Binding + + +The ARP inspection command ip source binding allows you to add static +IP-MAC binding. If enabled, ARP inspection verifies incoming +ARP packets based on the configured IP-MAC bindings. The +static IP-MAC binding entry can only be configured on Layer +2 ports. By default, there is no binding entry on the +system. + + +**Examples** + +- This command configures static IP-MAC binding +for IP address +**127.0.0.1,** MAC address +**0001.0001.0001**, +**vlan 1**, and Ethernet +interface **slot 4** and +**port +1**. +``` +`switch(config)# **ip source binding 127.0.0.1 0001.0001.0001 vlan 1 interface +ethernet 4/1** +switch(config)#` +``` + +- This command configures static IP-MAC binding +for IP address +**127.0.0.1**, MAC address +**0001.0001.0001**, +**vlan 1**, and +**port-channel interface +20**. +``` +`switch(config)# **ip source binding 127.0.0.1 0001.0001.0001 vlan 1 interface +port-channel 20** +switch(config)#` +``` + +- This command displays the configured IP-MAC +binding entries. Note that the Lease column +displays dynamic DHCP snooping binding entries. +For static binding entries, lease time displays as +infinite. +``` +`switch(config)# **show ip source binding 127.0.0.1 0001.0001.0001 static vlan 1 +interface port-channel 20** + +MacAddress      IpAddress   Lease(sec)  Type   VLAN  Interface +--------------- ----------- ----------- ------ ----- -------------- +0001.0001.0001  127.0.0.1  infinite    static 1     Port-Channel20 + +switch(config)#` +``` + + +## IPv4 Routing + + +Internet Protocol version 4 (IPv4) is a communications protocol used for relaying network packets +across a set of connected networks using the Internet Protocol suite. 
Routing transmits +network layer data packets over connected independent subnets. Each subnet is assigned +an IP address range, and each device on the subnet is assigned an IP address from that +range. The connected subnets have IP address ranges that do not overlap. + + +A router is a network device that connects +multiple subnets. Routers forward inbound packets to the subnet whose +address range includes the packets’ destination address. IPv4 and IPv6 +are internet layer protocols that define packet-switched internetworking, +including source-to-destination datagram transmission across multiple +networks. + + +These sections describe IPv4 routing and route creation options: + +- Enabling IPv4 Routing + +- Static and Default IPv4 Routes + +- Dynamic IPv4 Routes + +- Viewing IPv4 Routes and Network Components + + +### Enabling IPv4 Routing + + +When IPv4 routing is enabled, the switch attempts to deliver inbound packets to destination IPv4 +addresses by forwarding them to interfaces or next-hop addresses specified +by the forwarding table. + + +The ip routing command enables IPv4 routing. + + +**Example** + + +This command enables IP +routing: +``` +`switch(config)# **ip routing** +switch(config)#` +``` + + +### Static and Default IPv4 Routes + + +Static routes are entered through the CLI and are typically used when dynamic protocols cannot +establish routes to a specified destination prefix. Static routes are also useful when dynamic +routing protocols are not available or appropriate. Creating a static route associates a +destination IP address with a local interface. The routing table refers to these routes as +connected routes available for redistribution into routing domains defined by dynamic routing +protocols. + + +The ip route command creates a static route. The destination is a network +segment; the next-hop is either an IP address or a routable interface port. 
When multiple +routes exist to a destination prefix, the route with the lowest administrative distance takes +precedence. + + +By default, the administrative distance assigned to static routes is **1**. +Assigning a higher administrative distance to a static route +configures it to be overridden by dynamic routing data. For example, a +static route with a distance value of **200** is +overridden by OSPF intra-area routes, which have a default distance of +**110**. + + +A route tag is a 32-bit number that is attached to a route. Route maps use tags to filter routes. +Static routes have a default tag value of **0**. + + +**Example** + + +This command creates a static +route: +``` +`switch(config)#**ip route 172.17.252.0/24 vlan 500** +switch(config)#` +``` + + +#### Creating Default IPv4 Routes + + +The default route denotes the packet forwarding +rule that takes effect when no other route is configured for a specified +IPv4 address. All packets with destinations that are not established +in the routing table are sent to the destination specified by the default +route. + + +The IPv4 destination prefix is **0.0.0.0/0**, and the next-hop is the +default gateway. + + + + +**Example** + + +This command creates a default route and establishes +**192.14.0.4** as the default +gateway +address: +``` +`switch(config)#**ip route 0.0.0.0/0 192.14.0.4** +switch(config)#` +``` + + +#### Resolution RIB Profiles for Static Routes + + +Specify a Resolution RIB Profile as a system-connected per next-hop for a +static route. System-connected describes a static route that only resolves if the next hop +can be reached over a connected route. If you do not specify a system-connected route, the +static route resolves if the next hop can be reached over any type of route in the FIB, +including a connected route or a tunnel RIB route. 
+ + +**Configuring Resolution RIB Profile for Static Routes** + + +Use the following command to configure a Resolution RIB Profile for static route, 10.0.0.0/24, and 10.1.0.0: + + +``` +`switch(config)#**ip route vrf myVRF 10.0.0.0/24 10.1.0.0 resolution ribs system-connected**` +``` + + +**Displaying Resolution Profiles for Static Routes** + + +Use the **show ip route** command: + + +``` +`switch(config)#**show ip route** +interface Ethernet1 + mtu 1500 + no switchport + ip address 10.1.1.1/24 + ! +interface Ethernet2 + no switchport + ip address 10.10.10.1/24 + +ip route 10.100.100.0/24 10.10.10.2 resolution ribs system-connected + ! +arp 10.1.1.2 00:22:33:44:55:66 arpa +arp 10.10.10.2 00:22:33:44:55:67 arpa + ! +mpls tunnel static st1 10.10.10.2/32 10.1.1.2 Ethernet1 label-stack 9000` +``` + + +### Dynamic IPv4 Routes + + +Dynamic routing protocols establish dynamic routes. These protocols also maintain the routing +table and modify routes to adjust for topology or traffic changes. Routing protocols +assist the switch in communicating with other devices to exchange network information, +maintaining routing tables, and establishing data paths. + + +The switch supports these dynamic IPv4 +routing protocols: + + +- [OSPFv2 Introduction](/um-eos/eos-open-shortest-path-first-version-2#xzx_XvxFOLC7zF) + +- [Border Gateway Protocol (BGP)](/um-eos/eos-border-gateway-protocol-bgp) + +- [Routing Information Protocol (RIP)](/um-eos/eos-routing-information-protocol-rip) + +- [IS-IS](/um-eos/eos-is-is) + + +### Viewing IPv4 Routes and Network +Components + + +#### Displaying the FIB and Routing Table + + +The show ip route command displays routing table entries that are in the +forwarding information base (FIB), including static routes, routes to directly connected +networks, and dynamically learned routes. Multiple equal-cost paths to the same prefix are +displayed contiguously as a block, with the destination prefix displayed only on the first +line. 
+ + +The **show running-config** command displays configured commands not in the +FIB. The show ip route summary command displays the number of +routes, categorized by source, in the routing table. + + +**Examples** + +- This command displays IP routes learned through +BGP. +``` +`switch> **show ip route bgp** + +Codes: C - connected, S - static, K - kernel, + O - OSPF, IA - OSPF inter area, E1 - OSPF external type 1, + E2 - OSPF external type 2, N1 - OSPF NSSA external type 1, + N2 - OSPF NSSA external type2, B I - iBGP, B E - eBGP, + R - RIP, A - Aggregate + + B E 170.44.48.0/23 [20/0] via 170.44.254.78 + B E 170.44.50.0/23 [20/0] via 170.44.254.78 + B E 170.44.52.0/23 [20/0] via 170.44.254.78 + B E 170.44.54.0/23 [20/0] via 170.44.254.78 + B E 170.44.254.112/30 [20/0] via 170.44.254.78 + B E 170.53.0.34/32 [1/0] via 170.44.254.78 + B I 170.53.0.35/32 [1/0] via 170.44.254.2 + via 170.44.254.13 + via 170.44.254.20 + via 170.44.254.67 + via 170.44.254.35 + via 170.44.254.98 + +switch>` +``` + +- This command displays a summary of routing table +contents. +``` +`switch> **show ip route summary** + +Route Source Number Of Routes +------------------------------------- +connected 15 +static 0 +ospf 74 + Intra-area: 32 Inter-area:33 External-1:0 External-2:9 + NSSA External-1:0 NSSA External-2:0 +bgp 7 + External: 6 Internal: 1 +internal 45 +attached 18 +aggregate 0 + +switch>` +``` + + +#### Displaying the IP Route Age + + +The show ip route age command displays the time when the route for the +specified network was present in the routing table. It does not +account for changes in parameters like metrics, next hop etc. + + +**Example:** + + +This command displays the time since the last update to ip route +**172.17.0.0/20**. 
+``` +`switch> **show ip route 172.17.0.0/20 age** + +Codes: C - connected, S - static, K - kernel, + O - OSPF, IA - OSPF inter area, E1 - OSPF external type 1, + E2 - OSPF external type 2, N1 - OSPF NSSA external type 1, + N2 - OSPF NSSA external type2, B I - iBGP, B E - eBGP, + R - RIP, I - ISIS, A - Aggregate + + B E 172.17.0.0/20 via 172.25.0.1, **age 3d01h** + +switch>` +``` + + +#### Displaying Gateways + + +A gateway is a router that provides access to another network. The gateway of last resort, also +known as the default route, is the route that a packet uses when the route to its +destination address is unknown. The IPv4 default route in is +**0.0.0.0/0**. + + +The show ip route gateway command displays IP addresses of all gateways +(next hops) used by active routes. + + +**Example** + + +This command displays next hops used by active +routes. +``` +`switch> **show ip route gateway** + +The following gateways are in use: + 172.25.0.1 Vlan101 + 172.17.253.2 Vlan2000 + 172.17.254.2 Vlan2201 + 172.17.254.11 Vlan2302 + 172.17.254.13 Vlan2302 + 172.17.254.17 Vlan2303 + 172.17.254.20 Vlan2303 + 172.17.254.66 Vlan2418 + 172.17.254.67 Vlan2418 + 172.17.254.68 Vlan2768 + 172.17.254.29 Vlan3020 + +switch>` +``` + + +#### Displaying Host Routes + + +The show ip route host command displays all host routes in the host +forwarding table. Host routes are those whose destination prefix is the entire address (mask += **255.255.255.255** or prefix = **/32**). Each +displayed host route is labeled with its purpose: + + +- **F**      static routes from the FIB. + +- **R**     routes defined because the IP address is an interface address. + +- **B**      broadcast address. + +- **A**      routes to any neighboring host for which the switch has an ARP +entry. + + +**Example** + + +This command displays all host routes in the host forwarding +table. 
+``` +`switch# **show ip route host** + +R - receive B - broadcast F - FIB, A - attached + +F 127.0.0.1 to cpu +B 172.17.252.0 to cpu +A 172.17.253.2 on Vlan2000 +R 172.17.253.3 to cpu +A 172.17.253.10 on Vlan2000 +R 172.17.254.1 to cpu +A 172.17.254.2 on Vlan2901 +B 172.17.254.3 to cpu +B 172.17.254.8 to cpu +A 172.17.254.11 on Vlan2902 +R 172.17.254.12 to cpu + +F 172.26.0.28 via 172.17.254.20 on Vlan3003 + via 172.17.254.67 on Vlan3008 + via 172.17.254.98 on Vlan3492 +via 172.17.254.86 on Vlan3884 + via 172.17.253.2 on Vlan3000 +F 172.26.0.29 via 172.25.0.1 on Vlan101 +F 172.26.0.30 via 172.17.254.29 on Vlan3910 +F 172.26.0.31 via 172.17.254.33 on Vlan3911 +F 172.26.0.32 via 172.17.254.105 on Vlan3912 + +switch#` +``` + + +## IPv4 Multicast Counters + + +IPv4 multicast counters allow +association of IPv4 multicast routes with a packet or byte counter. + + +This chapter contains the following sections. + +- Multicast Counters Hardware Overview + +- Multicast Counters iBGP and eBGP Configuration + +- Configuring IPv4 Multicast Counters + + +### Multicast Counters Hardware +Overview + + +This section describes a hardware overview for multicast counters, and contains the following +sections. + +- Platform Independent Requirements for Counters + +- Policer Counter Overview + +- BGP Functions Supported for Arista Switches + +- Additional Requirements + + +#### Platform Independent Requirements +for Counters + + +The following platform independent requirements include: + +- Enable/Disable counters. + +- Clear counters. + +- Show counters. + +- Configure counter mode for byte (default) or frame mode. + + +#### Policer Counter Overview + + +The switch hardware has two policer banks, each with 4k entries, and each entry has one +32-bit entry1 and one 32-bit entry2, which can be used as either a packet counter or +byte counter. 
+ + +In the pipeline, each bank can have one policer index coming from upstream blocks, which +means different features cannot update multiple policer entries in the same bank +simultaneously. Therefore, different features cannot share entries in the same bank. + + +Each FFU/BST entry points to a corresponding RAM in switch hardware routing. A policer +index is saved in the action ram, so when installing a multicast route into hardware, +the platform code will get a policer index and save it in the action field. A counter is +not added to the action field if a policer index is unavailable. + + +Switch hardware can have multiple features competing for the policer banks. It is +desirable to have a platform command to reserve policer banks dedicated to a certain +feature. + + +The following command reserves one or two policer banks to be used only by the named +feature: + + +**[no] platform fm6000 [nat|acl|qos|multicast] policer banks +<1|2>** + + +Available bank(s) are reserved for the feature. Otherwise the command will take effect at +the next reboot or FocalPointV2 agent restart. This reservation guarantees the +configured number of bank(s) for this feature. However, the feature can still possibly +obtain the other policer bank if it needs more, and the other bank is available. + + +If a feature has a pending reservation request which is not fulfilled because of +availability, and some other feature frees a bank, the bank will be allocated to the +pending feature. + + +#### BGP Functions Supported for +Arista Switches + + +Arista switches support these BGP functions: + +- A single BGP instance + +- Simultaneous internal (IBGP) and external (EBGP) peering + +- Multiprotocol BGP + +- BGP Confederations + + +#### Additional Requirements + + +On switch hardware, the following additional requirements include: + +- Reservation of policer banks. + +- Notification of policer bank availability when +a policer entry is freed by other features. 
+ + +### Multicast Counters iBGP and +eBGP Configuration + + +This section describes the commands required to configure an iBGP and an eBGP topology, and +contains the following sections. + +- Policer Usage + + +#### Policer Usage + + +There are two types of counters – those created by wildcard creation and by specific creation. +When a specific counter is required, and the hardware runs out +of policer entries, a wildcard counter is forced to give up its +policer entry. + + +Suppose the user configures a specific counter, and the Starter Group (SG) already has a +wildcard-created counter. In that case, this counter is upgraded +to a specific one, with no change in the hardware policer index. +If the user configures both a wildcard counter and a specific +counter for this SG and subsequently deletes the specific +counter, the counter for this SG is downgraded to a wildcard, +with no change in the hardware policer index. However, if +another specific counter is pending for a hardware policer +index, then this policer entry will be assigned to that counter +due to its higher precedence. + + +Even if a counter is configured by the user, in order to conserve the use +of hardware resources, do not allocate a policer entry until a +real route (G, S) is programmed into the Frame Filtering and +Forwarding Unit (FFU). + + +### Configuring IPv4 Multicast +Counters + + +Perform the following CLI steps +to configure IPv4 multicast counters on the FM6000 platform: + + +- Execute the global +configuration command: + + +- **no****|****default** +**ip multicast count** +**bytes****|** +**packets** + + +Enables wildcard counters. Also used to change bytes/packets mode. When +hardware runs out of resources, specific creation has priority to preempt +counters from wildcard creation. The **bytes****|** +**packets** optional keyword enables the counter to be +in either bytes mode or packets mode. This mode applies to all counters. 
All +counter values will be reset to zero when the counter mode changes. + + +- **no****|****default** +**ip multicast count** +** ** + + +This only takes effect when **ip multicast count** is +enabled. Either ** ** or +**bytes****|****packets** +optional keyword is used. They can not be used concurrently. + + +No | default Commands: (default is same +as no) + + +- **`no ip multicast count`** Deletes all multicast counters, including explicit +** ** routes + +- **`no ip multicast count`** +** ** Removes +the config. Do not delete the counter because the +wildcard is still active. + +- If no **** is specified, +all multicast routes will have counters unless the hardware +runs out of resources. The creation of counters is referred +to as “wildcard creation.” + +- If **** is specified, only +**** will get a +counter (and no other route). The creation of counters is +referred to as “specific creation.” By default, all mcast +routes will have counters allocated. This **** configuration is applicable when +the hardware runs out of resources. Specific +**** creation has +priority to preempt counters from wildcard +creation. + + +The **byte****|****frame** +optional keyword enables the counter to be in either byte mode or frame +mode. This mode applies to all counters. When the counter mode changes, all +counter values will be reset to zero. + + +Either ****, or +**byte****|****frame** +optional keywords are used but cannot be used together. All counters are +**byte****|****frame**. +The **byte****|****frame** +mode is global and not applicable on a **** basis. +- Execute clear +command: + + +``` +`**clear ip multicast count **` +``` +- Execute show +command: + + +``` +`**show multicast fib ipv4 count**` +``` + + +This command currently exists but does not +show anything. 
+ + +This show command is intended to display +the following (example): + + +``` +`switch> **show multicast fib ipv4 count** +Activity poll time: 60 seconds +225.1.1.1 100.0.0.2 +Byte: 123 +Vlan100 (iif) +Vlan200 +Activity 0:00:47 ago` +``` + + +Total counts are the sum of counts from all sources in that group. + + +The count value can be **N/A** if a mroute does not have an associated +counter. + + +If the count value for any source in a **G** is **N/A**, then the total counts for **G** will be shown as **N/A**. However, the count values for other sources are still shown. + + +## Route Management + + +When enabling routing, the switch discovers the best route to a packet destination address by +exchanging routing information with other devices. EOS disables IP routing by +default. + + +The following sections describe routing features that EOS supports: + +- Route Redistribution + +- Equal Cost Multipath Routing (ECMP) and Load Sharing + +- Unicast Reverse Path Forwarding (uRPF) + +- Routing Tables / Virtual Routing and Forwarding (VRF) + +- RIB Route Control + + +### Route Redistribution + + +Route redistribution advertises connected (static) routes or routes +established by other routing protocols into a dynamic routing +protocol routing domain. By default, the switch advertises only +routes in a routing domain established by the protocol that +defined the domain. + + +Route redistribution commands specify the scope of the redistribution +action. By default, all routes from a specified protocol, or all +static routes, advertise into the routing domain. Commands can +filter routes by applying a route map and defining the subset of +routes to advertise. + + +### Equal Cost Multipath Routing +(ECMP) and Load Sharing + + +Equal Cost Multi-Path (ECMP) provides a routing strategy to forward traffic over multiple paths +with equal routing metric values. 
+ + +#### Configuring ECMP (IPv4) + + +EOS assigns all ECMP paths with the same tag value, and commands that +change the tag value of a path also change the tag value of all +paths in the ECMP route. + + +In a network topology using ECMP routing, hash polarization may result +when all switches perform identical hash calculations. Hash +polarization leads to uneven load distribution among the data paths. +Switches select different hash seeds to perform hash calculations +and avoid hash polarization. + + +The ip load-sharing command provides the hash seed with an algorithm for +distributing data streams among multiple equal-cost routes to a +specified subnet. + + +**Example** + + +This command sets the IPv4 load sharing hash seed to +**20**: +``` +`switch(config)# **ip load-sharing fm6000 20** +switch(config)#` +``` + + +#### Multicast Traffic Over ECMP + + +The switch attempts to spread outbound unicast and multicast traffic to all ECMP route paths +equally. To disable the sending of multicast traffic over ECMP, use +the [multipath none](/um-eos/eos-multicast-architecture#xx1151679) +command or the no version of the [multipath deterministic](/um-eos/eos-multicast-architecture#xx1216054) command. + + +#### Resilient ECMP + + +Resilient ECMP uses prefixes where routes are not desired for rehashing due to link flap, +typically where ECMP participates in load balancing. Resilient ECMP +configures a fixed number of next-hop entries in the hardware ECMP +table for all the routes within a specified IP address prefix. +Implementing fixed table entries for a specified next-hop address +allows the data flow hash to a valid next-hop number to remain +intact even when some next-hops go down or come back online. + + +Enable resilient ECMP for all routes within a specified prefix using the ip hardware fib ecmp resilience + + +command. 
The command specifies the maximum number of next-hop addresses +that the hardware ECMP table contains for the specified IP prefix +and configures a redundancy factor that facilitates the duplication +of next-hop addresses in the table. The fixed table space for the +address uses the maximum number of next hops multiplied by the +redundancy factor. When the table contains the maximum number of +next-hop addresses, the redundancy factor specifies the number of +times to list each address. When the table contains fewer than the +maximum number of next-hop addresses, the table space entries fill +with additional duplication of the next-hop addresses. + + +EOS supports resilient ECMP for IPv6 IP addresses. + + +**Example** + + +This command configures a hardware ECMP table space of 24 entries for the +IP address **10.14.2.2/24**. A maximum of six +next-hop addresses can be specified for the IP address. When the +table contains six next-hop addresses, each appears in the table +four times. When the table contains fewer than six next-hop +addresses, each address duplicates until filling all of the 24 table +entries. +``` +`switch(config)# **ip hardware fib ecmp resilience 10.14.2.2/24 capacity 6 redundancy 4** +switch(config)#` +``` + + +#### Unequal Cost Multipath (UCMP) for Static Routes + + +Unequal Cost Multipath (UCMP) for Static Routes provides a mechanism to forward traffic from a device on an ECMP route with the ratio of the +weights used for next-hops and program them into the Forwarding Information Database (FIB). 
+ + +**Configuring UCMP for Static Routes** + + +Use the following commands to configure UCMP on the VRF, ***myVRF***, +with an FEC maximum size of 100: + + +``` +`switch(config)# **router general** +switch(config-router-general)# **vrf myVRF** +switch(config-router-general-vrf-myVRF)# **route static ucmp forwarding fec maximum-size 100**` +``` + + +#### Aggregate Group Monitoring (AGM) for ECMP + + +This feature allows the monitoring of packets and bytes traversing the members of the +configured ECMP groups on the switch with a high time resolution. Once enabled, AGM +collects data for the specified duration, writes it to the specified file on the switch +storage, and then stops. + + +**Supported Platforms** + + +- DCS-7260CX3 + +- DCS-7060X5 + +- DCS-7388 + +- DCS-7060X6 + + +#### Configuring AGM for ECMP Groups + + +Note: You must have at least one ECMP Group configured on the switch. + + +To begin collecting data on the switch at 100 millisecond intervals for +1800 seconds, use the following command: + + +``` +`switch(config)# **start snapshot counters ecmp poll interval 100 milliseconds duration 1800 seconds**` +``` + + +Specify an optional URL to store the data. If not specified, the files +store in the non-persistent storage at +**/var/tmp/ecmpMonitor**. + + +If providing a URL, it must point to a valid file system. EOS allows the +following file systems: + + +- **file** - The path must start with +**/tmp** or +**/tmp**. The files store in +the non-persistent storage. + +- **flash** - Files store in persistent +storage. + + +Use the following command to interrupt the snapshot before the end of the +configured duration: + + +``` +`switch# **stop snapshot counters ecmp**` +``` + + +To delete previous snapshots, use the following command: + + +``` +`switch# **clear snapshot counters ecmp id_range**` +``` + + +If you do not specify a range of IDs, then all previous snapshots delete +from the system. 
+ + +#### Displaying AGM for ECMP Information + + +Use the **show snapshot counters ecmp history** command to display information about the configuration. + + +``` +`switch# **show snapshot counters ecmp history** + Request ID: 17 + Output directory URL: file:/var/tmp/ecmpMonitor + Output file name(s): ecmpMonitor-17-adj1284.ctr, ecmpMonitor-17-adj1268.ctr + Complete: True + Poll interval: 1000 microseconds + Total poll count: 59216 + Start time: 2024-06-17 17:58:36 + Stop time: 2024-06-17 17:59:36 + + L2 Adjacency ID Interfaces + --------------------- ---------------------------------------------------- + 1268 Ethernet54/1, Ethernet41/1, Ethernet1/1, Ethernet57/1 + 1284 Ethernet20/1, Ethernet35/1, Ethernet41/1, Ethernet8/1, Ethernet1/1` +``` + + +The output displays the list of previous snapshots including any current ones as well as the following information: + + +- **Request ID** - Identifies the snapshot Request ID to use for the **clear** +command. + +- **Output directory URL** - Identifies the snapshot storage location. + +- **Complete** - Identifies the snapshot completion status. + +- **Poll Interval** - Identifies the configured polling interval for the snapshot. + +- **Total poll count** - Identifies the total number of hardware counters collected. + +- **Start time** and **Stop time** - Identify the system time when the snapshot +started and stopped. + +- **L2 Adjacency ID** and +**Interfaces** - The summary +of the ECMP groups monitored by AGM. + + +#### Configuring IP-over-IP Hierarchical FEC + + +When the next hop of an IP route, the dependent route, resolves over another IP +route, the resolving route, the adjacency information of the FEC for +the resolving route duplicates into the dependent route FEC. +Configuring IP over IP Hierarchical FEC prevents duplication of the +adjacency information, and instead, the dependent route FEC points +to the resolving route FEC to form a hierarchical FEC for the +dependent route. 
This helps avoid unnecessary allocation of scarce +ECMP FECs in the case where the dependent route does not use ECMP, +but the resolving route does use ECMP. + + +Use the following commands to enable IP-over-IP HFEC: + + +``` +`switch(config)# **router general** +switch(config-router-general)# **rib fib fec hierarchical resolution**` +``` + + +#### Resilient Equal-Cost Multi-Path(RECMP) Deduping + + +Routes covered by a Resilient Equal-Cost Multi-Path (RECMP) prefix consists of routes +that use hardware tables dedicated for Equal-Cost Multi-Path (ECMP) routing. Resilient ECMP +(RECMP) deduping reduces the number of ECMP hardware table entries allocated by the switch to +force the routes with the same set of next hops but point to different hardware table entries +and point to the same hardware table entry when encountering high hardware resource utilization. +Forcing RECMP routes to change the hardware table entry that they point to may potentially cause +a traffic flow disruption for any existing flows going over that route. The deduping process +attempts to minimize the amount of potential traffic loss. +Each route needs to allocate +hardware table entries in the ASIC that contain forwarding information for the route, such as +the next-hops and egress links used by each next-hop uses. The network device uses these +hardware table entries when making forwarding decisions for a packet meant for a certain +route. These ECMP hardware tables have limited size and can fill up quickly if allocating a +large number of these hardware table entries. One option to ease the usage of these hardware +tables can force RECMP routes to share hardware table entries. + + +RECMP routes can point to +the same hardware table entry if they share the same set of next hops and the order of the +next-hops. 
However, RECMP routes may end up sharing the same set of next-hops, but the +next-hop ordering may be different between them, and the routes end up occupying different +hardware table entries in the ASIC. RECMP routing has a property where the current ordering of +next-hops for a given route can be influenced by the previous order. The ordering between the +routes can differ because these routes had a different set of next hops at some previous time +before they finally converged onto the same set of next-hops. + + +When the ECMP hardware +resource usage crosses the high threshold, the deduping process begins, and it lasts until the +ECMP hardware resource usage falls below the low threshold. Use the **ip hardware +fib next-hop resource optimization thresholds** command to modify the +thresholds. + + +##### Configuring Resilient ECMP Deduping + + +EOS disabled Resilient ECMP Deduping by default. + +- Use the following command to disable all the hardware resource optimization + features: +``` +`switch(config)# **ip hardware fib next-hop resource optimization disabled**` +``` + +- Use the following command to re-enable the all hardware resource optimization + features after disabling + them: +``` +`switch(config)# **no ip hardware fib next-hop resource optimization disabled**` +``` + +- Use the following command to configure the thresholds for starting and stopping the optimization: +``` +`switch(config)# **ip hardware fib next-hop resource optimization thresholds low <20> high <80>**` +``` + + + + + Note: + + + - The value specified for the threshold represents the percentage of resource +utilization, and uses an integer between **0** and +**100**. + + - Setting the high threshold to **80** indicates that +optimization starts when the resource utilization is above +**80%**. The default value of this threshold is +**90**. + + - Setting the low threshold to **20** indicates that +optimization stops when the resource utilization is below +**20%**. 
The default value of this threshold is +**85**. + + + + + + + +##### Show Commands + + +- The **show ip hardware fib summary** command displays the statistics +of this RECMP +deduping: +**Example** + + +``` +`switch# **show ip hardware fib summary** +Fib summary +----------- +Adjacency sharing: disabled +BFD peer event: enabled +Deletion Delay: 0 +Protect default route: disabled +PBR: supported +URPF: supported +ICMP unreachable: enabled +Max Ale ECMP: 600 +UCMP weight deviation: 0.0 +Maximum number of routes: 0 +Fib compression: disabled +**Resource optimization for adjacency programming: enabled +Adjacency resource optimization thresholds: low 20, high 80**` +``` + +The last two +lines of the output show if RECMP deduping is enabled, and the corresponding threshold +values for starting and stopping the optimization process. + +- The **show hardware capacity** command displays the utilization of +the hardware resources. The example below shows the multi-level hierarchy ECMP +resources: +``` +`switch# **show hardware capacity** +Forwarding Resources Usage + +Table Feature Chip Used Used Free Committed Best Case High + Entries (%) Entries Entries Max Watermark + Entries +------- --------------- ------- -------- ------- -------- ------------ ------------ --------- +ECMP 0 0% 4095 0 4095 0 +ECMP Mpls 0 0% 4095 0 4095 0 +ECMP Routing 0 0% 4095 0 4095 0 +ECMP VXLANOverlay 0 0% 4095 0 4095 0 +ECMP VXLANTunnel 0 0% 3891 0 3891 0` +``` + + +##### Limitations + + +- With RECMP deduping, optimization of a sub-optimal ECMP route requires releasing and +reallocating hardware resources for the route. Therefore the process may increase overall +convergence time for route programming. It may not be desirable to always start the +optimization when sufficient hardware resources exist. The threshold value for starting +the optimization should be adjusted based on the route scale of the network. 
+ +- The deduping of ECMP hardware resources may cause potential traffic flow disruption for +traffic flows going over RECMP routes with changing hardware table entries. While the +deduping process tries to minimize the amount of traffic flow disruption, it is still +sometimes inevitable. + +- RECMP hardware table entries can only be deduped to other RECMP hardware table entries +that share the same set of nexthops. This puts a limit to the amount of RECMP hardware table +entries that can be reduced to the number of RECMP hardware table entries with unique +nexthop sets. + + +### Cluster Load Balancing + + +Cluster load balancing distributes incoming network traffic across a cluster of servers +to improve performance, increase reliability, and ensure high availability. By +preventing any single server from becoming overwhelmed by network traffic, cluster load +balancing optimizes resource utilization and minimizes response times. + + +The core networking capability of cluster load balancing uses a load balancer acting as a +limiting factor for a group of servers acting as a cluster. When a client request +arrives, the load balancer intercepts it and by using various algorithms, decides which +server in the cluster can best provide handling of the request. The decision can be +based on server health, current load, or a simple round-robin rotation. The selected +server then processes the request and sends the response back to the client. + + +The `**load-balance**` command specifies the hashing algorithm and +fields to use for Equal-Cost Multi-Path (ECMP) load balancing on a router. ECMP allows a +router to use multiple next-hop addresses for the same destination, distributing traffic +across these paths. The configuration determines which parts of the packet header, such +as source IP or destination IP, to use for hash value creation. 
This ensures that a +single flow, a stream of packets with the same header information, consistently uses the +same path, preventing packet reordering and improving performance for applications such +as TCP. + + +Arista Cluster Load Balancing (CLB) optimizes traffic flows in data center clusters, +particularly for AI/ML workloads, using RoCE (Remote Direct Memory Access (RDMA) over +Converged Ethernet), and intelligently places flows in both directions to ensure +balanced traffic across all paths in a spine-leaf topology. By monitoring RoCE traffic +and making real-time adjustments to ensure consistent and high throughput communication +between Graphics Processing Units (GPU) servers, CLB eliminates bottlenecks and improves +overall network utilization. + + +#### Configuring Cluster Load Balancing + + +Access CLB commands in the Global Configuration mode and configure the fields used for +the hashing algorithm. + + +Use the following command to enter the CLB configuration +mode: +``` +`switch(config)# **load-balance cluster** +switch(config-clb)#` +``` + + +CLB supports VXLAN bridging and routed as the forwarding mode and encapsulation to deliver packets between Top of +Rack (TOR) switches over the uplinks. + + +Use the following command to configure VXLAN bridging as the forwarding mode: + + +``` +`switch(config-clb)# **forwarding type bridged encapsulation vxlan**` +``` + + +Use the following command to configure the forwarding mode as routed: + + +``` +`switch(config-clb)# **forwarding type routed**` +``` + + +If configuring the CLB forwarding type as routed, you must add the prefix length to match +the length used for the network on each TOR with GPU addresses, and by default, supports +only one length. For example, if the GPUs on a TOR use IPv4 addresses from +`10.0.0.1` to `10.0.0.255`, then configure the prefix +length as `24`.
+ + +``` +`switch(config-clb)# **destination grouping prefix length 24**` +``` + + +Use the `**flow source**` parameter to add the method for learning +flows. By default, EOS only supports +`learning`: +``` +`switch(config-clb)# **flow source learning**` +``` + + +To load-balance traffic on the TORs, use the round-robin method. EOS does not support any other method of load-balancing traffic. + + +``` +`switch(config-clb)# **load-balance method flow round-robin**` +``` + + +By default, flow aging timeout has a value of 600 seconds with a minimum of 30 seconds. Setting the interval between bursts +of training job network communication below 30 seconds negatively impacts performance as flows can be incorrectly +aged out of the hardware. Use the following command to configure the interval to 60 seconds: + + +``` +`switch(config-clb)# **flow source learning** +switch(config-clb-flow-learning)# **aging timeout 60 seconds**` +``` + + +CLB requires identification of the ports connected to the same GPU server. Use the **port groups** + to configure the ports and flows from the interfaces load-balance with each other. EOS does not limit the number of groups, +however, Arista Networks recommends using only one group per GPU server. + + +Use the following commands to add ***server1*** and interfaces, +***Et15/1,16/1,17/1,18/1***, to the port group: + + +``` +`switch(config-clb)# **port group host server1** +switch(config-clb-portgroup-server1)# **member Et15/1,16/1,17/1,18/1**` +``` + + +To limit the number of flows programmed for a port group and preserve hardware TCAM resources, use the following command to limit +the number of flows to 800: + + +``` +`switch(config-clb-portGroup-server1)# **flow limit 800**` +``` + + +Configure CLB flow match type as VXLAN bridging IPv4 traffic, and configure a VXLAN interface for the flow. 
+ + +``` +`switch(config)# **interface vxlan1** +switch(config)# **flow match encapsulation vxlan ipv4**` +``` + + +You can also configure the default flow match type as a non-VXLAN IPv4 packet: + + +``` +`switch(config)# **flow match encapsulation none ipv4**` +``` + + +##### Displaying Cluster Load Balancing Information + + +Use the **show load-balance cluster status** command to display the current status of CLB: + + +``` +`switch# **show load-balance cluster status** +CLB Status: enabled +Port Group Name Fallback DSCP Fallback Traffic Class +--------------------- ------------------- ---------------------- +group0 46 - +group1 - 3` +``` + + +Use the **show load-balance cluster flows** command to display all programmed flows: + + +``` +`switch# **show load-balance cluster flows** +VRF SA DA Queue Pair Rx Intf Flow Assignment +--------- -------- -------- ---------------- ------------- --------------- +default 10.98.0.1 10.99.0.1 1000 Et15/1 Et1/1 10.0.0.2 +default 10.98.0.2 10.99.0.2 1001 Et16/1 Et2/1 10.1.0.2 +default 10.98.0.3 10.99.0.3 1002 Et17/1 Et5/1 10.2.0.2 +default 10.98.0.4 10.99.0.4 1003 Et18/1 Et6/1 10.3.0.2 + +Total flows: 4, displayed: 4` +``` + + +#### Cluster Load Balancing for a Spine + + +Cluster Load Balancing on a Spine router ensures optimal load balancing of flows used as +part of GPU-based cluster communication in a network with multiple links connecting a +TOR router to a Spine router. + + +When enabled on a Spine, the router monitors RoCE traffic from a TOR and applies optimal +load balancing when forwarding traffic to the next TOR router hosting the destination GPU +server. + + +##### Configuring Cluster Load Balancing for a Spine + + +Note: Only the multi-agent routing model supports CLB. +Note: Perform the following commands only on a Spine router.
+Use the following command to enter the CLB Configuration +Mode: +``` +`switch(config)# **load-balance cluster** +switch(config-clb)#` +``` + + +Configure the forwarding mode and encapsulation to forward packets on the Spine switch. EOS only supports +**`routed`** for IPv4: + + + +``` +`switch(config-clb)# forwarding type routed` +``` + + +Enter the following command to configure flow learning for the Spine switch: + + +``` +`switch(config)# **flow source learning**` +``` + + +The **load-balance method** command configures load balancing flows and must be +entered on the Spine switch: + + +``` +`switch(config-clb)# **load-balance method flow spine port-index**` +``` + + +Configure the number of ports connecting the Spine to the Leaf switch. Every TOR connected to the Spine +must have the same number of ports connecting to the Spine. + + +``` +`switch(config-clb)# **spine port group size 2**` +``` + + +CLB requires configuring the identification of the port group that connect the Spine switch to a TOR. The following output +provides an example configuration of two port groups on TOR1 and TOR2, each with 2 ports: + + +``` +`port group spine TOR1 + member 10 Ethernet12/1 + member 20 Ethernet1/1 +... +port group spine TOR2 + member 10 Ethernet5/1 + member 20 Ethernet13/1` +``` + + +The ports within a group display in order of increasing priority by the number assigned to each port. Ethernet1/1 and +Ethernet13/1 have the second position in the configuration. + + +To limit the number of flows programmed for a port group and preserve hardware TCAM resources, use the following command to limit +the number of flows to 800: + + +``` +`switch(config-clb-portGroup-server1)# **flow limit 800**` +``` + + +Configure CLB flow match type as VXLAN bridging IPv4 traffic, and configure a VXLAN interface for the flow. 
+ + +``` +`switch(config)# **interface vxlan1** +switch(config)# **flow match encapsulation vxlan ipv4**` +``` + + +You can also configure the default flow match type as a non-VXLAN IPv4 packet: + + +``` +`switch(config)# **flow match encapsulation none ipv4**` +``` + + +### Unicast Reverse Path Forwarding +(uRPF) + + +Unicast Reverse Path Forwarding (uRPF) verifies the accessibility of source IP addresses +in forwarded packets from a switch. When uRPF determines that the routing table does not +contain an entry with a valid path to the packet source IP address, the switch drops the +packet. + + +IPv4 and IPv6 uRPF operate independently. Configure uRPF on a VRF. Commands that do not +specify a VRF utilize the default instance. uRPF does not affect multicast routing. + + +uRPF defines two operational modes: + + +- **Strict mode** - In strict mode, uRPF also verifies that a received packet on +the interface with the routing table entry uses that entry for the return +packet. + +- **Loose mode** - uRPF validation does not verify the inbound packet ingress +interface. + + +#### uRPF Operation + + +Configure uRPF on interfaces. For packets arriving on a uRPF-enabled interface, the source IP +address examines the source and destination addresses of unicast routing table entries and +verifies it. + + +uRPF requires a reconfigured routing table to support IP address verification. When enabling uRPF +for the first time, unicast routing becomes briefly disabled to facilitate the routing table +reconfiguration. The initial enabling of uRPF does not affect multicast routing. + + +A packet fails uRPF verification if the table does not contain an entry whose source or +destination address that matches the packet’s source IP address. In strict mode, the uRPF also +fails when the matching entry’s outbound interface does not match the packet’s ingress +interface. 
+ + +uRPF does not verify the following packets: + +- DHCP with a source that uses **0.0.0.0** and a destination uses +**255.255.255.255**. + +- IPv6 link local in the following format -**FE80::/10**. + +- Multicast packets + + +##### ECMP uRPF + + +When verifying ECMP routes, strict mode checks all possible paths to determine the correct +interface receives the packet. ECMP groups with a maximum of eight routing table entries +support strict mode. The switch reverts to loose mode for ECMP groups that exceed eight +entries. + + +##### Default Routes + + +uRPF strict mode provides an **allow-default** option that accepts default +routes. On interfaces that enable allow-default and define a default route, uRPF strict mode +validates a packet even when the routing table does not contain an entry that matches the +packet’s source IP address. If not enabling allow-default, uRPF does not consider the +default route when verifying an inbound packet. + + +##### Null Routes + + +**NULL0** routes drop traffic destined to a specified prefix. When +enabling uRPF, traffic originating from null route prefixes drops in strict and loose modes. + + +#### uRPF Configuration + + +Enable Unicast Reverse Path Forwarding (uRPF) for IPv4 packets ingressing the configuration mode +interface using the ip verify command. + + +Note: uRPF cannot be enabled on interfaces with ECMP member FECs. + +**Examples** + +- This command enables uRPF loose mode on **interface vlan +17**. +``` +`switch(config)# **interface vlan 17** +switch(config-if-Vl17)# **ip verify unicast source reachable-via any** +switch(config-if-Vl17)# **show active** + interface Vlan17 + ip verify unicast source reachable-via any +switch(config-if-Vl17)#` +``` + +- This command enables uRPF strict mode on **interface vlan +18**. 
+``` +`switch(config)# **interface vlan 18** +switch(config-if-Vl18)# **ip verify unicast source reachable-via rx** +switch(config-if-Vl18)# **show active** + interface Vlan18 + ip verify unicast source reachable-via rx +switch(config-if-Vl18)#` +``` + + +### Routing Tables / Virtual Routing +and Forwarding (VRF) + + +An IP routing table is a data table that lists the routes to network destinations and metrics (distances) associated with those routes. A routing table is also known as a Routing Information Base (RIB). + + +Virtual Routing and Forwarding (VRF) allows traffic separation by maintaining multiple routing +tables. Arista switches support multiple VRF instances: + + +- A default global VRF + +- Multiple user-defined VRFs + + +The number of user-defined VRFs supported +varies by platform. VRFs can be used as management or data plane VRFs. + +- Management VRFs have routing disabled and are typically used for +management-related traffic. + +- Dataplane VRFs have routing enabled and support routing protocols and packet +forwarding, including both hardware and software. + + +Trident, FM6000, and Arad platform switches support dataplane VRFs. + + +VRFs support unicast IPv4 and IPv6 traffic +and multicast traffic. Loopback, SVI, and routed ports may be added to +VRFs. Management ports may be added without any hardware forwarding. + + +To allow overlap in the sets of IP addresses used by different VRF instances, a Route +Distinguisher (RD) may be prepended to each address. RFC4364 defines RDs. + + +#### Default VRF + + +EOS creates the default VRF automatically and you cannot rename or configure +it. Some configuration options accept ***default*** as a VRF input. + + +#### User-Defined VRFs + + +Create a user-defined VRF with the vrf instance command. After creating it, +a VRF may be assigned a Route Distinguisher (RD) with the rd (VRF configuration mode) command in +the VRF submode of Router-BGP Configuration Mode.
+ + +**Examples** + +- These commands create a VRF named +**purple**, place the switch +in BGP VRF configuration mode for that VRF, and +specify a route distinguisher for the VRF, +identifying the administrator as **AS +530**, and assigning +**12** as its local +number. +``` +`switch(config)# **vrf instance purple** +switch(config-vrf-purple)# **router bgp 50** +switch(config-router-bgp)# **vrf purple** +switch(config-router-bgp-vrf-purple)# **rd 530:12** +switch(config-router-bgp-vrf-purple)#` +``` + +- To add interfaces to a user-defined VRF, enter +configuration mode for the interface and use the +vrf (Interface mode) +command. Loopback, SVI, and routed ports can be +added to a VRF.These commands add +**vlan 20** to the VRF named +**purple**. +``` +`switch(config)# **interface vlan 20** +switch(config-if-Vl20)# **vrf purple** +switch(config-if-Vl20)#` +``` + +- The show vrf command shows +information about user-defined VRFs on the +switch.This command displays information for +the VRF named +**purple**. +``` +`switch# **show vrf purple** +Vrf     RD         Protocols  State       Interfaces +------- ---------- ---------- ----------- ------------ +purple  64496:237  ipv4       no routing  Vlan42, Vlan43 + +switch>` +``` + + +##### rd (VRF configuration +mode) + + +The **rd** command issued in VRF Configuration Mode is a legacy command +supported for backward compatibility. To configure a Route Distinguisher +(RD) for a VRF, use the rd (VRF configuration mode) +command. + + +Note: Legacy RDs that were assigned to a VRF in VRF Configuration Mode still appear in +**show vrf** outputs if an RD has not +been configured in Router-BGP VRF Configuration Mode, but they no longer +have an effect on the system. + + +#### Context-Active VRF + + +The context-active VRF specifies the default VRF commands to use when displaying or refreshing +routing table data. 
+ + +VRF-context aware commands include: + +- clear arp-cache + +- show ip + +- show ip arp + +- show ip route + +- show ip route gateway + +- show ip route host + + +The cli vrf command specifies the context-active VRF. + + +**Example** + + +This command specifies **magenta** as the context-active +VRF. +``` +`switch# **cli vrf magenta** +switch# **show routing-context vrf** +Current VRF routing-context is magenta` +``` + + +The show routing-context vrf command displays the context-active VRF. + + +**Example** + + +This command displays the context-active +VRF. +``` +`switch# **show routing-context vrf** +Current VRF routing-context is magenta +switch#` +``` + + +### RIB Route Control + + +The Routing Information Base (RIB) consists of the routing information learned by the routing +protocols, including static routes. The Forwarding Information Base (FIB) consists of +the routes actually used to forward traffic through a router. + + +Forwarding Information Base (FIB) performs IP destination prefix-based switching decisions. +Similar to a routing table, the FIB maintains the forwarding information for the winning +routes from the RIB. When routing or topology changes occur in the network, the IP +routing table information updates, and reflects the changes in the FIB. + + +#### Configuring FIB policy + + +The RIB calculates the best or winning routes to each destination and places these routes in the +forwarding table. It then advertises the best routes based on the configured +FIB policy. + + +For example, a FIB policy can be configured to deny the routes for FIB programming; however, it +does not prevent these routes from being advertised into a routing protocol, +redistributed into another routing domain, or used for recursive resolution +in the IP RIB. FIB policies control the size and content of the routing +tables, and the best route to take to reach a destination.
+ + +Use the **rib ipv4 | ipv6 fib policy** command to enable an FIB policy for +a specific VRF in the Router General Configuration Mode. + + +EOS supports the following match statements: + +- **match interface** + +- **match** **[ ip | +ipv6 ] address** **prefix-list** + +- **match** **[ ip | +ipv6 ] +resolved-next-hop** +**prefix-list** + +- **match isis level** + +- **match metric** + +- **match source-protocol** + + +**Example** + + +The following example enables FIB policy for IPv4 in the default VRF, using the +route map, +**map1**. +``` +`switch(config)# **router general** +switch(config-router-general)# **vrf default** +switch(config-router-general-vrf-default)# **rib ipv4 fib policy map1**` +``` + + +##### Configuring FIB Route Limits + + +The FIB route count for a VRF table includes FIB routes from most protocol +sources, such as BGP, IGP, static routes, and address families. +After the FIB routes reach a configured limit on the VRF, EOS +suppresses new BGP route additions in the FIB to avoid exceeding the +limit. Other types of routes continue to add to the FIB table after +the configured limit has been exceeded. + + +EOS maintains suppressed routes for each VRF and address family in a suppressed routes list. If the FIB table reduces routes below +the configured limit, then routes on the suppressed routes list install into the table. If a BGP route becomes suppressed due to the table +limit, the BGP route does not advertise to peers. + + +The FIB route limit does not affect routes already installed in the FIB. When configuring a lower limit on the FIB table, existing BGP +routes remain in the table. Only new BGP routes become suppressed based on the new limit configuration. 
+ + +| Protocol +| Apply to the FIB Route Count +| FIB Route Suppression Supported +| + + +| BGP +| Yes +| Yes +| + + +| IGP +| Yes +| No +| + + +| Static +| Yes +| No +| + + +| Other +| Yes +| No +| + + +| ARP +| No +| No +| + + +Use the following command to configure a global route limit for IPv4 to 100 and warn when the table has consumed 80% +of the limit: + + +``` +`switch(config)# **router general** +switch(config-router-general)# **fib route limit** +switch(config-router-general-fib-route-limit)# **ipv4 limit 100 warning-limit 80 percent**` +``` + + +All VRFs inherit the global configuration unless explicitly configured with a limit. + + +Use the following command to limit the number of routes to 100 on VRF +purple and warn when the table has +consumed 80% of the limit: + + +``` +`switch(config)# **router general** +switch(config-router-general)# **vrf purple** +switch(config-router-general-vrf-purple)# **fib ipv4 route limit 100 warning-limit 80 percent**` +``` + + +To disable the feature, use the following command: + + +``` +`switch(config-router-general-vrf-purple)# **fib ipv4 route limit disabled**` +``` + + +Configure globally suppressing BGP routes in case of a route limit overflow using the following commands: + + +``` +`switch(config)# **router general** +switch(config-router-general)# **fib route limit** +switch(config-router-general-fib-route-limit)# **action protocol bgp route overflow suppress**` +``` + + +Use the **show fib [ipv4 | ipv6] route limit [vrf vrf_name] suppressed** +command to display information about suppressed routes in the FIB table: + + +``` +`switch# **show fib ipv4 route limit suppressed** +VRF: default + Address-Family IPv4: + 12 routes suppressed + 201.1.0.0/24 (bgp) + 201.1.4.0/24 (bgp) + 201.1.5.0/24 (bgp) + 201.1.6.0/24 (bgp) + 201.1.7.0/24 (bgp) + 201.1.8.0/24 (bgp) + 201.1.9.0/24 (bgp) + 201.1.10.0/24 (bgp) + 201.1.11.0/24 (bgp) + 201.1.12.0/24 (bgp) + 201.1.13.0/24 (bgp) + 201.1.14.0/24 (bgp)` +``` + + +#### Displaying 
FIB Information + + +Use the **show rib route fib policy exclude** command to + display the RIB information. The **fib policy excluded** option + displays the RIB routes excluded from programming into the FIB by the FIB policy. + + +**Example** + + +The following example displays the routes filtered by FIB policy using the **fib + policy excluded** option of the **show rib route + ip|ipv6** + command. +``` +`switch# **show rib route ipv6 fib policy excluded** +switch# **show rib route ip bgp fib policy excluded** + +VRF name: default, VRF ID: 0xfe, Protocol: bgp +Codes: C - Connected, S - Static, P - Route Input + B - BGP, O - Ospf, O3 - Ospf3, I - Isis + > - Best Route, * - Unresolved Nexthop + L - Part of a recursive route resolution loop +>B 10.1.0.0/24 [200/0] + via 10.2.2.1 [115/20] type tunnel + via 10.3.5.1, Ethernet1 + via 10.2.0.1 [115/20] type tunnel + via 10.3.4.1, Ethernet2 + via 10.3.6.1, Ethernet3 +>B 10.1.0.0/24 [200/0] + via 10.2.2.1 [115/20] type tunnel + via 10.3.5.1, Ethernet1 + via 10.2.0.1 [115/20] type tunnel + via 10.3.4.1, Ethernet2 + via 10.3.6.1, Ethernet3` +``` + + +#### Displaying RIB Route Information + + +Use the show rib route ip command to view the IPv4 RIB information. + + +**Example** + + +This command displays IPv4 RIB static + routes. 
+``` +`switch# **show rib route ip static** + +VRF name: default, VRF ID: 0xfe, Protocol: static +Codes: C - Connected, S - Static, P - Route Input + B - BGP, O - Ospf, O3 - Ospf3, I - Isis + > - Best Route, * - Unresolved Nexthop + L - Part of a recursive route resolution loop +>S 10.80.0.0/12 [1/0] + via 172.30.149.129 [0/1] + via Management1, directly connected +>S 172.16.0.0/12 [1/0] + via 172.30.149.129 [0/1] + via Management1, directly connected + +switch#` +``` + + +## IPv4 Route Scale + + +Optimize IPv4 routes to achieve route scale when route distribution has many routes with one or +two parameters, and each parameter consisting of prefix lengths +**12**, **16**, +**20**, **24**, +**28**, and +**32**. If configuring two separate prefix +lengths, in any order, one must have the prefix length of +**32**. + + +Note: IPv4 Route Scale cannot be used with AlgoMatch. +The following sections describe IPv4 route scale configuration, show commands, and system log +messages: + +- Configuring IPv4 Route Scale + +- IPv4 Routescale with 2-to-1 Compression + +- Show +Commands + + +### Configuring IPv4 Route Scale + + +Enable IPv4 route scale using the ip hardware fib optimize command in +the Global Configuration Mode. The platform Layer 3 agent restarts to +ensure IPv4 routes optimization with the agent SandL3Unicast terminate command in the Global +Configuration Mode. + + +**Example** + + +This configuration command allows configuring prefix lengths +**12** and +**32**. +``` +`switch(config)# **ip hardware fib optimize exact-match prefix-length 12 32** +! Please restart layer 3 forwarding agent to ensure IPv4 routes are optimized` +``` + + +When using two prefixes, one of the two must use a prefix-length of +**32**. +For this command to take effect, you must +restart the platform Layer 3 agent. + + +**Example** + + +This configuration command restarts the platform Layer 3 agent to ensure +IPv4 route +optimization.
+``` +`switch(config)# **agent SandL3Unicast terminate** +SandL3Unicast was terminated` +``` + + +Restarting the platform Layer 3 agent results in deletion of all IPv4 routes and then +re-adds them to the hardware. + + +**Example** + + +This configuration command allows configuring prefix lengths +**32** and +**16**. +``` +`switch(config)# **ip hardware fib optimize exact-match prefix-length 32 16** +! Please restart layer 3 forwarding agent to ensure IPv4 routes are optimized` +``` + + +One of the two prefixes uses a prefix-length of +**32** required in the instance when +using two prefixes. For this command to take effect, you must +restart the platform Layer 3 agent. + + +**Examples** + +- This configuration command restarts the platform Layer 3 +agent to ensure IPv4 route +optimization. +``` +`switch(config)# **agent SandL3Unicast terminate** +SandL3Unicast was terminated` +``` + + +Restarting +the platform Layer 3 agent results in deletion of +all IPv4 routes and then re-adds them to the +hardware. + +- This configuration command allows configuring prefix +length +**24**. +``` +`switch(config)# **ip hardware fib optimize exact-match prefix-length 24** +! Please restart layer 3 forwarding agent to ensure IPv4 routes are optimized` +``` + + +In this instance, when configuring a single prefix-length, the +configuration does not require a prefix-length of +**32**. For this command to +take effect, you must restart the platform Layer 3 agent. + + +**Examples** + +- This configuration command restarts the platform Layer 3 +agent to ensure IPv4 route +optimization. +``` +`switch(config)#**agent SandL3Unicast terminate** +SandL3Unicast was terminated` +``` + + +Restarting +the platform Layer 3 agent results in deletion of +all IPv4 routes and then re-adds them to the +hardware. + +- This configuration command allows configuring the +prefix length +**32**. +``` +`switch(config)# **ip hardware fib optimize exact-match prefix-length 32** +! 
Please restart layer 3 forwarding agent to ensure IPv4 routes are optimized` +``` + + +For this command to take effect, you must restart +the platform Layer 3 agent. + +- This configuration command restarts the platform Layer 3 +agent to ensure IPv4 route +optimization. +``` +`switch(config)# **agent SandL3Unicast terminate** +SandL3Unicast was terminated` +``` + +Restarting +the platform Layer 3 agent results in deletion of +all IPv4 routes and then re-adds them to the +hardware. + +- This configuration command disables the prefix lengths +**12** and +**32** +configuration. +``` +`switch(config)#**no ip hardware fib optimize exact-match prefix-length 12 32** +! Please restart layer 3 forwarding agent to ensure IPv4 routes are not optimized` +``` + + +One of the two prefixes uses a prefix-length of +**32** required in the instance when +using two prefixes. For this command to take effect, you must +restart the platform Layer 3 agent. + + +**Examples** + +- This configuration command restarts the platform Layer 3 +agent to ensure no IPv4 route +optimization. +``` +`switch(config)#**agent SandL3Unicast terminate** +SandL3Unicast was terminated` +``` + + +Restarting +the platform Layer 3 agent results in deletion of +all IPv4 routes and then re-adds them to the +hardware. + +- This configuration command attempts to configure the +prefix lengths **20** and +**28** which triggers an +error exception. One of the two prefixes in this +command must be a prefix-length of +**32** required when adding +two +prefixes. +``` +`switch(config)#**ip hardware fib optimize exact-match prefix-length 20 28** +% One of the prefix lengths must be 32` +``` + + +IPv4 routes of certain prefix lengths can be optimized for enhanced route +scale. The following command disables prefix optimization on the +specified VRF(s) to provide more flexibility. + + +**Examples** + +- This configuration command disables prefix optimization +on the default +VRF. 
+``` +`switch(config)# **ip hardware fib optimize disable-vrf default** +! Please restart layer 3 forwarding agent to ensure that the disable-vrf option change takes effect` +``` + +- This configuration command disables prefix optimization +on VRFs named **vrf1** and +**vrf2**. +``` +`switch(config)# **ip hardware fib optimize disable-vrf vrf1 vrf2** +! Please restart layer 3 forwarding agent to ensure that the disable-vrf option change takes effect` +``` + +- This configuration command restarts the platform Layer 3 +agent to ensure that the disable-vrf +configuration takes +effect. +``` +`switch(config)# **agent SandL3Unicast terminate** +SandL3Unicast was terminated` +``` + + +**Examples** + +- This configuration command enables prefix optimization +on the default +VRF. +``` +`switch(config)# **ip hardware fib optimize vrf default prefix-length 32** +! Please restart layer 3 forwarding agent to ensure IPv4 routes are optimized` +``` + +- This configuration command enables prefix optimization +on VRFs named **vrf1** and +**vrf2**. +``` +`switch(config)# **ip hardware fib optimize vrf vrf1 vrf2 prefix-length 32** +! Please restart layer 3 forwarding agent to ensure IPv4 routes are optimized` +``` + +- This configuration command disables optimization on +**vrf1** and +**vrf2** optimization +configured in above +example. +``` +`switch(config)# **no ip hardware fib optimize vrf vrf1** +! Please restart layer 3 forwarding agent to ensure IPv4 routes are optimized` +``` + + +The **platform trident forwarding-table partition +flexible** command enables ALPM Mode in +Flexible UFT mode using a subset of resources, so ALPM and Exact +Match can coexist. + + +**Examples** + +- This configuration command sets up the flexible +partition. 
+``` +`switch(config)# **platform trident forwarding-table partition flexible ?** + alpm Shared UFT bank entries for the ALPM table + exact-match Shared UFT bank entries for the exact-match table + l2-shared Shared UFT bank entries for the MAC table + l3-shared Shared UFT bank entries for the host table` +``` + +- ALPM gives the route prefix in DEFIM (TCAM table for +longest prefix matched (LPM) lookup) and ALPM +tables. +``` +`switch(config)# **platform trident forwarding-table partition flexible alpm ?** + 184320 Upto 180K LPM routes + 368640 Upto 360K LPM routes` +``` + + +Note: The size parameter has following values: + +- DCS-7300X3: 180k and 360k are accepted. + +- CCS-720XP: 144k and 96k are accepted. + +- Other sizes are invalid. + + +#### Reserving IPv4 and IPv6 Optimized Prefixes + + +The Large Exact Match (LEM) table stores routes of one or two prefix lengths that belong to a default or non-default VRF. When the LEM table becomes +full, the Longest Prefix Match table stores the routes. This enables reservation of some entries in the LEM table for a specific VRF. + + +Note: The platform Layer 3 agentrestarts to ensure IPv4 routes optimization with the agent SandL3Unicast terminate command in the Global +Configuration Mode. + +Use the following command to create reservations for 25 IPv4 optimized prefixes on VRF blue: + + +``` +`switch(config)# ip hardware fib optimize vrf blue prefixes minimum count 25 +! Please restart the SandL3Unicast agent to reserve space for optimized FIB prefixes` +``` + + +Use the following command to create reservations for 35 IPv6 prefixes on VRF green: + + +``` +`switch(config)# ipv6 hardware fib optimize vrf green prefixes minimum count 35 + ! 
Please restart the SandL3Unicast agent to reserve space for optimized FIB prefixes` +``` + + +Use the following command to restart the Layer 3 agent and allow the changes to take effect: + + +``` +`switch#agent SandL3Unicast terminate + Sandl3Unicast was terminated` +``` + + +Restarting the agent impacts all forwarding as the command deletes all routes and re-adds them to the switch. + + +LEM reservations on a VRF persist independently of VRF deletion. Explicitly remove the configuration using the **no** version +of the command. + + +### IPv4 Routescale with 2-to-1 Compression + + +The IPv4 routescale with 2-to-1 compression optimizes certain prefix lengths and +enhances the route scale capabilities on 7500R, 7280R, 7500R2, and 7280R2 platforms. The +compression is best suited to achieve route scale when route distribution has a large number +of routes of one or two prefix lengths. + + +#### Configuring IPv4 Routescale 2-to-1 Compression + + +Use the **compress** command to increase the hardware resources +available for the specified prefix length. This command allows configuring up to one +compressed prefix length, and this command is supported only on 7500R, 7280R, 7500R2, +and 7280R2 platforms. + + +Note: The **compress** command takes effect only +when you restart the platform Layer3 agent on 7500R, 7280R, 7500R2, and 7280R2 +platforms. Use command **agent SandL3Unicast terminate** to +restart the platform Layer3 agent. + +**Examples** + +- In the following example we are configuring prefix length +**20** and **24**, expanding +prefix length **19** and **23**, and +compressing prefix length +**25**. +``` +`switch(config)# **ip hardware fib optimize prefix-length 20 24 expand 19 23 compress 25** + ! Please restart layer 3 forwarding agent to ensure IPv4 routes are optimized` +``` + +- In the following example we are configuring prefix length +**20** and **23**, expanding +prefix length **19**, compressing prefix length +**24**.
+``` +`switch(config)# **ip hardware fib optimize prefix-length 20 23 expand 19 compress 24** + ! Please restart layer 3 forwarding agent to ensure IPv4 routes are optimized` +``` + +- Optionally, you can also use the **internet** profile to configure the IPv4 +route scale +compression. +``` +`switch(config)# **ip hardware fib optimize prefixes profile internet** + ! Please restart layer 3 forwarding agent to ensure IPv4 routes are optimized` +``` + + +Configure a new TCAM profile for the **compress** configuration to work, and disable a few features in the new TCAM profile to make space for the flex-route feature in the hardware. Features like **acl vlan ip** and the **mirror ip** have to be disabled, if you need any of these features or any other features to be enabled with flex-route feature, contact the Arista team. + + +The **internet** profile works differently based on whether the flex-route feature is enabled in the TCAM profile or not. If the flex-route feature is enabled, the **internet** profile behaves like **ip hardware fib optimize prefix-length 20 23 expand 19 22 compress 24**. If the flex-route feature is disabled, the **internet** profile behaves as **ip hardware fib optimize prefix-length 20 24 expand 19 23**. + + +**Example** +``` +`switch(config)# **hardware tcam** +switch(config-hw-tcam)# **profile flex-route copy default** +switch(config-hw-tcam-profile-flex-route)# **feature flex-route copy system-feature-source-profile** +switch(config-hw-tcam-profile-flex-route-feature-flex-route)# **exit** +switch(config-hw-tcam-profile-flex-route)# **no feature acl vlan ip** +switch(config-hw-tcam-profile-flex-route)# **no feature mirror ip** +switch(config-hw-tcam-profile-flex-route)# **exit** +Saving new profile 'flex-route' +switch(config-hw-tcam)# **system profile flex-route**` +``` + + +#### Limitations + + +- A maximum of two prefix lengths can be optimized directly at any point of time, of +which only one can be a non-nibble aligned prefix length. 
Additional prefix lengths +can be optimized using the **expand** or the +**compress** options. + +- A maximum of 1-to-4 way expansion and 2-to-1 way compression into any optimized +prefix length is supported. Multiple expansion prefix lengths can be programmed at +any time; however, there can be just one compression prefix length programmed at any +given point in time. + +- A maximum of **4096** next-hops can be reliably pointed to by +the compressed prefixes using 2-to-1 way compression. + +- The 2-to-1 compression cannot be enabled along with unicast RPF. When both features +are enabled together, unicast RPF functionality may not be correct. + +- The flex-route feature in TCAM profiles is based only on the default profile, while +disabling the **acl vlan ip** and the **mirror +ip** features. Contact the Arista team if any other feature, +that is not available in the default TCAM profile, is required to be supported along +with the flex-route feature, including support for Mirror to GRE tunnel or ACLs on +SVI. + +- VXLAN is not supported with the compress option of this feature. +There is no Syslog or warning message when VXLAN is configured along with the +2-to-1 way compression feature. + + +### Show Commands + + +Display the IPv4 route scale summary using the show platform arad ip route +summary command in the Global Configuration Mode. Resources for all IPv4 route scale +routes are displayed by the show platform +arad ip route command for the Global Configuration Mode. + + +**Examples** + +- This command displays hardware resource usage for IPv4 routes. + +``` +`switch(config)# **show platform arad ip route summary** + +Total number of VRFs: 1 +Total number of routes: 25 +Total number of route-paths: 21 +Total number of lem-routes: 4` +``` + +- This command shows resources for all IPv4 routes in hardware. +Routes that use the additional hardware resources appear with an asterisk (*).
+ +``` +`switch(config)# **show platform arad ip route** + +Tunnel Type: M(mpls), G(gre) +* - Routes in LEM +------------------------------------------------------------------------------------------------ +| Routing Table | | +|------------------------------------------------------------------------------------------------ +|VRF| Destination | | | |Acl | |ECMP | FEC | Tunnel +|ID | Subnet | Cmd | Destination |VID |Label| MAC / CPU Code |Index|Index|T Value +------------------------------------------------------------------------------------------------ +|0 |0.0.0.0/8 |TRAP |CoppSystemL3DstMiss|0 | - |ArpTrap | - |1030 | - +|0 |100.1.0.0/32 |TRAP |CoppSystemIpBcast |0 | - |BcastReceive | - |1032 | - +|0 |100.1.0.0/32 |TRAP |CoppSystemIpUcast |0 | - |Receive | - |32766| - +|0 |100.1.255.255/32|TRAP |CoppSystemIpBcast |0 | - |BcastReceive | - |1032 | - +|0 |200.1.255.255/32|TRAP |CoppSystemIpBcast |0 | - |BcastReceive | - |1032 | - +|0 |200.1.0.0/16 |TRAP |CoppSystemL3DstMiss|1007| - |ArpTrap | - |1029 | - +|0 |0.0.0.0/0 |TRAP |CoppSystemL3LpmOver|0 | - |SlowReceive | - |1024 | - +|0 |4.4.4.0/24* |ROUTE|Et10 |1007| - |00:01:00:02:00:03| - |1033 | - +|0 |10.20.30.0/24* |ROUTE|Et9 |1006| - |00:01:00:02:00:03| - |1027 | -` +``` + + +## IP Source Guard + + +IP Source Guard (IPSG) prevents IP spoofing attacks. + + +IP Source Guard (IPSG) filters inbound IP packets based on the source MAC and IP addresses. +Hardware supports IPSG. IPSG enabled on a Layer 2 port verifies +IP packets received on this port. EOS permits packets if each +packet source MAC and IP addresses match user-configured IP-MAC +binding entries on the receiving VLAN and port. EOS drops +packets with no match immediately. + + +### Configuring IPSG + + +IPSG applies only to Layer 2 ports, and you enable it using the ip verify source command for the Global +Configuration Mode. When configured on Layer 3 ports, IPSG does not take +effect until this interface converts to Layer 2. 
+ + +Layer 2 Port-Channels, not member ports, support IPSG. The IPSG configuration on port channels +supersedes the configuration on the physical member ports. Therefore, source +IP MAC binding entries should be configured on port channels using the ip source binding command. When configured on a +port channel member port, IPSG does not take effect until deleting this port +from the port channel configuration. + + +**Examples** + +- These configuration commands exclude VLAN IDs +**1** through +**3** from IPSG filtering. +When enabled on a trunk port, IPSG filters the inbound IP +packets on all allowed VLANs. IP packets received on VLANs +**4** through +**10** on +**ethernet 36** filter using +IPSG, while those received on VLANs +**1** through +**3** are +permitted. +``` +`switch(config)# **no ip verify source vlan 1-3** +switch(config)# **interface ethernet 36** +switch(config-if-Et36)# **switchport mode trunk** +switch(config-if-Et36)# **switchport trunk allowed vlan 1-10** +switch(config-if-Et36)# **ip verify source** +switch(config-if-Et36)#` +``` + +- This configuration command configures source IP-MAC binding +entries to IP address **10.1.1.1**, +MAC address **0000.aaaa.1111**, +**VLAN ID 4094**, and +**interface ethernet +36**. +``` +`switch(config)# **ip source binding 10.1.1.1 0000.aaaa.1111 vlan 4094 interface ethernet 36** +switch(config)#` +``` + + +### DHCP Server Show Commands + + +Use the **show dhcp server** command to display DHCP server +information. 
+ +- DHCPv4 display + example: +``` +`switch# **show dhcp server ipv4** +IPv4 DHCP Server is active +Debug log is enabled +DNS server(s): 10.2.2.2 +DNS domain name: domainFoo +Lease duration: 1 days 0 hours 0 minutes +TFTP server: +serverFoo (Option 66) +10.0.0.3 (Option 150) +TFTP file: fileFoo +Active Leases: 1 +IPv4 DHCP interface status: + Interface Status +------------------------------------------------- + Ethernet1 Inactive (Could not determine VRF) + Ethernet2 Inactive (Not in default VRF) + Ethernet3 Inactive (Kernel interface not created yet) + Ethernet4 Inactive (Not up) + Ethernet5 Inactive (No IP address) + Ethernet6 Active + +Vendor information: +Vendor ID: default + Sub-options Data +---------------- ---------------- + 1 192.0.2.0, 192.0.2.1 + +Vendor ID: vendorFoo + Sub-options Data +---------------- ----------- + 2 192.0.2.2 + 3 “Foo” + +Subnet: 10.0.0.0/8 +Subnet name: subnetFoo +Range: 10.0.0.1 to 10.0.0.10 +DNS server(s): 10.1.1.1 10.2.2.2 +Lease duration: 3 days 3 hours 3 minutes +Default gateway address: 10.0.0.3 +TFTP server: +subnetServerFoo (Option 66) +10.0.0.4 (Option 150) +TFTP boot file: subnetFileFoo +Active leases: 1 +Reservations: +MAC address: 1a1b.1c1d.1e1f +IPv4 address: 10.0.0.1 + +MAC address: 2a2b.2c2d.2e2f +IPv4 address: 10.0.0.2` +``` + +- For DHCPv6, there are two additional fields in subnet information output, + **Direct** field and the + **Relay** field. These two fields specify if + the DHCP Server is accepting broadcast or relayed messages. +The +**Direct** field displays +**Active** when the subnet matches the +interface with DHCPv6 configured. This indicates the server is accepting broadcast +messages. + + +The **Direct** field +displays **Inactive** when there is another existing subnet already matching +the interface, or when the subnet matches more than one DHCP configured +interface. 
+ + +Examples of outputs for the DHCPv6 **show dhcp +server** command: + +In this example, DHCPv6 is configured +with subnet **fe80::/10** while being enabled on +**Ethernet1** with address +**fe80::1/64** and on + **Ethernet3** with address +**fe80::2/64**. +``` +`switch# **show dhcp server ipv6** +IPv6 DHCP server is active +Debug log is enabled +DNS server(s): fe80::6 +DNS domain name: testaristanetworks.com +Lease duration: 1 days 3 hours 30 minutes +Active leases: 0 +IPv6 DHCP interface status: + Interface Status +--------------- ------ + Ethernet1 Active + Ethernet3 Active + +Subnet: fe80::/10 +Subnet name: foo +Range: fe80::1 to fe80::3 +DNS server(s): fe80::4 fe80::5 +Direct: Inactive (Multiple interfaces match this subnet: Ethernet1 Ethernet3) +Relay: Active +Active leases: 0` +``` + +- This example illustrates when multiple subnets match an interface. In this + example, DHCPv6 is configured with subnets **fc00::/7** and +**fe80::/10** while being enabled on **Ethernet1** with + address **fe80::1/10** and + **fc00::1/7**. +``` +`switch# **show dhcp server ipv6** +IPv6 DHCP server is active +DNS server(s): fc00::2 +DNS domain name: testaristanetworks.com +Lease duration: 1 days 3 hours 30 minutes +Active leases: 0 +IPv6 DHCP interface status: + Interface Status +--------------- ------ + Ethernet1 Active + +Subnet: fc00::/7 +Subnet name: foo +Range: fc00::1 to fc00::5 +DNS server(s): fc00::6 fc00::8 +Direct: Inactive (This and other subnets match interface Ethernet1) +Relay: Active + +Active leases: 0 + +Subnet: fe80::/10 +Subnet name: bar +Direct: Inactive (This and other subnets match interface Ethernet1) +Relay: Active + +Active leases: 0` +``` + +- When a subnet is disabled, the **show dhcp server** + command displays the disable message with a reason. The number of active leases of + the disabled subnets will be **0**. In this example, there are + overlapping subnets. 
+``` +`switch# **show dhcp server** +IPv4 DHCP Server is active +DNS server(s): 10.2.2.2 +Lease duration: 1 days 0 hours 0 minutes +Active Leases: 0 +IPv4 DHCP interface status: + Interface Status +------------------------------------------------- + Ethernet1 Active + +Subnet: 10.0.0.0/24 (Subnet is disabled - overlapping subnet 10.0.0.0/8) +Range: 10.0.0.1 to 10.0.0.10 +DNS server(s): 10.3.3.3 10.4.4.4 +Default gateway address: 10.0.0.4 +Active leases: 0 + +Subnet: 10.0.0.0/8 (Subnet is disabled - overlapping subnet 10.0.0.0/24) +DNS server(s): +Default gateway address: 10.0.0.3 +Active leases: 0` +``` + +- In this example, the display output shows overlapping + ranges. +``` +`switch# **show dhcp server** +IPv4 DHCP Server is active +DNS server(s): 10.2.2.2 +Lease duration: 1 days 0 hours 0 minutes +Active Leases: 0 +IPv4 DHCP interface status: + Interface Status +------------------------------------------------- + Ethernet1 Active + +Subnet: 10.0.0.0/8 (Subnet is disabled - range 10.0.0.9-10.0.0.12 overlaps with an existing pool) +Range: 10.0.0.1 to 10.0.0.10 +Range: 10.0.0.9 to 10.0.0.12 +DNS server(s): 10.3.3.3 10.4.4.4 +Default gateway address: 10.0.0.4 +Active leases: 0` +``` + +- This example shows duplicate static IP address + reservation. +``` +`Subnet: 10.0.0.0/8 (Subnet is disabled - ipv4-address 10.0.0.11 is reserved more than once) +Subnet name: +DNS server(s): +Default gateway address: 10.0.0.3 +Active leases: 0 +Reservations: +MAC address: 1a1b.1c1d.1e1f +IPv4 address: 10.0.0.11 + +MAC address: 2a2b.2c2d.2e2f +IPv4 address: 10.0.0.11` +``` + +- Use the **show dhcp server leases** command to display + detailed information about the IP addresses allocated by the DHCP Server (including + the IP address, the expected end time for that address, the time when the address is + handed out, and the equivalent MAC + address). 
+``` +`switch# **show dhcp server leases** +10.0.0.10 +End: 2019/06/20 17:44:34 UTC +Last transaction: 2019/06/19 17:44:34 UTC +MAC address: 5692.4c67.460a + +2000:0:0:40::b +End: 2019/06/20 18:06:33 UTC +Last transaction: 2019/06/20 14:36:33 UTC +MAC address: 165a.a86d.ffac` +``` + + +## DHCP Server + + +The router with DHCP Server enabled acts as a server that allocates and delivers +network addresses with desired configuration parameters to its hosts. + + +The DHCP server is based on ISC Kea. + + +The router with a DHCP Server enabled acts as a server that allocates and delivers +network addresses with desired configuration parameters to its hosts. + + +DHCP Server support includes: + + +DHCPv4 support includes: + +- Configurable on different interfaces: Routed, VLAN, LAG, Sub-interface, and LAG +Sub-interface. + +- Configurable lease time for allocated network addresses. + +- Configurable DNS domain. + +- Configurable DNS servers. + +- Configurable subnets with parameters: + +- Default gateway + +- DNS servers + +- Ranges + +- Lease time + + + Additional features for DHCPv4 include: + +- Configurable TFTP server + +- Configurable TFTP bootfile + + + Additional features for DHCPv4 include: + +- Configurable Vendor options with sub options + +- Configurable sub option types include: IPv4 address, array of IPv4 addresses, +and string + +- TFTP bootfile now supports a URI + + +Additional features for DHCPv4 include a configurable static IP address for exclusive use +by a given client, based on the client’s MAC address. + + +Example deployment: + + +DHCP Server on an aggregation switch, via VXLAN tunnels. + + +### Configuring DHCP Servers + + +Global DHCP server options are configured per address family and apply to all +subnets. These commands are accessed at the `**config-dhcp-server**` +level.
+ + +To enter the DHCP server global configuration mode, use the following +commands: + + +``` +`switch# **configure** +switch(config)# **dhcp server** +switch(config-dhcp-server)#` +``` + + +To disable the DHCP server: + + +``` +`switch(config-dhcp-server)# **disabled**` +``` + + +Use the following commands to configure the DNS servers. Only two servers can be +configured globally per address family. + + +``` +`switch(config-dhcp-server)# **dns server ipv4 192.0.2.4 192.0.2.5** +switch(config-dhcp-server)# **dns server ipv6 2001:db8:0:10::53 2001:db8:0:10::5353**` +``` + + +The following commands configure the domain names for allocated IP +addresses. For example, add a domain with the name +**podV4.example.com** for DHCPv4 and a domain with the +name **podV6.example.com** for DHCPv6. + + +``` +`switch(config-dhcp-server)# **dns domain name ipv4 podV4.example.com** +switch(config-dhcp-server)# **dns domain name ipv6 podV6.example.com**` +``` + + +The following commands configure lease time for the allocated IP +addresses. For example, configure the lease time as one (1) day. + + +``` +`switch(config-dhcp-server)# **lease time ipv4 1 days 0 hours 0 minutes** +switch(config-dhcp-server)# **lease time ipv6 1 days 0 hours 0 minutes**` +``` + + +The following command configures the TFTP Server-Name. The server can be +in the form of either an IPv4 address or a fully qualified domain +name and only available in DHCPv4. For example, configure the TFTP +server with the IPv4 address, 192.0.2.6. + + +``` +`switch(config-dhcp-server)# **tftp server option 66 ipv4 192.0.2.6**` +``` + + +The following command configures the TFTP Servers. + + +``` +`switch(config-dhcp-server)# **tftp server option 150 ipv4 192.0.2.6 192.0.2.7**` +``` + + +The following command configures the TFTP Server Bootfile-Name, only +available in DHCPv4. + + +``` +`switch(config-dhcp-server)# **tftp server file ipv4 bootfile.conf**` +``` + + +The following command configures Vendor specific option. 
To enter the +Vendor option submode **config-dhcp-vendor-ipv4** from +**config-dhcp-server** config mode, specify a vendor +class identifier, only available in DHCPv4. For example, Vendor +option for clients with vendor class identifier +vendorClassIDA. + + +``` +`switch(config-dhcp-server)# **vendor-option ipv4 vendorClassIDA**` +``` + + +The following command configures the ***default*** Vendor option. When you configure +***default***, the DHCP Server sends that Vendor option to +clients requesting a Vendor option with a vendor class identifier that does not match +any configured Vendor option. + + +``` +`switch(config-dhcp-server)# **vendor-option ipv4 default**` +``` + + +The following command configures suboptions for the Vendor. The +configuration sends the resulting Vendor option in a hexadecimal +format to the desired client. The output displays a Vendor option +with a suboption with IPv4 address 192.0.2.8, for clients with the +vendor class identifier vendorClassIDA, resulting +in Vendor option 1:4:c0:0:2:8. + + +``` +`Sub option number is 1 +Length of the Data is 4 +Data is c0:0:2:8 +dhcp server +vendor-option ipv4 vendorClassIDA +sub-option 1 type ipv4-address data 192.0.2.8` +``` + + +The following command configures the Vendor option with IPv4 addresses 192.0.2.8 and +192.0.2.9, for clients with the vendor class identifier +vendorClassIDA, resulting in the Vendor option +fe:8:c0:0:2:8:c0:0:2:9. + + +``` +`switch(config-dhcp-server)# **vendor-option ipv4 vendorClassIDA sub-option 254 type array ipv4-address data 192.0.2.8 192.0.2.9**` +``` + + +The following command configures Vendor option with a string “vendor”, +for all clients whose vendor class identifier does not match any +configured Vendor option, resulting in Vendor option +1e:6:76:65:6e:64:6f:72.
+ + +``` +`switch(config-dhcp-server)# **vendor-option ipv4 default sub-option 30 type string data "vendor"**` +``` + + +The following command sets up Vendor option holding two suboptions: suboption 1 holds +the IPv4 address 192.0.2.8, and suboption 2 holds a string “vendor”, for all clients +whose vendor class identifier does not match any configured Vendor option, resulting in +Vendor option 1:4:c0:0:2:8:2:6:76:65:6e:64:6f:72. + + +``` +`switch(config-dhcp-server)# **vendor-option ipv4 default sub-option 1 type ipv4-address data 192.0.2.8 sub-option 2 type string data "vendor"**` +``` + + +#### Configuring DHCP Server Subnets + + +DHCP Server settings can also be configured per subnet and +override the DHCP Server global mode configurations. There can be +multiple subnets configured, but they must not overlap. EOS disables +overlapping subnets. + + +The following command enters DHCP Server subnet mode under the +IPv4 address family. + + +``` +`switch# **config** +switch(config)# **dhcp server** +switch(config-dhcp-server)# **subnet 192.0.2.0/32**` +``` + + +The following command configures the name of the subnet. For example, name subnetv4 +for DHCPv4. + + +``` +`switch(config-dhcp-subnet-ipv4)# **name subnetv4**` +``` + + +The following command configures the range of IP addresses of the subnet. The +range must be within the subnet mask, otherwise the subnet becomes +disabled. + + +``` +`switch(config-dhcp-subnet-ipv4)# **range 192.0.2.100 192.0.2.199**` +``` + + +The following command configures the DNS servers for a subnet. Configure +up to 2 servers per subnet. + + +``` +`switch(config-dhcp-subnet-ipv4-range)# **dns server 192.0.2.1 192.0.2.10**` +``` + + +The following command configures the lease time for allocated IP addresses of the +subnet. +``` +`switch(config-dhcp-subnet-ipv4)# **lease time ipv4 3 days 0 hours 0 minutes**` +``` + + +The following command configures the default-gateway for a subnet.
+ +``` +`switch(config-dhcp-server)# **subnet 192.0.2.0/32** +switch(config-dhcp-subnet-ipv4)# **default-gateway 192.0.2.3**` +``` + + +The following command configures the TFTP Server-Name for a subnet. The server can be +in the form of either an IPv4 address or a fully qualified domain name, but can only +be configured for +DHCPv4. +``` +`switch(config-dhcp-subnet-ipv4)# **tftp server option 66 subnet-tftp.example.com**` +``` + + +The following command configures a list of TFTP servers. The server can only be in +the form of an IP address, but can only be configured for +DHCPv4. +``` +`switch(config-dhcp-subnet-ipv4)# **tftp server option 150 192.0.2.6 192.0.2.7**` +``` + + +The following command configures the TFTP server Bootfile-Name for a subnet, but can +only be configured for +DHCPv4. +``` +`switch(config-dhcp-subnet-ipv4)# **tftp server file subnet-bootfile.conf**` +``` + + +**Example DHCP Server Subnets +Configuration** +``` +`switch# **config** +switch(config)# **dhcp server** +switch(config-dhcp-server)# **subnet 192.0.2.0/32** +switch(config-dhcp-subnet-ipv4)# **name subnetv4** +switch(config-dhcp-subnet-ipv4)# **range 192.0.2.100 192.0.2.199** +switch(config-dhcp-subnet-ipv4-range)# **dns server 192.0.2.1 192.0.2.10** +switch(config-dhcp-subnet-ipv4)# **lease time ipv4 3 days 0 hours 0 minutes** +switch(config-dhcp-server)# **subnet 192.0.2.0/32** +switch(config-dhcp-subnet-ipv4)# **default-gateway 192.0.2.3** +switch(config-dhcp-subnet-ipv4)# **tftp server option 66 subnet-tftp.example.com** +switch(config-dhcp-subnet-ipv4)# **tftp server option 150 192.0.2.6 192.0.2.7** +switch(config-dhcp-subnet-ipv4)# **tftp server file subnet-bootfile.conf**` +``` + + +The following command configures a static IP address for exclusive use by a client. +Enter the **dhcp-server-subnet** configuration submode, +***(config-dhcp-mac-address-ipv4)*** from and specify the client MAC +Address. The IP address must not be used by another client. 
Only DHCPv4 addresses +allowed for this configuration. + +``` +`switch(config-dhcp-subnet-ipv4)# **reservations** +switch(config-dhcp-sub-v4-reserve)# **mac-address 1a1b.1c1d.1e1f** +switch(config-dhcp-sub-v4-rsrv-mac-address)# **ipv4-address 192.0.2.0**` +``` + + +### Displaying DHCP Information + + +#### Show DHCP Server Information + + +The following command displays the DHCP Server information. + + +``` +`switch# **show dhcp server ipv4** +IPv4 DHCP Server is active +Debug log is enabled +DNS server(s): 192.0.2.4 192.0.2.5 +DNS domain name: podV4.example.com +Lease duration: 1 days 0 hours 0 minutes +TFTP server: 192.0.2.6 (Option 66) +192.0.2.6 192.0.2.7 (Option 150) +TFTP file: https://[john.doe@www.example.com](mailto:john.doe@www.example.com):123/example/one +Active Leases: 1 +IPv4 DHCP interface status: +Interface Status +------------------------------------------------- +Ethernet1 Inactive (Could not determine VRF) +Ethernet2 Inactive (Not in default VRF) +Ethernet3 Inactive (Kernel interface not created yet) +Ethernet4 Inactive (Not up) +Ethernet5 Inactive (No IP address) +Ethernet6 Inactive (No Link Local address) +Ethernet7 Inactive (DHCP relay is configured for this interface) +Ethernet8 Inactive (DHCP relay is always on) +Ethernet9 Active + +Vendor information: +Vendor ID: default +Sub-options Data +---------------- ---------------- +1 192.0.2.0 +2 “vendor” + +Vendor ID: vendorClassIDA +Sub-options Data +---------------- -------------------- +254 192.0.2.8, 192.0.2.9 + +Subnet: 192.0.2.0/24 +Subnet name: subnetFooV4 +Range: 192.0.2.100 to 192.0.2.199 +DNS server(s): 192.0.2.1 192.0.2.10 +Lease duration: 3 days 0 hours 0 minutes +Default gateway address: 192.0.2.3 +TFTP server: + subnet-tftp.example.com (Option 66) + 192.0.2.6 192.0.2.7 (Option 150) + TFTP boot file: subnet-bootfile.conf + Active leases: 1 + Reservations: + MAC address: 1a1b.1c1d.1e1f + IPv4 address: 192.0.2.201 + MAC address: 2a2b.2c2d.2e2f + IPv4 address: 192.0.2.150` +``` + + +#### 
Displaying Disabled Subnets + + +When a subnet becomes disabled, the **show dhcp server +[ipv4|ipv6]** output displays the disabled message under +Disabled reason(s). None of the disabled subnets have active +leases. Currently, the output displays only 2 disabled reasons. + + +``` +`switch# **show dhcp server** +IPv4 DHCP Server is active +DNS server(s): 10.2.2.2 +Lease duration: 1 days 0 hours 0 minutes +Active Leases: 0 +IPv4 DHCP interface status: +Interface Status +------------------------------------------------- +Ethernet1 Active + +Subnet: 10.0.0.0/24 (Subnet is disabled) +Range: 10.0.0.1 to 10.0.0.10 +DNS server(s): 10.3.3.3 10.4.4.4 +Default gateway address: 10.0.0.4 +Active leases: 0 +Disabled reason(s): +Overlapping subnets: 10.0.0.0/8 + +Subnet: 10.0.0.0/8 (Subnet is disabled) +Range: 10.0.0.1 to 10.0.0.10 +DNS server(s): 10.5.5.5 +Default gateway address: 10.0.0.3 +Active leases: 0 +Disabled reason(s): +Overlapping subnets: 10.0.0.0/24 + +For Overlapping ranges: +switch# **show dhcp server** +IPv4 DHCP Server is active +DNS server(s): 10.2.2.2 +Lease duration: 1 days 0 hours 0 minutes +Active Leases: 0 +IPv4 DHCP interface status: +Interface Status +------------------------------------------------- +Ethernet1 Active + +Subnet: 10.0.0.0/8 (Subnet is disabled) +Range: 10.0.0.1 to 10.0.0.10 +Range: 10.0.0.9 to 10.0.0.12 +DNS server(s): 10.3.3.3 10.4.4.4 +Default gateway address: 10.0.0.4 +Active leases: 0 +Disabled reason(s): +Overlapping range: 10.0.0.9 to 10.0.0.12 + +E.g. 
Duplicate static IP address reservation: +Subnet: 10.0.0.0/8 (Subnet is disabled) +Subnet name: +Range: 10.0.0.1 to 10.0.0.10 +DNS server(s): 10.5.5.5 +Default gateway address: 10.0.0.3 +Active leases: 0 +Reservations: +MAC address: 1a1b.1c1d.1e1f +IPv4 address: 10.0.0.11 + +MAC address: 2a2b.2c2d.2e2f +IPv4 address: 10.0.0.11 + +Disabled reason(s): +Duplicate IPv4 address reservation: 10.0.0.11` +``` + + +For DHCPv6, ***Direct*** and ***Relay*** indicate whether the DHCP +Server accepts broadcast and relayed messages, respectively. + + +``` +`switch# **show dhcp server ipv6** +IPv6 DHCP server is active +Debug log is enabled +DNS server(s): fe80::6 +DNS domain name: aristanetworks.example.com +Lease duration: 1 days 3 hours 30 minutes +Active leases: 0 +IPv6 DHCP interface status: +Interface Status +--------------- ------ +Ethernet1 Active +Ethernet3 Active + +Subnet: fe80::/10 +Subnet name: foo +Range: fe80::1 to fe80::3 +DNS server(s): fe80::4 fe80::5 +Direct: Inactive (Multiple interfaces match this subnet: Ethernet1 Ethernet3) +Relay: Active +Active leases: 0` +``` + + +For DHCPv6, a subnet may match only one interface and vice versa. Otherwise the +subnet is disabled and no lease is assigned for that subnet. + + +``` +`interface Ethernet1 +no switchport +ipv6 address 2001:db8:0:10::1/64 +dhcp server ipv6 +interface Ethernet3 +no switchport +ipv6 address 2001:db8:0:11::1/64 +dhcp server ipv6 +dhcp server +subnet 2001:db8::/56` +``` + + +The following enables DHCPv6 on Ethernet1 (with address fc00::1/7 and fe80::1/64), +and then configures subnets fc00::/7 and fe80::/64 for DHCPv6.
+ + +``` +`interface Ethernet1 +no switchport +ipv6 address fc00::1/7 +ipv6 address fe80::1/64 link-local +dhcp server ipv6 +dhcp server +subnet fc00::/7 +subnet fe80::/64 + +#**show dhcp server ipv6** +IPv6 DHCP server is active +DNS server(s): fc00::2 +DNS domain name: aristanetworks.example.com +Lease duration: 1 days 3 hours 30 minutes +Active leases: 0 +IPv6 DHCP interface status: +Interface Status +--------------- ------ +Ethernet1 Active + +Subnet: fc00::/7 +Subnet name: foo +Range: fc00::1 to fc00::5 +DNS server(s): fc00::6 fc00::8 +Direct: Inactive (This and other subnets match interface Ethernet1) +Relay: Active + +Active leases: 0 + +Subnet: fe80::/64 +Subnet name: subnetBarV6 +Direct: Inactive (This and other subnets match interface Ethernet1) +Relay: Active + +Active leases: 0` +``` + + +#### Leases + + +The following output displays the IP addresses allocated by the DHCP Server with the +**show dhcp server [ipv4|ipv6] leases** command. It +also displays the expected end time for the address, the time when the address is +assigned, and the equivalent MAC address. + + +``` +`switch# **show dhcp server leases** +10.0.0.10 +End: 2019/06/20 17:44:34 UTC +Last transaction: 2019/06/19 17:44:34 UTC +MAC address: 5692.4c67.460a + +2000:0:0:40::b +End: 2019/06/20 18:06:33 UTC +Last transaction: 2019/06/20 14:36:33 UTC +MAC address: 165a.a86d.ffac` +``` + + + +## DHCP Relay Global Configuration Mode + + + + +Configure DHCP Relay using the dhcp relay command in + the global configuration mode. The command places the switch in DHCP Relay mode and allows + the configuration of DHCP Relay on several interfaces with a single command. The + configuration entered in the DHCP Relay global configuration mode can be overridden by + equivalent interface specific commands. + + +**Examples** + + +The **dhcp relay** command places the switch in the DHCP Relay + configuration + mode. 
+``` +`switch(config)# **dhcp relay** +switch(config-dhcp-relay)#` +``` + + +Specify the IP address of the default DHCP or DHCPv6 Server. Multiple IP addresses can be + specified and DHCP requests forward to all specified helper addresses. Configure an + **ip helper-address +IP_Address** under each desired routing interface. + + +Use the following commands to forward DHCP broadcast packets received on interface + **Ethernet1** and **Vlan2** to DHCP + servers at **10.0.0.1**, **10.0.0.2**, and to + hostname + **DefaultDHCPHostname**: +``` +`switch(config)# **interface ethernet1** +switch(config-if-Et1)# **no switchport** +switch(config-if-Et1)# **ip address 192.168.1.1/16** + +switch(config)# **interface vlan2** +switch(config-if-Et1)# **ip address 172.16.1.1/16** + +switch(config)# **dhcp relay** +switch(config-dhcp-relay)# **server 10.0.0.1** +switch(config-dhcp-relay)# **server 10.0.0.2** +switch(config-dhcp-relay)# **server DefaultDHCPHostname**` +``` + + +Use the following commands to forward DHCPv6 broadcast packets received on interface + **ethernet1** to a DHCPv6 Server at + **fc00::3**. +``` +`switch(config)# **interface ethernet1** +switch(config-if-Et1)# **no switchport** +switch(config-if-Et1)# **ipv6 address fc00::1/10** + +switch(config)# **dhcp relay** +switch(config-dhcp-relay)# **server fc00::3**` +``` + + +The configuration points a routed interface to the specified DHCP and DHCPv6 server, if the + configuration meets following criteria: + +- The default VRF contains the routed interface. + +- The interface has an IP address configured. + +- The configuration does not occur on a Management or a Loopback interface. + + + + + Use the following commands to remove the default DHCP or DHCPv6 + Server. 
+``` +`switch(config)# **dhcp relay** +switch(config-dhcp-relay)# **no server 10.0.0.1** +switch(config-dhcp-relay)# **no server 10.0.0.2** +switch(config-dhcp-relay)# **no server DefaultDHCPHostname** +switch(config-dhcp-relay)# **no server fc00::3**` +``` + + +To override the default DHCP Server on an interface, the parameter, **ip + helper-address IP_Address**, must be used. + + +Use the following commands to forward a DHCP broadcast packet received on interface + Ethernet1 to DHCP Servers at **10.0.0.1**, + **10.0.0.2** and hostname + **DefaultDHCPHostname**, but VLAN2 broadcasts packets to the + DHCP Server at **10.0.0.3** + only. +``` +`switch(config)# **interface ethernet 1** +switch(config-if-Et1)# **no switchport** +switch(config-if-Et1)# **ip address 192.168.1.1/16** + +switch(config)# **interface vlan2** +switch(config-if-Vl2)# **ip address 172.16.1.1/16** +switch(config-if-Vl2)# **ip helper-address 10.0.0.3** + +switch(config)# **dhcp relay** +switch(config-dhcp-relay)# **server 10.0.0.1** +switch(config-dhcp-relay)# **server 10.0.0.2** +switch(config-dhcp-relay)# **server DefaultDHCPHostname**` +``` + + +To override the default DHCPv6 Server on an interface, the parameter, **ipv6 + helper-address +IPv6_Address**, must be used. + + +Use the following commands to forward a DHCPv6 broadcast packet received on interface + Ethernet1 to DHCPv6 Server at **fc00::3**, and VLAN2 broadcasts + packets to DHCPv6 Server at **fc00::4** + only. +``` +`switch(config)# **interface ethernet 1** +switch(config-if-Et1)# **no switchport** +switch(config-if-Et1)# **ipv6 address fc00::1/10** + +switch(config)# **interface vlan2** +switch(config-if-Vl2)# **ipv6 address fc00::2/10** +switch(config-if-Vl2)# **ipv6 helper-address fc00::4** + +switch(config)# **dhcp relay** +switch(config-dhcp-relay)# **server fc00::3**` +``` + + +Configure DHCP Relay for IPv4 unnumbered interfaces by adding a DHCP IPv4 helper address + and configuring the vendor option.
+ + +Use the **information option** command to enter DHCP Relay + Information Option Configuration + Mode: +``` +`switch(config)# **dhcp relay** +switch(config-dhcp-relay)# **information option** +switch(config-information-option)# **vendor-option**` +``` + + +Configure Option-37 in DHCPv6 Relay to include the host name of the switch along with MAC + address and interface name in the remote id of the option. It requires the +`remote-id` format to be specified in the configuration mode. + + +Use the following command to add the + remote-id: +``` +`switch(config)# **ipv6 dhcp relay option remote-id format %m:%h:%p**` +``` + + +You can disable DHCP or DHCPv6 Relay functionality from a specific interface. This disables + both DHCP Relay global and interface mode configurations. + + +Use the following command to disable DHCP Relay functionality + only. +``` +`switch(config)# **interface vlan3** +switch(config-if-Et1)# **dhcp relay ipv4 disabled**` +``` + + +Use the following to disable DHCPv6 Relay functionality + only. +``` +`switch(config)# **interface vlan3** +switch(config-if-Et1)# **dhcp relay ipv6 disabled**` +``` + + + + + +### Displaying DHCP Relay + + +The **show ip dhcp relay** command displays all the +interfaces enabled with DHCP Relay and the server configured on these interfaces. 
+ + +**Example** +``` +`switch# **show ip dhcp relay** +DHCP Relay is active +DHCP Relay Option 82 is disabled +DHCPv6 Relay Link-layer Address Option (79) is disabled +DHCPv6 Relay Remote ID (Option 37) encoding format: MAC address:interface ID +DHCP Smart Relay is disabled +Default L3 interface DHCP servers: + DHCPv4 servers: 10.0.0.1 + 10.0.0.2 + DefaultDHCPHostname + DHCPv6 servers: fc00::3 +Interface: Ethernet1 + DHCP Smart Relay is disabled + DHCPv6 all subnet relaying is disabled + Using default DHCPv4 servers + Using default DHCPv6 servers +Interface: Ethernet2 + DHCP Smart Relay is disabled + DHCPv6 all subnet relaying is disabled + Using default DHCPv4 servers + DHCPv6 servers: fc00::4 +Interface: Vlan2 + DHCP Smart Relay is disabled + DHCPv6 all subnet relaying is disabled + DHCPv4 servers: 11.0.0.3 + DHCPv6 servers: fc00::4 +Interface: Vlan3 + DHCP Smart Relay is disabled + DHCPv6 all subnet relaying is disabled + DHCPv4 Relay is disabled + DHCPv6 Relay is disabled` +``` + + + Use the **show ip dhcp relay** command to display DHCP Relay for unnumbered + interfaces: + +``` +`switch# **show ip dhcp relay** + DHCP Relay Option (82) is enabled + DHCP Relay vendor-specific suboption (9) under information option (82)` +``` + + +### DHCP Relay Across VRF + + +The EOS DHCP relay agent supports +forwarding of DHCP requests to DHCP servers located in a different VRF +to the DHCP client interface VRF. In order to enable VRF support for +the DHCP relay agent, Option 82 (DHCP Relay Agent Information Option) +must first be enabled. The DHCP relay agent uses Option 82 to pass client +specific information to the DHCP server. 
+ + +These sections describe DHCP Relay across VRF features: + +- Configuring DHCP Relay + +- DHCP Relay Global Configuration Mode Show +Command + + +The DHCP relay agent inserts Option 82 information into the DHCP forwarded request, which +requires that the DHCP server belongs to a network on an interface, and that interface +belongs to a different VRF than the DHCP client interface. Option 82 information +includes the following: + +- **VPN identifier** - The VRF name for the ingress interface of the DHCP +request, inserted as sub-option 151. + + +Table 1. VPN Identifier + +| SubOpt +| Len +| ASCII VRF Identifier +| + + +| 151 +| 7 +| V +| R +| F +| N +| A +| M +| E +| + +- **Link selection** - The subnet address of the interface that receives the +DHCP request, inserted as sub-option 5. After enabling the DHCP smart relay, the +link selection fills with the subnet of the active address. The relay agent sets +the Gateway IP address (gIPaddr) to its IP address so that DHCP messages can be +routed over the network to the DHCP server. +Table 2. Link Selection + +| SubOpt +| Len +| Subnet IP Address +| + + +| 5 +| 4 +| A1 +| A2 +| A3 +| A4 +| + +- **Server identifier override** - The primary IP address of the interface that +receives the DHCP request, inserted as sub-option 11. After enabling the DHCP +smart relay, the server identifier fills with the active address, one of the +primary or secondary addresses chosen by the smart relay mechanism. +Table 3. Server Identifier Override + +| SubOpt +| Len +| Overriding Server Identifier +Address +| + + +| 11 +| 4 +| B1 +| B2 +| B3 +| B4 +| + +- **VSS control suboption as suboption 152** - The DHCP server strips out this +suboption when sending the response to the relay, indicating that the DHCP +server used VPN information to allocate the IP address. + +- **Circuit ID** - Identifies the circuit, interface or VLAN, on the switch that received the request. + +- **Remote ID** - Identifies the remote host. 
+ + +Note: The DHCP server must be capable of handling VPN identifier information in Option 82. + + + +Direct communication between DHCP client and server may not be possible if they reside in +separate VRFs. The Server identifier override and Link Selection sub-options set the +relay agent to act as the DHCP server, and enable all DHCP communication to flow through +the relay agent. + + +The relay agent adds all the appropriate sub-options, and forwards all request packets, including +renew and release, to the DHCP server. When the relay receives the DHCP server response +messages, EOS removes Option 82 information and forwards the response to the DHCP client +in the client VRF. + + +#### Configuring DHCP Relay + + +The DHCP relay agent information option is inserted in DHCP messages relayed to the DHCP server. +The ip helper-address command enables DHCP relay on an interface +and relays DHCP messages to the specified IPv4 address. + + +**Example** + + +This command enables DHCP relay on **interface ethernet 1/2** +and relays DHCP messages to the server at +**1.1.1.1**. +``` +`switch(config)# **interface ethernet 1/2** +switch(config-if-Et1/2)# **ip helper-address 1.1.1.1** +switch(config-if-Et1/2)#` +``` + + +The commands provided in the following examples enable the attachment of VRF-related tags +in the relay agent information option. If both the DHCP client interface and server +interface exist on the same VRF, default or non-default, then EOS does not insert the +VRF-related DHCP relay agent information option. + + +**Examples** + +- This command configures the DHCP relay to add option 82 +information. +``` +`switch(config)# **ip dhcp relay information option**` +``` + +- These commands configure two new VRF instances and assign them Route +Distinguishers +(RDs). 
+``` +`switch(config)# **vrf instance mtxxg-vrf** +switch(config-vrf-mtxxg-vrf)# **router bgp 50** +switch(config-router-bgp)# **vrf mtxxg-vrf** +switch(config-router-bgp-vrf-mtxxg-vrf)# **rd 5546:5546** +switch(config)# **vrf instance qchyh-vrf** +switch(config-vrf-qchyh-vrf)# **router bgp 50** +switch(config-router-bgp)# **vrf qchyh-vrf** +switch(config-router-bgp-vrf-qchyh-vrf)# **rd 218:218**` +``` + +- This command configures an interface connected to DHCP client in vrf +**mtxxg-vrf** and assigns an IP +address. +``` +`switch(config)# **interface ethernet 9** +switch(config-if-Et9)# **no switchport**` +``` + +- This command configures the DHCP client interface in VRF +**mtxxg-vrf**. +``` +`switch(config-if-Et9)# **vrf mtxxg-vrf** +switch(config-if-Et9)# **ip address 10.10.0.1/16**` +``` + +- This command configures the server interface in VRF +**qchyh-vrf.** +``` +`switch(config-if-Et11)# **vrf qchyh-vrf** +switch(config-if-Et11)# **ip address 10.40.0.1/16**` +``` + +- This command configures a helper address for a DHCP server in VRF +**qchyh-vrf**. +``` +`switch(config-if-Et11)# **ip helper-address 10.40.2.3 vrf qchyh-vrf**` +``` + + +##### Configuring Option 82 + + +Use the following commands to enter Information Option (Option 82) insertion and configure the format of information options: + + +``` +`switch(config)# **dhcp relay** +switch(config-dhcp-relay)# **information option** +switch(config-information-option)#` +``` + + +To specify the format for the **circuit-id encoding**, use the following command: + + +``` +`switch(config-information-option)# **circuit-id encoding (%x | %p)**` +``` + + +The default format uses string denoted by **%p**. Setting the encoding to **%x** enables +hex encoding for the circuit ID. The configured value must be a valid hex number. If not configured, DHCP Relay uses the default format. 
+ + +To specify the format for the **remote-id encoding**, use the following command: + + +``` +`switch(config-information-option)# **remote-id encoding (%x | %p)**` +``` + + +The default format uses string denoted by **%p**. Setting the encoding to **%x** enables +hex encoding for the remote ID. The configured value must be a valid hex number. If not configured, DHCP Relay uses the default format. + + +#### DHCP Relay Global Configuration Mode Show Command + + +**Example** + + +This command displays the VRF specifier for the +server: +``` +`switch# **show ip dhcp relay** +DHCP Relay is active +DHCP Relay Option 82 is enabled +DHCP Smart Relay is disabled +Interface: Ethernet9 +Option 82 Circuit ID: Ethernet9 +DHCP Smart Relay is disabled +DHCP servers: 10.40.2.3 +10.40.2.3:vrf=qchyh-vrf` +``` + + +### DHCP Relay in VXLAN EVPN + + +The ip dhcp relay information option (Global) +command enables the configuration of the DHCP server to uniquely identify +the origin of the request using a source-interface and the helper address. +Configure the source interface with a routable address used by the +DHCP server to uniquely identify the DHCP relay agent that forwarded the +client request. + + +#### Configuring DHCP Relay in VXLAN EVPN (IPv4) + + +Use the following command to enable the DHCP relay information option +(**Option 82**) required to specify +a source interface. + + +``` +`switch(config)# **ip dhcp relay information option**` +``` + + +The following configures a Loopback interface as the source interface. + + +``` +`switch(config)# **interface Loopback1** +switch(config-if-Lo1)# **ip address 1.1.1.1/24**` +``` + + +Use the following commands to configure the Loopback interface as the +specified source interface for the helper address. 
+ + +``` +`switch(config)# **interface vlan100** +switch(config-if-Vl100)# **ip helper-address 10.1.1.4 source-interface Loopback1**` +``` + + +Use the following commands to configure the Loopback interface when the +DHCP server resides in a different VRF +(**red**). The source interface must +be configured in the DHCP server VRF for the command to take effect. + + +``` +`switch(config)# **interface Loopback3** +switch(config-if-Lo3)# **vrf red** +switch(config-if-Lo3)# **ip address 1.1.1.1/24** + +switch(config)# **interface vlan100** +switch(config-if-Vl100)# **ip helper-address 10.1.1.4 vrf red source-interface Loopback3**` +``` + + +The following command disables the use of source interface along with the +helper address. + + +``` +`switch(config)# **interface vlan100** +switch(config-if-Vl100)# **no ip helper-address 10.1.1.4 source-interface Loopback1**` +``` + + +#### Configuring DHCP Relay in VXLAN EVPN (IPv6) + + +Use the following commands to configure a local interface. + + +``` +`switch(config)# **interface Loopback2** +switch(config-if-Vl100)# **ipv6 address 2001::10:20:30:1/128**` +``` + + +Use the following commands to configure the Loopback interface as the +local interface for the helper address. + + +``` +`switch(config)# **interface vlan200** +switch(config-if-Vl200)# **ipv6 dhcp relay destination 2002::10:20:30:2 local-interface Loopback2**` +``` + + +Use the following commands to configure the Loopback interface when the +DHCP server is in a different VRF (**red**). +The local interface must be configured in the DHCP server's VRF for +the command to take effect. 
+ + +``` +`switch(config)# **interface Loopback4** +switch(config-if-Lo4)# **vrf red** +switch(config-if-Lo4)# **ipv6 address 2001::10:20:30:1/128** + +switch(config)# **interface vlan200** +switch(config-if-Vl200)# **ipv6 dhcp relay destination 2002::10:20:30:2 vrf red local-interface Loopback4**` +``` + + +Use the following command to disable the use of local interface along +with the helper address. + + +``` +`switch(config-if-Vl200)# **no ipv6 dhcp relay destination 2002::10:20:30:2 local-interface Loopback4**` +``` + + +The following command displays the status of DHCP relay option +(**Option 82**) and lists the +configured DHCP servers. + + +``` +`switch# **show ip dhcp relay** +DHCP Relay is active +DHCP Relay Option 82 is enabled +DHCP Smart Relay is disabled +Interface: Vlan100 + Option 82 Circuit ID: Vlan100 + DHCP Smart Relay is disabled + DHCP servers: 10.1.1.4 +Interface: Vlan200 + Option 82 Circuit ID: Vlan100 + DHCP Smart Relay is disabled + DHCP servers: 2002::10:20:30:2` +``` + + +## DHCP Snooping with Bridging + + +In this configuration, in addition to sending DHCP packets to relay after +adding information option, the packets can also bridge within the VLAN. In the bridging mode, the +switch intercepts DHCP packets, inserts option-82 if not already present, and bridges the packet +within the VLAN. This mode of DHCP snooping can be configured without DHCP relay +configuration. +Note: EOS supports DHCP Snooping with Bridging on MLAG configurations. + + + +### Configuring DHCP Snooping with Bridging + + +Following are the steps to configure DHCP snooping with bridging: + +- Enable DHCP snooping feature using the ip dhcp snooping +command. +``` +`switch# **ip dhcp snooping**` +``` + +- Enable the insertion of option-82 in DHCP request packets using the ip dhcp snooping information option +command. By default, option-82 is disabled and must be enabled for +DHCP Snooping to be +functional. 
+``` +`switch# **ip dhcp snooping information option**` +``` + +- Enable DHCP snooping on the corresponding VLANs using the ip dhcp snooping vlan command. By default, EOS +disables DHCP snooping on any +VLAN. +``` +`switch# **ip dhcp snooping vlan**` +``` + +- Set the circuit-id information sent in option-82. By default, EOS sends the +Interface name and VLAN ID. Remote circuit-id contains the MAC address +of the relay +agent. +``` +`switch# **ip dhcp snooping information option circuit-id type 2 format** +%h:%p Hostname and interface name +%p:%v Interface name and VLAN ID` +``` + +- Enable bridging capabilities of DHCP snooping using the ip dhcp snooping bridging command. This +command enables DHCP snooping with or without DHCP relay +configuration. +``` +`switch# **ip dhcp snooping bridging**` +``` + + + + + + + +### DHCP Snooping with Bridging Show Commands + + +The **show ip dhcp snooping** command displays the DHCP snooping with bridging +information. +``` +`switch# **show ip dhcp snooping** +DHCP Snooping is enabled +DHCP Snooping is operational +DHCP Snooping is configured on following VLANs: + 650 +**DHCP Snooping bridging is operational on following VLANs:** + 650 +Insertion of Option-82 is enabled + Circuit-id sub-option Type: 0 + Circuit-id format: Interface name:Vlan ID + Remote-id: 00:1c:73:8d:eb:67 (Switch MAC)` +``` + + +### Troubleshooting + + +- Configure all the needed commands so that DHCP snooping is enabled and operational on all +the VLANs. + +- **show ip dhcp snooping** displays whether the DHCP snooping is +operational or not. + +- **show ip dhcp snooping counters** displays if snooped packets are +getting dropped or not. + +- **show ip dhcp snooping counters debug** displays the reason for +packets getting dropped. 
+``` +`switch# **show ip dhcp snooping counters debug** +Counter Requests Responses +----------------------------- ----------------- ----------------- +Received 3 2 +Forwarded 3 2 +Dropped - Invalid VlanId 0 0 +Dropped - Parse error 0 0 +Dropped - Invalid Dhcp Optype 0 0 +Dropped - Invalid Info Option 0 0 +Dropped - Snooping disabled 0 0` +``` + +- Check if the packets are hitting the TCAM rule. + +``` +`switch# **show platform trident tcam detail | grep -i dhcp** +DHCP Snooping uses 3 entries. +… +655402 45 hits - DHCP client to relay trap-to-cpu` +``` + + +## TCP MSS Clamping + + +TCP MSS clamping limits the value of the Maximum Segment Size (MSS) in the TCP header of TCP SYN +packets transiting a specified Ethernet or tunnel interface. +Setting the MSS ceiling can avoid IP fragmentation in tunnel +scenarios by ensuring that the MSS is low enough to account for +the extra overhead of GRE and tunnel outer IP headers. TCP MSS +clamping can be used when connecting via GRE to cloud providers +that require asymmetric routing. + + +When MSS clamping is configured on an +interface, if the TCP MSS value in a SYN packet transiting that interface +exceeds the configured ceiling limit it will be overwritten with the +configured limit and the TCP checksum will be recomputed and updated. + + +TCP MSS clamping is handled by default in the software data path, but the process can be +supported through hardware configuration to minimize possible packet loss and a +reduction in the number of TCP sessions which the switch can establish per second. + + +### Cautions + + +*This feature should be used with caution*. When the TCP MSS clamping feature is enabled by +issuing the tcp mss ceiling command +on any routed interface, *all* routed IPv4 TCP SYN +packets (TCP packets with the “SYN” flag set) are sent by +default to the CPU and switched through software, even on +interfaces where no TCP MSS ceiling has been configured, +as long as TCP MSS clamping is enabled. 
This limits the +number of TCP sessions that can be established through the +switch per second, and, because throughput for software +forwarding is limited, this feature can also cause packet +loss if the rate at which TCP SYN packets are sent to the +CPU exceeds the limits configured in the control-plane +policy map. + + +Packet loss and TCP session reductions +can be minimized by enabling TCP MSS clamping in hardware, but only SYN +packets in which MSS is the first TCP option are clamped in the hardware +data path; other TCP SYN packets are still switched through software. + + +To disable MSS clamping, the MSS ceiling must be removed from every interface on which it has +been configured by issuing the **no tcp mss +ceiling** command on each configured +interface. + + +### Enabling TCP MSS Clamping + + +There is no global configuration to enable TCP MSS clamping. It is enabled as soon as an MSS ceiling is configured on at least one interface. + + +### Disabling TCP MSS Clamping + + +To disable TCP MSS clamping, the MSS ceiling configuration must be removed from every interface +by using the **no** or **default** form of +the tcp mss ceiling command on every interface where a ceiling +has been configured. + + +### Configuring the TCP MSS Ceiling on an Interface + + +The TCP MSS ceiling limit is set on an interface using the tcp mss ceiling +command. This also enables TCP MSS clamping on the switch as a whole. + + +Note: Configuring a TCP MSS ceiling on any interface enables TCP MSS clamping on the switch as a +whole. Without hardware support, clamping routes all TCP SYN packets through +software, even on interfaces where no TCP MSS ceiling has been configured. +This significantly limits the number of TCP sessions the switch can +establish per second, and can potentially cause packet loss if the CPU +traffic exceeds control plane policy limits. 
+On Sand platform switches (Qumran-MX, Qumran-AX, Jericho, Jericho+), the following limitations +apply: + +- This command works only on egress. + +- TCP MSS ceiling is supported on IPv4 unicast packets entering +the switch; the configuration has no effect on GRE transit +packets. + +- The feature is supported only on IPv4 routed interfaces. It is +not supported on L2 (switchport) interfaces or IPv6 routed +interfaces. + +- The feature is not supported for IPv6 packets even if they are +going to be tunneled over an IPv4 GRE tunnel. + +- The feature is not supported on VXLAN, loopback or management +interfaces. + +- The feature is only supported on IPv4 unicast packets entering +the switch. The configuration has no effect on GRE transit +packets or GRE decap, even if the egress interface has a TCP +MSS ceiling configured. + + +**Example** + + +- These commands configure **interface ethernet 5** +as a routed port, then specify a maximum MSS ceiling value of +**1458** bytes for TCP SYN +packets exiting that +port. +``` +`switch(config)# **interface ethernet 5** +switch(config-if-Et5)# **no switchport** +switch(config-if-Et5)# **tcp mss ceiling ipv4 1458 egress** +switch(config-if-Et5)#` +``` + +- These commands apply TCP MSS clamping at **1436** +bytes in the egress direction for IPv6 +packets: +``` +`switch(config)# **interface ethernet 26** +switch(config)# **tcp mss ceiling ipv6 1436 egress**` +``` + +- These commands apply TCP MSS clamping at **1476** +bytes for IPv4 packets and **1436** bytes for +IPv6 packets in egress +direction: +``` +`switch(config)# **interface ethernet 27** +switch(config)# **tcp mss ceiling ipv4 1476 ipv6 1436 egress**` +``` + + +### Verifying the TCP MSS Clamping + + +If TCP MSS ceiling is configured on an interface and if the command **show cpu +counters queue | nz** is incrementing in +**CoppSystemL3Ttl1IpOptUcast** +field for Tcp packet with Syn flag, then TCP MSS clamping is being performed +in Software. 
+ + +``` +`switch# **show cpu counters queue | nz** +Fap0.1: +CoPP Class Queue Pkts Octets DropPkts DropOctets +Aggregate +------------------------------------------------------------------------------ +CoppSystemL3Ttl1IpOptUcast TC0 1 82 0 0` +``` + + +### Configuring TCP MSS Clamping + + +#### Interface Configuration + + +You can specify the TCP MSS value under the ***interface configuration +mode***. The command syntax is shown below: + + +**tcp mss ceiling** [ipv4 | +ipv6] **64-65515** +egress + + +The keyword **egress** specifies that the MSS clamping is +applied on packets transmitted out on the interface in egress direction. + + +The following example applies TCP MSS clamping at **1436** +bytes in the egress direction for IPv4 +packets: +``` +`switch(config)# **interface ethernet 25** +switch(config)#**tcp mss ceiling ipv4 1436 egress**` +``` + + +the following example applies TCP MSS clamping at **1436** +bytes in the egress direction for IPv6 +packets: +``` +`switch(config)# **interface ethernet 26** +switch(config)# **tcp mss ceiling ipv6 1436 egress**` +``` + + +The following example applies TCP MSS clamping at **1476** +bytes for IPv4 packets and **1436** bytes for IPv6 packets in +egress +direction: +``` +`switch(config)# **interface ethernet 27** +switch(config)# **tcp mss ceiling ipv4 1476 ipv6 1436 egress**` +``` + + +#### Hardware TCP MSS Clamping Configuration + + +Hardware MSS clamping requires the system TCAM profile to have TCP MSS clamping +enabled. You can achieve this by creating a user defined TCAM profile as described +below. The [User Defined PMF Profiles - TOI](https://www.arista.com/en/support/toi/eos-4-20-5f/13977-user-defined-pmf-profile) provides +general guidelines on how to create and configure TCAM profiles. + + +The system TCAM profile must have the feature **tcp-mss-ceiling +ip** in it in order to use hardware MSS clamping. 
This is +applicable regardless of whether the TCAM profile is copied from an existing profile +or created from scratch. + + +**Step 1: Create the user defined TCAM profile** + + +The following example demonstrates copying any source profile and adding the feature +**tcp-mss-ceiling ip**. In this example, the profile +name is **Pro1** and the source profile name is +**Source1**. +``` +`(config)# **hardware tcam** +(config-hw-tcam)# **profile Pro1 copy Source1** +(config-hw-tcam-profile-Pro1)# **feature tcp-mss-ceiling ip copy system-feature-source-profile**` +``` + + +TCP MSS clamping is supported only for IPv4 routed packets. Set the packet type for +the feature as follows. This is optional when using **copy +system-feature-source-profile**. In this example, the system +profile name is **Pro1** and the feature name is +**Source1**. +``` +`(config-hw-tcam-profile-Pro1-feature-Source1)# **packet ipv4 forwarding routed**` +``` + + +Set the key size limit to **160**. This is also optional when +the feature is copied from **system-feature-source-profile**. +In this example, the system profile name is **Pro1** and the +feature name is +**Source1**. +``` +`(config-hw-tcam-profile-Pro1-feature-Source1)# **key size limit 160**` +``` + + +Removing unused features to ensure that the TCP MSS TCAM DB is allocated. In this +example, the system profile name is **Pro1** and the feature +name is +**Source1**. +``` +`(config-hw-tcam-profile-Pro1-feature-Source1)# **exit** +(config-hw-tcam-profile-Pro1)# **no feature mirror ip** +(config-hw-tcam-profile-Pro1)# **no feature acl port mac**` +``` + + +**Step 2: Apply the user defined TCAM profile to the system.** + + +The following example sets the profile as the system profile under the *hardware +tcam* mode. In this example, the system profile name is +**red**. +``` +`(config-hw-tcam)# **system profile red**` +``` + + +When the system TCAM profile is changed, it is expected that some agents will +restart. 
Also, it might be necessary to remove some unused features from the TCAM +profile to ensure that the TCP MSS feature gets allocated a TCAM DB. For more +information about configuring TCAM profiles, refer to [User Defined PMF Profiles](https://www.arista.com/en/support/toi/eos-4-20-5f/13977-user-defined-pmf-profile). + + +Note: The hardware clamping only works for TCP packets with MSS as the first TCP option. +Packets where MSS is not the first TCP option are still trapped to CPU for clamping +in software even if the **feature tcp-mss-ceiling** is +configured in the system TCAM profile. + + +#### Backward Compatibility + + +The **tunnel mss ceiling** command, which provides the same +functionality, is deprecated with the introduction of the **tcp mss +ceiling** command. The configuration option **tunnel +mss ceiling** was available only on GRE tunnel interfaces, while +**tcp mss ceiling** is supported on other routed IPv4 +interfaces as well. + + +### TCP MSS Clamping Limitations + + +- The TCP-MSS Clamping is not supported on L2 (switchport) interfaces. + +- The TCP-MSS Clamping is NOT supported on VXLAN, Loopback and Management +interfaces. + +- The TCP-MSS Clamping is supported only in the Egress direction. + +- The TCP-MSS Clamping is only supported on unicast routed packets +entering the switch. The configuration has no effect on GRE transit +packets or in the GRE decap case, even if the Egress interface has TCP MSS +ceiling configured. + + +**Software TCP MSS Clamping Limitations** + + +- Once the TCP-MSS Clamping is enabled, all routed TCP-SYN packets will be +software switched, even on interfaces where there is no TCP-MSS +ceiling configuration. + +- TCP SYN packets could get dropped under high CPU usage conditions or due +to DoS attack protection mechanisms such as PDP/CoPP. These factors +could limit the TCP connection establishment rate, i.e., new TCP +sessions established per second through the switch. 
+ + +**Hardware MSS Clamping Limitations** + + +- Hardware TCP-MSS clamping is not supported with host routes when the +clamping is applied on a non-tunnel interface. This limitation does +not apply to GRE tunnel interfaces. + +- TCP SYN packets where TCP-MSS is not the first TCP option are trapped to +CPU for MSS adjustment even in hardware MSS clamping mode. + +- Hardware TCP-MSS clamping is not supported for IPv6 packets. + + +### Configuring Hardware Support for TCP MSS Clamping + + +TCP MSS clamping can be supported +in hardware, but some packets are still routed through the software data +path, and an MSS ceiling value must be configured on each interface where +clamping is to be applied. + + +Hardware support for clamping is accomplished through the use of a user-defined TCAM profile. The +TCAM profile can be created from scratch or copied +from an existing profile, but in either case it must +include the **tcp-mss-ceiling +ip** feature. + + +#### Guidelines + + +- When the system TCAM profile is changed, some agents will restart. + +- To ensure that the TCP MSS feature is allocated a TCAM DB, it may be necessary to remove +some unused features from the TCAM profile. + +- Hardware TCP MSS clamping only works for TCP packets with MSS as the first TCP option. +Other TCP SYN packets are still trapped to the CPU for clamping in software. + +- Hardware TCP MSS clamping is not supported with host routes when the clamping is applied +on a non-tunnel interface. This limitation does not apply to GRE tunnel interfaces. + +- The maximum MSS ceiling limit with hardware MSS clamping is 32727 even though the CLI +allows configuration of much larger values. + +- For more information on the creation of user-defined TCAM profiles, see [https://www.arista.com/en/support/toi/eos-4-20-5f/13977-user-defined-pmf-profile](https://www.arista.com/en/support/toi/eos-4-20-5f/13977-user-defined-pmf-profile). 
+ + +To configure hardware support for TCP MSS clamping, create a TCAM profile that includes the **tcp-mss-ceiling ip** feature, then apply it to the system. + + +#### Creating the TCAM Profile + + +A TCAM profile that supports +TCP MSS clamping can be created from scratch, or the feature can be added +to a copy of the default TCAM profile. When creating a profile from scratch, +care must be taken to ensure that all needed TCAM features are included +in the profile. + + +##### Modifying a Copy of the Default TCAM Profile + + +The following commands create a copy of the default TCAM profile, name it +**tcp-mss-clamping**, and +configure it to enable MSS clamping in hardware, then remove some +unused features included in the default profile to ensure that there +are sufficient TCAM resources for the clamping feature. + + +``` +`switch(config)# **hardware tcam** +switch(config-hw-tcam)# **profile tcp-mss-clamping copy default** +switch(config-hw-tcam-profile-tcp-mss-clamping)# **feature tcp-mss-ceiling ip copy system-feature-source-profile** +switch(config-hw-tcam-profile-tcp-mss-clamping-feature-tcp-mss-ceiling)# **key size limit 160** +switch(config-hw-tcam-profile-tcp-mss-clamping-feature-tcp-mss-ceiling)# **packet ipv4 forwarding routed** +switch(config-hw-tcam-profile-tcp-mss-clamping-feature-tcp-mss-ceiling)# **exit** + +switch(config-hw-tcam-profile-tcp-mss-clamping)# **no feature mirror ip** +switch(config-hw-tcam-profile-tcp-mss-clamping)# **no feature acl port mac** +switch(config-hw-tcam-profile-tcp-mss-clamping)# **exit** + +switch(config-hw-tcam)# **exit** + +switch(config)#` +``` + + +#### Applying the TCAM Profile to the System + + +The following commands enter Hardware TCAM Configuration Mode and set the +**tcp-mss-clamping** profile as the +system profile. 
+ + +``` +`switch(config)# **hardware tcam** +switch(config-hw-tcam)# **system profile tcp-mss-clamping** +switch(config-hw-tcam)#` +``` + + +#### Verifying the TCAM Profile Configuration + + +The following command displays +hardware TCAM profile information to verify that the user-defined TCAM +profile has been applied correctly. + + +``` +`switch(config)# **show hardware tcam profile** + +Configuration Status +FixedSystem tcp-mss-clamping tcp-mss-clamping + +switch(config)#` +``` + + +## IPv4 GRE Tunneling + + +GRE tunneling supports the +forwarding over IPv4 GRE tunnel interfaces. The GRE tunnel interfaces +act as a logical interface that performs GRE encapsulation or decapsulation. + + +Note: The forwarding over GRE tunnel interface on DCS-7500R +is supported only if all the line cards on the system have Jericho +family chip-set. + + +### Configuring GRE Tunneling Interface + + +#### On a Local Arista Switch + + +``` +`switch(config)# **ip routing** +switch(config)# **interface Tunnel 10** +switch(config-if-Tu10)# **tunnel mode gre** +switch(config-if-Tu10)# **ip address 192.168.1.1/24** +switch(config-if-Tu10)# **tunnel source 10.1.1.1** +switch(config-if-Tu10)# **tunnel destination 10.1.1.2** +switch(config-if-Tu10)# **tunnel path-mtu-discovery** +switch(config-if-Tu10)# **tunnel tos 10** +switch(config-if-Tu10)# **tunnel ttl 10**` +``` + + +#### On a Remote Arista Switch + + +``` +`switch(config)# **ip routing** +switch(config)# **interface Tunnel 10** +switch(config-if-Tu10)# **tunnel mode gre** +switch(config-if-Tu10)# **ip address 192.168.1.2/24** +switch(config-if-Tu10)# **tunnel source 10.1.1.2** +switch(config-if-Tu10)# **tunnel destination 10.1.1.1** +switch(config-if-Tu10)# **tunnel path-mtu-discovery** +switch(config-if-Tu10)# **tunnel tos 10** +switch(config-if-Tu10)# **tunnel ttl 10**` +``` + + +#### Alternative Configuration for Tunnel Source IPv4 Address + + +``` +`switch(config)# **interface Loopback 10** +switch(config-if-Lo10)# **ip add 
10.1.1.1/32** +switch(config-if-Lo10)# **exit** + +switch(config)# **conf terminal** +switch(config)# **interface Tunnel 10** +switch(config-if-Tu10)# **tunnel source interface Loopback 10**` +``` + + +#### Configuration for Adding an IPv4 Route over the GRE Tunnel +Interface + + +``` +`switch(config)# **ip route 192.168.100.0/24 Tunnel 10**` +``` + + +#### Tunnel Mode + + +Tunnel Mode needs to be configured as gre, for GRE tunnel interface. Default value is +**tunnel mode gre**. + + +#### IP Address + + +Configures the IP address for the GRE +tunnel interface. The IP address can be used for routing over the GRE +tunnel interface. The configured subnet is reachable over the GRE tunnel +interface and the packets to the subnet are encapsulated in the GRE header. + + +#### Tunnel Source + + +Specifies the source IP address for the +outer IPv4 encapsulation header for packets going over the GRE tunnel +interface. The tunnel source IPv4 address should be a valid local IPv4 +address configured on the Arista Switch. The tunnel source can also be +specified as any routed interface on the Arista Switch. The routed interface’s +IPv4 address is assigned as the tunnel source IPv4 address. + + +#### Tunnel Destination + + +Specifies the destination IPv4 address +for the outer IPv4 encapsulation header for packets going over the GRE +tunnel interface. The tunnel destination IPv4 should be reachable from +the Arista Switch. + + +#### Tunnel Path Mtu Discovery + + +Specifies if the “Do not Fragment” +flag needs to set in the outer IPv4 encapsulation header for packets +going over the GRE tunnel interface. + + +#### Tunnel TOS + + +Specifies the Tunnel Type of Service (ToS) value to be assigned to the outer IPv4 encapsulation +header for packets going over the GRE tunnel interface. Default TOS +value of **0** will be assigned if tunnel TOS +is not configured. 
+ + +#### Tunnel TTL + + +Specifies the TTL value to be assigned +to the outer IPv4 encapsulation header for packets going over the GRE +tunnel interface. The TTL value is copied from the inner IPv4 header +if tunnel TTL is not configured. The tunnel TTL configuration requires +the tunnel Path MTU Discovery to be configured. + + +### Displaying GRE tunnel Information + + +- The following commands +display the tunnel configuration. + + +``` +`switch# **show interfaces Tunnel 10** +Tunnel10 is up, line protocol is up (connected) + Hardware is Tunnel, address is 0a01.0101.0800 + Internet address is 192.168.1.1/24 + Broadcast address is 255.255.255.255 + Tunnel source 10.1.1.1, destination 10.1.1.2 + Tunnel protocol/transport GRE/IP + Key disabled, sequencing disabled + Checksumming of packets disabled + Tunnel TTL 10, Hardware forwarding enabled + Tunnel TOS 10 + Path MTU Discovery + Tunnel transport MTU 1476 bytes + Up 3 seconds` +``` + +- ``` +`switch# **show gre tunnel static** + +Name Index Source Destination Nexthop Interface +-------- ------ -------- ------------ -------- ----------- +Tunnel10 10 10.1.1.1 10.1.1.2 10.6.1.2 Ethernet6/1 + +switch# **show tunnel fib static interface gre 10** +Type 'Static Interface', index 10, forwarding Primary + via 10.6.1.2, 'Ethernet6/1' + GRE, destination 10.1.1.2, source 10.1.1.1, ttl 10, tos 0xa` +``` + +- Use the **show platform fap tcam summary** command +to verify if the TCAM bank is allocated for GRE packet termination +lookup. +``` +`switch# **show platform fap tcam summary** + + Tcam Allocation (Jericho0) +Bank Used By Reserved By +---------- ------------ ----------- +0 dbGreTunnel -` +``` + +- Use the **show ip route** command to verify if the +routes over the tunnel are set up +properly. 
+``` +`switch# **show ip route** + +VRF: default +Codes: C - connected, S - static, K - kernel, + O - OSPF, IA - OSPF inter area, E1 - OSPF external type 1, + E2 - OSPF external type 2, N1 - OSPF NSSA external type 1, + N2 - OSPF NSSA external type2, B I - iBGP, B E - eBGP, + R - RIP, I L1 - IS-IS level 1, I L2 - IS-IS level 2, + O3 - OSPFv3, A B - BGP Aggregate, A O - OSPF Summary, + NG - Nexthop Group Static Route, V - VXLAN Control Service, + DH - DHCP client installed default route, M - Martian, + DP - Dynamic Policy Route + +Gateway of last resort is not set + + C 192.168.1.0/24 is directly connected, Tunnel10, Static Interface GRE tunnel +index 10, dst 10.1.1.2, src 10.1.1.1, TTL 10, TOS 10 + S 192.168.100.0/24 is directly connected, Tunnel10, Static Interface GRE +tunnel index 10, dst 10.1.1.2, src 10.1.1.1, TTL 10, TOS 10` +``` + +- The following commands are used to verify the tunnel encapsulation +programming. +``` +`switch# **show platform fap eedb ip-tunnel gre interface Tunnel 10** + +------------------------------------------------------------------------------- +| Jericho0 | +| GRE Tunnel Egress Encapsulation DB +| +|-----------------------------------------------------------------------------| +| Bank/ | OutLIF | Next | VSI | Encap | TOS | TTL | Source | Destination| +OamLIF| OutLIF | Drop| +| Offset| | OutLIF | LSB | Mode | | | IP | IP | Set +| Profile| | +|-----------------------------------------------------------------------------| +| 3/0 | 0x6000 | 0x4010 | 0 | 2 | 10 | 10 | 10.1.1.1 | 10.1.1.2 | No +| 0 | No | + +switch# **show platform fap eedb ip-tunnel** + +------------------------------------------------------------------------------- +| Jericho0 | +| IP Tunnel Egress Encapsulation DB +| +|-----------------------------------------------------------------------------| +| Bank/ | OutLIF | Next | VSI | Encap| TOS | TTL | Src | Destination | OamLIF +| OutLIF | Drop| +| Offset| | OutLIF | LSB | Mode | Idx | Idx | Idx | IP | Set | +Profile | | 
+|-----------------------------------------------------------------------------| +| 3/0 | 0x6000 | 0x4010 | 0 | 2 | 9 | 0 | 0 | 10.1.1.2 | No | +0 | No |` +``` + + +## GRE Tunneling Support + + +GRE tunneling supports the forwarding over IPv4 GRE tunnel interfaces. The GRE tunnel +interfaces act as a logical interface that performs GRE encapsulation or decapsulation. +A maximum of 256 GRE-tunnel interfaces are supported. + + +Note: GRE keepalives are not supported. +To configure a local Arista switch on a GRE-tunnel interface, consider the following +example. +``` +`switch(config)# **ip routing** +switch(config)# **interface Tunnel 10** +switch(config-if-Tu10)# **tunnel mode gre** +switch(config-if-Tu10)# **ip address 192.168.1.1/24** +switch(config-if-Tu10)# **tunnel source 10.1.1.1** +switch(config-if-Tu10)# **tunnel destination 10.1.1.2** +switch(config-if-Tu10)# **tunnel path-mtu-discovery** +switch(config-if-Tu10)# **tunnel tos 10** +switch(config-if-Tu10)# **tunnel ttl 10**` +``` + + +To configure a remote Arista switch on a GRE-tunnel interface, consider the following +example. +``` +`switch(config)# **ip routing** +switch(config)# **interface Tunnel 10** +switch(config-if-Tu10)# **tunnel mode gre** +switch(config-if-Tu10)# **ip address 192.168.1.2/24** +switch(config-if-Tu10)# **tunnel source 10.1.1.2** +switch(config-if-Tu10)# **tunnel destination 10.1.1.1** +switch(config-if-Tu10)# **tunnel path-mtu-discovery** +switch(config-if-Tu10)# **tunnel tos 10** +switch(config-if-Tu10)# **tunnel ttl 10**` +``` + + +To add an IPv4 route over the GRE-tunnel interface, use a configuration similar to the following. + +``` +`switch(config)# **ip route 192.168.100.0/24 Tunnel 10**` +``` + + +Note: IPv6 GRE-Tunnels are not supported. This is only a data-plane limitation; control-plane protocols +such as IS-IS IPv6 can still work. + +Use the **show interfaces Tunnel** command to display the interface +tunnel. 
+ + +``` +`switch(config)# **show interfaces Tunnel 10** +Tunnel10 is up, line protocol is up (connected) + Hardware is Tunnel, address is 0a01.0101.0800 + Internet address is 192.168.1.1/24 + Broadcast address is 255.255.255.255 + Tunnel source 10.1.1.1, destination 10.1.1.2 + Tunnel protocol/transport GRE/IP + Key disabled, sequencing disabled + Checksumming of packets disabled + Tunnel TTL 10, Hardware forwarding enabled + Tunnel TOS 10 + Path MTU Discovery + Tunnel transport MTU 1476 bytes + Tunnel underlay VRF "underlayVrf" + Up 3 seconds` +``` + + +Use the **show gre tunnel static** command to display a static +interface tunnel. + + +``` +`switch(config)#**show gre tunnel static** +Name Index Source Destination Nexthop Interface +----------- ------- ----------- ------------- ---------- ---------- +Tunnel10 10 10.1.1.1 10.1.1.2 10.6.1.2 Ethernet6/1` +``` + + +Use the **show tunnel fib static interface** command to display a +fib static interface tunnel. + + +``` +`switch(config)# **show tunnel fib static interface gre 10** +Type 'Static Interface', index 10, forwarding Primary + via 10.6.1.2, 'Ethernet6/1' + GRE, destination 10.1.1.2, source 10.1.1.1, ttl 10, tos 0xa` +``` + + +### Tunnel Mode + + +Tunnel mode is **GRE** for a GRE-tunnel interface which is also +the default tunnel mode. + + +### IP address + + +Use this IP address for routing over the GRE-tunnel interface. The configuration +subnet is reachable over the GRE-tunnel interface, and the packets to the subnet is +encapsulated with the GRE header. + + +### Tunnel Source + + +Specifies the source IP address for the encapsulating IPv4 header of a packet going +over the GRE-tunnel interface. The tunnel source IPv4 address is a valid local IPv4 +address configured on the Arista switch. It uses any route interface on the Arista +switch. The routed interfaces IPv4 address assigns the tunnel source IPv4 address. 
+A maximum of 16 unique tunnel source IPv4 addresses are supported across all +GRE-tunnel interfaces. + + +The following is an example of an interface as a Tunnel source. + + +``` +`switch(config)# **interface Loopback 10** +switch(config-if-Lo10)# **ip add 10.1.1.1/32** +switch(config-if-Lo10)# **exit** +switch(config)# **interface Tunnel 10** +switch(config-if-Tu10)# **tunnel source interface Loopback 10**` +``` + + +Note: Coexistence of GRE-tunnel interfaces and Decap-Groups is not supported. + +Note: Coexistence of GRE-tunnel interfaces and VXLAN is not supported. + +Note: GRE-tunnel is not supported with MLAG configuration. + +### Tunnel Destination + + +Specifies the destination IPv4 address for the encapsulating IPv4 header of a packet +going over the GRE-tunnel interface. The tunnel destination IPv4 address must be reachable from +the Arista switch. + + +Note: Multicast traffic over GRE-Tunnels is not supported. + + +### Tunnel Path MTU Discovery + + +The tunnel path Maximum Transmission Unit (MTU) Discovery specifies if the Don't +Fragment (DF) flag needs to be set in the encapsulating IPv4 header of a packet +going over the GRE-Tunnel interface. MTU configuration on the GRE-tunnel interface +is used by control plane protocols and not enforced in hardware for packets +forwarded in the data plane. The MTU change on the tunnel interface does not take effect +until the tunnel interface is flapped. + + +### Tunnel TOS + + +The Tunnel TOS specifies the TOS value to be set in the encapsulating IPv4 header of +a packet going over the GRE-Tunnel interface. The default value of +**0** is assigned if tunnel TOS is not configured. +A maximum of seven unique tunnel TOS values are supported across all GRE-tunnel +interfaces. + + +### Tunnel TTL + + +The Tunnel TTL specifies the TTL value to be set in the encapsulating IPv4 header of +a packet going over the GRE-tunnel interface. The TTL value is copied from the inner +IPv4 header if tunnel TTL is not configured.
 The tunnel TTL configuration requires +the tunnel path MTU discovery to be configured. A maximum of four unique tunnel TTL +values are supported across all GRE-tunnel interfaces. + + +### VRF Forwarding (Overlay VRF) + + + The following configuration is an example of an overlay VRF for a GRE tunnel +interface. +``` +`switch(config)# **vrf instance overlayVrf** +switch(config)# **ip routing vrf overlayVrf** +switch(config)# **interface Tunnel 10** +switch(config-if-Tu10)# **vrf overlayVrf**` +``` + + +Note: Both the tunnel's source and destination addresses must be in the underlay VRF. GRE +key forwarding is not supported. +The following is an example of a static route configuration with an overlay +VRF. +``` +`switch(config)# **ip route vrf overlayVrf 7.7.7.0/24 192.168.1.2**` +``` + + +### VRF Forwarding (Underlay VRF) + + +The following is a configuration example of an underlay VRF for a GRE tunnel +interface. +``` +`switch(config)# **vrf instance underlayVrf** +switch(config)# **interface Tunnel 10** +switch(config-if-Tu10)# **tunnel underlay vrf underlayVrf**` +``` + + +### TCAM Bank Allocation + + +Note: Use the following command to check if a Ternary Content-Addressable Memory (TCAM) bank is allocated for +GRE packet termination lookup. + +``` +`switch(config)# **show platform fap tcam summary** + + Tcam Allocation (Jericho0) +Bank Used By Reserved By +---------- ------------------------- ----------- +0 dbGreTunnel -` +``` + + +PBR is not supported on GRE terminated packets. + + +#### Verifying Tunnel Routes + + +Use the **show ip route** command to check if the routes over +the tunnel are set up +correctly. 
+``` +`switch(config)# **show ip route** +VRF: default +Codes: C - connected, S - static, K - kernel, + O - OSPF, IA - OSPF inter area, E1 - OSPF external type 1, + E2 - OSPF external type 2, N1 - OSPF NSSA external type 1, + N2 - OSPF NSSA external type2, B I - iBGP, B E - eBGP, + R - RIP, I L1 - IS-IS level 1, I L2 - IS-IS level 2, + O3 - OSPFv3, A B - BGP Aggregate, A O - OSPF Summary, + NG - Nexthop Group Static Route, V - VXLAN Control Service, + DH - DHCP client installed default route, M - Martian, + DP - Dynamic Policy Route + +Gateway of last resort is not set + + C 192.168.1.0/24 is directly connected, Tunnel10, Static Interface GRE-Tunnel index 10, dst 10.1.1.2, src 10.1.1.1, TTL 10, TOS 10 + S 192.168.100.0/24 is directly connected, Tunnel10, Static Interface GRE-Tunnel index 10, dst 10.1.1.2, src 10.1.1.1, TTL 10, TOS 10` +``` + + +#### Verifing Tunnel Encap + + +Use the **show platform fap eedb ip-tunnel gre interface +Tunnel** command to check the tunnel encap programming on the GRE +interface. + + +``` +`switch(config)# **show platform fap eedb ip-tunnel gre interface Tunnel 10** + ------------------------------------------------------------------------------------------------------------------ +| Jericho0 | +| GRE Tunnel Egress Encapsulation DB | +|------------------------------------------------------------------------------------------------------------------| +| Bank/ | OutLIF | Next | VSI | Encap | TOS | TTL | Source | Destination | OamLIF | OutLIF | Drop | +| Offset | | OutLIF | LSB | Mode | | | IP | IP | Set | Profile | | +|------------------------------------------------------------------------------------------------------------------| +| 3/0 | 0x6000 | 0x4010 | 0 | 2 | 10 | 10 | 10.1.1.1 | 10.1.1.2 | No | 0 | No |` +``` + + +Use the **show platform fap eedb ip-tunnel** command to check +the tunnel encap programming on the IP-tunnel interface. 
+ + +``` +`switch(config)# **show platform fap eedb ip-tunnel** + ----------------------------------------------------------------------------------------------------------- +| Jericho0 | +| IP Tunnel Egress Encapsulation DB | +|-----------------------------------------------------------------------------------------------------------| +| Bank/ | OutLIF | Next | VSI | Encap | TOS | TTL | Src | Destination | OamLIF | OutLIF | Drop | +| Offset | | OutLIF | LSB | Mode | Idx | Idx | Idx | IP | Set | Profile | | +|-----------------------------------------------------------------------------------------------------------| +| 3/0 | 0x6000 | 0x4010 | 0 | 2 | 9 | 0 | 0 | 10.1.1.2 | No | 0 | No |` +``` + + +#### Verifing Tunnel VRF + + +Use the **show ip interface tunnel** command to check the +overlay VRF. + + +``` +`switch(config)# **show ip interface tunnel 10** +Tunnel10 is up, line protocol is up (connected) + Internet address is 192.168.1.1/24 + Broadcast address is 255.255.255.255 + IPv6 Interface Forwarding : None + Proxy-ARP is disabled + Local Proxy-ARP is disabled + Gratuitous ARP is ignored + IP MTU 1476 bytes + VPN Routing/Forwarding "overlayVrf" + +switch(config)# **show ip route vrf overlayVrf** + +VRF: overlayVrf +Codes: C - connected, S - static, K - kernel, + O - OSPF, IA - OSPF inter area, E1 - OSPF external type 1, + E2 - OSPF external type 2, N1 - OSPF NSSA external type 1, + N2 - OSPF NSSA external type2, B I - iBGP, B E - eBGP, + R - RIP, I L1 - IS-IS level 1, I L2 - IS-IS level 2, + O3 - OSPFv3, A B - BGP Aggregate, A O - OSPF Summary, + NG - Nexthop Group Static Route, V - VXLAN Control Service, + DH - DHCP client installed default route, M - Martian, + DP - Dynamic Policy Route, L - VRF Leaked + +Gateway of last resort is not set + + C 1.1.1.0/24 is directly connected, Ethernet1 + S 7.7.7.0/24 [1/0] via 192.168.1.2, Tunnel10, Static Interface GRE-Tunnel index 10, dst 10.1.1.2, src 10.1.1.1 + C 192.168.1.0/24 is directly connected, Tunnel10, Static 
 Interface GRE-Tunnel index 10, dst 10.1.1.2, src 10.1.1.1` +``` + + +#### Tunnel underlay VRF Configuration + + +Use the **show interfaces Tunnel** command to check the +underlay +VRF. +``` +`switch(config)# **show interfaces Tunnel 10** +Tunnel10 is up, line protocol is up (connected) + Hardware is Tunnel, address is 0a01.0101.0800 + Internet address is 192.168.1.1/24 + Broadcast address is 255.255.255.255 + Tunnel source 10.1.1.1, destination 10.1.1.2 + Tunnel protocol/transport GRE/IP + Key disabled, sequencing disabled + Checksumming of packets disabled + Tunnel TTL 10, Hardware forwarding enabled + Tunnel TOS 10 + Path MTU Discovery + Tunnel transport MTU 1476 bytes + Tunnel underlay VRF "underlayVrf" + Up 3 seconds` +``` + + +Use the **show ip route vrf underlayVrf** command to check the +IP routes in the VRF underlayVrf. + + +``` +`switch(config)# **show ip route vrf underlayVrf** +VRF: underlayVrf +Codes: C - connected, S - static, K - kernel, + O - OSPF, IA - OSPF inter area, E1 - OSPF external type 1, + E2 - OSPF external type 2, N1 - OSPF NSSA external type 1, + N2 - OSPF NSSA external type2, B - BGP, B I - iBGP, B E - eBGP, + R - RIP, I L1 - IS-IS level 1, I L2 - IS-IS level 2, + O3 - OSPFv3, A B - BGP Aggregate, A O - OSPF Summary, + NG - Nexthop Group Static Route, V - VXLAN Control Service, + DH - DHCP client installed default route, M - Martian, + DP - Dynamic Policy Route, L - VRF Leaked, + +Gateway of last resort is not set + + C 10.1.1.0/24 is directly connected, Ethernet1` +``` + + +## BfRuntime to Use Non-default VRFs + + +Use the following commands to configure the VRF for the BfRuntime connection for the management +interface on the switches that support it. The management interface may be configured on a +different VRF from the default one. + + +### **Configuring BfRuntime to Use Non-default VRFs** + + +The **platform barefoot bfrt vrf** command configures the forwarding plane +agent to restart and listen on the configured VRFs for +connections. 
+``` +`switch(config)# **platform barefoot bfrt vrf VRF_NAME**` +``` + + +If no VRF is specified, the configuration uses the default VRF for the IP and port for the +BfRuntime server. + + +The following displays a typical +configuration. +``` +`switch(config)# **vrf instance management** +switch(config-vrf-management)# **exit** +switch(config)# **platform barefoot bfrt 0.0.0.0 50052** +switch(config)# **platform barefoot bfrt vrf management** +switch(config)# **int management1** +switch(config-if-Ma1)# **vrf management**` +``` + + +### Displaying BfRuntime Configuration + + +The **show platform barefoot bfrt** command displays the existing +configuration for the BfRuntime +server. +``` +`switch# **show platform barefoot bfrt** +Namespace: management +FixedSystem:0.0.0.0:50052` +``` + + +## IPv4 Commands + + +### Cluster Load Balancing Commands + + +- load-balance cluster + +- destination grouping + +- flow + +- flow source learning + +- forwarding type + +- load-balance method + +- port group host + +- balance factor + +- flow exhaustion + +- flow limit + +- flow warning + +- member Ethernet + + +### IP Routing and Address +Commands + + +- agent SandL3Unicast terminate + +- clear arp inspection statistics + +- clear snapshot counters ecmp + +- compress + +- ip arp inspection limit + +- ip arp inspection logging + +- ip arp inspection trust + +- ip arp inspection vlan + +- ip hardware fib ecmp resilience + +- ip hardware fib load-balance distribution + +- ip hardware fib optimize + +- ip hardware fib next-hop resource optimization + +- ip icmp redirect + +- ip load-sharing + +- ip route + +- ip routing + +- ip source binding + +- ip verify + +- ip verify source + +- ipv4 routable +240.0.0.0/4 + +- rib fib policy + +- show dhcp server + +- show hardware capacity + +- show hardware resource DlbEcmpGroupTable agent * + +- show ip + +- show ip arp inspection vlan + +- show ip arp inspection statistics + +- show ip hardware fib +summary + +- show hardware resource l3 summary + +- show ip 
interface + +- show ip interface brief + +- show ip route + +- show ip route age + +- show ip route gateway + +- show ip route host + +- show ip route match tag + +- show ip route summary + +- show ip verify source + +- show platform arad ip route + +- show platform arad ip route summary + +- show rib route ip + +- show rib route fib policy excluded + +- show rib route summary + +- show routing-context vrf + +- show snapshot counters ecmp history + +- show vrf + +- start snapshot counters + +- tcp mss ceiling + + +### IPv4 DHCP Relay Commands + + +- clear ip dhcp relay counters + +- dhcp relay + +- ip dhcp relay all-subnets + +- ip dhcp relay all-subnets default + +- ip dhcp relay always-on + +- ip dhcp relay information option (Global) + +- ip dhcp relay information option circuit-id + +- ip helper-address + +- show ip dhcp relay + +- show ip dhcp relay counters + + +### DHCP Server Configuration Commands + + +- dhcp server + +- dhcp server client + +- dhcp server debug + +- dhcp server dns + +- dhcp server lease + +- dhcp server option + +- dhcp server private-option + +- dhcp server subnet + +- dhcp server subnet client + +- dhcp server tftp + +- dhcp server vendor-option + +- dhcp server vendor-option ipv4 sub-option + +- show dhcp server + +- show dhcp server leases + + +### IPv4 DHCP Snooping Commands + + +- clear ip dhcp snooping counters + +- ip dhcp snooping + +- ip dhcp snooping +bridging + +- ip dhcp snooping information option + +- ip dhcp snooping vlan + +- show ip dhcp snooping + +- show ip dhcp snooping counters + +- show ip dhcp snooping hardware + + +### IPv4 Multicast Counters Commands + + +- clear ip multicast count + +- ip multicast count + + +### ARP Table Commands + + +- arp + +- arp aging timeout + +- arp cache persistent + +- arp gratuitous accept + +- arp proxy max-delay + +- clear arp-cache + +- clear arp + +- ip local-proxy-arp + +- ip proxy-arp + +- show arp + +- show ip arp + + +### VRF Commands + + +- cli vrf + +- description (VRF) + 
+- platform barefoot bfrt vrf + +- show platform barefoot bfrt + +- show routing-context vrf + +- show vrf + +- vrf (Interface mode) + +- vrf instance + + +### Trident Forwarding Table Commands + + +- platform trident forwarding-table partition + +- platform trident routing-table partition + +- show platform trident forwarding-table partition + +- show platform trident l3 shadow dlb-ecmp-group-control + + +### IPv4 GRE Tunneling Commands + + +- interface tunnel + +- show interface tunnel + +- show platform fap eedb ip-tunnel gre interface tunnel + +- show platform fap tcam summary + +- show tunnel fib static interface gre + +- tunnel + + +### Dynamic Load Balancing + + +- ip hardware fib ecmp resilience + +- ip hardware fib load-balance distribution + +- show hardware resource DlbEcmpGroupTable agent * + +- show platform trident l3 shadow dlb-ecmp-group-control + + +### agent SandL3Unicast terminate + + +The **agent SandL3Unicast terminate** command restarts the +platform Layer 3 agent to ensure optimized IPv4 routes. + + +**Command Mode** + + +Global Configuration + + +**Command Syntax** + + +**agent SandL3Unicast terminate** + + +**Related Commands** + + +- ip hardware fib optimize - Enables IPv4 route +scale. + +- show platform arad ip route -Displays resources +for all IPv4 routes in hardware. Routes that use the additional hardware +resources will appear with an asterisk. + +- show platform arad ip route summary -Displays +hardware resource usage of IPv4 routes. + + +**Example** + + +This configuration command restarts the platform Layer 3 agent to ensure optimized +IPv4. +``` +`switch(config)# **agent SandL3Unicast terminate** +SandL3Unicast was terminated` +``` + + +Restarting the platform Layer 3 agent results in deletion of all IPv4 routes and +re-adds them to the hardware. + + +### arp + + +The **arp** command adds a static entry to an Address +Resolution Protocol (ARP) cache. 
 The switch uses ARP cache entries to correlate +32-bit IP addresses to 48-bit hardware addresses. + + +The **no arp** and **default arp** +commands remove the ARP cache entry with the specified IP address. When multiple +VRFs contain ARP cache entries for identical IP addresses, each entry can only be +removed individually. + + +**Command Mode** + + +Global Configuration + + +**Command Syntax** + + +**arp [vrf_instance] ipv4_addr +mac_addr +arpa** + + +**no arp [vrf_instance] +ipv4_addr** + + +**default arp [vrf_instance] +ipv4_addr** + + +**Parameters** + +- **vrf_instance** - Specifies the VRF instance +to modify. + + +- **no parameter** - Specify changes to the +default VRF. + +- **vrf** +**vrf_name** - Specify changes to the +specified user-defined VRF. + +- **ipv4_addr** - Specify the IPv4 address of the ARP entry. + +- **mac_addr** - Specify the local data-link (hardware) address +(48-bit dotted hex notation – H.H.H). + + +**Examples** + +- This command adds a static entry to the ARP cache in the default +VRF. +``` +`switch(config)# **arp 172.22.30.52 0025.900e.c63c arpa** +switch(config)#` +``` + +- This command adds the same static entry to the ARP cache in the VRF named +**purple**. +``` +`switch(config)# **arp vrf purple 172.22.30.52 0025.900e.c63c arpa** +switch(config)#` +``` + + +### arp aging timeout + + +The **arp aging timeout** command specifies the duration of +dynamic address entries in the Address Resolution Protocol (ARP) cache for addresses +learned through the configuration mode interface. The default duration is +**14400** seconds (four hours). + + +The **no arp aging timeout** and **default arp aging +timeout** commands restore the default ARP aging timeout for +addresses learned on the configuration mode interface by deleting the corresponding +**arp aging timeout** command from +***running-config***. 
+ + +**Command Mode** + + +Interface-Ethernet Configuration + + +Interface-Loopback Configuration + + +Interface-Management Configuration + + +Interface-Port-channel Configuration + + +Interface-VLAN Configuration + + +**Command Syntax** + + +**arp aging timeout +arp_time** + + +**no arp aging timeout** + + +**default arp aging timeout** + + +**Parameter** + + +**arp_time** - Specify the ARP aging timeout period in seconds. +Values range from **60** to +**65535**. Default value is +**14400**. + + +**Example** + + +This command specifies an ARP cache duration of **7200** +seconds (two hours) for dynamic addresses added to the ARP cache learned through +**vlan +200**. +``` +`switch(config)# **interface vlan 200** +switch(config-if-Vl200)# **arp aging timeout 7200** +switch(config-if-Vl200)# **show active** +interface Vlan200 + arp aging timeout 7200 +switch(config-if-Vl200)#` +``` + + +### arp cache dynamic capacity + + +ARP and IPv6 Neighbor Discovery store neighbor address resolutions in a neighbor cache. The resources and capabilities of the switch determine the capacity of the +neighbor cache. The Neighbor Cache Capacity feature adds parameters to specify a +per-interface capacity for the neighbor cache. A neighboring device, through +misconfiguration or maliciousness, can unfairly use a large number of address +resolutions. This feature mitigates the over-utilization of address resolutions. + + +**Command Mode** + + +Interface Configuration Mode + + +**Command Syntax** + + +**arp cache dynamic capacity capacity** + + +**no arp cache dynamic capacity** + + +**default arp cache dynamic capacity** + + +**Parameters** + + +- **capacity capacity** - The number of dynamic address resolution entries accepted into the ARP cache +on the specified interface. Configure a value from 0 to 4294967295. If no capacity is specified, then the interface +accepts all neighbor resolutions up to the capacity of the switch platform. 
+ + +**Example** + + +Use the following commands to configure an ARP cache of 3000 dynamic address resolution +entries: +``` +`switch(config)# **interface Ethernet3/1** +switch(config-if-Et3/1)# **arp cache dynamic capacity 3000**` +``` + + +### arp cache persistent + + +The **arp cache persistent** command restores the dynamic +entries in the Address Resolution Protocol (ARP) cache after reboot. + + +The **no arp cache persistent** and **default arp +cache persistent** commands remove the ARP cache persistent +configuration from the ***running-config***. + + +**Command Mode** + + +Global Configuration + + +**Command Syntax** + + +**arp cache persistent** + + +**no arp cache persistent** + + +**default arp cache persistent** + + +**Example** + + +This command restores the ARP cache after +reboot. +``` +`switch(config)# **arp cache persistent** +switch(config)#` +``` + + +### arp gratuitous accept + + +The **arp gratuitous accept** command configures the +configuration mode interface to accept gratuitous ARP request packets received on +that interface. The ARP table then learns the accepted gratuitous ARP requests. + + +The no and **default** forms of the command +prevent the interface from accepting gratuitous ARP requests. Configuring gratuitous +ARP acceptance on an L2 interface has no effect. + + +**Command Mode** + + +Interface-Ethernet Configuration + + +Interface-VLAN Configuration + + +Interface Port-channel Configuration + + +**Command Syntax** + + +**arp gratuitous accept** + + +**no arp gratuitous accept** + + +**default arp gratuitous accept** + + +**Example** + + +These commands configure **interface ethernet 2/1** to accept +gratuitous ARP request +packets. +``` +`switch(config)# **interface ethernet 2/1** +switch(config-if-Et2/1)# **arp gratuitous accept** +switch(config-if-Et2/1)#` +``` + + +### arp proxy max-delay + + +The **arp proxy max-delay** command enables delaying proxy ARP +requests on the configuration mode interface. 
 EOS disables proxy ARP by default. +When enabled, the switch responds to all ARP requests, including gratuitous ARP +requests, with target IP addresses that match a route in the routing table. When a +switch receives a proxy ARP request, EOS checks whether to send the response +immediately or delay the response based on the configured maximum delay in +milliseconds (ms). + + +**Command Mode** + + +Configuration mode + + +**Command Syntax** + + +**arp proxy max-delay +milliseconds** + + +**Parameters** + + +**milliseconds** - Configure the maximum delay before returning +a proxy ARP response in milliseconds. Use a value between 0 and 1000ms with a +default value of 800ms. + + +**Example** + + +This command sets a delay of 500ms before returning a response to a proxy ARP +request. +``` +`switch(config)# **arp proxy max-delay 500ms**` +``` + + +### balance factor + + +The **balance factor** command in the Port Group Host Configuration Mode configures port balancing for Cluster Load Balancing on the network. + + +A higher value results in a more aggressive rebalancing of +flows from a port group, a logical group of hosts, across the available +links, even if the link has a small load imbalance. This is ideal for very +bursty traffic patterns. + + +Configuring a lower value provides a more +conservative action and only triggers a rebalance when encountering a +significant load difference. This minimizes changes and is suitable for more +consistent, long-lived flows. + + +The **no** version of the command deletes the configuration from the ***running-config***. + + +**Command Mode** + + +Cluster Load Balancing Configuration + + +Port Group Host Configuration + + +**Command Syntax** + + +**balance factor factor_value** + + +**no balance factor factor_value** + + +**Parameters** + + +- **balance** - Configure port group balancing. + +- **factor +factor_value** - Configure port group balancing factor +from 0-4294967295. 
+ + + + +**Example** + + +Use the following commands to configure a balance factor of 2500 for the port group +*MyPortGroup*: + + +``` +`switch(config)# **load-balance cluster** +switch(config-clb)# **port group host MyPortGroup** +switch(config-clb-port-host-MyPortGroup)# **balance factor 2500** +switch(config-clb-port-host-MyPortGroup)#` +``` + + +### clear arp inspection +statistics + + +The **clear arp inspection statistics** command clears ARP +inspection statistics. + + +**Command Mode** + + +EXEC + + +**Command Syntax** + + +**clear arp inspection statistics** + + +**Related Commands** + + +- ip arp inspection limit + +- ip arp inspection logging + +- ip arp inspection trust + +- ip arp inspection vlan + +- show ip arp inspection vlan + +- show ip arp inspection statistics + + +**Example** + + +This command clears ARP inspection +statistics. +``` +`switch(config)# **clear arp inspection statistics** +switch(config)#` +``` + + +### clear arp + + +The **clear arp** command removes the specified dynamic ARP +entry for the specified IP address from the Address Resolution Protocol (ARP) +table. + + +**Command Mode** + + +Privileged EXEC + + +**Command Syntax** + + +**clear arp [vrf_instance] ipv4_addr** + + +**Parameters** + +- **vrf_instance** - Specifies the VRF instance for +which arp data is removed. + +- **no parameter** - Specifies the +context-active VRF. + +- **vrf** +**vrf_name** - Specifies name of VRF +instance. Specify the system default using the +**default** option. + +- **ipv4_addr** - IPv4 address of dynamic ARP +entry. + + +**Example** + + +These commands display the ARP table before and after the removal of dynamic ARP +entry for IP address +**172.22.30.52**. 
+``` +`switch# **show arp** + +Address Age (min) Hardware Addr Interface +172.22.30.1 0 001c.730b.1d15 Management1 +172.22.30.52 0 0025.900e.c468 Management1 +172.22.30.53 0 0025.900e.c63c Management1 +172.22.30.133 0 001c.7304.3906 Management1 + +switch# **clear arp 172.22.30.52** +switch# **show arp** + +Address Age (min) Hardware Addr Interface +172.22.30.1 0 001c.730b.1d15 Management1 +172.22.30.53 0 0025.900e.c63c Management1 +172.22.30.133 0 001c.7304.3906 Management1 + +switch#` +``` + + +### clear arp-cache + + +The **clear arp-cache** command refreshes dynamic entries in +the Address Resolution Protocol (ARP) cache. Refreshing the ARP cache updates +current ARP table entries and removes expired ARP entries not yet deleted by an +internal, timer-driven process. + + +The command, without arguments, refreshes ARP cache entries for all enabled +interfaces. With arguments, the command refreshes cache entries for the specified +interface. Executing **clear arp-cache** for all interfaces +can result in extremely high CPU usage while the tables are resolving. + + +**Command Mode** + + +Privileged EXEC + + +**Command Syntax** + + +**clear arp-cache +[vrf_instance][interface_name]** + + +**Parameters** + +- **vrf_instance** - Specifies the VRF instance to refresh +ARP data. + +- **no parameter** - Specifies the +context-active VRF. + +- **vrf** +**vrf_name** - Specifies name of VRF instance. +System default VRF specified by +**default**. + +- **interface_name** - Interface to refresh ARP cache +entries. Options include the following: + +- **no parameter** - All ARP cache entries. + +- **interface ethernet** +**e_num** - ARP cache entries of specified +Ethernet interface. + +- **interface loopback** +**l_num** - ARP cache entries of specified +loopback interface. + +- **interface management** +**m_num** - ARP cache entries of specified +management interface. + +- **interface port-channel** +**p_num** - ARP cache entries of specified +port-channel Interface. 
+ +- **interface vlan** +**v_num** - ARP cache entries of specified +VLAN interface. + +- **interface VXLAN** +**vx_num** - VXLAN interface specified by +**vx_num**. + + +**Related Commands** + + +The cli vrf command specifies the context-active VRF. + + +**Example** + + +These commands display the ARP cache before and after ARP cache entries +refresh. +``` +`switch# **show arp** + +Address Age (min) Hardware Addr Interface +172.22.30.1 0 001c.730b.1d15 Management1 +172.22.30.118 0 001c.7301.6015 Management1 + +switch# **clear arp-cache** +switch# **show arp** + +Address Age (min) Hardware Addr Interface +172.22.30.1 0 001c.730b.1d15 Management1 + +switch#` +``` + + +### clear ip dhcp relay counters + + +The **clear ip dhcp relay counters** command resets the DHCP +relay counters. The configuration mode determines which counters are reset. + + +The **Interface configuration** command clears the counter for +the configuration mode interface. + + +**Command Mode** + + +Privileged EXEC + + +**Command Syntax** + + +**clear ip dhcp relay counters [interface_name]** + + +**Parameters** + + +**interface_name** - Specify the interface to clear counters. +Add the following options: + +- **no parameter** - Clears counters for the switch and +for all interfaces. + +- **interface ethernet** +**e_num** - Clears counters for the specified Ethernet +interface. + +- **interface loopback** +**l_num** - Clears counters for the specified loopback +interface. + +- **interface port-channel** +**p_num** - Clears counters for the specified +port-channel Interface. + +- **interface vlan** +**v_num** - Clears counters for the specified VLAN +interface. + + +**Examples** + +- These commands clear the DHCP relay counters for **vlan +1045** and show the counters before and after the +**clear** +command.
+``` +`switch# **show ip dhcp relay counters** + + | Dhcp Packets | +Interface | Rcvd Fwdd Drop | Last Cleared +----------|----- ---- -----|--------------------- + All Req | 376 376 0 | 4 days, 19:55:12 ago + All Resp | 277 277 0 | + | | + Vlan1001 | 207 148 0 | 4 days, 19:54:24 ago + Vlan1045 | 376 277 0 | 4 days, 19:54:24 ago + +switch# **clear ip dhcp relay counters interface vlan 1045** + + | Dhcp Packets | +Interface | Rcvd Fwdd Drop | Last Cleared +----------|----- ---- -----|--------------------- + All Req | 380 380 0 | 4 days, 21:19:17 ago + All Resp | 281 281 0 | + | | + Vlan1000 | 207 148 0 | 4 days, 21:18:30 ago + Vlan1045 | 0 0 0 | 0:00:07 ago` +``` + +- These commands clear all DHCP relay counters on the +switch. +``` +`switch(config-if-Vl1045)# **exit** +switch(config)# **clear ip dhcp relay counters** +switch(config)# **show ip dhcp relay counters** + +          | Dhcp Packets | +Interface | Rcvd Fwdd Drop | Last Cleared +----------|----- ---- -----|------------- + All Req | 0 0 0 | 0:00:03 ago + All Resp | 0 0 0 | + | | + Vlan1000 | 0 0 0 | 0:00:03 ago + Vlan1045 | 0 0 0 | 0:00:03 ago` +``` + + +### clear ip dhcp snooping +counters + + +The **clear ip dhcp snooping counters** command resets the DHCP +snooping packet counters. + + +**Command Mode** + + +Privileged EXEC + + +**Command Syntax** + + +clear ip dhcp snooping counters [counter_type] + + +**Parameters** + + +**counter_type**  - Specify the type of counter to reset. Options +include the following: + +- **no parameter** - Counters for each VLAN. + +- **debug** - Aggregate counters and drop cause +counters. + + +**Examples** + +- This command clears the DHCP snooping counters for each +VLAN. 
+``` +`switch# **clear ip dhcp snooping counters** +switch# **show ip dhcp snooping counters** + + | Dhcp Request Pkts | Dhcp Reply Pkts | +Vlan | Rcvd Fwdd Drop | Rcvd Fwdd Drop | Last Cleared +-----|------ ----- ------|----- ---- ------|------------- + 100 | 0 0 0 | 0 0 0 | 0:00:10 ago + +switch#` +``` + +- This command clears the aggregate DHCP snooping +counters. +``` +`switch# **clear ip dhcp snooping counters debug** +switch# **show ip dhcp snooping counters debug** + +Counter Snooping to Relay Relay to Snooping +----------------------------- ----------------- ----------------- +Received 0 0 +Forwarded 0 0 +Dropped - Invalid VlanId 0 0 +Dropped - Parse error 0 0 +Dropped - Invalid Dhcp Optype 0 0 +Dropped - Invalid Info Option 0 0 +Dropped - Snooping disabled 0 0 + +Last Cleared: 0:00:08 ago + +switch#` +``` + + +### clear ip multicast count + + +The **clear ip multicast count** command clears all counters +associated with the multicast traffic. + + +**Command Mode** + + +Global Configuration + + +**Command Syntax** + + +clear ip multicast count [group_address +[source_address]] + + +**Parameters** + + +- **no parameters** - Clears all counts of the multicast +route traffic. + +- **group_address** - Clears the multicast traffic count +of the specified group address. + +- **source_address** - Clears the multicast +traffic count of the specified group and source addresses. + + +**Guidelines** + + +This command functions only when the ip multicast count +command is enabled. + + +**Examples** + +- This command clears all counters associated with the multicast +traffic. +``` +`switch(config)# **clear ip multicast count**` +``` + +- This command clears the multicast traffic count of the specified group +address. +``` +`switch(config)# **clear ip multicast count 16.39.24.233**` +``` + + +### clear snapshot counters ecmp + + +The **clear snapshot counters ecmp** command deletes previous snapshots.
+ + +**Command Mode** + + +EXEC + + +**Command Syntax** + + +**clear snapshot counters ecmp req_id_range** + + +**Parameter** + + +**req_id_range** - Specify the Request ID of the snapshot to +delete. If none specified, all previous snapshots delete from the switch. + + +**Example** + + +To delete previous snapshots, use the following +command: +``` +`switch# **clear snapshot counters ecmp id_range**` +``` + + +### cli vrf + + +The **cli vrf** command specifies the context-active VRF. The +context-active VRF determines the default VRF that VRF-context aware commands use +when displaying routing table data. + + +**Command Mode** + + +Privileged EXEC + + +**Command Syntax** + + +**cli vrf [vrf_id]** + + +**Parameters** + + +**vrf_id** - Specify the name of VRF assigned as the current VRF +scope. Options include the following: + +- **vrf_name** - Specify the name of user-defined +VRF. + +- **default** - Specify the system-default VRF. + + +**Guidelines** + + +VRF-context aware commands include the following: + + +clear arp-cache + + +show ip + + +show ip arp + + +show ip route + + +show ip route gateway + + +show ip route host + + +**Related Commands** + + +The show routing-context vrf command displays the +context-active VRF. + + +**Example** + + +These commands specify **magenta** as the context-active VRF, +then display the context-active +VRF. +``` +`switch# **cli vrf magenta** +switch# **show routing-context vrf** +Current VRF routing-context is magenta +switch#` +``` + + +### compress + + +The **compress** command increases the hardware resources +available for the specified prefix lengths. + + +The **no compress** command removes the 2-to-1 compression +configuration from the ***running-config***. + + +Note: The **compress** command is supported only on 7500R, 7280R, 7500R2 and 7280R2 +platforms. 
+ + +**Command Mode** + + +Global Configuration + + +**Command Syntax** + + +ip hardware fib optimize prefix-length +prefix-length +expand +prefix-length +compress + + +no ip hardware fib optimize prefix-length +prefix-length +expand +prefix-length +compress + + +**Parameters** + + +**compress** - Allows configuring up to one compressed prefix +length. + + +**Example** + + +In the following example, configure the prefix length **20** +and **24**, expanding prefix length +**19** and **23**, and compressing +prefix length +**25**. +``` +`switch(config)# **ip hardware fib optimize prefix-length 20 24 expand 19 23 compress 25** + ! Please restart layer 3 forwarding agent to ensure IPv4 routes are optimized` +``` + + +### description (VRF) + + +The **description** command adds a text string to the +configuration mode VRF. The string has no functional impact on the VRF. + + +The **no description** and **default +description** commands remove the text string from the +configuration mode VRF by deleting the corresponding +**description** command from +***running-config***. + + +**Command Mode** + + +VRF Configuration + + +**Command Syntax** + + +**description +label_text** + + +**no description** + + +**default description** + + +**Parameters** + + +**label_text** - Specify the character string assigned to the +VRF configuration. + + +**Related Commands** + + +The vrf instance command places the switch in VRF configuration +mode. + + +**Example** + + +These commands add description text to the **magenta** +VRF. 
+``` +`switch(config)# **vrf instance magenta** +switch(config-vrf-magenta)# **description This is the first vrf** +switch(config-vrf-magenta)# **show active** + vrf instance magenta + description This is the first vrf + +switch(config-vrf-magenta)#` +``` + + +### destination grouping + + +The **destination grouping** command in the Cluster Load Balancing +Configuration mode allows the configuration of destination grouping settings with +`**BGP**`, `**prefix**`, or +**`VTEP`** groupings for cluster load balancing. The +**no** version of the command deletes the configuration +from the ***running-config***. + + +Destination Grouping prevents traffic bottlenecks on the network by distributing the incoming traffic across all available ECMP paths. + + +**Command Mode** + + +Cluster Load Balancing Configuration + + +**Command Syntax** + + +**destination groupings [bgp field-set] [prefix length length**] +[vtep] + + +**no destination groupings [bgp field-set] [prefix length length] [vtep]** + + +**Parameters** + + +- **destination groupings** - Configure destination grouping parameters for cluster load balancing. + + +- **bgp field-set** - Specify using BGP field-sets for destination grouping. + +- **prefix length length** - Specify using address prefix length for destination grouping. Configure the network prefix length between 0 and 128. + +- **vtep** - Specify using a VXLAN tunnel endpoint for destination grouping. + + +**Example** + + +Use the following commands to enter Cluster Load Balancing Mode and use BGP field-sets for destination grouping: + + +``` +`switch(config)# **load-balance cluster** +switch(config-clb)# **destination grouping bgp field-set** +switch(config-clb)#` +``` + + +### dhcp relay + + +The **dhcp relay** command places the switch in the DHCP relay +mode. Execute this command in the Global Configuration Mode. + + +The **no dhcp relay** command removes DHCP relay configuration +from the ***running-config***. 
+ + +**Command Mode** + + +Global Configuration Mode + + +**Command Syntax** + + +dhcp relay + + +no dhcp relay + + +**Example** + + +The **dhcp relay** command places the switch in the DHCP relay +configuration mode. +``` +`switch(config)# **dhcp relay** +switch(config-dhcp-relay)#` +``` + + +### dhcp server + + +The **dhcp server** command places the switch in the DHCP server +configuration mode. Execute this command in the Global Configuration Mode. + + +The **no dhcp server** command removes DHCP server configuration +from the ***running-config***. + + +**Command Mode** + + +Global Configuration Mode + + +**Command Syntax** + + +**dhcp server** + + +**no dhcp server** + + +**Example** + + +The **dhcp server** command places the switch in the DHCP server +configuration mode. +``` +`switch(config)# **dhcp server** +switch(config-dhcp-server)#` +``` + + +### dhcp server client + + +The **dhcp server client** command configures client options +for the DHCP server. Execute this command under the ***dhcp server configuration +mode***. + + +**Command Mode** + + +DHCP Server Configuration Mode + + +**Command Syntax** + + +dhcp server client class [ipv4|ipv6] definition +client_class +assignments +[default-gateway|dns|lease|option|private-option|tftp] + + +**Parameters** + + +- **[ipv4|ipv6]** - Select the IP address family. + +- **definition +client_class** - Add a class for the client +definition. + +- **default-gateway** - Configure the client class default +gateway sent to DHCP clients. + +- **dns** - Configure the client class DNS. + +- **lease** - Configure the client class lease. + +- **option** - Configure the client class DHCP options. + +- **private-option** - Configure the client class's private options. + +- **tftp** - Configure the client class's TFTP +options. + + +**Example** + + +Use the **dhcp server client class default-gateway** command to +add a client definition for the IPv4 DHCP client class default gateway of +10.0.0.1.
+``` +`switch(config-dhcp-server)# **client class ipv4 definition test1 default-gateway 10.0.0.1**` +``` + + +### dhcp server debug + + +The **dhcp server debug log** command configures DHCP server +debugging. Execute this command in the DHCP Server Configuration Mode. + + +**Command Mode** + + +DHCP Server Configuration Mode + + +**Command Syntax** + + +**dhcp server debug +log +file** + + +**Parameters** + + +**log +file** - Specify the file location to store debugging +logs. + + +**Example** + + +Use the **dhcp server debug log** command to add a file location for +debugging logs. + +``` +`switch(config-dhcp-server)#**debug log**` +``` + + +### dhcp server dns + + +The **dhcp server dns** command configures DHCP server DNS +options. Execute this command in the DHCP Server Configuration Mode. + + +**Command Mode** + + +DHCP Server Configuration Mode + + +**Command Syntax** + + +**dhcp server dns [domain name +domain_name +] +[server [ipv4|ipv6] +ip_address]** + + +**Parameters** + + +- **domain name domain_name** - Specify the domain name of the DNS server. + +- **server [ipv4|ipv6] +ip_address** - Specify the DNS server as IPv4 or IPv6 +and the IP address of the server. + + +**Example** + + +Use the **dhcp server dns** command to add an IPv4 DNS server, +192.168.10.5, to the DHCP +configuration. +``` +`switch(config-dhcp-server)# **dns server ipv4 192.168.10.5**` +``` + + +### dhcp server lease + + +The **dhcp server lease** command configures DHCP server lease +options. Execute this command in the DHCP Server Configuration Mode. + + +**Command Mode** + + +DHCP Server Configuration Mode + + +**Command Syntax** + + +**dhcp server lease time [ipv4|ipv6] +days +days +hours hours +minutes minutes** + + +**Parameters** + + +- **[ipv4|ipv6]** - Configure the lease for IPv4 or IPv6. + +- **days** **days** - Specify the number of days for the lease to be in effect from 0 to 2000 days.
+ +- **hours****hours** - Specify the number of hours for the lease to be in effect from 0 to 23 hours. + +- **minutes****minutes** - Specify the +number of minutes for the lease to be in effect from 0 to 59 minutes. + + +**Example** + + +Use the **dhcp server lease** command to add an IPv4 lease to +be in effect for 10 days, to the DHCP configuration. + +``` +`switch(config-dhcp-server)# **dns lease time ipv4 10 days**` +``` + + +### dhcp server option + + +The **dhcp server option** command configures DHCP server +options. Execute this command in the DHCP Server Configuration Mode. + + +**Command Mode** + + +DHCP Server Configuration Mode + + +**Command Syntax** + + +**dhcp server option [ipv4|ipv6] +code +[always-send data type [hex |string] data]] +>quoted_string >hex +[client-id disable] +hourshours +minutesminutes** + + +**Parameters** + + +- **[ipv4|ipv6]** - Configure the option for IPv4 or IPv6. + +- **code**- Specify the option number from the DHCP options. + +- **[always-send data type [hex |string] data]]** **>quoted_string** **>hex** - Specify to send the option whether or not the client requested it. + +- **client-id disable** - Prevent the DHCPv4 server from +sending back the client ID. + + +**Example** + + +Use the **dhcp server option** command to add an IPv4 DHCP +code, 67, to the DHCP configuration. + +``` +`switch(config-dhcp-server)# **option ipv4 option 67**` +``` + + +### dhcp server private-option + + +The **dhcp server private-option** command configures DHCP +server private options. Execute this command in the DHCP Server Configuration Mode. + + +**Command Mode** + + +DHCP Server Configuration Mode + + +**Command Syntax** + + +**dhcp server private-option [ipv4|ipv6] +code +[always-send data type [hex |string] data]] +>quoted_string >hex** + + +**Parameters** + + +- **[ipv4|ipv6]** - Configure the option for IPv4 or IPv6. + +- **code**- Specify the option number from 224 to 254. 
+ +- **[always-send data type [hex |string] data]]** +**quoted_string** +**hex** - Specify to send the option whether or not the +client requested it. + + +**Example** + + +Use the **dhcp server private-option** command to add an IPv4 private +option code, *225*, that is always sent, to the DHCP configuration. + +``` +`switch(config-dhcp-server)# **option ipv4 private-option 225 always-send private-option ipv4 225 always-send type string data "Code Sent"**` +``` + + +### dhcp server subnet + + +The **dhcp server subnet** command configures DHCP server +subnet options. Execute this command in the DHCP Server Configuration +Mode. + + +**Command Mode** + + +DHCP Server Configuration Mode + + +**Command Syntax** + + +**dhcp server subnet +ipv4_address +ipv6_address** + + +**Parameters** + + +- **ipv4_address** - Configure the IPv4 subnet. + +- **ipv6_address** - Configure the IPv6 subnet. + + +**Example** + + +Use the **dhcp server subnet** command to add an IPv4 subnet, +*198.168.0.0/24*, to the DHCP configuration. + +``` +`switch(config-dhcp-server)# **subnet 198.168.0.0/24**` +``` + + +### dhcp server subnet client + + +The **dhcp server subnet [ipv4 | ipv6] client** command +configures client options for the DHCP server. Execute this command in the DHCP +Server Configuration Mode. + + +**Command Mode** + + +DHCP Server Configuration Mode + + +**Command Syntax** + + +**dhcp server subnet [ipv4|ipv6] client class definition +client_class +[default-gateway|dns|lease|name|range|reservations|tftp]** + + +**Parameters** + + +- **[ipv4|ipv6]** - Select the IP address family. + +- **definition +client_class** - Add a class for the client +definition. + +- **default-gateway** **ip_address** - Configure the client class default +gateway sent to DHCP clients. + +- **dns server** - Configure the client class DNS. + +- **lease** **days** **hours** **hours** +**minutes** **minutes** - Configure the client class lease in days, hours, and minutes.
+ +- **name** **name** - Configure the subnet name. + +- **range** **ip_address_start** **ip_address_end** - Configure the range of IP addresses for the subnet. + +- **reservations mac-address** **mac_address** **[hostname | ipv4-address]**- Configure the MAC address to use for reservations. + +- **tftp** - Configure the client class's TFTP +options. + + +**Example** + + +Use the **dhcp server subnet ipv4 client class +default-gateway** command to add a client definition for the IPv4 +DHCP client class default gateway of 10.0.0.1. +options. +``` +`switch(config-dhcp-server)#**subnet ipv4 client class ipv4 definition test1 default-gateway 10.0.0.1**` +``` + + +### dhcp server tftp + + +The **dhcp server tftp** command configures DHCP +server TFTP options. Execute this command in the DHCP Server Configuration Mode. + + +**Command Mode** + + +DHCP Server Configuration Mode + + +**Command Syntax** + + +**dhcp server tftp server [file [ipv4|ipv6] +file_name] +[option [150|66]] ipv4** + + +**Parameters** + + +- **file [ipv4|ipv6]** **file_name**> - Configure the IPv4 or IPv6 boot file name. + +- **option [150|66]] ipv4** +**ip_address** - Configure the TFTP DHCP option as 150 or +66 with an IPv4 address. + + +**Example** + + +Use the **dhcp server tftp** command to add option 150 with an +IPv4 address *198.168.0.11*, to the DHCP configuration. + +``` +`switch(config-dhcp-server)# **tftp option 150 ipv4 198.168.0.11**` +``` + + +### dhcp server vendor-option + + +The **dhcp server vendor-option** command configures the DHCP +server vendor identifier options. Execute this command under the DHCP Server +Configuration Mode. + + +**Command Mode** + + +DHCP Server Configuration Mode + + +**Command Syntax** + + +**dhcp server vendor-option ipv4 +vendor_id +default +vendor_id +sub-option +sub-option_code** + + +**Parameters** + + +- **vendor_id** - Configure the vendor identifier. + +- **default** **vendor_id** - Set as the default vendor specific option. 
+ +- **sub-option** +**sub-option_code** - Set the sub-option code from +1-254. + + +**Example** + + +Use the **dhcp server vendor-option** command to add vendor +option, *1:4:c0:0:2:8* , to the DHCP configuration. + +``` +`switch(config-dhcp-server)# **vendor-option 1:4:c0:0:2:8**` +``` + + +### dhcp server vendor-option ipv4 sub-option + + +The **dhcp server vendor-option** command configures the DHCP +server vendor identifier options. Execute this command in the DHCP Server +Configuration Mode. + + +**Command Mode** + + +DHCP Server Configuration Mode + + +**Command Syntax** + + +**dhcp server vendor-option ipv4 +vendor_id +default +vendor_id +sub-option +sub-option_code +type +[array | ipv4-address | +string] +array [ipv4-address data +ip_address +[string data +quoted_string** + + +**Parameters** + + +- **vendor_id** - Configure the vendor identifier. + +- **default** **vendor_id** - Set as the default vendor specific option. + +- **sub-option** +**sub-option_code** - Set the sub-option code from +1-254. + + +**Example** + + +Use the **dhcp server vendor-option** command to add the vendor +option, *1:4:c0:0:2:8*, to the DHCP Server configuration. + +``` +`switch(config-dhcp-server)# **vendor-option 1:4:c0:0:2:8**` +``` + + +### fib route limit + + +The **fib route limit** command in the Router General Configuration Mode limits the number of routes added to the Forwarding Information Database (FIB) and +also suppresses BGP routes when exceeding the table limit. The **no** version of the command removes the configuration +from the ***running-config***. + + +**Command Mode** + + +Router General Configuration + + +FIB Route Limit Configuration + + +VRF Configuration + + +**Command Syntax** + + +**[ipv4 | ipv6] limit route_number [warning-limit percent percent]** + + +**Parameters** + + +- **[ipv4 | ipv6]** - Configure IPv4 or IPv6 routes to limit in the FIB. + +- **limit route_number** - Configure the number of routes to limit in the FIB. 
+ +- **warning-limit percent percent** - Configure the percentage of a FIB with routes and issue +a warning. For example, if the FIB has a 100 route limit, and the percentage set to 80, then EOS issues a warning when the FIB has 80 routes. + + +**Example** + + +Use the following command to configure a global route limit for IPv4 to 100 and warn when the table has consumed 80% +of the limit: + + +``` +`switch(config)# **router general** +switch(config-router-general)# **fib route limit** +switch(config-router-general-fib-route-limit)# **ipv4 limit 100 warning-limit 80 percent**` +``` + + +### flow + + +The **flow** command in the Cluster Load Balancing Configuration Mode allows the configuration of flow settings for Cluster Load Balancing including counters, matching, monitoring, sources, and warnings. +The **no** version of the command deletes the configuration from the ***running-config***. + + + + +**Command Mode** + + +Cluster Load Balancing Configuration + + +**Command Syntax** + + +**flow [counters] [match encapsulation [none | vxlan] ipv4] [monitor] [warning ungrouped]** + + +**no flow [counters] [match encapsulation [none | vxlan] ipv4] [monitor] [warning ungrouped]** + + +**Parameters** + + +- **flow** - Specify flow behavior for cluster load balancing. + + +- **counters** - Configure the flow to generate counters for cluster load balancing. + +- **match encapsulation [none | vxlan] ipv4** - Specify the flow to match encapsulation for IPv4. + +- **monitor** - Configure the flow to monitor cluster load balancing without impacting actual forwarding. + +- **warning ungrouped** - Configure the flow to generate warning messages about the cluster load balancing configuration. 
+ + +**Example** + + +Use the following commands to enter Cluster Load Balancing Configuration Mode and configure the flow to match VXLAN encapsulation: + + +``` +`switch(config)# **load-balance cluster** +switch(config-clb)# **flow match encapsulation vxlan ipv4** +switch(config-clb)#` +``` + + +### flow exhaustion + + +The **flow exhaustion** command in the Port Group Host Configuration Mode configures flow-related settings for cluster load balancing on the switch. + + +The **no** version of the command deletes the configuration from the ***running-config***. + + +**Command Mode** + + +Cluster Load Balancing Configuration + + +Port Group Host Configuration + + +**Command Syntax** + + +**flow exhaustion action [dscp dscp_value] [traffic-class class_value** + + +**no flow exhaustion action [dscp dscp_value] [traffic-class class_value** + + +**Parameters** + + +- **exhaustion action** - Configure an action when the flows reach limits. + +- **dscp dscp_value** - Configure the packet DSCP value from 0 to 63. + +- **traffic-class class_value** - Configure the traffic-class value from 0 to 7. + + +**Example** + + +Configure the MyPortGroup exhaustion action to use a DSCP value of 25: + + +``` +`switch(config)# **load-balance cluster** +switch(config-clb)# **port group host MyPortGroup** +switch(config-clb-port-host-MyPortGroup)# **flow exhaustion action dscp 25**` +``` + + +### flow source learning + + +The **flow source learning** command enters the Flow Source Learning Configuration Mode and configures cluster load balancing to learn flow sources. + + +The **no** version of the command deletes the configuration from the ***running-config***. 
+ + +**Command Mode** + + +Cluster Load Balancing Configuration + + +Flow Source Learning Configuration + + +**Command Syntax** + + +**flow source learning** + + +**[aging timeout number_of_seconds seconds | limit number_of_learned_flows]** + + +**no flow source learning** + + +**Parameters** + + +- **flow source learning** - Configure flow discovery by learning. + +- **aging timeout number_of_seconds seconds** - Configure the aging timeout between 30 and 2147483647 seconds with a default value of 600 seconds. + +- **limit number_of_learned_flows** - Limit the number of flows to learn, preserving hardware TCAM resources. + + +**Example** + + +Use the following commands to configure the flow source learning timeout to 1200 seconds: + + +``` +`switch(config)# **load-balance cluster** +switch(config-clb)# **flow source learning** +switch(config-clb-flow-learning)# **aging timeout 1200 seconds** +switch(config-clb-flow-learning)#` +``` + + +### flow limit + + +The **flow limit** command in the Port Group Host Configuration Mode configures flow-related settings for cluster load balancing on the switch. + + +The **no** version of the command deletes the configuration from the ***running-config***. + + +**Command Mode** + + +Cluster Load Balancing Configuration + + +Port Group Host Configuration + + +**Command Syntax** + + +**flow limit max_flows learning max_flows** + + +**no flow limit max_flows learning max_flows** + + +**Parameters** + + +- **flow limit max_flows** - Configure the maximum number of flows per port group. + +- **learning max_flows** - Configure the limit of learned flows.
+ + + + +**Example** + + +Configure the MyPortGroup flow limit to limit learned flows to 2500: + + +``` +`switch(config)# **load-balance cluster** +switch(config-clb)# **port group host MyPortGroup** +switch(config-clb-port-host-MyPortGroup)# **flow limit learning 2500**` +``` + + +### flow warning + + +The **flow warning** command in the Port Group Host Configuration Mode configures flow warning thresholds for cluster load balancing on the switch. + + +The **no** version of the command deletes the configuration from the ***running-config***. + + +**Command Mode** + + +Cluster Load Balancing Configuration + + +Port Group Host Configuration + + +**Command Syntax** + + +**flow warning threshold_flows** + + +**no flow warning threshold_flows** + + +**Parameters** + + +- **flow warning threshold_flows** - Configure the warning threshold of flows per port group. + + +**Example** + + +Configure the MyPortGroup flow warning threshold to 2500: + + +``` +`switch(config)# **load-balance cluster** +switch(config-clb)# **port group host MyPortGroup** +switch(config-clb-port-host-MyPortGroup)# **flow warning 2500**` +``` + + + + + +### forwarding type + + +The **forwarding type** command configures the encapsulation and mode used to deliver packets between TORs over the uplinks. + + +The **no** version of the command deletes the configuration from the ***running-config***. + + +**Command Mode** + + +Cluster Load Balancing Configuration + + +**Command Syntax** + + +**forwarding type routed** + + +**no forwarding type routed** + + +**Parameters** + + +- **forwarding type routed** - Configure the forwarding type as routed. + + +**Example** + + +Use the following commands to configure the forwarding type as routed: + + +``` +`switch(config)# **load-balance cluster** +switch(config-clb)# **forwarding type routed** +switch(config-clb)#` +``` + + +### interface tunnel + + +The **interface tunnel** command places the switch in +Interface-Tunnel Configuration Mode.
+ + +The **no interface tunnel** command deletes the specified +interface tunnel configuration. + + +The **exit** command returns the switch to the global +configuration mode. + + +**Command Mode** + + +Global Configuration + + +**Command Syntax** + + +**interface tunnel +number** + + +**no interface tunnel +number** + + +**Parameter** + + +**number** - Specify the tunnel interface number. Values range +from **0** to **255**. + + +**Example** + + +This command places the switch in Interface-Tunnel Configuration Mode for tunnel +interface +**10**. +``` +`switch(config)# **interface tunnel 10** +switch(config-if-Tu10)#` +``` + + +### ip arp inspection limit + + +The **ip arp inspection limit** command configures a rate limit for incoming ARP +packets on an interface and disables the interface if the incoming ARP rate +exceeds the configured value. + + +**Command Mode** + + +EXEC + + +**Command Syntax** + + +**ip arp inspection limit** [rate +pps] [burst_interval +sec | none] + + +**no ip arp inspection limit** [rate +pps] [burst_interval +sec | none] + + +**default ip arp inspection limit** [rate +pps] [burst_interval +sec | none] + + +**Parameters** + +- **rate** - Specifies the ARP inspection limit rate in +packets per second. + +- **pps** - Specify the number of ARP inspection +limit rate packets per second. + +- **burst_interval** - Specifies the ARP inspection limit +burst interval. + +- **sec** - Specify the burst interval in +seconds. + + +**Related Commands** + +- ip arp inspection logging + +- ip arp inspection trust + +- ip arp inspection vlan + +- show ip arp inspection vlan + + +**Examples** + +- This command configures the rate limit of incoming ARP packets to disable +the interface when the incoming ARP rate exceeds the configured value, sets +the rate to **512**, the upper limit for the number of +invalid ARP packets allowed per second, and sets the burst +interval over which the interface is monitored for a high ARP rate to +**11** seconds.
+ +``` +`switch(config)# **ip arp inspection limit rate 512 burst interval 11** +switch(config)#` +``` + +- This command displays verification of the interface specific configuration. + +``` +`switch(config)# **interface ethernet 3/1** +switch(config)# **ip arp inspection limit rate 20 burst interval 5** +switch(config)# **interface Ethernet 3/3** +switch(config)# **ip arp inspection trust** +switch(config)# **show ip arp inspection interfaces** +  + Interface      Trust State  Rate (pps) Burst Interval + -------------  -----------  ---------- -------------- + Et3/1          Untrusted    20         5 + Et3/3          Trusted      None       N/A + +switch(config)#` +``` + + +### ip arp inspection logging + + +The **ip arp inspection logging** command enables logging of +incoming ARP packets on the interface if the rate exceeds the configured value. + + +**Command Mode** + + +EXEC + + +**Command Syntax** + + +**ip arp inspection logging** [rate +pps] [burst_interval +sec | none] + + +**no ip arp inspection logging** +[rate +pps] [burst_interval +sec | none] + + +**default ip arp inspection logging** +[rate +pps] [burst_interval +sec | none] + + +**Parameters** + + +- **rate** - Specifies the ARP inspection logging rate in +packets per second. + +- **pps** - Specifies the number of ARP +inspection logging rate packets per second. + +- **burst_interval** - Specifies the ARP inspection logging +burst interval. + +- **sec** - Specify the number of burst +interval seconds. + + +**Related Commands** + +- ip arp inspection limit + +- ip arp inspection trust + +- ip arp inspection vlan + +- show ip arp inspection vlan + + +**Example** + + +This command enables logging of incoming ARP packets when the incoming ARP rate +exceeds the configured value on the interface, sets the rate to **2048** packets per second, +and sets the burst interval over which the interface is monitored for a high ARP rate to **15** seconds.
+ +``` +`switch(config)# **ip arp inspection logging rate 2048 burst interval 15** +switch(config)#` +``` + + +### ip arp inspection trust + + +The **ip arp inspection trust** +command configures the trust state of an interface. By default, all interfaces are +untrusted. + + +**Command Mode** + +Global Configuration Mode +**Command Syntax** + + +ip arp inspection +trust + + +no ip arp inspection +trust + + +default ip arp inspection +trust + +**Related Commands** + +- ip arp inspection limit + +- ip arp inspection logging + +- show ip arp inspection vlan + +- ip arp inspection vlan + + +**Examples** + +- This command configures the trust state of an +interface. +``` +`switch(config)# **ip arp inspection trust** +switch(config)#` +``` + +- This command configures the trust state of an interface to +untrusted. +``` +`switch(config)# **no ip arp inspection trust** +switch(config)#` +``` + +- This command configures the trust state of an interface to the +default. +``` +`switch(config)# **default ip arp inspection trust** +switch(config)#` +``` + + +### ip arp inspection vlan + + +The **ip arp inspection vlan** command enables ARP inspection. +EOS intercepts ARP requests and responses on untrusted interfaces on specified +VLANs, and verifies intercepted packets with valid IP-MAC address bindings. EOS +drops all invalid ARP packets. On trusted interfaces, EOS processes all incoming ARP +packets and forwards without verification. By default, EOS disables ARP inspection +on all VLANs. + + +**Command Mode** + + +EXEC + + +**Command Syntax** + + +**ip arp inspection vlan [list]** + + +**Parameters** + + +**list** - Specifies the VLAN interface number. + + +**Related Commands** + + +- ip arp inspection limit + +- ip arp inspection trust + +- ip arp inspection vlan + + +**Example** + +- This command enables ARP inspection on VLANs **1** +through +**150**. 
+``` +`switch(config)# **ip arp inspection vlan 1 - 150** +switch(config)#` +``` + +- This command disables ARP inspection on VLANs **1** +through +**150**. +``` +`switch(config)# **no ip arp inspection vlan 1 - 150** +switch(config)#` +``` + +- This command sets the ARP inspection default to VLANs +**1** through +**150**. +``` +`switch(config)# **default ip arp inspection vlan 1 - 150** +switch(config)#` +``` + +- These commands enable ARP inspection on multiple VLANs 1 through +**150** and **200** +through +**250**. +``` +`switch(config)# **ip arp inspection vlan 1-150,200-250** +switch(config)#` +``` + + +### ip dhcp relay all-subnets + + +The **ip dhcp relay all-subnets** command configures the DHCP +smart relay status in the Interface Configuration Mode. DHCP smart relay supports +forwarding DHCP requests with a client secondary IP addresses in the gateway address +field. Enabling DHCP smart relay on an interface requires that you enable DHCP relay +on that interface. + + +By default, an interface assumes the global DHCP smart relay setting as configured by +the ip dhcp relay all-subnets default command. The +**ip dhcp relay all-subnets** command, when +configured, takes precedence over the global smart relay setting. + + +The **no ip dhcp relay all-subnets** command disables DHCP +smart relay on the configuration mode interface. The **default ip dhcp +relay all-subnets** command restores the interface to the +default DHCP smart relay setting, as configured by the **ip dhcp relay +all-subnets default** command, by removing the corresponding +**ip dhcp relay all-subnets** or **no ip +dhcp relay all-subnets** statement from +***running-config***. 
+ + +**Command Mode** + + +Interface-Ethernet Configuration + + +Interface-Port-channel Configuration + + +Interface-VLAN Configuration + + +**Command Syntax** + + +ip dhcp relay all-subnets + + +no ip dhcp relay all-subnets + + +default ip dhcp relay all-subnets + + +**Examples** + +- This command enables DHCP smart relay on VLAN interface +**100**. +``` +`switch(config)# **interface vlan 100** +switch(config-if-Vl100)# **ip helper-address 10.4.4.4** +switch(config-if-Vl100)# **ip dhcp relay all-subnets** +switch(config-if-Vl100)# **show ip dhcp relay** +DHCP Relay is active +DHCP Relay Option 82 is disabled +DHCP Smart Relay is enabled +Interface: Vlan100 + DHCP Smart Relay is enabled + DHCP servers: 10.4.4.4 +switch(config-if-Vl100)#` +``` + +- This command disables DHCP smart relay on VLAN interface +**100**. +``` +`switch(config-if-Vl100)# **no ip dhcp relay all-subnets** +switch(config-if-Vl100)# **show active** + interface Vlan100 + no ip dhcp relay all-subnets + ip helper-address 10.4.4.4 +switch(config-if-Vl100)# **show ip dhcp relay** +DHCP Relay is active +DHCP Relay Option 82 is disabled +DHCP Smart Relay is enabled +Interface: Vlan100 + DHCP Smart Relay is disabled + DHCP servers: 10.4.4.4 +switch(config-if-Vl100)#` +``` + +- This command enables DHCP smart relay globally, configures VLAN interface +**100** to use the global setting, then +displays the DHCP relay +status. 
+``` +`switch(config)# **ip dhcp relay all-subnets default** +switch(config)# **interface vlan 100** +switch(config-if-Vl100)# **ip helper-address 10.4.4.4** +switch(config-if-Vl100)# **default ip dhcp relay** +switch(config-if-Vl100)# **show ip dhcp relay** +DHCP Relay is active +DHCP Relay Option 82 is disabled +DHCP Smart Relay is enabled +Interface: Vlan100 + Option 82 Circuit ID: 333 + DHCP Smart Relay is enabled + DHCP servers: 10.4.4.4 +switch(config-if-Vl100)#` +``` + + +### ip dhcp relay all-subnets +default + + +The **ip dhcp relay all-subnets default** command configures +the global DHCP smart relay setting. DHCP smart relay supports forwarding DHCP +requests with a client secondary IP addresses in the gateway address field. The +default global DHCP smart relay setting is disabled. + + +The global DHCP smart relay setting applies to all interfaces for which an ip dhcp relay all-subnets statement does not exist. Enabling +DHCP smart relay on an interface requires that you also enable DHCP relay on that +interface. + + +The **no ip dhcp relay all-subnets default** and +**default ip dhcp relay all-subnets default** commands +restore the global DHCP smart relay default setting of disabled by removing the +**ip dhcp relay all-subnets default** command from +***running-config***. + + +**Command Mode** + + +Global Configuration + + +**Command Syntax** + + +ip dhcp relay all-subnets default + + +no ip dhcp relay all-subnets default + + +default ip dhcp relay all-subnets default + + +**Example** + + +This command configures the global DHCP smart relay setting to +**enabled**. +``` +`switch(config)# **ip dhcp relay all-subnets default** +switch(config)#` +``` + + +### ip dhcp relay always-on + + +The **ip dhcp relay always-on** command enables the DHCP relay +agent on the switch regardless of the DHCP relay agent status on any interface. 
By +default, EOS enables the DHCP relay agent only if you have one routable interface +configured with an ip helper-address statement. + + +The **no ip dhcp relay always-on** and **default ip +dhcp relay always-on** commands remove the **ip dhcp +relay always-on** command from ***running-config***. + + +**Command Mode** + + +Global Configuration + + +**Command Syntax** + + +ip dhcp relay always-on + + +no ip dhcp relay always-on + + +default ip dhcp relay always-on + + +**Example** + + +This command enables the DHCP relay +agent. +``` +`switch(config)# **ip dhcp relay always-on** +switch(config)#` +``` + + +### ip dhcp relay information +option (Global) + + +The **ip dhcp relay information option** command configures the +switch to attach tags to DHCP requests before forwarding them to the DHCP servers +designated by the ip helper-address commands. The command +specifies the tag contents for packets forwarded by the configured interface. The +default value for each interface configured with an ip helper-address is the name and number of the +interface. + + +The **no ip dhcp relay information option** and +**default ip dhcp relay information option** commands +restore the switch default setting of not attaching tags to DHCP requests by +removing the **ip dhcp relay information option** command from +***running-config***. + + +**Command Mode** + + +Global Configuration + + +**Command Syntax** + + +ip dhcp relay information option + + +no ip dhcp relay information option + + +default ip dhcp relay information option + + +**Example** + + +This command enables the attachment of tags to DHCP requests forwarded to DHCP server +addresses. 
+``` +`switch(config)# **ip dhcp relay information option** +switch(config)#` +``` + + +### ip dhcp relay information option circuit-id + + +The **ip dhcp relay information option circuit-id** command +specifies the content of tags that the switch attaches to DHCP requests before +forwarding them from the configuration mode interface to DHCP server addresses +specified by ip helper-address commands. Tags attach to +outbound DHCP requests only if you enable the information option on the switch +(ip dhcp relay information option (Global)). + + +The **no ip dhcp relay information option circuit-id** and +**default ip dhcp relay information option circuit-id** commands restore the default content setting for the +configuration mode interface by removing the corresponding command from +***running-config***. + + +**Command Mode** + + +Interface-Ethernet Configuration + + +Interface-Loopback Configuration + + +Interface-Management Configuration + + +Interface-Port-channel Configuration + + +Interface-VLAN Configuration + + +**Command Syntax** + + +ip dhcp relay information option circuit-id +id_label + + +no ip dhcp relay information option circuit-id + + +default ip dhcp relay information option circuit-id + + +**Parameters** + + +**id_label** - Specifies the tag content. Use a format in +alphanumeric characters (maximum 15 characters). + + +**Example** + + +This command configures **x-1234** as the tag content for +packets sent from VLAN **200**. + +``` +`switch(config)# **interface vlan 200** +switch(config-if-Vl200)# **ip dhcp relay information option circuit-id x-1234** +switch(config-if-Vl200)#` +``` + + +### ip dhcp snooping + + +The **ip dhcp snooping** command enables DHCP snooping globally +on the switch. Configure DHCP snooping as a set of Layer 2 processes and use it with +DHCP servers to control network access to clients with specific IP/MAC addresses.
+The switch supports Option-82 insertion, a DHCP snooping process that allows relay +agents to provide remote-ID and circuit-ID information to DHCP reply and request +packets. DHCP servers use this information to determine the originating port of DHCP +requests and associate a corresponding IP address to that port. DHCP servers use +port information to track host location and IP address usage by authorized physical +ports. + + +DHCP snooping uses the information option (Option-82) to include the switch MAC +address as the remote-ID along with the physical interface name and VLAN number as +the circuit-ID in DHCP packets. After adding the information to the packet, the DHCP +relay agent forwards the packet to the DHCP server as specified by the DHCP +protocol. + + +DHCP snooping on a specified VLAN requires all of these conditions to be met: + +- Enable DHCP snooping globally. + +- Enable insertion of option-82 information in DHCP packets. + +- Enable DHCP snooping on the specified VLAN. + +- Enable DHCP relay on the corresponding VLAN interface. + + +The **no ip dhcp snooping** and **default ip dhcp +snooping** commands disable global DHCP snooping by removing +the **ip dhcp snooping** command from +***running-config***. + + +**Command Mode** + + +Global Configuration + + +**Command Syntax** + + +ip dhcp snooping + + +no ip dhcp snooping + + +default ip dhcp snooping + + +**Related Commands** + + +- ip dhcp snooping information option enables +insertion of option-82 snooping data. + +- ip helper-address enables the DHCP relay agent on a +configuration mode interface. + + +**Example** + + +This command globally enables snooping on the switch, displaying DHCP snooping status +before and after invoking the +command.
+``` +`switch(config)# **show ip dhcp snooping** +DHCP Snooping is disabled +switch(config)# **ip dhcp snooping** +switch(config)# **show ip dhcp snooping** +DHCP Snooping is enabled +DHCP Snooping is not operational +DHCP Snooping is configured on following VLANs: + None +DHCP Snooping is operational on following VLANs: + None +Insertion of Option-82 is disabled +switch(config)#` +``` + + +### ip dhcp snooping bridging + + +The **ip dhcp snooping bridging** command enables the DHCP +snooping bridging configuration. + + +The **no ip dhcp snooping bridging** command removes the DHCP +snooping bridging configuration from the ***running-config***. + + +**Command Mode** + + +Global Configuration Mode + + +**Command Syntax** + + +ip dhcp snooping bridging + + +no ip dhcp snooping bridging + + +**Example** + + +This command configures the DHCP snooping bridging. + +``` +`switch# **configure** +switch(config)# **ip dhcp snooping bridging**` +``` + + +### ip dhcp snooping information +option + + +The **ip dhcp snooping information option** command enables the +insertion of option-82 DHCP snooping information in DHCP packets on VLANs where you +have DHCP snooping enabled. DHCP snooping provides a Layer 2 switch process that +allows relay agents to provide remote-ID and circuit-ID information to DHCP reply +and request packets. DHCP servers use this information to determine the originating +port of DHCP requests and associate a corresponding IP address to that port. + + +DHCP snooping uses information option (Option-82) to include the switch MAC address +(router-ID) along with the physical interface name and VLAN number (circuit-ID) in +DHCP packets. After adding the information to the packet, the DHCP relay agent +forwards the packet to the DHCP server through DHCP protocol processes. + + +DHCP snooping on a specified VLAN requires all of these conditions to be met: + +- Enable DHCP snooping globally. + +- Enabled insertion of option-82 information in DHCP packets. 
+ +- Enable DHCP snooping on the specified VLAN. + +- Enable DHCP relay on the corresponding VLAN interface. + + +If DHCP snooping is not enabled globally, the **ip dhcp snooping information +option** command persists in +***running-config*** without any operational +effect. + + +The **no ip dhcp snooping information option** and +**default ip dhcp snooping information option** +commands disable the insertion of option-82 DHCP snooping information in DHCP +packets by removing the **ip dhcp snooping information +option** statement from +***running-config***. + + +**Command Mode** + + +Global Configuration + + +**Command Syntax** + + +ip dhcp snooping information option + + +no ip dhcp snooping information option + + +default ip dhcp snooping information option + + +**Example** + + +These commands enable the insertion of option-82 DHCP snooping information in DHCP packets from ports on snooping-enabled +VLANs. DHCP snooping was previously enabled on the +switch. +``` +`switch(config)# **ip dhcp snooping information option** +switch(config)# **show ip dhcp snooping** +DHCP Snooping is enabled +DHCP Snooping is operational +DHCP Snooping is configured on following VLANs: + 100 +DHCP Snooping is operational on following VLANs: + 100 +Insertion of Option-82 is enabled + Circuit-id format: Interface name:Vlan ID + Remote-id: 00:1c:73:1f:b4:38 (Switch MAC) +switch(config)#` +``` + + +### ip dhcp snooping vlan + + +The **ip dhcp snooping vlan** command enables DHCP snooping on +specified VLANs. DHCP snooping provides a Layer 2 process that allows relay agents +to provide remote-ID and circuit-ID information in DHCP packets. DHCP servers use +this data to determine the originating port of DHCP requests and associate a +corresponding IP address to that port. Configure DHCP snooping on a global and VLAN +basis. + + +DHCP snooping on a specified VLAN requires each of these conditions: + +- Enable DHCP snooping globally. + +- Enable insertion of option-82 information in DHCP packets.
+ +- Enable DHCP snooping on the specified VLAN. + +- Enable DHCP relay on the corresponding VLAN interface. + + +If DHCP snooping is not enabled globally, the **ip dhcp snooping +vlan** command persists in +***running-config*** without any operational +effect. + + +The **no ip dhcp snooping vlan** and +**default ip dhcp snooping vlan** +commands disable DHCP snooping on the specified VLANs by removing the corresponding **ip dhcp +snooping vlan** statement from +***running-config***. + + +**Command Mode** + + +Global Configuration + + +**Command Syntax** + + +ip dhcp snooping vlan +v_range + + +no ip dhcp snooping vlan +v_range + + +default ip dhcp snooping vlan +v_range + + +**Parameters** + +- **v_range** - Specifies the range of VLANs to enable +DHCP snooping. Formats include a number, a number range, or a +comma-delimited list of numbers and ranges. Numbers range from +**1** to +**4094**. + + +**Example** + + +These commands enable DHCP snooping globally, enable insertion of option-82 information, +enable DHCP snooping on VLAN **100**, and enable DHCP relay on VLAN interface +**100**. +``` +`switch(config)# **ip dhcp snooping** +switch(config)# **ip dhcp snooping information option** +switch(config)# **ip dhcp snooping vlan 100** +switch(config)# **interface vlan 100** +switch(config-if-Vl100)# **ip helper-address 10.4.4.4** +switch(config-if-Vl100)# **show ip dhcp snooping** +DHCP Snooping is enabled +DHCP Snooping is operational +DHCP Snooping is configured on following VLANs: + 100 +DHCP Snooping is operational on following VLANs: + 100 +Insertion of Option-82 is enabled + Circuit-id format: Interface name:Vlan ID + Remote-id: 00:1c:73:1f:b4:38 (Switch MAC) +switch(config)#` +``` + + +### ip hardware fib ecmp +resilience + + +The **ip hardware fib ecmp resilience** command enables +resilient ECMP for the specified IP address prefix and configures a fixed number of +next hop entries in the hardware ECMP table for that prefix.
In addition to +specifying the maximum number of next hop addresses that the table can contain for +the prefix, the command includes a redundancy factor that allows duplication of each +next hop address. The fixed table space for the address is the maximum number of +next hops multiplied by the redundancy factor. + + +Resilient ECMP is useful when it is undesirable for routes to be rehashed due to link +flap, as when using ECMP for load balancing. + + +The **no ip hardware fib ecmp resilience** and +**default ip hardware fib ecmp resilience** commands +restore the default hardware ECMP table management by removing the **ip +hardware fib ecmp resilience** command from +***running-config***. + + +**Command Mode** + + +Global Configuration + + +**Command Syntax** + + +ip hardware fib ecmp resilience +net_addr +capacity +nhop_max +redundancy +duplicates + + +no ip hardware fib ecmp resilience +net_addr + + +default ip hardware fib ecmp resilience +net_addr + + +**Parameters** + +- **net_addr** - Specify the IP address prefix managed +by command. (CIDR or address-mask). + +- **nhop_max** - Specify the maximum number of next-hop +addresses for specified IP address prefix. Value range varies by +platform: + +- Helix: <**2** to +**64**> + +- Trident: <**2** to +**32**> + +- Trident II: <**2** to +**64**> + +- **duplicates** - Specifies the redundancy factor. +Value ranges from **1** to +**128**. + + +**Example** + + +This command configures a hardware ECMP table space of 24 entries for the IP address +**10.14.2.2/24**. A maximum of six next-hop addresses +can be specified for the IP address. When the table contains six next-hop addresses, +each appears in the table four times. When the table contains fewer than six +next-hop addresses, each is duplicated until the 24 table entries are +filled. 
+``` +`switch(config)# **ip hardware fib ecmp resilience 10.14.2.2/24 capacity 6 redundancy 4** +switch(config)#` +``` + + +### ip hardware fib load-balance distribution + + +The **ip hardware fib load-balance distribution** command allows the configuration of +dynamic load balancing (DLB) on ECMP Groups. The **no** and **default** +versions of the command disable the feature and return the configuration to the traditional hash-based load balancing. + + +**Command Mode** + + +Global Configuration + + +**Command Syntax** + + +**ip hardware fib load-balance distribution [dynamic | hash] +average-traffic-weight +average_traffic_weight_value +flow-set-size +flow_set_size_value +inactivity +inactivity_value +sampling-period +sampling_period +seed +hash_seed +member-selection [optimal always | optimal timer]** + + +**Parameters** + + +- **hash** - Specify to use hash-based load balancing, the +default behavior. + +- **dynamic** - Specify to use dynamic load balancing with ECMP +groups. + +- **average-traffic-weight +average_traffic_weight_value** - Specifies a +value between 1 and 15 with a default value of 1. A higher weight value +gives preference to average values over instantaneous values. + +- **flow-set-size +flow_set_size_value** - Specifies the number of +flow set entries allocated to each DLB group. + +- **inactivity +inactivity_value** - Specifies the amount of +time for a flow set to be idle before reassigning to an optimal +port. + + +- **member-selection [optimal always +| optimal timer]** - Specifies when to select an +optimal port for the next packet in a flow. + +- **optimal always** - Specifies to always +pick the optimal member whether or not the inactivity duration +has elapsed. + +- **optimal timer** - If the inactivity +duration has elapsed, pick the optimal member. + +- **sampling-period +sampling_period** - Specify the duration +between two consecutive samplings of port state data with a default value +of 16 microseconds.
+ +- **seed +hash_seed** - Specify a value for random number +generation by the optimal candidate random selection process to select a +port when two or more ports have the same optimal quality. + + +**Example** + + +Use the following command to set the DLB member selection to optimal +always: + +``` +`switch(config)# **ip hardware fib load-balance distribution dynamic member-selection optimal always**` +``` + + +Use the following command + + +### ip hardware fib next-hop resource optimization + + +The **ip hardware fib next-hop resource optimization** command +enables or disables the resource optimization features on the switch. By default, +EOS enables the feature on the switch. + + +The **no ip hardware fib next-hop resource optimization** command +removes all the resource optimization features running on the switch. + + +**Command Mode** + + +Global Configuration Mode + + +**Command Syntax** + + +ip hardware fib next-hop resource optimization +options + + +no ip hardware fib next-hop resource optimization +options + + +**Parameters** + +- Use one of the following two options to configure this command: + +- **disabled** - Disable hardware resource +optimization for adjacency programming. + +- **thresholds** - Utilization percentage for +starting or stopping optimization. The resource utilization +percentage value ranges from 0 to 100. Set separate **low** and +**high** thresholds. + + +**Examples** + + +- The following command disables all hardware resource optimization +features on the +switch: +``` +`switch# **configure terminal** +switch(config)# **ip hardware fib next-hop resource optimization disabled**` +``` + +- The following command configures the thresholds for starting and +stopping the +optimization: +``` +`switch(config)# **ip hardware fib next-hop resource optimization thresholds low 20 high 80**` +``` + + +### ip hardware fib optimize + + +The **ip hardware fib optimize** command enables IPv4 route +scale.
Restart the platform Layer 3 agent to ensure optimization of IPv4 routes with +the agent SandL3Unicast terminate command for the +configuration mode interface. + + +**Command Mode** + + +Global Configuration + + +**Command Syntax** + + +ip hardware fib optimize exact-match prefix-length +prefix-length +prefix-length + + + + +**Parameters** + + +**prefix-length** - Specifies the length of the prefix equal to +**12**, **16**, +**20**, **24**, +**28**, or **32**. Optionally, +add one additional prefix-length limited to the prefix-length of +**32**. + + +**Related Commands** + + +- The agent SandL3Unicast terminate command restarts +the Layer 3 agent to ensure optimization of IPv4 routes. + +- The show platform arad ip route command shows +resources for all IPv4 routes in hardware. Routes with additional hardware +resources appear with an asterisk (*). + +- The show platform arad ip route summary +command displays hardware resource usage of IPv4 routes. + + +**Examples** + +- This configuration command allows configuring prefix lengths +**12** and **32** + +``` +`switch(config)# **ip hardware fib optimize exact-match prefix-length 12 32** +! Please restart layer 3 forwarding agent to ensure IPv4 routes are optimized` +``` + + +One of the two prefixes in this command has a prefix-length of +**32**, required in the instance when +adding two prefixes. For this command to take effect, restart the +platform Layer 3 agent. + +- This configuration command restarts the platform Layer 3 agent to ensure +optimization of IPv4 +routes. +``` +`switch(config)# **agent SandL3Unicast terminate** +SandL3Unicast was terminated` +``` + + +Restarting the platform Layer 3 agent results in deletion of all IPv4 +routes, and then re-added to the hardware. + +- This configuration command allows configuring prefix lengths +**32** and +**16**. +``` +`switch(config)# **ip hardware fib optimize exact-match prefix-length 32 16** +! 
Please restart layer 3 forwarding agent to ensure IPv4 routes are optimized` +``` + + +One of the two prefixes in this command is a prefix-length of +**32**, required when adding two prefixes. +For this command to take effect, restart the platform Layer 3 agent. + +- This configuration command restarts the platform Layer 3 agent to ensure +optimization of IPv4 +routes. +``` +`switch(config)# **agent SandL3Unicast terminate** +SandL3Unicast was terminated` +``` + + +Restarting the platform Layer 3 agent results in deletion of all IPv4 +routes, and then re-added to the hardware. + +- This configuration command allows configuring prefix length +**24**. +``` +`switch(config)# **ip hardware fib optimize exact-match prefix-length 24** +! Please restart layer 3 forwarding agent to ensure IPv4 routes are optimized` +``` + + +In this instance, add one prefix-length, and does not require a +prefix-length of **32**. For this command to take +effect, restart the platform Layer 3 agent. + +- This configuration command restarts the platform Layer 3 agent to ensure +optimization of IPv4 +routes. +``` +`switch(config)# **agent SandL3Unicast terminate** +SandL3Unicast was terminated` +``` + + +Restarting the platform Layer 3 agent results in deletion of all IPv4 +routes, and then re-added to the hardware. + +- This configuration command allows configuring the prefix length of +**32**. +``` +`switch(config)# **ip hardware fib optimize exact-match prefix-length 32** +! Please restart layer 3 forwarding agent to ensure IPv4 routes are optimized` +``` + + +For this command to take effect, restart the platform Layer 3 agent. + +- This configuration command restarts the platform Layer 3 agent to ensure +optimization of IPv4 +routes. +``` +`switch(config)# **agent SandL3Unicast terminate** +SandL3Unicast was terminated` +``` + + +Restarting the platform Layer 3 agent results in deletion of all IPv4 +routes, and then re-added to the hardware. 
+ +- This configuration command disables configuring prefix lengths +**12** and +**32**. +``` +`switch(config)# **no ip hardware fib optimize exact-match prefix-length 12 32** +! Please restart layer 3 forwarding agent to ensure IPv4 routes are not optimized` +``` + + +One of the two prefixes in this command has a prefix-length of +**32**, required when configuring two +prefixes. For this command to take effect, restart the platform Layer 3 +agent. + + +### ip hardware fib optimize prefixes + + +The **ip hardware fib optimize prefixes** command in the Global Configuration Mode reserves IPv4 optimized prefixes on +the default and non-default VRFs. + + +The **no** version of the command explicitly removes the configuration from the ***running-config*** on the switch. + + +**Command Mode** + + +Global Configuration + + +**Command Syntax** + + +**ip hardware fib optimize vrf vrf_name prefixes minimum count num_prefixes** + + +**no ip hardware fib optimize vrf vrf_name prefixes minimum count num_prefixes** + + +**Parameters** + + + - **vrf + vrf_name** - Specify the VRF to minimize prefixes. + +- **prefixes minimum count +num_prefixes** - Specify the minimum number of prefixes to +optimize on the VRF. + + +**Example** + + +Use the following command to create reservations for 25 IPv4 optimized prefixes on VRF blue: + + +``` +`switch(config)# ip hardware fib optimize vrf blue prefixes minimum count 25 +! Please restart the SandL3Unicast agent to reserve space for optimized FIB prefixes` +``` + + +### ip helper-address + + +The **ip helper-address** command enables the DHCP relay agent +on the Interface Configuration Mode and specifies a forwarding address for DHCP +requests. An interface configured with multiple helper-addresses forwards DHCP +requests to all specified addresses. + + +The **no ip helper-address** and **default ip +helper-address** commands remove the corresponding +**ip helper-address** command from +***running-config***. 
Commands that do not +specify an IP helper-address remove all helper-addresses from the interface. + + +**Command Mode** + + +Interface-Ethernet Configuration + + +Interface-Port-channel Configuration + + +Interface-VLAN Configuration + + +**Command Syntax** + + +ip helper-address +ipv4_addr [vrf +vrf_name][source-address +ipv4_addr | source-interface +interfaces] + + +no ip helper-address [ipv4_addr] + + +default ip helper-address [ipv4_addr] + + +**Parameters** + +- **vrf** +**vrf_name** - Specifies the user-defined VRF for DHCP +server. + +- **ipv4_addr** - Specifies the DHCP server address +accessed by interface. + +- **source-address** +**ipv4_addr** - Specifies the source IPv4 address to +communicate with DHCP server. + +- **source-interface** +**interfaces** - Specifies the source interface to +communicate with DHCP server. Options include: + +- **Ethernet** +**eth_num** -  Specifies the Ethernet +interface number. + +- **Loopback** +**lpbck_num** - Specifies the loopback +interface number. Value ranges from **0** +to **1000**. + +- **Management** +**mgmt_num** -  Specifies the management +interface number. Accepted values are **1** +and **2**. + +- **Port-Channel** +{**int_num** | +**sub_int_num**} -  Specifies the +port-channel interface or subinterface number. Value of interface +ranges from **1** to +**2000**. Value of sub-interface +ranges from **1** to +**4094**. + +- **Tunnel** +**tnl_num** - Specifies the tunnel interface +number. Value ranges from **0** to +**255**. + +- **VLAN** +**vlan_num** - Specifies the VLAN +interface number. Value ranges from **1** +to **4094**. + + +**Related Commands** + +- ip dhcp relay always-on + +- ip dhcp relay information option (Global) + +- ip dhcp relay information option circuit-id + + +**Guidelines** + + +If specifying the source-address parameter, then the DHCP client receives an IPv4 +address from the subnet of source IP address. 
The source-address must be one of the +configured addresses on the interface. + + +**Examples** + +- This command enables DHCP relay on the VLAN interface +**200**; and configures the switch to forward +DHCP requests received on this interface to the server at +**10.10.41.15**. +``` +`switch(config)# **interface vlan 200** +switch(config-if-Vl200)# **ip helper-address 10.10.41.15** +switch(config-if-Vl200)# **show active** +interface Vlan200 + ip helper-address 10.10.41.15 +switch(config-if-Vl200)#` +``` + +- This command enables DHCP relay on the **interface ethernet +1/2**; and configures the switch to use +**2.2.2.2** as the source IP address when +relaying IPv4 DHCP messages to the server at +**1.1.1.1**. +``` +`switch(config)# **interface ethernet 1/2** +switch(config-if-Et1/2)# **ip helper-address 1.1.1.1 source-address 2.2.2.2** +switch(config-if-Et1/2)#` +``` + + +### ip icmp redirect + + +The **ip icmp redirect** command enables the transmission of +ICMP redirect messages. Routers send ICMP redirect messages to notify data link +hosts of the availability of a better route for a specific destination. + + +The **no ip icmp redirect** command disables the switch from sending +ICMP redirect messages. + + +**Command Mode** + + +Global Configuration + + +**Command Syntax** + + +ip icmp redirect + + +no ip icmp redirect + + +default ip icmp redirect + + +**Example** + + +This command disables the redirect +messages. +``` +`switch(config)# **no ip icmp redirect** +switch(config)# **show running-config** + <-------OUTPUT OMITTED FROM EXAMPLE--------> +! +no ip icmp redirect +ip routing +! + <-------OUTPUT OMITTED FROM EXAMPLE--------> +switch(config)#` +``` + + +### ip load-sharing + + +The **ip load-sharing** command provides the hash seed to an +algorithm the switch uses to distribute data streams among multiple equal-cost +routes to an individual IPv4 subnet. 
+ + +In a network topology using Equal-Cost Multipath routing, all switches performing +identical hash calculations may result in hash polarization, leading to uneven load +distribution among the data paths. Hash polarization is avoided when switches use +different hash seeds to perform different hash calculations. + + +The **no ip load-sharing** and **default ip +load-sharing** commands return the hash seed to the default +value of zero by removing the **ip load-sharing** command from +***running-config***. + + +**Command Mode** + + +Global Configuration + + +**Command Syntax** + + +**ip load-sharing +hardware +seed** + + +**no ip load-sharing +hardware** + + +**default ip load-sharing +hardware** + + +**Parameters** + +- **hardware** - The ASIC switching device. The +available options depend on the switch platform. + +- **arad** + +- **fm6000** + +- **petraA** + +- **trident** + +- **seed**     The hash seed. Value ranges vary by +switch platform. The default value on all platforms is +**0**. + +- when +**hardware**=**arad**     **seed** +ranges from **0** to +**2**. + +- when +**hardware**=**fm6000**     **seed** +ranges from **0** to +**39**. + +- when +**hardware**=**petraA**     **seed** +ranges from **0** to +**2**. + +- when +**hardware**=**trident**     **seed** +ranges from **0** to +**5**. + + +**Example** + + +This command sets the IPv4 load sharing hash seed to one on FM6000 platform +switches. +``` +`switch(config)# **ip load-sharing fm6000 1** +switch(config)#` +``` + + +### ip local-proxy-arp + + +The **ip local-proxy-arp** command enables local proxy ARP +(Address Resolution Protocol) in the Interface Configuration Mode. When enabling +local proxy ARP, ARP requests received in the Interface Configuration Mode returns +an IP address even when the request comes from within the same subnet. 
+ + +The **no ip local-proxy-arp** and **default ip +local-proxy-arp** commands disable local proxy ARP on the +configuration mode interface by removing the corresponding **ip +local-proxy-arp** command from +***running-config***. + + +**Command Mode** + + +Interface-Ethernet Configuration + + +Interface-Loopback Configuration + + +Interface-Management Configuration + + +Interface-Port-channel Configuration + + +Interface-VLAN Configuration + + +**Command Syntax** + + +ip local-proxy-arp + + +no ip local-proxy-arp + + +default ip local-proxy-arp + + +**Example** + + +These commands enable local proxy ARP on VLAN interface +**140** +``` +`switch(config)# **interface vlan 140** +switch(config-if-Vl140)# **ip local-proxy-arp** +switch(config-if-Vl140)# **show active** +interface Vlan140 + ip local-proxy-arp +switch(config-if-Vl140)#` +``` + +. + + +### ip multicast count + + +The **ip multicast count** command enables the IPv4 multicast +route traffic counter of group and source addresses in either bytes or packets. + + +The **no ip multicast count** command deletes all multicast +counters including the routes of group and source addresses. + + +The **no ip multicast count *group_address +source_address***command removes the current +configuration of the specified group and source addresses. It does not delete the +counter because the wildcard is still active. + + +The **default ip multicast count** command reverts the current +counter configuration of multicast route to the default state. + + +**Command Mode** + + +Global Configuration + + +**Command Syntax** + + +**ip multicast count [group_address +[source_address] | bytes | +packets]** + + +**no ip multicast count [group_address +[source_address] | bytes | +packets]** + + +**default ip multicast count [group_address +[source_address] | bytes | +packets]** + + +**Parameters** + + +- **group_address** - Configures the multicast route +traffic count of the specified group address. 
+ +- **source_address** - Configures the multicast +route traffic count of the specified group and source +addresses. + +- **bytes** - Configures the multicast route traffic +count to bytes. + +- **packets** - Configures the multicast route traffic +count to packets. + + +**Guidelines** + + +This command is supported on the FM6000 platform only. + + +**Examples** + +- This command configures the multicast route traffic count to +bytes. +``` +`switch(config)# **ip multicast count bytes**` +``` + +- This command configures the multicast route traffic count of the specified +group and source +addresses. +``` +`switch(config)# **ip multicast count 10.50.30.23 45.67.89.100**` +``` + +- This command deletes all multicast counters including the routes of group +and source +addresses. +``` +`switch(config)# **no ip multicast count**` +``` + +- This command reverts the current multicast route configuration to the +default +state. +``` +`switch(config)# **default ip multicast count**` +``` + + +### ip proxy-arp + + +The **ip proxy-arp** command enables proxy ARP in the Interface +Configuration Mode. Proxy ARP is disabled by default. When enabled, the switch +responds to all ARP requests, including gratuitous ARP requests, with target IP +addresses that match a route in the routing table. + + +The **no ip proxy-arp** and **default ip +proxy-arp** commands disable proxy ARP on the Interface +Configuration Mode by removing the corresponding **ip +proxy-arp** command from ***running-config***. + + +**Command Mode** + + +Interface-Ethernet Configuration + + +Interface-Loopback Configuration + + +Interface-Management Configuration + + +Interface-Port-channel Configuration + + +Interface-VLAN Configuration + + +**Command Syntax** + + +ip proxy-arp + + +no ip proxy-arp + + +default ip proxy-arp + + +**Example** + + +This command enables proxy ARP on **interface ethernet +4**. 
+``` +`switch(config)# **interface ethernet 4** +switch(config-if-Et4)# **ip proxy-arp** +switch(config-if-Et4)#` +``` + + +### ip route + + +The **ip route** command creates a static route. The +destination can be a network segment, and the next-hop address can be either an IPv4 +address or a routable port. When multiple routes exist to a destination prefix, the +route with the lowest administrative distance takes precedence. + + +By default, the administrative distance assigned to static routes is 1. Assigning a higher administrative distance to a static route configures it to be overridden by dynamic routing data. For example, a static route with an administrative distance value of 200 is overridden by OSPF intra-area routes, which have a default administrative distance of 110. + + +Route maps use tags to filter routes. The default tag value on static routes is +0. + + +Multiple routes with the same destination and the same administrative distance +comprise an Equal Cost Multi-Path (ECMP) route. The switch attempts to spread +outbound traffic equally through all ECMP route paths. EOS assigns all paths +comprising an ECMP identical tag values, and commands that change the tag value of a +path change the tag value of all paths in the ECMP. + + +The **no ip route** and **default ip +route** commands delete the specified static route by removing the +corresponding **ip route** command from +***running-config***. Commands that do not list a next-hop address +remove all **ip route** statements with the specified +destination from ***running-config***. If an **ip +route** statement exists for the same IP address in multiple VRFs, +each must be removed separately. Deleting a VRF deletes all static routes in a +user-defined VRF. 
+ + +**Command Mode** + + +Global Configuration + + +**Command Syntax** + + +ip route [vrf_instance] +dest_net +next-hop +[distance][tag_varname][rt_name] + + +no ip route [vrf_instance] +dest_net +[next-hop][distance] + + +default ip route [vrf_instance] +dest_net +[next-hop][distance] + + +**Parameters** + + +- **vrf_instance** - Specifies the VRF instance to +modify. + +- **no parameter** - Changes made to the default +VRF. + +- **vrf** +**vrf_name** - Changes made to the specified +VRF. + +- **dest_net** - Destination IPv4 subnet (CIDR or +address-mask notation). + +- **next-hop** - Location or access method of next hop +device. Options include the following: + +- **ipv4_addr** - An IPv4 address. + +- **null0** - Null0 interface. + +- **ethernet** +**e_num** - Ethernet interface specified by +**e_num**. + +- **loopback** +**l_num** - Loopback interface specified by +**l_num**. + +- **management** +**m_num** - Management interface specified by +**m_num**. + +- **port-channel** +**p_num** - Port-channel interface specified +by **p_num**. + +- **vlan** +**v_num** - VLAN interface specified by +**v_num**. + +- **VXLAN** +**vx_num** - VXLAN interface specified by +**vx_num**. + +- **distance** - Administrative distance assigned to the +route. Options include the following: + +- **no parameter** - Route assigned default +administrative distance of one. + +- **1-255** - The administrative distance +assigned to route. + +- **tag_varname** - Static route tag. Options include +the following: + +- **no parameter** - Assigns default static +route tag of **0**. + +- **tag** +**t_value** - Static route tag value. +**t_value** ranges from +**0** to +**4294967295**. + +- **rt_name** - Associates descriptive text to the +route. Options include the following: + +- **no parameter** - No text is associated with +the route. + +- **name** +**descriptive_text** - Assign the specified +text to the route. 
+ + +**Related Command** + + +The [ip route nexthop-group](/um-eos/eos-nexthop-groups#xx1145545) command creates +a static route that specifies a Nexthop Group to determine the Nexthop address. + + +**Example** + + +This command creates a static route in the default +VRF. +``` +`switch(config)# **ip route 172.17.252.0/24 vlan 2000** +switch(config)#` +``` + + +### ip routing + + +The **ip routing** command enables IPv4 routing. When enabling +IPv4 routing, the switch attempts to deliver inbound packets to destination IPv4 +addresses by forwarding them to interfaces or next hop addresses specified by the +forwarding table. + + +The **no ip routing** and **default ip +routing** commands disable IPv4 routing by removing the +**ip routing** command from +***running-config***. When disabling IPv4 +routing, the switch attempts to deliver inbound packets to their destination MAC +addresses. When this address matches the switch MAC address, EOS delivers the packet +to the CPU. EOS discards IP packets with IPv4 destinations that differ from the +switch address. The **delete-static-routes** option removes +static entries from the routing table. + + +IPv4 routing is disabled by default. + + +**Command Mode** + + +Global Configuration + + +**Command Syntax** + + +ip routing [vrf_instance] + + +no ip routing +[delete_routes][vrf_instance] + + +default ip routing +[delete_routes][vrf_instance] + + +**Parameters** + +- **delete_routes** - Resolves routing table static entries +when routing is disabled. + +- **no parameter** - Routing table retains +static entries. + +- **delete-static-routes** - Removes static +entries from the routing table. + +- **vrf_instance** - Specifies the VRF instance to +modify. + +- **no parameter** - Changes made to the +default VRF. + +- **vrf** +**vrf_name** - Changes made to the specified +user-defined VRF. + + +**Example** + + +This command enables IPv4 +routing. 
+``` +`switch(config)# **ip routing** +switch(config)#` +``` + + +### ip source binding + + +Layer 2 Port-Channels support IP source guard (IPSG), not member ports. The IPSG +configuration on port channels supersedes the configuration on the physical member +ports. Therefore, source IP MAC binding entries should be configured on port +channels. When configured on a port channel member port, IPSG does not take effect +until you delete the port from the Port Channel configuration. + + +Note: IP source bindings are also used by static ARP inspection. + + +The **no ip source binding** and **default ip source +binding** commands exclude parameters from IPSG filtering, and +set the default for **ip source binding**. + + +**Command Mode** + + +interface-Ethernet Configuration + + +**Command Syntax** + + +ip source binding +[ip_address][mac_address] +vlan [vlan_range] +interface [interface] + + +no ip source binding +[ip_address][mac_address] +vlan [vlan_range] +interface [interface] + + +default ip source binding +[ip_address][mac_address] +vlan [vlan_range] +interface [interface] + + +**Parameters** + +- **ip_address** - Specifies the IP ADDRESS. + +- **mac_address** - Specifies the MAC ADDRESS. + +- **vlan +vlan_range** - Specifies the VLAN ID range. + +- **interface +interface** - Specifies the Ethernet +interface. + + +**Related Commands** + +- ip verify source + +- show ip verify source + + +**Example** + + +This command configures source IP-MAC binding entries to IP address +**10.1.1.1**, MAC address +**0000.aaaa.1111**, VLAN ID +**4094**, and **interface ethernet +36**. +``` +`switch(config)# **ip source binding 10.1.1.1 0000.aaaa.1111 vlan 4094 interface +ethernet 36** +switch(config)#` +``` + + +### ip verify source + + +The **ip verify source** command +configures IP source guard (IPSG) applicable only to Layer 2 ports. When configured +on Layer 3 ports, IPSG does not take effect until this interface converts to Layer +2. 
+ + +Layer 2 Port-Channels support IPSG, not member ports. The IPSG +configuration on port channels supersedes the configuration on the physical member +ports. Therefore, source IP MAC binding entries should be configured on port +channels. When configured on a port channel member port, IPSG does not take effect +until you delete the port from the Port Channel configuration. + + +The +**no ip verify source** and **default ip +verify source** commands exclude VLAN IDs from IPSG filtering, +and set the default for **ip verify +source**. + + +**Command Mode** + + +Interface-Ethernet +Configuration + + +**Command Syntax** + + +ip verify source vlan +[vlan_range] + + +no ip verify source +[vlan_range] + + +default ip verify +source + + +**Parameters** + + +**vlan_range** +- Specifies the VLAN ID range. + +**Related Commands** + +- ip source binding + +- show ip verify source + + +**Example** + +This command excludes VLAN IDs +**1** through **3** from IPSG +filtering. When enabled on a trunk port, IPSG filters the inbound IP packets on all +allowed VLANs. IP packets received on VLANs **4** through +**10** on **Ethernet 36** filter +by IPSG, while permitting those received on VLANs **1** +through +**3**. +``` +`switch(config)# **no ip verify source vlan 1-3** +switch(config)# **interface ethernet 36** +switch(config-if-Et36)# **switchport mode trunk** +switch(config-if-Et36)# **switchport trunk allowed vlan 1-10** +switch(config-if-Et36)# **ip verify source** +switch(config-if-Et36)#` +``` + + +### ip verify + + +The **ip verify** command configures Unicast Reverse Path +Forwarding (uRPF) for inbound IPv4 packets on the configuration mode interface. uRPF +verifies the accessibility of source IP addresses in packets that the switch +forwards. + + +uRPF defines two operational modes: strict mode and loose mode. + +- **Strict mode** - uRPF verifies that a packetreceived on the interface +with the routing table entry specifies for its return packet. 
+ +- **Loose mode** - uRPF validation does not consider the inbound packet’s +ingress interface only if a valid return path exists. + + +The **no ip verify** and **default ip +verify** commands disable uRPF on the configuration mode +interface by deleting the corresponding **ip verify** command +from ***running-config***. + + +**Command Mode** + + +Interface-Ethernet Configuration + + +Interface-Loopback Configuration + + +Interface-Management Configuration + + +Interface-Port-Channel Configuration + + +Interface-VLAN Configuration + + +**Command Syntax** + + +ip verify unicast source reachable-via +rpf_mode + + +no ip verify unicast + + +default ip verify unicast + + +**Parameters** + + +**rpf_mode** - Specifies the uRPF mode. Options include: + +- **any** - Loose mode. + +- **rx** - Strict mode. + +- **rx allow-default** - Strict mode. All inbound +packets forward if a default route is defined. + + +**Guidelines** + + +The first IPv4 uRPF implementation briefly disrupts IPv4 unicast routing. Subsequent +**ip verify** commands on any interface do not disrupt +IPv4 routing. + + +**Examples** + +- This command enables uRPF loose mode on **VLAN interface +17**. +``` +`switch(config)# **interface vlan 17** +switch(config-if-Vl17)# **ip verify unicast source reachable-via any** +switch(config-if-Vl17)# **show active** + interface Vlan17 + ip verify unicast source reachable-via any +switch(config-if-Vl17)#` +``` + +- This command enables uRPF strict mode on **VLAN interface +18**. +``` +`switch(config)# **interface vlan 18** +switch(config-if-Vl18)# **ip verify unicast source reachable-via rx** +switch(config-if-Vl18)# **show active** + interface Vlan18 + ip verify unicast source reachable-via rx +switch(config-if-Vl18)#` +``` + + +### ipv4 routable 240.0.0.0/4 + + +The **ipv4 routable 240.0.0.0/4** command assignes an class E +addresses to an interface. 
When configured, the class E address traffic are routed +through BGP, OSPF, ISIS, RIP, static routes and programmed to the FIB and kernel. By +default, this command is disabled. + + +The **no ipv4 routable 240.0.0.0/4** and **default +ipv4 routable 240.0.0.0/4** commands disable IPv4 Class E +routing by removing the **ipv4 routable 240.0.0.0/4** command +from ***running-config***. + + +IPv4 routable **240.0.0.0/4** routing is disabled by +default. + + +**Command Mode** + + +Router General Configuration + + +**Command Syntax** + + +ipv4 routable 240.0.0.0/4 + + +no ipv4 routable 240.0.0.0/4 + + +default ipv4 routable 240.0.0.0/4 + + +**Example** + + +These commands configure an IPv4 Class E (**240/4**) address to +an interface. +``` +`switch(config)# **router general** +switch(config-router-general)# **ipv4 routable 240.0.0.0/4**` +``` + + +### load-balance cluster + + +The **load-balance cluster** command enters the Cluster Load Balancing (CLB) Configuration Mode +and configure parameters for cluster load-balancing on a network. The **no** version of the command +removes the configuration from the ***running-config***. + + +**Command Mode** + + +Global Configuration + + +**Command Syntax** + + +**load-balance cluster** + + +**no load-balance cluster** + + +**Parameters** + + +- **load-balance cluster** - Enters the Cluster Load Balancing (CLB) Configuration Mode. + + +**Example** + + +Use the following command to enter the Cluster Load Balancing (CLB) Configuration Mode: + + +``` +`switch(config)# **load-balance cluster** +switch(config-clb)#` +``` + + +### load-balance method + + +The **load-balance** command in the Cluster Load Balancing +Configuration Mode configures the method of load-balancing traffic on the cluster. +Currently, EOS supports Round-Robin and Spine types of load balancing. + + +The **no** version of the command deletes the configuration from the ***running-config***. 
+ + +**Command Mode** + + +Cluster Load Balancing Configuration + + +**Command Syntax** + + +**load-balance method [flow round-robin] [spine port-index]** + + +**no load-balance method [flow round-robin] [spine +port-index]** + + +**Parameters** + + +- **load-balance method flow round-robin** - Specify the load-balancing method as round-robin for flows. + +- **load-balance method spine port-index** - Specify the load-balancing method as port index for spines. + + +**Examples** + + +Use the following commands to configure round-robin as the load-balancing flow method: + + +``` +`switch(config)# **load-balance cluster** +switch(config-clb)# **load-balance method flow round-robin** +switch(config-clb)#` +``` + + +Use the following commands to configure port-index as the load-balancing spine method: + + +``` +`switch(config)# **load-balance cluster** + switch(config-clb)# **load-balance method spine port-index** + switch(config-clb)#` +``` + + +### member Ethernet + + +The **member Ethernet** command in the Port Group Host Configuration Mode configures per port hardware interfaces for cluster load balancing on the switch. + + +The **no** version of the command deletes the configuration from the ***running-config***. + + +**Command Mode** + + +Cluster Load Balancing Configuration + + +Port Group Host Configuration + + +**Command Syntax** + + +**member Ethernet interface_number** + + +**no member Ethernet interface_number** + + +**Parameters** + + +- **member Ethernet interface_number** - Configure the Ethernet hardware interface number from 1 to 46 per port group. 
+ + +**Example** + + +Add Ethernet 1 hardware interface to MyPortGroup: + + +``` +`switch(config)# **load-balance cluster** +switch(config-clb)# **port group host MyPortGroup** +switch(config-clb-port-host-MyPortGroup)# **member Ethernet 1**` +``` + + + + +### platform barefoot bfrt vrf + + +The **platform barefoot bfrt vrf** command configures the +forwarding plane agent on supported platforms to restart and listen on the +configured VRF for connections. If left unconfigured, the switch uses the default +VRF for the IP and port for the BfRuntime server. + + +**Command Mode** + + +Global Configuration + + +**Command Syntax** + + +platform barefoot bfrt vrf +vrf_name + + +**Parameter** + + +**vrf_name** - Specify the name of the VRF on which the agent listens for +connections. + + +**Example** + + +These commands configure the forwarding plane agent to restart and listen on the +configured VRF for +connections. +``` +`switch(config)# **vrf instance management** +switch(config-vrf-management)# **exit** +switch(config)# **platform barefoot bfrt 0.0.0.0 50052** +switch(config)# **platform barefoot bfrt vrf management** +switch(config)# **int management1** +switch(config-if-Ma1)# **vrf management**` +``` + + +### platform trident forwarding-table +partition + + +The **platform trident forwarding-table partition** command +provides a shared table memory for L2, L3 and algorithmic LPM entries that can be +partitioned in different ways. + + +Instead of fixed-size tables for L2 MAC entry tables, L3 IP forwarding tables, and +Longest Prefix Match (LPM) routes, the tables can be unified into a single shareable +forwarding table. + + +Note: Changing the Unified Forwarding Table mode causes the forwarding agent to restart, +briefly disrupting traffic forwarding on all ports. + + +The **no platform trident forwarding-table partition** and +**default platform trident forwarding-table +partition** commands remove the **platform trident +forwarding-table partition** command from +***running-config***. 
+ + +**Command Mode** + + +Global Configuration + + +**Command Syntax** + + +platform trident forwarding-table partition +size + + +no platform trident forwarding-table partition + + +default platform trident forwarding-table partition + + +**Parameters** + + +**size**       Size of partition. Options include the +following: + +- **0**      288k l2 entries, 16k host entries, 16k lpm +entries. + +- **1**      224k l2 entries, 80k host entries, 16k lpm +entries. + +- **2**      160k l2 entries, 144k host entries, 16k lpm +entries. + +- **3**      96k l2 entries, 208k host entries, 16k lpm +entries. + + +The default value is **2** (160k l2 entries, 144k host entries, +16k lpm entries). + + +**Examples** + +- This command sets the single shareable forwarding table to option 2 that +supports 160k L2 entries, 144k host entries, and 16k LPM +entries. +``` +`switch(config)# **platform trident forwarding-table partition 2** +switch(config)` +``` + +- This command sets the single shareable forwarding table to option 3 that +supports 96k L2 entries, 208k host entries, and 16k LPM entries. Since the +switch was previously configured to option 2, you’ll see a warning notice +before the changes are +implemented. +``` +`switch(config)# **platform trident forwarding-table partition 3** + +Warning: StrataAgent will restart immediately` +``` + + +### platform trident routing-table +partition + + +The **platform trident routing-table partition** command +manages the partition sizes for the hardware LPM table that stores IPv6 routes of +varying sizes. + + +An IPv6 route of length /64 (or shorter) requires half the hardware resources of an +IPv6 route longer than /64. The switch installs routes of varying lengths in +different table partitions. This command specifies the size of these partitions to +optimize table usage. + + +Note: Changing the routing table partition mode causes the forwarding agent to restart, +briefly disrupting traffic forwarding on all ports. 
+ + +The **no platform trident routing-table partition** and +**default platform trident routing-table partition** +commands restore the default partitions sizes by removing the **platform +trident routing-table partition** command from +***running-config***. + + +**Command Mode** + + +Global Configuration + + +**Command Syntax** + + +platform trident routing-table partition +size + + +no platform trident routing-table partition + + +default platform trident routing-table partition + + +**Parameters** + + +**size**      Size of partition. Options include the +following: + +- **1**      16k IPv4 entries, 6k IPv6 (/64 and smaller) +entries, 1k IPv6 (any prefix length). + +- **2**      16k IPv4 entries, 4k IPv6 (/64 and smaller) +entries, 2k IPv6 (any prefix length). + +- **3**      16k IPv4 entries, 2k IPv6 (/64 and smaller) +entries, 3k IPv6 (any prefix length). +The default value is +**2** (16k IPv4 entries, 4k IPv6 (/64 and +smaller) entries, 2k IPv6 (any prefix length). + + +**Restrictions** + + +Partition allocation cannot be changed from the default setting when enabling uRPF +for IPv6 traffic. + + +**Example** + + +This command sets the shareable routing table to option **1** +that supports **6K** prefixes equal to or shorter than +**/64** and **1K** prefixes +longer than +**/64**. +``` +`switch(config)# **platform trident routing-table partition 1** +switch(config)` +``` + + +### port group host + + +The **port group host** command enters the Port Group Host Configuration mode and configures additional port parameters for Cluster Load Balancing by identifying the ports +connected to the GPU server. + + +The **no** version of the command deletes the configuration from the ***running-config***. 
+ + +**Command Mode** + + +Cluster Load Balancing Configuration + + +Port Group Host Configuration + + +**Command Syntax** + + +**port group host word** + + +**no port group host word** + + +**Parameters** + + +- **port group host word** - Specify a name for the port group host. + + +**Example** + + +Use the following commands to create a port group host, MyPortGroup, and enter Port Group Host Configuration Mode: + + +``` +`switch(config)# **load-balance cluster** +switch(config-clb)# **port group host MyPortGroup** +switch(config-clb-port-host-MyPortGroup)#` +``` + + +### rib fib policy + + +The **rib fib policy** command enables FIB policy for a +particular VRF under router general configuration mode. The FIB policy can be +configured to advertise only specific RIB routes and exclude all other routes. + + +For example, a FIB policy can be configured that does not place routes associated +with a specific origin in the routing table. These routes do not forward data +packets and are not advertised by the routing protocol to neighbors. + + +The **no rib fib policy** and **default rib fib +policy** commands restore the switch to its default state by +removing the corresponding **rib fib policy** command from +***running-config***. + + +**Command Mode** + + +Router General Configuration + + +**Command Syntax** + + +rib [ipv4 | ipv6] +fib policy +name + + +no rib [ipv4 | ipv6] +fib policy +name + + +default rib [ipv4 | ipv6] +fib policy +name + + +**Parameters** + +- **ipv4** - IPv4 configuration commands. + +- **ipv6** - IPv6 configuration commands. + +- **name** - Route map name. + + +**Example** + + +The following example enables FIB policy for IPv4 in the default VRF, using the route +map, +**map1**. +``` +`Switch(config)# **router general** +Switch(config-router-general)# **vrf default** +Switch(config-router-general-vrf-default)# **rib ipv4 fib policy map1**` +``` + + +### show arp + + +The **show arp** command displays all ARP tables. 
This command +differs from the show ip arp command in that it shows MAC +bindings for all protocols, whereas show ip arp only displays +MAC address – IP address bindings. Addresses display with their host name by +including the ***resolve*** argument. + + +**Command Mode** + + +EXEC + + +**Command Syntax** + + +show arp +[vrf_inst][format][host_addr][host_name][intf][mac_addr][data] + + +**Parameters** + + +The **vrf_inst** and **format** +parameters are always listed first and second. The **data** +parameter is always listed last. All other parameters can be placed in any order. + +- **vrf_inst** - Specifies the VRF instance to display +data. + +- **no parameter** - Context-active VRF. + +- **vrf** +**vrf_name** - Specifies name of VRF instance. +System default VRF is specified by +**default**. + +- **format** - Displays format of host address. Options +include the following: + +- **no parameter** - Entries associate hardware address with an +IPv4 address. + +- **resolve** - Enter associate hardware address +with a host name (if it exists). + +- **host_addr** -  IPv4 address to filter routing table +entries. Options include the following: + +- **no parameter** - Routing table entries not filtered by host +address. + +- **ipv4_addr** - Table entries matching +specified IPv4 address. + +- **host_name** - Host name to filter routing table +entries. Options include the following: + +- **no parameter** - Routing table entries not filtered by host +name. + +- **host** +**hostname** - Entries matching +**hostname** (text). + +- **intf** - Interfaces for which command displays +status. + +- **no parameter** - Routing table entries not filtered by +interface. + +- **interface ethernet** +**e_num** - Routed Ethernet interface +specified by **e_num**. + +- **interface loopback** +**l_num** - Routed loopback interface +specified by **l_num**. + +- **interface management** +**m_num** - Routed management interface +specified by **m_num**. 
+ +- **interface port-channel** +**p_num** - Routed port channel Interface +specified by **p_num**. + +- **interface vlan** +**v_num** - VLAN interface specified by +**v_num**. + +- **interface VXLAN** +**vx_num** - VXLAN interface specified by +**vx_num**. + +- **mac_addr** - MAC address to filter routing table +entries. Options include the following: + +- **no parameter** - Routing table entries not filtered by +interface MAC address. + +- **mac_address** +**mac_address** - Entries matching +**mac_address** (dotted hex notation – +H.H.H). + +- **data** - Detail of information provided by command. +Options include the following: + +- **no parameter** - Routing table entries. + +- **summary** - Summary of ARP table +entries. + +- **summary total** - Number of ARP table entries. + + +**Related Commands** + + +The cli vrf command specifies the context-active VRF. + + +**Example** + + +This command displays the ARP +table. +``` +`switch> **show arp** +Address Age (min) Hardware Addr Interface +172.22.30.1 0 001c.730b.1d15 Management1 +172.22.30.133 0 001c.7304.3906 Management1 +switch>` +``` + + +### show arp agent + + +The **show arp agent** command displays the aggregate of all ARP entries that the CLI and other switch +agents requested the ARP agent to install in EOS. + + +**Command Mode** + + +EXEC + + +**Command Syntax** + + +show arp agent[ipv4 | ipv6] +[cache | dynamic | capacity +| [interface +interface] + + +**Parameters** + + +- **[ipv4 | ipv6]** - Display details about +IPv4 or IPv6 parameters. + +- **cache** - Display neighbor cache information. + +- **dynamic** - Display the capacity of the dynamic neighbor +resolutions. + +- **capacity** - Display the configured capacity of address +resolutions. + +- **interface +interface** - Specify the interface to display ARP +agent details. 
+ + +**Examples** + + +Use the following command to display IPv4 ARP agent details for Ethernet interface +1: +``` +`switch# **show arp agent ipv4 cache dynamic capacity interface ethernet 1 summary** +Ethernet1 + Cache Entry Kind: dynamic + Capacity: 100 + Entries: 5` +``` + + +Use the following command to display IPv6 ARP agent details for Ethernet interface +1: +``` +`switch# **show arp agent ipv6 cache dynamic capacity interface ethernet 1 summary** +Ethernet1 + Link-local excluded + Cache Entry Kind: dynamic + Capacity: 250 + Entries: 5` +``` + + +Executing the command without the **summary** parameter displays +the list of addresses tracked towards +capacity: +``` +`switch# **show arp agent ipv4 cache dynamic capacity interface ethernet 1** +Ethernet1 + Cache Entry Kind: dynamic + Capacity: 100 + Entries: 5 + 10.0.0.1 + 10.0.0.2 + 10.0.0.3 + 10.0.0.4 + 10.0.0.5` +``` + + +``` +`switch# **show arp agent ipv6 cache dynamic capacity interface ethernet 1** + Ethernet1 + Link-local excluded + Cache Entry Kind: dynamic + Capacity: 250 + Entries: 5 + 1::1 + 1::2 + 1::3 + 1::4 + 1::5` +``` + + +### show dhcp server + + +Use the **show dhcp server** command to display DHCP server information. + + +**Command Mode** + + +EXEC + + +**Command Syntax** + + + +show dhcp server [ipv4 | ipv6 | +leases | vrf] + + +**Parameters** + +- **ipv4** Displays details related to IPv4. + +- **ipv6** Displays details related to IPv6. + +- **leases** Displays active leases. + +- **A.B.C.D/E** IPv4 subnet. + +- **NAME** Subnet name. + + + +**Examples** + +- The following output displays DHCPv4. 
+ + + +``` +`switch# **show dhcp server ipv4** +IPv4 DHCP Server is active +Debug log is enabled +DNS server(s): 10.2.2.2 +DNS domain name: mydomain +Lease duration: 1 days 0 hours 0 minutes +TFTP server: +myserver (Option 66) +10.0.0.3 (Option 150) +TFTP file: fileFoo +Active Leases: 1 +IPv4 DHCP interface status: + Interface Status +------------------------------------------------- + Ethernet1 Inactive (Could not determine VRF) + Ethernet2 Inactive (Not in default VRF) + Ethernet3 Inactive (Kernel interface not created yet) + Ethernet4 Inactive (Not up) + Ethernet5 Inactive (No IP address) + Ethernet6 Active + +Vendor information: +Vendor ID: default + Sub-options Data +---------------- ---------------- + 1 192.0.2.0, 192.0.2.1 + +Vendor ID: vendorFoo + Sub-options Data +---------------- ----------- + 2 192.0.2.2 + 3 “data” + +Subnet: 10.0.0.0/8 +Subnet name: subnetFoo +Range: 10.0.0.1 to 10.0.0.10 +DNS server(s): 10.1.1.1 10.2.2.2 +Lease duration: 3 days 3 hours 3 minutes +Default gateway address: 10.0.0.3 +TFTP server: +subnetServerFoo (Option 66) +10.0.0.4 (Option 150) +TFTP boot file: subnetFiletftp +Active leases: 1 +Reservations: +MAC address: 1a1b.1c1d.1e1f +IPv4 address: 10.0.0.1 + +MAC address: 2a2b.2c2d.2e2f +IPv4 address: 10.0.0.2` +``` + +- In this example, DHCPv6 is configured with subnet + **fe80::/10** while being enabled on +**Ethernet1** with address +**fe80::1/64** and on + **Ethernet3** with address +**fe80::2/64**. 
+``` +`switch# **show dhcp server ipv6** +IPv6 DHCP server is active +Debug log is enabled +DNS server(s): fe80::6 +DNS domain name: testaristanetworks.com +Lease duration: 1 days 3 hours 30 minutes +Active leases: 0 +IPv6 DHCP interface status: + Interface Status +--------------- ------ + Ethernet1 Active + Ethernet3 Active + +Subnet: fe80::/10 +Subnet name: foo +Range: fe80::1 to fe80::3 +DNS server(s): fe80::4 fe80::5 +Direct: Inactive (Multiple interfaces match this subnet: Ethernet1 Ethernet3) +Relay: Active +Active leases: 0` +``` + +- This example illustrates when multiple subnets match an interface. In this example, + DHCPv6 is configured with subnets **fc00::/7** and +**fe80::/10** while being enabled on **Ethernet1** with + address **fe80::1/10** and + **fc00::1/7**. +``` +`switch# **show dhcp server ipv6** +IPv6 DHCP server is active +DNS server(s): fc00::2 +DNS domain name: testaristanetworks.com +Lease duration: 1 days 3 hours 30 minutes +Active leases: 0 +IPv6 DHCP interface status: + Interface Status +--------------- ------ + Ethernet1 Active + +Subnet: fc00::/7 +Subnet name: data +Range: fc00::1 to fc00::5 +DNS server(s): fc00::6 fc00::8 +Direct: Inactive (This and other subnets match interface Ethernet1) +Relay: Active + +Active leases: 0 + +Subnet: fe80::/10 +Subnet name: bar +Direct: Inactive (This and other subnets match interface Ethernet1) +Relay: Active + +Active leases: 0` +``` + +- After disabling a subnet, the **show dhcp server** command + displays the disable message with a reason. The number of active leases of the + disabled subnets displays as **0**. In this example, there are + overlapping subnets. 
+``` +`switch# **show dhcp server** +IPv4 DHCP Server is active +DNS server(s): 10.2.2.2 +Lease duration: 1 days 0 hours 0 minutes +Active Leases: 0 +IPv4 DHCP interface status: + Interface Status +------------------------------------------------- + Ethernet1 Active + +Subnet: 10.0.0.0/24 (Subnet is disabled - overlapping subnet 10.0.0.0/8) +Range: 10.0.0.1 to 10.0.0.10 +DNS server(s): 10.3.3.3 10.4.4.4 +Default gateway address: 10.0.0.4 +Active leases: 0 + +Subnet: 10.0.0.0/8 (Subnet is disabled - overlapping subnet 10.0.0.0/24) +DNS server(s): +Default gateway address: 10.0.0.3 +Active leases: 0` +``` + +- In this example, the display output shows overlapping + ranges. +``` +`switch# **show dhcp server** +IPv4 DHCP Server is active +DNS server(s): 10.2.2.2 +Lease duration: 1 days 0 hours 0 minutes +Active Leases: 0 +IPv4 DHCP interface status: + Interface Status +------------------------------------------------- + Ethernet1 Active + +Subnet: 10.0.0.0/8 (Subnet is disabled - range 10.0.0.9-10.0.0.12 overlaps with an existing pool) +Range: 10.0.0.1 to 10.0.0.10 +Range: 10.0.0.9 to 10.0.0.12 +DNS server(s): 10.3.3.3 10.4.4.4 +Default gateway address: 10.0.0.4 +Active leases: 0` +``` + +- This example displays duplicate static IP address + reservation. +``` +`Subnet: 10.0.0.0/8 (Subnet is disabled - ipv4-address 10.0.0.11 is reserved more than once) +Subnet name: +DNS server(s): +Default gateway address: 10.0.0.3 +Active leases: 0 +Reservations: +MAC address: 1a1b.1c1d.1e1f +IPv4 address: 10.0.0.11 + +MAC address: 2a2b.2c2d.2e2f +IPv4 address: 10.0.0.11` +``` + +- Use the **show dhcp server leases** command to display + detailed information about the IP addresses allocated by the DHCP Server (including + the IP address, the expected end time for that address, the time when the address is + handed out, and the equivalent MAC + address). 
+``` +`switch# **show dhcp server leases** +10.0.0.10 +End: 2019/06/20 17:44:34 UTC +Last transaction: 2019/06/19 17:44:34 UTC +MAC address: 5692.4c67.460a + +2000:0:0:40::b +End: 2019/06/20 18:06:33 UTC +Last transaction: 2019/06/20 14:36:33 UTC +MAC address: 165a.a86d.ffac` +``` + + + + +### show dhcp server leases + + +Use the **show dhcp server leases** command to display DHCP server lease information. + + +**Command Mode** + + +EXEC + + +**Command Syntax** + + + +**show dhcp server leases [ipv4 | ipv6]** + + +**Parameters** + +- **ipv4** - Displays details related to IPv4. + +- **ipv6** - Displays details related to IPv6. + + +**Example** + + +Use the **show dhcp server leases** command to display detailed +information about the IP addresses allocated by the DHCP Server including the IP +address, the expected end time for that address, the time when assigning the address, +and the equivalent MAC +address. +``` +`switch# **show dhcp server leases** +10.0.0.10 +End: 2019/06/20 17:44:34 UTC +Last transaction: 2019/06/19 17:44:34 UTC +MAC address: 5692.4c67.460a + +2000:0:0:40::b +End: 2019/06/20 18:06:33 UTC +Last transaction: 2019/06/20 14:36:33 UTC +MAC address: 165a.a86d.ffac` +``` + + +### show hardware capacity + + +The **show hardware capacity** command displays the utilization +of the hardware resources. + + +**Command Mode** + + +Privileged EXEC + + +**Command Syntax** + + +**show hardware capacity** + + +**Example** + + +The following command is used to show the utilization of the hardware resources: + +``` +`switch# **show hardware capacity** +Forwarding Resources Usage + +Table Feature Chip Used Used Free Committed Best Case High + Entries (%) Entries Entries Max Watermark + Entries +------ --------------- ------- ---------- ------- ---------- ------------ ----------- --------- +ECMP 0 0% 4095 0 4095 0 +ECMP Mpls 0 0% 4095 0 4095 0 +ECMP Routing 0 0% 4095 0 4095 0 +ECMP VXLANOverlay 0 0% 4095 0 4095 0 +ECMP VXLANTunnel 0 0% 3891 0 3891 0` +``` + + +###
show hardware resource DlbEcmpGroupTable agent * + + +The following platforms use the **show hardware resource DlbEcmpGroupTable agent *** command: + + +- DCS-7050CX4 + +- DCS-7050DX4-32S-F + +- DCS-7050PX4-32S-F + +- DCS-7050SDX4 + +- DCS-7050SPX4 + +- 7358X4-SC + + +**Command Mode** + + +Privileged EXEC + + +**Command Syntax** + + +show hardware resource DlbEcmpGroupTable agent * + + +**Example** + + +Use the following command to display information about DLB and ECMP +groups: +``` +`switch# **show hardware resource DlbEcmpGroupTable agent *** +Resource: bcm56881_b0::Common::DlbEcmpGroupTable +Feature agent: StrataL3Unicast +Unit id: 511 +View: entry +eId OC flowBase flowSize memPtr inDur member0Port member0PortValid ... +--- --- -------- -------- —---- —--- —---------- —--------------- ... + 2 1 512 1 2 50 11 1 ...` +``` + + +### show hardware resource l3 summary + + +The **show hardware resource l3 summary** command displays a summary of used hardware entries and the total available capacity for Layer 3 +features such as next-hops and ECMP groups. The command allows assessing the health of the forwarding plane and determining if the switch approaches resource limits. 
+ + +**Command Mode** + + +Privileged EXEC + + +**Command Syntax** + + +**show hardware resource l3 summary** + + + + +**Example** + + +Enter the command to display the following information: + + +``` +`(config)# **show hardware resource l3 summary** +Source lookup : disabled +Adjacency sharing : disabled +Route deletion delay : 0.0 seconds + +L3 interfaces : 1/4096 + +Nexthops : 59/32768 +Overlay nexthops : 50/24576 +Underlay nexthops : 9/24576 +Shared Overlay and Underlay nexthop tables : True +Tunnel Nexthops : 0/8192 + +Overlay ECMP groups : 0/4096 +Overlay ECMP members : 0/65536 +Underlay ECMP groups : 0/4096 +Underlay ECMP members : 0/65536 +Shared Overlay and Underlay ECMP member tables : True +Tunnel ECMP groups : 0/4096 +Tunnel ECMP members : 0/8192 + +IPv4 routes : 67 +IPv6 routes : 44 +IPv4 unprogrammed routes : 0 +IPv6 unprogrammed routes : 0 +IPv4 multicast(*, G) routes : 0/32768 +IPv6 multicast(*, G) routes : 0/32768 +IPv4 multicast(S, G) routes : 0/32768 +IPv6 multicast(S, G) routes : 0/16384 + +ALPM mode : 3-Level +Memory format : narrow mode +TCAM usage : 5/2304 +Level-2 cells : 2/6144 +Level-2 buckets : 1/1024 +Level-2 mem geometry : 1024 (buckets), 6 (banks) +Level-3 cells : 23/65536 +Level-3 buckets : 5/8192 +Level-3 mem geometry : 8192 (buckets), 8 (banks) +Pivots : 2 (ipv4 : 1, ipv6 : 1) +Subpivots : 5 (ipv4 : 2, ipv6 : 3) +ALPM routes : 111 (ipv4 : 67, ipv6 : 44) + +Multicast replication groups : 6/16384 +Repl head entries : 0/147456 +Repl list entries : 0/147456 + +Mystation TCAM entries : 1/128 + +Virtual ports : 2/8192` +``` + + +Table 4. Display Output + +| Field Name +| Description +| + + +| **Source lookup** +| Unicast Reverse Path Forwarding (uRPF) enabled or disabled. +| + + +| **Adjacency sharing** +| Enabled or disabled. +| + + +| **Route deletion delay** +| Indicates, in *seconds*, the delay of route deletion. +| + + +| **L3 interfaces x/n** +| x indicates the number of configured L3 ports. 
+n indicates the number of possible L3 ports. +| + + +| **Nexthops x/n** +| x indicates the total number of next-hops. +n indicates the maximum number of possible next-hops. +| + + +| **Overlay nexthops x/n** +| x indicates the number of L3 next-hops plus +VXLAN overlay next-hops. +n indicates the +maximum number of L3 next-hops plus VXLAN overlay next-hops. + + +x indicates the number of L3 next-hops.* + + +n indicates the maximum number of L3 +next-hops.* +| + + +| **Underlay nexthops x/n** +| x indicates the number of VXLAN underlay +next-hops plus HER next-hops plus underlay multicast routing +next-hops. +n indicates the maximum +number of L3 next-hops plus VXLAN overlay next-hops. + + +x indicates the number of VXLAN underlay +next-hops plus HER next-hops.* + + +n indicates the maximum number of VXLAN underlay +next-hops.* +| + + +| **Shared Overlay and Underlay nexthop tables** +| **False** or +**True*** +| + + +| **Tunnel Nexthops x/n** +| Not applicable +x indicates the number of +VXLAN underlay next-hops plus HER next-hops.* + + +n indicates the maximum possible VXLAN overlay +next-hops.* +| + + +| **Overlay ECMP groups x/n** +| x indicates the number of VXLAN overlay ECMP +groups. +n indicates the maximum +possible VXLAN overlay groups. + + +x indicates the number of L3 routing ECMP +groups.* + + +n indicates the maximum possible number of L3 +routing ECMP groups.* +| + + +| **Overlay ECMP members x/n** +| x indicates the number of VXLAN overlay ECMP +groups. +n indicates the maximum +possible VXLAN overlay members. + + +x indicates the number of normal L3 ECMP +groups.* + + +n indicates the maximum +possible number of L3 ECMP members.* +| + + +| **Underlay ECMP groups x/n** +| x indicates the number of VXLAN underlay ECMP +groups plus L3 routing ECMP groups. +n indicates +the maximum possible of combined groups. 
+ + +x indicates the number of VXLAN underlay ECMP +groups.* + + +n indicates the maximum possible number of VXLAN +underlay ECMP groups.* +| + + +| **Underlay ECMP members x/n** +| x indicates the number of VXLAN underlay ECMP +groups plus L3 routing ECMP members. +n +indicates the maximum possible number of combined members. + + +x indicates the number of VXLAN underlay ECMP +groups.* + + +n indicates the maximum possible number +of VXLAN underlay ECMP members.* +| + + +| **Shared Overlay and Underlay ECMP member tables** +| Always True or Always False*. +| + + +| **Tunnel ECMP groups : x/n** +| Not Applicable +x indicates the number of +VXLAN overlay ECMP groups.* + + +n indicates the maximum possible number of VXLAN +underlay ECMP groups.* +| + + +| **Tunnel ECMP members : x/n** +| Not Applicable +x indicates the number of +VXLAN overlay ECMP members.* + + +n indicates the maximum possible number of VXLAN +underlay ECMP members.* +| + + +| **IPv4 routes** +| Indicates the number of programmed IPv4 routes. +| + + +| **IPv6 routes** +| Indicates the number of programmed IPv6 routes. +| + + +| **IPv4 unprogrammed routes** +| Indicates the number of unprogrammed IPv4 routes. +| + + +| **IPv6 unprogrammed routes** +| Indicates the number of unprogrammed IPv6 routes. +| + + +| **Host table usage : x/n** +| x indicates the number of host table entries +used. +n indicates the total number of +host table entries used. + + Not Applicable* +| + + +| **IPv4 unicast routes : x/n** +| x indicates the number of unicast IPv4 routes. + +n indicates the total number of +possible IPv4 routes. + + Not Applicable* +| + + +| **IPv6 unicast routes : x/n** +| x indicates the number of unicast IPv6 routes. + +n indicates the total number of +possible IPv6 routes. + + Not Applicable* +| + + +| **IPv4 multicast(*, G) routes : x/n** +| x indicates the number of IPv4 multicast +routes from any source to multicast group. 
+n +indicates the maximum number of possible IPv4 multicast routes +from any source to multicast group. + + +Not +Applicable* +| + + +| **IPv6 multicast(*, G) routes : x/n** +| x indicates the number of IPv6 multicast +routes from any source to multicast group. +n +indicates the maximum number of possible IPv6 multicast routes +from any source to multicast group. + + Not Applicable* +| + + +| **IPv4 multicast(S, G) routes : x/n** +| x indicates the number of IPv4 multicast +routes from a source IP to a multicast group. +n +indicates the maximum number of IPv4 multicast routes from a +source IP to a multicast group. + + Not Applicable* +| + + +| **IPv6 multicast(S, G) routes : x/n** +| x indicates the number of IPv6 multicast +routes from a source IP to a multicast group. +n +indicates the maximum number of IPv6 multicast routes from a +source IP to a multicast group. + + Not Applicable* +| + + +| **Memory format** +| Narrow or wide mode. +| + + +| **TCAM usage : x/n** +| x indicates the number of TCAM entries on the switch. + +n indicates the maximum number of programmable TCAM entries on the switch. +| + + +| **Level-2 cells : x/n** +| x indicates the number of cells used on the switch. + +n indicates the total number of cells in the ALPM level 2 table on the switch. +| + + +| **Level-2 buckets : x/n** +| x indicates the number of buckets used on the switch. + +n indicates the total number of buckets in the ALPM level 2 table on the switch. +| + + +| **Level-2 mem geometry x(buckets),n(banks)** +| Indicates the number of buckets and banks on the switch. +| + + +| **Level-3 cells : x/n** +| x indicates the number of cells used on the switch. + +n indicates the total number of cells in the ALPM level 3 table on the switch. +| + + +| **Level-3 buckets : x/n** +| x indicates the number of buckets used on the switch. + +n indicates the total number of buckets in the ALPM level 3 table on the switch.
+| + + +| **Level-3 mem geometry x(buckets),n(banks)** +| x indicates the number of Level 3 buckets used. + +n indicates the number of Level 3 banks used. +| + + +| **Pivots : n (ipv4 : x , ipv6 : y)** +| n indicates the number of pivots in the ALPM +tree. +x indicates the number of IPv4 +pivots. + + +y indicates the number of IPv6 pivots. +| + + +| **Subpivots : n (ipv4 : x , ipv6 : y)** +| n indicates the number of subpivots in the +ALPM tree. +x indicates the number of IPv4 +subpivots. + + +y indicates the number of +IPv6 subpivots. +| + + +| **ALPM routes : n (ipv4 : x , ipv6 : y)** +| n indicates the number of ALPM routes. + +x indicates the number of IPv4 ALPM +routes. + + +y indicates the number of IPv6 ALPM +routes. +| + + +| **Multicast replication groups : x/n** +| x indicates the number of multicast +replication groups programmed, includes L3MC, L2MC +group. +n indicates the total number of +multicast replication groups programmed, includes L3MC, L2MC +group. + + Not applicable* +| + + +| **Repl head entries : x/n** +| x indicates the number of Repl head entries +programmed. +n indicates the total +number of Repl head entries programmed. + + Not +applicable* +| + + +| **Repl list entries : x/n** +| x indicates the number of Repl list entries +programmed. +n indicates the total number of +Repl list entries programmed. + + +Not applicable* +| + + +| **Mystation TCAM entries : x/n** +| x indicates the number of Mystation TCAM entries programmed. + +n indicates the total number of Mystation TCAM entries programmed. +| + + +| **Virtual ports : x/n** +| x indicates the number of virtual ports. + +n indicates the maximum possible number of virtual ports. +| + + +*****Applies to the following platforms: + + + + + - DCS-7060X6-32PE-F + + - DCS-7060X6-32PE-N + + - DCS-7060X6-64PE-F + + + + +### show interface tunnel + + +The **show interface tunnel** command displays the interface +tunnel information.
+ + +**Command Mode** + + +EXEC + + +**Command Syntax** + + +show interface tunnel +number + + +**Parameter** + + +**number** - Specifies the tunnel interface number. + + +**Example** + + +This command displays tunnel interface configuration information for tunnel interface +**10**. +``` +`switch# **show interface tunnel 10** + +Tunnel10 is up, line protocol is up (connected) + Hardware is Tunnel, address is 0a01.0101.0800 + Internet address is 192.168.1.1/24 + Broadcast address is 255.255.255.255 + Tunnel source 10.1.1.1, destination 10.1.1.2 + Tunnel protocol/transport GRE/IP + Key disabled, sequencing disabled + Checksumming of packets disabled + Tunnel TTL 10, Hardware forwarding enabled + Tunnel TOS 10 + Path MTU Discovery + Tunnel transport MTU 1476 bytes + Up 3 seconds` +``` + + +### show ip + + +The **show ip** command displays IPv4 routing, IPv6 routing, +IPv4 multicast routing, and VRRP status on the switch. + + +**Command Mode** + + +EXEC + + +**Command Syntax** + + +show ip + + +**Example** + + +This command displays IPv4 routing +status. +``` +`switch> **show ip** + +IP Routing : Enabled +IP Multicast Routing : Disabled +VRRP: Configured on 0 interfaces + +IPv6 Unicast Routing : Enabled +IPv6 ECMP Route support : False +IPv6 ECMP Route nexthop index: 5 +IPv6 ECMP Route num prefix bits for nexthop index: 10 + +switch>` +``` + + +### show ip arp + + +The **show ip arp** command displays ARP cache entries that map +an IPv4 address to a corresponding MAC address. The table displays addresses by the +host names when the command includes the ***resolve*** +argument. + + +**Command Mode** + + +EXEC + + +**Command Syntax** + + +show ip arp +[vrf_inst][format][host_addr][host_name][intf][mac_addr][data] + + +**Parameters** + + +The **vrf_inst** and **format** +parameters list first and second. The **data** parameter lists +last. All other parameters can be placed in any order. + +- **vrf_inst** - Specifies the VRF instance to display +data. 
+ +- **no parameter** - Specifies the Context-active VRF. + +- **vrf** +**vrf_name** - Specifies name of VRF instance. +The system default VRF is specified by +**default**. + +- **format** - Displays format of host address. The +options include the following: + +- **no parameter** - Displays entries that associate a hardware address +with an IPv4 address. + +- **resolve** - Displays the specific associated +hardware address with a host name (if it exists). + +- **host_addr** - Specifies the IPv4 address to filter +routing table entries. The options include the following: + +- **no parameter** - Routing table entries not filtered by host +address. + +- **ipv4_addr**   - Table entries matching +specified IPv4 address. + +- **host_name** - Host name to filter routing table +entries. The options include the following: + +- **no parameter** - Routing table entries not filtered by host +name. + +- **host** +**hostname** - Entries with matching +**hostname** (text). + +- **interface_name** - Interfaces to display status. + +- **no parameter** - Routing table entries not filtered by +interface. + +- **interface ethernet** +**e_num** - Routed Ethernet interface +specified by **e_num**. + +- **interface loopback** +**l_num** - Routed loopback interface +specified by **l_num**. + +- **interface management** +**m_num** - Routed management interface +specified by **m_num**. + +- **interface port-channel** +**p_num**  - Routed port channel Interface +specified by **p_num**. + +- **interface vlan** +**v_num** - VLAN interface specified by +**v_num**. + +- **interface VXLAN** +**vx_num**  - VXLAN interface specified by +**vx_num**. + +- **mac_addr**  - MAC address to filter routing table entries. +The options include the following: + +- **no parameter** - Routing table entries not filtered by +interface MAC address. + +- **mac_address** +**mac_address** - Entries with matching +**mac_address** (dotted hex notation – +H.H.H). + +- **data** - Details of information provided by command.
+The options include the following: + +- **no parameter** - Routing table entries. + +- **summary** - Summary of ARP table +entries. + +- **summary total** - Number of ARP table +entries. + + +**Examples** + +- This command displays ARP cache entries that map MAC addresses to IPv4 +addresses. +``` +`switch> **show ip arp** + +Address Age (min) Hardware Addr Interface +172.25.0.2 0  004c.6211.021e Vlan101, Port-Channel2 +172.22.0.1 0  004c.6214.3699 Vlan1000, Port-Channel1 +172.22.0.2 0  004c.6219.a0f3 Vlan1000, Port-Channel1 +172.22.0.3 0  0045.4942.a32c Vlan1000, Ethernet33 +172.22.0.5 0  f012.3118.c09d Vlan1000, Port-Channel1 +172.22.0.6 0  00e1.d11a.a1eb Vlan1000, Ethernet5 +172.22.0.7 0  004f.e320.cd23 Vlan1000, Ethernet6 +172.22.0.8 0  0032.48da.f9d9 Vlan1000, Ethernet37 +172.22.0.9 0  0018.910a.1fc5 Vlan1000, Ethernet29 +172.22.0.11 0  0056.cbe9.8510 Vlan1000, Ethernet26 +switch>` +``` + +- This command displays ARP cache entries that map MAC addresses to IPv4 +addresses. The output displays host names assigned to IP addresses in place +of the +address. +``` +`switch> **show ip arp resolve** + +Address Age (min) Hardware Addr Interface +green-vl101.new         0  004c.6211.021e Vlan101, Port-Channel2 +172.22.0.1 0  004c.6214.3699 Vlan1000, Port-Channel1 +orange-vl1000.n         0  004c.6219.a0f3 Vlan1000, Port-Channel1 +172.22.0.3 0  0045.4942.a32c Vlan1000, Ethernet33 +purple.newcompa         0  f012.3118.c09d Vlan1000, Port-Channel1 +pink.newcompany         0  00e1.d11a.a1eb Vlan1000, Ethernet5 +yellow.newcompa         0  004f.e320.cd23 Vlan1000, Ethernet6 +172.22.0.8 0  0032.48da.f9d9 Vlan1000, Ethernet37 +royalblue.newco         0  0018.910a.1fc5 Vlan1000, Ethernet29 +172.22.0.11 0  0056.cbe9.8510 Vlan1000, Ethernet26 +switch>` +``` + + +### show ip arp inspection +statistics + + +The **show ip arp inspection statistics** command displays the +statistics of inspected ARP packets.
For a specified VLAN, the output +displays only VLANs with ARP inspection enabled. If no VLAN is specified, the output +displays all VLANs with ARP inspection enabled. + + +**Command Mode** + + +EXEC + + +**Command Syntax** + + +show ip arp inspection statistics [vlan +[vid]|[interface] +interface +intf_slot | intf_port] + + +**Parameters** + + +- **vid** - Specifies the VLAN interface ID. + +- **interface** - Specifies the interface (e.g., +Ethernet). + +- **intf_slot** - Specifies the interface +slot. + +- **intf_port** - Specifies the interface +port. + +- **INTF** - Specifies the VLAN interface slot and +port. + + +**Related Commands** + +- ip arp inspection limit + +- ip arp inspection trust + +- ip arp inspection vlan + + +**Examples** + +- This command displays statistics of inspected ARP packets for VLAN +**10**. +``` +`switch(config)# **show ip arp inspection statistics vlan 10** + +Vlan : 10 +-------------- +ARP +Req Forwarded = 20 +ARP Res Forwarded = 20 +ARP Req Dropped = 1 +ARP Res Dropped = 1 +Last invalid ARP: +Time: 10:20:30 ( 5 minutes ago ) +Reason: Bad IP/Mac match +Received on: Ethernet 3/1 +Packet: +  Source MAC: 00:01:00:01:00:01 +  Dest MAC: 00:02:00:02:00:02 +  ARP Type: Request +  ARP Sender MAC: 00:01:00:01:00:01 +  ARP Sender IP: 1.1.1 + +switch(config)#` +``` + +- This command displays ARP inspection statistics for Ethernet interface +**3/1**.
+``` +`switch(config)# **show ip arp inspection statistics ethernet interface 3/1** +interface : 3/1 +-------- +ARP Req Forwarded = 10 +ARP Res Forwarded = 10 +ARP Req Dropped = 1 +ARP Res Dropped = 1 + +Last invalid ARP: +Time: 10:20:30 ( 5 minutes ago ) +Reason: Bad IP/Mac match +Received on: VLAN 10 +Packet: +  Source MAC: 00:01:00:01:00:01 +  Dest MAC: 00:02:00:02:00:02 +  ARP Type: Request +  ARP Sender MAC: 00:01:00:01:00:01 +  ARP Sender IP: 1.1.1 + +switch(config)#` +``` + + +### show ip arp inspection +vlan + + +The **show ip arp inspection vlan** command displays the +configuration and operation state of ARP inspection. For a VLAN range specified, the +output displays only VLANs with ARP inspection enabled. If no VLAN specified, the +output displays all VLANs with ARP inspection enabled. The operation state turns to +***Active*** when hardware becomes ready to +trap ARP packets for inspection. + + +**Command Mode** + + +EXEC + + +**Command Syntax** + + +show ip arp inspection vlan [list] + + +**Parameters** + + +**list** - Specifies the VLAN interface number. + + +**Related Commands** + +- ip arp inspection limit + +- ip arp inspection trust + +- show ip arp inspection statistics + + +**Example** + + +This command displays the configuration and operation state of ARP inspection for +VLANs **1** through +**150**. +``` +`switch(config)# **show ip arp inspection vlan 1 - 150** + +VLAN 1 +---------- +Configuration +: Enabled +Operation State : Active +VLAN 2 +---------- +Configuration +: Enabled +Operation State : Active +{...} +VLAN 150 +---------- +Configuration +: Enabled +Operation State : Active + +switch(config)#` +``` + + +### show ip dhcp relay counters + + +The **show ip dhcp relay counters** command displays the number +of DHCP packets received, forwarded, or dropped on the switch and on all interfaces +enabled as DHCP relay agents. 
+ + +**Command Mode** + + +EXEC + + +**Command Syntax** + + +show ip dhcp relay counters + + +**Example** + + +This command displays the IP DHCP relay counter +table. +``` +`switch> **show ip dhcp relay counters** + + | Dhcp Packets | +Interface | Rcvd Fwdd Drop | Last Cleared +----------|----- ---- -----|--------------------- + All Req | 376 376 0 | 4 days, 19:55:12 ago + All Resp | 277 277 0 | + | | + Vlan1000 | 0 0 0 | 4 days, 19:54:24 ago + Vlan1036 | 376 277 0 | 4 days, 19:54:24 ago + +switch>` +``` + + +### show ip dhcp relay + + +The **show ip dhcp relay** command displays the DHCP relay +agent configuration status on the switch. + + +**Command Mode** + + +EXEC + + +**Command Syntax** + + +show ip dhcp relay + + +**Example** + + +This command displays the DHCP relay agent configuration +status. +``` +`switch> **show ip dhcp relay** +DHCP Relay is active +DHCP Relay Option (82)is enabled +DHCP Relay vendor-specific suboption (9) under information option (82) +DHCP Smart Relay is enabled +Interface: Vlan100 + DHCP Smart Relay is disabled + DHCP servers: 10.4.4.4 +switch>` +``` + + +### show ip dhcp snooping +counters + + +The **show ip dhcp snooping counters** command displays +counters that track the quantity of DHCP request and reply packets received by the +switch. The output displays data for each VLAN or aggregated for all VLANs with +counters for packets dropped. + + +**Command Mode** + + +EXEC + + +**Command Syntax** + + +show ip dhcp snooping counters +[counter_type] + + +**Parameters** + + +**counter_type** - Displays the type of counter. + +- **no parameter** - Command displays counters for each VLAN. + +- **debug** - Command displays aggregate counters and +drop cause counters. + + +**Examples** + +- This command displays the number of DHCP packets sent and received on each +VLAN.
+``` +`switch> **show ip dhcp snooping counters** + + | Dhcp Request Pkts | Dhcp Reply Pkts | +Vlan | Rcvd Fwdd Drop | Rcvd Fwdd Drop | Last Cleared +-----|------ ----- ------|----- ---- ------|------------- + 100 | 0 0 0 | 0 0 0 | 0:35:39 ago + +switch>` +``` + +- This command displays the number of DHCP packets sent on the +switch. +``` +`switch> **show ip dhcp snooping counters debug** +Counter Snooping to Relay Relay to Snooping +----------------------------- ----------------- ----------------- +Received 0 0 +Forwarded 0 0 +Dropped - Invalid VlanId 0 0 +Dropped - Parse error 0 0 +Dropped - Invalid Dhcp Optype 0 0 +Dropped - Invalid Info Option 0 0 +Dropped - Snooping disabled 0 0 + +Last Cleared: 3:37:18 ago +switch>` +``` + + +### show ip dhcp snooping +hardware + + +The **show ip dhcp snooping hardware** command displays +internal hardware DHCP snooping status on the switch. + + +**Command Mode** + + +EXEC + + +**Command Syntax** + + +show ip dhcp snooping hardware + + +**Example** + + +This command displays the DHCP snooping hardware +status. +``` +`switch> **show ip dhcp snooping hardware** +DHCP Snooping is enabled +DHCP Snooping is enabled on following VLANs: + None + Vlans enabled per Slice + Slice: FixedSystem + None +switch>` +``` + + +### show ip dhcp snooping + + +The **show ip dhcp snooping** command displays the DHCP +snooping configuration. + + +**Command Mode** + + +EXEC + + +**Command Syntax** + + +show ip dhcp snooping + + +**Example** + + +This command displays the switch’s DHCP snooping +configuration.
+``` +`switch> **show ip dhcp snooping** +DHCP Snooping is enabled +DHCP Snooping is operational +DHCP Snooping is configured on following VLANs: + 100 +DHCP Snooping is operational on following VLANs: + 100 +Insertion of Option-82 is enabled + Circuit-id format: Interface name:Vlan ID + Remote-id: 00:1c:73:1f:b4:38 (Switch MAC) +switch>` +``` + + +### show ip hardware fib summary + + +The **show ip hardware fib summary** command displays the +statistics of the RECMP. + + +**Command Mode** + + +Privileged EXEC + + +**Command Syntax** + + +show ip hardware fib summary + + +**Example** + + +The following command is used to show the statistics of +RECMP: +``` +`switch# **show ip hardware fib summary** +Fib summary +----------- +Adjacency sharing: disabled +BFD peer event: enabled +Deletion Delay: 0 +Protect default route: disabled +PBR: supported +URPF: supported +ICMP unreachable: enabled +Max Ale ECMP: 600 +UCMP weight deviation: 0.0 +Maximum number of routes: 0 +Fib compression: disabled +**Resource optimization for adjacency programming: enabled +Adjacency resource optimization thresholds: low 20, high 80**` +``` + + +**About the Output** + + +The last two lines of the output display whether the feature is enabled and the corresponding +threshold values for starting and stopping the optimization process. + + +### show ip interface + + +The **show ip interface** command displays the status of specified +interfaces that are configured as routed ports. The command provides the following +information: + +- Interface description + +- Internet address + +- Broadcast address + +- Address configuration method + +- Proxy-ARP status + +- MTU size + + +**Command Mode** + + +EXEC + + +**Command Syntax** + + +show ip interface [interface_name] +[vrf_inst] + + +**Parameters** + + +- **interface_name** - Interfaces for which command displays +status. + +- **no parameter** - All routed interfaces. + +- **ipv4_addr** - Neighbor IPv4 address.
+ +- **ethernet** +**e_range** - Routed Ethernet interfaces specified by +**e_range**. + +- **loopback** +**l_range** - Routed loopback interfaces specified by +**l_range**. + +- **management** +**m_range** - Routed management interfaces specified by +**m_range**. + +- **port-channel** +**p_range** -  Routed port channel Interfaces specified by +**p_range**. + +- **vlan** +**v_range** - VLAN interfaces specified by +**v_range**. + +- **VXLAN** +**vx_range** - VXLAN interfaces specified by +**vx_range**. + +- **vrf_inst** - Specifies the VRF instance for which data is +displayed. + +- **no parameter** - Context-active VRF. + +- **vrf** +**vrf_name** - Specifies name of VRF instance. System +default VRF is specified by **default**. + + +**Examples** + +- This command displays IP status of configured VLAN interfaces numbered between +**900** and +**910**. +``` +`switch> **show ip interface vlan 900-910** +! Some interfaces do not exist +Vlan901 is up, line protocol is up (connected) + Description: ar.pqt.mlag.peer + Internet address is 170.23.254.1/30 + Broadcast address is 255.255.255.255 + Address determined by manual configuration + Proxy-ARP is disabled + MTU 9212 bytes +Vlan903 is up, line protocol is up (connected) + Description: ar.pqt.rn.170.23.254.16/29 + Internet address is 170.23.254.19/29 + Broadcast address is 255.255.255.255 + Address determined by manual configuration + Proxy-ARP is disabled + MTU 9212 bytes` +``` + +- This command displays the configured TCP Maximum Segment Size (MSS) ceiling value of +**1436** bytes for an Ethernet interface +**25**. 
+ +``` +`switch> **show ip interface ethernet 25** +Ethernet25 is up, line protocol is up (connected) + Internet address is 10.1.1.1/24 + Broadcast address is 255.255.255.255 + IPv6 Interface Forwarding : None + Proxy-ARP is disabled + Local Proxy-ARP is disabled + Gratuitous ARP is ignored + IP MTU 1500 bytes + IPv4 TCP MSS egress ceiling is 1436 bytes` +``` + + +### show ip interface brief + + +Use the **show ip interface brief** command output to display +the status summary of the specified interfaces that are configured as routed ports. +The command provides the following information for each specified interface: + +- IP address + +- Operational status + +- Line protocol status + +- MTU size + + +**Command Mode** + + +EXEC + + +**Command Syntax** + + +**show ip interface [interface_name] +[vrf_inst] brief** + + +**Parameters** + +- **interface_name** - Interfaces for which command +displays status. + +- **no parameter** -  All routed +interfaces. + +- **ipv4_addr** - Neighbor IPv4 address. + +- **ethernet** +**e_range** - Routed Ethernet interfaces +specified by **e_range**. + +- **loopback** +**l_range** -Routed loopback interfaces +specified by **l_range**. + +- **management** +**m_range** -  Routed management interfaces +specified by **m_range**. + +- **port-channel** +**p_range** -Routed port channel Interfaces +specified by **p_range**. + +- **vlan** +**v_range** - VLAN interfaces specified by +**v_range**. + +- **VXLAN** +**vx_range** - VXLAN interface range specified +by **vx_range**. + +- **vrf_inst** - Specifies the VRF +instance for which data is displayed. + +- **no parameter** - Context-active VRF. + +- **vrf** +**vrf_name** -Specifies name of VRF +instance. System default VRF is specified by +**default**. + + +**Example**This command displays the summary status of VLAN interfaces +**900-910**. +``` +`switch> **show ip interface vlan 900-910 brief** + +! 
Some interfaces do not exist +Interface IP Address Status Protocol MTU +Vlan901 170.33.254.1/30 up up 9212 +Vlan902 170.33.254.14/29 up up 9212 +Vlan905 170.33.254.17/29 up up 1500 +Vlan907 170.33.254.67/29 up up 9212 +Vlan910 170.33.254.30/30 up up 9212` +``` + + +### show ip route + + +The **show ip route** command displays routing table entries +that are in the Forwarding Information Base (FIB), including static routes, routes +to directly connected networks, and dynamically learned routes. Multiple equal-cost +paths to the same prefix are displayed contiguously as a block, with the destination +prefix displayed only on the first line. + + +The **show running-config** command displays configured +commands not in the FIB. + + +**Command Mode** + + +EXEC + + +**Command Syntax** + + +**show ip route +[vrf_instance][address][route_type][info_level][prefix]** + + +**Parameters** + + +The **vrf_instance** and **address** +parameters must be listed first and second, respectively. All other parameters can be placed +in any order. + +- **vrf_instance** - Specifies the VRF instance to +display data. + +- **no parameter** - Context-active VRF. + +- **vrf** +**vrf_name** - Specifies name of VRF instance. +System default VRF is specified by +**default**. + +- **address** - Filters routes by IPv4 address or +subnet. + +- **no parameter** - All routing table entries. + +- **ipv4_addr** - Routing table entries matching +specified address. + +- **ipv4_subnet** - Routing table entries +matching specified subnet (CIDR or address-mask). + +- **route_type** - Filters routes by specified protocol +or origin. varnames include: + +- **no parameter** - All routing table entries. + +- **aggregate** - Entries for BGP aggregate +routes. + +- **bgp** - Entries added through BGP +protocol. + +- **connected** - Entries for routes to networks +directly connected to the switch. + +- **isis** - Entries added through ISIS +protocol.
+ +- **kernel** - Entries appearing in Linux kernel +but not added by EOS software. + +- **ospf** - Entries added through OSPF +protocol. + +- **rip** - Entries added through RIP +protocol. + +- **static** - Entries added through CLI +commands. + +- **vrf** - Displays routes in a VRF. + +- **Iinfo_level** - Filters entries by next hop +connection. varnames include: + +- **no parameter**  - Filters routes whose next hops are directly +connected. + +- **detail** - Displays all routes. + +- **prefix** - Filters routes by prefix. + +- **no parameter** - Specific route entry that matches the address +parameter. + +- **longer-prefixes** -  All subnet route +entries in range specified by address parameter. + + +**Related Command** + + +The cli vrf command specifies the context-active VRF. + + +**Examples** + +- This command displays IPv4 routes learned through +BGP. +``` +`switch> **show ip route bgp** +Codes: C - connected, S - static, K - kernel, + O - OSPF, IA - OSPF inter area, E1 - OSPF external type 1, + E2 - OSPF external type 2, N1 - OSPF NSSA external type 1, + N2 - OSPF NSSA external type2, B I - iBGP, B E - eBGP, + R - RIP, A - Aggregate + + B E 170.44.48.0/23 [20/0] via 170.44.254.78 + B E 170.44.50.0/23 [20/0] via 170.44.254.78 + B E 170.44.52.0/23 [20/0] via 170.44.254.78 + B E 170.44.54.0/23 [20/0] via 170.44.254.78 + B E 170.44.254.112/30 [20/0] via 170.44.254.78 + B E 170.53.0.34/32 [1/0] via 170.44.254.78 + B I 170.53.0.35/32 [1/0] via 170.44.254.2 + via 170.44.254.13 + via 170.44.254.20 + via 170.44.254.67 + via 170.44.254.35 + via 170.44.254.98` +``` + +- This command displays the unicast IP routes installed in the +system. 
+``` +`switch# **show ip route** + VRF name: default +Codes: C - connected, S - static, K - kernel, + O - OSPF, IA - OSPF inter area, E1 - OSPF external type 1, + E2 - OSPF external type 2, N1 - OSPF NSSA external type 1, + N2 - OSPF NSSA external type2, B I - iBGP, B E - eBGP, + R - RIP, I - ISIS, A B - BGP Aggregate, A O - OSPF Summary, + NG - Nexthop Group Static Route + +Gateway of last resort is not set + C 10.1.0.0/16 is directly connected, Vlan2659 + C 10.2.0.0/16 is directly connected, Vlan2148 + C 10.3.0.0/16 is directly connected, Vlan2700 + S 172.17.0.0/16 [1/0] via 172.24.0.1, Management1 + S 172.18.0.0/16 [1/0] via 172.24.0.1, Management1 + S 172.19.0.0/16 [1/0] via 172.24.0.1, Management1 + S 172.20.0.0/16 [1/0] via 172.24.0.1, Management1 + S 172.22.0.0/16 [1/0] via 172.24.0.1, Management1 + C 172.24.0.0/18 is directly connected, Management1` +``` + +- This command displays the leaked routes from a source +VRF. +``` +`switch# **show ip route vrf VRF2 20.0.0.0/8** +... +S L 20.0.0.0/8 [1/0] (source VRF VRF1) via 10.1.2.10, Ethernet1` +``` + +- This example displays an IPv4 route with Forwarding Equivalency Class (FEC) +with an IPv4 next hop and an IPv6 next hop route. 
+ +``` +`switch#**show ip route 10.1.0.0/23** + VRF: default + Source Codes: + C - connected, S - static, K - kernel, + O - OSPF, IA - OSPF inter area, E1 - OSPF external type 1, + E2 - OSPF external type 2, N1 - OSPF NSSA external type 1, + N2 - OSPF NSSA external type2, B - Other BGP Routes, + B I - iBGP, B E - eBGP, R - RIP, I L1 - IS-IS level 1, + I L2 - IS-IS level 2, O3 - OSPFv3, A B - BGP Aggregate, + A O - OSPF Summary, NG - Nexthop Group Static Route, + V - VXLAN Control Service, M - Martian, + DH - DHCP client installed default route, + DP - Dynamic Policy Route, L - VRF Leaked, + G - gRIBI, RC - Route Cache Route, + CL - CBF Leaked Route + +**S 10.1.0.0/23 [1/0] + via 2000:0:0:43::2, Ethernet2 + via 10.0.1.2, Ethernet4**` +``` + + +### show ip route age + + +The **show ip route age** command displays the time when the +route for the specified network was present in the routing table. It does not +account for the changes in parameters like metric, next-hop etc. + + +**Command Mode** + + +EXEC + + +**Command Syntax** + + +**show ip route +address +age** + + +**Parameters** + + +**address** - Filters routes by IPv4 address or subnet. + +- **ipv4_addr** - Routing table entries matching +specified address. + +- **ipv4_subnet** - Routing table entries matching +specified subnet (CIDR or address-mask). + + +**Example** + + +This command shows the amount of time since the last update to IP route +**172.17.0.0/20**. +``` +`switch> **show ip route 172.17.0.0/20 age** +Codes: C - connected, S - static, K - kernel, + O - OSPF, IA - OSPF inter area, E1 - OSPF external type 1, + E2 - OSPF external type 2, N1 - OSPF NSSA external type 1, + N2 - OSPF NSSA external type2, B I - iBGP, B E - eBGP, + R - RIP, I - ISIS, A - Aggregate + + B E 172.17.0.0/20 via 172.25.0.1, **age 3d01h** +switch>` +``` + + +### show ip route gateway + + +The **show ip route gateway** command displays IP addresses of +all gateways (next hops) used by active routes. 
+ + +**Command Mode** + + +EXEC + + +**Command Syntax** + + +show ip route [vrf_instance] +gateway + + +**Parameters** + + +**vrf_instance** - Specifies the VRF instance for which data is +displayed. + +- **no parameter** - Context-active VRF. + +- **vrf** +**vrf_name** - Specifies name of VRF instance. System +default VRF is specified by **default**. + + +**Related Commands** + + +The cli vrf command specifies the context-active VRF. + + +**Example** + + +This command displays next hops used by active +routes. +``` +`switch> **show ip route gateway** +The following gateways are in use: + 172.25.0.1 Vlan101 + 172.17.253.2 Vlan3000 + 172.17.254.2 Vlan3901 + 172.17.254.11 Vlan3902 + 172.17.254.13 Vlan3902 + 172.17.254.17 Vlan3903 + 172.17.254.20 Vlan3903 + 172.17.254.66 Vlan3908 + 172.17.254.67 Vlan3908 + 172.17.254.68 Vlan3908 + 172.17.254.29 Vlan3910 + 172.17.254.33 Vlan3911 + 172.17.254.35 Vlan3911 + 172.17.254.105 Vlan3912 + 172.17.254.86 Vlan3984 + 172.17.254.98 Vlan3992 + 172.17.254.99 Vlan3992 +switch>` +``` + + +### show ip route host + + +The **show ip route host** command displays all host routes in +the host forwarding table. Host routes have a destination prefix of the entire +address ( prefix = **255.255.255.255** or mask = +**/32**). Each entry includes a code of the route’s +purpose: + +- **F** - Static routes from the FIB. + +- **R**  - Routes defined because the IP address is an interface +address. + +- **B** - Broadcast address. + +- **A** - Routes to any neighboring host for which the switch has an ARP +entry. + + +**Command Mode** + + +EXEC + + +**Command Syntax** + + +show ip route [vrf_instance] +host + + +**Parameters** + + +**vrf_instance** - Specifies the VRF instance to display +data. + +- **no parameter** - Context-active VRF. + +- **vrf** +**vrf_name** - Specifies name of VRF instance. System +default VRF is specified by **default**. + + +**Related Commands** + + +The cli vrf command specifies the context-active VRF. 
+ + +**Example** + + +This command displays all host routes in the host forwarding +table. +``` +`switch> **show ip route host** +R - receive B - broadcast F - FIB, A - attached + +F 127.0.0.1 to cpu +B 172.17.252.0 to cpu +A 172.17.253.2 on Vlan2000 +R 172.17.253.3 to cpu +A 172.17.253.10 on Vlan2000 +B 172.17.253.255 to cpu +B 172.17.254.0 to cpu +R 172.17.254.1 to cpu +B 172.17.254.3 to cpu +B 172.17.254.8 to cpu +A 172.17.254.11 on Vlan2902 +R 172.17.254.12 to cpu + +F 172.26.0.28 via 172.17.254.20 on Vlan3003 + via 172.17.254.67 on Vlan3008 + via 172.17.254.98 on Vlan3492 +                via 172.17.254.2 on Vlan3601 + via 172.17.254.13 on Vlan3602 +via 172.17.253.2 on Vlan3000 +F 172.26.0.29 via 172.25.0.1 on Vlan101 +F 172.26.0.30 via 172.17.254.29 on Vlan3910 +F 172.26.0.32 via 172.17.254.105 on Vlan3912 +switch>` +``` + + +### show ip route match tag + + +The **show ip route match tag** command displays the route tag +assigned to the specified IPv4 address or subnet. Route tags are added to static +routes for use by route maps. + + +**Command Mode** + + +EXEC + + +**Command Syntax** + + +**show ip route [vrf_instance] +address +match tag** + + +**Parameters** + +- **VRF_INSTANCE** - Specifies the VRF instance to +display data. + +- **no parameter** - Context-active VRF. + +- **vrf** +**vrf_name** - Specifies name of VRF instance. +System default VRF is specified by +**default**. + +- **address** - Displays routes of specified IPv4 +address or subnet. + +- **ipv4_addr** - Routing table entries +matching specified IPv4 address. + +- **ipv4_subnet** - Routing table entries +matching specified IPv4 subnet (CIDR or address-mask). + + +**Example** + + +This command displays the route tag for the specified +subnet. 
+``` +`switch> **show ip route 172.17.50.0/23 match tag** +Codes: C - connected, S - static, K - kernel, + O - OSPF, IA - OSPF inter area, E1 - OSPF external type 1, + E2 - OSPF external type 2, N1 - OSPF NSSA external type 1, + N2 - OSPF NSSA external type2, B I - iBGP, B E - eBGP, + R - RIP, I L1 - IS-IS level 1, I L2 - IS-IS level 2, + O3 - OSPFv3, A B - BGP Aggregate, A O - OSPF Summary, + NG - Nexthop Group Static Route, V - VXLAN Control Service, + DH - DHCP client installed default route, M - Martian + + O E2 172.17.50.0/23 tag 0 + +switch>` +``` + + +### show ip route summary + + +The **show ip route summary** command displays the number of +routes, categorized by destination prefix, in the routing table. + + +**Command Mode** + + +EXEC + + +**Command Syntax** + + +show ip route [vrf_instance] +summary + + +**Parameters** + + +**vrf_instance** - Specifies the VRF instance for which data +is displayed. + +- **no parameter** - Context-active VRF. + +- **vrf** +**vrf_name** - Specifies name of VRF instance. System +default VRF is specified by **default**. + + +**Example** + + +This command displays a summary of the routing table +contents. +``` +`switch> **show ip route summary** +Route Source Number Of Routes +------------------------------------- +connected 15 +static 0 +ospf 74 + Intra-area: 32 Inter-area:33 External-1:0 External-2:9 + NSSA External-1:0 NSSA External-2:0 +bgp 7 + External: 6 Internal: 1 +internal 45 +attached 18 +aggregate 0 +switch>` +``` + + +### show ip verify source + + +The **show ip verify source** +command displays the IP source guard (IPSG) configuration, operational states, and +IP-MAC binding entries for the configuration mode interface. + + +**Command +Mode** + + +EXEC + + +**Command Syntax** + + +show ip +verify source [vlan | +detail] + + +**Parameters** + +- **vlan** - Displays all VLANs configured in +**no ip verify source vlan**. + +- **detail** - Displays all source IP-MAC binding +entries configured for IPSG. 
+ + +**Related Commands** + +- ip source binding + +- ip verify source + + +**Examples** + +- This command verifies the IPSG configuration and operational +states. +``` +`switch(config)# **show ip verify source** +Interface       Operational State +--------------- ------------------------ +Ethernet1       IP source guard enabled +Ethernet2       IP source guard disabled` +``` + +- This command displays all VLANs configured in **no ip verify +source vlan**. Hardware programming errors, e.g., VLAN +classification failed, are indicated in the operational state. If an error +occurs, the VLAN is considered enabled for IPSG, and traffic on this VLAN +is filtered by +IPSG. +``` +`switch(config)# **show ip verify source vlan** +IPSG disabled on VLANS: 1-2 +VLAN            Operational State +--------------- ------------------------ +1               IP source guard disabled +2               Error: vlan classification failed` +``` + +- This command displays all source IP-MAC binding entries configured for IPSG. +A source binding entry is considered active if it is programmed into hardware. +IP traffic matching any active binding entry is permitted. If a +source binding entry is configured on an interface or a VLAN whose operational state is +IPSG disabled, the entry is not installed in hardware, and an +“IP source guard disabled” state displays. If a port channel has no member +port configured, binding entries configured for this port channel are not +installed in hardware, and a “Port-Channel down” state +displays.
+``` +`switch(config)# **show ip verify source detail** +Interface      IP Address  MAC Address     VLAN  State +-------------- ----------- --------------- ----- ------------------------ +Ethernet1      10.1.1.1    0000.aaaa.1111   5     active +Ethernet1      10.1.1.5    0000.aaaa.5555   1     IP source guard disabled +Port-Channel1  20.1.1.1    0000.bbbb.1111   4     Port-Channel down` +``` + + +### show platform arad ip +route summary + + +The **show platform arad ip route summary** command shows +hardware resource usage of IPv4 routes. + + +**Command Mode** + + +EXEC + + +**Command Syntax** + + +show platform arad ip route summary + + +**Related Commands** + +- The agent SandL3Unicast terminate command enables +restarting the layer 3 agent to ensure IPv4 routes are optimized. + +- The ip hardware fib optimize command enables IPv4 +route scale. + +- The show platform arad ip route command shows +resources for all IPv4 routes in hardware. Routes that use the additional +hardware resources will appear with an asterisk. + + +**Example** + + +This command shows hardware resource usage of IPv4 +routes. +``` +`switch(config)# **show platform arad ip route summary** +Total number of VRFs: 1 +Total number of routes: 25 +Total number of route-paths: 21 +Total number of lem-routes: 4 + +switch(config)#` +``` + + +### show platform arad ip +route + + +The **show platform arad ip route** command shows resources for +all IPv4 routes in hardware. Routes that use the additional hardware resources will +appear with an asterisk. + + +**Command Mode** + + +EXEC + + +**Command Syntax** + + +show platform arad ip route + + +**Related Commands** + +- The agent SandL3Unicast terminate command enables +restarting the Layer 3 agent to ensure IPv4 routes are optimized. + +- The ip hardware fib optimize command enables IPv4 +route scale. + +- The show platform arad ip route summary command +shows hardware resource usage of IPv4 routes. 
+ + +**Examples** + +- This command displays the platform unicast forwarding routes. In this +example, the ACL label field in the following table is +**4094** by default for all routes. If an IPv4 +egress RACL is applied to an SVI, all routes corresponding to that VLAN will +have an ACL label value. In this case, the ACL Label field value is +2. +``` +`switch# **show platform arad ip route** + Tunnel Type: M(mpls), G(gre) + +------------------------------------------------------------------------------- +| Routing Table | +| +|------------------------------------------------------------------------------ +|VRF| Destination | | | | Acl | | +ECMP| FEC | Tunnel +| ID| Subnet | Cmd | Destination | VID | Label | MAC / CPU +Code |Index|Index|T Value + +------------------------------------------------------------------------------- +|0 |0.0.0.0/8 |TRAP | CoppSystemL3DstMiss|0 | - | ArpTrap | - |1031 | - +|0 |10.1.0.0/16 |TRAP | CoppSystemL3DstMiss|2659 | - | ArpTrap | - |1030 | - +|0 |10.2.0.0/16 |TRAP | CoppSystemL3DstMiss|2148 | - | ArpTrap | - |1026 | - +|0 |172.24.0.0/18 |TRAP | CoppSystemL3DstMiss|0 | - | ArpTrap | - |1032 | - +|0 |0.0.0.0/0 |TRAP | CoppSystemL3LpmOver|0 | - | SlowReceive | - +|1024 | - +|0 |10.1.0.0/32* |TRAP | CoppSystemIpBcast |0 | - | BcastReceive | - +|1027 | - +|0 |10.1.0.1/32* |TRAP | CoppSystemIpUcast |0 | - | Receive | - |32766| - +|0 |10.1.255.1/32* |ROUTE| Po1 |2659 |4094 | 00:1f:5d:6b:ce:45 +| - |1035 | - +|0 |10.1.255.255/32* |TRAP | CoppSystemIpBcast |0 | - | BcastReceive | - +|1027 | - +|0 |10.3.0.0/32* |TRAP | CoppSystemIpBcast |0 | - | BcastReceive | - +|1027 | - +|0 |10.3.0.1/32* |TRAP | CoppSystemIpUcast |0 | - | Receive | - |32766| - +|0 |10.3.255.1/32* |ROUTE| Et18 |2700 |2 | 00:1f:5d:6b:00:01 +| - |1038 | - +...........................................................` +``` + +- This command shows resources for all IPv4 routes in hardware. Routes that +use the additional hardware resources will appear with an +asterisk. 
+``` +`switch(config)# **show platform arad ip route** +Tunnel Type: M(mpls), G(gre) +* - Routes in LEM + +------------------------------------------------------------------------------- +| Routing Table | | +|------------------------------------------------------------------------------ +|VRF| Destination | | | |Acl | |ECMP +| FEC | Tunnel +|ID | Subnet | Cmd | Destination |VID |Label| MAC / CPU Code +|Index|Index|T Value + +------------------------------------------------------------------------------- +|0 |0.0.0.0/8 |TRAP |CoppSystemL3DstMiss|0 | - |ArpTrap | - +|1030 | - +|0 |100.1.0.0/32 |TRAP |CoppSystemIpBcast |0 | - |BcastReceive | - +|1032 | - +|0 |100.1.0.0/32 |TRAP |CoppSystemIpUcast |0 | - |Receive | - +|32766| - +|0 |100.1.255.255/32|TRAP |CoppSystemIpBcast |0 | - |BcastReceive | - +|1032 | - +|0 |200.1.255.255/32|TRAP |CoppSystemIpBcast |0 | - |BcastReceive | - +|1032 | - +|0 |200.1.0.0/16 |TRAP |CoppSystemL3DstMiss|1007| - |ArpTrap | - +|1029 | - +|0 |0.0.0.0/0 |TRAP |CoppSystemL3LpmOver|0 | - |SlowReceive | - +|1024 | - +|0 |4.4.4.0/24* |ROUTE|Et10 |1007| - |00:01:00:02:00:03| - +|1033 | - +|0 |10.20.30.0/24* |ROUTE|Et9 |1006| - |00:01:00:02:00:03| - +|1027 | - + +switch(config)#` +``` + + +### show platform barefoot bfrt + + +The **show platform barefoot bfrt** command displays +information about the current BfRuntime server configuration. + + +**Command Mode** + + +EXEC + + +**Command Syntax** + + +show platform barefoot bfrt + + +**Parameters** + + +**no parameter** - Specify the state of the system. + + +**Example** + + +The following output is for a system where the BfRuntime server has been +configured. +``` +`(switch)# **show platform barefoot bfrt** +Namespace: management +FixedSystem:0.0.0.0:50052` +``` + + +### show platform fap eedb +ip-tunnel gre interface tunnel + + +The **show platform fap eedb ip-tunnel gre interface tunnel** +command verifies the tunnel encapsulation programming for the tunnel interface. 
+ + +**Command Mode** + + +EXEC + + +**Command Syntax** + + +**show platform fap eedb ip-tunnel gre interface tunnel +number** + + +**Parameter** + + +**number** - Specifies the tunnel interface number. + + +**Example** + + +These commands verify the tunnel encapsulation programming for the **tunnel +interface +10**. +``` +`switch# **show platform fap eedb ip-tunnel gre interface tunnel 10** +---------------------------------------------------------------------------- +| Jericho0 | +| GRE Tunnel Egress Encapsulation DB +| +|--------------------------------------------------------------------------| +| Bank/ | OutLIF | Next | VSI | Encap | TOS | TTL | Source | Destination| +OamLIF| OutLIF | Drop| +| Offset| | OutLIF | LSB | Mode | | | IP | IP | Set +| Profile| | +|--------------------------------------------------------------------------| +| 3/0 | 0x6000 | 0x4010 | 0 | 2 | 10 | 10 | 10.1.1.1 | 10.1.1.2 | No +| 0 | No | + +switch# **show platform fap eedb ip-tunnel** +------------------------------------------------------------------------------- +| Jericho0 | +| IP Tunnel Egress Encapsulation DB +| +|------------------------------------------------------------------------------ +| Bank/ | OutLIF | Next | VSI | Encap| TOS | TTL | Src | Destination | OamLIF +| OutLIF | Drop| +| Offset| | OutLIF | LSB | Mode | Idx | Idx | Idx | IP | Set | +Profile | | +|------------------------------------------------------------------------------ +| 3/0 | 0x6000 | 0x4010 | 0 | 2 | 9 | 0 | 0 | 10.1.1.2 | No | +0 | No |` +``` + + +### show platform fap tcam +summary + + +The **show platform fap tcam summary** command displays +information about the TCAM bank that is allocated for GRE packet termination lookup. + + +**Command Mode** + + +EXEC + + +**Command Syntax** + + +**show platform fap tcam summary** + + +**Example** + + +This command verifies if the TCAM bank is allocated for GRE packet termination +lookup. 
+``` +`switch# **show platform fap tcam summary** + +Tcam Allocation (Jericho0) +Bank Used By Reserved By +---------- ----------------------- ----------- +0 dbGreTunnel -` +``` + + +### show platform trident +forwarding-table partition + + +The **show platform trident forwarding-table partition** +command displays the size of the L2 MAC entry tables, L3 IP forwarding tables, and +Longest Prefix Match (LPM) routes. + + +**Command Mode** + + +Privileged EXEC + + +**Command Syntax** + + +show platform trident forwarding-table partition + + +show platform trident forwarding-table partition flexible + + +**Examples** + +- The **show platform trident forwarding-table +partition** command displays the Trident forwarding table +information. +``` +`switch(config)# **show platform trident forwarding-table partition** +L2 Table Size: 96k +L3 Host Table Size: 208k +LPM Table Size: 16k +switch(config)#` +``` + +- The **show platform trident forwarding-table partition +flexible** shows the banks allocated for ALPM as +well. 
+``` +`switch(config)# **show platform trident forwarding-table partition flexible** +-------------------------------------------------- +Minimum L2 entries = 32768 +Minimum L3 entries = 16384 +Maximum L2 entries = 262144 +Maximum L3 entries = 262144 +Maximum Exact Match entries = 131072 +L2 entries per bucket = 4 +L3 entries per bucket = 4 +Exact Match entries per bucket = 2 +Maximum entries per bucket = 4 +Maximum shared buckets = 65536 +Maximum entries per bank = 32768 +Maximum shared banks = 8 +ALPM entries per bank = 46080 +ALPM = Enabled +-------------------- +# UFT bank details # +-------------------- +S - Shared UFT bank, D - Dedicated UFT bank ++-------------+------------+------+------------+--------------+ +| Physical ID | Feature | Type | Logical ID | Hash Offset | ++-------------+------------+------+------------+--------------+ +| 0 | L2 | D | 0 | 0x4 | +| 1 | L2 | D | 1 | 0xe | +| 2 | ALPM | S | N/A | 0 | +| 3 | ALPM | S | N/A | 0 | +| 4 | ALPM | S | N/A | 0 | +| 5 | ALPM | S | N/A | 0 | +| 6 | L2 | S | 2 | 0xc | +| 7 | ExactMatch | S | 0 | 0xc | +| 8 | ExactMatch | S | 1 | 0xf | +| 9 | L3 | S | 2 | 0xc | +| 10 | L3 | D | 0 | 0x0 | +| 11 | L3 | D | 1 | 0x8 | ++-------------+------------+------+------------+--------------+` +``` + + +### show platform trident l3 shadow dlb-ecmp-group-control + + +The **show platform trident l3 shadow dlb-ecmp-group-control** displays information about +Dynamic Load Balancing with ECMP groups. 
+ + +**Command Mode** + + +Privileged EXEC + + +**Command Syntax** + + +show platform trident l3 shadow dlb-ecmp-group-control + + +**Example** + + +Use the following command to display information about DLB and ECMP +groups: +``` +`switch# show platform trident l3 shadow dlb-ecmp-group-control + DLB_ECMP_GROUP_CONTROL: + eId size path baseAddr flowSize memPtr flowBase OC mode inDur + ---- ---- ---- -------- -------- ------ -------- -- ---- ------ + 1 3 0 136 1 1 256 1 0 500 + +Legend: +eId = Entry ID +size = Primary Group Size +path = Primary Path Threshold +baseAddr = Group Port To Member Base Address +flowSize = Flow Set Size +memPtr = Group Membership Pointer +flowBase = Flow Set Base +OC = Enable Optimal Candidate +mode = Port Assignment Mode +inDur = Inactivity Duration` +``` + + +The output displays the following information: + +- **Entry ID** - Indicates the dynamic load balance +group ID. + +- **Primary Group Size** - Indicates the number of +members in the DLB group. + +- **Enable optimal candidate** - Indicates the least +loaded member or predefined member selection. Always set to +1 to ensure the selection of the least loaded member. + +- **Inactivity duration** - Indicates the inactivity +period. If the switch does not receive new packets from a particular flow +within this duration, then the optimal member becomes the new member for the +flow. Represented in microseconds. + + +### show rib route ip + + +The **show rib route ip** command displays a list of IPv4 +Routing Information Base (RIB) routes. + + +**Command Mode** + + +EXEC + + +**Command Syntax** + + +show rib route ip [vrf +vrf_name][prefix][route_type] + + +**Parameters** + + +- **vrf** +**vrf_name** - Displays RIB routes from the specified +VRF. + +- **prefix** - Displays routes filtered by the specified +IPv4 information. Options include the following: + +- **ip_address** - Displays RIB routes filtered +by the specified IPv4 address. 
+ +- **ip_subnet_mask** - Displays RIB routes +filtered by the specified IPv4 address and subnet mask. + +- **ip_prefix** - Displays RIB routes filtered +by the specified IPv4 prefix. + +- **route_type** - Displays routes filtered by the +specified route type. Options include the following: + +- **bgp** - Displays RIB routes filtered by +BGP. + +- **connected** - Displays RIB routes filtered +by connected routes. + +- **dynamicPolicy** - Displays RIB routes +filtered by dynamic policy routes. + +- **host** - Displays RIB routes filtered by +host routes. + +- **isis** - Displays RIB routes filtered by +IS-IS routes. + +- **ospf** - Displays RIB routes filtered by +OSPF routes. + +- **ospf3** - Displays RIB routes filtered by +OSPF3 routes. + +- **reserved** - Displays RIB routes filtered by +reserved routes. + +- **route-input** - Displays RIB routes filtered +by route-input routes. + +- **static** - Displays RIB routes filtered by +static routes. + +- **vrf** - Displays routes in a VRF. + +- **vrf-leak** - Displays leaked routes in a +VRF. + + +**Examples** + +- This command displays IPv4 RIB static +routes. +``` +`switch# **show rib route ip static** +VRF name: default, VRF ID: 0xfe, Protocol: static +Codes: C - Connected, S - Static, P - Route Input + B - BGP, O - Ospf, O3 - Ospf3, I - Isis + > - Best Route, * - Unresolved Nexthop + L - Part of a recursive route resolution loop +>S 10.80.0.0/12 [1/0] + via 172.30.149.129 [0/1] + via Management1, directly connected +>S 172.16.0.0/12 [1/0] + via 172.30.149.129 [0/1] + via Management1, directly connected +switch#` +``` + +- This command displays IPv4 RIB connected +routes. 
+``` +`switch# **show rib route ip connected** +VRF name: default, VRF ID: 0xfe, Protocol: connected +Codes: C - Connected, S - Static, P - Route Input + B - BGP, O - Ospf, O3 - Ospf3, I - Isis + > - Best Route, * - Unresolved Nexthop + L - Part of a recursive route resolution loop +>C 10.1.0.0/24 [0/1] + via 10.1.0.102, Ethernet1 +>C 10.2.0.0/24 [0/1] + via 10.2.0.102, Ethernet2 +>C 10.3.0.0/24 [0/1] + via 10.3.0.102, Ethernet3 +switch#` +``` + +- This command displays routes leaked through VRF leak +agent. +``` +`switch# **show rib route ip vrf VRF2 vrf-leak** +VRF: VRF2, Protocol: vrf-leak +... +>VL 20.0.0.0/8 [1/0] source VRF: VRF1 + via 10.1.2.10 [0/0] type ipv4 + via 10.1.2.10, Ethernet1` +``` + + +### show rib route fib policy excluded + + +The **show rib route fib policy excluded** command displays the +RIB routes filtered by FIB policy. The **fib policy excluded** +parameter displays the RIB routes excluded from programming into +FIB, by FIB policy. + + +**Command Mode** + + +EXEC + + +**Command Syntax** + + +show rib route [ipv4 | ipv6] +fib policy excluded + + +**Example** + + +The following example displays the RIB routes excluded by the FIB policy using the +**fib policy excluded** option of the **show +rib route** +command. 
+``` +`switch# **show rib route ipv6 fib policy excluded** +switch# **show rib route ip bgp fib policy excluded** + +VRF name: default, VRF ID: 0xfe, Protocol: bgp +Codes: C - Connected, S - Static, P - Route Input + B - BGP, O - Ospf, O3 - Ospf3, I - Isis + > - Best Route, * - Unresolved Nexthop + L - Part of a recursive route resolution loop +>B 10.1.0.0/24 [200/0] + via 10.2.2.1 [115/20] type tunnel + via 10.3.5.1, Ethernet1 + via 10.2.0.1 [115/20] type tunnel + via 10.3.4.1, Ethernet2 + via 10.3.6.1, Ethernet3 +>B 10.1.0.0/24 [200/0] + via 10.2.2.1 [115/20] type tunnel + via 10.3.5.1, Ethernet1 + via 10.2.0.1 [115/20] type tunnel + via 10.3.4.1, Ethernet2 + via 10.3.6.1, Ethernet3` +``` + + +### show rib route summary + + +The **show rib route summary** command displays information +about the routes present in the Routing Information Base. + + +**Command Mode** + + +EXEC + + +**Command Syntax** + + +show rib route summary [info_level] + + +**Parameters** + + +- **no parameter** - Displays data in one table with the summary of all +routes in the RIB for default VRF. + +- **brief** - Displays one table with the summary of all +routes across all configured VRFs. + +- **ip** - Displays one table with the summary of all +IPv4 routes in the RIB for default VRF. + +- **ipv6** - Displays one table with the summary of all +IPv6 routes in the RIB for default VRF. + +- **vrf +vrf_Name** - Displays one table with the summary of +all routes in the Routing Information Base for the specified VRF. + +- **vrf all** - Displays one table with the summary of +all routes in the Routing Information Base for each configured VRF. + +- info_level - Displays the amount of information. Options +include the following: + +- **Display Values** + +- **VRF** - VRF RIB displayed. + +- **Route Source** - Source for the route. + +- **Number of Routes** - Number of routes for each +source. 
+ + +**Examples** + +- The following displays data in one table with the summary of all routes +in the RIB for default VRF. + + +``` +`switch> **show rib route summary** +VRF: default +Route Source Number Of Routes +-------------------- ---------------- +BGP 1 +Connected 4 +Dynamic policy 0 +IS-IS 0 +OSPF 0 +OSPFv3 0 +RIP 0 +Route input 2 +Static 0 +VRF leak 0` +``` + +- The following displays data in one table with the summary of all routes +across all configured VRFs. + + +``` +`switch> **show rib route summary brief** +Route Source Number Of Routes +-------------------- ---------------- +BGP 2 +Connected 8 +Dynamic policy 0 +IS-IS 0 +OSPF 0 +OSPFv3 0 +RIP 0 +Route input 4 +Static 0 +VRF leak 0` +``` + +- The following displays data in one table with the summary of all IPv4 +routes in the RIB for default VRF. + + +``` +`switch> **show rib route summary ip** +VRF: default +Route Source Number Of Routes +-------------------- ---------------- +BGP 1 +Connected 4 +Dynamic policy 0 +IS-IS 0 +OSPF 0 +OSPFv3 0 +RIP 0 +Route input 2 +Static 0 +VRF leak 0` +``` + +- The following displays data in one table with the summary of all IPv6 +routes in the RIB for default VRF. + + +``` +`switch> **show rib route summary ipv6** +VRF: default +Route Source Number Of Routes +-------------------- ---------------- +BGP 0 +Connected 0 +Dynamic policy 0 +IS-IS 0 +OSPF 0 +OSPFv3 0 +RIP 0 +Route input 0 +Static 0 +VRF leak 0` +``` + +- The following displays data in one table with the summary of all routes +in the RIB for the VRF named **red**. + + +``` +`switch> **show rib route summary vrf red** +VRF: red +Route Source Number Of Routes +-------------------- ---------------- +BGP 1 +Connected 4 +Dynamic policy 0 +IS-IS 0 +OSPF 0 +OSPFv3 0 +RIP 0 +Route input 2 +Static 0 +VRF leak 0` +``` + +- The following displays data in one table with the summary of all routes +in the RIB for each configured VRF. 
+ + +``` +`switch> **show rib route summary vrf all** +VRF: red +Route Source Number Of Routes +-------------------- ---------------- +BGP 1 +Connected 4 +Dynamic policy 0 +IS-IS 0 +OSPF 0 +OSPFv3 0 +RIP 0 +Route input 2 +Static 0 +VRF leak 0 + +VRF: default +Route Source Number Of Routes +-------------------- ---------------- +BGP 1 +Connected 4 +Dynamic policy 0 +IS-IS 0 +OSPF 0 +OSPFv3 0 +RIP 0 +Route input 2 +Static 0 +VRF leak 0` +``` + + +### show routing-context +vrf + + +The **show routing-context vrf** command displays the +context-active VRF. The context-active VRF determines the default VRF that +VRF-context aware commands use when displaying routing table data from a specified +VRF. + + +**Command Mode** + + +EXEC + + +**Command Syntax** + + +show routing-context vrf + + +**Related Commands** + + +The cli vrf command specifies the context-active VRF. + + +**Example** + + +This command displays the context-active +VRF. +``` +`switch> **show routing-context vrf** +Current VRF routing-context is PURPLE +switch>` +``` + + +### show snapshot counters ecmp history + + +The **show snapshot counters ecmp history** displays information about the AGM configuration. + + +**Command Mode** + + +EXEC + + +**Command Syntax** + + +show snapshot counters ecmp history + + +**Parameters** + + +- **Request ID** - Identifies the snapshot Request ID to use for the **clear** +command. + +- **Output directory URL** - Identifies the snapshot storage location. + +- **Complete** - Identifies the snapshot completion status. + +- **Poll Interval** - Identifies the configured polling interval for the snapshot. + +- **Total poll count** - Identifies the total number of hardware +counters collected. + +- **Start time** and **Stopped time** - Identifies the system time when the snapshot +started and stopped. + +- **L2 Adjacency ID** and **Interfaces** - +The summary of the ECMP groups monitored by AGM. 
+ + +**Example** + + +Use the **show snapshot counters ecmp history** to display +information about the +configuration. +``` +`switch# **show snapshot counters ecmp history** +Request ID: 17 +Output directory URL: file:/var/tmp/ecmpMonitor +Output file name(s): ecmpMonitor-17-adj1284.ctr, ecmpMonitor-17-adj1268.ctr +Complete: True +Poll interval: 1000 microseconds +Total poll count: 59216 +Start time: 2024-06-17 17:58:36 +Stop time: 2024-06-17 17:59:36 + +L2 Adjacency ID Interfaces +--------------------- ---------------------------------------------------- +1268 Ethernet54/1, Ethernet41/1, Ethernet1/1, Ethernet57/1 +1284 Ethernet20/1, Ethernet35/1, Ethernet41/1, Ethernet8/1, Ethernet1/1` +``` + + + + +### show tunnel fib static +interface gre + + +The **show tunnel fib static interface gre** command displays +the Forwarding Information Base (FIB) information for a static interface GRE tunnel. + + +**Command Mode** + + +EXEC + + +**Command Syntax** + + +show tunnel fib static interface gre +number + + +**Parameter** + + +**number** - Specifies the tunnel index number. + + +**Example** + + +This command display the interface tunnel configuration with GRE +configured. +``` +`switch# **show tunnel fib static interface gre 10** + +Type 'Static Interface', index 10, forwarding Primary + via 10.6.1.2, 'Ethernet6/1' + GRE, destination 10.1.1.2, source 10.1.1.1, ttl 10, tos 0xa` +``` + + + + +### show vrf + + +The **show vrf** command displays the VRF name, RD, supported +protocols, state and included interfaces for the specified VRF or for all VRFs on +the switch. + + +**Command Mode** + + +EXEC + + +**Command Syntax** + + +show vrf [vrf_instance] + + +**Parameters** + + +vrf_instance - Specifies the VRF instance to display. + +- **no parameter** - Displays information for all +VRFs. + +- **vrf** +**vrf_name** - Displays information for the specified +user-defined VRF. + + +**Example** + + +This command displays information for the VRF named +**purple**. 
+``` +`switch> **show vrf purple** +Vrf      RD          Protocols  State       Interfaces +-------- ----------- ---------- ----------- -------------- +purple   64496:237   ipv4       no routing  Vlan42, Vlan43 + +switch>` +``` + + +### start snapshot counters + + +The **start snapshot counters ecmp** allows the monitoring of packets and bytes traversing the members of the +configured ECMP groups on the switch with a high time resolution. + + +**Command Mode** + + +Global Configuration Mode + + +**Command Syntax** + + +start snapshot counters +ecmp +poll +interval +interval [milliseconds | +microseconds] duration +duration +seconds +destination_url + + +**Parameters** + +- **interval +interval** - Specify at least 100 microseconds. EOS +does not guarantee the interval, and the actual poll interval may depend on the +system load as well as the number and size of configured ECMP groups. Valid +values include milliseconds and microseconds. + +- **duration +duration +seconds** - Specify the duration for collecting data. A +maximum of 3600 seconds can be configured. + +- **destination_url** - Optionally, provide a destination +URL for data storage. + +- **file** - The path must start with +**/tmp** or +**/tmp**. The files store in the non-persistent +storage. + +- **flash** - Files store in persistent +storage. + + +**Example** + + +To begin collecting data on the switch at 100 millisecond intervals for 1800 seconds, use +the following +command: +``` +`switch(config)#**start snapshot counters ecmp poll interval 100 milliseconds duration 1800 seconds**` +``` + + +### tcp mss ceiling + + +The **tcp mss ceiling** command configures the Maximum Segment +Size (MSS) limit in the TCP header on the configuration mode interface and enables +TCP MSS clamping. + + +The **no tcp mss ceiling** and the **default tcp mss +ceiling** commands remove any MSS ceiling limit previously +configured on the interface. 
+ + +Note: Configuring a TCP MSS ceiling on any Ethernet or tunnel interface enables TCP MSS +clamping on the switch as a whole. Without hardware support, clamping routes all TCP +SYN packets through software, even on interfaces where no TCP MSS ceiling has been +configured. This significantly limits the number of TCP sessions the switch can +establish per second, and can potentially cause packet loss if the CPU traffic +exceeds control plane policy limits. + + +**Command Mode** + + +Interface-Ethernet Configuration + + +Subinterface-Ethernet Configuration + + +Interface-Port-channel Configuration + + +Subinterface-Port-channel Configuration + + +Interface-Tunnel Configuration + + +Interface-VLAN Configuration + + +**Command Syntax** + + +tcp mss ceiling {ipv4 +segment size | ipv6 +segment size}{egress | +ingress} + + +no tcp mss ceiling + + +default tcp mss ceiling + + +**Parameters** + +- **ipv4** +**segment size** The IPv4 segment size value in bytes. +Values range from **64** to +**65515**. + +- **ipv6** +**segment size** The IPv6 segment size value in +bytes. Values range from **64** to +**65495**. This option is not supported on +Sand platform switches (Qumran-MX, Qumran-AX, Jericho, Jericho+). + +- **egress** The TCP SYN packets that are forwarded from +the interface to the network. + +- **ingress** The TCP SYN packets that are received from +the network to the interface. Not supported on Sand platform switches. + + +**Guidelines** + +- On Sand platform switches (Qumran-MX, Qumran-AX, Jericho, Jericho+), this +command works only for egress, and is supported only on IPv4 unicast packets +entering the switch. + +- Clamping can only be configured in one direction per interface and works +only on egress on Sand platform switches. + +- To configure ceilings for both IPv4 and IPv6 packets, both configurations +must be included in a single command; re-issuing the command overwrites any +previous settings. 
+ +- Clamping configuration has no effect on GRE transit packets. + + +**Example** + + +These commands configure **interface ethernet 5** as a routed +port, then specify a maximum MSS ceiling value of **1458** +bytes in TCP SYN packets exiting that port. This enables TCP MSS clamping on the +switch. +``` +`switch(config)# **interface ethernet 5** +switch(config-if-Et5)# **no switchport** +switch(config-if-Et5)# **tcp mss ceiling ipv4 1458 egress** +switch(config-if-Et5)#` +``` + + +### tunnel + + +The **tunnel** command configures options for +protocol-over-protocol tunneling. Because Interface-Tunnel Configuration Mode does not +provide a group change mode, ***running-config*** changes +immediately after executing the commands. The **exit** command +does not affect the configuration. + + +The **no tunnel** command deletes the specified tunnel +configuration. + + +**Command Mode** + + +Interface-tunnel Configuration + + +**Command Syntax** + + +tunnel +options + + +no tunnel +options + + +**Parameters** + +- **options** - Specifies the various tunneling options +as listed below. + +- **destination** - Specifies the destination address of +the tunnel. + +- **ipsec** - Secures the tunnel with the IPsec +address. + +- **key** - Sets the tunnel key. + +- **mode** - Specifies the tunnel encapsulation +method. + +- **path-mtu-discovery** - Enables the Path MTU +discovery on tunnel. + +- **source** - Specifies the source of the +tunnel packets. + +- **tos** - Sets the IP type of service +value. + +- **ttl** - Sets time to live value. + +- **underlay** - Specifies the tunnel underlay. + + +**Example** + + +These commands place the switch in interface-tunnel configuration mode for +**interface Tunnel 10** and with GRE tunnel configured +on the interfaces +specified. 
+``` +`switch(config)# **ip routing** +switch(config)# **interface Tunnel 10** +switch(config-if-Tu10)# **tunnel mode gre** +switch(config-if-Tu10)# **ip address 192.168.1.1/24** +switch(config-if-Tu10)# **tunnel source 10.1.1.1** +switch(config-if-Tu10)# **tunnel destination 10.1.1.2** +switch(config-if-Tu10)# **tunnel path-mtu-discovery** +switch(config-if-Tu10)# **tunnel tos 10** +switch(config-if-Tu10)# **tunnel ttl 10**` +``` + + +### vrf (Interface mode) + + +The **vrf** command adds the configuration mode interface to +the specified VRF. You must create the VRF first, using the vrf instance command. + + +The **no vrf** and **default vrf** +commands remove the configuration mode interface from the specified VRF by deleting +the corresponding **vrf** command from +***running-config***. + + +All forms of the **vrf** command remove all IP addresses +associated with the configuration mode interface. + + +**Command Mode** + + +Interface-Ethernet Configuration + + +Interface-Loopback Configuration + + +Interface-Management Configuration + + +Interface-Port-channel Configuration + + +Interface-VLAN Configuration + + +**Command Syntax** + + +vrf [vrf_name] + + +no vrf [vrf_name] + + +default vrf [vrf_name] + + +**Parameters** + + +**vrf_name** - Displays the name of configured VRF. + + +**Examples** + +- These commands add the configuration mode interface (**vlan +20**) to the VRF named +**purple**. +``` +`switch(config)# **interface vlan 20** +switch(config-if-Vl20)# **vrf purple** +switch(config-if-Vl20)#` +``` + +- These commands remove the configuration mode interface from VRF +**purple**. +``` +`switch(config)# **interface vlan 20** +switch(config-if-Vl20)# **no vrf purple** +switch(config-if-Vl20)#` +``` + + +### vrf instance + + +The **vrf instance** command places the switch in VRF +configuration mode for the specified VRF. If the named VRF does not exist, this +command creates it. The number of user-defined VRFs supported varies by +platform. 
+ + +To add an interface to the VRF once created, use the vrf (Interface mode) command. + + +The **no vrf instance** and **default vrf +instance** commands delete the specified VRF instance by +removing the corresponding **vrf instance** command from +***running-config***. This also removes all IP +addresses associated with interfaces that belong to the deleted VRF. + + +The **exit** command returns the switch to global configuration +mode. + + +**Command Mode** + + +Global Configuration + + +**Command Syntax** + + +vrf instance [vrf_name] + + +no vrf instance [vrf_name] + + +default vrf instance [vrf_name] + + +**Parameters** + + +**vrf_name** - The name of the configured VRF. The names +**main** and **default** are +reserved. + + +**Example** + + +This command creates a VRF named **purple** and places the +switch in VRF configuration mode for that +VRF. +``` +`switch(config)# **vrf instance purple** +switch(config-vrf-purple)#` +``` diff --git a/docs/arista-scraped/nexthop-groups.md b/docs/arista-scraped/nexthop-groups.md new file mode 100644 index 00000000..4257e911 --- /dev/null +++ b/docs/arista-scraped/nexthop-groups.md @@ -0,0 +1,1167 @@ + + + +# Nexthop Groups + + +These sections describe the Nexthop groups: + +- Next-hop Group Description + +- Next-hop Group Configuration + +- Nexthop Group commands + + +## Next-hop Group Description + + +Each routing table entry provides the next hop address to a specified destination. A next-hop +address consists of the address of the next device on the path to the entry specified +destination. + + +A next-hop group uses a data structure that defines a list of next-hop addresses and a tunnel +type for packets routed to the specified address. When an IP route statement specifies a +next-hop group as the next-hop address, the switch configures a static route with a next-hop +group member as the next-hop address and encapsulates packets forwarded to that address as +required by the group tunnel type. 
+ + +Configure the next-hop group size as a parameter that specifies the number of entries that the +group contains. Group entries not explicitly configured are filled with drop routes. The +switch uses ECMP hashing to select the address within the next-hop group when forwarding +packets. When a packet’s hash selects a drop route, the switch drops the packet. + + +Next-hop groups are supported on Trident platform switches and have the following restrictions: + +- Each switch can support 512 IPv4 or IPv6 tunnels. + +- Next-hop groups can contain 256 next-hops. + +- The switch supports 1024 next-hop groups. + +- Multiple routes can share a tunnel. + +- Tunnels do not support IP multicast packets. + + +Next-hop groups support IP-in-IP tunnels. The entry IP address family within a particular +next-hop group cannot be mixed. They must be all IPv4 or all IPv6 entries. + + +## Next-hop Group Configuration + + +Next-hop groups are configured and modified in next-hop-group configuration mode. After a group +is created, it is associated with a static route through an ip route nexthop-group statement. + + +These tasks are required to configure a next-hop group and apply it to a static route. + +- Creating and Editing Next-hop +Groups + +- Configuring a Group’s Encapsulation +Parameters + +- Configuring the Group’s Size + +- Creating Next-hop Group Entries + +- Displaying Next-hop Groups + +- Applying a Next-hop Group to a Static +Route + + +### Creating and Editing Next-hop Groups + + +Create next-hop groups using the nexthop-group command that specifies an +unconfigured group. The switch enters ***nexthop-group*** configuration mode +for the new group. ***Nexthop-group*** mode is also accessible for modifying +existing groups. When in ***nexthop-group*** configuration mode, the +**show active** command displays the group’s +configuration. + + +- This command creates a next-hop group named +**NH-1**. 
+``` +`switch(config)# **nexthop-group NH-1** +switch(config-nexthop-group-NH-1)#` +``` + +- These commands enter ***nexthop-group*** configuration mode for the +group named **NH3**, then display the previously +configured group +parameters. +``` +`switch(config)# **nexthop-group NH3** +switch(config-nexthop-group-NH3)#show active + nexthop-group NH3 + size 4 + ttl 10 + entry 0 tunnel-destination 10.14.21.3 + entry 1 tunnel-destination 10.14.21.5 + entry 2 tunnel-destination 10.14.22.5 + entry 3 tunnel-destination 10.14.22.6 +switch(config-nexthop-group-NH3)#` +``` + + +### Configuring Group Encapsulation Parameters + + +Packets in static routes associated with the next-hop group are encapsulated to support the +group’s tunnel type. Nexthop groups support IP-in-IP tunnels. The group also defines +the source IP address and TTL field contents included in the packet +encapsulation. + + +- This command configures the TTL setting to **32** for +nexthop group **NH-1** encapsulation +packets. +``` +`switch(config)# **nexthop-group NH-1** +switch(config-nexthop-group-NH-1)# **ttl 32** +switch(config-nexthop-group-NH-1)# **show active** + nexthop-group NH-1 + size 128 + ttl 32 +switch(config-nexthop-group-NH-1)#` +``` + + +The address inserted +in the encapsulation source IP fields is specified by tunnel-source (Next-hop Group). + +- These commands create **interface loopback 100**, assign +an IP address to the interface, then specify that address as the tunnel source +for packets designated by next-hop-group +**NH-1**. 
+``` +`switch(config)# **interface loopback 100** +switch(config-if-Lo100)# **ip address 10.1.1.1/32** +switch(config-if-Lo100)# **exit** +switch(config)# **nexthop-group NH-1** +switch(config-nexthop-group-NH-1)# **tunnel-source intf loopback 100** +switch(config-nexthop-group-NH-1)# **show active** + nexthop-group NH-1 + size 256 + ttl 32 + tunnel-source intf Loopback100 +switch(config-nexthop-group-NH-1)#` +``` + +Configure the nexthop +group tunnel to become active in the tunnel RIB only if a viable nexthop +group exists. A nexthop group becomes viable when it meets specific +reachability and programming criteria determined by one or more underlying +entries resolving in the Forwarding Information Base (FIB) and has +programmability. By default, IP tunnels become active even if no viable +nexthop group exists. To override this behavior, use the following +commands: +``` +`switch(config)# **router general** +switch(config-router-general)# **tunnel nexthop-group unresolved invalid** +switch(config-router-general)#` +``` + + +### Configuring IP-in-IP Encapsulation + + +Through IP-in-IP encapsulation, IP packets matching a static Nexthop-Group route encapsulate +within an IP-in-IP tunnel and forward. + + +This command configures a static Nexthop-Group route and an IP-in-IP Nexthop-Group +for IP-in-IP +encapsulation. +``` +`switch(config)# **ip route 124.0.0.1/32 nexthop-group abc** +switch(config)# **nexthop-group abc type ip-in-ip** +switch(config-nexthop-group-abc)# **size 512** +switch(config-nexthop-group-abc)# **tunnel-source 1.1.1.1** +switch(config-nexthop-group-abc)# **entry 0 tunnel-destination 1.1.1.2** +switch(config-nexthop-group-abc)# **entry 1 tunnel-destination 10.1.1.1** +switch(config-nexthop-group-abc)# **ttl 64** +switch(config-nexthop-group-abc)#` +``` + + +### Configuring the Group’s Size + + +The group’s size specifies the number of entries in the group. A group can contain up to +**256** entries, which is the default size. 
The +group’s size is specified by size (Nexthop Group). + + +This command configures the next-hop group **NH-1** to contain +**128** +entries. +``` +`switch(config)# **nexthop-group NH-1** +switch(config-nexthop-group-NH-1)# **size 128** +switch(config-nexthop-group-NH-1)# **show active** + nexthop-group NH-1 + size 128 + ttl 64 +switch(config-nexthop-group-NH-1)#` +``` + + +### Creating Next-hop Group Entries + + +Each entry specifies a next-hop address that is used to forward packets. A next-hop group +contains one entry statement for each next-hop address. The group size specifies the +number of entry statements the group may contain. Each entry statement is assigned +an index number to distinguish it from other entries within the group, and entry +index numbers range from zero to the group size minus one. + + +Next-hop group entries are configured by entry (Next-hop Group). + + +- These commands set the next-hop group size at four entries, then create three +entries. eos drops packets hashed to the fourth +entry. +``` +`switch(config)# **nexthop-group NH-1** +switch(config-nexthop-group-NH-1)# **size 4** +switch(config-nexthop-group-NH-1)# **entry 0 tunnel-destination 10.13.4.4** +switch(config-nexthop-group-NH-1)# **entry 1 tunnel-destination 10.15.4.22** +switch(config-nexthop-group-NH-1)# **entry 2 tunnel-destination 10.15.5.37** +switch(config-nexthop-group-NH-1)# **show active** + nexthop-group NH-1 + size 4 + ttl 64 + entry 0 tunnel-destination 10.13.4.4 + entry 1 tunnel-destination 10.15.4.22 + entry 2 tunnel-destination 10.15.5.37 +switch(config-nexthop-group-NH-1)#` +``` + +- These commands configure a next-hop group with three IPv6 next-hop +entries. 
+``` +`switch(config)# **nexthop-group nhg-v6-mpls type ip** +switch(config-nhg-v6-mpls)# **size 3** +switch(config-nhg-v6-mpls)# **entry 0 nexthop 2002::6401:1** +switch(config-nhg-v6-mpls)# **entry 1 nexthop 2002::6404:1** +switch(config-nhg-v6-mpls)# **entry 2 nexthop 2002::6404:2** +switch(config-nhg-v6-mpls)#` +``` + +- This command configures an IPv4 route to point to the next-hop group +**nhg-v6-mpls**. (Both IPv4 routes and IPv6 routes +can point to this next-hop +group.) +``` +`switch# **ip route 100.5.0.0/16 Nexthop-Group nhg-v6-mpls** +switch#` +``` + + +### Displaying Next-hop Groups + + +The show nexthop-group command displays a group's configured parameters. + + +This command displays the properties of the nexthop group named +**NH-1**. +``` +`switch> **show nexthop-group NH-1** +Name Id type size ttl sourceIp +NH-1 4 ipInIp 256 64 0.0.0.0 +switch>` +``` + + +### Applying a Next-hop Group to a Static Route + + +The ip route nexthop-group command associates a next-hop group with a specified +destination address and configures the encapsulation method for packets tunneled to +that address. + + +This command creates a static route in the default VRF, using the next-hop group of +**NH-1** to determine the next hop +address. +``` +`switch(config)# **ip route 10.17.252.0/24 nexthop-group NH-1** +switch(config)#` +``` + + +The **show ip route** command displays the routing table for a +specified VRF. Routes that utilize a next-hop group entry are noted with a route +type code of **NG**. + + +This command displays a routing table that contains a static route with its next-hop +specified by a next-hop group. 
+ + +``` +`switch> **show ip route** +Codes: C - connected, S - static, K - kernel, + O - OSPF, IA - OSPF inter area, E1 - OSPF external type 1, + E2 - OSPF external type 2, N1 - OSPF NSSA external type 1, + N2 - OSPF NSSA external type2, B I - ibgp, B E - ebgp, + R - RIP, I - ISIS, A B - bgp Aggregate, A O - OSPF Summary, + NG - Nexthop Group Static Route + +Gateway of last resort is not set + + C 10.3.3.1/32 is directly connected, Loopback0 + C 10.9.1.0/24 is directly connected, Ethernet51/3 + C 10.10.10.0/24 is directly connected, Ethernet51/1 + S 10.20.0.0/16 [20/0] via 10.10.10.13, Ethernet51/1 + C 10.10.11.0/24 is directly connected, Ethernet3 + NG 10.10.3.0/24 [1/0] via ng-test1, 5 + C 10.17.0.0/20 is directly connected, Management1 + S 10.17.0.0/16 [1/0] via 10.17.0.1, Management1 + S 10.18.0.0/16 [1/0] via 10.17.0.1, Management1 + S 10.19.0.0/16 [1/0] via 10.17.0.1, Management1 + S 10.20.0.0/16 [1/0] via 10.17.0.1, Management1 + S 10.22.0.0/16 [1/0] via 10.17.0.1, Management1 + +switch>` +``` + + +### Support for IPv6 Link-Local Addresses in Next-hop Groups Entries + + +IPv6 Link-local addresses in Next-hop Groups entries support IPv6 link-local next-hops +belonging to a Next-hop Group. Only the MPLS Next-hop Group type supports IPv6 addresses, so +IPv6 link-local next-hops are supported only in MPLS Next-hop Groups. An advantage +is that you can use these devices even when they are not configured with globally +routable IPv4 or IPv6 addresses. + + +#### Configuration + + +An MPLS next-hop group with an IPv6 address now accepts an interface if the IPv6 +address is a link-local address. Note the use of the percent sign (%) between the IPv6 address and the +interface. 
+``` +`switch(config)# **nexthop-group nhg1 type mpls** +switch(config-nexthop-group-nhg1)# **entry 0 push label-stack 606789 nexthop fe80::fe80:2%Ethernet2** +switch(config-nexthop-group-nhg1)# **entry 1 push label-stack 204164 nexthop fe80::fe80:2%Ethernet3**` +``` + + +#### Show commands + + +Use the **show nexthop-group** command to display the current +status of the nexthop-groups. +``` +`switch# **show nexthop-group** +nhg1 + Id 1 + Type mpls + Size 12 + Entries (left most label is the top of the stack) + 0 push label-stack 606789 nexthop fe80::fe80:2 + Tunnel destination directly connected, Ethernet2 + 00:d4:27:77:e9:77, Ethernet2 + 1 push label-stack 204164 nexthop fe80::fe80:2 + Tunnel destination directly connected, Ethernet3 + 00:79:21:32:0f:32, Ethernet3` +``` + + +#### Limitations + + +Review the following limitations for the support of IPv6 link-local address in nexthop +group entries: + +- Only the nexthop-group of MPLS supports an IPv6 address. Therefore, link-local +IPv6 addresses are only supported for this type of nexthop-group. + +- Nexthop-groups are configured and exist in the default VRF. The link-local IPv6 +addresses for nexthop-group entries can only be resolved for interfaces in the +default VRF. + + +## Nexthop Group commands + + +**Nexthop commands** + +- entry (Next-hop Group) + +- ip route nexthop-group + +- nexthop-group + +- size (Nexthop Group) + +- ttl (Next-hop Group) + +- tunnel-source (Next-hop Group) + + +**Nexthop Show Command** + +- show nexthop-group + + +### entry (Next-hop Group) + + +The **entry** command defines a next-hop entry in the +***nexthop group*** configuration mode . Each next-hop entry +specifies a next-hop IP address for static routes to which the next-hop group is +assigned. The group size (size (Nexthop Group)) specifies the +quantity of entries a group contains. Each entry is created by an individual +command. Entries within a group are distinguished by an index number. 
+ + +The **no entry** and **default entry** commands delete the specified nexthop group entry, as referenced by index number, by removing the corresponding **entry** statement from ***running-config.*** + + +**Command Mode** + + +Nexthop-group Configuration + + +**Command Syntax** + + +entry +index +tunnel-destination +ipv4_address + + +no entry +index + + +default entry +index + + +**Parameters** + +- **index** - Entry index. Values range from **0** to +**group-size – 1**. + +- **ipv4_address** - Nexthop IPv4 address. + +- **group-size** - the group’s entry capacity as +specified by the size (Nexthop Group) command. + + +**Example** + + +These commands set the next-hop group size at four entries, then create three +next-hop entries. EOS drops packets hashed to the fourth +entry. +``` +`switch(config)# **nexthop-group NH-1** +switch(config-nexthop-group-NH-1)# **size 4** +switch(config-nexthop-group-NH-1)# **entry 0 tunnel-destination 10.13.4.4** +switch(config-nexthop-group-NH-1)# **entry 1 tunnel-destination 10.15.4.22** +switch(config-nexthop-group-NH-1)# **entry 2 tunnel-destination 10.15.5.37** +switch(config-nexthop-group-NH-1)# **show active** + nexthop-group NH-1 + size 4 + ttl 64 + entry 0 tunnel-destination 10.13.4.4 + entry 1 tunnel-destination 10.15.4.22 + entry 2 tunnel-destination 10.15.5.37 +switch(config-nexthop-group-NH-1)#` +``` + + +### ip route nexthop-group + + +The **ip route nexthop-group** command creates a static route. +The destination is a network segment. The next-hop address is one of the IP +addresses that comprise the specified next-hop group. Packets forwarded as a result +of this command are encapsulated as specified by the tunnel-type parameter of the +specified next-hop group. + + +When multiple routes exist to a destination prefix, the route with the lowest +administrative distance takes precedence.
When a route created through this command +has the same administrative distance as another static route (ECMP), the route that +was created earliest has preference; ***running-config*** +stores static routes in the order that they are created. + + +By default, the administrative distance assigned to static routes is +**1**. Assigning a higher administrative distance to a +static route configures it to be overridden by dynamic routing data. For example, a +static route with a distance value of **200** is overridden by +OSPF intra-area routes, which have a default distance of +**110**. + + +The **no ip route nexthop-group** and **default ip +route nexthop-group** commands delete the specified route by +removing the corresponding **ip route nexthop-group** command +from ***running-config***. **ip route +nexthop-group** statements for an IP address in multiple VRFs +must be removed separately. + + +A **no ip route** or **default ip route** +command without a next-hop parameter deletes all corresponding **ip route +nexthop-group** statements. Deleting a user-defined VRF also +deletes its static routes. + + +**Command Mode** + + +Global Configuration + + +**Command Syntax** + + +ip route [VRF_INST +dest_net +nexthop-group +nhgp_name +[dist][TAG_OPTION][RT_NAME] + + +no ip route [VRF_INST] +dest_net [nexthop-group +nhgroup_name][distance] + + +default ip route [VRF_INST] dest_net + [nexthop-group +nhgroup_name][distance] + + +**Parameters** + +- **VRF_INST**      Specifies the VRF instance being +modified. + +- **no parameter**      Changes are made to the default VRF. + +- **vrf** +**vrf_name**      Changes are made to the +specified VRF. + +- **dest_net**      Destination IPv4 subnet (CIDR or +address-mask notation). + +- **nhgp_name**      Name of next-hop group. + +- **dist** Administrative distance assigned to route. +Options include: + +- **no parameter**      Route assigned default administrative +distance of one. 
+ +- **1-255**      The administrative distance +assigned to route. + +- **TAG_OPTION**      Static route tag. Options +include: + +- **no parameter**      Assigns default static route tag of +**0**. + +- **tag** +**t_value**       Static route tag value. +**t_value** ranges from +**0** to +**4294967295**. + +- **RT_NAME**       Associates descriptive text to the +route. Options include: + +- **no parameter**       No text is associated with the route. + +- **name** +**descriptive_text**      The specified text +is assigned to the route. + + +**Related commands** + + +The **[ip route](/um-eos/eos-ipv4#xx1144639)** command creates a static route that +specifies the next-hop address without using next-hop groups. + + +**Example** + + +This command creates a static route in the default VRF, using the next-hop group of +**NH-1** to determine the next hop +address. +``` +`switch(config)# **ip route 10.17.252.0/24 nexthop-group NH-1** +switch(config)#` +``` + + +### nexthop-group + + +The **nexthop-group** command places the switch in +***nexthop-group*** configuration mode, through which next-hop +groups are created or modified. The command also specifies the tunnel protocol for +extracting payload from encapsulated packets that arrive through an IP address upon +which the group is applied. + + +A next-hop group is a data structure that defines a list of next-hop addresses and +the encapsulation process for packets routed to the specified address. The command +either accesses an existing ***nexthop group*** configuration or creates a +new group if it specifies a non-existent group. Supported tunnel protocols include +IP ECMP and IP-in-IP. + + +The ***nexthop-group*** configuration mode is not a group change mode; +***running-config*** is changed immediately +upon entering commands. Exiting the ***nexthop-group*** configuration mode +does not affect ***running-config***. The +**exit** command returns the switch to +***global*** configuration mode. 
+ + +The **no nexthop-group** and **default +nexthop-group** commands delete previously configured commands in +the specified **nexthop-group** mode. When the command does not +specify a group, it removes all next-hop-groups. When the command specifies a tunnel +type without naming a group, it removes all next-hop-groups of the specified +type. + + +**Command Mode** + + +Global Configuration + + + + + +**Command Syntax** + + +nexthop-group +group_name + type +TUNNEL_TYPE + + +no nexthop-group +[group_name][type +TUNNEL_TYPE] + + +default nexthop-group +[group_name][type TUNNEL_TYPE] + + +**Parameters** + +- **group_name** Nexthop group name. + +- **TUNNEL_TYPE** Tunnel protocol of the nexthop-group. +Options include: + +- **ip** ECMP nexthop. + +- **ip-in-ip** IP in IP tunnel. + +- **gre** Encapsulates Layer 3 protocols +over IP networks. + +- **mpls-over-gre** Tunnels MPLS over a non-MPLS +network. + +- entry Nexthop Group Entry +Configuration. + +- size Nexthop Group Entry Size. + +- tos Tunnel encapsulation IP type of +service. + +- ttl Tunnel encapsulation TTL value. + +- tunnel-source Source Interface or +Address. + + +**Commands Available in Nexthop-group Configuration Mode** + +- entry (Next-hop Group) + +- size (Nexthop Group) + +- ttl (Next-hop Group) + +- tunnel-source (Next-hop Group) + + +**Restrictions** + + +Tunnel type availability varies by switch platform. + + +**Examples** + + +- This command creates a nexthop group named **NH-1** +that specifies ECMP +nexthops. +``` +`switch(config)# **nexthop-group NH-1 type ip** +switch(config-nexthop-group-NH-1)#` +``` + +- This command exits nexthop-group mode for the **NH-1** +nexthop +group. +``` +`switch(config-nexthop-group-NH-1)# **exit** +switch(config)#` +``` + +- These commands create a nexthop group **NH-2** of type +MPLS over +GRE.
+``` +`switch(config)# **nexthop-group NH-2 type mpls-over-gre** +switch(config-nexthop-group-NH-2)# **tunnel-source 11.1.1.1** +switch(config-nexthop-group-NH-2)# **ttl 32** +switch(config-nexthop-group-NH-2)# **tos 20** +switch(config-nexthop-group-NH-2)# **entry 0 push label-stack 16000 tunnel-destination 11.1.1.2** +switch(config)# **ip route 100.1.1.1/32 Nexthop-Group NH-2** + +Counters for nexthop group may be enabled using the following command +switch(config)# **hardware counter feature nexthop**` +``` + + +### show nexthop-group + + +The **show nexthop-group** command displays properties of the +specified nexthop group. + + +**Command Mode** + + +EXEC + + +**Command Syntax** + + +show nexthop-group +nhgroup_name [VRF_INST] + + +**Parameters** + + +- **nhgroup_name** Name of the group displayed by +command. + +- **VRF_INST** Specifies the VRF instance for which data +is displayed. + +- **no parameter**     Context-active VRF. + +- **vrf** +**vrf_name** Specifies the name of VRF +instance. System default VRF is specified by +**default**. + + +**Related commands** + + +The nexthop-group command places the switch in the +***nexthop-group*** configuration mode to create a new group or +modify an existing group. + + +**Example** + + +This command displays the nexthop group +information.
+``` +`switch(config)# **show nexthop-group** + Id 107 + Type mplsOverGre + Size 1 (auto size enabled, programmed size 1) + TTL 32 + Source IP 11.1.1.1 + Entries (left most label is the top of the stack) + 0 push label-stack 16000 tunnel-destination 11.1.1.2 + Tunnel destination directly connected, Ethernet1 + 00:00:aa:aa:aa:aa, Ethernet1 + +With nexthop group counter enabled +switch(config)# **show nexthop-group** + Id 1 + Type mplsOverGre + Size 1 (auto size enabled, programmed size 1) + TTL 64 + Source IP 0.0.0.0 + Entries (left most label is the top of the stack) + 0 push label-stack 16000 tunnel-destination 1.1.1.2 + Tunnel destination directly connected, Ethernet1 + 00:00:aa:aa:aa:aa, Ethernet1 + 0 packets, 0 bytes + +switch(config)#**show nexthop-group summary** +Number of Nexthop Groups configured: 1 +Number of unprogrammed Nexthop Groups: 0 + + Nexthop Group Type Configured +-------------------- ------------ + MPLS over GRE 1 + + Nexthop Group Size Configured +-------------------- ------------ + 1 1` +``` + + +### size (Nexthop Group) + + +The **size** command configures the quantity of next-hop +entries in the Nexthop-Group Configuration Mode . Each entry specifies a next-hop IP +address for static routes to assign to the group. Configure entries with the entry (Next-hop Group) command. The default size is +**256** entries. + + +The **no size** and **default size** +commands restore the size of the configuration mode nexthop group to the default +value of **256** by removing the corresponding +**size** command from +***running-config***. + + +**Command Mode** + + +Nexthop-group Configuration + + +**Command Syntax** + + +size +entry_size + + +no size +entry_size + + +default size +entry_size + + +**Parameter** + + +**entry_size** Group size (entries). Value ranges from +**1** to **255** with a +default value of **256**. + + +**Example** + + +This command configures the next-hop group **NH-1** to contain +**128** +entries. 
+``` +`switch(config)# **nexthop-group NH-1** +switch(config-nexthop-group-NH-1)# **size 128** +switch(config-nexthop-group-NH-1)# **show active** + nexthop-group NH-1 + size 128 + ttl 64 +switch(config-nexthop-group-NH-1)#` +``` + + +### ttl (Next-hop Group) + + +The **ttl** command specifies the number entered into the TTL +(time to live) encapsulation field of packets transmitted to the address designated +by the configuration mode next-hop group. The default TTL value is +**64**. + + +The **no ttl** and **default ttl** +commands restore the default TTL value written into TTL fields for the ***nexthop +group*** configuration mode by deleting the corresponding +**ttl** command from +***running-config***. + + +**Command Mode** + + +Nexthop-group Configuration + + +**Command Syntax** + + +ttl +hop_expiry + + +no ttl +hop_expiry + + +default ttl +hop_expiry + + +**Parameters** + + +**hop_expiry**     Period that the packet remains valid +(seconds or hops) Value ranges from **1** to +**64**. + + +**Restrictions** + + +This command is available only to Next-hop groups for tunnels of type +**IP-in-IP**, **GRE**, +**MPLS**, and **MPLS over +GRE**. + + +**Related commands** + + +The nexthop-group command places the +switch in the ***nexthop-group*** configuration mode. + + +**Examples** + +- This command configures the **ttl** setting to +**32** for next-hop group +**NH-1** +packets. +``` +`switch(config)# **nexthop-group NH-1** +switch(config-nexthop-group-NH-1)# **ttl 32** +switch(config-nexthop-group-NH-1)# **show active** + nexthop-group NH-1 + size 128 + ttl 32 +switch(config-nexthop-group-NH-1)#` +``` + +- This command restores the **no ttl** setting for +next-hop group **NH-1** +packets. 
+``` +`switch(config-nexthop-group-NH-1)# **no ttl** +switch(config-nexthop-group-NH-1)# **show active nexthop-group NH-1** + size 128 + ttl 64 +switch(config-nexthop-group-NH-1)#` +``` + + +### tunnel-source (Next-hop Group) + + +The **tunnel-source** command specifies the address that is +entered into the source IP address encapsulation field of packets that are +transmitted as designated by the ***nexthop group*** configuration mode . +The command may directly specify an IP address or specify an interface from which an +IP address is derived. The default source address IP address is +**0.0.0.0**. + + +The **no +tunnel-source** and **default +tunnel-source** commands remove the source IP address setting from +the configuration mode nexthop group by deleting the +**tunnel-source** command from +***running-config***. + + +**Command +Mode** + + +Nexthop-group Configuration + + +**Command +Syntax** + + +tunnel-source +SOURCE + + +no tunnel-source +SOURCE + + +default tunnel-source +SOURCE + + +**Parameters** + +**SOURCE** +IP address or derivation interface. Options include: + +- **ipv4_addr**      An IPv4 address. + +- **intf ethernet** +**e_num**     Ethernet interface specified by +**e_num**. + +- **intf loopback** +**l_num**     Loopback interface specified by +**l_num**. + +- **intf management** +**m_num**     Management interface specified by +**m_num**. + +- **intf port-channel** +**p_num**     Port-channel interface specified by +**p_num**. + +- **intf vlan** +**v_num**     VLAN interface specified by +**v_num**. + + +**Restrictions** + + +This command is available only to Nexthop +groups for tunnels of type **ip-in-ip**. + +**Related +commands** + +The nexthop-group command places the switch in the +***nexthop-group*** configuration +mode. + + +**Example** + +These commands create **interface +loopback 100**, assign an IP address to the interface, then +specifies that address as the tunnel source for packets designated by nexthop-group +**NH-1**. 
+``` +`switch(config)# **interface loopback 100** +switch(config-if-Lo100)# **ip address 10.1.1.1/32** +switch(config-if-Lo100)# **exit** + +switch(config)# **nexthop-group NH-1** +switch(config-nexthop-group-NH-1)# **tunnel-source intf loopback 100** +switch(config-nexthop-group-NH-1)# **show active nexthop-group NH-1** + size 256 + ttl 64 + tunnel-source intf Loopback100 +switch(config-nexthop-group-NH-1)# **show nexthop-group NH-1** +Name Id type size ttl sourceIp +NH-1 2 ipInIp 256 64 10.1.1.1 + +switch(config-nexthop-group-NH-1)#` +``` + + +### tunnel nexthop-group unresolved + + +The **tunnel nexthop-group unresolved** command in the Router General Configuration Mode +installs a nexthop-group tunnel only if a viable nexthop group exists. Using this command overrides the default behavior of creating a nexthop-group +tunnel even if no viable nexthop-group exists for the configuration. + + +The **no | default** versions of the command removes the configuration from the ***running-config*** on the switch. + + +**Command Mode** + + +Router General Configuration + + +**Command Syntax** + + +**tunnel nexthop-group unresolved invalid** + + +**no tunnel nexthop-group unresolved invalid** + + +**default tunnel nexthop-group unresolved invalid** + + +**Parameters** + + +- **tunnel** - Specifies using a tunnel for the configuration. + +- **nexthop-group** - Applies the configuration to nexthop groups. + +- **unresolved** - Specifies applying the command to unreachable destinations. + +- **invalid** - Do not install the tunnel in the routing table. 
+ + +**Example** + + +Use the following commands to apply the configuration to the switch: + + +``` +`switch(config)# **router general** +switch(config-router-general)# **tunnel nexthop-group unresolved invalid** +switch(config-router-general)` +``` diff --git a/docs/arista-scraped/static-inter-vrf-route.md b/docs/arista-scraped/static-inter-vrf-route.md new file mode 100644 index 00000000..1eaca6bc --- /dev/null +++ b/docs/arista-scraped/static-inter-vrf-route.md @@ -0,0 +1,82 @@ + + + +# Static Inter-VRF Route + + +The Static Inter-VRF Route feature adds support for static inter-VRF routes. This enables the configuration of routes to destinations in one ingress VRF with an ability to specify a next-hop in a different egress VRF through a static configuration. + + +You can configure static inter-VRF routes in default and non-default VRFs. A different +egress VRF is achieved by “tagging” the **next-hop** or **forwarding +via** with a reference to an egress VRF (different from the source +VRF) in which that next-hop should be evaluated. Static inter-VRF routes +with ECMP next-hop sets in the same egress VRF or heterogenous egress VRFs +can be specified. + + +The Static Inter-VRF Route feature is independent and complementary to other mechanisms that can be used to setup local inter-VRF routes. The other supported mechanisms in EOS and the broader use-cases they support are documented here: + +- [Inter-VRF Local Route Leaking using BGP VPN](/um-eos/eos-inter-vrf-local-route-leaking#xx1348142) + +- [Inter-VRF Local Route Leaking using VRF-leak Agent](/um-eos/eos-inter-vrf-local-route-leaking#xx1346287) + + +## Configuration + + +The configuration to setup static-Inter VRF routes in an ingress (source) VRF to forward IP traffic to a different egress (target) VRF can be done in the following modes: + +- This command creates a static route in one ingress VRF that points to a next-hop +in a different egress VRF. 
+ip | ipv6 +route [vrf +vrf-name +destination-prefix [egress-vrf +egress-next-hop-vrf-name] +next-hop] + + +## Show Commands + + +Use the **show ip route vrf** to display the egress VRF name if it +differs from the source VRF. + + +**Example** +``` +`switch# **show ip route vrf vrf1** + +VRF: vrf1 +Codes: C - connected, S - static, K - kernel, + O - OSPF, IA - OSPF inter area, E1 - OSPF external type 1, + E2 - OSPF external type 2, N1 - OSPF NSSA external type 1, + N2 - OSPF NSSA external type2, B - BGP, B I - iBGP, B E - eBGP, + R - RIP, I L1 - IS-IS level 1, I L2 - IS-IS level 2, + O3 - OSPFv3, A B - BGP Aggregate, A O - OSPF Summary, + NG - Nexthop Group Static Route, V - VXLAN Control Service, + DH - DHCP client installed default route, M - Martian, + DP - Dynamic Policy Route, L - VRF Leaked + +Gateway of last resort is not set + + S 1.0.1.0/24 [1/0] via 1.0.0.2, Vlan2180 (egress VRF default) + S 1.0.7.0/24 [1/0] via 1.0.6.2, Vlan2507 (egress VRF vrf3)` +``` + + + + + + +## Limitations + + + + + + - For bidirectional traffic to work correctly between a pair of VRFs, static inter-VRF + routes in both VRFs must be configured. + + - Static Inter-VRF routing is supported only in multi-agent routing protocol mode. diff --git a/docs/arista-scraped/traffic-management.md b/docs/arista-scraped/traffic-management.md new file mode 100644 index 00000000..a887ec0c --- /dev/null +++ b/docs/arista-scraped/traffic-management.md @@ -0,0 +1,10551 @@ + + + +# Traffic Management + + +This chapter describes Traffic Management on Arista switches, including configuration +instructions and command descriptions. 
Topics discussed by this chapter +include: + +- Traffic Management Conceptual Overview + +- Traffic Management Configuration Trident Platform +Switches + +- Traffic Management Configuration Trident II +Platform Switches + +- Traffic Management Configuration +Commands + + +## Traffic Management Conceptual +Overview + + +Traffic is managed through policy maps that apply data shaping methods to specific data streams. +A policy map is a data structure that identifies specific data streams and +then defines shaping parameters that modify packets within the streams. The +switch defines four types of policies: + +- Control Plane +Policies: Control plane policy maps are +applied to the control plane. + +- QoS Policies: QoS policy +maps are applied to Ethernet and port channel +interfaces. + +- Segment Routing Traffic Engineering Policy (SR-TE). + +- PBR Policies: PBR policy +maps are applied to Ethernet interfaces, port channel +interfaces and switch virtual interfaces (SVIs). + + +A policy map consists of classes. Each class contains an eponymous class map and traffic +resolution commands. + +- A class map is a data structure that defines a data stream by +specifying characteristics of data packets that comprise +that stream. Each class map is typed as either QoS, control +plane, or PBR and is available only to identically typed +policy maps. + +- Traffic resolution commands specify data handling methods for +traffic that matches a class map. Traffic resolution options +vary by policy map type. + + +Data packets that enter an entity to which +a policy map is assigned are managed with traffic resolution commands +of the first class that matches the packets. + + +### Control Plane Policies + + +The switch defines one control plane policy map named ***copp-system-policy***. The +***copp-system-policy*** policy map always applies to the control plane +and cannot be removed from the switch. Other control plane policy maps cannot be added. 
+**Copp-system-policy** consists of preconfigured classes, each containing a +static class map and traffic resolution commands. Preconfigured classes cannot be +removed from ***copp-system-policy***. + + +The switch provides static class maps and you cannot modify or delete them. The switch uses a +naming convention of static class maps as ***copp-system-*** +*name*, where *name* differentiates the class maps. Static class maps have +pre-defined internal conditions, not based on ACLs, and only listed in +***running-config*** as components of ***copp-system-policy***. +The sequence of static class maps in the policy map is not significant. Traffic +resolution commands define minimum (bandwidth) and maximum (shape) transmission rates +for data streams matching the corresponding class map. + + +***Copp-system-policy*** can be modified through the following steps: + +- Add classes consisting of an eponymous dynamic class map and traffic resolution +commands. +Create, edit, or delete dynamic class maps, and filter traffic +with a single IPv4 ACL, and list in +***running-config***. + +- Change traffic resolution commands for a preconfigured class. + + +The following section describes control plane traffic policy configuration procedures: + +- Configuring Control Plane Traffic Policies Trident Platform +Switches + + +### QoS Policies + + +QoS policy maps are user defined. +The switch does not provide preconfigured QoS policy maps and in the +default configuration, policy maps are not applied to any Ethernet or +port channel interface. Policy maps and class maps are created and applied +to interfaces through configuration commands. + + +A QoS policy map is composed of one or more classes. Each class contains an eponymous dynamic + class map and traffic resolution commands. Dynamic class maps are + user created, can be edited or deleted, filter traffic with a + single IPv4 ACL, and are listed in + ***running-config***. 
+ + +QoS traffic resolution commands perform one of the following: + +- Set the Layer 2 CoS field + +- Set the DSCP value in the ToS byte + +- Specify a traffic class queue + + +The last class in all QoS policy maps is ***class-default***, which is composed as +follows: + +- The ***class-default*** class map matches all traffic except IPv4 or +IPv6 traffic and is not editable. + +- By default, ***class-default*** class contains no traffic resolution +commands. Traffic resolution commands can be added through +configuration commands. + + +Data packets that enter an interface to which +a policy map is assigned are managed with traffic resolution commands +that correspond to the first class that matches the packet. + + +These sections describe QoS traffic policy configuration procedures: + +- Configuring QoS Traffic Policies Arad Platform Switches + +- Configuring QoS Traffic Policies FM6000 Platform Switches + +- Configuring QoS Traffic Policies Petra Platform Switches + +- Configuring QoS Traffic Policies Trident Platform +Switches + + +### Segment Routing Traffic Engineering Policy (SR-TE) + + +Segment Routing Traffic Engineering Policy (SR-TE) policy uses Segment Routing +(SR) to enable a headend to steer traffic along any path without maintaining +per flow state in every node based on the policy. Configuring SR policy for +the MPLS dataplane (SR-MPLS) for Type-1 SR policy segments with BGP and +locally configured policies as sources of SR policy is available on DCS-7500 +and DCS-7280 family of switches. + + +#### SR Policy Overview + +**SR +Policy Identification**The following identifies an SR +policy. + +- **Endpoint** - An IPv4 or IPv6 address which +refers to the destination of the policy. EOS +allows 0/0 and 0:: and calls these IP addresses +*null endpoints*. + +- **Color** - An unsigned 32-bit opaque +numerical quantity. Define the semantic of a color +as you prefer. 
It can refer to, for instance, an +application or a type of traffic, such as low +latency, or a geographical location. + + +**SR Policy +Constituents** The SR policy consists of *candidate +paths*. Each candidate path has the following. + +- **SID-lists (SLs)** - An ordered list of +Segment Identifiers. Each SID provides an MPLS +label (in the MPLS instantiation of SR). An SL +encodes one path from the headend to the +destination. Each SL has an optional weight +attached to it for the purpose of Unequal Cost +Multipath (UCMP) traffic distribution. The default +value for SL weight is +**1**. + +- **Preference** - An optional, unsigned 32-bit +integer used in the candidate path selection +algorithm to select the *active* candidate +path. The default value for preference is +**100**. + +- **Binding SID (BSID)** - an optional +SID. Note: In EOS, a BSID is mandatory for each +candidate path. + + +**SR Policy +Sources** A headend learns SR policies using the +following methods: + +- **BGP** + +- Single agent routing model (Ribd) + +- Multi-agent routing model + +- **Local configuration using CLI** + +- Single agent routing model (Ribd) + +- Multi-agent routing model + +- Openconfig YANG models + +- **PCEP** Note: EOS does not support +PCEP. + + +**Identity of a Candidate +Path** + +A candidate path within an SR policy is +identified by a 3-tuple of {Protocol-Origin, Originator, +Discriminator}. In EOS, for locally configured policies: + +- The ASN in the Originator is set to 0. + +- The node address in the Originator is set to +**0.0.0.0**. + +- The discriminator is set to the Preference +configured. Note: EOS CLI allows configuring only +one candidate path at a given preference and does +not allow configuring the discriminator for a +candidate path. + + +**State of an SID List (SL)** + +The following +describes the state of an SL: + +- **Valid** - The top label of the SL resolves within +the LFIB to the outgoing next hop(s), interface(s) +and a label action.
+ +- **Invalid** - The top label of the SL is unresolvable to +the outgoing next hop(s), interface(s) and a label +action. An SL is also marked as invalid when the SL +is resolvable, but the resolved labeled stack +exceeds the platform’s maximum SID depth (MSD), that +is, exceeds the maximum number of labels the +platform can push in to the outgoing +packet. Note: The state is either +valid or +invalid. + + +**State of a Candidate Path** + +The following +describes the states of a candidate path. + +- **Invalid** - Not eligible to participate in the +best/active candidate path selection algorithm +because of one of the reasons below. + +- Invalid constituent SLs in the candidate +path. + +- No Binding SID present in the candidate +path. + +- Binding SID present but outside SRLB range for +the candidate path. + +- **Valid** - Has at least one valid SL, but has lost out to +some other candidate path in the best / active +candidate path selection algorithm. + +- **Active** - A valid candidate path exists and wins +the best / active candidate path selection +algorithm. The active candidate path installs in the +switch hardware and forwards traffic. + + +**State of an SR Policy** + + +EOS considers an SR +policy as *valid* when at least one of its candidate paths is +valid. Otherwise, the SR policy is +*invalid*. + + +**Resolution of an SL** + + +An SL +resolves if the top label (first SID) can be resolved in the system +Labeled FIB (LFIB) to yield a nexthop and outgoing interface(s). The +other labels in the SID-List do not play a part in +resolution. + + +**Best Candidate Path (Active Candidate +Path) Selection Algorithm** + +EOS overrides selection +based on discriminator by retaining the current active candidate +path even when current active path has a lower discriminator value. +This reduces the active path flap when a new path appears with the +same significance. The following lists a summary of valid candidate +paths ordering for a given policy.
+ +- The candidate path with higher preference selected. + +- Locally configured candidate path selected over a BGP +learned path + +- Lower originator selected in the following instances: + +- Lower AS number of Originator field +selected. + +- Lower Node address of Originator field +selected. + +- Current active candidate path selected in the following +instances: + + +The following displays the reason for not selecting a +path as an active path for a specified policy. + + +``` +`switch#**show traffic-engineering segment-routing policy endpoint color **` +``` + + +**Binding SID** + + +The following describes the use cases for the Binding +SID: + +- Stitch together multiple domains. + +- Stitch together different traffic tunnels . + +- Overcome label stack imposition limitation in hardware. + + +**BSID Conflict +Handling** + + +**Examples** + + +- **Between Policies** - If the policy (E1, C1) becomes +eligible to be active first, then it installs in the LFIB +and the policy (E2,C2) whose best path(CP1) conflicts with +the Policy (E1, C1) and does not become active. + +- Policy(E1, C1): CP1: Binding-SID 965536 (wins +best path) + +- Policy(E2, C2): CP1: Binding-SID 965536 (wins +best path) + +- CP2: Binding-SID 965537 + +- **with another Application**: The SR-TE policies have the +lowest preference when a conflict exists with any other +application in EOS using the SRLB range. The candidate paths +with the same binding-SID as that of an LFIB entry by +another application, for example, static adjacency segment, +remains invalid. + + +In both the cases, when the conflict no longer exists, the candidate +paths re-evaluate and may become active. + +**BGP as a +Source of Policies** +SR Policies from a BGP +peer (a controller, route reflector) received for installation at +the headend by EOS. It does not propagate the received policies to +BGP peers nor does it originate SR Policies for transmission to BGP +peers. 
+ + +EOS supports IPv4 or IPv6 peers, which can be +single hop or multi-hop iBGP or eBGP peers. + + +- **SAFI 73 for AFI 1 and AFI 2**: IPv4 and IPv6 policy +endpoints, with the encoding defined in section 2.1 of +**Advertising Segment Routing Policies +in BGP**. Note: The nexthop +address-family must match the AFI of the +NLRI. + +- **Sub-TLVs of Tunnel Encapsulation TLV of type 15 (SR-TE Policy +Type) of the Tunnel Encapsulation Path Attribute** + + +- Preference (Sub-TLV Type 12) + +- Binding SID (Sub-TLV Type 13) of length +**2** or +**6** bytes + +- Segment List (Sub-TLV Type 128). The following +Segment List sub-TLVs are supported: + +- Type 1 Segment (Sub-TLV type 1) + +- Weight (Sub-TLV type 9) + +- Explicit NULL Label Policy (Sub-TLV Type +14) + +- SR Prefix SID (sub TLV 3 of TLV 149, TLV +150) + +- SR Range - Contents of TLV 149, TLV 150 +(multi-topology) + + +Note: EOS ignores all other sub-TLVs of the Tunnel Encapsulation TLV +and Segment List sub-TLVs. + + +**Route-Target and NO_ADVERTISE Community in SR-TE SAFI +Updates** + + +EOS implements the Acceptance and Usability checks as defined in +sections 4.2.1 and 4.2.2 of the IETF draft [Advertising Segment +Routing Policies in BGP](https://datatracker.ietf.org/doc/html/draft-ietf-idr-segment-routing-te-policy-02). However, EOS skips +matching the Route-Target with the router-ID of the headend +if the SR-TE NLRI is tagged with the +**NO_ADVERTISE** +community. + + +**ECMP does not support SR-TE SAFI Paths** + + +EOS does not support ECMP for BGP SR-TE SAFI. BGP path selection +chooses only one best candidate path and publishes it to the +SR-TE Policy Agent for candidate path selection. Note: EOS +supports ECMP of BGP next hops where each next hop +resolves to an SR-TE policy.
+ + +**Path Selection within BGP** + + +The IETF draft **[Advertising Segment +Routing Policies in BGP](https://datatracker.ietf.org/doc/html/draft-ietf-idr-segment-routing-te-policy-02)** supports +passing multiple candidate paths from a single protocol +source for an SR-TE policy path selection. Therefore, it +includes a field distinguisher in the NLRI which can be +unique for each controller to make BGP pass through the +policies. However when multiple sources use the same +distinguisher, BGP performs a path selection for the tuple: +Endpoint, Color and Distinguisher. The best path for that +tuple publishes to the SR-TE Policy Agent for selecting an +Active path. The best +**bgp-best-path** selection +applies to SR-TE SAFI as well. + + +*Error Handling / Edge Cases* + + +- **Weight 0**: The IETF draft does not limit the range +of SL weight to exclude weight 0. A SID-List with +weight 0 is not used for forwarding so BGP module in +EOS does not pass on SID-Lists with weight 0 to the +SR-TE policy agent. Such SID-Lists will be visible +in **show bgp sr-te** commands +but not in **show traffic-engineering +segment-routing policy** +commands. + +- **Empty SLs**: Given the TLV encoding used to +propagate SR Policies in BGP, it is possible to +receive SID-Lists without SIDs. The BGP module in +EOS does not pass empty SID-Lists to SR-TE policy +agent. Such SID-Lists are visible in +**show bgp sr-te** commands +but not in **show traffic-engineering +segment-routing policy** +commands. + +- **Non Type 1 segments**: EOS supports only Type-1 +segments. When receiving a BGP update with a +SID-List that has non Type-1 segments, EOS ignores +the entire SID-List and sends a +`**BGP-4-SRTE_IGNORED_SEGMENT_LIST_UNSUPPORTED_SEGMENTS**` +syslog. Such SID-Lists are not stored locally, and +**show bgp sr-te** command +does not display them. Note: The SID-Lists made up +of all Type-1 segments pass to the SR-TE policy +agent. 
+ + +**Steering Traffic into a Policy** + + +**Incoming label as BSID - Labelled Steering** + + +At +the headend when receiving a packet with a label stack with a BSID +of an active CP of a valid SR Policy as the top label, the headend +pops the label, and imposes the resolved label stack on the outgoing +packet. + + +**Example** + + +For instance, consider an SR Policy with +an active candidate path with BSID **965536** +and SL with label stack [**965540**, +**900001**, +**900002**]. Assume that +**965540** is an IS-IS SR +Adjacency SID. If an incoming packet has a label stack +[**965536**, +**100000**], then the outgoing +label stack consists of [**900001**, +**900002**, +**100000**]. + + +**Steering BGP learnt IP(v6) prefixes - IP Steering** + + +**Incoming label is BSID - Labelled Steering** + + +At +the headend, BGP IPv4 and IPv6 routes received with one or more extended +color communities are recursively resolved through any active +SR Policy that matches the BGP routes’ nexthop and color. When +receiving an IPv4 or IPv6 packet forwarded using this policy, the +SL’s resolved label stack is imposed on the outgoing packet. + + +For +BGP routes received with color community to be steered via an SR +policy, the route’s nexthop must already be resolvable through IGP. +If no resolving route exists in IGP, the route is considered +unresolvable and does not program in hardware even if a matching SR +policy exists for the corresponding nexthop and color. + +If no +matching SR policy exists for the received BGP nexthop and color, +the route resolves through the IGP route in IP RIB. If an active SR +policy that matches the BGP nexthop and color gets instantiated at a +later time, the BGP route changes from resolving through IGP to the +new active SR policy. Note: The recursion through SR policy is only +applicable for active BGP routes in RIB.
+ +**Color only IP steering +using CO bits** + + +It is possible to relax the +requirement of an exact match of the BGP route’s nexthop with the +endpoint of the SR Policy using the “CO” (Color Only) bits in the +color extended community. The “CO” bits are 2 reserved bits +repurposed for color only steering as defined in section 3 of [Advertising Segment Routing +Policies in BGP](https://datatracker.ietf.org/doc/html/draft-ietf-idr-segment-routing-te-policy-02). The exact match of the nexthop is +done with the CO bits set to 00 or 11. + +**CO = 01 +Steering**: relaxes the nexthop to match the null endpoint of a +policy. For a BGP route with nexthop N and color C, the following +order is used for resolution. If there is no IGP route resolving the +BGP nexthop, the route is not programmed in hardware. + +- Active SR policy with endpoint N and color C + +- Active SR policy with null endpoint (from the same AFI +as the BGP route) and color C + +- Active SR policy with null endpoint from any AFI and +color C + +- IGP route + + +**CO = 10 Steering**: in addition to the steps in CO += **01** steering, CO = +**10** additionally relaxes +the nexthop to match *any* endpoint. The following order is +used for resolving a BGP route with nexthop N and color C. The +behavior described is in accordance with section 8.8.1 of the IETF +draft [Segment Routing Policy for +Traffic Engineering](https://datatracker.ietf.org/doc/html/draft-filsfils-spring-segment-routing-policy-05). 
+ + +- Active SR policy with endpoint N and color C + +- Active SR policy with null endpoint (from the same AFI +as the BGP route) and color C + +- Active SR policy with null endpoint from any AFI and +color C + +- Active SR policy for any endpoint from the same AFI as +the BGP route and color C + +- Active SR policy for any endpoint from any AFI and color +C + +- IGP route + + +**ECMP of IPv4/IPv6 Prefixes that Resolve over SR-TE +Policies** + + +When multiple BGP paths of BGP unicast prefixes resolve through +active SR policies form ECMP, the resulting FIB entry for +the BGP route has an ECMP of segment list paths which is a +union of all the segments-list entries present in each of +the resolving SR policies for the BGP paths. + + +**Example** + + +The following table displays four paths for prefix +**192.1.0.0/31**, and each +of the four paths resolves via SR-TE policies. + + +Table 1. List of Paths Resolved via SR-TE Policies + +| Path +| Nexthop +| Color +| Policy EP +| Policy Color +| Segment Lists +| Per SL Traffic Distribution +| + + +| 1 +| 1.0.0.2 +| CO(00):1000 +| 1.0.0.2 +| 1000 +| [2500 500], Weight: 1 +[2501 500], +Weight: 2 +| 8.33% +16.66% +| + + +| 2 +| 1.0.2.2 +| CO(00):2000 +| 1.0.2.2 +| 2000 +| [2502 500], Weight: 1 +[2503 500], +Weight: 1 +| 12.5% +12.5% +| + + +| 3 +| 1.0.4.2 +| CO(00):3000 +| 1.0.4.2 +| 3000 +| [2504 500], Weight: 1 +[2505 500], +Weight: 1 +| 12.5% +12.5% +| + + +| 4 +| 1.0.6.2 +| CO(00):4000 +| 1.0.6.2 +| 4000 +| [2506 500], Weight: 1 +[2507 500], +Weight: 1 +| 12.5% +12.5% +| + + +``` +`B I 192.1.0.0/31 [200/0] via SR-TE Policy 1.0.4.2, color 3000 + via SR-TE tunnel index 6, weight 1 + via 1.0.4.2, Ethernet1, label 2505 500 + via SR-TE tunnel index 5, weight 1 + via 1.0.4.2, Ethernet1, label 2504 500 + via SR-TE Policy 1.0.0.2, color 1000 + via SR-TE tunnel index 2, weight 1 + via 1.0.0.2, Ethernet2, label 2501 500 + via SR-TE tunnel index 1, weight 1 + via 1.0.0.2, Ethernet2, label 2500 500 + via SR-TE Policy 1.0.2.2, 
color 2000 + via SR-TE tunnel index 4, weight 1 + via 1.0.2.2, Ethernet3, label 2503 500 + via SR-TE tunnel index 3, weight 1 + via 1.0.2.2, Ethernet3, label 2502 500 + via SR-TE Policy 1.0.6.2, color 4000 + via SR-TE tunnel index 8, weight 1 + via 1.0.6.2, Ethernet6, label 2507 500 + via SR-TE tunnel index 7, weight 1 + via 1.0.6.2, Ethernet6, label 2506 500` +``` + + +The traffic distribution honors the weights of the SID-Lists. In +the example, each of the four SR Policies will get +**25%** of the total traffic +meant for prefix **192.1.0.0/31**. +Within each policy, the distribution is based on the weights +of the SID-Lists. + + +**ECMP Group when some BGP unicast paths resolve over SR +Policies and some via non SR Policy IGP +paths** + + +If some BGP paths resolve via SR Policy paths and some BGP paths +resolve via non SR Policy IGP, then the ECMP group formed +programmed as the active route in FIB, only considers the SR +Policy paths. ECMP in the FIB is not formed between paths +that resolve over SR Policy and paths that resolve via non +SR Policy IGP routes. In the example above, if SR Policy +with endpoint **1.0.6.2** and color +**4000** becomes inactive or +is removed, the FIB path for +**192.1.0.0/31** resolves +via 3 SR Policies as shown below. 
+ + +``` +`B I 192.1.0.0/31 [200/0] via SR-TE Policy 1.0.4.2, color 3000 + via SR-TE tunnel index 6, weight 1 + via 1.0.4.2, Ethernet1, label 2505 500 + via SR-TE tunnel index 5, weight 1 + via 1.0.4.2, Ethernet1, label 2504 500 + via SR-TE Policy 1.0.0.2, color 1000 + via SR-TE tunnel index 2, weight 1 + via 1.0.0.2, Ethernet2, label 2501 500 + via SR-TE tunnel index 1, weight 1 + via 1.0.0.2, Ethernet2, label 2500 500 + via SR-TE Policy 1.0.2.2, color 2000 + via SR-TE tunnel index 4, weight 1 + via 1.0.2.2, Ethernet3, label 2503 500 + via SR-TE tunnel index 3, weight 1 + via 1.0.2.2, Ethernet3, label 2502 500` +``` + + +Note: [**show ip +bgp**](/um-eos/eos-border-gateway-protocol-bgp#xx1117919) still shows a 4-way ECMP. +The FIB paths switch to resolving via the (non SR Policy) +IGP paths when there are no BGP paths in the ECMP group that +resolve via an SR Policy. + + +**UCMP of IPv4/IPv6 prefixes using LinkBandwidth (LBW) Extended +Community that resolve over SR-TE policies not +supported** + + +When multiple BGP paths of BGP unicast prefixes resolve through +active SR policies form ECMP, and the unicast paths also +contain the LBW extended community, EOS does not form UCMP +amongst the unicast paths. Only ECMP is formed at the +unicast prefix level. The LBW is ignored the behavior is +identical to the behavior explained in the previous +section. + + +**Resolution of BGP unicast prefixes that resolve over other BGP +unicast prefixes resolved via SR Policies** + + +A BGP unicast prefix P1, that is recursively resolved via another +BGP prefix P2, such that P2 resolves via an SR Policy, then +in the FIB, P1 is programmed with the resolved nexthop +pointing to the non SR Policy resolution of P2. P1 does not +use P2s SR Policy for forwarding. 
+ + +**Explicit Null Label Imposition** + + +When the address family of the BGP unicast prefix is +not the same as the address family of the endpoint of the SR +Policy that the unicast prefix resolves via, an explicit +null label is automatically imposed in the outgoing label +stack. + + +**Example** + + +If an IPv4 unicast prefix **P1** resolves +over a policy whose endpoint **EP1** +is an IPv6 address (this can happen due to color only +CO=01/10 steering with **P1** having +an IPv4 nexthop) and the SR Policy had a SID-List whose +resolved label stack is [**1001**, +**1002**, +**1003**], the outgoing +packet is imposed with [**1001**, +**1002**, +**1003**, +**0**] where +**0** is the IPv4 explicit +null label. + + +If an IPv6 prefix **P2** resolves over a +policy whose endpoint **EP2** is an +IPv4 address (this can happen with color only CO=01/10 +steering with **P2** having an IPv6 +nexthop) and the SR Policy had a SID-List whose resolved +label stack is [**1001**, +**1002**, +**1003**], the outgoing +packet is imposed with [**1001**, +**1002**, +**1003**, +**2**] where +**2** is the IPv6 explicit +null label. + + +The following table lists the configurations which result in +having an explicit-null label in the resolved label stack. + + +Table 2.
Configurations resulting in Explicit-Null Label in +Resolved Label Stack + +| ENLP configuration for the resolving SR +Policy +| IPv4 Prefixes +| IPv6 Prefixes +| + + +| None +| - +| - +| + + +| IPv4 +| IPv4 explicit null appended to the end +of label stack +| - +| + + +| IPv6 +| - +| IPv6 explicit null appended to the end +of label stack +| + + +| Both +| IPv4 explicit null appended to the end +of label stack +| IPv6 explicit null appended to the end +of label stack +| + + +| No/Default config (incase of +BGP learnt policies ENLP Sub-TLV is not +received) +| Resolving SR Policy has IPv4 Endpoint +address: +No explicit-null +| Resolving SR Policy has IPv4 Endpoint +address: +IPv6 explicit null appended to the end +of label stack +| + + +| Resolving SR Policy has IPv6 Endpoint +address: +IPv4 explicit null appended to the end +of label stack +| Resolving SR Policy has IPv6 Endpoint +address: +No explicit-null +| + + +#### Traffic Accounting + + +All egress tunnel counters (MPLS/GRE/MPLSoGRE using +SR-TE/Nexthop-group/BGP-LU tunnel types) share the same hardware +resource. + +- **7280E/7500E systems**: Up to +**16k** tunnels + +- **7280R/7500R systems**: Up to +**8k** tunnels + + +Tunnel counters are allocated on a first-come, first-served +basis. Configurations using GRE/MPLSoGRE, GRE, and MPLS further +limit a maximum of 4k countable egress MPLS tunnels on +7280R/7500R. + + +#### FEC Optimizations + + +The hardware FEC usage could be reduced as the underlying FEC is shared +among different routes. + +- Programming of the active candidate path of an SR-TE +policy in hardware is shared between the BSID route +and IP steering route. + +- If all of the following conditions are met, ISIS-SR MPLS +routes and tunnel entries directly point to the next +hop FEC generated by the routing agent (IGP FEC). + +- All the next hops of the MPLS route either +point to pop or forward (i.e. swapping to the same +label) label action. 
+ +- The switch is either a 7280 or a 7500 +platform. + +- The corresponding SR-TE policy BSID routes (and +corresponding Segment List tunnels) that resolve +over ISIS-SR MPLS routes, will directly point to the +IGP FEC. + + +#### Configuring SR-TE + + +The following commands start the **SrTePolicy** +agent and enter the switch into the Traffic Engineering +configuration +sub-mode. +``` +`switch(config)# **router traffic-engineering** +switch(config-te)# **segment-routing**` +``` + + +Note: The agent must be running even if the only source of policies is +BGP. +**Static Policy Configuration** +The following commands set the policy using endpoint and color value, and +define the BSID for the +policy. +``` +`switch(config-te-sr)# **policy endpoint *v4Address*|*v6Address* color *color-value*** +switch(config-te-sr-policy)# **binding-sid *mpls-label*** +switch(config-te-sr-policy)# **path-group preference *value***` +``` + + +The following commands enter the policy path configuration sub-mode and +add a segment list to the candidate +path. +``` +`switch(config-te-sr-policy)# **path-group preference *value*** +switch(config-te-sr-policy-path)# **segment-list label-stack** **label1 label2 …** **weight *value***` +``` + + +Note: The default weight value is **1**. Adding weight +is optional. Repeat the configuration statement for multiple segment +lists per candidate path. + +The following command configures a null label +policy. +``` +`switch(config-te-sr-policy-path)# **explicit-null [none|ipv4|ipv6|both]**` +``` + + +Note: The null label policy configuration is optional. + + +**BGP configuration for SR-TE SAFI** + + +The following commands configure a BGP router to activate a neighbor to +negotiate and accept SR-TE address-family with this +peer.
+``` +`switch(config)# **router bgp ** +switch(config-router-bgp)# **address-family ipv4|ipv6 sr-te** +switch(config-router-bgp-af-srte)# **neighbor *neighbor* activate**` +``` + + +The following command configures an inbound route-map to filter or modify +attributes on incoming SR-TE prefixes from the +peer. +``` +`switch(config-router-bgp-af-srte)# **neighbor *neighbor* route-map *routeMapName* in**` +``` + + +#### Configuring Egress SR-TE Traffic Accounting + + +The following command enables egress traffic accounting for SR policies +(also known as MPLS +tunnels). +``` +`switch(config)# **hardware counter feature mpls tunnel**` +``` + + +The following command displays current status of the MPLS +counters. +``` +`switch# **show hardware counter feature** +Feature Direction Counter Resource (Engine) +------------------ ---------------- -------------------------- +ACL-IPv4 out Jericho: 2, 3 +ACL in Jericho: 4, 5, 6, 7 +MPLS tunnel out Jericho: 8, 9` +``` + + +The following command disables egress traffic accounting for SR +policies. +``` +`switch(config)# **no hardware counter feature mpls tunnel**` +``` + + +The following command displays a summary information of SR-TE +SAFI. +``` +`switch# **show bgp sr-te summary** +BGP summary information for VRF default +Router identifier 100.1.1.2, local AS number 100 +Neighbor Status Codes: m - Under maintenance + Neighbor V AS MsgRcvd MsgSent InQ OutQ Up/Down State PfxRcd PfxAcc + 100.1.1.1 4 100 407 413 0 0 00:18:57 Estab 1 1 + 1000::1 4 100 407 413 0 0 00:18:57 Estab 1 1` +``` + + +The following command displays a summary information of candidate paths +received from neighbors which have negotiated AFI=1 for SR-TE +SAFI. 
+``` +`switch# **show bgp sr-te ipv4 summary** +BGP summary information for VRF default +Router identifier 100.1.1.2, local AS number 100 +Neighbor Status Codes: m - Under maintenance + Neighbor V AS MsgRcvd MsgSent InQ OutQ Up/Down State PfxRcd PfxAcc + 100.1.1.1 4 100 407 413 0 0 00:18:57 Estab 0 0` +``` + + +The following command displays a summary information of candidate paths +received from neighbors which have negotiated AFI=2 for SR-TE +SAFI. +``` +`switch# **show bgp sr-te ipv6 summary** +BGP summary information for VRF default +Router identifier 100.1.1.2, local AS number 100 +Neighbor Status Codes: m - Under maintenance + Neighbor V AS MsgRcvd MsgSent InQ OutQ Up/Down State PfxRcd PfxAcc + 1000::1 4 100 407 413 0 0 00:18:57 Estab 0 0` +``` + + +The following command displays all the SR-TE candidate +paths. +``` +`switch# **show bgp sr-te** +BGP routing table information for VRF default +Router identifier 100.1.1.1, local AS number 100 +Policy status codes: * - valid, > - active, E - ECMP head, e - ECMP + c - Contributing to ECMP +Origin codes: i - IGP, e - EGP, ? - incomplete +AS Path Attributes: Or-ID - Originator ID, C-LST - Cluster List, LL Nexthop - Link Local Nexthop + + Endpoint Color Distinguisher Next Hop Metric LocPref Weight Path +*> 133.1.1.1 0 1 130.1.1.3 0 100 0 ? +*> 133.1.1.1 0 2 130.1.1.3 0 100 0 ? +*> 1330::1 0 1 1300::3 0 100 0 ? +*> 1330::1 0 2 1300::3 0 100 0 ?` +``` + + +The following command displays all the SR-TE candidate paths with IPv4 +endpoints. +``` +`switch# **show bgp sr-te ipv4** +BGP routing table information for VRF default +Router identifier 100.1.1.1, local AS number 100 +Policy status codes: * - valid, > - active, E - ECMP head, e - ECMP + c - Contributing to ECMP +Origin codes: i - IGP, e - EGP, ? - incomplete +AS Path Attributes: Or-ID - Originator ID, C-LST - Cluster List, LL Nexthop - Link Local Nexthop + + Endpoint Color Distinguisher Next Hop Metric LocPref Weight Path +*> 133.1.1.1 0 1 130.1.1.3 0 100 0 ? 
+*> 133.1.1.1 0 2 130.1.1.3 0 100 0 ?` +``` + + +The following command displays all the SR-TE candidate paths with IPv6 +endpoints. +``` +`switch# **show bgp sr-te ipv6** +BGP routing table information for VRF default +Router identifier 100.1.1.1, local AS number 100 +Policy status codes: * - valid, > - active, E - ECMP head, e - ECMP + c - Contributing to ECMP +Origin codes: i - IGP, e - EGP, ? - incomplete +AS Path Attributes: Or-ID - Originator ID, C-LST - Cluster List, LL Nexthop - Link Local Nexthop + + Endpoint Color Distinguisher Next Hop Metric LocPref Weight Path +*> 1330::1 0 1 1300::3 0 100 0 ? +*> 1330::1 0 2 1300::3 0 100 0 ?` +``` + + +The following command displays information about a specific candidate +path. +``` +`switch# **show bgp sr-te endpoint 133.1.1.1 color 0 distinguisher 1** +BGP routing table information for VRF default +Router identifier 100.1.1.1, local AS number 100 +BGP routing table entry for Endpoint: 133.1.1.1 Color: 0 Distinguisher: 1 + Paths: 1 available + Local + 130.1.1.3 from 100.1.1.2 (100.1.1.2) + Origin INCOMPLETE, metric 0, localpref 100, IGP metric 0, weight 0, + received 00:01:29 ago, valid, internal, best + Community: no-advertise + Rx SAFI: SR TE Policy` +``` + + +The following command displays information about a specific candidate +path including the contents of the Tunnel encapsulation path +attribute TLV of type SR +policy. 
+``` +`switch# **show bgp sr-te endpoint 133.1.1.1 color 0 distinguisher 1 detail** +BGP routing table information for VRF default +Router identifier 100.1.1.1, local AS number 100 +BGP routing table entry for Endpoint: 133.1.1.1 Color: 0 Distinguisher: 1 + Paths: 1 available + Local + 130.1.1.3 from 100.1.1.2 (100.1.1.2) + Origin INCOMPLETE, metric 0, localpref 100, IGP metric 0, weight 0, + received 00:01:29 ago, valid, internal, best + Community: no-advertise + Rx SAFI: SR TE Policy + Tunnel encapsulation attribute: SR Policy + Preference: 200 + Binding SID: 965536 + Explicit null label policy: IPv4 + Segment-List: Label Stack: [ 16004 16003 ], Weight: 10 + Segment-List: Label Stack: [ 2000 3000 ]` +``` + + +The following command displays information about SR candidate paths +received from the specified neighbor. The “policies” keyword +displays only the candidate paths that are accepted. +“received-policies” additionally also displays the rejected +candidate +paths. +``` +`switch# **show bgp neighbors 100.1.1.2 ipv4 sr-te policies** +BGP routing table information for VRF default +Router identifier 100.1.1.1, local AS number 100 +Policy status codes: * - valid, > - active +Origin codes: i - IGP, e - EGP, ? - incomplete +AS Path Attributes: Or-ID - Originator ID, C-LST - Cluster List, LL Nexthop - Link Local Nexthop + + Endpoint Color Distinguisher Next Hop Metric LocPref Weight Path +*> 133.1.1.1 0 1 133.1.1.3 0 100 0 ? +*> 133.1.1.1 0 2 133.1.1.3 0 100 0 ?` +``` + + +The following command displays information about SR candidate paths +received from the specified neighbor along with the contents of the +Tunnel Encapsulation path attribute’s TLV of type SR Policy. The +**policies** keyword displays +only the candidate paths that are accepted. +**received-policies** +additionally also displays the rejected candidate +paths.. 
+``` +`switch# **show bgp neighbors 100.1.1.2 ipv4 sr-te policies detail** +BGP routing table information for VRF default +Router identifier 100.1.1.1, local AS number 100 +BGP routing table entry for Endpoint: 133.1.1.1 Color: 0 Distinguisher: 2 + Paths: 1 available + Local + 130.1.1.3 from 100.1.1.2 (100.1.1.2) + Origin INCOMPLETE, metric 0, localpref 100, IGP metric 0, weight 0, + received 00:01:29 ago, invalid, internal + Rx SAFI: SR TE Policy + Tunnel encapsulation attribute: SR Policy + Preference: 200 + Binding SID: 965536 + Explicit null label policy: IPv4 + Segment-List: Label Stack: [ 16004 16003 ], Weight: 10 + Segment-List: Label Stack: [ 2000 3000 ]` +``` + + +### PBR Policies + + +Policy-Based Routing (PBR) +allows the operator to specify the next hop for selected incoming packets +on an L3 interface, overriding the routing table. Incoming packets are +filtered through a policy map referencing one or more ACLs, and matching +packets are routed to the next hop specified. + + +A PBR policy map is composed of one or more classes and can include next-hop information for each +class. It can also include single-line raw match statements, +which have the appearance and function of a single line from an +ACL. Each class contains an eponymous class map. Class maps are +user-created, can be edited or deleted, filter traffic using +IPv4 ACLs, and are listed in ***running-config***. + + +These sections describe PBR policy configuration procedures: + +- Configuring PBR Policies Arad Platform Switches + +- Configuring PBR Policies FM6000 Platform Switches + +- Configuring PBR Policies Petra Platform Switches + +- Configuring PBR Policies Trident Platform Switches + + +## Traffic Management Configuration + Arad Platform Switches + + +Traffic policies are implemented +by policy maps, which are applied to the control plane, or to L3 interfaces +for Policy-Based Routing (PBR). 
Policy maps contain classes, which are +composed of class maps and traffic resolution commands. + + +Traffic Management Conceptual +Overview describes traffic policies. + + +### Configuring Control Plane Traffic +Policies Arad Platform Switches + + +Default control plane traffic +policies are implemented automatically without user intervention. These +policies are modified by associating traffic resolution commands with +static classes that comprise the control plane policy map. + + +#### Static Class Maps + + +Control plane traffic policies utilize +static class maps, which are provided by the switch, are not editable, +and cannot be deleted. + + +#### Editing the Policy Map + + +The only control plane policy map is **copp-system-policy**, which cannot +be deleted. In its default form, **copp-system-policy** +consists of the classes listed in class (policy-map (control-plane) Arad). Although +the underlying class map of each class cannot be edited, the traffic resolution +commands can be adjusted. The default classes cannot be removed from the policy map +and their sequence within the policy map is not editable. + + +Policy maps are modified in policy-map configuration mode. The policy-map type +copp command enters policy-map configuration mode. + + +**Example** + + +This command enters policy-map configuration mode for editing copp-system-policy. +``` +`switch(config)# **policy-map type copp copp-system-policy** +switch(config-pmap-copp-system-policy)#` +``` + + +The **class (policy-map (control-plane) +Arad)** command enters policy-map-class configuration mode, +where traffic resolution commands are modified for the configuration mode class. + + +**Example** + + +This command enters policy-map-class configuration mode for the copp-system-lacp +static +class.
+``` +`switch(config-pmap-copp-system-policy)# **class copp-system-lacp** +switch(config-pmap-c-copp-system-policy-copp-system-lacp)#` +``` + + +Two traffic resolution commands determine bandwidth parameters for class traffic: + +- bandwidth (policy-map-class (control-plane) Arad) specifies the +minimum bandwidth. + +- shape (policy-map-class (control-plane) Arad) specifies the maximum bandwidth. + + +**Example** + + +These commands configure a bandwidth range of **2000** to +**4000** kilobits per seconds (kbps) for traffic +filtered by the copp-system-lacp class +map: +``` +`switch(config-pmap-c-copp-system-policy-copp-system-lacp)# **bandwidth kbps 2000** +switch(config-pmap-c-copp-system-policy-copp-system-lacp)# **shape kbps 4000** +switch(config-pmap-c-copp-system-policy-copp-system-lacp)#` +``` + + +Policy-map and policy-map-class configuration modes are group-change modes. Changes +are saved with the **exit** command or discarded with the +**abort** command. The **show +active** command displays the saved version of policy map. The +**show pending** command displays the modified policy +map. + + +**Example** + + +These commands exit policy-map-class configuration mode, display the pending +policy-map, then exit policy-map configuration mode, which saves the altered policy +map to +***running-config***. +``` +`switch(config-pmap-c-copp-system-policy-copp-system-lacp)# **exit** +switch(config-pmap-copp-system-policy)# **show pending** +policy-map type copp copp-system-policy + class copp-system-bpdu + + class copp-system-lldp + + class copp-system-lacp + shape kbps 4000 + bandwidth kbps 2000 + + class copp-system-l3ttl1 + + class copp-system-l3slowpath + + +switch(config-pmap-copp-system-policy)# **exit** +switch(config)#` +``` + + +#### Applying Policy Maps to the Control Plane + + +The **copp-system-policy** +policy map is always applied to the control plane. No commands are available +to add or remove this assignment. 
+ + +#### Displaying Policy Maps + + +The show policy-map interface type qos command displays the configured +values of the policy maps classes and the number of packets filtered and dropped as +a result of the class maps. + + +**Example** + + +These commands exit policy-map-class configuration mode, display the pending +policy-map, then exit policy-map configuration mode, which saves the altered policy +map to +***running-config***. +``` +`switch(config)# **show policy-map copp copp-system-policy** +Service-policy input: copp-system-policy + Hardware programming status: InProgress + + Class-map: copp-system-mlag (match-any) + shape : 10000001 kbps + bandwidth : 10000001 kbps + Out Packets : 0 + Drop Packets : 0 + + Class-map: copp-system-bpdu (match-any) + shape : 2604 kbps + bandwidth : 1302 kbps + Out Packets : 0 + Drop Packets : 0 + + Class-map: copp-system-lacp (match-any) + shape : 4230 kbps + bandwidth : 2115 kbps + Out Packets : 0 + Drop Packets : 0 + +switch(config)# + +switch(config-pmap-c-copp-system-policy-copp-system-lacp)# **exit**` +``` + + +### Configuring QoS Traffic Policies Arad Platform Switches + + +QoS traffic policies are implemented +by creating class maps and policy maps, then applying the policy maps +to Ethernet and port channel interfaces. + + +#### Creating Class Maps + + +QoS traffic policies utilize dynamic class maps that are created and modified in class-map +configuration mode. The class-map type +qos command enters class-map configuration mode. + + +**Example** + + +This command enters class-map configuration mode to create QoS class map named +**Q-CMap_1**. +``` +`switch(config)# **class-map type qos match-any Q-CMap_1** +switch(config-cmap-Q-CMap_1)#` +``` + + +A class map contains one IPv4 access control list (ACL). The **match +ip access-group** command assigns an ACL to the +class map. Subsequent **match** commands replace the +existing **match** command. Class maps filter traffic +only on ACL permit rules. 
Deny ACL rules are disregarded. + + +**Example** + + +This command adds the IPv4 ACL named **ACL_1** to the +class +map. +``` +`switch(config-cmap-Q-CMap_1)# **match ip access-group ACL_1** +switch(config-cmap-Q-CMap_1)#` +``` + + +Class-map configuration mode is a group-change mode. Changes made in a +group-change mode are saved by exiting the mode. The **show +active** command displays the saved version of class +map. The **show pending** command displays the unsaved +class map. + + +**Example** + + +The **show active** command indicates that the +configuration mode class map is not stored in ***running-config***. +The **show pending** command displays the class map to +be stored upon exiting class-map configuration +mode. +``` +`switch(config-cmap-Q-CMap_1)# **show active** +switch(config-cmap-Q-CMap_1)# **show pending** +class-map type qos match-any Q-CMap_1 + match ip access-group ACL_1 + +switch(config-cmap-Q-CMap_1)#` +``` + + +The **exit** command returns the switch to global +configuration mode and saves pending class map changes. The +**abort** command returns the switch to +global configuration mode and discards pending changes. + + +**Example** + + +This command exits class-map configuration mode and stores pending changes to +***running-config***. +``` +`switch(config-cmap-CP-CMAP_1)# **exit** +switch(config)# **show class-map type control-plane CP-CMAP_1** + Class-map: CP-CMAP_1 (match-any) + Match: ip access-group name ACLv4_1 +switch(config)#` +``` + + +#### Creating Policy Maps + + +Policy maps are created and modified in policy-map configuration mode. The policy-map type +quality-of-service command enters policy-map configuration mode. + + +**Example** + + +This command places the switch in policy-map configuration mode and creates a +QoS policy map named +**Q-PMAP_1**. +``` +`switch(config)# **policy-map type quality-of-service Q-PMAP_1** +switch(config-pmap-Q-PMAP_1)#` +``` + + +Policy map are edited by adding or removing classes. 
A class automatically +contains its eponymous class map; traffic resolution commands are added or +edited in ***policy-map-class*** configuration mode. The +**class** command adds a class to the +configuration mode policy map and places the switch in +***policy-map-class*** configuration mode, where traffic +resolution commands are added to the class. + + +**Example** + + +This command adds the **Q-CMap_1** class to the +**Q-PMAP_1** policy map and places the +switch in ***policy-map-class*** configuration +mode. +``` +`switch(config-pmap-Q-PMAP_1)# **class Q-CMap_1** +switch(config-pmap-c-Q-PMAP_1-Q-CMap_1)#` +``` + + +The **set** commands configure traffic resolution +methods for data that passes the class map: + +- **set cos** sets the Layer 2 CoS +field. + +- **set dscp** sets the DSCP value in the ToS +byte. + +- **set traffic-class** specifies a traffic +class queue. + + +**Example** + + +These commands configure the policy map to set the **CoS field +7** on packets filtered by the class map, then +assign those packets to **traffic class +4**. +``` +`switch(config-pmap-c-Q-PMAP_1-Q-CMap_1)# **set cos 7** +switch(config-pmap-c-Q-PMAP_1-Q-CMap_1)# **set traffic-class 4** +switch(config-pmap-c-Q-PMAP_1-Q-CMap_1)#` +``` + + +***Policy-map*** and ***policy-map-class*** configuration modes +are group-change modes. Changes are saved with the +**exit** command or discarded with the +**abort** command. The **show +active** and **show pending** +commands display the saved and modified policy map versions, +respectively. + + +**Example** + + +These commands exit policy-map-class configuration mode, display the pending +policy-map, then exit policy-map configuration mode to save the altered +policy map to +***running-config***. 
+``` +`switch(config-pmap-c-Q-PMAP_1-Q-CMap_1)# **exit** +switch(config-pmap-Q-PMAP_1)# **show pending** +policy-map type quality-of-service Q-PMAP_1 + class Q-CMap_1 + set cos 7 + set traffic-class 4 + + class class-default + +switch(config-pmap-Q-PMAP_1)# **exit** +switch(config)#` +``` + + +The last class in all QoS policy maps is ***class-default***. The +***class-default*** class map matches all traffic except +IPv4 or IPv6 traffic and provides no traffic resolution commands. The +***class-default*** class map is not editable; traffic +resolution commands can be added to the ***class-default*** +class. + + +To modify traffic resolution commands for the ***class-default*** +class, enter ***policy-map-class*** configuration mode for the +class, then enter the desired **set** commands. + + +**Example** + + +These commands enter ***policy-map-class*** configuration mode for +***class-default***, configures the stream to enter +**traffic class 2**, and saves the altered +policy map to +***running-config***. +``` +`switch(config)# **policy-map type quality-of-service Q-PMap_1** +switch(config-pmap-Q-PMap_1)# **class class-default** +switch(config-pmap-c-Q-PMap_1-class-default)# **set traffic-class 2** +switch(config-pmap-c-Q-PMap_1-class-default)# **exit** +switch(config-pmap-Q-PMap_1)# **exit** +switch(config)# **show policy-map type qos Q-PMap_1** +Service-policy Q-PMap_1 + + Class-map: Q-CMap_1 (match-any) + Match: ipv6 access-group name ACLv6_1 + set cos 7 + set traffic-class 4 + + Class-map: class-default (match-any) + set traffic-class 2 + +switch(config)#` +``` + + +#### Applying Policy Maps to an Interface + + +The service-policy +type qos (Interface mode) command applies a specified policy map to the +configuration mode interface. + + +These commands apply **PMAP-1** policy map to +**interfaceEthernet +8**. 
+``` +`switch(config)# **interface ethernet 8** +switch(config-if-Et8)# **show active** +switch(config-if-Et8)# **service-policy input PMAP-1** +switch(config-if-Et8)# **show active** +interface Ethernet8 + service-policy type qos input PMAP-1 +switch(config-if-Et8)#` +``` + + +### Configuring PBR Policies +Arad Platform Switches + + +Policy-Based Routing (PBR) +is implemented by creating class maps and policy maps, then applying +the policy maps to Ethernet interfaces, port channel interfaces or switch +virtual interfaces (SVIs). + + +#### Creating PBR Class Maps + + +PBR policies utilize class maps that are created and modified in the ***class-map*** +configuration mode. The class-map type + pbr command enters the ***class-map*** configuration mode. + + + + +**Example** + + + This command enters the ***class-map*** configuration mode to create a PBR class +map named +CMAP1. +``` +`switch(config)# **class-map type pbr match-any CMAP1** +switch(config-cmap-PBR-CMAP1)#` +``` + + + + +A class map contains one or more access control lists (ACLs). The match (policy-map (pbr)) command +assigns an ACL to the class map. Subsequent **match** commands add +additional ACLs to the class map. Class maps filter traffic only on ACL permit rules. +Deny ACL rules are disregarded; if a class map includes ACLs with deny rules, the +configuration reverts to its previous state. + + + + +**Example** + + + This command adds the ACL named **ACL1** to the class +map. +``` +`switch(config-cmap-PBR-CMAP1)# **match ip access-group ACL1** +switch(config-cmap-PBR-CMAP1)#` +``` + + + + +The ***class-map*** configuration mode is a group-change mode. Changes made in a +group-change mode are saved by exiting the mode. The **show + active** command displays the saved version of class map. + + + The **show active** command indicates that the configuration mode +class map is not stored in ***running-config***. 
+ +``` +`switch(config-cmap-PBR-CMAP1)# **show active** +switch(config-cmap-PBR-CMAP1)#` +``` + + + + +The **exit** command returns the switch to the + ***global*** configuration mode and saves pending class map changes. The +**abort** command returns the switch to the + ***global*** configuration mode and discards pending changes. + + + + +**Example** + + + This command exits class-map configuration mode and stores pending changes to +***running-config***. +``` +`switch(config-cmap-PBR-CMAP1)# **exit** +switch(config)# **show class-map type pbr CMAP1** +class-map type pbr match-any CMAP1 + 10 match ip access-group ACL1 +switch(config)#` +``` + + + + + +#### Creating PBR Policy Maps + + +Policy maps are created and modified in policy-map configuration mode. The policy-map type pbr command enters the +***policy-map*** configuration mode. + + + + +**Example** + + + This command enters the ***policy-map*** configuration mode for creating a PBR +policy map named +**PMAP1**. +``` +`switch(config)# **policy-map type pbr PMAP1** +switch(config-pmap-PMAP1)#` +``` + + + + +Policy map are edited by adding or removing classes. A class automatically contains its +eponymous class map; next-hop commands are added or edited in the +***policy-map-class*** configuration mode. The class (policy-map (pbr)) command adds +a class to the configuration mode policy map and places the switch in the +***policy-map-class*** configuration mode, where next-hop commands are +added to the class. + + + + **Examples** + + - This command adds the CMAP1 class to the policy map and + places the switch into the ***policy-map-class*** configuration + mode. +``` +`switch(config-pmap-PMAP1)# **class CMAP1** +switch(config-pmap-c-PMAP1-CMAP1)#` +``` + + +The set +nexthop (policy-map-class pbr) command configures the next hop for + data that passes the class map. + + - This command configures the policy map to set the next hop to + **10.12.0.5** on packets filtered by the class + map. 
+``` +`switch(config-pmap-c-PMAP1-CMAP1)# **set nexthop 10.12.0.5** +switch(config-pmap-c-PMAP1-CMAP1)#` +``` + + +The set nexthop-group (policy-map-class(pbr) Arad) command configures a + nexthop group as the next hop for data that passes the class map. + + - These commands configure the policy map **PMAP1** to set +the next hop to a nexthop group named **GROUP1** for traffic +defined by class map **CMAP1**. + +``` +`switch(config)# **policy-map type pbr PMAP1** +switch(config-pmap-PMAP1)# **class CMAP1** +switch(config-pmap-c-PMAP1-CMAP1)# **set nexthop-group GROUP1** +switch(config-pmap-c-PMAP1-CMAP1)#` +``` + + +The +***policy-map*** and ***policy-map-class*** configuration + modes are group-change modes. Changes are saved with the + **exit** command or discarded with the + **abort** command. The **show + active** command displays the currently saved map + version. + + - These commands exits the ***policy-map-class*** configuration mode, + then exits the ***policy-map*** configuration mode to save the altered + policy map to + ***running-config***. +``` +`switch(config-pmap-c-PMAP1-CMAP1)# **exit** +switch(config-pmap-PMAP1)# **exit** +switch(config)#` +``` + + +#### Applying a PBR Policy Map to an Interface + + +The service-policy type pbr (Interface mode) command applies the +specified PBR policy map to the configuration mode interface. Only one PBR service +policy is supported per interface. + + + These commands apply the PMAP1 PBR policy map to **interface ethernet +8**. +``` +`switch(config)# **interface ethernet 8** +switch(config-if-Et8)# **service-policy type pbr input PMAP1** +switch(config-if-Et8)#` +``` + + + + + +#### Hardware Decapsulation + + +When hardware decapsulation takes place, +PBR policy maps on Arad platform switches match on outer packet headers +(i.e., they match based on the attributes of the packet before it is +decapsulated). 
+ + +## Traffic Management Configuration + FM6000 Platform Switches + + +Traffic policies are implemented by policy maps, which are applied to the control plane or an +interface. Policy maps contain classes, which are composed of class maps and traffic +resolution commands. Traffic Management Conceptual Overview describes traffic policies. + + +FM6000 platform switches support the following traffic policies: + +- Control plane policies manage control plane traffic. + +- QoS traffic policies manage traffic on Ethernet and port channel +interfaces. + + +These sections describe the construction and application of policy maps on FM6000 platform +switches: + +- Configuring Control Plane Traffic Policies FM6000 Platform +Switches + +- Configuring QoS Traffic Policies FM6000 Platform Switches + +- Configuring PBR Policies FM6000 Platform Switches + + +### Configuring Control Plane Traffic +Policies FM6000 Platform Switches + + +Default control plane traffic +policies are implemented automatically without user intervention. These +policies are modified by associating traffic resolution commands with +static classes that comprise the control plane policy map. + + +#### Static Class Maps + + +Control plane traffic policies utilize +static class maps, which are provided by the switch, are not editable, +and cannot be deleted. + + +#### Editing the Policy Map + + +The only control plane policy map is **copp-system-policy**, which cannot +be deleted. In its default form, **copp-system-policy** consists of the classes +listed in copp-system-policy default classes: +FM6000 Platform Switches. Although the underlying class map of each class +cannot be edited, the traffic resolution commands can be adjusted. The default +classes cannot be removed from the policy map and their sequence within the policy +map is not editable. + + +Table 3. 
Copp-system-policy Default Classes: FM6000 Platform Switches + +| Class Name +| **shape (pps)** +| **bandwidth (pps)** +| + + +| copp-system-arp +| 10000 +| 1000 +| + + +| copp-system-default +| 8000 +| 1000 +| + + +| copp-system-ipmcrsvd +| 10000 +| 1000 +| + + +| copp-system-ipmcmiss +| 10000 +| 1000 +| + + +| copp-system-igmp +| 10000 +| 1000 +| + + +| copp-system-l2rsvd +| 10000 +| 10000 +| + + +| copp-system-l3slowpath +| 10000 +| 1000 +| + + +| copp-system-pim-ptp +| 10000 +| 1000 +| + + +| copp-system-ospf-isis +| 10000 +| 1000 +| + + +| copp-system-selfip +| 5000 +| 5000 +| + + +| copp-system-selfip-tc6to7 +| 5000 +| 5000 +| + + +| copp-system-sflow +| 25000 +| 1000 +| + + +Policy maps are modified in the ***policy-map*** configuration mode. The policy-map type +copp command enters the ***policy-map*** configuration +mode. + + +**Example** + + +This command enters the ***policy-map*** configuration mode for editing +***copp-system-policy***. +``` +`switch(config)# **policy-map type copp copp-system-policy** +switch(config-pmap-copp-system-policy)#` +``` + + +The class (policy-map (control-plane) FM6000) command +enters the ***policy-map-class*** configuration mode, where traffic +resolution commands are modified for the configuration mode class. + + +**Example** + + +This command enters the ***policy-map-class*** configuration mode for the +copp-system-arp static +class. 
+``` +`switch(config-pmap-copp-system-policy)# **class copp-system-arp** +switch(config-pmap-c-copp-system-policy-copp-system-arp)#` +``` + + +Two traffic resolution commands determine bandwidth parameters for class traffic: + +- bandwidth (policy-map-class (control-plane) FM6000) + +- shape (policy-map-class (control-plane) FM6000) + + +**Example** + + +These commands configure a bandwidth range of **2000** to +**4000** packets per second (pps) for traffic +filtered by the copp-system-arp class +map: +``` +`switch(config-pmap-c-copp-system-policy-copp-system-arp)# **bandwidth pps 2000** +switch(config-pmap-c-copp-system-policy-copp-system-arp)# **shape pps 4000** +switch(config-pmap-c-copp-system-policy-copp-system-arp)#` +``` + + +The ***policy-map*** and ***policy-map-class*** configuration modes +are group-change modes. Changes are saved with the **exit** +command or discarded with the **abort** command. The +**show active** command displays the saved version of +policy map. The **show pending** command displays the modified +policy map. + + +**Example** + + +These commands exit the ***policy-map-class*** configuration mode, display +the pending policy-map, then exit the ***policy-map*** configuration mode, +which saves the altered policy map to +***running-config***. +``` +`switch(config-pmap-c-copp-system-policy-CP-CMAP_1)# **exit** +switch(config-pmap-copp-system-policy)# **show pending** +policy-map type copp copp-system-policy + class CP-CMAP_1 + shape pps 4000 + bandwidth pps 2000 + + class copp-system-bpdu + + class copp-system-lldp + + class copp-system-lacp + + class copp-system-arp + + class copp-system-arpresolver + + class copp-system-default + +switch(config-pmap-copp-system-policy)#**exit** +switch(config)#` +``` + + +#### Applying Policy Maps to the Control Plane + + +The **copp-system-policy** policy map is always applied to the control +plane. No commands are available to add or remove this assignment. 
+ + +### Configuring QoS Traffic Policies + FM6000 Platform Switches + + +QoS traffic policies are implemented +by creating class maps and policy maps, then applying the policy maps +to Ethernet and port channel interfaces. + + +#### Creating Class Maps + + +QoS traffic policies utilize dynamic class maps that are created and modified in the +***class-map*** configuration mode. The class-map type qos command enters the ***class-map*** +configuration mode. + + +**Example** + + +This command enters the ***class-map*** configuration mode to create +QoS class map named +**Q-CMap_1**. +``` +`switch(config)# **class-map type qos match-any Q-CMap_1** +switch(config-cmap-Q-CMap_1)#` +``` + + +A class map contains one IPv4 access control list (ACL). The match (class-map (qos) FM6000) +command assigns an ACL to the class map. Subsequent +**match** commands replace the existing +**match** command. Class maps filter +traffic only on ACL permit rules. Deny ACL rules are disregarded. + + +**Example** + + +This command adds the IPv4 ACL named **ACL_1** to the +class +map. +``` +`switch(config-cmap-Q-CMap_1)# **match ip access-group ACL_1** +switch(config-cmap-Q-CMap_1)#` +``` + + +The ***class-map*** configuration mode is a group-change mode. Changes +made in a group-change mode are saved by exiting the mode. The +**show active** command displays the saved +version of class map. The **show pending** command +displays the unsaved class map. + + +**Example** + + +The **show active** command indicates that the +configuration mode class map is not stored in ***running-config***. +The **show pending** command displays the class map to +be stored upon exiting the ***class-map*** configuration +mode. 
+``` +`switch(config-cmap-Q-CMap_1)# **show active** +switch(config-cmap-Q-CMap_1)# **show pending** +class-map type qos match-any Q-CMap_1 + match ip access-group ACL_1 + +switch(config-cmap-Q-CMap_1)#` +``` + + +The **exit** command returns the switch to the +***global*** configuration mode and saves pending class +map changes. The **abort** command returns the switch +to the ***global*** configuration mode and discards pending +changes. + + +**Example** + + +This command exits the ***class-map*** configuration mode and stores +pending changes to +***running-config***. +``` +`switch(config-cmap-CP-CMAP_1)# **exit** +switch(config)# **show class-map type control-plane CP-CMAP_1** + Class-map: CP-CMAP_1 (match-any) + Match: ip access-group name ACLv4_1 +switch(config)#` +``` + + +#### Creating Policy Maps + + +Policy maps are created and modified in the ***policy-map*** configuration mode. The +policy-map type +quality-of-service command enters the ***policy-map*** +configuration mode. + + +**Example** + + +This command places the switch in the ***policy-map*** configuration +mode and creates a QoS policy map named +**Q-PMAP_1**. +``` +`switch(config)# **policy-map type quality-of-service Q-PMAP_1** +switch(config-pmap-Q-PMAP_1)#` +``` + + +Policy map are edited by adding or removing classes. A class automatically +contains its eponymous class map; traffic resolution commands are added or +edited in the ***policy-map-class*** configuration mode. The class (policy-map (qos) FM6000) +command adds a class to the configuration mode policy map and places the +switch in the *policy-map-class* configuration mode, where traffic +resolution commands are added to the class. + + +**Example** + + +This command adds the **Q-CMap_1** class to the +**Q-PMAP_1** policy map and places the +switch in the ***policy-map-class*** configuration +mode. 
+``` +`switch(config-pmap-Q-PMAP_1)# **class Q-CMap_1** +switch(config-pmap-c-Q-PMAP_1-Q-CMap_1)#` +``` + + +set (policy-map-class (qos) FM6000) +commands configure traffic resolution methods for data that passes the class +map: + +- **set cos** sets the Layer 2 CoS +field. + +- **set dscp** sets the DSCP value in the ToS +byte. + +- **set traffic class** specifies a traffic +class queue. + + +**Example** + + +These commands configure the policy map to set the **CoS field +7** on packets filtered by the class map, then +assigns those packets to **traffic class +4**. +``` +`switch(config-pmap-c-Q-PMAP_1-Q-CMap_1)# **set cos 7** +switch(config-pmap-c-Q-PMAP_1-Q-CMap_1)# **set traffic-class 4** +switch(config-pmap-c-Q-PMAP_1-Q-CMap_1)#` +``` + + +The ***policy-map*** and ***policy-map-class*** configuration +modes are group-change modes. Changes are saved with the +**exit** command or discarded with the +**abort** command. The **show +active** and **show pending** +commands display the saved and modified policy map versions, +respectively. + + +**Example** + + +These commands exit the ***policy-map-class*** configuration mode, +display the pending policy-map, then exits the ***policy-map*** +configuration mode to save the altered policy map to +***running-config***. +``` +`switch(config-pmap-c-Q-PMAP_1-Q-CMap_1)# **exit** +switch(config-pmap-Q-PMAP_1)# **show pending** +policy-map type quality-of-service Q-PMAP_1 + class Q-CMap_1 + set cos 7 + set traffic-class 4 + + class class-default + +switch(config-pmap-Q-PMAP_1)# **exit** +switch(config)#` +``` + + +The last class in all QoS policy maps is ***class-default***. The +***class-default*** class map matches all traffic except +IPv4 or IPv6 traffic and provides no traffic resolution commands. The +***class-default*** class map is not editable; traffic +resolution commands can be added to the ***class-default*** +class. 
+ + +To modify traffic resolution commands for the ***class-default*** +class, enter the ***policy-map-class*** configuration mode for the +class, then enter the desired **set** commands. + + +**Example** + + +These commands enter the ***policy-map-class*** configuration mode for +***class-default***, configures the stream to enter +**traffic class 2**, and saves the altered +policy map to +***running-config***. +``` +`switch(config)# **policy-map type quality-of-service Q-PMap_1** +switch(config-pmap-Q-PMap_1) #**class class-default** +switch(config-pmap-c-Q-PMap_1-class-default)# **set traffic-class 2** +switch(config-pmap-c-Q-PMap_1-class-default)# **exit** +switch(config-pmap-Q-PMap_1)# **exit** +switch(config)# **show policy-map type qos Q-PMap_1** +Service-policy Q-PMap_1 + + Class-map: Q-CMap_1 (match-any) + Match: ipv6 access-group name ACLv6_1 + set cos 7 + set traffic-class 4 + + Class-map: class-default (match-any) + set traffic-class 2 + +switch(config)#` +``` + + +#### Applying Policy Maps to an Interface + + +The service-policy type qos (Interface mode) command applies a specified +policy map to the configuration mode interface. + +These commands apply +**PMAP-1** policy map to **interface ethernet +8**. +``` +`switch(config)# **interface ethernet 8** +switch(config-if-Et8)# **show active** +switch(config-if-Et8)# **service-policy input PMAP-1** +switch(config-if-Et8)# **show active** +interface Ethernet8 + service-policy type qos input PMAP-1 +switch(config-if-Et8)#` +``` + + +### Configuring PBR Policies +FM6000 Platform Switches + + +Policy-Based Routing (PBR) is implemented by creating class maps and policy maps, then applying + the policy maps to Ethernet interfaces, port channel interfaces or Switch Virtual + Interfaces (SVIs). + + +#### Creating PBR Class Maps + + +PBR policies utilize class maps that are created and modified in the ***class-map*** +configuration mode. 
The class-map type +pbr command enters the ***class-map*** configuration +mode. + + + + +**Example** + + + This command enters the ***class-map*** configuration mode to create a PBR class +map named +**CMAP1**. +``` +`switch(config)# **class-map type pbr match-any CMAP1** +switch(config-cmap-PBR-CMAP1)#` +``` + + + + +A class map contains one or more IPv4 access control lists (ACLs). The match (policy-map + (pbr)) command assigns an ACL to the class map. Subsequent +**match** commands add additional ACLs to the class map. +Class maps filter traffic only on ACL permit rules. Deny ACL rules are disregarded; if a +class map includes ACLs with deny rules, the configuration reverts to its previous +state. + + + +On FM6000 platform switches, counters are not supported, so a [counters per-entry (ACL configuration modes)](/um-eos/eos-acls-and-route-maps#xx1151725) command in +an ACL is ignored. + + + + +**Example** + + + This command adds the IPv4 ACL named **ACL1** to the class +map. +``` +`switch(config-cmap-PBR-CMAP1)# **match ip access-group ACL1** +switch(config-cmap-PBR-CMAP1)#` +``` + + + + +The ***class-map*** configuration mode is a group-change mode. Changes made in a +group-change mode are saved by exiting the mode. The **show + active** command displays the saved version of class map. + + + The **show active** command indicates that the configuration mode +class map is not stored in ***running-config***. + +``` +`switch(config-cmap-PBR-CMAP1)# **show active** +switch(config-cmap-PBR-CMAP1)#` +``` + + + + +The **exit** command returns the switch to ***global*** +configuration mode and saves pending class map changes. The + **abort** command returns the switch to ***global*** +configuration mode and discards pending changes. + + + + +**Example** + + + This command exits the ***class-map*** configuration mode and stores pending +changes to +***running-config***. 
+``` +`switch(config-cmap-PBR-CMAP1)# **exit** +switch(config)# **show class-map type pbr CMAP1** +class-map type pbr match-any CMAP1 + 10 match ip access-group ACL1 +switch(config)#` +``` + + + + + +#### Creating PBR Policy Maps + + +Policy maps are created and modified in the ***policy-map*** configuration mode. The + policy-map type + pbr command enters the ***policy-map*** configuration +mode. + + + + +**Example** + + + This command enters the ***policy-map*** configuration mode for creating a PBR +policy map named +**PMAP1**. +``` +`switch(config)# **policy-map type pbr PMAP1** +switch(config-pmap-PMAP1)#` +``` + + + + +Policy maps are edited by adding or removing classes. A class automatically contains its +eponymous class map; next-hop commands are added or edited in the +***policy-map-class*** configuration mode. The class (policy-map + (pbr)) command adds a class to the configuration mode policy map and +places the switch in the ***policy-map-class*** configuration mode, where +next-hop commands are added to the class. + + + + **Examples** + + - This command adds the **CMAP1** class to the policy map and +places the switch in the ***policy-map-class*** configuration + mode. +``` +`switch(config-pmap-PMAP1)# **class CMAP1** +switch(config-pmap-c-PMAP1-CMAP1)#` +``` + + +The set + nexthop (policy-map-class pbr) command configures the + next hop for data that passes the class map. + + - This command configures the policy map to set the next hop to +**10.12.0.5** on packets filtered by the class + map. +``` +`switch(config-pmap-c-PMAP1-CMAP1)# **set nexthop 10.12.0.5** +switch(config-pmap-c-PMAP1-CMAP1)#` +``` + + +The +***policy-map*** and ***policy-map-class*** configuration + modes are group-change modes. Changes are saved with the + **exit** command or discarded with the + **abort** command. The **show + active** command displays the currently saved map + version. 
+ + + + +**Example** + + + These commands exit the ***policy-map-class*** configuration mode, then exit +the ***policy-map*** configuration mode to save the altered policy map to +***running-config***. +``` +`switch(config-pmap-c-PMAP1-CMAP1)# **exit** +switch(config-pmap-PMAP1)# **exit** +switch(config)#` +``` + + + + + +#### Applying a PBR Policy Map to an Interface + + +The service-policy +type pbr (Interface mode) command applies the specified PBR +policy map to the configuration mode interface. Only one PBR service policy is supported +per interface. + + + These commands apply the **PMAP1** PBR policy map to +**interface ethernet +8**. +``` +`switch(config)# **interface ethernet 8** +switch(config-if-Et8)# **service-policy type pbr input PMAP1** +switch(config-if-Et8)#` +``` + + + + + +#### Hardware Decapsulation + + +When hardware decapsulation takes place, +PBR policy maps on FM6000 platform switches match on outer packet headers +(i.e., they match based on the attributes of the packet before it is +decapsulated). + + +## Traffic Management Configuration + Petra Platform Switches + + +Traffic policies are implemented +by policy maps, which are applied to the control plane. Policy maps contain +classes, which are composed of class maps and traffic resolution commands. +QoS traffic policies are not supported on 7500 Series switches. + + +Traffic Management Conceptual +Overview describes traffic policies. + + +### Configuring Control Plane Traffic +Policies Petra Platform Switches + + +Default control plane traffic +policies are implemented automatically without user intervention. These +policies are modified by associating traffic resolution commands with +static classes that comprise the control plane policy map. + + +#### Static Class Maps + + +Control plane traffic policies utilize +static class maps, which are provided by the switch, are not editable, +and cannot be deleted. 
+ + +#### Editing the Policy Map + + +The only control plane policy map is **copp-system-policy**, which cannot +be deleted. In its default form, **copp-system-policy** consists of the classes +listed in copp-system-policy default classes: +Petra Platform Switches. Although the underlying class map of each class +cannot be edited, the traffic resolution commands can be adjusted. The default +classes cannot be removed from the policy map and their sequence within the policy +map is not editable. + + +Table 4. copp-system-policy default classes: Petra Platform Switches + +| Class Name +| **shape (kbps)** +| **bandwidth (kbps)** +| + + +| copp-system-bpdu +| 2500 +| 1250 +| + + +| copp-system-default +| 2500 +| 250 +| + + +| copp-system-igmp +| 2500 +| 250 +| + + +| copp-system-ipbroadcast +| 2500 +| 250 +| + + +| copp-system-ipmc +| 2500 +| 250 +| + + +| copp-system-ipmcmiss +| 2500 +| 250 +| + + +| copp-system-ipmcrsvd +| 2500 +| 250 +| + + +| copp-system-ipunicast +| NO +LIMIT +| 250 +| + + +| copp-system-l3destmiss +| 2500 +| 250 +| + + +| copp-system-l3slowpath +| 2500 +| 250 +| + + +| copp-system-l3ttl0 +| 2500 +| 250 +| + + +| copp-system-l3ttl1 +| 2500 +| 250 +| + + +| copp-system-lacp +| 2500 +| 1250 +| + + +| copp-system-lldp +| 2500 +| 250 +| + + +| copp-system-unicast-arp +| 2500 +| 250 +| + + +Policy maps are modified in the ***policy-map*** configuration mode. The policy-map type +copp command enters the ***policy-map*** configuration +mode. + + +**Example** + + +This command enters the***policy-map*** configuration mode for editing +**copp-system-policy**. +``` +`switch(config)# **policy-map type copp copp-system-policy** +switch(config-pmap-copp-system-policy)#` +``` + + +The class (policy-map (control-plane) Petra) command +enters the ***policy-map-class*** configuration mode, where traffic +resolution commands are modified for the configuration mode class. 
+ + +**Example** + +- This command enters the ***policy-map-class*** +configuration mode for the **copp-system-lldp** static +class. +``` +`switch(config-pmap-copp-system-policy)# **class copp-system-lldp** +switch(config-pmap-c-copp-system-policy-copp-system-lldp)#` +``` + + +Two traffic resolution commands determine bandwidth parameters for class traffic: + +- bandwidth (policy-map-class (control-plane) +Petra) specifies the minimum bandwidth. + +- shape (policy-map-class (control-plane) +Petra) specifies the maximum bandwidth. + + +**Example** + + +These commands configure a bandwidth range of **2000** to +**4000** kilobits per second (kbps) for traffic +filtered by the **copp-system-lldp** class +map: +``` +`switch(config-pmap-c-copp-system-policy-copp-system-lldp)# **bandwidth kbps 2000** +switch(config-pmap-c-copp-system-policy-copp-system-lldp)# **shape kbps 4000** +switch(config-pmap-c-copp-system-policy-copp-system-lldp)#` +``` + + +The ***policy-map*** and ***policy-map-class*** configuration modes +are group-change modes. Changes are saved with the **exit** +command or discarded with the **abort** command. The +**show active** command displays the saved version of +policy map. The **show pending** command displays the +configured policy map. + + +Petra platform switches do not support all discrete rate values. When a +**bandwidth** or **shape** +command specifies a value that is not supported, the switch converts the rate to the +next highest discrete value that it supports. The show policy-map interface type qos command displays the +converted rate and not the user configured rate. + + +**Example** + + +These commands exit the ***policy-map-class*** configuration mode, display +the pending policy-map, then exit the ***policy-map*** configuration mode, +which saves the altered policy map to +***running-config***. 
+``` +`switch(config-pmap-c-copp-system-policy-copp-system-lacp)# **exit** +switch(config-pmap-copp-system-policy)# **show pending** +policy-map type copp copp-system-policy + class copp-system-bpdu + + class copp-system-lldp + shape kbps 4000 + bandwidth kbps 2000 + + class copp-system-lacp + +switch(config-pmap-copp-system-policy)# **exit** +switch(config)#` +``` + + +Changes are saved with the **exit** command or discarded with +the **abort** command. The **show +active** command displays the saved version of policy map. The +**show pending** command displays the modified policy +map. + + +#### Displaying Policy Maps + + +The show policy-map interface type qos command displays the traffic +resolution rates of the policy maps classes and the number of packets filtered and +dropped as a result of the class maps. The shape and bandwidth rates may differ from +configured values, because the switch does not support all discrete rate values. + + +**Example** + + +These commands exits the ***policy-map-class*** configuration mode, display +the pending policy-map, then exits the *policy-map* configuration mode, which +saves the altered policy map to +***running-config***. +``` +`switch(config)# **show policy-map copp copp-system-policy** +Service-policy input: copp-system-policy + Hardware programming status: InProgress + + Class-map: copp-system-mlag (match-any) + shape : 10000001 kbps + bandwidth : 10000001 kbps + Out Packets : 0 + Drop Packets : 0 + + Class-map: copp-system-lacp (match-any) + shape : 2604 kbps + bandwidth : 1302 kbps + Out Packets : 0 + Drop Packets : 0 + +switch(config)#` +``` + + +#### Applying Policy Maps to the Control Plane + + +The **copp-system-policy** policy map is always applied to the control +plane. No commands are available to add or remove this assignment. + + +### Configuring QoS Traffic Policies + Petra Platform Switches + + +QoS traffic policies are not supported on Petra platform switches. 
+ + +### Configuring PBR Policies +Petra Platform Switches + + +PBR policies are not supported on Petra platform switches. + + +## Traffic Management Configuration + Trident Platform Switches + + +Traffic policies are implemented by policy maps, which are applied to the control plane or an +interface. Policy maps contain classes, which are composed of class maps and traffic +resolution commands. Traffic Management Conceptual Overview describes traffic policies. + + +Trident platform switches support the following traffic policies: + +- Control plane policies manage control plane traffic. + +- QoS traffic policies manage traffic on Ethernet and port channel +interfaces. + + +These sections describe the construction and application of policy maps: + +- Configuring Control Plane Traffic Policies Trident Platform +Switches + +- Configuring QoS Traffic Policies Trident Platform Switches + +- Configuring PBR Policies Trident Platform Switches + + +### Configuring Control Plane Traffic +Policies Trident Platform Switches + + +Default control plane traffic +policies are implemented automatically without user intervention. These +policies are modified by creating class maps and editing the policy map +to include the new class maps. + + +#### Creating Class Maps + + +Control plane traffic policies utilize static and dynamic class maps. Static class maps are +provided by the switch, are not editable, and cannot be deleted. Dynamic class maps +are created and modified in the ***class-map*** configuration mode. The +class-map type +copp command enters the ***class-map*** configuration +mode. + + +**Example** + + +This command enters the ***class-map*** configuration mode for creating or +editing a control plane dynamic class map named +**CP-CMAP_1**. +``` +`switch(config)# **class-map type copp match-any CP-CMAP_1** +switch(config-cmap-CP-CMAP_1)#` +``` + + +Class maps contain one IPv4 or IPv6 access control list (ACL).
The match (class-map (control-plane) Trident) command +assigns an ACL to the class map. Subsequent **match** commands +replace the existing **match** command. Class maps filter +traffic only on ACL permit rules. Deny ACL rules are disregarded. + + +**Example** + + +This command assigns the IPv4 ACL named **ACLv4_1** to the +class +map. +``` +`switch(config-cmap-CP-CMAP_1)# **match ip access-group ACLv4_1** +switch(config-cmap-CP-CMAP_1)#` +``` + + +The ***class-map*** configuration mode is a group-change mode. Changes are +saved by exiting the mode. The **show active** command +displays the saved version of class map. The **show pending** +command displays the unsaved class map. + + +**Example** + + +The **show active** command indicates that the configuration +mode class map is not stored in ***running-config***. The **show +pending** command displays the class map to be stored upon +exiting the ***class-map*** configuration +mode. +``` +`switch(config-cmap-CP-CMAP_1)# **show active** +switch(config-cmap-CP-CMAP_1)# **show pending** +class-map type copp match-any CP-CMAP_1 + match ip access-group ACLv4_1 + +switch(config-cmap-CP-CMAP_1)#` +``` + + +The **exit** command returns the switch to the +***global*** configuration mode and saves pending class map changes. The +**abort** command returns the switch to the +***global*** configuration mode and discards pending class map +changes. + + +**Example** + + +This command exits the ***class-map*** configuration mode and stores pending +changes to +***running-config***. +``` +`switch(config-cmap-CP-CMAP_1)# **exit** +switch(config)# **show class-map type control-plane CP-CMAP_1** + Class-map: CP-CMAP_1 (match-any) + Match: ip access-group name ACLv4_1 +switch(config)#` +``` + + +#### Editing the Policy Map + + +The only control plane policy map is **copp-system-policy**, which cannot +be deleted. 
In its default form, **copp-system-policy** +consists of the classes listed in copp-system-policy default classes: Trident Platform Switches. Although +the underlying class map of each class cannot be edited, the traffic resolution +commands can be adjusted. The default classes cannot be removed from the policy map +and their sequence within the policy map is not editable. + + +Table 5. copp-system-policy default classes: Trident Platform Switches + +| Class Name +| **shape (pps)** +| **bandwidth (pps)** +| + + +| copp-system-bpdu +| 5000 +| 5000 +| + + +| copp-system-lacp +| 5000 +| 5000 +| + + +| copp-system-selfip-tc6to7 +| 5000 +| 5000 +| + + +| copp-system-selfip +| 5000 +| 5000 +| + + +| copp-system-tc6to7 +| 10000 +| 1000 +| + + +| copp-system-lldp +| 10000 +| 1000 +| + + +| copp-system-ipmcrsvd +| 10000 +| 1000 +| + + +| copp-system-igmp +| 10000 +| 1000 +| + + +| copp-system-ipmcmiss +| 10000 +| 1000 +| + + +| copp-system-glean +| 10000 +| 1000 +| + + +| copp-system-tc3to5 +| 10000 +| 1000 +| + + +| copp-system-arp +| 10000 +| 1000 +| + + +| copp-system-arpresolver +| 10000 +| 1000 +| + + +| copp-system-l3destmiss +| 10000 +| 1000 +| + + +| copp-system-l3slowpath +| 10000 +| 1000 +| + + +| copp-system-l3ttl1 +| 10000 +| 1000 +| + + +| copp-system-default +| 8000 +| 1000 +| + + +| copp-system-acllog +| 10000 +| 1000 +| + + +| copp-system-sflow +| 25000 +| 0 +| + + +Policy maps are modified in the ***policy-map*** configuration mode. The policy-map type +copp command enters the ***policy-map*** configuration +mode. + + +**Example** + + +This command enters the ***policy-map*** configuration mode for editing +**copp-system-policy**. +``` +`switch(config)#**policy-map type copp copp-system-policy** +switch(config-pmap-copp-system-policy)#` +``` + + +Dynamic classes are inserted in front of the static classes. 
Classes automatically +contain their eponymous class map; traffic resolution commands are created or edited +in the ***policy-map-class*** configuration mode. The class +(policy-map (control-plane) Trident) command adds a class to the policy +map and places the switch in the ***policy-map-class*** configuration mode, +where traffic resolution commands are added to the class. + + +**Example** + + +This command adds the **CP-CMAP_1** class to the +copp-system-policy policy map and places the switch in the +***policy-map-class*** configuration +mode. +``` +`switch(config-pmap-copp-system-policy)# **class CP-CMAP_1** +switch(config-pmap-c-copp-system-policy-CP-CMAP_1)#` +``` + + +Two traffic resolution commands determine bandwidth parameters for class traffic: + +- bandwidth (policy-map-class (control-plane) Trident) specifies +the minimum bandwidth. + +- shape (policy-map-class (control-plane) Trident) specifies the maximum bandwidth. + + +**Example** + + +These commands configure a bandwidth range of **2000** to +**4000** packets per second (pps) for traffic +filtered by the **CP-CMAP_1** class +map: +``` +`switch(config-pmap-c-copp-system-policy-CP-CMAP_1)# **bandwidth pps 2000** +switch(config-pmap-c-copp-system-policy-CP-CMAP_1)# **shape pps 4000** +switch(config-pmap-c-copp-system-policy-CP-CMAP_1)#` +``` + + +The ***policy-map*** and ***policy-map-class*** configuration modes +are group-change modes. Changes are saved with the **exit** +command or discarded with the **abort** command. The +**show active** command displays the saved version of +policy map. The **show pending** command displays the modified +policy map. + + +**Example** + + +These commands exit the ***policy-map-class*** configuration mode, display +the pending policy-map, then exit the ***policy-map*** configuration mode, +which saves the altered policy map to +***running-config***.
+``` +`switch(config-pmap-c-copp-system-policy-CP-CMAP_1)# **exit** +switch(config-pmap-copp-system-policy)# **show pending** +policy-map type copp copp-system-policy + class CP-CMAP_1 + shape pps 4000 + bandwidth pps 2000 + + class copp-system-bpdu + + class copp-system-lldp + + class copp-system-lacp + + class copp-system-arp + + class copp-system-arpresolver + + class copp-system-default + +switch(config-pmap-copp-system-policy)# **exit** +switch(config)#` +``` + + +To modify traffic resolution commands for a static class, enter the +***policy-map-class*** configuration mode for the class, then enter +the desired **bandwidth** and **shape** +commands. + + +**Example** + + +These commands enters the ***policy-map-class*** configuration mode for +**copp-system-bpdu** class, change the bandwidth range +for the class, then save the altered policy map to +***running-config***. +``` +`switch(config)# **policy-map type copp copp-system-policy** +switch(config-pmap-copp-system-policy)# **class copp-system-bpdu** +switch(config-pmap-c-copp-system-policy-copp-system-bpdu)# **shape pps 200** +switch(config-pmap-c-copp-system-policy-copp-system-bpdu)# **bandwidth pps 100** +switch(config-pmap-c-copp-system-policy-copp-system-bpdu)# **exit** +switch(config-pmap-copp-system-policy)# **show pending** +policy-map type copp copp-system-policy + class CP-CMAP_1 + shape pps 4000 + bandwidth pps 2000 + + class copp-system-bpdu + shape pps 200 + bandwidth pps 100 + + class copp-system-lldp + +switch(config-pmap-copp-system-policy)# **exit** +switch(config)#` +``` + + +#### Applying Policy Maps to the Control Plane + + +The **copp-system-policy** policy map is always applied to the control +plane. No commands are available to add or remove this assignment. + + +### Configuring QoS Traffic Policies + Trident Platform Switches + + +QoS traffic policies are implemented +by creating class maps and policy maps, then applying the policy maps +to Ethernet and port channel interfaces. 
+ + +#### Creating Class Maps + + +QoS traffic policies utilize dynamic class maps that are created and modified in the +***class-map*** configuration mode. The class-map type qos command enters the ***class-map*** +configuration mode. + + +**Example** + + +This command enters the ***class-map*** configuration mode to create +QoS class map named +**Q-CMap_1**. +``` +`switch(config)# **class-map type qos match-any Q-CMap_1** +switch(config-cmap-Q-CMap_1)#` +``` + + +A class map contains one IPv4 or IPv6 Access Control List (ACL). The match (class-map (qos) Trident) +command assigns an ACL to the class map. Subsequent +**match** commands replace the existing +**match** command. Class maps filter +traffic only on ACL permit rules. Deny ACL rules are disregarded. + + +**Example** + + +This command adds the IPv6 ACL named **ACLv6_1** to the +class +map. +``` +`switch(config-cmap-Q-CMap_1)# **match ipv6 access-group ACLv6_1** +switch(config-cmap-Q-CMap_1)#` +``` + + +The ***class-map*** configuration mode is a group-change mode. Changes +made in a group-change mode are saved by exiting the mode. The +**show active** command displays the saved +version of class map. The **show pending** command +displays the unsaved class map. + + +**Example** + + +The **show active** command indicates that the +configuration mode class map is not stored in ***running-config***. +The **show pending** command displays the class map to +be stored upon exiting the ***class-map*** configuration +mode. +``` +`switch(config-cmap-Q-CMap_1)# **show active** +switch(config-cmap-Q-CMap_1)# **show pending** +class-map type qos match-any Q-CMap_1 + match ipv6 access-group ACLv6_1 + +switch(config-cmap-Q-CMap_1)#` +``` + + +The **exit** command returns the switch to +***global*** configuration mode and saves pending class +map changes. The **abort** command returns the switch +to ***global*** configuration mode and discards pending class map +changes. 
+ + +**Example** + + +This command exits the ***class-map*** configuration mode and stores +pending changes to +***running-config***. +``` +`switch(config-cmap-CP-CMAP_1)# **exit** +switch(config)# **show class-map type control-plane CP-CMAP_1** + Class-map: CP-CMAP_1 (match-any) + Match: ip access-group name ACLv4_1 +switch(config)#` +``` + + +#### Creating Policy Maps + + +Policy maps are created and modified in the ***policy-map*** configuration mode. The +policy-map type +quality-of-service command enters the ***policy-map*** +configuration mode. + + +**Example** + + +This command enters the ***policy-map*** configuration mode for +creating a QoS policy map named +**Q-PMAP_1**. +``` +`switch(config)# **policy-map type quality-of-service Q-PMAP_1** +switch(config-pmap-Q-PMAP_1)#` +``` + + +Policy maps are edited by adding or removing classes. A class automatically +contains its eponymous class map; traffic resolution commands are added or +edited in the ***policy-map-class*** configuration mode. The class (policy-map (qos) Trident) +command adds a class to the configuration mode policy map and places the +switch in the ***policy-map-class*** configuration mode, where +traffic resolution commands are added to the class. + + +**Example** + + +This command adds the **Q-CMap_1** class to the +**Q-PMAP_1** policy map and places the +switch in the ***policy-map-class*** configuration +mode. +``` +`switch(config-pmap-Q-PMAP_1)# **class Q-CMap_1** +switch(config-pmap-c-Q-PMAP_1-Q-CMap_1)#` +``` + + +The set (policy-map-class (qos) Trident) +command configures traffic resolution methods for data that passes the class +map: + +- **set cos** sets the layer 2 CoS +field. + +- **set dscp** sets the DSCP value in the ToS +byte. + +- **set traffic class** specifies a traffic +class queue. + + +**Example** + + +These commands configure the policy map to set **CoS field +7** on packets filtered by the class map, then +assigns those packets to **traffic class +4**. 
+``` +`switch(config-pmap-c-Q-PMAP_1-Q-CMap_1)# **set cos 7** +switch(config-pmap-c-Q-PMAP_1-Q-CMap_1)# **set traffic-class 4** +switch(config-pmap-c-Q-PMAP_1-Q-CMap_1)#` +``` + + +The ***policy-map*** and ***policy-map-class*** configuration +modes are group-change modes. Changes are saved with the **exit** command +or discarded with the **abort** command. The +**show active** and **show +pending** commands display the saved and modified +policy map versions, respectively. + + +**Example** + + +These commands exit the ***policy-map-class*** configuration mode, +display the pending policy-map, then exits the ***policy-map*** +configuration mode to save the altered policy map to +***running-config***. +``` +`switch(config-pmap-c-Q-PMAP_1-Q-CMap_1)# **exit** +switch(config-pmap-Q-PMAP_1)# **show pending** +policy-map type quality-of-service Q-PMAP_1 + class Q-CMap_1 + set cos 7 + set traffic-class 4 + + class class-default + +switch(config-pmap-Q-PMAP_1)# **exit** +switch(config)#` +``` + + +The last class in all QoS policy maps is ***class-default***. The +***class-default*** class map matches all traffic except +IPv4 or IPv6 traffic and provides no traffic resolution commands. The +***class-default*** class map is not editable; traffic +resolution commands can be added to the ***class-default*** +class. + + +To modify traffic resolution commands for the ***class-default*** +class, enter the ***policy-map-class*** configuration mode for the +class, then enter the desired **set** commands. + + +**Example** + + +These commands enters the ***policy-map-class*** configuration mode for +***class-default***, configures the stream to enter +**traffic class 2**, and saves the altered +policy map to +***running-config***. 
+``` +`switch(config)# **policy-map type quality-of-service Q-PMap_1** +switch(config-pmap-Q-PMap_1)# **class class-default** +switch(config-pmap-c-Q-PMap_1-class-default)# **set traffic-class 2** +switch(config-pmap-c-Q-PMap_1-class-default)# **exit** +switch(config-pmap-Q-PMap_1)# **exit** +switch(config)# **show policy-map type qos Q-PMap_1** +Service-policy Q-PMap_1 + + Class-map: Q-CMap_1 (match-any) + Match: ipv6 access-group name ACLv6_1 + set cos 7 + set traffic-class 4 + + Class-map: class-default (match-any) + set traffic-class 2 + +switch(config)#` +``` + + +#### Applying Policy Maps to an Interface + + +The service-policy +type qos (Interface mode) command applies a specified policy map to the +configuration mode interface. + + +**Example** + + +These commands apply **PMAP-1** policy map to +**interface ethernet +8**. +``` +`switch(config)# **interface ethernet 8** +switch(config-if-Et8)# **show active** +switch(config-if-Et8)# **service-policy input PMAP-1** +switch(config-if-Et8)# **show active** +interface Ethernet8 + service-policy type qos input PMAP-1 +switch(config-if-Et8)#` +``` + + +### Configuring PBR Policies +Trident Platform Switches + + +Policy-Based Routing (PBR) is implemented by creating class maps and policy maps, then applying + the policy maps to Ethernet interfaces, port channel interfaces or Switch Virtual + Interfaces (SVIs). + + +#### Creating PBR Class Maps + + +PBR policies utilize class maps that are created and modified in the ***class-map*** +configuration mode. The class-map type +pbr command enters the ***class-map*** configuration +mode. + + + + +**Example** + + + This command enters the ***class-map*** configuration mode to create a PBR class +map named +**CMAP1**. +``` +`switch(config)# **class-map type pbr match-any CMAP1** +switch(config-cmap-PBR-CMAP1)#` +``` + + + + +A class map contains one or more Access Control Lists (ACLs). The match (policy-map + (pbr)) command assigns an ACL to the class map. 
Subsequent +**match** commands add additional ACLs to the class map. +Class maps filter traffic only on ACL permit rules. Deny ACL rules are disregarded; if a +class map includes ACLs with deny rules, the configuration reverts to its previous +state. + + + + +**Examples** + + + + +- This command adds the ACL named **ACL1** to the + class +map. +``` +`switch(config-cmap-PBR-CMAP1)# **match ip access-group ACL1** +switch(config-cmap-PBR-CMAP1)#` +``` + + +The ***class-map*** +configuration mode is a group-change mode. Changes made in a group-change mode are +saved by exiting the mode. The **show active** command +displays the saved version of class map. + +- The **show active** command indicates that the configuration + mode class map is not stored in ***running-config***. + +``` +`switch(config-cmap-PBR-CMAP1)# **show active** +switch(config-cmap-PBR-CMAP1)#` +``` + +- The **exit** command returns the switch to +***global*** configuration mode and saves pending class map changes. The + **abort** command returns the switch to +***global*** configuration mode and discards pending changes. + +- This command exits the ***class-map*** configuration mode and stores pending + changes to + ***running-config***. +``` +`switch(config-cmap-PBR-CMAP1)# **exit** +switch(config)# **show class-map type pbr CMAP1** +class-map type pbr match-any CMAP1 + 10 match ip access-group ACL1 +switch(config)#` +``` + + + + +#### Creating PBR Policy Maps + + +Policy maps are created and modified in the ***policy-map*** configuration mode. The + policy-map type + pbr command enters policy-map configuration mode. + + + + +**Examples** + + + + +- This command enters the ***policy-map*** configuration mode for creating a +PBR policy map named +**PMAP1**. +``` +`switch(config)# **policy-map type pbr PMAP1** +switch(config-pmap-PMAP1)#` +``` + + + + +Policy map are edited by adding or removing classes. 
A class automatically +contains its eponymous class map; next-hop commands are added or edited in the +***policy-map-class*** configuration mode. The class (policy-map +(pbr)) command adds a class to the configuration mode +policy map and places the switch in the ***policy-map-class*** +configuration mode, where next-hop commands are added to the class. + +- This command adds the **CMAP1** class to the policy map and + places the switch in the ***policy-map-class*** configuration + mode. +``` +`switch(config-pmap-PMAP1)# **class CMAP1** +switch(config-pmap-c-PMAP1-CMAP1)#` +``` + +- The set nexthop (policy-map-class pbr) command + configures the next hop for data that passes the class map. This command configures + the policy map to set the next hop to **10.12.0.5** on packets + filtered by the class + map. +``` +`switch(config-pmap-c-PMAP1-CMAP1)# **set nexthop 10.12.0.5** +switch(config-pmap-c-PMAP1-CMAP1)#` +``` + +- The ***policy-map*** and ***policy-map-class*** configuration modes + are group-change modes. Changes are saved with the **exit** + command or discarded with the **abort** command. The + **show active** command displays the currently saved map + version. These commands exit the ***policy-map-class*** configuration mode, + then exit the ***policy-map*** configuration mode to save the altered policy + map to + ***running-config***. +``` +`switch(config-pmap-c-PMAP1-CMAP1)# **exit** +switch(config-pmap-PMAP1)# **exit** +switch(config)#` +``` + + + + +#### Applying a PBR Policy Map to an Interface + + +The service-policy type pbr + (Interface mode) command applies the specified PBR policy map to the +configuration mode interface. Only one PBR service policy is supported per interface. + + - These commands apply the **PMAP1** PBR policy map to +**interface ethernet +8**.
+``` +`switch(config)# **interface ethernet 8** +switch(config-if-Et8)# **service-policy type pbr input PMAP1** +switch(config-if-Et8)#` +``` + + +#### Hardware Decapsulation + + +When hardware decapsulation takes place, +PBR policy maps on Trident platform switches match on inner packet headers +(i.e., they match based on the attributes of the decapsulated packet). + + +## Traffic Management Configuration + Trident II Platform Switches + + +Traffic policies are implemented by policy maps, which are applied to the control plane or an + interface. Policy maps contain classes, which are composed of class maps and traffic + resolution commands. Traffic +Management Conceptual Overview describes traffic policies. + + +Trident II platform switches support the following traffic policies: + +- Control plane policies manage control plane traffic. + +- QoS traffic policies manage traffic on Ethernet and port channel interfaces. + + + + +### Configuring Control Plane Traffic +Policies Trident II Platform Switches + + +Default control plane traffic +policies are implemented automatically without user intervention. These +policies are modified by associating traffic resolution commands with +static classes that comprise the control plane policy map. + + +#### Static Class Maps + + +Control plane traffic policies utilize +static class maps, which are provided by the switch, are not editable, +and cannot be deleted. + + +#### Editing the Policy Map + + +The only control plane policy map is **copp-system-policy**, which cannot +be deleted. In its default form, **copp-system-policy** +consists of the classes listed in copp-system-policy default classes: Trident II Platform Switches. +Although the underlying class map of each class cannot be edited, the traffic +resolution commands can be adjusted. The default classes cannot be removed from the +policy map and their sequence within the policy map is not editable. + + +Table 6.
copp-system-policy default classes: Trident II Platform Switches + +| Class Name +| bandwidth (pps) +| shape (pps) +| + + +| copp-system-acllog +| 1000 +| 10000 +| + + +| copp-system-arp +| 1000 +| 10000 +| + + +| copp-system-arpresolver +| 1000 +| 10000 +| + + +| copp-system-bfd +| 5000 +| 10000 +| + + +| copp-system-bgp +| 5000 +| 5000 +| + + +| copp-system-bpdu +| 5000 +| 5000 +| + + +| copp-system-default +| 1000 +| 8000 +| + + +| copp-system-glean +| 1000 +| 10000 +| + + +| copp-system-igmp +| 1000 +| 10000 +| + + +| copp-system-ipmcmiss +| 1000 +| 10000 +| + + +| copp-system-ipmcrsvd +| 1000 +| 10000 +| + + +| copp-system-l3destmiss +| 1000 +| 10000 +| + + +| copp-system-l3slowpath +| 1000 +| 10000 +| + + +| copp-system-l3ttl1 +| 1000 +| 10000 +| + + +| copp-system-lacp +| 5000 +| 5000 +| + + +| copp-system-lldp +| 1000 +| 10000 +| + + +| copp-system-mlag +| 5000 +| 5000 +| + + +| copp-system-selfip +| 5000 +| 5000 +| + + +| copp-system-selfip-tc6to7 +| 5000 +| 5000 +| + + +| copp-system-sflow +| 0 +| 25024 +| + + +| copp-system-tc3to5 +| 1000 +| 10000 +| + + +| copp-system-tc6to7 +| 1000 +| 10000 +| + + +| copp-system-urm +| 1000 +| 10000 +| + + +Policy maps are modified in the ***policy-map*** configuration mode. The policy-map type copp command enters +the ***policy-map*** configuration mode. + + +**Examples** + +- This command enters the ***policy-map*** configuration mode for +editing +**copp-system-policy**. +``` +`switch(config)# **policy-map type copp copp-system-policy** +switch(config-pmap-copp-system-policy)#` +``` + +- The class (policy-map (control-plane) Trident II) command enters the +***policy-map-class*** configuration mode, where traffic +resolution commands are modified for the configuration mode class. + +- This command enters the ***policy-map-class*** configuration mode +for the **copp-system-lacp** static +class.
+``` +`switch(config-pmap-copp-system-policy)# **class copp-system-lacp** +switch(config-pmap-c-copp-system-policy-copp-system-lacp)#` +``` + + +Two traffic resolution commands determine bandwidth parameters for class traffic: + +- bandwidth (policy-map-class (control-plane) Trident II) +specifies the minimum bandwidth. + +- shape (policy-map-class (control-plane) Trident II) specifies the maximum bandwidth. + + +**Examples** + +- These commands configure a bandwidth range of **2000** +to **4000** packets per second (pps) for traffic +filtered by the **copp-system-lacp** class +map: +``` +`switch(config-pmap-c-copp-system-policy-copp-system-lacp)# **bandwidth pps 2000** +switch(config-pmap-c-copp-system-policy-copp-system-lacp)# **shape pps 4000** +switch(config-pmap-c-copp-system-policy-copp-system-lacp)#` +``` + +- The ***policy-map*** and ***policy-map-class*** +configuration modes are group-change modes. Changes are saved with the +**exit** command or discarded with the +**abort** command. The **show +active** command displays the saved version of policy +map. The **show pending** command displays the +modified policy map. + +- These commands exit the ***policy-map-class*** configuration mode, +display the pending ***policy-map***, then exit the +***policy-map*** configuration mode, which saves the altered +policy map to +***running-config***. +``` +`switch(config-pmap-c-copp-system-policy-copp-system-lacp)# **exit** +switch(config-pmap-copp-system-policy)# **show pending** +policy-map type copp copp-system-policy + class copp-system-bpdu + + class copp-system-lldp + + class copp-system-lacp + shape pps 4000 + bandwidth pps 2000 + + class copp-system-arp + +switch(config-pmap-copp-system-policy)# **exit** +switch(config)#` +``` + + +#### Applying Policy Maps to the Control Plane + + +The **copp-system-policy** policy map is always applied to the control +plane. No commands are available to add or remove this assignment.
+ + +## Traffic Management Configuration Commands + + +### Traffic Policy (Control Plane) Configuration Commands + + +- bandwidth (policy-map-class (control-plane) +Arad) + +- bandwidth (policy-map-class (control-plane) +FM6000) + +- bandwidth (policy-map-class (control-plane) +Helix) + +- bandwidth (policy-map-class (control-plane) +Petra) + +- bandwidth (policy-map-class (control-plane) +Trident) + +- bandwidth (policy-map-class (control-plane) Trident +II) + +- class-map type +copp + +- class (policy-map (control-plane) Arad) + +- class (policy-map (control-plane) +FM6000) + +- class (policy-map (control-plane) +Helix) + +- class (policy-map (control-plane) +Petra) + +- class (policy-map (control-plane) +Trident) + +- class (policy-map (control-plane) Trident +II) + +- match (class-map (control-plane) Helix) + +- match (class-map (control-plane) +Trident) + +- match (class-map (control-plane) Trident +II) + +- policy-map type +copp + +- shape (policy-map-class (control-plane) +Arad) + +- shape (policy-map-class (control-plane) +FM6000) + +- shape (policy-map-class (control-plane) +Helix) + +- shape (policy-map-class (control-plane) +Petra) + +- shape (policy-map-class (control-plane) +Trident) + +- shape (policy-map-class (control-plane) Trident +II) + + +### Traffic Policy (PBR) Configuration Commands + + +- action set-ttl + +- class (policy-map +(pbr)) + +- class-map type +pbr + +- feature pbr + +- match (class-map +(pbr)) + +- match (policy-map +(pbr)) + +- platform arad tcam counters feature + +- policy-map type +pbr + +- resequence +(class-map (pbr)) + +- resequence +(policy-map (pbr)) + +- service-policy type pbr (Interface +mode) + +- set nexthop (policy-map-class pbr) + +- set nexthop-group (policy-map-class(pbr) +Arad) + + +### CPU Traffic Policy Command + + +- feature traffic-policy cpu + +- feature traffic-policy port + + +### Traffic Policy (QoS) Configuration Commands + + +- class-map type +qos + +- class (policy-map (qos) FM6000) + +- class 
(policy-map (qos) Helix) + +- class (policy-map (qos) Trident) + +- class (policy-map (qos) Trident II) + +- match (class-map (qos) FM6000) + +- match (class-map (qos) Helix) + +- match (class-map (qos) Trident) + +- match (class-map (qos) Trident II) + +- policy-map type quality-of-service + +- policy-map type quality-of-service policer + +- service-policy type qos (Interface +mode) + +- set (policy-map-class (qos) FM6000) + +- set (policy-map-class (qos) Helix) + +- set (policy-map-class (qos) Trident) + +- set (policy-map-class (qos) Trident II) + + +### Traffic Policy Display and Utility Commands + + +- clear policy-map +counters + +- show +class-map type control-plane + +- show class-map +type pbr + +- show class-map +type qos + +- show policy-map +type copp + +- show policy-map +type pbr + +- show policy-map +type qos + +- show +policy-map type qos counters + +- show policy-map +copp + +- show +policy-map interface type qos + +- show policy-map interface type qos +counters + +- show traffic-policy + + +### action set-ttl + + +The TTL action is effective only when it is configured along with a set nexthop or +nexthop-group action. The TCAM profile has the set-ttl-3b or set-ttl action in the pbr +ip and pbr ipv6 features, such as in the tc-counters system profile. + + +**Command Mode** + + +For IP + + +TCAM feature PBR IP configuration mode. + + +For IPv6 + + +TCAM feature PBR IPv6 configuration mode. + + +**Command Syntax** + + +action set-time [set-ttl | +set-ttl-3b] + + +no action set-time [set-ttl | +set-ttl-3b] + + +default action set-time [set-ttl | +set-ttl-3b] + + +**Parameters** + + +- **set-ttl**Set time to live. + +- **set-ttl-3b** Set 3-bit time to live. + + +**Examples** + + +- In the following example, for IP, the action sets the time to live for the next +hop. 
+ + +``` +`(config)# hardware tcam +(config-tcam)# profile pbr-set-ttl copy default +(config-tcam-profile-pbr-set-ttl)# feature pbr ip +(config-tcam-feature-pbr-ip)# action set-ttl` +``` + +- In the following example, for IPv6, the action sets the time to live for the next +hop group. + + +``` +`config)# hardware tcam +(config-tcam)# profile pbr-set-ttl copy default +(config-tcam-profile-pbr-set-ttl)# feature pbr ip +(config-tcam-feature-pbr-ip)# feature pbr ipv6 +(config-tcam-feature-pbr-ipv6)# action set-ttl` +``` + + +### bandwidth (policy-map-class +(control-plane)Arad) + + +The **bandwidth** command specifies the minimum bandwidth for +traffic filtered by the configuration mode policy map class. + + +The **no bandwidth** and **default +bandwidth** commands remove the minimum bandwidth guarantee for +the configuration mode class by deleting the corresponding +**bandwidth** command from +***running-config***. + + +**Command Mode** + + +Policy-map-class (control plane) configuration + + +accessed through **class (policy-map (control-plane) +Arad)** + + +**Command Syntax** + + +bandwidth kbps +kilobits + + +no bandwidth + + +default bandwidth + + +**Parameters** + + +**kilobits** Minimum data rate in kilobits per second. Value +ranges from **1** to **10000000**. + + +**Related Commands** + +- class (policy-map (control-plane) Arad) places the switch in the +***policy-map-class*** (control plane) configuration +mode. + +- shape (policy-map-class (control-plane) Arad) specifies the +maximum bandwidth for traffic defined by the associated class map in its +configuration mode policy map class. 
+ + +**Static Classes Default Bandwidth** + + +Arad platform switches define these default bandwidths for control plane static +classes: + +- copp-system-bgp 250 copp-system-l3lpmoverflow 250 + +- copp-system-bpdu 1250 copp-system-l3slowpath 250 + +- copp-system-default 250 copp-system-l3ttl1 250 + +- copp-system-ipbroadcast 250 copp-system-lacp 1250 + +- copp-system-ipmc 250 copp-system-linklocal 250 + +- copp-system-ipmcmiss 250 copp-system-lldp 250 + +- copp-system-ipunicast 250 copp-system-mlag 250 + +- copp-system-l2broadcast 250 copp-system-multicastsnoop 250 + +- copp-system-l2unicast 250 copp-system-OspfIsis 250 + +- copp-system-l3destmiss 250 copp-system-sflow 250 + + +**Example** + + +These commands configure the minimum bandwidth of **500** kbps +for data traffic specified by the class map +**copp-system-lldp** of the default +***control-plane*** policy map. + +``` +`switch(config)# **policy-map type copp copp-system-policy** +switch(config-pmap-copp-system-policy)# **class copp-system-lldp** +switch(config-pmap-c-copp-system-policy-copp-system-lldp)# **bandwidth kbps 500** +switch(config-pmap-c-copp-system-policy-copp-system-lldp)# **exit** +switch(config-pmap-copp-system-policy)# **exit** +switch(config)# **show policy-map copp copp-system-policy** +Service-policy input: copp-system-policy + Hardware programming status: InProgress + + Class-map: copp-system-lldp (match-any) + shape : 2500 kbps + bandwidth : 500 kbps + Out Packets : 0 + Drop Packets : 0 + +switch(config)#` +``` + + +### bandwidth (policy-map-class +(control-plane)FM6000) + + +The **bandwidth** command specifies the minimum bandwidth for +traffic filtered by the configuration mode policy map class. + + +The **no bandwidth** and **default +bandwidth** commands remove the minimum bandwidth guarantee for +the configuration mode class by deleting the corresponding +**bandwidth** command from +***running-config***. 
+ + +**Command Mode** + + +Policy-map-class (control plane) configuration + + +accessed through **class (policy-map (control-plane) +FM6000)** + + +**Command Syntax** + + +bandwidth pps +packets + + +no bandwidth + + +default bandwidth + + +**Parameters** + + +**packets** Minimum data rate in packets per second. Value +ranges from **1** to **100000**. + + +**Related Commands** + +- class (policy-map (control-plane) FM6000) places the switch in +***policy-map-class*** (control plane) configuration +mode. + +- shape (policy-map-class (control-plane) FM6000) specifies the +maximum bandwidth for traffic defined by the associated class map in its +configuration mode policy map class. + + +**Static Classes Default Bandwidth** + + +FM6000 platform switches define these default bandwidths for control plane static +classes: + +- copp-system-arp 1000 copp-system-l3slowpath 1000 + +- copp-system-default 1000 copp-system-pim-ptp 1000 + +- copp-system-ipmcrsvd 1000 copp-system-ospf-isis 1000 + +- copp-system-ipmcmiss 1000 copp-system-selfip 5000 + +- copp-system-igmp 1000 copp-system-selfip-tc6to7 5000 + +- copp-system-l2rsvd 10000 copp-system-sflow 1000 + + +**Example** + + +These commands configure the minimum bandwidth of **1000** +packets per second for data traffic specified by the class map +**PMAP-1** in the policy map named +**copp-system-policy**. +``` +`switch(config)# **policy-map type copp copp-system-policy** +switch(config-pmap-copp-system-policy)# **class PMAP-1** +switch(config-pmap-c-copp-system-policy-PMAP-1)# **bandwidth pps 1000** +switch(config-pmap-c-copp-system-policy-PMAP-1)#` +``` + + +### bandwidth (policy-map-class +(control-plane)Helix) + + +The **bandwidth** command specifies the minimum bandwidth for +traffic filtered by the configuration mode policy map class. 
+ + +The **no bandwidth** and **default +bandwidth** commands remove the minimum bandwidth guarantee for +the configuration mode class by deleting the corresponding +**bandwidth** command from +***running-config***. + + +**Command Mode** + + +Policy-map-class (control plane) configuration + + +accessed through **class (policy-map (control-plane) +Helix)** + + +**Command Syntax** + + +bandwidth pps +packets + + +no bandwidth + + +default bandwidth + + +**Parameter** + + +**packets** Minimum data rate in packets per second. Value +ranges from **1** to **100000**. + + +**Related Commands** + +- class (policy-map (control-plane) Helix) places the switch in +***policy-map-class*** (control plane) configuration +mode. + +- shape (policy-map-class (control-plane) Helix) specifies the +maximum bandwidth for traffic defined by the associated class map in its +configuration mode policy map class. + + +**Static Classes Default Bandwidth** + + +Helix platform switches define these default bandwidths for control plane static +classes: + +- copp-system-acllog 1000 copp-system-l3ttl1 1000 + +- copp-system-arp 1000 copp-system-lacp 5000 + +- copp-system-arpresolver 1000 copp-system-lldp 1000 + +- copp-system-bfd 5000 copp-system-mlag 5000 + +- copp-system-bgp 5000 copp-system-OspfIsis 5000 + +- copp-system-bpdu 5000 copp-system-selfip 5000 + +- copp-system-default 1000 copp-system-selfip-tc6to7 5000 + +- copp-system-glean 1000 copp-system-sflow 0 + +- copp-system-igmp 1000 copp-system-tc3to5 1000 + +- copp-system-ipmcmiss 1000 copp-system-tc6to7 1000 + +- copp-system-ipmcrsvd 1000 copp-system-urm 1000 + +- copp-system-l3destmiss 1000 copp-system-vrrp 1000 + +- copp-system-l3slowpath 1000 + + +**Example** + + +These commands configure the minimum bandwidth of **500** +packets per second for data traffic specified by the class map +**copp-system-lldp**. 
+``` +`switch(config)# **policy-map type copp copp-system-policy** +switch(config-pmap-copp-system-policy)# **class copp-system-lldp** +switch(config-pmap-c-copp-system-policy-copp-system-lldp)# **bandwidth pps 500** +switch(config-pmap-c-copp-system-policy-copp-system-lldp)# **exit** +switch(config-pmap-copp-system-policy)# **exit** +switch(config)# **show policy-map interface control-plane copp-system-policy** +Service-policy input: copp-system-policy + Number of units programmed: 4 + Hardware programming status: Successful + + Class-map: copp-system-lldp (match-any) + shape : 10000 pps + bandwidth : 500 pps + Out Packets : 304996 + Drop Packets : 0 + +switch(config)#` +``` + + +### bandwidth (policy-map-class +(control-plane)Petra) + + +The **bandwidth** command specifies the minimum bandwidth for +traffic filtered by the configuration mode policy map class. + + +The **no bandwidth** and **default +bandwidth** commands remove the minimum bandwidth guarantee for +the configuration mode class by deleting the corresponding +**bandwidth** command from +***running-config***. + + +**Command Mode** + + +Policy-map-class (control plane) configuration + + +accessed through **class (policy-map (control-plane) +Petra)** + + +**Command Syntax** + + +bandwidth kbps +kilobits + + +no bandwidth + + +default bandwidth + + +**Parameter** + + +**kilobits** Minimum data rate in kilobits per second. Value +ranges from **1** to **10000000**. + + +**Related Commands** + +- class (policy-map (control-plane) Petra) places the switch in +***policy-map-class*** (control plane) configuration +mode. + +- shape (policy-map-class (control-plane) Petra) specifies the +maximum bandwidth for traffic defined by the associated class map in its +***policy map class*** configuration mode.
+ + +**Static Classes Default Bandwidth** + + +Petra platform switches define these default bandwidths for control plane static +classes: + +- copp-system-bpdu 1250 copp-system-l3destmiss 250 + +- copp-system-default 250 copp-system-l3slowpath 250 + +- copp-system-igmp 250 copp-system-l3ttl0 250 + +- copp-system-ipbroadcast 250 copp-system-l3ttl1 250 + +- copp-system-ipmc 250 copp-system-lacp 1250 + +- copp-system-ipmcmiss 250 copp-system-lldp 250 + +- copp-system-ipmcrsvd 250 copp-system-unicast-arp 250 + +- copp-system-ipunicast 250 + + +**Guidelines** + + +Petra does not support all discrete rate values. When a specified discrete value is not supported, the switch converts the rate to the next highest discrete value that it supports. The **show** command displays the converted rate and not the user-configured rate. + + +**Example** + + +These commands configure a minimum bandwidth of **500** kbps +for data traffic specified by the class map ***copp-system-lldp*** of the +default ***control-plane*** policy map. Because the switch does not support +the discrete value of **500** kbps, it converts the bandwidth +up to **651** +kbps. +``` +`switch(config)# **policy-map type copp copp-system-policy** +switch(config-pmap-copp-system-policy)# **class copp-system-lldp** +switch(config-pmap-c-copp-system-policy-copp-system-lldp)# **bandwidth kbps 500** +switch(config-pmap-c-copp-system-policy-copp-system-lldp)# **exit** +switch(config-pmap-copp-system-policy)# **exit** +switch(config)# **show policy-map copp copp-system-policy** +Service-policy input: copp-system-policy + Hardware programming status: InProgress + + Class-map: copp-system-lldp (match-any) + shape : 2766 kbps + bandwidth : 651 kbps + Out Packets : 0 + Drop Packets : 0 + +switch(config)#` +``` + + +### bandwidth (policy-map-class +(control-plane)Trident II) + + +The **bandwidth** command specifies the minimum bandwidth for +traffic filtered by the configuration mode policy map class. 
+ + +The **no bandwidth** and **default +bandwidth** commands remove the minimum bandwidth guarantee for +the configuration mode class by deleting the corresponding +**bandwidth** command from +***running-config***. + + +**Command Mode** + + +Policy-map-class (control plane) configuration + + +accessed through **class (policy-map (control-plane) Trident +II)**. + + +**Command Syntax** + + +bandwidth pps +packets + + +no bandwidth + + +default bandwidth + + +**Parameter** + + +**packets** Minimum data rate in packets per second. Value +ranges from 1 to **100000**. + + +**Related Commands** + +- class (policy-map (control-plane) Trident II) places the switch +in ***policy-map-class*** (control plane) configuration mode. + +- shape (policy-map-class (control-plane) Trident II) specifies +the maximum bandwidth for traffic defined by the associated class map in its +configuration mode policy map class. + + +**Static Classes Default Bandwidth** + + +Trident II platform switches define these default bandwidths for control plane static +classes: + +- copp-system-acllog 1000 copp-system-l3slowpath 1000 + +- copp-system-arp 1000 copp-system-l3ttl1 1000 + +- copp-system-arpresolver 1000 copp-system-lacp 5000 + +- copp-system-bfd 5000 copp-system-lldp 1000 + +- copp-system-bgp 5000 copp-system-mlag 5000 + +- copp-system-bpdu 5000 copp-system-selfip 5000 + +- copp-system-default 1000 copp-system-selfip-tc6to7 5000 + +- copp-system-glean 1000 copp-system-sflow 0 + +- copp-system-igmp 1000 copp-system-tc3to5 1000 + +- copp-system-ipmcmiss 1000 copp-system-tc6to7 1000 + +- copp-system-ipmcrsvd 1000 copp-system-urm 1000 + +- copp-system-l3destmiss 1000 + + +**Example** + + +These commands configure the minimum bandwidth of **500** +packets per second for data traffic specified by the class map +**copp-system-lldp**. 
+``` +`switch(config)# **policy-map type copp copp-system-policy** +switch(config-pmap-copp-system-policy)# **class copp-system-lldp** +switch(config-pmap-c-copp-system-policy-copp-system-lldp)# **bandwidth pps 500** +switch(config-pmap-c-copp-system-policy-copp-system-lldp)# **exit** +switch(config-pmap-copp-system-policy)# **exit** +switch(config)# **show policy-map interface control-plane copp-system-policy** +Service-policy input: copp-system-policy + Number of units programmed: 4 + Hardware programming status: Successful + + Class-map: copp-system-lldp (match-any) + shape : 10000 pps + bandwidth : 500 pps + Out Packets : 304996 + Drop Packets : 0 + +switch(config)#` +``` + + +### bandwidth (policy-map-class +(control-plane)Trident) + + +The **bandwidth** command specifies the minimum bandwidth for +traffic filtered by the configuration mode policy map class. + + +The **no bandwidth** and **default +bandwidth** commands remove the minimum bandwidth guarantee for +the configuration mode class by deleting the corresponding +**bandwidth** command from +***running-config***. + + +**Command Mode** + + +Policy-map-class (control plane) configuration + + +accessed through **class (policy-map (control-plane) +Trident)**. + + +**Command Syntax** + + +bandwidth pps +packets + + +no bandwidth + + +default bandwidth + + +**Parameter** + + +**packets** Minimum data rate in packets per second. Value +ranges from **1** to **100000**. + + +**Related Commands** + +- class (policy-map (control-plane) Trident) places the switch in +***policy-map-class*** (control plane) configuration +mode. + +- shape (policy-map-class (control-plane) Trident) specifies the +maximum bandwidth for traffic defined by the associated class map in its +configuration mode policy map class.
+ + +**Static Classes Default Bandwidth** + + +Trident platform switches define these default bandwidths for control plane static +classes: + +- copp-system-arp 1000 copp-system-lldp 1000 + +- copp-system-arpresolver 1000 copp-system-l3destmiss 1000 + +- copp-system-bpdu 5000 copp-system-l3slowpath 1000 + +- copp-system-default 1000 copp-system-l3ttl1 1000 + +- copp-system-glean 1000 copp-system-selfip 5000 + +- copp-system-igmp 1000 copp-system-selfip-tc6to7 5000 + +- copp-system-ipmcmiss 1000 copp-system-sflow 0 + +- copp-system-ipmcrsvd 1000 copp-system-tc6to7 1000 + +- copp-system-lacp 5000 copp-system-tc3to5 1000 + + +**Example** + + +These commands configure the minimum bandwidth of **1000** +packets per second for data traffic specified by the class map +**PMAP-1** in the policy map named +**copp-system-policy**. +``` +`switch(config)# **policy-map type copp copp-system-policy** +switch(config-pmap-copp-system-policy)# **class PMAP-1** +switch(config-pmap-c-copp-system-policy-PMAP-1)# **bandwidth pps 1000** +switch(config-pmap-c-copp-system-policy-PMAP-1)#` +``` + + +### class (policy-map (control-plane) + Arad) + + +The **class** command places the switch in policy-map-class +(control plane) configuration mode, which is a group change mode for changing +bandwidth and shape parameters associated with a specified class. All changes in a +group change mode edit session are pending until the end of the session. + +A +policy map is an ordered list of classes. The control plane policy map contains +**20** static classes. Each class contains an +eponymous class map and may contain **bandwidth** and +**shape** commands. + +- The class map identifies a data stream. + +- **bandwidth** command defines the streams minimum +transmission rate through the control plane. + +- **shape** command defines the streams maximum +transmission rate through the control plane. + + +Static class maps identify a data stream by definition. 
Each data packet +is managed by commands of the first class whose map matches the packets content. +Dynamic classes are not supported for control plane policing on Arad platform +switches. + + +Each class corresponds to a transmission queue. Queue scheduling is +round-robin until ***bandwidth*** rate for a queue is exceeded. Scheduling +becomes strict-priority with CPU queue number determining priority until the +***shape*** rate is reached. Packets are dropped after the shape +rate is exceeded. + + +The **exit** command returns the +switch to policy-map configuration mode. Saving policy-map-class changes also +require an exit from policy-map mode, which saves pending policy-map-class and +policy-map changes to ***running-config*** and returns the switch to the +***global*** configuration mode. The **abort** +command discards pending changes, returning the switch to the ***global*** +configuration mode. + + +The **no class** and +**default class** commands remove +**policy-map-class** commands for the specified class +assignment from the policy map. + + +**Command Mode** + + +Policy-Map (control plane) configuration accessed through **policy-map type +copp** command. + + +**Command +Syntax** + + +class +class_name + + +no class +class_name + + +default class +class_name + + +**Parameter** + +**class_name** name of the class. 
+**Static Classes** + +Arad platform switches provide the following static control plane classes: + +- copp-system-bgp copp-system-l2broadcast copp-system-linklocal + +- copp-system-bpdu copp-system-l2unicast copp-system-lldp + +- copp-system-default copp-system-l3destmiss copp-system-mlag + +- copp-system-ipbroadcast copp-system-l3lpmoverflow +copp-system-multicastsnoop + +- copp-system-ipmc copp-system-l3slowpath copp-system-OspfIsis + +- copp-system-ipmcmiss copp-system-l3ttl1 copp-system-sflow + +- copp-system-ipunicast copp-system-lacp + + +**Commands Available in Policy-map-class (control plane) Configuration +Mode** + +- bandwidth (policy-map-class (control-plane) Arad) + +- shape (policy-map-class (control-plane) Arad) + +- **exit** saves pending class map changes, then returns +the switch to global configuration mode. + +- **abort** discards pending class map changes, then +returns the switch to global configuration mode. + + +**Related Commands** + +policy-map type copp places the switch +in ***policy-map*** (control plane) configuration +mode. +**Example** + +These commands enter ***policy-map-class*** +configuration mode to modify the shape and bandwidth parameters associated with the static +class named ***copp-system-lldp***. + +``` +`switch(config)# **policy-map type copp copp-system-policy** +switch(config-pmap-copp-system-policy)# **class copp-system-lldp** +switch(config-pmap-c-copp-system-policy-copp-system-lldp)#` +``` + + +### class (policy-map (control-plane) + FM6000) + + +The **class** command places the switch in +***policy-map-class*** (control plane) configuration mode, which is +a group change mode for changing bandwidth and shape parameters associated with a +specified class. All changes in a group change mode edit session are pending until +the end of the session. + +A policy map is an ordered list of classes. The +control plane policy map contains **12** static classes.
Each +class contains an eponymous class map and may contain +**bandwidth** and **shape** +commands. + +- The class map identifies a data stream. + +- **bandwidth** command defines the streams minimum +transmission rate through the control plane. + +- **shape** command defines the streams maximum +transmission rate through the control plane. + + +Static class maps identify a data stream by definition. Each data packet +is managed by commands of the first class whose map matches the packets content. +Dynamic classes are not supported for control plane policing on FM6000 platform +switches. + + +Each class corresponds to a transmission queue. Queue scheduling is +round-robin until ***bandwidth*** rate for a queue is exceeded. Scheduling +becomes strict-priority with CPU queue number determining priority until the +***shape*** rate is reached. Packets are dropped after the shape +rate is exceeded. + + +The **exit** command returns the +switch to policy-map configuration mode. Saving policy-map-class changes also +require an exit from policy-map mode, which saves pending policy-map-class and +policy-map changes to ***running-config*** and returns the switch to the +***global*** configuration mode. The **abort** +command discards pending changes, returning the switch to the ***global*** +configuration mode. + + +The **no class** and +**default class** commands remove +**policy-map-class** commands for the specified class +assignment from the policy map. The class is removed from the policy map if it is a +dynamic class. + + +**Command Mode** + + +Policy-Map (control plane) configuration accessed through **policy-map type +copp** command. + + +**Command Syntax** + + +class +class_name + + +no class +class_name + + +default class +class_name + + +**Parameter** + +**class_name** name of the class. 
+**Static Classes** + +FM6000 platform switches provide the following static control plane classes: + +- copp-system-arp copp-system-igmp copp-system-PimPtp + +- copp-system-default copp-system-l2rsvd copp-system-selfip + +- copp-system-ipmcmiss copp-system-l3slowpath copp-system-selfip-tc6to7 + +- copp-system-ipmcrsvd copp-system-OspfIsis copp-system-sflow + + +**Commands Available in Policy-map-class (control plane) +Configuration Mode** + +- bandwidth (policy-map-class (control-plane) FM6000) + +- shape (policy-map-class (control-plane) FM6000) + +- **exit** saves pending class map changes, then returns +the switch to the ***global*** configuration mode. + +- **abort** discards pending class map changes, then +returns the switch to the ***global*** configuration mode. + + +**Related Commands** + +policy-map type copp places the switch +in ***policy-map*** (control plane) configuration +mode. +**Example** + +These commands enter +***policy-map-class*** configuration mode to modify the shape and bandwidth +parameters associated with the static class named +**copp-system-arp**. + +``` +`switch(config)# **policy-map type copp copp-system-policy** +switch(config-pmap-copp-system-policy)# **class copp-system-arp** +switch(config-pmap-c-copp-system-policy-copp-system-arp)#` +``` + + +### class (policy-map (control-plane) + Helix) + + +The **class** command places the switch in +***policy-map-class*** (control plane) configuration mode, which is +a group change mode for changing bandwidth and shape parameters associated with a +specified class. All changes in a group change mode edit session are pending until +the end of the session. + +A policy map is an ordered list of classes. The +**control plane** policy map contains +23 static classes. Each class contains an eponymous class map +and may contain **bandwidth** and +**shape** commands. + +- The class map identifies a data stream.
+ +- **bandwidth** command defines the streams minimum +transmission rate through the control plane. + +- **shape** command defines the streams maximum +transmission rate through the control plane. + + +Static class maps identify a data stream by definition. Each data packet +is managed by commands of the first class whose map matches the packets content. +Dynamic classes are not supported for control plane policing on Helix platform +switches. + + +Each class corresponds to a transmission queue. Queue scheduling is +strict-priority; CPU queue number determines priority until the ***shape*** +rate is reached. Packets are dropped after the shape rate is exceeded. + + +The +**exit** command returns the switch to +***policy-map*** configuration mode. Saving policy-map-class changes +also require an exit from ***policy-map*** mode, which saves the pending +***policy-map-class*** and ***policy-map*** changes to +***running-config*** and returns the switch to global configuration +mode. The **abort** command discards pending changes, +returning the switch to the ***global*** configuration mode. + + +The +**no class** and **default +class** commands remove the ***policy-map-class*** commands +for the specified class assignment from the policy map. + + +**Command +Mode** + + +Policy-Map (control plane) configuration accessed through **policy-map type +copp** command. + + +**Command +Syntax** + + +class +class_name + + +no class +class_name + + +default class +class_name + + +**Parameter** + +**class_name** name of the class. 
+**Static Classes** + +Helix platform switches provide the following static control plane classes: + +- copp-system-acllog copp-system-ipmcmiss copp-system-OspfIsis + +- copp-system-arp copp-system-ipmcrsvd copp-system-selfip + +- copp-system-arpresolver copp-system-l3destmiss +copp-system-selfip-tc6to7 + +- copp-system-bfd copp-system-l3slowpath copp-system-sflow + +- copp-system-bgp copp-system-l3ttl1 copp-system-tc3to5 + +- copp-system-bpdu copp-system-lacp copp-system-tc6to7 + +- copp-system-default copp-system-lldp copp-system-urm + +- copp-system-glean copp-system-lldp copp-system-vrrp + +- copp-system-igmp copp-system-lldp + + +**Commands Available in Policy-map-class (control plane) +Configuration Mode** + +- bandwidth (policy-map-class (control-plane) Helix) + +- shape (policy-map-class (control-plane) Helix) + +- **exit** saves pending class map changes, then returns +the switch to the ***global*** configuration mode. + +- **abort** discards pending class map changes, then +returns the switch to the ***global*** configuration mode. + + +**Related Commands** + +policy-map type copp places the switch in ***policy-map*** (control +plane) configuration mode. +**Example** + +These commands enter ***policy-map-class*** configuration mode to modify +the shape and bandwidth parameters associated with the static class named +**copp-system-lldp**. + +``` +`switch(config)# **policy-map** +switch(config)# **policy-map type copp copp-system-policy** +switch(config-pmap-copp-system-policy)# **class copp-system-lldp** +switch(config-pmap-c-copp-system-policy-copp-system-lldp)#` +``` + + +### class (policy-map (control-plane) + Petra) + + +The **class** command places the switch in policy-map-class +(control plane) configuration mode, which is a group change mode for changing +bandwidth and shape parameters associated with a specified class. All changes in a +group change mode edit session are pending until the end of the session. + + +A policy map is an ordered list of classes.
The control plane policy map contains 15 +static classes. Each class contains an eponymous class map and may contain +**bandwidth** and **shape** +commands. + +- The class map identifies a data stream. + +- **bandwidth** command defines the streams minimum +transmission rate through the control plane. + +- **shape** command defines the streams maximum +transmission rate through the control plane. + + +Static class maps identify a data stream by +definition. Each data packet is managed by commands of the first class +whose map matches the packets content. Dynamic classes are not supported +for control plane policing on Petra platform switches. + + +Each class corresponds to a transmission queue. Queue scheduling is round-robin until +***bandwidth*** rate for a queue is exceeded. Scheduling becomes +strict-priority with CPU queue number determining priority until the +***shape*** rate is reached. Packets are dropped after the shape rate is +exceeded. + + +The **exit** command returns the switch to +***policy-map*** configuration mode. Saving the +***policy-map-class*** changes also require an exit from +***policy-map*** mode, which saves the pending +***policy-map-class*** and ***policy-map*** changes to +***running-config*** and returns the switch to the +***global*** configuration mode. The **abort** +command discards pending changes, returning the switch to the ***global*** +configuration mode. + + +The **no class** and **default class** +commands remove the **policy-map-class** commands for the +specified class assignment from the policy map. + + +**Command Mode** + + +Policy-Map (control plane) configuration accessed through policy-map type copp command. + + +**Command Syntax** + + +class +class_name + + +no class +class_name + + +default class +class_name + + +**Parameter** + + +**class_name** name of the class. 
+ + +**Static Classes** + + +Petra platform switches provide the following static control plane classes: + +- copp-system-bpdu copp-system-ipmcmiss copp-system-l3ttl0 + +- copp-system-default copp-system-ipmcrsvd copp-system-l3ttl1 + +- copp-system-igmp copp-system-ipunicast copp-system-lacp + +- copp-system-ipbroadcast copp-system-l3destmiss copp-system-lldp + +- copp-system-ipmc copp-system-l3slowpath copp-system-unicast-arp + + +**Commands Available in Policy-map-class (control plane) Configuration Mode** + +- bandwidth (policy-map-class (control-plane) Petra) + +- shape (policy-map-class (control-plane) Petra) + +- **exit** saves pending class map changes, then returns +the switch to the ***global*** configuration mode. + +- **abort** discards pending class map changes, then +returns the switch to the ***global*** configuration mode. + + +**Related Commands** + + +policy-map type copp places the switch +in ***policy-map*** (control plane) configuration mode. + + +**Example** + + +These commands enter ***policy-map-class*** configuration mode to modify the +shape and bandwidth parameters associated with the static class named +**copp-system-lldp**. + +``` +`switch(config)# **policy-map** +switch(config)# **policy-map type copp copp-system-policy** +switch(config-pmap-copp-system-policy)# **class copp-system-lldp** +switch(config-pmap-c-copp-system-policy-copp-system-lldp)#` +``` + + +### class (policy-map (control-plane) + Trident II) + + +The **class** command places the switch in +***policy-map-class*** (control plane) configuration mode, which is +a group change mode for changing bandwidth and shape parameters associated with a +specified class. All changes in a group change mode edit session are pending until +the end of the session. + + +A policy map is an ordered list of classes. The +control plane policy map contains **23** static classes. Each +class contains an eponymous class map and may contain +**bandwidth** and **shape** +commands.
+ + +- The class map identifies a data stream. + +- **bandwidth** command defines the streams +minimum transmission rate through the control plane. + +- **shape** command defines the streams +maximum transmission rate through the control plane. + + +Static class maps identify a data stream by definition. Each data packet is +managed by commands of the first class whose map matches the packets content. +Dynamic classes are not supported for control plane policing on Trident II platform +switches. + + +Each class corresponds to a transmission queue. Queue scheduling is +strict-priority; CPU queue number determines priority until the ***shape*** +rate is reached. Packets are dropped after the shape rate is exceeded. + + +The +**exit** command returns the switch to the +***policy-map*** configuration mode. Saving the +***policy-map-class*** changes also require an exit from the +***policy-map*** mode, which saves the pending +***policy-map-class*** and ***policy-map*** changes to +***running-config*** and returns the switch to the +***global*** configuration mode. The **abort** +command discards pending changes, returning the switch to the ***global*** +configuration mode. + + +The **no class** and +**default class** commands remove the +***policy-map-class*** commands for the specified class assignment +from the policy map. + + +**Command Mode** + + +Policy-Map (control plane) +configuration accessed through **policy-map type copp** +command. + + +**Command Syntax** + + +class +class_name + + +no class +class_name + + +default class +class_name + + +**Parameter** + +**class_name** +name of the class. 
+**Static Classes** + +Trident II platform switches +provide the following static control plane classes: + +- copp-system-acllog copp-system-igmp copp-system-mlag + +- copp-system-arp copp-system-ipmcmiss copp-system-selfip + +- copp-system-arpresolver copp-system-ipmcrsvd copp-system-selfip-tc6to7 + +- copp-system-bfd copp-system-l3destmiss copp-system-sflow + +- copp-system-bgp copp-system-l3slowpath copp-system-tc3to5 + +- copp-system-bpdu copp-system-l3ttl1 copp-system-tc6to7 + +- copp-system-default copp-system-lacp copp-system-urm + +- copp-system-glean copp-system-lldp + + +**Commands Available in Policy-map-class (control plane) Configuration +Mode** + +- bandwidth (policy-map-class (control-plane) Trident II) + +- shape (policy-map-class (control-plane) Trident II) + +- **exit** saves pending class map changes, then returns +the switch to the ***global*** configuration mode. + +- **abort** discards pending class map changes, then +returns the switch to the ***global*** configuration mode. + + +**Related Commands** + +policy-map type copp places the switch in ***policy-map*** (control +plane) configuration mode. +**Example** + +These commands enter +the ***policy-map-class*** configuration mode to modify the shape and bandwidth +parameters associated with the static class named +**copp-system-lldp**. + +``` +`switch(config)# **policy-map** +switch(config)# **policy-map type copp copp-system-policy** +switch(config-pmap-copp-system-policy)# **class copp-system-lldp** +switch(config-pmap-c-copp-system-policy-copp-system-lldp)#` +``` + + +### class (policy-map (control-plane) + Trident) + + +The **class** command places the switch in +***policy-map-class*** (control plane) configuration mode, which is +a group change mode for changing bandwidth and shape parameters associated with a +specified class. The command adds the specified class to the policy map if it was +not previously included.
All changes in a group change mode edit session are pending +until the end of the session. + + +A policy map is an ordered list of classes. The control plane policy map contains 18 +static classes and up to 30 dynamic classes. Dynamic classes +contain an eponymous class map. All classes may contain +**bandwidth** and **shape** +commands. + +- The class map identifies a data stream. + +- **bandwidth** command defines the streams minimum +transmission rate through the control plane. + +- **shape** command defines the streams maximum +transmission rate through the control plane. + + +Dynamic class maps identify a data stream with an ACL assigned by **match (class-map (control-plane) +Trident)**. Static class maps identify a data stream by +definition. Each data packet is managed by commands of the first class whose map +matches the packets content. + + +Static classes are provided with the switch and cannot be removed from the policy map +or modified by the **class** command. Dynamic classes are user +defined and added to the policy map by this command. Dynamic classes are always +placed in front of the static classes. Bandwidth and shape parameters are editable +for all classes. + + +Each class corresponds to a transmission queue. Queue scheduling is round-robin until +***bandwidth*** rate for a queue is exceeded. Scheduling becomes +strict-priority with CPU queue number determining priority until the +***shape*** rate is reached. Packets are dropped after the shape rate is +exceeded. + + +The **exit** command returns the switch to policy-map +configuration mode. Saving the ***policy-map-class*** changes also require +an exit from ***policy-map*** mode, which saves the pending +***policy-map-class*** and ***policy-map*** changes to +***running-config*** and returns the switch to the +***global*** configuration mode. The **abort** +command discards pending changes, returning the switch to the ***global*** +configuration mode. 
+ + +The **no class** and **default class** +commands remove the ***policy-map-class*** commands for the specified class +assignment from the policy map. The class is removed from the policy map if it is a +dynamic class. + + +**Command Mode** + + +Policy-Map (control plane) configuration accessed through **policy-map type +copp** command. + + +**Command Syntax** + + +class +class_name [PLACEMENT] + + +no class +class_name [PLACEMENT] + + +default class +class_name [PLACEMENT] + + +**Parameters** + +- **class_name** name of the class. + +- **PLACEMENT** Specifies the class's map placement. +Configurable only for dynamic classes. + +- **no parameter** New classes are placed between the dynamic and +static classes. Previously defined classes retain their current +policy map placement. + +- **insert-before** +**dynamic_class** Class is inserted in front +of the specified dynamic class. + + +**Static Classes** + + +Trident switches provide the following static control plane classes: + +- copp-system-acllog copp-system-ipmcmiss copp-system-lldp + +- copp-system-arp copp-system-ipmcrsvd copp-system-selfip + +- copp-system-arpresolver copp-system-l3destmiss +copp-system-selfip-tc6to7 + +- copp-system-bpdu copp-system-l3slowpath copp-system-sflow + +- copp-system-glean copp-system-l3ttl1 copp-system-tc3to5 + +- copp-system-igmp copp-system-lacp copp-system-tc6to7 + + +**Commands Available in Policy-map-class (control plane) Configuration Mode** + +- bandwidth (policy-map-class (control-plane) Trident) + +- shape (policy-map-class (control-plane) Trident) + +- **exit** saves pending class map changes, then returns +the switch to the ***global*** configuration mode. + +- **abort** discards pending class map changes, then +returns the switch to the ***global*** configuration mode. + + +**Related Commands** + +- class-map type copp places +switch in the ***class-map*** (control-plane) configuration +mode.
+ +- policy-map type copp places +switch in the ***policy-map*** (control plane) configuration +mode. + + +**Example** + + +These commands add **CM-1** class to the +**copp-system-policy** policy +map. +``` +`switch(config)# **policy-map type copp copp-system-policy** +switch(config-pmap-copp-system-policy)# **class CM-1** +switch(config-pmap-c-copp-system-policy-CM-1)#` +``` + + +### class (policy-map (pbr) + + +The **class (policy-map (pbr)** command places the switch in +***policy-map-class (pbr)*** configuration mode, which is a group +change mode that modifies the specified class of the configuration mode Policy-Based +Routing (PBR) policy map. The command adds the class to the policy map if it was not +previously included in the policy map. All changes in a group change mode edit +session are pending until the mode is exited, and can be canceled by using the +**abort** command. + + +A PBR policy map is an ordered list of classes. Each class contains an eponymous +class map and can contain set commands to specify next hop. Classes without set +commands translate to no action being performed on that class of packets. + +- The class map identifies a data stream through ACLs. Class maps are +configured in the ***class-map*** (pbr) configuration mode. + +- **Set** commands can be used to specify the next hop +for a given class. **Set** commands are configured in +***policy-map-class*** (pbr) configuration mode. + + +PBR policy maps can also contain one or more raw match statements which filter +incoming traffic without using ACLs. Data packets are managed by commands of the +first class or raw match statement matching the packets contents. + + +The **exit** command returns the switch to the +***policy-map*** (pbr) configuration mode. However, saving the +policy-map-class changes also requires an exit from ***policy-map*** (pbr) +configuration mode. 
This saves all the pending policy map and policy-map-class +changes to ***running-config*** and returns the switch to the +***global*** configuration mode. The **abort** +command discards pending changes, returning the switch to the ***global*** +configuration mode. + + +The **no class** and **default class** +commands remove the class assignment from the configuration mode policy map by +deleting the corresponding **class** configuration from +***running-config***. + + +**Command Mode** + + +Policy-Map (pbr) Configuration accessed through **policy-map type +pbr**. + + +**Command Syntax** + + +[sequence_number] class +class_name + + +no [sequence_number] class +class_name + + +default [sequence_number] +class +class_name + + +no [sequence_number] + + +default [sequence_number] + + +**Parameters** + +- **sequence_number** Sequence number +(**1** to +**4294967295**) assigned to the rule. If no number is +entered, the number is derived by adding **10** to the +number of the policy maps last numbered line. To increase the distance +between existing entries, use the **resequence** +command. + +- **class_name** name of the class. + + +**Commands Available in Policy-map-class (pbr) Configuration Mode** + +- set +nexthop (policy-map-class pbr) sets next hop for the class. + +- **exit** saves pending class changes and returns +switch to ***policy-map (pbr)*** configuration mode. + +- **abort** discards pending class changes and returns +switch to ***policy-map (pbr)*** configuration mode. + + +**Related Commands** + +- class-map type pbr places +switch in the ***class-map*** (pbr) configuration mode. + +- policy-map type pbr places +switch in the ***policy-map (pbr)*** configuration mode. + + +**Example** + + +These commands add the **CMAP1** class map to the +**PMAP1** policy map, then place the switch in +***policy-map-class*** configuration mode where the next hops can be +assigned to the class. Changes will not take effect until both modes are +exited. 
+``` +`switch(config)# **policy-map type pbr PMAP1** +switch(config-pmap-PMAP1)# **class CMAP1** +switch(config-pmap-c-PMAP1-CMAP1)#` +``` + + +### class (policy-map (qos) + FM6000) + + +The **class** command places the switch in +***policy-map-class*** (qos) configuration mode, which is a group +change mode that modifies the specified class of the configuration mode policy map. +The command adds the class to the policy map if it was not previously included in +the policy map. All changes in a group change mode edit session are pending until +the end of the session. + + +A policy map is an ordered list of classes. Each class contains an eponymous class +map and at least one set command: + +- The class map identifies a data stream through an ACL. Class maps are +configured in the ***class-map*** (qos) configuration mode. + +- **Set** commands either modify a packets content (CoS +or DSCP fields) or assigns it to a traffic class queue. +**Set** commands are configured in the +***policy-map-class***(qos) configuration mode. +Data +packets are managed by commands of the first class whose map matches the +packets content. + + +The **exit** command returns the switch to the +***policy-map*** configuration mode. However, saving +policy-map-class changes also require an exit from the ***policy-map*** +mode. This saves all pending policy map and policy-map-class changes to +***running-config*** and returns the switch to the +***global*** configuration mode. The **abort** +command discards pending changes, returning the switch to the ***global*** +configuration mode. + + +The **no class** and **default class** +commands remove the class assignment from the configuration mode policy map by +deleting the corresponding **class** configuration from +***running-config***. + + +**Command Mode** + + +Policy-Map (qos) Configuration accessed through **policy-map type +quality-of-service**. 
+ + +**Command Syntax** + + +class +class_name [PLACEMENT] + + +no class +class_name [PLACEMENT] + + +default class +class_name [PLACEMENT] + + +**Parameters** + +- **class_name** name of the class. + +- **PLACEMENT** Specifies the map placement within the +list of class maps. + +- **no parameter** Class is placed at the top of the list. + +- **insert-before** +**existing_class** Class is inserted in front +of the specified class. + + +**Commands Available in Policy-map-class (qos) Configuration Mode** + +- set +(policy-map-class (qos) FM6000) + +- **exit** saves pending class changes and returns +switch to ***policy-map (qos)*** configuration mode. + +- **abort** discards pending class changes and returns +switch to ***policy-map (qos)*** configuration mode. + + +**Related Commands** + + +- class-map type qos +places switch in the***class-map*** (QoS) configuration mode. + +- policy-map type quality-of-service places switch in the +***policy-map*** (QoS) configuration mode + + +**Example** + + +These commands add the **CMAP_1** class map to the +**PMAP_1** policy map, then places the switch in the +***policy-map-class*** configuration +mode. +``` +`switch(config)# **policy-map type quality-of-service PMAP-1** +switch(config-pmap-PMAP-1)# **class CMAP-1** +switch(config-pmap-c-PMAP-1-CMAP-1)#` +``` + + +### class (policy-map (qos) + Helix) + + +The **class** command places the switch in the +***policy-map-class*** (QoS) configuration mode, which is a group +change mode that modifies the specified class of the configuration mode policy map. +The command adds the class to the policy map if it was not previously included in +the policy map. All changes in a group change mode edit session are pending until +the end of the session. + + +A policy map is an ordered list of classes. Each class contains an eponymous class +map and at least one set command: + +- The class map identifies a data stream through an ACL. 
Class maps are +configured in the ***class-map*** (qos) configuration mode. + +- **Set** commands either modify a packets content (CoS +or DSCP fields) or assigns it to a traffic class queue. +**Set** commands are configured in the +***policy-map-class*** (qos) configuration mode. +Data +packets are managed by commands of the first class whose map matches the +packets content. + + +The **exit** command returns the switch to the +***policy-map*** configuration mode. However, saving +policy-map-class changes also require an exit from the ***policy-map*** +mode. This saves all the pending policy map and policy-map-class changes to +***running-config*** and returns the switch to the +***global*** configuration mode. The **abort** +command discards pending changes, returning the switch to the ***global*** +configuration mode. + + +The **no class** and **default class** +commands remove the class assignment from the configuration mode policy map by +deleting the corresponding **class** configuration from +***running-config***. + + +**Command Mode** + + +Policy-Map (qos) Configuration accessed through **policy-map type +quality-of-service** command. + + +**Command Syntax** + + +class +class_name [PLACEMENT] + + +no class +class_name [PLACEMENT] + + +default class +class_name [PLACEMENT] + + +**Parameters** + +- **class_name** name of the class. + +- **PLACEMENT** Specifies the map placement within the +list of class maps. + +- **no parameter** Class is placed at the top of +the list. + +- **insert-before** +**existing_class** Class is inserted in front +of the specified class. + + +**Commands Available in Policy-map-class (QoS) Configuration Mode** + +- set +(policy-map-class (qos) Helix) + +- **exit** saves pending class changes and returns +switch to ***policy-map (qos)*** configuration mode. + +- **abort** discards pending class changes and returns +switch to ***policy-map (qos)*** configuration mode. 
+ + +**Related Commands** + +- class-map type qos places +switch in the ***class-map*** (qos) configuration mode. + +- policy-map type +quality-of-service places switch in the ***policy-map*** +(QoS) configuration mode. + + +**Example** + + +These commands add the **CMAP_1** class map to the +**PMAP_1** policy map, then places the switch in +***policy-map-class*** configuration +mode. +``` +`switch(config)# **policy-map type quality-of-service PMAP-1** +switch(config-pmap-PMAP-1)# **class CMAP-1** +switch(config-pmap-c-PMAP-1-CMAP-1)#` +``` + + +### class (policy-map (qos) + Trident II) + + +The **class** command places the switch in the +***policy-map-class*** (QoS) configuration mode, which is a group +change mode that modifies the specified class of the configuration mode policy map. +The command adds the class to the policy map if it was not previously included in +the policy map. All changes in a group change mode edit session are pending until +the end of the session. + + +A policy map is an ordered list of classes. Each class contains an eponymous class +map and at least one set command: + +- The class map identifies a data stream through an ACL. Class maps are +configured in ***class-map (qos)*** configuration mode. + +- **Set** commands either modify a packets content (CoS +or DSCP fields) or assigns it to a traffic class queue. +**Set** commands are configured in +***policy-map-class (qos)*** configuration mode. + + +Data packets are managed by commands of the first class whose map matches the packets +content. + + +The **exit** command returns the switch to the +***policy-map*** configuration mode. However, saving the +policy-map-class changes also require an exit from the ***policy-map*** +mode. This saves all the pending policy map and policy-map-class changes to +***running-config*** and returns the switch to the +***global*** configuration mode. 
The **abort** +command discards pending changes, returning the switch to the ***global*** +configuration mode. + + +The **no class** and **default class** +commands remove the class assignment from the configuration mode policy map by +deleting the corresponding **class** configuration from +***running-config***. + + +**Command Mode** + + +Policy-Map (qos) Configuration accessed through **policy-map type +quality-of-service** command. + + +**Command Syntax** + + +class +class_name [PLACEMENT] + + +no class +class_name [PLACEMENT] + + +default class +class_name [PLACEMENT] + + +**Parameters** + +- **class_name** name of the class. + +- **PLACEMENT** Specifies the map placement within the +list of class maps. + +- **no parameter** Class is placed at the top of +the list. + +- **insert-before** +**existing_class** Class is inserted in front +of the specified class. + + +**Commands Available in Policy-map-class (qos) Configuration Mode** + +- set (policy-map-class (qos) Trident +II) + +- **exit** saves pending class changes and returns +switch to ***policy-map (qos)*** configuration mode. + +- **abort** discards pending class changes and returns +switch to ***policy-map (qos)*** configuration mode. + + +**Related Commands** + +- class-map type qos places +switch in ***class-map (qos)*** configuration mode. + +- policy-map type +quality-of-service places switch in ***policy-map +(qos)*** configuration mode. + + +**Example** + + +These commands add the **CMAP_1** class map to the +**PMAP_1** policy map, then places the switch in +***policy-map-class*** configuration +mode. +``` +`switch(config)# **policy-map type quality-of-service PMAP-1** +switch(config-pmap-PMAP-1)# **class CMAP-1** +switch(config-pmap-c-PMAP-1-CMAP-1)#` +``` + + +### class (policy-map (qos) + Trident) + + +The **class** command places the switch in +**policy-map-class (qos)** configuration mode, which +is a group change mode that modifies the specified class of the configuration mode +policy map. 
The command adds the class to the policy map if it was not previously +included in the policy map. All changes in a group change mode edit session are +pending until the end of the session. + + +A policy map is an ordered list of classes. Each class contains an eponymous class +map and at least one set command: + +- The class map identifies a data stream through an ACL. Class maps are +configured in **class-map (qos)** configuration +mode. + +- **Set** commands either modify a packets content (CoS +or DSCP fields) or assigns it to a traffic class queue. +**Set** commands are configured in +***policy-map-class (qos)*** configuration mode. +Data +packets are managed by commands of the first class whose map matches the +packets content. + + +The **exit** command returns the switch to +***policy-map*** configuration mode. However, saving policy-map-class +changes also require an exit from ***policy-map*** mode. This saves all the +pending policy map and policy-map-class changes to ***running-config*** and +returns the switch to the ***global*** configuration mode. The +**abort** command discards pending changes, returning +the switch to the ***global*** configuration mode. + + +The **no class** and **default class** +commands remove the class assignment from the configuration mode policy map by +deleting the corresponding **class** configuration from +***running-config***. + + +**Command Mode** + + +Policy-Map (qos) Configuration accessed through **policy-map type +quality-of-service** command. + + +**Command Syntax** + + +class +class_name [PLACEMENT] + + +no class +class_name [PLACEMENT] + + +default class +class_name [PLACEMENT] + + +**Parameters** + +- **class_name** name of the class. + +- **PLACEMENT** Specifies the map placement within the +list of class maps. + +- **no parameter** Class is placed at the top of the list. + +- **insert-before** +**existing_class** Class is inserted in front +of the specified class. 
+ + +**Commands Available in Policy-map-class (qos) Configuration Mode** + +- set +(policy-map-class (qos) Trident) + +- **exit** saves pending class changes and returns +switch to ***policy-map (qos)*** configuration mode. + +- **abort** discards pending class changes and returns +switch to ***policy-map (qos)*** configuration mode. + + +**Related Commands** + +- class-map type qos places +switch in ***class-map (qos)*** configuration mode. + +- policy-map type +quality-of-service places switch in ***policy-map +(qos)*** configuration mode. + + +**Example** + + +These commands add the **CMAP_1** class map to the +**PMAP_1** policy map, then places the switch in +***policy-map-class*** configuration +mode. +``` +`switch(config)# **policy-map type quality-of-service PMAP-1** +switch(config-pmap-PMAP-1)# **class CMAP-1** +switch(config-pmap-c-PMAP-1-CMAP-1)#` +``` + + +### class-map type copp + + +The **class-map type copp** command places the switch in +***Class-Map*** (control plane) +configuration mode, which is a group change mode that modifies a +control-plane dynamic class map. A dynamic class map is a data +structure that uses Access Control Lists (ACLs) to define a data +stream by specifying characteristics of data packets that comprise +that stream. Control-plane policy maps use class maps to specify +which control plane traffic is controlled by policy map +criteria. + + +The **exit** command saves +pending class map changes to ***running-config*** and +returns the switch to the ***global*** configuration mode. +Class map changes are also saved by entering a different +configuration mode. The **abort** command +discards pending changes and returns the switch to the +***global*** configuration mode. + + +The +**no class-map type copp** and +**default class-map type +copp** commands delete the specified class map +by removing the corresponding **class-map type +copp** command and its associated +configuration. 
+ + +**Command Mode** + + +Global Configuration + + +**Command Syntax** + + +class-map type copp match-any +class_name + + +no class-map type copp +[match-any] +class_name + + +default class-map type +copp [match-any] +class_name + + +**Parameter** + +**class_name** Name of class +map. +**Commands Available in Class-Map (Control +Plane) Configuration Mode** + + +match (class-map (control-plane) +Trident) + +**Related Commands** + +- policy-map +type copp + +- class (policy-map (control-plane) +Trident) + +- class-map type +qos + + +**Example** + +This command creates the +control plane class map named +***CP-CMAP-1*** and places the +switch in ***class-map*** configuration +mode. +``` +`switch(config)# **class-map type copp match-any CP-CMAP-1** +switch(config-cmap-CP-CMAP-1)#` +``` + + +### class-map type pbr + + +The **class-map type pbr** command places the switch in the +***class-map*** (pbr) configuration mode for the specified class +map, and creates the class map if one does not already exist. The +***class-map*** (PBR) configuration mode is a group change mode that +modifies a class map for Policy-Based Routing (PBR). PBR class maps contain one or +more **match** statements which filter incoming traffic using ACLs. PBRs can then +use these class maps to set next-hop IP addresses for the traffic that matches them. +(Classes without set commands translate to no action being performed on that class +of packets.) + + +The **exit** command saves pending class +map changes to ***running-config***, then returns the switch to the +***global*** configuration mode. Class map changes are also saved by +directly entering a different configuration mode. The +**abort** command discards pending changes and returns the +switch to the ***global*** configuration mode. + + +The **no +class-map type pbr** and **default class-map type +pbr** commands delete the specified class map by removing the +corresponding **class-map type pbr** command and its +associated configuration.
+ + +**Command Mode** + + +Global Configuration + + +**Command Syntax** + + +class-map type pbr match-any +map_name + + +no class-map type pbr match-any +map_name + + +default class-map type pbr +match-any +map_name + + +**Parameter** + +**map_name** Name of class map. **Commands Available in +Class-Map (PBR) configuration mode** + +- match (class-map +(pbr)) + +- resequence (class-map +(pbr)) + + +**Related Commands** + +- policy-map type pbr + +- class (policy-map +(pbr)) + + +**Example** + +This command creates the PBR class map named +**MAP1** and places the switch in ***class-map +(pbr)*** configuration mode where match criteria can be configured for +the +class. +``` +`switch(config)# **class-map type pbr match-any MAP1** +switch(config-cmap-MAP1)#` +``` + + +### class-map type qos + + +The **class-map type qos** command places the switch in the +***class-map*** (QoS) configuration mode, which is a group change +mode that modifies a QoS dynamic class map. A dynamic class map is a data structure +that uses Access Control Lists (ACLs) to define a data stream by specifying +characteristics of data packets that comprise that stream. QoS policy maps use class +maps to specify the traffic (to which the policy map is assigned) that is +transformed by policy map criteria. + + +The **exit** command +saves pending class map changes to ***running-config***, then returns the +switch to the ***global*** configuration mode. Class map changes are also +saved by entering a different configuration mode. The **abort** command discards +pending changes and returns the switch to the ***global*** configuration +mode. + + +The **no class-map type qos** and +**default class-map type qos** commands delete the +specified class map by removing the corresponding **class-map type +qos** command and its associated configuration. The +**class-map** and **class-map type +qos** commands are equivalent.
+ + +**Command Mode** + + +Global Configuration + + +**Command Syntax** + + +class-map +type qos +match-any +class_name + + +no class-map type qos +match-any +class_name + + +default class-map type qos +match-any +class_name + + +**Parameters** + + +**class_name** Name of class +map. + +**Commands Available in Class-Map (QoS) Configuration +Mode** + +- match +(class-map (qos) FM6000) + +- match +(class-map (qos) Trident) + + +**Conditions** +**class-map** +**map_name** and **class-map type qos** +**map_name** are identical commands. + +**Related +Commands** + +- policy-map type +quality-of-service + +- class +(policy-map (qos) FM6000) + +- class +(policy-map (qos) Trident) + + +**Example** + +This command creates the QoS class map named +**MAP-1** and places the switch in +***class-map*** configuration +mode. +``` +`switch(config)# **class-map type qos match-any MAP-1** +switch(config-cmap-MAP-1)#` +``` + + +### clear policy-map counters + + +The **clear policy-map** command resets the specified policy +map counters to zero. Policy map counters record the quantity of packets that are +filtered by the ACLs that comprise a specified policy map. + + +**Command Mode** + + +Privileged EXEC + + +**Command Syntax** + + +clear policy-map +INTERFACE_NAME + counters +MAP_NAME + + +**Parameters** + +- **INTERFACE_NAME** Interface for which command clears +table counters. Options include: + +- **interface control-plane** Control +plane. + +- MAP_NAME Policy map for which command clears counters. +Options include: + +- ***copp-system-policy*** Name of only policy map supported +for the control plane. + + +### feature pbr + + +Policy-Based Routing (PBR) is a feature that is applied on IPv4 or IPv6 +routable ports, to preferentially route packets. Forwarding is based on a policy that is +enforced at the ingress of the applied interface and overrides normal routing decisions. 
+In addition to matches on regular ACLs, PBR policy-maps can also include “raw match” +statements that look like a single entry of an ACL as a convenience for users. + + +**Configuration Mode** + + +For IP: + + +TCAM PBR profile set TTL configuration mode. + + +For IPv6: + + +TCAM feature PBR IP configuration mode. + + +**Command Syntax** + + +For IP: + + +feature pbr +ip [copy] + + +no feature pbr +ip [copy] + + +default feature pbr +ip [copy] + + +For IPv6: + + +feature pbr +ipv6 [copy | bank] + + +no feature pbr +ipv6 [copy | bank] + + +default feature pbr +ipv6 [copy | bank] + + +**Parameters** + + +For IP: + + +**copy** Copy a feature from a TCAM profile. + + +For IPv6: + + +- **copy** Copy a feature from a TCAM profile. + +- **bank** TCAM banks to reserve. + + +**Examples** + +- In the following example, the PBR is configured on an IP routable port. + + +``` +`(config)# **hardware tcam** +(config-tcam)# **profile pbr-set-ttl copy default** +(config-tcam-profile-pbr-set-ttl)# **feature pbr ip**` +``` + +- In the following example, the PBR is configured on an IPv6 routable port. + + +``` +`(config)# **hardware tcam** +(config-tcam)# **profile pbr-set-ttl copy default** +(config-tcam-profile-pbr-set-ttl)# **feature pbr ip** +(config-tcam-feature-pbr-ip)# **feature pbr ipv6**` +``` + + +### feature traffic-policy cpu + + +The **feature traffic-policy cpu** command configures the CPU +traffic policy features for the IPv4 and IPv6 traffic in a user-defined TCAM +profile. + + +The **no feature traffic-policy cpu** and **default +feature traffic-policy cpu** commands remove the CPU policy +configurations from ***running-config***. + + +**Command Mode** + + +Hardware TCAM + + +**Command Syntax** + + +feature traffic-policy cpu [ipv4 | +ipv6] + + +no feature traffic-policy cpu [ipv4 | +ipv6] + + +default feature traffic-policy cpu [ipv4 | +ipv6] + + +**Parameters** + +- **ipv4** CPU traffic policy for IPv4 traffic. + +- **ipv6** CPU traffic policy for IPv6 traffic.
+ + +**Example** + + +These commands place the switch in the hardware TCAM profile mode and configure the +CPU traffic policy features for IPv4 traffic in the TCAM profile +test. +``` +`switch(config)# **hardware tcam** +switch(config-hw-tcam)# **profile test** +switch(config-hw-tcam-profile-test)# **feature traffic-policy cpu ipv4**` +``` + + +### feature traffic-policy port + + +The **feature traffic-policy port** command configures the +port-related traffic policy features for the IPv4 and IPv6 traffic in a user-defined +TCAM profile. + + +The **no feature traffic-policy port** and **default +feature traffic-policy port** commands remove the port traffic policy +configurations from ***running-config***. + + +**Command Mode** + + +Hardware TCAM + + +**Command Syntax** + + +feature traffic-policy port [ipv4 | +ipv6] + + +no feature traffic-policy port [ipv4 | +ipv6] + + +default feature traffic-policy port [ipv4 | +ipv6] + + +**Parameters** + +- **ipv4** port traffic policy for IPv4 traffic. + +- **ipv6** port traffic policy for IPv6 traffic. + + +**Example** + + +These commands place the switch in the hardware TCAM profile mode and configure the +port traffic policy features for IPv4 traffic in the TCAM profile +test. +``` +`switch(config)# **hardware tcam** +switch(config-hw-tcam)# **profile test** +switch(config-hw-tcam-profile-test)# **feature traffic-policy port ipv4**` +``` + + +### match (class-map (control-plane) + Helix) + + +The **match** command assigns an ACL to the configuration +mode class map. A class map can contain only one ACL. Class maps only use permit +rules to filter data; deny rules are ignored. The command accepts IPv4 and IPv4 +standard ACLs. + + +A class map is assigned to a policy map by the **class (policy-map (control-plane) +Helix)** command. + + +The ***class map*** (control +plane) configuration mode is a group change mode. **Match** +statements are not saved to ***running-config*** until the edit session is +completed by exiting the mode.
+ + +The **no match** and +**default match** commands remove the +**match** statement from the configuration mode class +map by deleting the corresponding command from +***running-config***. + + +**Command Mode** + + +Class-Map (control plane) configuration accessed through **class-map type +copp** command. + + +**Command +Syntax** + + +match ip access-group +list_name + + +no match ip access-group +list_name + + +default match ip access-group +list_name + + +**Parameters** + +**list_name** name of ACL assigned to class +map.**Related Commands** + +- class-map type copp places +the switch in the ***class-map*** configuration mode. + +- **exit** saves pending class map changes, then returns +the switch to the ***global*** configuration mode. + +- **abort** discards pending class map changes, then +returns the switch to the ***global*** configuration mode. + +- class (policy-map (control-plane) Helix) assigns a ***class +map*** to a ***policy map***. + + +**Guidelines** + + +Static class maps cannot be modified by this +command. + + +**Match** statements are saved to +***running-config*** only upon exiting ***class-map (control +plane)*** configuration mode. + + +**Example** + +These +commands add the IP ACL **list_1** to the +**map_1** class map, then saves the command by exiting +***class-map*** +mode. +``` +`switch(config)# **class-map type copp map_1** +switch(config-cmap-map_1)# **match ip access-group list_1** +switch(config-cmap-map_1)# **exit** +switch(config)#` +``` + + +### match (class-map (control-plane) + Trident II) + + +The **match** command assigns an ACL to the configuration +mode class map. A class map can contain only one ACL. Class maps only use permit +rules to filter data; deny rules are ignored. The command accepts IPv4 and IPv4 +standard ACLs. + + +A class map is assigned to a policy map by the **class (policy-map (control-plane) Trident +II)** command. + + +The ***class map*** (control +plane) configuration mode is a group change mode. 
**Match** +statements are not saved to ***running-config*** until the edit session is +completed by exiting the mode. + + +The **no match** and +**default match** commands remove the +**match** statement from the configuration mode class +map by deleting the corresponding command from +***running-config***. + + +**Command Mode** + + +Class-Map (control plane) configuration accessed through **class-map type +copp** command. + + +**Command +Syntax** + + +match ip access-group +list_name + + +no match ip access-group +list_name + + +default match ip access-group +list_name + + +**Parameter** + +**list_name** +name of ACL assigned to class map. + + +**Related Commands** + +- class-map type copp places +the switch in the ***class-map*** configuration mode. + +- **exit** saves pending class map changes, then returns +the switch to the ***global*** configuration mode. + +- **abort** discards pending class map changes, then +returns the switch to the ***global*** configuration mode. + +- class (policy-map (control-plane) Trident II) assigns a class +map to a ***policy map***. + + +**Guidelines** + + +Static class maps cannot be modified by this +command. + + +**Match** statements are saved to +***running-config*** only upon exiting **class-map +(control plane)** configuration +mode. + + +**Example** + +These commands add the IP ACL +**list_1** to the **map_1** +class map, then save the command by exiting ***class-map*** +mode. +``` +`switch(config)# **class-map type copp map_1** +switch(config-cmap-map_1)# **match ip access-group list_1** +switch(config-cmap-map_1)# **exit** +switch(config)#` +``` + + +### match (class-map (control-plane) + Trident) + + +The **match** command assigns an ACL to the configuration mode +class map. A class map can contain only one ACL. Class maps only use permit rules to +filter data; deny rules are ignored. The command accepts IPv4, IPv6, IPv4 standard, +and IPv6 standard ACLs. + + +A class map is assigned to a policy map by the **class (policy-map (control-plane) +Trident)** command.
+ + +Class map (control plane) configuration mode is a group change mode. **Match** +statements are not saved to ***running-config*** until the edit session is +completed by exiting the mode. + + +The **no match** and **default match** +commands remove the **match** statement from the configuration mode class map by +deleting the corresponding command from ***running-config***. + + +**Command Mode** + + +Class-Map (control plane) configuration accessed through **class-map type +copp** command + + +**Command Syntax** + + +match +IP_VERSION +access-group +list_name + + +no match +IP_VERSION +access-group +list_name + + +default match +IP_VERSION +access-group +list_name + + +**Parameters** + +- **IP_VERSION** IP version of the specified ACL. Options +include: + +- **ipv4** IPv4. + +- **ipv6** IPv6. + +- **list_name** name of ACL assigned to class map. + + +**Related Commands** + +- class-map type copp places +the switch in ***class-map*** configuration mode. + +- **exit** saves pending class map changes, then returns +the switch to the ***global*** configuration mode. + +- **abort** discards pending class map changes, then +returns the switch to the ***global*** configuration mode. + +- class (policy-map (control-plane) Trident) assigns a class map +to a policy map. + + +**Guidelines** + + +Static class maps cannot be modified by this command. + + +**Match** statements are saved to ***running-config*** +only upon exiting ***class-map (control plane)*** configuration mode. + + +**Example** + + +These commands add the IPv4 ACL names **list_1** to the +**map_1** class map, then saves the command by exiting +**class-map** +mode. +``` +`switch(config)# **class-map type copp map_1** +switch(config-cmap-map_1)# **match ip access-group list_1** +switch(config-cmap-map_1)# **exit** +switch(config)#` +``` + + +### match (class-map (pbr)) + + +The **match** command assigns ACLs to the configuration mode +Policy-Based Routing (PBR) class map. 
The command accepts IPv4, IPv4 standard, IPv6 +and IPv6 standard ACLs. + + +***Class map (pbr)*** configuration mode is a group change mode. +**Match** statements are not saved to +***running-config*** until the edit session is completed by exiting +the mode. + + +The **no match** and **default match** +commands remove the **match** statement from the configuration mode class map by +deleting the corresponding command from ***running-config***. + + +Note: PBR ACLs use only permit rules to filter data; if there are deny rules in an ACL +used by PBR, the configuration will be reverted. + + +**Command Mode** + + +Class-map (pbr) configuration accessed through **class-map type +pbr** command. + + +**Command Syntax** + + +[sequence_number] match [ip +| ipv6] access-group +list_name + + +no [sequence_number] match +[ip | ipv6] +access-group +list_name + + +default [sequence_number] match +[ip | ipv6] +access-group +list_name + + +no [sequence_number] + + +default [sequence_number] + + +**Parameters** + + +- **sequence_number** Sequence number +(**1** to **4294967295**) +assigned to the rule. If no number is entered, the number is derived by adding +**10** to the number of the class map's last +numbered line. To increase the distance between existing entries, use the +**resequence** command. + +- **list_name** name of ACL assigned to class +map. + + +**Related Commands** + +- class-map type pbr places the +switch in the ***class-map*** configuration mode. + +- **exit** saves pending class map changes, then returns +the switch to the ***global*** configuration mode. + +- **abort** discards pending class map changes, then +returns the switch to the ***global*** configuration mode. + +- class (policy-map +(pbr)) assigns a class map to a policy map. + + +**Example** + + +These commands add the IPv4 ACL named **list1** to the +***map1*** class map, then save the change by +exiting ***class-map*** +mode.
+``` +`switch(config)# **class-map type pbr map1** +switch(config-cmap-map1)# **match ip access-group list1** +switch(config-cmap-map1)# **exit** +switch(config)#` +``` + + +### match (class-map (qos) + FM6000) + + +The **match** command assigns an ACL to the configuration mode +class map. A class map can contain only one ACL. Class maps only use permit rules to +filter data; deny rules are ignored. The command accepts IPv4 and IPv4 standard +ACLs. + + +The ***class map (qos)*** configuration mode is a group change mode. +**Match** statements are not saved to ***running-config*** until the +edit session is completed by exiting the mode. + + +The **no match** and **default match** +commands remove the **match** statement from the configuration +mode class map by deleting the corresponding command from +***running-config***. + + +**Command Mode** + + +Class-map (qos) configuration accessed through **class-map type +qos** command. + + +**Command Syntax** + + +match +IP_VERSION +access-group +list_name + + +no match +IP_VERSION +access-group +list_name + + +default match +IP_VERSION +access-group +list_name + + +**Parameters** + +- **IP_VERSION** IP version of the specified ACL. Options +include: + +- **ipv4** IPv4. + +- **list_name** name of ACL assigned to class map. + + +**Related Commands** + +- class-map type qos places the +switch in the ***class-map*** configuration mode. + +- **exit** saves pending class map changes, then returns +the switch to the ***global*** configuration mode. + +- **abort** discards pending class map changes, then +returns the switch to the ***global*** configuration mode. + +- class +(policy-map (qos) FM6000) assigns a ***class map*** to a +***policy map***. + + +**Example** + + +These commands add the IPv4 ACL named ***list_1*** to the +**map_1** class map, then saves the command by exiting +***class-map*** +mode. 
+``` +`switch(config)# **class-map type qos map_1** +switch(config-cmap-map_1)# **match ip access-group list_1** +switch(config-cmap-map_1)# **exit** +switch(config)#` +``` + + +### match (class-map (qos) + Helix) + + +The **match** command assigns an ACL to the configuration mode +class map. A class map can contain only one ACL. Class maps only use permit rules to +filter data; deny rules are ignored. The command accepts IPv4, IPv4 standard, IPv6, +and IPv6 standard ACLs. + + +the ***class map (QoS)*** configuration mode is a group change mode. +**Match** statements are not saved to +***running-config*** until the edit session is completed by exiting +the mode. + + +The **no match** and **default match** +commands remove the **match** statement from the configuration mode class map by +deleting the corresponding command from ***running-config***. + + +**Command Mode** + + +Class-Map (QoS) configuration accessed through **class-map type +qos** command. + + +**Command Syntax** + + +match +IP_VERSION +access-group +list_name + + +no match +IP_VERSION +access-group +list_name + + +default match +IP_VERSION +access-group +list_name + + +**Parameters** + +- IP_VERSION IP version of the specified ACL. Options +include: + +- **ipv4** IPv4. + +- **ipv6** IPv6. + +- **list_name** name of ACL assigned to class map. + + +**Related Commands** + +- class-map type qos places the +switch in the ***class-map*** configuration mode. + +- **exit** saves pending class map changes, then returns +the switch to the ***global*** configuration mode. + +- **abort** discards pending class map changes, then +returns the switch to the ***global*** configuration mode. + +- class +(policy-map (qos) Helix) assigns a class map to a policy +map. + + +**Example** + + +These commands add the IPv4 ACL named **list_1** to the +**map_1** class map, then saves the command by exiting +***class-map*** +mode. 
+``` +`switch(config)# **class-map type qos map_1** +switch(config-cmap-map_1)# **match ip access-group list_1** +switch(config-cmap-map_1)# **exit** +switch(config)#` +``` + + +### match (class-map (qos) + Trident II) + + +The **match** command assigns an ACL to the configuration mode +class map. A class map can contain only one ACL. Class maps only use permit rules to +filter data; deny rules are ignored. The command accepts IPv4, IPv4 standard, IPv6, +and IPv6 standard ACLs. + + +The ***class map (QoS)*** configuration mode is a group change mode. +**Match** statements are not saved to +***running-config*** until the edit session is completed by exiting +the mode. + + +The **no match** and **default match** +commands remove the **match** statement from the configuration mode class map by +deleting the corresponding command from ***running-config***. + + +**Command Mode** + + +The ***class-map (qos)*** configuration accessed through **class-map type +qos** command. + + +**Command Syntax** + + +match +IP_VERSION +access-group +list_name + + +no match +IP_VERSION +access-group +list_name + + +default match +IP_VERSION +access-group +list_name + + +**Parameters** + +- **IP_VERSION** IP version of the specified ACL. Options +include: + +- **ipv4** IPv4. + +- **ipv6** IPv6. + +- **list_name** name of ACL assigned to class map. + + +**Related Commands** + +- class-map type qos places the +switch in the ***class-map*** configuration mode. + +- **exit** saves pending class map changes, then returns +the switch to the ***global*** configuration mode. + +- **abort** discards pending class map changes, then +returns the switch to the ***global*** configuration mode. + +- class +(policy-map (qos) Trident) assigns a class map to a policy +map. + + +**Example** + + +These commands add the IPv4 ACL named **list_1** to the +**map_1** class map, then save the command by exiting +***class-map*** +mode.
+``` +`switch(config)# **class-map type qos map_1** +switch(config-cmap-map_1)# **match ip access-group list_1** +switch(config-cmap-map_1)# **exit** +switch(config)#` +``` + + +### match (class-map (qos) + Trident) + + +The **match** command assigns an ACL to the configuration mode +class map. A class map can contain only one ACL. Class maps only use permit rules to +filter data; deny rules are ignored. The command accepts IPv4, IPv4 standard, IPv6, +and IPv6 standard ACLs. + + +Class map (QoS) configuration mode is a group change mode. +**Match** statements are not saved to +***running-config*** until the edit session is completed by exiting +the mode. + + +The **no match** and **default match** +commands remove the **match**statement from the configuration mode class map by +deleting the corresponding command from ***running-config***. + + +**Command Mode** + + +Class-Map (qos) configuration accessed through **class-map type +qos** command. + + +**Command Syntax** + + +match +IP_VERSION +access-group +list_name + + +no match +IP_VERSION +access-group +list_name + + +default match +IP_VERSION +access-group +list_name + + +**Parameters** + +- **IP_VERSION** IP version of the specified ACL. Options +include: + +- **ipv4** IPv4. + +- **ipv6** IPv6. + +- **list_name** name of ACL assigned to class map. + + +**Related Commands** + +- class-map type qos places the +switch in the ***class-map*** configuration mode. + +- **exit** saves pending class map changes, then returns +the switch to the ***global*** configuration mode. + +- **abort** discards pending class map changes, then +returns the switch to the ***global*** configuration mode. + +- class +(policy-map (qos) Trident) assigns a ***class map*** to a +***policy map***. + + +**Example** + + +These commands add the IPv4 ACL named **list_1** to the +**map_1** class map, then saves the command by exiting +***class-map*** +mode. 
+``` +`switch(config)# **class-map type qos map_1** +switch(config-cmap-map_1)# **match ip access-group list_1** +switch(config-cmap-map_1)# **exit** +switch(config)#` +``` + + +### match (policy-map (pbr)) + + +The **match** command creates a policy map clause entry that +specifies one filtering condition. When a packet matches the filtering criteria, its +next hop is set as specified. When a packet's properties do not equal the statement +parameters, the packet is evaluated against the next clause or class map in the +policy map, as determined by sequence number. If all clauses fail to set a next hop +for the packet, the packet is routed according to the FIB. + + +The **no match** and **default match** +commands remove the **match** statement from the configuration mode policy map by +deleting the corresponding command from ***running-config***. + + +**Command Mode** + + +Policy-Map (pbr) Configuration accessed through policy-map type pbr +command. + + +**Command Syntax** + + +[sequence_number] match ip +SOURCE_ADDR +DEST_ADDR [set nexthop [recursive] +NH-addr_1 [NH-addr_2] ... +[NH-addr_n]] + + +no match ip +SOURCE_ADDR +DEST_ADDR [set nexthop [recursive] +NH-addr_1 [NH-addr_2] ... +[NH-addr_n]] + + +default match ip +SOURCE_ADDR +DEST_ADDR [set nexthop [recursive] +NH-addr_1 [NH-addr_2] ... +[NH-addr_n]] + + +no +sequence_number + + +default +sequence_number + + +**Parameters** + +- **sequence_number** Sequence number assigned to the +rule. If no number is entered, the number is derived by adding +**10** to the number of the policy map's last +numbered line. To increase the distance between existing entries, use the +**resequence** command. + +- **SOURCE_ADDR** and **DEST_ADDR** +source and destination address filters. Options include: + +- **network_addr** subnet address (CIDR or +address-mask). + +- **any** packets from or to all addresses are +matched. + +- **host** +**ip_addr** IP address (dotted decimal +notation).
+Source and destination subnet addresses support +discontiguous masks. + +- **recursive** enables recursive next hop resolution. + +- **NH-addr** IP address of next hop. If multiple +addresses are entered, they are treated as an ECMP group. + + +**Related Commands** + +- policy-map type pbr enters +the policy-map (PBR) configuration mode. + +- show policy-map type +pbr displays the PBR policy maps. + + +**Example** + + +These commands create a match rule in policy map **PMAP1** +which sets the next hop to **192.168.3.5** for packets +received from **172.16.0.0/12** regardless of their +destination, then exit the mode to save the +changes. +``` +`switch(config)# **policy-map type pbr PMAP1** +switch(config-pmap-PMAP1)# **match ip 172.16.0.0/12 any set nexthop 192.168.3.5** +switch(config-pmap-PMAP1)# **exit** +switch(config)#` +``` + + +### platform arad tcam counters +feature + + +The **platform arad tcam counters feature** command enables +incrementing PBR hardware counters corresponding to ACL. If counters for PBR are +enabled, then counters for ACL will be automatically disabled in all cases. If +counters for ACL are enabled, then counters for PBR will be automatically disabled +in all cases. + + +The **no platform arad tcam counters feature** command disables +PBR/ACL counters selection. The **default platform arad tcam counters +feature** command restores the default behavior. + + +**Command Mode** + + +Global Configuration + + +**Command Syntax** + + +platform arad tcam counters feature +[OPTIONS] + + +no platform arad tcam counters feature +[OPTIONS] + + +default platform arad tcam counters feature +[OPTIONS] + + +**Parameters** + + +**OPTIONS** Assign the TCAM counters feature. Options +include: + +- **pbr** assign the TCAM counters feature PBR hardware +counters. + +- **acl** assign the TCAM counters feature ACL hardware +counters. + + +**Examples** + +- This command enables incrementing ACL hardware counters +selection.
+``` +`switch(config)# **platform arad tcam counters feature acl** +switch(config)#` +``` + +- This command disables incrementing ACL hardware counters +selection. +``` +`switch(config)# **no platform arad tcam counters feature acl** +switch(config)#` +``` + + +### policy-map type copp + + +The **policy-map type copp** command places the switch in the +***policy-map*** (control plane) configuration mode, which is a +group change mode that modifies a ***control-plane*** policy map. A policy +map is a data structure that consists of class maps that identify a specific data +stream and specify bandwidth and shaping parameters that controls its transmission. +Control plane policy maps are applied to the control plane to manage traffic. + + +The ***copp-system-policy*** policy map is supplied with the switch and is +always applied to the control plane. The ***copp-system-policy*** is the +only valid control plane policy map. + + +The **exit** command saves pending policy map changes to +***running-config*** and returns the switch to the +***global*** configuration mode. Policy map changes are also saved by +entering a different configuration mode. The **abort** command +discards pending changes, returning the switch to the ***global*** +configuration mode. + + +The **no policy-map type copp** and **default policy-map type copp** commands delete the specified policy map by removing the corresponding **policy-map type copp** command and its associated configuration. + + +**Command Mode** + + +Global Configuration + + +**Command Syntax** + + +policy-map type copp copp-system-policy + + +no policy-map type copp copp-system-policy + + +default policy-map type copp copp-system-policy + + +The ***copp-system-policy*** is supplied with the switch and is the only +valid control plane policy map. 
+ + +**Commands Available in Policy-Map Configuration Mode** + +- class (policy-map (control-plane) FM6000) + +- class (policy-map (control-plane) Trident) + + +**Related Commands** + + +class-map type copp enters the +***control-plane class-map*** configuration mode for modifying a +control-plane dynamic class map. + + +Only Helix and Trident platform switches support dynamic classes for control plane +policing. + + +**Example** + + +This command places the switch in the ***policy-map*** configuration mode to +edit the ***copp-system-policy*** policy +map. +``` +`switch(config)# **policy-map type copp copp-system-policy** +switch(config-pmap-copp-system-policy)#` +``` + + +### policy-map type pbr + + +The **policy-map type pbr** command places the switch in +***policy-map (pbr)*** configuration mode, which is a group change +mode that modifies a Policy-Based Routing (PBR) policy map. The command also creates +the specified policy map if it does not already exist. A PBR policy map is a data +structure that consists of class maps that identify specific packets and the next +hops for those packets. Policy maps are applied to Ethernet or port channel +interfaces to manage traffic. + + +The **exit** command saves +pending policy map changes to ***running-config*** and returns the switch to +the ***global*** configuration mode. Policy map changes are also saved by +entering a different configuration mode. The **abort** command +discards pending changes, returning the switch to the ***global*** +configuration mode. + + +The **no policy-map type pbr** and +**default policy-map type pbr** commands delete the +specified policy map by removing the corresponding **policy-map type +pbr** command and its associated +configuration. 
+ + +**Command Mode** + + +Global Configuration + + +**Command Syntax** + + +policy-map +type pbr +map_name + + +no policy-map type pbr +map_name + + +default policy-map type pbr +map_name + + +**Parameter** + +**map_name** Name of policy map.**Commands Available +in Policy-Map Configuration Mode** + +- class (policy-map +(pbr)) + +- match (policy-map +(pbr)) + + +**Related Commands** + +- class-map type pbr + +- service-policy type pbr (Interface mode) + + +**Example** + +This command creates the PBR policy map named +**PMAP1** and places the switch in +***policy-map*** configuration +mode. +``` +`switch(config)# **policy-map type pbr PMAP1** +switch(config-pmap-PMAP1)#` +``` + + +### policy-map type quality-of-service + + +The **policy-map type quality-of-service** command places +the switch in the ***policy-map (QoS)*** configuration mode, which is a +group change mode that modifies a QoS policy map. A policy map is a data structure +that consists of class maps that identify a specific data stream and shaping +parameters that controls its transmission. Policy maps are applied to Ethernet or +port channel interfaces to manage traffic. + + +The **exit** +command saves pending policy map changes to ***running-config*** and returns +the switch to the ***global*** configuration mode. Policy map changes are +also saved by entering a different configuration mode. The +**abort** command discards pending changes, returning +the switch to the ***global*** configuration mode. + + +The **no +policy-map type quality-of-service** and **default +policy-map type quality-of-service** commands delete the +specified policy map by removing the corresponding **policy-map type +quality-of-service** command and its associated configuration. +The **policy-map** and **policy-map type +quality-of-service** commands are +equivalent. 
+ + +**Command Mode** + + +Global Configuration + + +**Command Syntax** + + +policy-map +type quality-of-service +map_name + + +no policy-map type +quality-of-service +map_name + + +default policy-map type +quality-of-service +map_name + + +**Parameter** + +**map_name** Name of policy map.**Commands Available +in Policy-Map Configuration Mode** + +- class +(policy-map (qos) FM6000) + +- class +(policy-map (qos) Trident) + + +**Conditions** + + +**policy-map** +**map_name** and **policy-map type +quality-of-service** +**map_name** are identical commands. + +**Related +Commands** + +- class-map type qos + +- service-policy type qos (Interface mode) + + +**Example** + +This command creates the QoS policy map named +**PMAP-1** and places the switch in the +***policy-map*** configuration +mode. +``` +`switch(config)# **policy-map PMAP-1** +switch(config-pmap-PMAP-1)#` +``` + + +### policy-map type quality-of-service counter + + +The **policy-map type quality-of-service counter** command in the Global Configuration Mode +configures per interface counters for policy map and class map matches. + + +The **no | default** versions of the command disables the feature and removes it from the +***running-config***. + + +**Command Mode** + + +Global Configuration + + +**Command Syntax** + + +**policy-map type quality-of-service counter per-interface** + + +**no policy-map type quality-of-service counter per-interface** + + +**default policy-map type quality-of-service counter per-interface** + + +**Parameters** + + +- **policy-map type quality-of-service** - Specify the Policy Map as Quality of Service (QoS). + +- **counter** - Change the counter settings. 
+ +- **per-interface** - Specify using per-interface counters. + + + + +**Example** + + +Use the following command to enable QoS per-interface counters: + + +``` +`switch(config)# **policy-map type quality-of-service counter per-interface**` +``` + + +### policy-map type quality-of-service policer + + +The **policy-map type quality-of-service policer copy** +command is used to copy an existing QoS policy map to the policy map +**policer**. + + +The +**policy-map type quality-of-service +policer drop counter** command is used +to enable drop counters for the QoS policy map +**policer**. + + +The +**no policy-map type quality-of-service +policer** and **default +policy-map type quality-of-service +policer** commands delete the policy map +**policer** by removing the +corresponding **policy-map type quality-of-service +policer** command and its associated +configuration. + + +The **no policy-map type +quality-of-service policer drop +counter** and **default policy-map +type quality-of-service policer drop +counter** commands disable drop counters +for the policy map **policer**. + + +**Command Mode** + + +Global Configuration + + +**Command +Syntax** + + +policy-map type quality-of-service +policer copy +map_name + + +policy-map type +quality-of-service policer drop +counter + + +no policy-map type +quality-of-service policer + + +default +policy-map type quality-of-service +policer + + +**Parameter** + +**map_name** Name of policy map to +copy. + + +**Related Commands** + +- class-map type +qos + +- service-policy type qos (Interface +mode) + + +**Examples** + + +- This command copies the QoS policy map named +**PMAP-1** to the policy map +**policer**. +``` +`switch(config)# **policy-map type quality-of-service policer copy PMAP-1** +switch(config-pmap-PMAP-1)#` +``` + +- This command enables drop counters for the QoS policy map +**policer**.
+``` +`switch(config)# **policy-map type quality-of-service policer drop counter** +switch(config)#` +``` + + +### resequence (class-map +(pbr)) + + +The **resequence** command assigns sequence numbers to +rules in the configuration mode class map. Command parameters specify the number of +the first rule and the numeric interval between consecutive rules. Once changed, +rule numbers persist unless changed again using the +**resequence** command, but the interval used for +numbering new rules reverts to **10** upon exiting +***class-map (pbr)*** configuration mode. + + +Maximum rule +sequence number is **4294967295**. + + +**Command +Mode** + + +Class-Map (PBR) Configuration accessed through **class-map type +pbr** command. + + +**Command Syntax** + + +resequence [start_num +[inc_num]] + + +**Parameters** + +- **start_num** sequence number assigned to the first +rule. Default is ***10***. + +- **inc_num** numeric interval between consecutive +rules. Default is ***10***. + + +**Example** + +The **resequence** +command renumbers the rules in **CMAP1**, starting the first +command at number **100** and incrementing subsequent lines by +**20**. +``` +`switch(config)# **class-map type pbr match-any CMAP1** +switch(config-cmap-CMAP1)# **show active** +class-map type pbr match-any CMAP1 +10 match ip access-group group1 +20 match ip access-group group2 +30 match ip access-group group3 +switch(config-cmap-CMAP1)# **resequence 100 20** +switch(config-cmap-CMAP1)# **exit** +switch(config)# **class-map type pbr match-any CMAP1** +switch(config-cmap-CMAP1)# **show active** +class-map type pbr match-any CMAP1 +100 match ip access-group group1 +120 match ip access-group group2 +140 match ip access-group group3` +``` + + +### resequence (policy-map +(pbr)) + + +The **resequence** command assigns sequence numbers to rules in +the configuration mode policy map. Command parameters specify the number of the +first rule and the numeric interval between consecutive rules.
Once changed, rule +numbers persist unless changed again using the **resequence** +command, but the interval used for numbering new rules reverts to +**10** upon exiting ***policy-map (pbr)*** +configuration mode. + + +Maximum rule sequence number is **4294967295**. + + +**Command Mode** + + +Policy-Map (PBR) Configuration accessed through **policy-map type +pbr** command. + + +**Command Syntax** + + +resequence [start_num +[inc_num]] + + +**Parameters** + +- **start_num** sequence number assigned to the first +rule. Default is **10**. + +- **inc_num** numeric interval between consecutive +rules. Default is **10**. + + +**Example** + + +The **resequence** command renumbers the rules in +**PMAP1**, starting the first command at number +**100** and incrementing subsequent lines by +**20**. +``` +`switch(config)# **policy-map type pbr PMAP1** +switch(config-pmap-PMAP1)# **show active** +policy-map type pbr PMAP1 +10 class CMAP1 +set nexthop 172.16.1.1 +20 class CMAP2 +set nexthop 172.16.2.2 +30 class CMAP3 +set nexthop 172.16.3.3 +switch(config-pmap-PMAP1)# **resequence 100 20** +switch(config-pmap-PMAP1)# **exit** +switch(config)# **policy-map type pbr PMAP1** +switch(config-pmap-PMAP1)# **show active** +policy-map type pbr PMAP1 +100 class CMAP1 +set nexthop 172.16.1.1 +120 class CMAP2 +set nexthop 172.16.2.2 +140 class CMAP3 +set nexthop 172.16.3.3 +switch(config-pmap-PMAP1)#` +``` + + +### service-policy type pbr +(Interface mode) + + +The **service-policy type pbr** command applies the specified +Policy-Based Routing (PBR) policy map to the configuration mode interface. A PBR +policy map is a data structure that consists of class maps that identify specific +packets and the next hops for those packets. Policy maps are applied to Ethernet or +port channel interfaces to manage traffic. Only one service policy is supported per +interface.
+ + +The **no service-policy type pbr** and +**default service-policy type pbr** commands remove the +service policy assignment from the configuration mode interface by deleting the +corresponding **service-policy type pbr** command from +***running-config***. + + +**Command Mode** + + +Interface-Ethernet Configuration + + +Interface-Port-Channel Configuration + + +Interface-VLAN Configuration + + +**Command +Syntax** + + +service-policy type pbr +TRAFFIC_DIRECTION +map_name + + +no service-policy type pbr +TRAFFIC_DIRECTION +map_name + + +default service-policy type pbr +TRAFFIC_DIRECTION +map_name + +**Parameters** + +- **TRAFFIC_DIRECTION** Direction of data stream to which +command applies. Options include: + +- **input** Policy map applies to inbound packet streams. + +- **map_name** Name of policy map. + + +**Guidelines** + + +A policy map that is attached to a port channel interface takes precedence for +member interfaces of the port channel over their individual interface Ethernet +configuration. Members that are removed from a port channel revert to the policy map +implementation specified by its interface Ethernet +configuration. + + +**Related Commands** + +policy-map type +pbr + + +**Example** + +This command applies the PBR policy map +**PMAP1** to **interface Ethernet +8**. +``` +`switch# **config** +switch(config)# **interface ethernet 8** +switch(config-if-Et8)# **service-policy type pbr input PMAP1** +switch(config-if-Et8)#` +``` + + +### service-policy type qos +(Interface mode) + + +The **service-policy** command applies a specified policy +map to the configuration mode interface. A policy map is a data structure that +identifies data traffic through class maps, then specifies actions to classify the +traffic (by setting the traffic class), mark the traffic (by setting the cos and +dscp values), and police the traffic (by setting the police rate) through data +packet field modifications.
+ + +The **no service-policy** +and **default service-policy** commands remove the service +policy assignment from the configuration mode interface by deleting the +corresponding **service-policy** command from +***running-config***. + + +**Command Mode** + + +Interface-Ethernet Configuration + + +Interface-Port-Channel Configuration + + +Interface-VLAN Configuration + + +**Command Syntax** + + +service-policy [type qos] +TRAFFIC_DIRECTION +map_name + + +no service-policy [type qos] +TRAFFIC_DIRECTION +map_name + + +default service-policy [type +qos] +TRAFFIC_DIRECTION +map_name + +**Parameters** + +- **type qos** Parameter has no functional effect. + +- **TRAFFIC_DIRECTION** Direction of data stream to which +command applies. Options include: + +- **input** Policy map applies to inbound packet +streams. + +- **map_name** Name of policy map. + + +**Guidelines** + + +A policy map that is attached to a port channel interface takes precedence for +member interfaces of the port channel over their individual interface Ethernet +configuration. Members that are removed from a port channel revert to the policy map +implementation specified by its interface Ethernet configuration. + + +DCS-7500E +and DCS-7280E limitations: + + +- A maximum of **31** QoS service policies +per chip may be applied on L3 interfaces. + +- Applying different QoS service policies to an SVI and its member +interfaces causes unpredictable behavior. + +- When an SVI on which QoS service policies are applied experiences +partial failure due to limited hardware resources, a forwarding agent restart +causes unpredictable behavior. + +- Policy-map programming may fail when QoS service policies are +applied on two SVIs if an event causes a member interface to switch membership +from one to the other. To change the VLAN membership of an interface in this +case, remove the interface from one VLAN before adding it to the other. + +- Outgoing COS rewrite is not supported. 
+ +- QoS policy-map counters are not supported. + + +DCS-7010, DCS-7050, DCS-7050X, DCS-7250X, and DCS-7300X limitations: + + +- When the same policy map is applied to multiple SVIs, TCAM +resources are not shared. + +- A policy map applied to an SVI results in TCAM allocation on all +chips whether SVI members are present or not. + +- Applying different QoS service policies to an SVI and its member +interfaces causes unpredictable behavior. + + +**Related Commands** + +policy-map type +quality-of-service +**Example** + +This command applies the +**PMAP-1** policy map to **interface +ethernet +8**. +``` +`switch# **config** +switch(config)# **interface ethernet 8** +switch(config-if-Et8)# **show active** +switch(config-if-Et8)# **service-policy input PMAP-1** +switch(config-if-Et8)# **show active** +interface Ethernet8 + service-policy type qos input PMAP-1 +switch(config-if-Et8)#` +``` + + +### set (policy-map-class +(qos)FM6000) + + +The **set** command specifies traffic resolution methods for +traffic defined by its associated class map in its configuration mode policy map +class. Three set statements are available for each class: + + +- **cos** Sets the Layer 2 class of service +field. + +- **dscp** Sets the differentiated services +code point value in the type of service (ToS) byte. + +- **traffic-class** Sets the traffic class +queue for data packets. + + +Each type of set command can be assigned to a class, allowing for the simultaneous +modification of both (cos, dscp) fields and assignment to a traffic class. + + +The **no set** and **default set** +commands remove the specified data action from the class map by deleting the +associated **set** command from +***running-config***. + + +**Command Mode** + + +Policy-map-class (qos) configuration + + +accessed through **class (policy-map (qos) FM6000)** +command. 
+ + +**Command Syntax** + + +set +QOS_TYPE +value + + +no set +QOS_TYPE + + +default set +QOS_TYPE + + +**Parameters** + +- **QOS_TYPE** Specifies the data stream resolution +method. Valid options include: + +- **cos** Layer 2 class of service field of +outbound packet is modified. + +- **dscp** Differentiated services code point +value in the ToS byte is modified. + +- **traffic-class** Data stream is assigned to a +traffic class queue. + +- **value** Specifies the data field value or traffic +class queue. Valid data range depends on +**QOS_TYPE**. + +- **QOS_TYPE** is **cos** +Value ranges from **0** to +**7**. + +- **QOS_TYPE** is **dscp** +Value ranges from **0** to +**63**. + +- **QOS_TYPE** is +**traffic-class** Value ranges from +**0** to +**7**. + + +**Related Commands** + +- policy-map type +quality-of-service + +- class +(policy-map (qos) FM6000) + + +**Example** + + +These commands configure the policy map to set **CoS field 7** +to data traffic specified by the class map **CMAP-1**, then +assigns that data to traffic class **queue +4**. +``` +`switch(config)# **policy-map type quality-of-service PMAP-1** +switch(config-pmap-PMAP-1)# **class CMAP-1** +switch(config-pmap-c-PMAP-1-CMAP-1)# **set cos 7** +switch(config-pmap-c-PMAP-1-CMAP-1)# **set traffic-class 4** +switch(config-pmap-c-PMAP-1-CMAP-1)#` +``` + + +### set (policy-map-class +(qos)Helix) + + +The **set** command specifies traffic resolution methods for +traffic defined by its associated class map in its configuration mode policy map +class. Three set statements are available for each class: + + +- **cos** Sets the Layer 2 class of service +field. + +- **dscp** Sets the differentiated services +code point value in the type of service (ToS) byte. + +- **traffic-class** Sets the traffic class +queue for data packets. + + +Each type of set command can be assigned to a class, allowing for the simultaneous +modification of both (**cos**, **dscp**) +fields and assignment to a traffic class. 
+ + +The **no set** and **default set** +commands remove the specified data action from the class map by deleting the +associated **set** command from +***running-config***. + + +**Command Mode** + + +Policy-map-class (qos) configuration accessed through **class +(policy-map (qos) Helix)** command. + + +**Command Syntax** + + +set +QOS_TYPE +value + + +no set +QOS_TYPE + + +default set +QOS_TYPE + + +**Parameters** + +- **QOS_TYPE** Specifies the data stream resolution +method. Valid options include: + +- **cos** Layer 2 class of service field of +outbound packet is modified. + +- **dscp** Differentiated services code point +value in the ToS byte is modified. + +- **traffic-class** Data stream is assigned to a +traffic class queue. + +- **value** Specifies the data field value or traffic +class queue. Valid data range depends on QOS type. + +- **QOS_TYPE** is **cos** +Value ranges from **0** to +**7**. + +- **QOS_TYPE** is **dscp** +Value ranges from **0** to +**63**. + +- **QOS_TYPE** is +**traffic-class** Value ranges from +**0** to +**7**. + + +**Related Commands** + +- policy-map type +quality-of-service + +- class +(policy-map (qos) Helix) + + +**Example** + + +These commands configure the policy map to set **CoS field 7** +to data traffic specified by the class map **CMAP-1**, then +assigns that data to **traffic class queue +4**. +``` +`switch(config)# **policy-map type quality-of-service PMAP-1** +switch(config-pmap-PMAP-1)# **class CMAP-1** +switch(config-pmap-c-PMAP-1-CMAP-1)# **set cos 7** +switch(config-pmap-c-PMAP-1-CMAP-1)# **set traffic-class 4** +switch(config-pmap-c-PMAP-1-CMAP-1)#` +``` + + +### set (policy-map-class +(qos)Trident II) + + +The **set** command specifies traffic resolution methods for +traffic defined by its associated class map in its configuration mode policy map +class. Three set statements are available for each class: + + +- **cos** Sets the Layer 2 class of service +field. 
+ +- **dscp** Sets the differentiated services +code point value in the type of service (ToS) byte. + +- **traffic-class** Sets the traffic class +queue for data packets. + + +Each type of set command can be assigned to a class, allowing for the simultaneous +modification of both (cos, dscp) fields and assignment to a traffic class. + + +The **no set** and **default set** +commands remove the specified data action from the class map by deleting the +associated **set** command from +***running-config***. + + +**Command Mode** + + +Policy-map-class (qos) configuration accessed through **class +(policy-map (qos) Trident)** command. + + +**Command Syntax** + + +set +QOS_TYPE +value + + +no set +QOS_TYPE + + +default set +QOS_TYPE + + +**Parameters** + +- **QOS_TYPE** Specifies the data stream resolution +method. Valid options include: + +- **cos** Layer 2 class of service field of +outbound packet is modified. + +- **dscp** Differentiated services code point +value in the ToS byte is modified. + +- **traffic-class** Data stream is assigned to a +traffic class queue. + +- **value** Specifies the data field value or traffic +class queue. Valid data range depends on QOS type. + +- **QOS_TYPE** is **cos** +Value ranges from **0** to +**7**. + +- **QOS_TYPE** is **dscp** +Value ranges from **0** to +**63**. + +- **QOS_TYPE** is +**traffic-class** Value ranges from +**0** to +**7**. + + +**Related Commands** + +- policy-map type +quality-of-service + +- class +(policy-map (qos) Trident) + + +**Example** + + +These commands configure the policy map to set **CoS field 7** +to data traffic specified by the class map **CMAP-1**, then +assigns that data to **traffic class queue +4**. 
+``` +`switch(config)# **policy-map type quality-of-service PMAP-1** +switch(config-pmap-PMAP-1)# **class CMAP-1** +switch(config-pmap-c-PMAP-1-CMAP-1)# **set cos 7** +switch(config-pmap-c-PMAP-1-CMAP-1)# **set traffic-class 4** +switch(config-pmap-c-PMAP-1-CMAP-1)#` +``` + + +### set (policy-map-class +(qos)Trident) + + +The **set** command specifies traffic resolution methods for +traffic defined by its associated class map in its configuration mode policy map +class. Three set statements are available for each class: + + +- **cos** Sets the Layer 2 class of service +field. + +- **dscp** Sets the differentiated services +code point value in the type of service (ToS) byte. + +- **traffic-class** Sets the traffic class +queue for data packets. + + +Each type of set command can be assigned to a class, allowing for the simultaneous +modification of both (cos, dscp) fields and assignment to a traffic class. + + +The **no set** and **default set** +commands remove the specified data action from the class map by deleting the +associated **set** command from +***running-config***. + + +**Command Mode** + + +Policy-map-class (qos) configuration accessed through **class +(policy-map (qos) Trident)** command. + + +**Command Syntax** + + +set +QOS_TYPE +value + + +no set +QOS_TYPE + + +default set +QOS_TYPE + + +**Parameters** + +- **QOS_TYPE** Specifies the data stream resolution +method. Valid options include: + +- **cos** Layer 2 class of service field of +outbound packet is modified. + +- **dscp** Differentiated services code point +value in the ToS byte is modified. + +- **traffic-class** Data stream is assigned to a +traffic class queue. + +- **value** Specifies the data field value or traffic +class queue. Valid data range depends on QOS type. + +- **QOS_TYPE** is **cos** +Value ranges from **0** to +**7**. + +- **QOS_TYPE** is **dscp** +Value ranges from **0** to +**63**. + +- **QOS_TYPE** is +**traffic-class** Value ranges from +**0** to +**7**. 
+ + +**Related Commands** + +- policy-map type +quality-of-service + +- class +(policy-map (qos) Trident) + + +**Example** + + +These commands configure the policy map to set **CoS field 7** +to data traffic specified by the **class map CMAP-1**, then +assigns that data to **traffic class queue +4**. +``` +`switch(config)# **policy-map type quality-of-service PMAP-1** +switch(config-pmap-PMAP-1)# **class CMAP-1** +switch(config-pmap-c-PMAP-1-CMAP-1)# **set cos 7** +switch(config-pmap-c-PMAP-1-CMAP-1)# **set traffic-class 4** +switch(config-pmap-c-PMAP-1-CMAP-1)#` +``` + + +### set nexthop (policy-map-class + pbr) + + +The **set nexthop** command specifies the next hop for traffic +defined by its associated class map in its configuration mode policy map class. + + +The **no set nexthop** and **default set +nexthop** commands remove the specified action from the class +map by deleting the associated **set nexthop** command from +***running-config***. + + +**Command Mode** + + +Policy-map-class (pbr) configuration accessed through **class (policy-map +(pbr))** command. + + +**Command Syntax** + + +set nexthop [recursive] +NH-addr_1 [NH-addr_2] ... + [NH-addr_n] + + +no set nexthop [recursive] + + +default set nexthop [recursive] + + +**Parameters** + +- **recursive** enables recursive next hop resolution. + +- **NH_addr** IP address of next hop. If multiple +addresses are entered, they are treated as an ECMP group. + + +**Related Commands** + +- policy-map type pbr + +- class (policy-map +(pbr)) + + +**Example** + + +These commands configure the policy map +**PMAP1** to set the next hop to **192.168.5.3** for traffic defined +by class map +**CMAP1**. 
+``` +`switch(config)# **policy-map type pbr PMAP1** +switch(config-pmap-PMAP1)# **class CMAP1** +switch(config-pmap-c-PMAP1-CMAP1)# **set nexthop 192.168.5.3** +switch(config-pmap-c-PMAP1-CMAP1)#` +``` + + +### set nexthop-group (policy-map-class(pbr) + Arad) + + +The **set nexthop-group** command specifies a nexthop group as +the next hop for traffic defined by its associated class map in its configuration +mode policy map class. + + +The **no set nexthop-group** and **default set +nexthop-group** commands remove the specified action from the +class map by deleting the associated **set nexthop-group** +command from ***running-config***. + + +**Command Mode** + + +Policy-map-class (pbr) configuration accessed through **class (policy-map +(pbr))** command. + + +**Command Syntax** + + +set nexthop-group +group_name + + +no set nexthop-group +group_name + + +default set nexthop-group +group_name + + +**Parameters** + + +**group_name** name of ECMP group to use as next hop. + + +**Related Commands** + +- policy-map type pbr + +- class (policy-map +(pbr)) + + +**Example** + + +These commands configure the policy map **PMAP1** to set the +next hop to a nexthop group named **GROUP1** for traffic +defined by class map +**CMAP1**. +``` +`switch(config)# **policy-map type pbr PMAP1** +switch(config-pmap-PMAP1)# **class CMAP1** +switch(config-pmap-c-PMAP1-CMAP1)# **set nexthop-group GROUP1** +switch(config-pmap-c-PMAP1-CMAP1)#` +``` + + +### shape (policy-map-class +(control-plane)Arad) + + +The **shape** command specifies the maximum bandwidth for +traffic filtered by the configuration mode policy map class. + + +The **no shape** and **default shape** +commands remove the maximum bandwidth restriction for the configuration mode class +by deleting the corresponding **shape** command from +***running-config***. 
+ + +**Command Mode** + + +Policy-map-class (control plane) configuration accessed through **class (policy-map (control-plane) +Arad)** + + +**Command Syntax** + + +shape kbps +kilobits + + +no shape + + +default shape + + +**Parameter** + + +**kilobits** Maximum data rate in kilobits per second. Value +ranges from 1 to 10000000. + + +**Related Commands** + +- class (policy-map (control-plane) Arad) places the switch in the +***policy-map-class (control plane)*** configuration +mode. + +- bandwidth (policy-map-class (control-plane) Arad) specifies the +minimum bandwidth for traffic defined by its associated class map in its +configuration mode policy map class. + + +**Static Classes Default Shape** + + +Arad platform switches define these default shapes for static classes: + +- copp-system-bgp 2500 copp-system-l3lpmoverflow 2500 + +- copp-system-bpdu 2500 copp-system-l3slowpath 2500 + +- copp-system-default 2500 copp-system-l3ttl1 2500 + +- copp-system-ipbroadcast 2500 copp-system-lacp 2500 + +- copp-system-ipmc 2500 copp-system-linklocal 2500 + +- copp-system-ipmcmiss 2500 copp-system-lldp 2500 + +- copp-system-ipunicast NO LIMIT copp-system-mlag 2500 + +- copp-system-l2broadcast 2500 copp-system-multicastsnoop 2500 + +- copp-system-l2unicast NO LIMIT copp-system-OspfIsis 2500 + +- copp-system-l3destmiss 2500 copp-system-sflow 2500 + + +**Example** + + +These commands configure the maximum bandwidth of **2000** kbps +for data traffic specified by the class map ***copp-system-lldp*** of the +default ***control-plane policy +map***. 
+``` +`switch(config)# **policy-map type copp copp-system-policy** +switch(config-pmap-copp-system-policy)# **class copp-system-lldp** +switch(config-pmap-c-copp-system-policy-copp-system-lldp)# **shape kbps 2000** +switch(config-pmap-c-copp-system-policy-copp-system-lldp)# **exit** +switch(config-pmap-copp-system-policy)# **exit** +switch(config)# **show policy-map copp copp-system-policy** +Service-policy input: copp-system-policy + + Class-map: copp-system-lldp (match-any) + shape : 2000 kbps + bandwidth : 250 kbps + Out Packets : 0 + Drop Packets : 0 + +switch(config)#` +``` + + +### shape (policy-map-class +(control-plane)FM6000) + + +The **shape** command specifies the maximum bandwidth for +traffic filtered by the configuration mode policy map class. + + +The **no shape** and **default shape** +commands remove the maximum bandwidth restriction for the configuration mode class +by deleting the corresponding **shape** command from +***running-config***. + + +**Command Mode** + + +Policy-map-class (control plane) configuration accessed through class (policy-map (control-plane) FM6000). + + +**Command Syntax** + + +**shape pps** +**packets** + + +**no shape** + + +**default shape** + + +**Parameters** + + +**packets** Maximum data rate in packets per second. Value +ranges from 1 to 100000. + + +**Related Commands** + +- class (policy-map (control-plane) FM6000) places the switch in +the ***policy-map-class (control plane)*** configuration mode. + +- bandwidth (policy-map-class (control-plane) FM6000) specifies +the minimum bandwidth for traffic defined by its associated class map in its +configuration mode policy map class. 
+ + +**Static Classes Default Shape** + + +FM6000 platform switches define these default shapes for static classes: + +- copp-system-arp 10000 copp-system-l3slowpath 10000 + +- copp-system-default 8000 copp-system-pim-ptp 10000 + +- copp-system-ipmcrsvd 10000 copp-system-ospf-isis 10000 + +- copp-system-ipmcmiss 10000 copp-system-selfip 5000 + +- copp-system-igmp 10000 copp-system-selfip-tc6to7 5000 + +- copp-system-l2rsvd 10000 copp-system-sflow 25000 + + +**Example** + + +These commands configure a maximum bandwidth of **5000** +packets per second for data traffic specified by the class map +**PMAP-1** in the policy map named +***copp-system-policy***. +``` +`switch(config)# **policy-map type copp copp-system-policy** +switch(config-pmap-copp-system-policy)# **class PMAP-1** +switch(config-pmap-c-copp-system-policy-PMAP-1)# **shape pps 5000** +switch(config-pmap-c-copp-system-policy-PMAP-1)#` +``` + + +### shape (policy-map-class +(control-plane)Helix) + + +The **shape** command specifies the maximum bandwidth for +traffic filtered by the configuration mode policy map class. + + +The **no shape** and **default shape** +commands remove the maximum bandwidth restriction for the configuration mode class +by deleting the corresponding **shape** command from +***running-config***. + + +**Command Mode** + + +Policy-map-class (control plane) configuration accessed through **class (policy-map (control-plane) +Helix)**. + + +**Command Syntax** + + +shape pps +packets + + +no shape + + +default shape + + +**Parameters** + + +**packets** Maximum data rate in packets per second. Value +ranges from **1** to **100000**. 
+ + +**Static Classes Default Shape** + + +Helix platform switches define these default shapes for static classes: + +- copp-system-acllog 10000 copp-system-l3ttl1 10000 + +- copp-system-arp 10000 copp-system-lacp 5000 + +- copp-system-arpresolver 10000 copp-system-lldp 10000 + +- copp-system-bfd 10000 copp-system-mlag 5000 + +- copp-system-bgp 5000 copp-system-OspfIsis 10000 + +- copp-system-bpdu 5000 copp-system-selfip 5000 + +- copp-system-default 8000 copp-system-selfip-tc6to7 5000 + +- copp-system-glean 10000 copp-system-sflow 25024 + +- copp-system-igmp 10000 copp-system-tc3to5 10000 + +- copp-system-ipmcmiss 10000 copp-system-tc6to7 10000 + +- copp-system-ipmcrsvd 10000 copp-system-urm 10000 + +- copp-system-l3destmiss 10000 copp-system-vrrp 5000 + +- copp-system-l3slowpath 10000 + + +**Related Commands** + +- class (policy-map (control-plane) Helix) places the switch in +the ***policy-map-class (control plane)*** configuration mode. + +- bandwidth (policy-map-class (control-plane) Helix) specifies the +minimum bandwidth for traffic defined by its associated class map in its +configuration mode policy map class. + + +**Example** + + +These commands configure a maximum bandwidth of **5000** +packets per second for data traffic specified by the ***copp-system-lldp*** +of the default control-plane policy +map. 
+``` +`switch(config)# **policy-map type copp copp-system-policy** +switch(config-pmap-copp-system-policy)# **class copp-system-lldp** +switch(config-pmap-c-copp-system-policy-copp-system-lldp)# **shape pps 5000** +switch(config-pmap-c-copp-system-policy-copp-system-lldp)# **exit** +switch(config-pmap-copp-system-policy)# **exit** +switch(config)# **show policy-map copp copp-system-policy** +Service-policy input: copp-system-policy + + Class-map: copp-system-lldp (match-any) + shape : 5000 pps + bandwidth : 500 pps + Out Packets : 305961 + Drop Packets : 0 + +switch(config)#` +``` + + +### shape (policy-map-class +(control-plane)Petra) + + +The **shape** command specifies the maximum bandwidth for +traffic filtered by the configuration mode policy map class. + + +The **no shape** and **default shape** +commands remove the maximum bandwidth restriction for the configuration mode class +by deleting the corresponding **shape** command from +***running-config***. + + +**Command Mode** + + +Policy-map-class (control plane) configuration accessed through **class (policy-map (control-plane) +Petra)** + + +**Command Syntax** + + +shape kbps +kilobits + + +no shape + + +default shape + + +**Parameter** + + +**kilobits** Maximum data rate in kilobits per second. Value +ranges from **1** to **10000000**. + + +**Related Commands** + +- class (policy-map (control-plane) Petra) places the switch in +policy-map-class (control plane) configuration mode. + +- bandwidth (policy-map-class (control-plane) Petra) specifies the +minimum bandwidth for traffic defined by its associated class map in its +configuration mode policy map class. 
+ + +**Static Classes Default Shape** + + +Petra platform switches define these default shapes for static classes: + +- copp-system-bpdu 2500 copp-system-l3destmiss 2500 + +- copp-system-default 2500 copp-system-l3slowpath 2500 + +- copp-system-igmp 2500 copp-system-l3ttl0 2500 + +- copp-system-ipbroadcast 2500 copp-system-l3ttl1 2500 + +- copp-system-ipmc 2500 copp-system-lacp 2500 + +- copp-system-ipmcmiss 2500 copp-system-lldp 2500 + +- copp-system-ipmcrsvd 2500 copp-system-unicast-arp 2500 + +- copp-system-ipunicast No Limit + + +**Guidelines** + + +Petra does not support all discrete rate values. When a specified discrete value is +not supported, the switch converts the rate to the next highest discrete value that +it supports. The **show** command displays the converted rate +and not the user-configured rate. + + +**Example** + + +These commands configure the maximum bandwidth of **2000** kbps +for data traffic specified by the class map ***copp-system-lldp*** of the +**default control-plane** policy map. Because the +switch does not support the discrete value of **2000** kbps, +it converts the bandwidth up to **2115** +kbps. +``` +`switch(config)# **policy-map type copp copp-system-policy** +switch(config-pmap-copp-system-policy)# **class copp-system-lldp** +switch(config-pmap-c-copp-system-policy-copp-system-lldp)# **shape kbps 2000** +switch(config-pmap-c-copp-system-policy-copp-system-lldp)# **exit** +switch(config-pmap-copp-system-policy)# **exit** +switch(config)# **show policy-map copp copp-system-policy** +Service-policy input: copp-system-policy + + Class-map: copp-system-lldp (match-any) + shape : 2115 kbps + bandwidth : 325 kbps + Out Packets : 0 + Drop Packets : 0 + +switch(config)#` +``` + + +### shape (policy-map-class +(control-plane)Trident II) + + +The **shape** command specifies the maximum bandwidth for +traffic filtered by the configuration mode policy map class. 
+ + +The **no shape** and **default shape** +commands remove the maximum bandwidth restriction for the configuration mode class +by deleting the corresponding **shape** command from +***running-config***. + + +**Command Mode** + + +Policy-map-class (control plane) configuration accessed through **class (policy-map (control-plane) Trident +II)**. + + +**Command Syntax** + + +shape pps +packets + + +no shape + + +default shape + + +**Parameter** + + +**packets** Maximum data rate in packets per second. Value +ranges from **1** to **100000**. + + +**Static Classes Default Shape** + + +Trident II platform switches define these default shapes for static classes: + +- copp-system-acllog 10000 copp-system-l3slowpath 10000 + +- copp-system-arp 10000 copp-system-l3ttl1 10000 + +- copp-system-arpresolver 10000 copp-system-lacp 5000 + +- copp-system-bfd 10000 copp-system-lldp 10000 + +- copp-system-bgp 5000 copp-system-mlag 5000 + +- copp-system-bpdu 5000 copp-system-selfip 5000 + +- copp-system-default 8000 copp-system-selfip-tc6to7 5000 + +- copp-system-glean 10000 copp-system-sflow 25024 + +- copp-system-igmp 10000 copp-system-tc3to5 10000 + +- copp-system-ipmcmiss 10000 copp-system-tc6to7 10000 + +- copp-system-ipmcrsvd 10000 copp-system-urm 10000 + + +**Related Commands** + +- class (policy-map (control-plane) Trident II) places the switch +in ***policy-map-class (control plane)*** configuration mode. + +- bandwidth (policy-map-class (control-plane) Trident II) +specifies the minimum bandwidth for traffic defined by its associated class +map in its configuration mode policy map class. + + +**Example** + + +These commands configure a maximum bandwidth of **5000** +packets per second for data traffic specified by the ***copp-system-lldp*** +of the **default control-plane policy** +map. 
+``` +`switch(config)# **policy-map type copp copp-system-policy** +switch(config-pmap-copp-system-policy)# **class copp-system-lldp** +switch(config-pmap-c-copp-system-policy-copp-system-lldp)# **shape pps 5000** +switch(config-pmap-c-copp-system-policy-copp-system-lldp)# **exit** +switch(config-pmap-copp-system-policy)# **exit** +switch(config)# **show policy-map copp copp-system-policy** +Service-policy input: copp-system-policy + + Class-map: copp-system-lldp (match-any) + shape : 5000 pps + bandwidth : 500 pps + Out Packets : 305961 + Drop Packets : 0 + +switch(config)#` +``` + + +### shape (policy-map-class +(control-plane)Trident) + + +The **shape** command specifies the maximum bandwidth for +traffic filtered by the configuration mode policy map class. + + +The **no shape** and **default shape** +commands remove the maximum bandwidth restriction for the configuration mode class +by deleting the corresponding **shape** command from +***running-config***. + + +**Command Mode** + + +Policy-map-class (control plane) configuration accessed through **class (policy-map (control-plane) +Trident)**. + + +**Command Syntax** + + +shape pps +packets + + +no shape + + +default shape + + +**Parameters** + + +**packets** Maximum data rate in packets per second. Value +ranges from 1 to 100000. 
+ + +**Static Classes Default Shape** + + +Trident platform switches define these default shapes for static classes: + +- copp-system-arp 10000 copp-system-lldp 10000 + +- copp-system-arpresolver 10000 copp-system-l3destmiss 10000 + +- copp-system-bpdu 5000 copp-system-l3slowpath 10000 + +- copp-system-default 8000 copp-system-l3ttl1 10000 + +- copp-system-glean 10000 copp-system-selfip 5000 + +- copp-system-igmp 10000 copp-system-selfip-tc6to7 5000 + +- copp-system-ipmcmiss 10000 copp-system-sflow 25000 + +- copp-system-ipmcrsvd 10000 copp-system-tc3to5 10000 + +- copp-system-lacp 5000 copp-system-tc6to7 10000 + + +**Related Commands** + + +- class (policy-map (control-plane) Trident) places the switch in +the ***policy-map-class (control plane)*** configuration mode. + +- bandwidth (policy-map-class (control-plane) Trident) specifies +the minimum bandwidth for traffic defined by its associated class map in its +configuration mode policy map class. + + +**Example** + + +These commands configure a maximum bandwidth of **5000** +packets per second for data traffic specified by the class map +**PMAP-1** in the policy map named +***copp-system-policy***. +``` +`switch(config)# **policy-map type copp copp-system-policy** +switch(config-pmap-copp-system-policy)# **class PMAP-1** +switch(config-pmap-c-copp-system-policy-PMAP-1)# **shape pps 5000** +switch(config-pmap-c-copp-system-policy-PMAP-1)#` +``` + + +### show class-map type control-plane + + +The **show class-map** command displays contents of available +control-plane class maps. **Control-plane** class maps can be added to the +***copp-system-policy*** policy map. ***Control-plane*** +class maps can be static class maps defined by the system or dynamic maps created in +***class-map*** configuration mode. + + +Dynamic class maps are composed of statements that match IPv4 access control lists. +Static class maps are defined by the switch and cannot be altered. 
+ + +**Command Mode** + + +EXEC + + +**Command Syntax** + + +**show class-map type control-plane +[MAP_NAME]** + + +**Parameters** + + +MAP_NAME Name of class map displayed by the command. Options +include: + +- **no parameter** Command displays all control plane +class maps. + +- **name_text** Command displays specified control-plane +class maps. + + +**Related Command** + +- **show class-map** command displays QoS class +maps. + +- show class-map type +qos displays QoS class maps. + + +**Example** + + +This command displays the available control plane class +maps. +``` +`switch# **show class-map type control-plane** + Class-map: CM-CP1 (match-any) + Match: ip access-group name LIST-CP1 + Class-map: copp-system-acllog (match-any) + Class-map: copp-system-arp (match-any) + Class-map: copp-system-arpresolver (match-any) + Class-map: copp-system-bpdu (match-any) + Class-map: copp-system-glean (match-any) + Class-map: copp-system-igmp (match-any) + Class-map: copp-system-ipmcmiss (match-any) + Class-map: copp-system-ipmcrsvd (match-any) + Class-map: copp-system-l3destmiss (match-any) + Class-map: copp-system-l3slowpath (match-any) + Class-map: copp-system-l3ttl1 (match-any) + Class-map: copp-system-lacp (match-any) + Class-map: copp-system-lldp (match-any) + Class-map: copp-system-selfip (match-any) + Class-map: copp-system-selfip-tc6to7 (match-any) + Class-map: copp-system-sflow (match-any) + Class-map: copp-system-tc3to5 (match-any) + Class-map: copp-system-tc6to7 (match-any) +switch>` +``` + + +### show class-map type pbr + + +The **show class-map** command displays contents of all +available Policy-Based Routing (PBR) class maps, or of a specified PBR class map. +PBR class maps are used by PBR policy maps. PBR class maps are dynamic maps that are +created in class-map-configuration mode. Dynamic class maps are composed of +statements that match IPv4 or IPv6 access control lists. 
+ + +**Command Mode** + + +EXEC + + +**Command Syntax** + + +show class-map type pbr [map_name] + + +**Parameters** + + +**map_name** Name of class map displayed by the command. If no +parameter is entered, the command shows all available PBR class maps. + + +**Related Command** + + +show policy-map type pbr +displays PBR policy maps. + + +**Example** + + +This command displays the contents of the PBR class map +**CMAP1**. +``` +`switch# **show class-map type pbr CMAP1** + Class-map: CMAP1 (match-any) + Match: 10 ip access-group PBRgroup1 + Match: 20 ip access-group PBRgroup2 + Match: 30 ip access-group PBRgroup3 +switch>` +``` + + +### show class-map type qos + + +The **show class-map** command displays contents of all +available QoS class maps. QoS class maps are used by QoS policy maps. QoS class maps +are dynamic maps that are created in ***class-map*** configuration mode. +Dynamic class maps are composed of statements that match IPv4 or IPv6 access control +lists. + + +**Command Mode** + + +EXEC + + +**Command Syntax** + + +show class-map type qos [MAP_NAME] + + +**Parameters** + + +MAP_NAME Name of class map displayed by the command. + +- **no parameter** Command displays all QoS class maps. + +- **name_text** Command displays specified QoS class +maps. + + +**show class-map** and **show class-map type +qos** are identical commands. + + +**Related Command** + + +show class-map type +control-plane displays control plane class maps. + + +**Example** + + +This command displays the available QoS class +maps. +``` +`switch# **show class-map type qos** + Class-map: CM-Q1 (match-any) + Match: ipv6 access-group name LIST-1 + Class-map: CM-Q2 (match-any) + Match: ip access-group name LIST-2` +``` + + +### show policy-map copp + + +The **show policy-map copp** command displays contents of the +control-plane policy map. Control-plane policy maps are applied to the control +plane, and copp-system-policy is the only supported policy map. 
+ + +**Command Mode** + + +EXEC + + +**Command Syntax** + + +**show policy-map copp +copp-system-policy** + + +**Example** + + +This command displays the contents and throughput of the policy map applied to the +control +plane. +``` +`switch# **show policy-map copp copp-system-policy** +Service-policy input: copp-system-policy + Number of units programmed: 1 + Hardware programming status: Successful + + Class-map: copp-system-bpdu (match-any) + shape : 5000 pps + bandwidth : 5000 pps + Out Packets : 2 + Drop Packets : 0 + + Class-map: copp-system-lacp (match-any) + shape : 5000 pps + bandwidth : 5000 pps + Out Packets : 0 + Drop Packets : 0 + +switch>` +``` + + +### show policy-map interface +type qos counters + + +The **show policy-map interface** command displays the quantity +of packets that are filtered by ACLs applied to an interface. + + +**Command Mode** + + +EXEC + + +**Command Syntax** + + +show policy-map [INTERFACE_NAME][type +qos][TRAFFIC] counters + + +**Parameters** + +- **INTERFACE_NAME** Filters policy map list by +interfaces. Options include: + +- **no parameter** Displays data for all configured +interfaces. + +- **interface ethernet** +**e_range** Ethernet ports for which command +displays policy maps. + +- **interface port-channel** +**p_range** Port channels for which command +displays policy maps. + +- **TRAFFIC** Filters policy maps by the traffic they +manage. Options include: + +- **no parameter** Policy maps that manage interfaces ingress +traffic (same as **input** option). + +- **input** Policy maps that manage interfaces +ingress traffic. + + +**Example** + + +This command displays the policy maps applied to interfaces Ethernet +**7** and +**8**. 
+``` +`switch# **show policy-map interface ethernet 7-8** +Service-policy input: PMAP-1 + Hardware programming status: Successful + + Class-map: cmap-1 (match-any) + Match: ip access-group name LIST-2 + set cos 6 + + Class-map: class-default (match-any) + +Service-policy input: PMAP-2 + Hardware programming status: Successful + + Class-map: cmap-2 (match-any) + Match: ip access-group name LIST-2 + set dscp 10 + + Class-map: class-default (match-any)` +``` + + +### show policy-map interface +type qos + + +The **show policy-map interface** command displays contents of +the policy maps applied to specified interfaces or to the control plane. + + +**Command Mode** + + +EXEC + + +**Command Syntax** + + +show policy-map interface +INTERFACE_NAME [type qos] +[TRAFFIC] + + +**Parameters** + + +- **INTERFACE_NAME** Filters policy map list by +interfaces. Options include: + +- **ethernet** +**e_range** Ethernet ports for which command +displays policy maps. + +- **port-channel** +**p_range** Port channels for which command +displays policy maps. + +- **TRAFFIC** Filters policy maps by the traffic they +manage. Options include: + +- **no parameter** Policy maps that manage interfaces ingress +traffic (same as **input** option). + +- **input** Policy maps that manage interfaces +ingress traffic. + + +**Example** + + +This command displays the policy maps applied to interfaces Ethernet +**7** and +**8**. 
+``` +`switch# **show policy-map interface ethernet 7-8** +Service-policy input: PMAP-1 + Hardware programming status: Successful + + Class-map: cmap-1 (match-any) + Match: ip access-group name LIST-2 + set cos 6 + + Class-map: class-default (match-any) + +Service-policy input: PMAP-2 + Hardware programming status: Successful + + Class-map: cmap-2 (match-any) + Match: ip access-group name LIST-2 + set dscp 10 + + Class-map: class-default (match-any)` +``` + + +### show policy-map type +copp + + +The **show policy-map type copp** command displays contents of +control plane policy maps. Control-plane policy maps are applied to the control +plane; copp-system-policy is the only supported policy map. + + +Command options filter the output to display +contents of all policy maps, contents of a specified policy map, or contents +of a single class map within a specified policy map. + + +**Command Mode** + + +EXEC + + +**Command Syntax** + + +show policy-map type +copp +copp-system-policy [CMAP_NAME] + + +**Parameters** + + +**CMAP_NAME** Name of class map displayed by the command. + +- **no parameter** Command displays all class maps in specified policy +map. + +- **class_name** Command displays specified class +map. + + +**Example** + + +This command displays the contents of the copp-system-bpdu class map in the +copp-system-policy policy +maps. +``` +`switch# **show policy-map type copp copp-system-policy class copp-system-bpdu** + Class-map: copp-system-bpdu (match-any) + shape : 5000 pps + bandwidth : 5000 pps` +``` + + +### show policy-map type +pbr + + +The **show policy-map pbr** command displays contents of +Policy-Based Routing (PBR) policy maps. PBR policy maps are applied to Ethernet +interfaces, port channel interfaces or switch virtual interfaces (SVIs). + + +Command options filter the output to either +display contents of all policy maps, contents of a specified policy map, +or summary contents of all or a specified policy map. 
+ + +**Command Mode** + + +EXEC + + +**Command Syntax** + + +show policy-map type pbr +[PMAP_NAME][DATA_LEVEL] + + +**Parameters** + +- **PMAP_NAME** Name of policy map displayed by the +command. + +- **no parameter** Command displays all policy maps. + +- **policy_map** Command displays specified +policy map. + +- **DATA_LEVEL** Type of information the command displays. +Values include: + +- **no parameter** Command displays all class maps in specified +policy map. + +- **summary** Command displays summary data for +the specified policy map. + + +**Example** + + +This command displays the contents of all PBR policy maps in +***running-config***. +``` +`switch# **show policy-map type pbr** +Service policy PMAP1 +Configured on: +Applied on: +10: Class-map: CMAP1 (match-any) +Match: 10 ip access-group PBRgroup1 +Match: 20 ip access-group PBRgroup2 +Match: 30 ip access-group PBRgroup3 +Configured actions: set nexthop 172.16.10.12 +20: Class-map: CMAP2 (match-any) +Match: 10 ip access-group PBRgroup1 +Match: 10 ip access-group PBRgroup4 +Match: 20 ip access-group PBRgroup5 +Configured actions: set nexthop 192.168.15.15` +``` + + +### show policy-map type +qos counters + + +The **show policy-map counters** command displays the quantity +of packets that are filtered by the ACLs that comprise a specified QoS policy +map. + + +**Command Mode** + + +EXEC + + +**Command Syntax** + + +show policy-map type qos +pmap_name [TRAFFIC] counters +[INFO_LEVEL] + + +**Parameters** + +- **pmap_name** Name of policy map displayed by the +command. + +- **TRAFFIC** Filters policy maps by the traffic they +manage. Options include: + +- **no parameter** Policy maps that manage interfaces ingress +traffic (same as **input** option). + +- **input** Policy maps that manage interfaces +ingress traffic. + +- **INFO_LEVEL** amount of information that is displayed. +Options include: + +- **no parameter** displays summarized information about the policy +map. 
+ +- **detail** displays detailed policy map +information. + + +**Example** + + +Use the following command to display counters for policy map, p1, enabled for per-interface +counters: + + +``` +`switch# **show policy-map p1 counters** +Service-policy input: p1 + Hardware programming status: Successful + + Class-map: c1 (match-any) + Match: vlan 100 0xfff + set dscp 7 + Matched Packets: 7 + Interface: Ethernet1/1 + Matched Packets: 1 + Interface: Ethernet2/1 + Matched Packets: 2 + Interface: Ethernet3/1 + Matched Packets: 4 + + Class-map: c2 (match-any) + Match: ip access-group name acl1 + set cos 6 + Matched Packets: 6 + Interface: Ethernet1/1 + Matched Packets: 2 + Interface: Ethernet2/1 + Matched Packets: 0 + Interface: Ethernet3/1 + Matched Packets: 4 + + Class-map: class-default (match-any) + Matched Packets: 0 + Interface: Ethernet1/1 + Matched Packets: 0 + Interface: Ethernet2/1 + Matched Packets: 0 + Interface: Ethernet3/1 + Matched Packets: 0` +``` + + +### show policy-map type +qos + + +The **show policy-map qos** command displays contents of QoS +policy maps. QoS policy maps are applied to Ethernet or port channel interfaces. + + +Command options filter the output to either +display contents of all policy maps, contents of a specified policy map, +or contents of a single class map within a specified policy map. + + +**Command Mode** + + +EXEC + + +**Command Syntax** + + +show policy-map [type +qos][PMAP_NAME [CMAP_NAME]] + + +**Parameters** + + +- **PMAP_NAME** Name of policy map displayed by the +command. + +- **no parameter** Command displays all policy maps. + +- **policy_map** Command displays specified +policy map. + +- **CMAP_NAME** Name of class map displayed by the +command. This option is available only when the command includes a policy +map name. + +- **no parameter** Command displays all class maps in specified +policy map. + +- **class_name** Command displays specified +class map. 
+ + +**Example** + + +This command displays the contents of all QoS policy maps in +***running-config***. +``` +`switch# **show policy-map type qos** +Service-policy input: PMAP-1 + Hardware programming status: Successful + + Class-map: xeter (match-any) + Match: ip access-group name LIST-1 + set cos 6 + + Class-map: class-default (match-any) + +Service-policy PMAP-2 + + Class-map: class-default (match-any)` +``` + + +### show traffic-policy + + +The **show traffic-policy** command displays traffic policy +information on the interface. + + +**Command Mode** + + +EXEC + + +**Command Syntax** + + +show traffic-policy +NAME +interface + + +show traffic-policy +interface [DETAILS] + + +**Parameters** + + +**DETAILS** Details requested. Options include: + +- **summary** Display summary information about the +policy. + +- **errors** Display installation errors for the +policy's match statements on each interface. + +- **detail** Display all interfaces on which +the policy has been configured. + + +**Examples** + +- This command displays the summary information configured on the switch +interfaces. +``` +`switch(config-traffic-policies)# **show traffic-policy interface summary** +Traffic policy samplePolicy + Configured on interfaces: Ethernet1/1, Ethernet2/1, Ethernet3/1, ... + Applied on interfaces for IPv4 traffic: Ethernet1/1, Ethernet2/1, Ethernet3/1, ... + Applied on interfaces for IPv6 traffic: + Total number of rules configured: 3 + match SIMPLE ipv4 + match ipv4-all-default ipv4 + match ipv6-all-default ipv6` +``` + +- This command displays information about the traffic policy named +**samplePolicy**. +``` +`switch(config-traffic-policies)# **show traffic-policy samplePolicy interface** +Traffic policy samplePolicy + Configured on interfaces: Ethernet1/1, Ethernet2/1, Ethernet3/1, ... + Applied on interfaces for IPv4 traffic: Ethernet1/1, Ethernet2/1, Ethernet3/1, ...
+ Applied on interfaces for IPv6 traffic: + Total number of rules configured: 3 + match SIMPLE ipv4 + Source prefix: 192.0.2.0/24 + 198.51.100.0/24 + Destination prefix: 203.0.113.0/24 + Protocol: tcp + Source port: 50-100 + 110-200 + Actions: Drop + match ipv4-all-default ipv4 + match ipv6-all-default ipv6` +``` + +- This command displays all interfaces on which +**samplePolicy** has been +configured. +``` +`switch(config-traffic-policies)# **show traffic-policy interface detail** +Traffic policy samplePolicy + Configured on interfaces: Ethernet1/1, Ethernet2/1, Ethernet3/1, Ethernet4/1 + Applied on interfaces for IPv4 traffic: Ethernet1/1, Ethernet2/1, Ethernet3/1, Ethernet4/1 + Applied on interfaces for IPv6 traffic: + Total number of rules configured: 3 + match SIMPLE ipv4 + Source prefix: 192.0.2.0/24 + 198.51.100.0/24 + Destination prefix: 203.0.113.0/24 + Protocol: tcp + Source port: 50-100 + 110-200 + Actions: Drop + match ipv4-all-default ipv4 + match ipv6-all-default ipv6` +``` + +- This command displays installation errors for a match statement. The +example has no +errors. +``` +`switch(config-traffic-policies)# **show traffic-policy interface errors** +Traffic policy samplePolicy + Failed on interface for IPv4 traffic: + Failed on interface for IPv6 traffic:` +``` diff --git a/docs/ashburn-validator-relay.md b/docs/ashburn-validator-relay.md new file mode 100644 index 00000000..77e46c6d --- /dev/null +++ b/docs/ashburn-validator-relay.md @@ -0,0 +1,275 @@ +# Ashburn Validator Relay — Full Traffic Redirect + +## Overview + +All validator traffic (gossip, repair, TVU, TPU) enters and exits from +`137.239.194.65` (laconic-was-sw01, Ashburn). Peers see the validator as an +Ashburn node. This improves repair peer count and slot catchup rate by reducing +RTT to the TeraSwitch/Pittsburgh cluster from ~30ms (direct Miami) to ~5ms +(Ashburn). + +Supersedes the previous TVU-only shred relay (see `tvu-shred-relay.md`). 
+ +## Architecture + +``` + OUTBOUND (validator → peers) +agave-validator (kind pod, ports 8001, 9000-9025) + ↓ Docker bridge → host FORWARD chain +biscayne host (186.233.184.235) + ↓ mangle PREROUTING: fwmark 100 on sport 8001,9000-9025 from 172.20.0.0/16 + ↓ nat POSTROUTING: SNAT → src 137.239.194.65 + ↓ policy route: fwmark 100 → table ashburn → via 169.254.7.6 dev doublezero0 +laconic-mia-sw01 (209.42.167.133, Miami) + ↓ traffic-policy VALIDATOR-OUTBOUND: src 137.239.194.65 → nexthop 172.16.1.188 + ↓ backbone Et4/1 (25.4ms) +laconic-was-sw01 Et4/1 (Ashburn) + ↓ default route via 64.92.84.80 out Et1/1 +Internet (peers see src 137.239.194.65) + + INBOUND (peers → validator) +Solana peers → 137.239.194.65:8001,9000-9025 + ↓ internet routing to was-sw01 +laconic-was-sw01 Et1/1 (Ashburn) + ↓ traffic-policy VALIDATOR-RELAY: ASIC redirect, line rate + ↓ nexthop 172.16.1.189 via Et4/1 backbone (25.4ms) +laconic-mia-sw01 Et4/1 (Miami) + ↓ L3 forward → biscayne via doublezero0 GRE or ISP routing +biscayne (186.233.184.235) + ↓ nat PREROUTING: DNAT dst 137.239.194.65:* → 172.20.0.2:* (kind node) + ↓ Docker bridge → validator pod +agave-validator +``` + +RPC traffic (port 8899) is NOT relayed — clients connect directly to biscayne. + +## Switch Config: laconic-was-sw01 + +SSH: `install@137.239.200.198` + +### Pre-change + +``` +configure checkpoint save pre-validator-relay +``` + +Rollback: `rollback running-config checkpoint pre-validator-relay` then `write memory`. + +### Config session with auto-revert + +``` +configure session validator-relay + +! Loopback for 137.239.194.65 (do NOT touch Loopback100 which has .64) +interface Loopback101 + ip address 137.239.194.65/32 + +! ACL covering all validator ports +ip access-list VALIDATOR-RELAY-ACL + 10 permit udp any any eq 8001 + 20 permit udp any any range 9000 9025 + 30 permit tcp any any eq 8001 + +! 
Traffic-policy: ASIC redirect to backbone (mia-sw01) +traffic-policy VALIDATOR-RELAY + match VALIDATOR-RELAY-ACL + set nexthop 172.16.1.189 + +! Replace old SHRED-RELAY on Et1/1 +interface Ethernet1/1 + no traffic-policy input SHRED-RELAY + traffic-policy input VALIDATOR-RELAY + +! system-rule overriding-action redirect (already present from SHRED-RELAY) + +show session-config diffs +commit timer 00:05:00 +``` + +After verification: `configure session validator-relay commit` then `write memory`. + +### Cleanup (after stable) + +Old SHRED-RELAY policy and ACL can be removed once VALIDATOR-RELAY is confirmed: + +``` +configure session cleanup-shred-relay +no traffic-policy SHRED-RELAY +no ip access-list SHRED-RELAY-ACL +show session-config diffs +commit +write memory +``` + +## Switch Config: laconic-mia-sw01 + +### Pre-flight checks + +Before applying config, verify: + +1. Which EOS interface terminates the doublezero0 GRE from biscayne + (endpoint 209.42.167.133). Check with `show interfaces tunnel` or + `show ip interface brief | include Tunnel`. + +2. Whether `system-rule overriding-action redirect` is already configured. + Check with `show running-config | include system-rule`. + +3. Whether EOS traffic-policy works on tunnel interfaces. If not, apply on + the physical interface where GRE packets arrive (likely Et facing + biscayne's ISP network or the DZ infrastructure). + +### Config session + +``` +configure checkpoint save pre-validator-outbound + +configure session validator-outbound + +! ACL matching outbound validator traffic (source = Ashburn IP) +ip access-list VALIDATOR-OUTBOUND-ACL + 10 permit ip 137.239.194.65/32 any + +! Redirect to was-sw01 via backbone +traffic-policy VALIDATOR-OUTBOUND + match VALIDATOR-OUTBOUND-ACL + set nexthop 172.16.1.188 + +! Apply on the interface where biscayne GRE traffic arrives +! Replace Tunnel with the actual interface from pre-flight check #1 +interface Tunnel + traffic-policy input VALIDATOR-OUTBOUND + +! 
Add system-rule if not already present (pre-flight check #2) +system-rule overriding-action redirect + +show session-config diffs +commit timer 00:05:00 +``` + +After verification: commit + `write memory`. + +## Host Config: biscayne + +Automated via ansible playbook `playbooks/ashburn-validator-relay.yml`. + +### Manual equivalent + +```bash +# 1. Accept packets destined for 137.239.194.65 +sudo ip addr add 137.239.194.65/32 dev lo + +# 2. Inbound DNAT to kind node (172.20.0.2) +sudo iptables -t nat -A PREROUTING -p udp -d 137.239.194.65 --dport 8001 \ + -j DNAT --to-destination 172.20.0.2:8001 +sudo iptables -t nat -A PREROUTING -p tcp -d 137.239.194.65 --dport 8001 \ + -j DNAT --to-destination 172.20.0.2:8001 +sudo iptables -t nat -A PREROUTING -p udp -d 137.239.194.65 --dport 9000:9025 \ + -j DNAT --to-destination 172.20.0.2 + +# 3. Outbound: mark validator traffic +sudo iptables -t mangle -A PREROUTING -s 172.20.0.0/16 -p udp --sport 8001 \ + -j MARK --set-mark 100 +sudo iptables -t mangle -A PREROUTING -s 172.20.0.0/16 -p udp --sport 9000:9025 \ + -j MARK --set-mark 100 +sudo iptables -t mangle -A PREROUTING -s 172.20.0.0/16 -p tcp --sport 8001 \ + -j MARK --set-mark 100 + +# 4. Outbound: SNAT to Ashburn IP (INSERT before Docker MASQUERADE) +sudo iptables -t nat -I POSTROUTING 1 -m mark --mark 100 \ + -j SNAT --to-source 137.239.194.65 + +# 5. Policy routing table +echo "100 ashburn" | sudo tee -a /etc/iproute2/rt_tables +sudo ip rule add fwmark 100 table ashburn +sudo ip route add default via 169.254.7.6 dev doublezero0 table ashburn + +# 6. Persist +sudo netfilter-persistent save +# ip rule + ip route persist via /etc/network/if-up.d/ashburn-routing +``` + +### Docker NAT port preservation + +**Must verify before going live:** Docker masquerade must preserve source ports +for kind's hostNetwork pods. If Docker rewrites the source port, the mangle +PREROUTING match on `--sport 8001,9000-9025` will miss traffic. 
+ +Test: `tcpdump -i br-cf46a62ab5b2 -nn 'udp src port 8001'` — if you see +packets with sport 8001 from 172.20.0.2, port preservation works. + +If Docker does NOT preserve ports, the mark must be set inside the kind node +container (on the pod's veth) rather than on the host. + +## Execution Order + +1. **was-sw01**: checkpoint → config session with 5min auto-revert → verify counters → commit +2. **biscayne**: add 137.239.194.65/32 to lo, add inbound DNAT rules +3. **Verify inbound**: `ping 137.239.194.65` from external host, check DNAT counters +4. **mia-sw01**: pre-flight checks → config session with 5min auto-revert → commit +5. **biscayne**: add outbound fwmark + policy routing + SNAT rules +6. **Test outbound**: from biscayne, send UDP from port 8001, verify src 137.239.194.65 on was-sw01 +7. **Verify**: traffic-policy counters on both switches, iptables hit counts on biscayne +8. **Restart validator** if needed (gossip should auto-refresh, but restart ensures clean state) +9. **was-sw01 + mia-sw01**: `write memory` to persist +10. **Cleanup**: remove old SHRED-RELAY and 64.92.84.81:20000 DNAT after stable + +## Verification + +1. `show traffic-policy counters` on was-sw01 — VALIDATOR-RELAY-ACL matches +2. `show traffic-policy counters` on mia-sw01 — VALIDATOR-OUTBOUND-ACL matches +3. `sudo iptables -t nat -L -v -n` on biscayne — DNAT and SNAT hit counts +4. `sudo iptables -t mangle -L -v -n` on biscayne — fwmark hit counts +5. `ip rule show` on biscayne — fwmark 100 lookup ashburn +6. Validator gossip ContactInfo shows 137.239.194.65 for ALL addresses (gossip, repair, TVU, TPU) +7. Repair peer count increases (target: 20+ peers) +8. Slot catchup rate improves from ~0.9 toward ~2.5 slots/sec +9. 
`traceroute --sport=8001 PEER_IP` (any public Solana peer address) from biscayne routes via doublezero0/was-sw01 + +## Rollback + +### biscayne + +```bash +sudo ip addr del 137.239.194.65/32 dev lo +sudo iptables -t nat -D PREROUTING -p udp -d 137.239.194.65 --dport 8001 -j DNAT --to-destination 172.20.0.2:8001 +sudo iptables -t nat -D PREROUTING -p tcp -d 137.239.194.65 --dport 8001 -j DNAT --to-destination 172.20.0.2:8001 +sudo iptables -t nat -D PREROUTING -p udp -d 137.239.194.65 --dport 9000:9025 -j DNAT --to-destination 172.20.0.2 +sudo iptables -t mangle -D PREROUTING -s 172.20.0.0/16 -p udp --sport 8001 -j MARK --set-mark 100 +sudo iptables -t mangle -D PREROUTING -s 172.20.0.0/16 -p udp --sport 9000:9025 -j MARK --set-mark 100 +sudo iptables -t mangle -D PREROUTING -s 172.20.0.0/16 -p tcp --sport 8001 -j MARK --set-mark 100 +sudo iptables -t nat -D POSTROUTING -m mark --mark 100 -j SNAT --to-source 137.239.194.65 +sudo ip rule del fwmark 100 table ashburn +sudo ip route del default table ashburn +sudo netfilter-persistent save +``` + +### was-sw01 + +``` +rollback running-config checkpoint pre-validator-relay +write memory +``` + +### mia-sw01 + +``` +rollback running-config checkpoint pre-validator-outbound +write memory +``` + +## Key Details + +| Item | Value | +|------|-------| +| Ashburn relay IP | `137.239.194.65` (Loopback101 on was-sw01) | +| Ashburn LAN block | `137.239.194.64/29` on was-sw01 Et1/1 | +| Biscayne IP | `186.233.184.235` | +| Kind node IP | `172.20.0.2` (Docker bridge br-cf46a62ab5b2) | +| Validator ports | 8001 (gossip), 9000-9025 (TVU/repair/TPU) | +| Excluded ports | 8899 (RPC), 8900 (WebSocket) — direct to biscayne | +| GRE tunnel | doublezero0: 169.254.7.7 ↔ 169.254.7.6, remote 209.42.167.133 | +| Backbone | was-sw01 Et4/1 172.16.1.188/31 ↔ mia-sw01 Et4/1 172.16.1.189/31 | +| Policy routing table | 100 ashburn | +| Fwmark | 100 | +| was-sw01 SSH | `install@137.239.200.198` | +| EOS version | 4.34.0F | diff --git a/docs/blue-green-upgrades.md
b/docs/blue-green-upgrades.md new file mode 100644 index 00000000..d35785ed --- /dev/null +++ b/docs/blue-green-upgrades.md @@ -0,0 +1,416 @@ +# Blue-Green Upgrades for Biscayne + +Zero-downtime upgrade procedures for the agave-stack deployment on biscayne. +Uses ZFS clones for instant data duplication, Caddy health-check routing for +traffic shifting, and k8s native sidecars for independent container upgrades. + +## Architecture + +``` + Caddy ingress (biscayne.vaasl.io) + ├── upstream A: localhost:8899 ← health: /health + └── upstream B: localhost:8897 ← health: /health + │ + ┌─────────────────┴──────────────────┐ + │ kind cluster │ + │ │ + │ Deployment A Deployment B │ + │ ┌─────────────┐ ┌─────────────┐ │ + │ │ agave :8899 │ │ agave :8897 │ │ + │ │ doublezerod │ │ doublezerod │ │ + │ └──────┬──────┘ └──────┬──────┘ │ + └─────────┼─────────────────┼─────────┘ + │ │ + ZFS dataset A ZFS clone B + (original) (instant CoW copy) +``` + +Both deployments run in the same kind cluster with `hostNetwork: true`. +Caddy active health checks route traffic to whichever deployment has a +healthy `/health` endpoint. + +## Storage Layout + +| Data | Path | Type | Survives restart? | +|------|------|------|-------------------| +| Ledger | `/srv/solana/ledger` | ZFS zvol (xfs) | Yes | +| Snapshots | `/srv/solana/snapshots` | ZFS zvol (xfs) | Yes | +| Accounts | `/srv/solana/ramdisk/accounts` | `/dev/ram0` (xfs) | Until host reboot | +| Validator config | `/srv/deployments/agave/data/validator-config` | ZFS | Yes | +| DZ config | `/srv/deployments/agave/data/doublezero-config` | ZFS | Yes | + +The ZFS zvol `biscayne/DATA/volumes/solana` backs `/srv/solana` (ledger, snapshots). +The ramdisk at `/dev/ram0` holds accounts — it's a block device, not tmpfs, so it +survives process restarts but not host reboots. + +--- + +## Procedure 1: DoubleZero Binary Upgrade (zero downtime, single pod) + +The GRE tunnel (`doublezero0`) and BGP routes live in kernel space. 
They persist +across doublezerod process restarts. Upgrading the DZ binary does not require +tearing down the tunnel or restarting the validator. + +### Prerequisites + +- doublezerod is defined as a k8s native sidecar (`spec.initContainers` with + `restartPolicy: Always`). See [Required Changes](#required-changes-to-agave-stack) below. +- k8s 1.29+ (biscayne runs 1.35.1) + +### Steps + +1. Build or pull the new doublezero container image. + +2. Patch the pod's sidecar image: + ```bash + kubectl -n NAMESPACE patch pod POD_NAME --type='json' -p='[ + {"op": "replace", "path": "/spec/initContainers/0/image", + "value": "laconicnetwork/doublezero:new-version"} + ]' + ``` + +3. Only the doublezerod container restarts. The agave container is unaffected. + The GRE tunnel interface and BGP routes remain in the kernel throughout. + +4. Verify: + ```bash + kubectl -n NAMESPACE exec POD_NAME -c doublezerod -- doublezero --version + kubectl -n NAMESPACE exec POD_NAME -c doublezerod -- doublezero status + ip route | grep doublezero0 # routes still present + ``` + +### Rollback + +Patch the image back to the previous version. Same process, same zero downtime. + +--- + +## Procedure 2: Agave Version Upgrade (zero RPC downtime, blue-green) + +Agave is the main container and must be restarted for a version change. To maintain +zero RPC downtime, we run two deployments simultaneously and let Caddy shift traffic +based on health checks. + +### Prerequisites + +- Caddy ingress configured with dual upstreams and active health checks +- A parameterized spec.yml that accepts alternate ports and volume paths +- ZFS snapshot/clone scripts + +### Steps + +#### Phase 1: Prepare (no downtime, no risk) + +1. **ZFS snapshot** for rollback safety: + ```bash + zfs snapshot -r biscayne/DATA@pre-upgrade-$(date +%Y%m%d) + ``` + +2. **ZFS clone** the validator volumes: + ```bash + zfs clone biscayne/DATA/volumes/solana@pre-upgrade-$(date +%Y%m%d) \ + biscayne/DATA/volumes/solana-blue + ``` + This is instant (copy-on-write). No additional storage until writes diverge.
+ +3. **Clone the ramdisk accounts** (not on ZFS): + ```bash + mkdir -p /srv/solana-blue/ramdisk/accounts + cp -a /srv/solana/ramdisk/accounts/* /srv/solana-blue/ramdisk/accounts/ + ``` + This is the slow step — 460GB on ramdisk. Consider `rsync` with `--inplace` + to minimize copy time, or investigate whether the ramdisk can move to a ZFS + dataset for instant cloning in future deployments. + +4. **Build or pull** the new agave container image. + +#### Phase 2: Start blue deployment (no downtime) + +5. **Create Deployment B** in the same kind cluster, pointing at cloned volumes, + with RPC on port 8897: + ```bash + # Apply the blue deployment manifest (parameterized spec) + kubectl apply -f deployment/k8s-manifests/agave-blue.yaml + ``` + +6. **Deployment B catches up.** It starts from the snapshot point and replays. + Monitor progress: + ```bash + kubectl -n exec -c agave-validator -- \ + solana -u http://127.0.0.1:8897 slot + ``` + +7. **Validate** the new version works: + - RPC responds: `curl -sf http://localhost:8897/health` + - Correct version: `kubectl -n exec -c agave-validator -- agave-validator --version` + - doublezerod connected (if applicable) + + Take as long as needed. Deployment A is still serving all traffic. + +#### Phase 3: Traffic shift (zero downtime) + +8. **Caddy routes traffic to B.** Once B's `/health` returns 200, Caddy's active + health check automatically starts routing to it. Alternatively, update the + Caddy upstream config to prefer B. + +9. **Verify** B is serving live traffic: + ```bash + curl -sf https://biscayne.vaasl.io/health + # Check Caddy access logs for requests hitting port 8897 + ``` + +#### Phase 4: Cleanup + +10. **Stop Deployment A:** + ```bash + kubectl -n delete deployment agave-green + ``` + +11. **Reconfigure B to use standard port** (8899) if desired, or update Caddy + to only route to 8897. + +12. 
**Clean up ZFS clone** (or keep as rollback): + ```bash + zfs destroy biscayne/DATA/volumes/solana-blue + ``` + +### Rollback + +At any point before Phase 4: +- Deployment A is untouched and still serving traffic (or can be restarted) +- Delete Deployment B: `kubectl -n delete deployment agave-blue` +- Destroy the ZFS clone: `zfs destroy biscayne/DATA/volumes/solana-blue` + +After Phase 4 (A already stopped): +- `zfs rollback` to restore original data +- Redeploy A with old image + +--- + +## Required Changes to agave-stack + +### 1. Move doublezerod to native sidecar + +In the pod spec generation (laconic-so or compose override), doublezerod must be +defined as a native sidecar container instead of a regular container: + +```yaml +spec: + initContainers: + - name: doublezerod + image: laconicnetwork/doublezero:local + restartPolicy: Always # makes it a native sidecar + securityContext: + privileged: true + capabilities: + add: [NET_ADMIN] + env: + - name: DOUBLEZERO_RPC_ENDPOINT + value: https://api.mainnet-beta.solana.com + volumeMounts: + - name: doublezero-config + mountPath: /root/.config/doublezero + containers: + - name: agave-validator + image: laconicnetwork/agave:local + # ... existing config +``` + +This change means: +- doublezerod starts before agave and stays running +- Patching the doublezerod image restarts only that container +- agave can be restarted independently without affecting doublezerod + +This requires a laconic-so change to support `initContainers` with `restartPolicy` +in compose-to-k8s translation — or a post-deployment patch. + +### 2. Caddy dual-upstream config + +Add health-checked upstreams for both blue and green deployments: + +```caddyfile +biscayne.vaasl.io { + reverse_proxy { + to localhost:8899 localhost:8897 + + health_uri /health + health_interval 5s + health_timeout 3s + + lb_policy first + } +} +``` + +`lb_policy first` routes to the first healthy upstream. When only A is running, +all traffic goes to :8899. 
When B comes up healthy, traffic shifts. + +### 3. Parameterized deployment spec + +Create a parameterized spec or kustomize overlay that accepts: +- RPC port (8899 vs 8897) +- Volume paths (original vs ZFS clone) +- Deployment name suffix (green vs blue) + +### 4. Delete DaemonSet workaround + +Remove `deployment/k8s-manifests/doublezero-daemonset.yaml` from agave-stack. + +### 5. Fix container DZ identity + +Copy the registered identity into the container volume: +```bash +sudo cp /home/solana/.config/doublezero/id.json \ + /srv/deployments/agave/data/doublezero-config/id.json +``` + +### 6. Disable host systemd doublezerod + +After the container sidecar is working: +```bash +sudo systemctl stop doublezerod +sudo systemctl disable doublezerod +``` + +--- + +## Implementation Order + +This is a spec-driven, test-driven plan. Each step produces a testable artifact. + +### Step 1: Fix existing DZ bugs (no code changes to laconic-so) + +Fixes BUG-1 through BUG-5 from [doublezero-status.md](doublezero-status.md). + +**Spec:** Container doublezerod shows correct identity, connects to laconic-mia-sw01, +host systemd doublezerod is disabled. + +**Test:** +```bash +kubectl -n exec -c doublezerod -- doublezero address +# assert: 3Bw6v7EruQvTwoY79h2QjQCs2KBQFzSneBdYUbcXK1Tr + +kubectl -n exec -c doublezerod -- doublezero status +# assert: BGP Session Up, laconic-mia-sw01 + +systemctl is-active doublezerod +# assert: inactive +``` + +**Changes:** +- Copy `id.json` to container volume +- Update `DOUBLEZERO_RPC_ENDPOINT` in spec.yml +- Deploy with hostNetwork-enabled stack-orchestrator +- Stop and disable host doublezerod +- Delete DaemonSet manifest from agave-stack + +### Step 2: Native sidecar for doublezerod + +**Spec:** doublezerod image can be patched without restarting the agave container. +GRE tunnel and routes persist across doublezerod restart. 
+ +**Test:** +```bash +# Record current agave container start time +BEFORE=$(kubectl -n get pod -o jsonpath='{.status.containerStatuses[?(@.name=="agave-validator")].state.running.startedAt}') + +# Patch DZ image +kubectl -n patch pod --type='json' -p='[ + {"op":"replace","path":"/spec/initContainers/0/image","value":"laconicnetwork/doublezero:test"} +]' + +# Wait for DZ container to restart +sleep 10 + +# Verify agave was NOT restarted +AFTER=$(kubectl -n get pod -o jsonpath='{.status.containerStatuses[?(@.name=="agave-validator")].state.running.startedAt}') +[ "$BEFORE" = "$AFTER" ] # assert: same start time + +# Verify tunnel survived +ip route | grep doublezero0 # assert: routes present +``` + +**Changes:** +- laconic-so: support `initContainers` with `restartPolicy: Always` in + compose-to-k8s translation (or: define doublezerod as native sidecar in + compose via `x-kubernetes-init-container` extension or equivalent) +- Alternatively: post-deploy kubectl patch to move doublezerod to initContainers + +### Step 3: Caddy dual-upstream routing + +**Spec:** Caddy routes RPC traffic to whichever backend is healthy. Adding a second +healthy backend on :8897 causes traffic to shift without configuration changes. + +**Test:** +```bash +# Start a test HTTP server on :8897 with /health +python3 -c " +from http.server import HTTPServer, BaseHTTPRequestHandler +class H(BaseHTTPRequestHandler): + def do_GET(self): + self.send_response(200); self.end_headers(); self.wfile.write(b'ok') +HTTPServer(('', 8897), H).serve_forever() +" & + +# Verify Caddy discovers it +sleep 10 +curl -sf https://biscayne.vaasl.io/health +# assert: 200 + +kill %1 +``` + +**Changes:** +- Update Caddy ingress config with dual upstreams and health checks + +### Step 4: ZFS clone and blue-green tooling + +**Spec:** A script creates a ZFS clone, starts a blue deployment on alternate ports +using the cloned data, and the deployment catches up and becomes healthy. 
+ +**Test:** +```bash +# Run the clone + deploy script +./scripts/blue-green-prepare.sh --target-version v2.2.1 + +# assert: ZFS clone exists +zfs list biscayne/DATA/volumes/solana-blue + +# assert: blue deployment exists and is catching up +kubectl -n $NS get deployment agave-blue + +# assert: blue RPC eventually becomes healthy +timeout 600 bash -c 'until curl -sf http://localhost:8897/health; do sleep 5; done' +``` + +**Changes:** +- `scripts/blue-green-prepare.sh` — ZFS snapshot, clone, deploy B +- `scripts/blue-green-promote.sh` — tear down A, optional port swap +- `scripts/blue-green-rollback.sh` — destroy B, restore A +- Parameterized deployment spec (kustomize overlay or env-driven) + +### Step 5: End-to-end upgrade test + +**Spec:** Full upgrade cycle completes with zero dropped RPC requests. + +**Test:** +```bash +# Start continuous health probe in background +while true; do + curl -sf -o /dev/null -w "%{http_code} %{time_total}\n" \ + https://biscayne.vaasl.io/health || echo "FAIL $(date)" + sleep 0.5 +done > /tmp/health-probe.log & + +# Execute full blue-green upgrade +./scripts/blue-green-prepare.sh --target-version v2.2.1 +# wait for blue to sync... +./scripts/blue-green-promote.sh + +# Stop probe +kill %1 + +# assert: no FAIL lines in probe log +grep -c FAIL /tmp/health-probe.log +# assert: 0 +``` diff --git a/docs/bug-ashburn-tunnel-port-filtering.md b/docs/bug-ashburn-tunnel-port-filtering.md new file mode 100644 index 00000000..865e3b93 --- /dev/null +++ b/docs/bug-ashburn-tunnel-port-filtering.md @@ -0,0 +1,85 @@ +# Bug: Ashburn Relay — 137.239.194.65 Not Routable from Public Internet + +## Summary + +`--gossip-host 137.239.194.65` correctly advertises the Ashburn relay IP in +ContactInfo for all sockets (gossip, TVU, repair, TPU). However, 137.239.194.65 +is a DoubleZero overlay IP (137.239.192.0/19, IS-IS only) that is NOT announced +via BGP to the public internet. 
Public peers cannot route to it, so TVU shreds, +repair requests, and TPU traffic never arrive at was-sw01. + +## Evidence + +- Gossip traffic arrives on `doublezero0` interface: + ``` + doublezero0 In IP 64.130.58.70.8001 > 137.239.194.65.8001: UDP, length 132 + ``` +- Zero TVU/repair traffic arrives: + ``` + tcpdump -i doublezero0 'dst host 137.239.194.65 and udp and not port 8001' + 0 packets captured + ``` +- ContactInfo correctly advertises all sockets on 137.239.194.65: + ```json + { + "gossip": "137.239.194.65:8001", + "tvu": "137.239.194.65:9000", + "serveRepair": "137.239.194.65:9011", + "tpu": "137.239.194.65:9002" + } + ``` +- Outbound gossip from biscayne exits via `doublezero0` with source + 137.239.194.65 — SNAT and routing work correctly in the outbound direction. + +## Root Cause + +**137.239.194.0/24 is not routable from the public internet.** The prefix +belongs to DoubleZero's overlay address space (137.239.192.0/19, Momentum +Telecom, WHOIS OriginAS: empty). It is advertised only via IS-IS within the +DoubleZero switch mesh. There is no eBGP session on was-sw01 to advertise it +to the ISP — all BGP peers are iBGP AS 65342 (DoubleZero internal). + +When the validator advertises `tvu: 137.239.194.65:9000` in ContactInfo, +public internet peers attempt to send turbine shreds to that IP, but the +packets have no route through the global BGP table to reach was-sw01. Only +DoubleZero-connected peers could potentially reach it via the overlay. + +The old shred relay pipeline worked because it used `--public-tvu-address +64.92.84.81:20000` — was-sw01's Et1/1 ISP uplink IP, which IS publicly +routable. The `--gossip-host 137.239.194.65` approach advertises a +DoubleZero-only IP for ALL sockets, making TVU/repair/TPU unreachable from +non-DoubleZero peers. + +The original hypothesis (ACL/PBR port filtering) was wrong. The tunnel and +switch routing work correctly — the problem is upstream: traffic never arrives +at was-sw01 in the first place. 
+ +## Impact + +The validator cannot receive turbine shreds or serve repair requests via the +low-latency Ashburn path. It falls back to the Miami public IP (186.233.184.235) +for all shred/repair traffic, negating the benefit of `--gossip-host`. + +## Fix Options + +1. **Use 64.92.84.81 (was-sw01 Et1/1) for ContactInfo sockets.** This is the + publicly routable Ashburn IP. Requires `--gossip-host 64.92.84.81` (or + equivalent `--bind-address` config) and DNAT/forwarding on was-sw01 to relay + traffic through the backbone → mia-sw01 → Tunnel500 → biscayne. The old + `--public-tvu-address` pipeline used this IP successfully. + +2. **Get DoubleZero to announce 137.239.194.0/24 via eBGP to the ISP.** This + would make the current `--gossip-host 137.239.194.65` setup work, but + requires coordination with DoubleZero operations. + +3. **Hybrid approach**: Use 64.92.84.81 for public-facing sockets (TVU, repair, + TPU) and 137.239.194.65 for gossip (which works via DoubleZero overlay). + Requires agave to support per-protocol address binding, which it does not + (`--gossip-host` sets ALL sockets to the same IP). + +## Previous Workaround + +The old `--public-tvu-address` pipeline used socat + shred-unwrap.py to relay +shreds from 64.92.84.81:20000 to the validator. That pipeline is not persistent +across reboots and was superseded by the `--gossip-host` approach (which turned +out to be broken for non-DoubleZero peers). diff --git a/docs/bug-laconic-so-etcd-cleanup.md b/docs/bug-laconic-so-etcd-cleanup.md new file mode 100644 index 00000000..48181715 --- /dev/null +++ b/docs/bug-laconic-so-etcd-cleanup.md @@ -0,0 +1,51 @@ +# Bug: laconic-so etcd cleanup wipes core kubernetes service + +## Summary + +`_clean_etcd_keeping_certs()` in laconic-stack-orchestrator 1.1.0 deletes the `kubernetes` service from etcd, breaking cluster networking on restart. + +## Component + +`stack_orchestrator/deploy/k8s/helpers.py` — `_clean_etcd_keeping_certs()` + +## Reproduction + +1. 
Deploy with `laconic-so` to a k8s-kind target with persisted etcd (hostPath mount in kind-config.yml) +2. `laconic-so deployment --dir $DEPLOYMENT_DIR stop` (destroys cluster) +3. `laconic-so deployment --dir $DEPLOYMENT_DIR start` (recreates cluster with cleaned etcd) + +## Symptoms + +- `kindnet` pods enter CrashLoopBackOff with: `panic: unable to load in-cluster configuration, KUBERNETES_SERVICE_HOST and KUBERNETES_SERVICE_PORT must be defined` +- `kubectl get svc kubernetes -n default` returns `NotFound` +- coredns, caddy, local-path-provisioner stuck in Pending (no CNI without kindnet) +- No pods can be scheduled + +## Root Cause + +`_clean_etcd_keeping_certs()` uses a whitelist that only preserves `/registry/secrets/caddy-system` keys. All other etcd keys are deleted, including `/registry/services/specs/default/kubernetes` — the core `kubernetes` ClusterIP service that kube-apiserver auto-creates. + +When the kind cluster starts with the cleaned etcd, kube-apiserver sees the existing etcd data and does not re-create the `kubernetes` service. kindnet depends on the `KUBERNETES_SERVICE_HOST` environment variable which is injected by the kubelet from this service — without it, kindnet panics. + +## Fix Options + +1. **Expand the whitelist** to include `/registry/services/specs/default/kubernetes` and other core cluster resources +2. **Fully wipe etcd** instead of selective cleanup — let the cluster bootstrap fresh (simpler, but loses Caddy TLS certs) +3. **Don't persist etcd at all** — ephemeral etcd means clean state every restart (recommended for kind deployments) + +## Workaround + +Fully delete the kind cluster before `start`: + +```bash +kind delete cluster --name $CLUSTER_NAME +laconic-so deployment --dir $DEPLOYMENT_DIR start +``` + +This forces fresh etcd bootstrap. Downside: all other services deployed to the cluster (DaemonSets, other namespaces) are destroyed. 
+ +## Impact + +- Affects any k8s-kind deployment with persisted etcd +- Cluster is unrecoverable without full destroy+recreate +- All non-laconic-so-managed workloads in the cluster are lost diff --git a/docs/bug-laconic-so-ingress-conflict.md b/docs/bug-laconic-so-ingress-conflict.md new file mode 100644 index 00000000..d930ede8 --- /dev/null +++ b/docs/bug-laconic-so-ingress-conflict.md @@ -0,0 +1,75 @@ +# Bug: laconic-so crashes on re-deploy when caddy ingress already exists + +## Summary + +`laconic-so deployment start` crashes with `FailToCreateError` when the kind cluster already has caddy ingress resources installed. The deployer uses `create_from_yaml()` which fails on `AlreadyExists` conflicts instead of applying idempotently. This prevents the application deployment from ever being reached — the crash happens before any app manifests are applied. + +## Component + +`stack_orchestrator/deploy/k8s/deploy_k8s.py:366` — `up()` method +`stack_orchestrator/deploy/k8s/helpers.py:369` — `install_ingress_for_kind()` + +## Reproduction + +1. `kind delete cluster --name laconic-70ce4c4b47e23b85` +2. `laconic-so deployment --dir /srv/deployments/agave start` — creates cluster, loads images, installs caddy ingress, but times out or is interrupted before app deployment completes +3. `laconic-so deployment --dir /srv/deployments/agave start` — crashes immediately after image loading + +## Symptoms + +- Traceback ending in: + ``` + kubernetes.utils.create_from_yaml.FailToCreateError: + Error from server (Conflict): namespaces "caddy-system" already exists + Error from server (Conflict): serviceaccounts "caddy-ingress-controller" already exists + Error from server (Conflict): clusterroles.rbac.authorization.k8s.io "caddy-ingress-controller" already exists + ... 
+ ``` +- Namespace `laconic-laconic-70ce4c4b47e23b85` exists but is empty — no pods, no deployments, no events +- Cluster is healthy, images are loaded, but no app manifests are applied + +## Root Cause + +`install_ingress_for_kind()` calls `kubernetes.utils.create_from_yaml()` which uses `POST` (create) semantics. If the resources already exist (from a previous partial run), every resource returns `409 Conflict` and `create_from_yaml` raises `FailToCreateError`, aborting the entire `up()` method before the app deployment step. + +The first `laconic-so start` after a fresh `kind delete` works because: +1. Image loading into the kind node takes 5-10 minutes (images are ~10GB+) +2. Caddy ingress is installed successfully +3. App deployment begins + +But if that first run is interrupted (timeout, Ctrl-C, ansible timeout), the second run finds caddy already installed and crashes. + +## Fix Options + +1. **Use server-side apply** instead of `create_from_yaml()` — `kubectl apply` is idempotent +2. **Check if ingress exists before installing** — skip `install_ingress_for_kind()` if caddy-system namespace exists +3. **Catch `AlreadyExists` and continue** — treat 409 as success for infrastructure resources + +## Workaround + +Delete the caddy ingress resources before re-running: + +```bash +kubectl delete namespace caddy-system +kubectl delete clusterrole caddy-ingress-controller +kubectl delete clusterrolebinding caddy-ingress-controller +kubectl delete ingressclass caddy +laconic-so deployment --dir /srv/deployments/agave start +``` + +Or nuke the entire cluster and start fresh: + +```bash +kind delete cluster --name laconic-70ce4c4b47e23b85 +laconic-so deployment --dir /srv/deployments/agave start +``` + +## Interaction with ansible timeout + +The `biscayne-redeploy.yml` playbook sets a 600s timeout on the `laconic-so deployment start` task. Image loading alone can exceed this on a fresh cluster (images must be re-loaded into the new kind node). 
When ansible kills the process at 600s, the caddy ingress is already installed but the app is not — putting the cluster into the broken state described above. Subsequent playbook runs hit this bug on every attempt. + +## Impact + +- Blocks all re-deploys on biscayne without manual cleanup +- The playbook cannot recover automatically — every retry hits the same conflict +- Discovered 2026-03-05 during full wipe redeploy of biscayne validator diff --git a/docs/doublezero-multicast-access.md b/docs/doublezero-multicast-access.md new file mode 100644 index 00000000..6d9d668e --- /dev/null +++ b/docs/doublezero-multicast-access.md @@ -0,0 +1,121 @@ +# DoubleZero Multicast Access Requests + +## Status (2026-03-06) + +DZ multicast is **still in testnet** (client v0.2.2). Multicast groups are defined +on the DZ ledger with on-chain access control (publishers/subscribers). The testnet +allocates addresses from 233.84.178.0/24 (AS21682). Not yet available for production +Solana shred delivery. + +## Biscayne Connection Details + +Provide these details when requesting subscriber access: + +| Field | Value | +|-------|-------| +| Client IP | 186.233.184.235 | +| Validator identity | 4WeLUxfQghbhsLEuwaAzjZiHg2VBw87vqHc4iZrGvKPr | +| DZ identity | 3Bw6v7EruQvTwoY79h2QjQCs2KBQFzSneBdYUbcXK1Tr | +| DZ device | laconic-mia-sw01 | +| Contributor / tenant | laconic | + +## Jito ShredStream + +**Not a DZ multicast group.** ShredStream is Jito's own shred delivery service, +independent of DoubleZero multicast. It provides low-latency shreds from leaders +on the Solana network via a proxy client that connects to the Jito Block Engine. + +| Property | Value | +|----------|-------| +| What it does | Delivers shreds from Jito-connected leaders with low latency. Provides a redundant shred path for servers in remote locations. | +| How it works | `shredstream-proxy` authenticates to a Jito Block Engine via keypair, receives shreds, forwards them to configured UDP destinations (e.g. 
validator TVU port). | +| Cost | **Unknown.** Docs don't list pricing. Was previously "complimentary" for searchers (2024). May require approval. | +| Requirements | Approved Solana pubkey (form submission), auth keypair, firewall open on UDP 20000, TVU port of your node. | +| Regions | Amsterdam, Dublin, Frankfurt, London, New York, Salt Lake City, Singapore, Tokyo. Max 2 regions selectable. | +| Limitations | No NAT support. Bridge networking incompatible with multicast mode. | +| Repo | https://github.com/jito-labs/shredstream-proxy | +| Docs | https://docs.jito.wtf/lowlatencytxnfeed/ | +| Status for biscayne | **Not yet requested.** Need to submit pubkey for approval. | + +ShredStream is relevant to our shred completeness problem — it provides an additional +shred source beyond turbine and the Ashburn relay. It would run as a sidecar process +forwarding shreds to the validator's TVU port. + +## DZ Multicast Groups + +DZ multicast uses PIM (Protocol Independent Multicast) and MSDP (Multicast Source +Discovery Protocol). Group owners define allowed publishers and subscribers on the +DZ ledger. Switch ASICs handle packet replication — no CPU overhead. + +### bebop + +Listed in earlier notes as a multicast shred distribution group. **No public +documentation found.** Cannot confirm this exists as a DZ multicast group. + +- **Owner:** Unknown +- **Status:** Unverified — may not exist as described + +### turbine (future) + +Solana's native shred propagation via DZ multicast. Jito has expressed interest +in leveraging multicast for shred delivery. Not yet available for production use. + +- **Owner:** Solana Foundation / Anza (native turbine), Jito (shredstream) +- **Status:** Testnet only (DZ client v0.2.2) + +## bloXroute OFR (Optimized Feed Relay) + +Commercial shred delivery service. 
Runs a gateway docker container on your node that +connects to bloXroute's BDN (Blockchain Distribution Network) to receive shreds +faster than default turbine (~30-50ms improvement, beats turbine ~98% of the time). + +| Property | Value | +|----------|-------| +| What it does | Delivers shreds via bloXroute's BDN with optimized relay topologies. Not just a different turbine path — uses their own distribution network. | +| How it works | Docker gateway container on your node, communicates with bloXroute OFR relay over UDP 18888. Forwards shreds to your validator. | +| Cost | **$300/mo** (Professional, 1500 tx/day), **$1,250/mo** (Enterprise, unlimited tx). OFR gateway without local node requires Enterprise Elite ($5,000+/mo). | +| Requirements | Docker, UDP port 18888 open, bloXroute subscription. | +| Open source | Gateway at https://github.com/bloXroute-Labs/solana-gateway | +| Docs | https://docs.bloxroute.com/solana/optimized-feed-relay | +| Status for biscayne | **Not yet evaluated.** Monthly cost may not be justified. | + +bloXroute's value proposition: they operate nodes at multiple turbine tree positions +across their network, aggregate shreds, and redistribute via their BDN. This is the +"multiple identities collecting different shreds" approach — but operated by bloXroute, +not by us. + +## How These Services Get More Shreds + +Turbine tree position is determined by validator identity (pubkey). A single validator +gets shreds from one position in the tree per slot. Services like Jito ShredStream +and bloXroute OFR operate many nodes with different identities across the turbine +tree, aggregate the shreds they each receive, and redistribute the combined set to +subscribers. This is why they can deliver shreds the subscriber's own turbine position +would never see. 
+ +**An open-source equivalent would require running multiple lightweight validator +identities (non-voting, minimal stake) at different locations, each collecting shreds +from their unique turbine tree position, and forwarding them to the main validator.** +No known open-source project implements this pattern. + +## Sources + +- [Jito ShredStream docs](https://docs.jito.wtf/lowlatencytxnfeed/) +- [shredstream-proxy repo](https://github.com/jito-labs/shredstream-proxy) +- [bloXroute OFR docs](https://docs.bloxroute.com/solana/optimized-feed-relay) +- [bloXroute pricing](https://bloxroute.com/pricing/) +- [bloXroute OFR intro](https://bloxroute.com/pulse/introducing-ofrs-faster-shreds-better-performance-on-solana/) +- [DZ multicast announcement](https://doublezero.xyz/journal/doublezero-introduces-multicast-support-smarter-faster-data-delivery-for-distributed-systems) + +## Request Template + +When contacting a group owner, use something like: + +> We'd like to subscribe to your DoubleZero multicast group for our Solana +> validator. Our details: +> +> - Validator: 4WeLUxfQghbhsLEuwaAzjZiHg2VBw87vqHc4iZrGvKPr +> - DZ identity: 3Bw6v7EruQvTwoY79h2QjQCs2KBQFzSneBdYUbcXK1Tr +> - Client IP: 186.233.184.235 +> - Device: laconic-mia-sw01 +> - Tenant: laconic diff --git a/docs/doublezero-status.md b/docs/doublezero-status.md new file mode 100644 index 00000000..92296ca9 --- /dev/null +++ b/docs/doublezero-status.md @@ -0,0 +1,121 @@ +# DoubleZero Current State and Bug Fixes + +## Biscayne Connection Details + +| Field | Value | +|-------|-------| +| Host | biscayne.vaasl.io (186.233.184.235) | +| DZ identity | `3Bw6v7EruQvTwoY79h2QjQCs2KBQFzSneBdYUbcXK1Tr` | +| Validator identity | `4WeLUxfQghbhsLEuwaAzjZiHg2VBw87vqHc4iZrGvKPr` | +| Nearest device | laconic-mia-sw01 (0.3ms) | +| DZ version (host) | 0.8.10 | +| DZ version (container) | 0.8.11 | +| k8s version | 1.35.1 (kind) | + +## Current State (2026-03-03) + +The host systemd `doublezerod` is connected and working. 
The container sidecar +doublezerod is broken. Both are running simultaneously. + +| Instance | Identity | Status | +|----------|----------|--------| +| Host systemd | `3Bw6v7...` (correct) | BGP Session Up, IBRL to laconic-mia-sw01 | +| Container sidecar | `Cw9qun...` (wrong) | Disconnected, error loop | +| DaemonSet manifest | N/A | Never applied, dead code | + +### Access pass + +The access pass for 186.233.184.235 is registered and connected: + +``` +type: prepaid +payer: 3Bw6v7EruQvTwoY79h2QjQCs2KBQFzSneBdYUbcXK1Tr +status: connected +owner: DZfLKFDgLShjY34WqXdVVzHUvVtrYXb7UtdrALnGa8jw +``` + +## Bugs + +### BUG-1: Container doublezerod has wrong identity + +The entrypoint script (`entrypoint.sh`) auto-generates a new `id.json` if one isn't +found. The volume at `/srv/deployments/agave/data/doublezero-config/` was empty at +first boot, so it generated `Cw9qun...` instead of using the registered identity. + +**Root cause:** The real `id.json` lives at `/home/solana/.config/doublezero/id.json` +(created by the host-level DZ install). The container volume is a separate path that +was never seeded. + +**Fix:** +```bash +sudo cp /home/solana/.config/doublezero/id.json \ + /srv/deployments/agave/data/doublezero-config/id.json +``` + +### BUG-2: Container doublezerod can't resolve DZ passport program + +`DOUBLEZERO_RPC_ENDPOINT` in `spec.yml` is `http://127.0.0.1:8899` — the local +validator. But the local validator hasn't replayed enough slots to have the DZ +passport program accounts (`ser2VaTMAcYTaauMrTSfSrxBaUDq7BLNs2xfUugTAGv`). +doublezerod calls `GetProgramAccounts` every 30 seconds and gets empty results. + +**Fix in `deployment/spec.yml`:** +```yaml +# Use public RPC for DZ bootstrapping until local validator is caught up +DOUBLEZERO_RPC_ENDPOINT: https://api.mainnet-beta.solana.com +``` + +Switch back to `http://127.0.0.1:8899` once the local validator is synced. 
+ +### BUG-3: Container doublezerod lacks hostNetwork + +laconic-so was not translating `network_mode: host` from compose files to +`hostNetwork: true` in generated k8s pod specs. Without host network access, the +container can't create GRE tunnels (IP proto 47) or run BGP (tcp/179 on +169.254.0.0/16). + +**Fix:** Deploy with stack-orchestrator branch `fix/k8s-port-mappings-hostnetwork-v2` +(commit `fb69cc58`, 2026-03-03) which adds automatic hostNetwork detection. + +### BUG-4: DaemonSet workaround is dead code + +`deployment/k8s-manifests/doublezero-daemonset.yaml` was a workaround for BUG-3. +Now that laconic-so supports hostNetwork natively, it should be deleted. + +**Fix:** Remove `deployment/k8s-manifests/doublezero-daemonset.yaml` from agave-stack. + +### BUG-5: Two doublezerod instances running simultaneously + +The host systemd `doublezerod` and the container sidecar are both running. Once the +container is fixed (BUG-1 through BUG-3), the host service must be disabled to avoid +two processes fighting over the GRE tunnel. + +**Fix:** +```bash +sudo systemctl stop doublezerod +sudo systemctl disable doublezerod +``` + +## Diagnostic Commands + +Always use `sudo -u solana` for host-level DZ commands — the identity is under +`/home/solana/.config/doublezero/`. + +```bash +# Host +sudo -u solana doublezero address # expect 3Bw6v7... 
+sudo -u solana doublezero status # tunnel state +sudo -u solana doublezero latency # device reachability +sudo -u solana doublezero access-pass list | grep 186.233.184 # access pass +sudo -u solana doublezero balance # credits +ip route | grep doublezero0 # BGP routes + +# Container (from kind node) +kubectl -n $NS exec $POD -c doublezerod -- doublezero address +kubectl -n $NS exec $POD -c doublezerod -- doublezero status +kubectl -n $NS exec $POD -c doublezerod -- doublezero --version + +# Logs +kubectl -n $NS logs $POD -c doublezerod --tail=30 +sudo journalctl -u doublezerod -f # host systemd logs +``` diff --git a/docs/feature-kind-local-registry.md b/docs/feature-kind-local-registry.md new file mode 100644 index 00000000..2705bee4 --- /dev/null +++ b/docs/feature-kind-local-registry.md @@ -0,0 +1,65 @@ +# Feature: Use local registry for kind image loading + +## Summary + +`laconic-so deployment start` uses `kind load docker-image` to copy container images from the host Docker daemon into the kind node's containerd. This serializes the full image (`docker save`), pipes it through `docker exec`, and deserializes it (`ctr image import`). For biscayne's ~837MB agave image plus the doublezero image, this takes 5-10 minutes on every cluster recreate — copying between two container runtimes on the same machine. + +## Current behavior + +``` +docker build → host Docker daemon (image stored once) +kind load docker-image → docker save | docker exec kind-node ctr import (full copy) +``` + +This happens in `stack_orchestrator/deploy/k8s/deploy_k8s.py` every time `laconic-so deployment start` runs and the image isn't already present in the kind node. + +## Proposed behavior + +Run a persistent local registry (`registry:2`) on the host. `laconic-so` pushes images there after build. Kind's containerd is configured to pull from it. 
+ +``` +docker build → docker tag localhost:5001/image → docker push localhost:5001/image +kind node containerd → pulls from localhost:5001 (fast, no serialization) +``` + +The registry container persists across kind cluster deletions. Images are always available without reloading. + +## Implementation + +1. **Registry container**: `docker run -d --restart=always -p 5001:5000 --name kind-registry registry:2` + +2. **Kind config** — add registry mirror to `containerdConfigPatches` in kind-config.yml: + ```yaml + containerdConfigPatches: + - |- + [plugins."io.containerd.grpc.v1.cri".registry.mirrors."localhost:5001"] + endpoint = ["http://kind-registry:5000"] + ``` + +3. **Connect registry to kind network**: `docker network connect kind kind-registry` + +4. **laconic-so change** — in `deploy_k8s.py`, replace `kind load docker-image` with: + ```python + # Tag and push to local registry instead of kind load + docker tag image:local localhost:5001/image:local + docker push localhost:5001/image:local + ``` + +5. **Compose files** — image references change from `laconicnetwork/agave:local` to `localhost:5001/laconicnetwork/agave:local` + +Kind documents this pattern: https://kind.sigs.k8s.io/docs/user/local-registry/ + +## Impact + +- Eliminates 5-10 minute image loading step on every cluster recreate +- Registry persists across `kind delete cluster` — no re-push needed unless the image itself changes +- `docker push` to a local registry is near-instant (shared filesystem, layer dedup) +- Unblocks faster iteration on redeploy cycles + +## Scope + +This is a `stack-orchestrator` change, specifically in `deploy_k8s.py`. The kind-config.yml also needs the registry mirror config, which `laconic-so` generates from `spec.yml`. + +## Discovered + +2026-03-05 — during biscayne full wipe redeploy, `laconic-so start` spent most of its runtime on `kind load docker-image`, causing ansible timeouts and cascading failures (caddy ingress conflict bug). 
diff --git a/docs/known-issues.md b/docs/known-issues.md new file mode 100644 index 00000000..1ae42433 --- /dev/null +++ b/docs/known-issues.md @@ -0,0 +1,78 @@ +# Known Issues + +## BUG-6: Validator logging not configured, only stdout available + +**Observed:** 2026-03-03 + +The validator only logs to stdout. kubectl logs retains ~2 minutes of history +at current log volume before the buffer fills. When diagnosing a replay stall, +the startup logs (snapshot load, initial replay, error conditions) were gone. + +**Impact:** Cannot determine why the validator replay stage stalled — the +startup logs that would show the root cause are not available. + +**Fix:** Configure the `--log` flag in the validator start script to write to +a persistent volume, so logs survive container restarts and aren't limited +to the kubectl buffer. + +## BUG-7: Metrics endpoint unreachable from validator pod + +**Observed:** 2026-03-03 + +``` +WARN solana_metrics::metrics submit error: error sending request for url +(http://localhost:8086/write?db=agave_metrics&u=admin&p=admin&precision=n) +``` + +The validator is configured with `SOLANA_METRICS_CONFIG` pointing to +`http://172.20.0.1:8086` (the kind docker bridge gateway), but the logs show +it trying `localhost:8086`. The InfluxDB container (`solana-monitoring-influxdb-1`) +is running on the host, but the validator can't reach it. + +**Impact:** No metrics collection. Cannot use Grafana dashboards to diagnose +performance issues or track sync progress over time. + +## BUG-8: sysctl values not visible inside kind container + +**Observed:** 2026-03-03 + +``` +ERROR solana_core::system_monitor_service Failed to query value for net.core.rmem_max: no such sysctl +WARN solana_core::system_monitor_service net.core.rmem_max: recommended=134217728, current=-1 too small +``` + +The host has correct sysctl values (`net.core.rmem_max = 134217728`), but +`/proc/sys/net/core/` does not exist inside the kind node container. 
The +validator reads `-1` and reports the buffer as too small. + +The network buffers themselves may still be effective (they're set on the +host network namespace which the pod shares via `hostNetwork: true`), but +this is unverified. If the buffers are not effective, it could limit shred +ingestion throughput and contribute to slow repair. + +**Fix options:** +- Set sysctls on the kind node container at creation time + (`kind` supports `kubeadmConfigPatches` and sysctl configuration) +- Verify empirically whether the host sysctls apply to hostNetwork pods + by checking actual socket buffer sizes from inside the pod + +## Validator replay stall (under investigation) + +**Observed:** 2026-03-03 + +The validator root has been stuck at slot 403,892,310 for 55+ minutes. +The gap to the cluster tip is ~120,000 slots and growing. + +**Observed symptoms:** +- Zero `Frozen` banks in log history — replay stage is not processing slots +- All incoming slots show `bank_status: Unprocessed` +- Repair only requests tip slots and two specific old slots (403,892,310, + 403,909,228) — not the ~120k slot gap +- Repair peer count is 3-12 per cycle (vs 1,000+ gossip peers) +- Startup logs have rotated out (BUG-6), so initialization context is lost + +**Unknown:** +- What snapshot the validator loaded at boot +- Whether replay ever started or was blocked from the beginning +- Whether the sysctl issue (BUG-8) is limiting repair throughput +- Whether the missing metrics (BUG-7) would show what's happening internally diff --git a/docs/shred-collector-relay.md b/docs/shred-collector-relay.md new file mode 100644 index 00000000..c6956ecd --- /dev/null +++ b/docs/shred-collector-relay.md @@ -0,0 +1,191 @@ +# Shred Collector Relay + +## Problem + +Turbine assigns each validator a single position in the shred distribution tree +per slot, determined by its pubkey. 
A validator in Miami with one identity receives +shreds from one set of tree neighbors — typically ~60-70% of shreds for any given +slot. The remaining 30-40% must come from the repair protocol, which is too slow +to keep pace with chain production (see analysis below). + +Commercial services (Jito ShredStream, bloXroute OFR) solve this by running many +nodes with different identities across the turbine tree, aggregating shreds, and +redistributing the combined set to subscribers. This works but costs $300-5,000/mo +and adds a dependency on a third party. + +## Concept + +Run lightweight **shred collector** nodes at multiple geographic locations on +the Laconic network (Ashburn, Dallas, etc.). Each collector has its own keypair, +joins gossip with a unique identity, receives turbine shreds from its unique tree +position, and forwards raw shred packets to the main validator in Miami. The main +validator inserts these shreds into its blockstore alongside its own turbine shreds, +increasing completeness toward 100% without relying on repair. + +``` + Turbine Tree + / | \ + / | \ + collector-ash collector-dfw biscayne (main validator) + (Ashburn) (Dallas) (Miami) + identity A identity B identity C + ~60% shreds ~60% shreds ~60% shreds + \ | / + \ | / + → UDP forward via DZ backbone → + | + biscayne blockstore + ~95%+ shreds (union of A∪B∪C) +``` + +Each collector sees a different ~60% slice of the turbine tree. The union of +three independent positions yields ~94% coverage (1 - 0.4³ = 0.936). Four +collectors yield ~97%. The main validator fills the remaining few percent via +repair, which is fast when only 3-6% of shreds are missing. 
+ +## Why This Works + +The math from biscayne's recovery (2026-03-06): + +| Metric | Value | +|--------|-------| +| Compute-bound replay (complete blocks) | 5.2 slots/sec | +| Repair-bound replay (incomplete blocks) | 0.5 slots/sec | +| Chain production rate | 2.5 slots/sec | +| Turbine + relay delivery per identity | ~60-70% | +| Repair bandwidth | ~600 shreds/sec (estimated) | +| Repair needed to converge at 60% delivery | 5x current bandwidth | +| Repair needed to converge at 95% delivery | Easily sufficient | + +At 60% shred delivery, repair must fill 40% per slot — too slow to converge. +At 95% delivery (3 collectors), repair fills 5% per slot — well within capacity. +The validator replays at near compute-bound speed (5+ slots/sec) and converges. + +## Infrastructure + +Laconic already has DZ-connected switches at multiple sites: + +| Site | Device | Latency to Miami | Backbone | +|------|--------|-------------------|----------| +| Miami | laconic-mia-sw01 | 0.24ms | local | +| Ashburn | laconic-was-sw01 | ~29ms | Et4/1 25.4ms | +| Dallas | laconic-dfw-sw01 | ~30ms | TBD | + +The DZ backbone carries traffic between sites at line rate. Shred packets are +~1280 bytes each. At ~3,000 shreds/slot and 2.5 slots/sec, each collector +forwards ~7,500 packets/sec (~10 MB/s) — trivial bandwidth for the backbone. + +## Collector Architecture + +The collector does NOT need to be a full validator. It needs to: + +1. **Join gossip** — advertise a ContactInfo with its own pubkey and a TVU + address (the site's IP) +2. **Receive turbine shreds** — UDP packets on the advertised TVU port +3. **Forward shreds** — retransmit raw UDP packets to biscayne's TVU port + +It does NOT need to: replay transactions, maintain accounts state, store a +ledger, load a snapshot, vote, or run RPC. + +### Option A: Firedancer Minimal Build + +Firedancer (Apache 2, C) has a tile-based architecture where each function +(net, gossip, shred, bank, store, etc.) runs as an independent Linux process. 
+A minimal build using only the networking + gossip + shred tiles would: + +- Join gossip and advertise a TVU address +- Receive turbine shreds via the shred tile +- Forward shreds to a configured destination instead of to bank/store + +This requires modifying the shred tile to add a UDP forwarder output instead +of (or in addition to) the normal bank handoff. The rest of the tile pipeline +(bank, pack, poh, store) is simply not started. + +**Estimated effort:** Moderate. Firedancer's tile architecture is designed for +this kind of composition. The main work is adding a forwarder sink to the shred +tile and testing gossip participation without the full validator stack. + +**Source:** https://github.com/firedancer-io/firedancer + +### Option B: Agave Non-Voting Minimal + +Run `agave-validator --no-voting` with `--limit-ledger-size 0` and minimal +config. Agave still requires a snapshot to start and runs the full process, but +with no voting and minimal ledger it would be lighter than a full node. + +**Downside:** Agave is monolithic — you can't easily disable replay/accounts. +It still loads a snapshot, builds the accounts index, and runs replay. This +defeats the purpose of a lightweight collector. + +### Option C: Custom Gossip + TVU Receiver + +Write a minimal Rust binary using agave's `solana-gossip` and `solana-streamer` +crates to: +1. Bootstrap into gossip via entrypoints +2. Advertise ContactInfo with TVU socket +3. Receive shred packets on TVU +4. Forward them via UDP + +**Estimated effort:** Significant. Gossip protocol participation is complex +(CRDS protocol, pull/push protocol, protocol versioning). Using the agave +crates directly is possible but poorly documented for standalone use. + +### Option D: Run Collectors on Biscayne + +Run the collector processes on biscayne itself, each advertising a TVU address +at a remote site. 
The switches at each site forward inbound TVU traffic to +biscayne via the DZ backbone using traffic-policy redirects (same pattern as +`ashburn-validator-relay.md`). + +**Advantage:** No compute needed at remote sites. Just switch config + loopback +IPs. All collector processes run in Miami. + +**Risk:** Gossip advertises IP + port. If the collector runs on biscayne but +advertises an Ashburn IP, gossip protocol interactions (pull requests, pings) +arrive at the Ashburn IP and must be forwarded back to biscayne. This adds +~58ms RTT to gossip protocol messages, which may cause timeouts or peer +quality degradation. Needs testing. + +## Recommendation + +Option A (Firedancer minimal build) is the correct long-term approach. It +produces a single binary that does exactly one thing: collect shreds from a +unique turbine tree position and forward them. It runs on minimal hardware +(a small VM or container at each site, or on biscayne with remote TVU +addresses). + +Option D (collectors on biscayne with switch forwarding) is the fastest to +test since it needs no new software — just switch config and multiple +agave-validator instances with `--no-voting`. The question is whether agave +can start without a snapshot if we only care about gossip + TVU. + +## Deployment Topology + +``` +biscayne (186.233.184.235) +├── agave-validator (main, identity C, TVU 186.233.184.235:9000) +├── collector-ash (identity A, TVU 137.239.194.65:9000) +│ └── shreds forwarded via was-sw01 traffic-policy +├── collector-dfw (identity B, TVU TBD:9000) +│ └── shreds forwarded via dfw-sw01 traffic-policy +└── blockstore receives union of A∪B∪C shreds + +was-sw01 (Ashburn) +└── Loopback: 137.239.194.65 +└── traffic-policy: UDP dst 137.239.194.65:9000 → nexthop mia-sw01 + +dfw-sw01 (Dallas) +└── Loopback: TBD +└── traffic-policy: UDP dst TBD:9000 → nexthop mia-sw01 +``` + +## Open Questions + +1. Can agave-validator start in gossip-only mode without a snapshot? +2. 
Does Firedancer's shred tile work standalone without bank/replay? +3. What is the gossip protocol timeout for remote TVU addresses (Option D)? +4. How does the turbine tree handle multiple identities from the same IP + (if running all collectors on biscayne)? +5. Do we need stake on collector identities to be placed in the turbine tree, + or do unstaked nodes still participate? +6. What IP block is available on dfw-sw01 for a collector loopback? diff --git a/docs/tvu-shred-relay.md b/docs/tvu-shred-relay.md new file mode 100644 index 00000000..d21c9073 --- /dev/null +++ b/docs/tvu-shred-relay.md @@ -0,0 +1,161 @@ +# TVU Shred Relay — Data-Plane Redirect + +## Overview + +Biscayne's agave validator advertises `64.92.84.81:20000` (laconic-was-sw01 Et1/1) as its TVU +address. Turbine shreds arrive as normal UDP to the switch's front-panel IP. The 7280CR3A ASIC +handles front-panel traffic without punting to Linux userspace — it sees a local interface IP +with no service and drops at the hardware level. + +### Previous approach (monitor + socat) + +EOS monitor session mirrored matched packets to CPU (mirror0 interface). socat read from mirror0 +and relayed to biscayne. shred-unwrap.py on biscayne stripped encapsulation headers. + +Fragile: socat ran as a foreground process, died on disconnect. + +### New approach (traffic-policy redirect) + +EOS `traffic-policy` with `set nexthop` and `system-rule overriding-action redirect` overrides +the ASIC's "local IP, handle myself" decision. The ASIC forwards matched packets to the +specified next-hop at line rate. Pure data plane, no CPU involvement, persists in startup-config. + +Available since EOS 4.28.0F on R3 platforms. Confirmed on 4.34.0F. 
+ +## Architecture + +``` +Turbine peers (hundreds of validators) + | + v UDP shreds to 64.92.84.81:20000 +laconic-was-sw01 Et1/1 (Ashburn) + | ASIC matches traffic-policy SHRED-RELAY + | Redirects to nexthop 172.16.1.189 (data plane, line rate) + v Et4/1 backbone (25.4ms) +laconic-mia-sw01 Et4/1 (Miami) + | forwards via default route (same metro) + v 0.13ms +biscayne (186.233.184.235, Miami) + | iptables DNAT: dst 64.92.84.81:20000 -> 127.0.0.1:9000 + v +agave-validator TVU port (localhost:9000) +``` + +## Production Config: laconic-was-sw01 + +### Pre-change safety + +``` +configure checkpoint save pre-shred-relay +``` + +Rollback: `rollback running-config checkpoint pre-shred-relay` then `write memory`. + +### Config session with auto-revert + +``` +configure session shred-relay + +! ACL for traffic-policy match +ip access-list SHRED-RELAY-ACL + 10 permit udp any any eq 20000 + +! Traffic policy: redirect matched packets to backbone next-hop +traffic-policy SHRED-RELAY + match SHRED-RELAY-ACL + set nexthop 172.16.1.189 + +! Override the ASIC's local-delivery decision so matched packets are redirected +system-rule overriding-action redirect + +! Apply to Et1/1 ingress +interface Ethernet1/1 + traffic-policy input SHRED-RELAY + +! Remove old monitor session and its ACL +no monitor session 1 +no ip access-list SHRED-RELAY + +! Review before committing +show session-config diffs + +! Commit with 5-minute auto-revert safety net +commit timer 00:05:00 +``` + +After verification: `configure session shred-relay commit` then `write memory`. + +### Linux cleanup on was-sw01 + +```bash +# Kill socat relay (PID 27743) +kill 27743 +# Remove Linux kernel route +ip route del 186.233.184.235/32 +``` + +The EOS static route `ip route 186.233.184.235/32 172.16.1.189` stays (general reachability). + +## Production Config: biscayne + +### iptables DNAT + +Traffic-policy sends normal L3-forwarded UDP packets (no mirror encapsulation). 
Packets arrive +with dst `64.92.84.81:20000` containing clean shred payloads directly in the UDP body. + +```bash +sudo iptables -t nat -A PREROUTING -p udp -d 64.92.84.81 --dport 20000 \ + -j DNAT --to-destination 127.0.0.1:9000 + +# Persist across reboot +sudo apt install -y iptables-persistent +sudo netfilter-persistent save +``` + +### Cleanup + +```bash +# Kill shred-unwrap.py (PID 2497694) +kill 2497694 +rm /tmp/shred-unwrap.py +``` + +## Verification + +1. `show traffic-policy interface Ethernet1/1` — policy applied +2. `show traffic-policy counters` — packets matching and redirected +3. `sudo iptables -t nat -L PREROUTING -v -n` — DNAT rule with packet counts +4. Validator logs: slot replay rate should maintain ~3.3 slots/sec +5. `ss -unp | grep 9000` — validator receiving on TVU port + +## What was removed + +| Component | Host | +|-----------|------| +| monitor session 1 | was-sw01 | +| SHRED-RELAY ACL (old) | was-sw01 | +| socat relay process | was-sw01 | +| Linux kernel static route | was-sw01 | +| shred-unwrap.py | biscayne | + +## What was added + +| Component | Host | Persistent? 
| +|-----------|------|-------------| +| traffic-policy SHRED-RELAY | was-sw01 | Yes (startup-config) | +| SHRED-RELAY-ACL | was-sw01 | Yes (startup-config) | +| system-rule overriding-action redirect | was-sw01 | Yes (startup-config) | +| iptables DNAT rule | biscayne | Yes (iptables-persistent) | + +## Key Details + +| Item | Value | +|------|-------| +| Biscayne validator identity | `4WeLUxfQghbhsLEuwaAzjZiHg2VBw87vqHc4iZrGvKPr` | +| Biscayne IP | `186.233.184.235` | +| laconic-was-sw01 public IP | `64.92.84.81` (Et1/1) | +| laconic-was-sw01 backbone IP | `172.16.1.188` (Et4/1) | +| laconic-was-sw01 SSH | `install@137.239.200.198` | +| laconic-mia-sw01 backbone IP | `172.16.1.189` (Et4/1) | +| Backbone RTT (WAS-MIA) | 25.4ms | +| EOS version | 4.34.0F | diff --git a/inventory/biscayne.yml b/inventory/biscayne.yml new file mode 100644 index 00000000..722a696a --- /dev/null +++ b/inventory/biscayne.yml @@ -0,0 +1,14 @@ +all: + hosts: + biscayne: + ansible_host: biscayne.vaasl.io + ansible_user: rix + ansible_become: true + + # DoubleZero identities + dz_identity: 3Bw6v7EruQvTwoY79h2QjQCs2KBQFzSneBdYUbcXK1Tr + validator_identity: 4WeLUxfQghbhsLEuwaAzjZiHg2VBw87vqHc4iZrGvKPr + client_ip: 186.233.184.235 + dz_device: laconic-mia-sw01 + dz_tenant: laconic + dz_environment: mainnet-beta diff --git a/inventory/switches.yml b/inventory/switches.yml new file mode 100644 index 00000000..4d02a9ee --- /dev/null +++ b/inventory/switches.yml @@ -0,0 +1,23 @@ +all: + children: + switches: + vars: + ansible_connection: ansible.netcommon.network_cli + ansible_network_os: arista.eos.eos + ansible_user: install + ansible_become: true + ansible_become_method: enable + hosts: + was-sw01: + ansible_host: 137.239.200.198 + # Et1/1: 64.92.84.81 (Ashburn uplink) + # Et4/1: 172.16.1.188 (backbone to mia-sw01) + # Loopback100: 137.239.194.64/32 + backbone_ip: 172.16.1.188 + backbone_peer: 172.16.1.189 + uplink_gateway: 64.92.84.80 + mia-sw01: + ansible_host: 209.42.167.133 + # Et4/1: 
172.16.1.189 (backbone to was-sw01) + backbone_ip: 172.16.1.189 + backbone_peer: 172.16.1.188 diff --git a/playbooks/ashburn-relay-biscayne.yml b/playbooks/ashburn-relay-biscayne.yml index 75053483..09e0ff74 100644 --- a/playbooks/ashburn-relay-biscayne.yml +++ b/playbooks/ashburn-relay-biscayne.yml @@ -156,73 +156,62 @@ failed_when: "add_ip.rc != 0 and 'RTNETLINK answers: File exists' not in add_ip.stderr" tags: [inbound] - - name: Add DNAT for gossip UDP - ansible.builtin.iptables: - table: nat - chain: PREROUTING - protocol: udp - destination: "{{ ashburn_ip }}" - destination_port: "{{ gossip_port }}" - jump: DNAT - to_destination: "{{ kind_node_ip }}:{{ gossip_port }}" + - name: Add DNAT rules (inserted before DOCKER chain) + ansible.builtin.shell: + cmd: | + set -o pipefail + # DNAT rules must be before Docker's ADDRTYPE LOCAL rule, otherwise + # Docker's PREROUTING chain swallows traffic to 137.239.194.65 (which + # is on loopback and therefore type LOCAL). + for rule in \ + "-p udp -d {{ ashburn_ip }} --dport {{ gossip_port }} -j DNAT --to-destination {{ kind_node_ip }}:{{ gossip_port }}" \ + "-p tcp -d {{ ashburn_ip }} --dport {{ gossip_port }} -j DNAT --to-destination {{ kind_node_ip }}:{{ gossip_port }}" \ + "-p udp -d {{ ashburn_ip }} --dport {{ dynamic_port_range_start }}:{{ dynamic_port_range_end }} -j DNAT --to-destination {{ kind_node_ip }}" \ + ; do + if ! 
iptables -t nat -C PREROUTING $rule 2>/dev/null; then + iptables -t nat -I PREROUTING 1 $rule + echo "added: $rule" + else + echo "exists: $rule" + fi + done + executable: /bin/bash + register: dnat_result + changed_when: "'added' in dnat_result.stdout" tags: [inbound] - - name: Add DNAT for gossip TCP - ansible.builtin.iptables: - table: nat - chain: PREROUTING - protocol: tcp - destination: "{{ ashburn_ip }}" - destination_port: "{{ gossip_port }}" - jump: DNAT - to_destination: "{{ kind_node_ip }}:{{ gossip_port }}" - tags: [inbound] - - - name: Add DNAT for dynamic ports (UDP 9000-9025) - ansible.builtin.iptables: - table: nat - chain: PREROUTING - protocol: udp - destination: "{{ ashburn_ip }}" - destination_port: "{{ dynamic_port_range_start }}:{{ dynamic_port_range_end }}" - jump: DNAT - to_destination: "{{ kind_node_ip }}" + - name: Show DNAT result + ansible.builtin.debug: + var: dnat_result.stdout_lines tags: [inbound] # ------------------------------------------------------------------ # Outbound: fwmark + SNAT + policy routing # ------------------------------------------------------------------ - - name: Mark outbound validator UDP gossip traffic - ansible.builtin.iptables: - table: mangle - chain: PREROUTING - protocol: udp - source: "{{ kind_network }}" - source_port: "{{ gossip_port }}" - jump: MARK - set_mark: "{{ fwmark }}" + - name: Mark outbound validator traffic (mangle PREROUTING) + ansible.builtin.shell: + cmd: | + set -o pipefail + for rule in \ + "-p udp -s {{ kind_network }} --sport {{ gossip_port }} -j MARK --set-mark {{ fwmark }}" \ + "-p udp -s {{ kind_network }} --sport {{ dynamic_port_range_start }}:{{ dynamic_port_range_end }} -j MARK --set-mark {{ fwmark }}" \ + "-p tcp -s {{ kind_network }} --sport {{ gossip_port }} -j MARK --set-mark {{ fwmark }}" \ + ; do + if ! 
iptables -t mangle -C PREROUTING $rule 2>/dev/null; then + iptables -t mangle -A PREROUTING $rule + echo "added: $rule" + else + echo "exists: $rule" + fi + done + executable: /bin/bash + register: mangle_result + changed_when: "'added' in mangle_result.stdout" tags: [outbound] - - name: Mark outbound validator UDP dynamic port traffic - ansible.builtin.iptables: - table: mangle - chain: PREROUTING - protocol: udp - source: "{{ kind_network }}" - source_port: "{{ dynamic_port_range_start }}:{{ dynamic_port_range_end }}" - jump: MARK - set_mark: "{{ fwmark }}" - tags: [outbound] - - - name: Mark outbound validator TCP gossip traffic - ansible.builtin.iptables: - table: mangle - chain: PREROUTING - protocol: tcp - source: "{{ kind_network }}" - source_port: "{{ gossip_port }}" - jump: MARK - set_mark: "{{ fwmark }}" + - name: Show mangle result + ansible.builtin.debug: + var: mangle_result.stdout_lines tags: [outbound] - name: SNAT marked traffic to Ashburn IP (before Docker MASQUERADE) @@ -337,7 +326,7 @@ nat_rules: "{{ nat_rules.stdout_lines }}" mangle_rules: "{{ mangle_rules.stdout_lines | default([]) }}" routing: "{{ routing_info.stdout_lines | default([]) }}" - loopback: "{{ lo_addrs.stdout_lines }}" + loopback: "{{ lo_addrs.stdout_lines | default([]) }}" tags: [inbound, outbound] - name: Summary diff --git a/playbooks/ashburn-relay-mia-sw01.yml b/playbooks/ashburn-relay-mia-sw01.yml index 6af443ad..76e08082 100644 --- a/playbooks/ashburn-relay-mia-sw01.yml +++ b/playbooks/ashburn-relay-mia-sw01.yml @@ -1,14 +1,19 @@ --- -# Configure laconic-mia-sw01 for outbound validator traffic redirect +# Configure laconic-mia-sw01 for validator traffic relay (inbound + outbound) # -# Redirects outbound traffic from biscayne (src 137.239.194.65) arriving -# via the doublezero0 GRE tunnel to was-sw01 via the backbone, preventing -# BCP38 drops at mia-sw01's ISP uplink. 
+# Outbound: Redirects outbound traffic from biscayne (src 137.239.194.65) +# arriving via the doublezero0 GRE tunnel to was-sw01 via the backbone, +# preventing BCP38 drops at mia-sw01's ISP uplink. +# +# Inbound: Routes traffic destined to 137.239.194.65 from the default VRF +# to biscayne via Tunnel500 in vrf1. Without this, mia-sw01 sends +# 137.239.194.65 out the ISP uplink back to was-sw01 (routing loop). # # Approach: The existing per-tunnel ACL (SEC-USER-500-IN) controls what # traffic enters vrf1 from Tunnel500. We add 137.239.194.65 to the ACL # and add a default route in vrf1 via egress-vrf default pointing to -# was-sw01's backbone IP. No PBR needed — the ACL is the filter. +# was-sw01's backbone IP. For inbound, an inter-VRF static route in the +# default VRF forwards 137.239.194.65/32 to biscayne via Tunnel500. # # The other vrf1 tunnels (502, 504, 505) have their own ACLs that only # permit their specific source IPs, so the default route won't affect them. @@ -39,6 +44,7 @@ tunnel_interface: Tunnel500 tunnel_vrf: vrf1 tunnel_acl: SEC-USER-500-IN + tunnel_nexthop: 169.254.7.7 # biscayne's end of the Tunnel500 /31 backbone_interface: Ethernet4/1 session_name: validator-outbound checkpoint_name: pre-validator-outbound @@ -117,6 +123,7 @@ - "show ip route vrf {{ tunnel_vrf }} 0.0.0.0/0" - "show ip route vrf {{ tunnel_vrf }} {{ backbone_peer }}" - "show ip route {{ backbone_peer }}" + - "show ip route {{ ashburn_ip }}" register: vrf_routing tags: [preflight] @@ -163,6 +170,11 @@ # Default route in vrf1 via backbone to was-sw01 (egress-vrf default) # Safe because per-tunnel ACLs already restrict what enters vrf1 - command: "ip route vrf {{ tunnel_vrf }} 0.0.0.0/0 egress-vrf default {{ backbone_interface }} {{ backbone_peer }}" + # Inbound: route traffic for ashburn IP from default VRF to biscayne via tunnel. + # Without this, mia-sw01 sends 137.239.194.65 out the ISP uplink → routing loop. 
+ # NOTE: nexthop only, no interface — EOS silently drops cross-VRF routes that + # specify a tunnel interface (accepts in config but never installs in RIB). + - command: "ip route {{ ashburn_ip }}/32 egress-vrf {{ tunnel_vrf }} {{ tunnel_nexthop }}" - name: Show session diff arista.eos.eos_command: @@ -189,6 +201,7 @@ commands: - "show running-config | section ip access-list {{ tunnel_acl }}" - "show ip route vrf {{ tunnel_vrf }} 0.0.0.0/0" + - "show ip route {{ ashburn_ip }}" register: verify - name: Display verification @@ -205,6 +218,7 @@ Changes applied: 1. ACL {{ tunnel_acl }}: added "45 permit ip host {{ ashburn_ip }} any" 2. Default route in {{ tunnel_vrf }}: 0.0.0.0/0 egress-vrf default {{ backbone_interface }} {{ backbone_peer }} + 3. Inbound route: {{ ashburn_ip }}/32 egress-vrf {{ tunnel_vrf }} {{ tunnel_nexthop }} The config will auto-revert in 5 minutes unless committed. Verify on the switch, then commit: diff --git a/playbooks/ashburn-relay-was-sw01.yml b/playbooks/ashburn-relay-was-sw01.yml index 1566fb0a..7e727873 100644 --- a/playbooks/ashburn-relay-was-sw01.yml +++ b/playbooks/ashburn-relay-was-sw01.yml @@ -1,15 +1,20 @@ --- -# Configure laconic-was-sw01 for full validator traffic relay +# Configure laconic-was-sw01 for inbound validator traffic relay # -# Replaces the old SHRED-RELAY (TVU-only, port 20000) with VALIDATOR-RELAY -# covering all validator ports (8001, 9000-9025). Adds Loopback101 for -# 137.239.194.65. +# Routes all traffic destined to 137.239.194.65 to mia-sw01 via backbone. +# A single static route replaces the previous Loopback101 + PBR approach. # -# Uses EOS config session with 5-minute auto-revert for safety. -# After verification, run with -e commit=true to finalize. +# 137.239.194.65 is already routed to was-sw01 by its covering prefix +# (advertised via IS-IS on Loopback100). No loopback needed — the static +# route forwards traffic before the switch tries to deliver it locally. 
+# +# This playbook also removes the old PBR config if present (Loopback101, +# VALIDATOR-RELAY-ACL, VALIDATOR-RELAY-CLASS, VALIDATOR-RELAY policy-map, +# service-policy on Et1/1). # # Usage: # ansible-playbook -i inventory/switches.yml playbooks/ashburn-relay-was-sw01.yml +# ansible-playbook -i inventory/switches.yml playbooks/ashburn-relay-was-sw01.yml -e apply=true # ansible-playbook -i inventory/switches.yml playbooks/ashburn-relay-was-sw01.yml -e commit=true # ansible-playbook -i inventory/switches.yml playbooks/ashburn-relay-was-sw01.yml -e rollback=true @@ -19,10 +24,11 @@ vars: ashburn_ip: 137.239.194.65 + apply: false commit: false rollback: false - session_name: validator-relay - checkpoint_name: pre-validator-relay + session_name: validator-relay-v2 + checkpoint_name: pre-validator-relay-v2 tasks: # ------------------------------------------------------------------ @@ -66,77 +72,78 @@ ansible.builtin.meta: end_play # ------------------------------------------------------------------ - # Pre-checks + # Pre-flight checks # ------------------------------------------------------------------ - - name: Show current traffic-policy on Et1/1 + - name: Show current Et1/1 config arista.eos.eos_command: commands: - show running-config interfaces Ethernet1/1 register: et1_config + tags: [preflight] - - name: Show current config + - name: Display Et1/1 config ansible.builtin.debug: var: et1_config.stdout_lines + tags: [preflight] - - name: Show existing PBR policy on Et1/1 + - name: Check for existing Loopback101 and PBR arista.eos.eos_command: commands: + - "show running-config interfaces Loopback101" - "show running-config | include service-policy" - register: existing_pbr + - "show running-config section policy-map type pbr" + - "show ip route {{ ashburn_ip }}" + register: existing_config + tags: [preflight] - - name: Show existing PBR config + - name: Display existing config ansible.builtin.debug: - var: existing_pbr.stdout_lines + var: 
existing_config.stdout_lines + tags: [preflight] + + - name: Pre-flight summary + when: not (apply | bool) + ansible.builtin.debug: + msg: | + === Pre-flight complete === + Review the output above: + 1. Does Loopback101 exist with {{ ashburn_ip }}? (will be removed) + 2. Is service-policy VALIDATOR-RELAY on Et1/1? (will be removed) + 3. Current route for {{ ashburn_ip }} + + To apply config: + ansible-playbook -i inventory/switches.yml playbooks/ashburn-relay-was-sw01.yml \ + -e apply=true + tags: [preflight] + + - name: End play if not applying + when: not (apply | bool) + ansible.builtin.meta: end_play # ------------------------------------------------------------------ - # Save checkpoint + # Apply config via session with 5-minute auto-revert # ------------------------------------------------------------------ - - name: Save checkpoint for rollback + - name: Save checkpoint arista.eos.eos_command: commands: - "configure checkpoint save {{ checkpoint_name }}" - register: checkpoint_result - - name: Show checkpoint result - ansible.builtin.debug: - var: checkpoint_result.stdout_lines - - # ------------------------------------------------------------------ - # Apply via config session with 5-minute auto-revert - # - # eos_config writes directly to running-config, bypassing sessions. - # Use eos_command with raw CLI to get the safety net. 
- # ------------------------------------------------------------------ - - name: Apply config session with auto-revert + - name: Apply config session arista.eos.eos_command: commands: - # Enter named config session - command: "configure session {{ session_name }}" - # Loopback101 for Ashburn IP - - command: interface Loopback101 - - command: "ip address {{ ashburn_ip }}/32" - - command: exit - # ACL covering all validator ports - - command: ip access-list VALIDATOR-RELAY-ACL - - command: 10 permit udp any any eq 8001 - - command: 20 permit udp any any range 9000 9025 - - command: 30 permit tcp any any eq 8001 - - command: exit - # PBR class-map referencing the ACL - - command: class-map type pbr match-any VALIDATOR-RELAY-CLASS - - command: match ip access-group VALIDATOR-RELAY-ACL - - command: exit - # PBR policy-map with nexthop redirect - - command: policy-map type pbr VALIDATOR-RELAY - - command: class VALIDATOR-RELAY-CLASS - - command: "set nexthop {{ backbone_peer }}" - - command: exit - - command: exit - # Apply PBR policy on Et1/1 + # Remove old PBR service-policy from Et1/1 - command: interface Ethernet1/1 - - command: service-policy type pbr input VALIDATOR-RELAY + - command: no service-policy type pbr input VALIDATOR-RELAY - command: exit - tags: [config] + # Remove old PBR policy-map, class-map, ACL + - command: no policy-map type pbr VALIDATOR-RELAY + - command: no class-map type pbr match-any VALIDATOR-RELAY-CLASS + - command: no ip access-list VALIDATOR-RELAY-ACL + # Remove Loopback101 + - command: no interface Loopback101 + # Add static route to forward all traffic for ashburn IP to mia-sw01 + - command: "ip route {{ ashburn_ip }}/32 {{ backbone_peer }}" - name: Show session diff arista.eos.eos_command: @@ -154,32 +161,20 @@ arista.eos.eos_command: commands: - "configure session {{ session_name }} commit timer 00:05:00" - tags: [config] # ------------------------------------------------------------------ # Verify # 
------------------------------------------------------------------ - - name: Show PBR policy on Et1/1 + - name: Verify config arista.eos.eos_command: commands: + - "show ip route {{ ashburn_ip }}" - show running-config interfaces Ethernet1/1 - - show running-config section policy-map - - show ip interface Loopback101 - register: pbr_interface + register: verify - name: Display verification ansible.builtin.debug: - var: pbr_interface.stdout_lines - - - name: Show Loopback101 - arista.eos.eos_command: - commands: - - show ip interface Loopback101 - register: lo101 - - - name: Display Loopback101 - ansible.builtin.debug: - var: lo101.stdout_lines + var: verify.stdout_lines - name: Reminder ansible.builtin.debug: @@ -188,8 +183,12 @@ Session: {{ session_name }} Checkpoint: {{ checkpoint_name }} + Changes applied: + 1. Removed: Loopback101, VALIDATOR-RELAY PBR (ACL, class-map, policy-map, service-policy) + 2. Added: ip route {{ ashburn_ip }}/32 {{ backbone_peer }} + The config will auto-revert in 5 minutes unless committed. - Verify PBR policy is applied, then commit from the switch CLI: + Verify on the switch, then commit: configure session {{ session_name }} commit write memory diff --git a/playbooks/biscayne-boot.yml b/playbooks/biscayne-boot.yml new file mode 100644 index 00000000..2cdd5cad --- /dev/null +++ b/playbooks/biscayne-boot.yml @@ -0,0 +1,107 @@ +--- +# Configure biscayne OS-level services for agave validator +# +# Installs a systemd unit that formats and mounts the ramdisk on boot. +# /dev/ram0 loses its filesystem on reboot, so mkfs.xfs must run before +# the fstab mount. This unit runs before docker, ensuring the kind node's +# bind mounts always see the ramdisk. +# +# This playbook is idempotent — safe to run multiple times. 
+# +# Usage: +# ansible-playbook -i biscayne.vaasl.io, playbooks/biscayne-boot.yml +# +- name: Configure OS-level services for agave + hosts: all + gather_facts: false + become: true + vars: + ramdisk_device: /dev/ram0 + ramdisk_mount: /srv/solana/ramdisk + accounts_dir: /srv/solana/ramdisk/accounts + + tasks: + - name: Install ramdisk format service + copy: + dest: /etc/systemd/system/format-ramdisk.service + mode: "0644" + content: | + [Unit] + Description=Format /dev/ram0 as XFS for Solana accounts + DefaultDependencies=no + Before=local-fs.target + After=systemd-modules-load.service + ConditionPathExists={{ ramdisk_device }} + + [Service] + Type=oneshot + RemainAfterExit=yes + ExecStart=/sbin/mkfs.xfs -f {{ ramdisk_device }} + + [Install] + WantedBy=local-fs.target + register: unit_file + + - name: Install ramdisk post-mount service + copy: + dest: /etc/systemd/system/ramdisk-accounts.service + mode: "0644" + content: | + [Unit] + Description=Create Solana accounts directory on ramdisk + After=srv-solana-ramdisk.mount + Requires=srv-solana-ramdisk.mount + + [Service] + Type=oneshot + RemainAfterExit=yes + ExecStart=/bin/bash -c 'mkdir -p {{ accounts_dir }} && chown solana:solana {{ ramdisk_mount }} {{ accounts_dir }}' + + [Install] + WantedBy=multi-user.target + register: accounts_unit + + - name: Ensure fstab entry uses nofail + lineinfile: + path: /etc/fstab + regexp: '^{{ ramdisk_device }}\s+{{ ramdisk_mount }}' + line: '{{ ramdisk_device }} {{ ramdisk_mount }} xfs noatime,nodiratime,nofail,x-systemd.requires=format-ramdisk.service 0 0' + register: fstab_entry + + - name: Reload systemd + systemd: + daemon_reload: true + when: unit_file.changed or accounts_unit.changed or fstab_entry.changed + + - name: Enable ramdisk services + systemd: + name: "{{ item }}" + enabled: true + loop: + - format-ramdisk.service + - ramdisk-accounts.service + + # ---- apply now if ramdisk not mounted ------------------------------------ + - name: Check if ramdisk is mounted + 
command: mountpoint -q {{ ramdisk_mount }} + register: ramdisk_mounted + failed_when: false + changed_when: false + + - name: Format and mount ramdisk now + shell: | + mkfs.xfs -f {{ ramdisk_device }} + mount {{ ramdisk_mount }} + mkdir -p {{ accounts_dir }} + chown solana:solana {{ ramdisk_mount }} {{ accounts_dir }} + when: ramdisk_mounted.rc != 0 + + # ---- verify -------------------------------------------------------------- + - name: Verify ramdisk + command: df -hT {{ ramdisk_mount }} + register: ramdisk_df + changed_when: false + + - name: Show ramdisk status + debug: + msg: "{{ ramdisk_df.stdout_lines }}" diff --git a/playbooks/biscayne-recover.yml b/playbooks/biscayne-recover.yml new file mode 100644 index 00000000..fec21a39 --- /dev/null +++ b/playbooks/biscayne-recover.yml @@ -0,0 +1,220 @@ +--- +# Recover agave validator from any state to healthy +# +# This playbook is idempotent — it assesses current state and picks up +# from wherever the system is. Each step checks its precondition and +# skips if already satisfied. +# +# Steps: +# 1. Scale deployment to 0 +# 2. Wait for pods to terminate +# 3. Wipe accounts ramdisk +# 4. Clean old snapshots +# 5. Download fresh snapshot via aria2c +# 6. Verify snapshot accessible via PV (kubectl) +# 7. Scale deployment to 1 +# 8. Wait for pod Running +# 9. Verify validator log shows snapshot unpacking +# 10. 
Check RPC health +# +# Usage: +# ansible-playbook -i biscayne.vaasl.io, playbooks/biscayne-recover.yml +# +# # Pass extra args to snapshot-download.py +# ansible-playbook -i biscayne.vaasl.io, playbooks/biscayne-recover.yml \ +# -e 'snapshot_args=--version 2.2' +# +- name: Recover agave validator + hosts: all + gather_facts: false + environment: + KUBECONFIG: /home/rix/.kube/config + vars: + kind_cluster: laconic-70ce4c4b47e23b85 + k8s_namespace: "laconic-{{ kind_cluster }}" + deployment_name: "{{ kind_cluster }}-deployment" + snapshot_dir: /srv/solana/snapshots + accounts_dir: /srv/solana/ramdisk/accounts + ramdisk_mount: /srv/solana/ramdisk + ramdisk_device: /dev/ram0 + snapshot_script_local: "{{ playbook_dir }}/../scripts/snapshot-download.py" + snapshot_script: /tmp/snapshot-download.py + snapshot_args: "" + # Mainnet RPC for slot comparison + mainnet_rpc: https://api.mainnet-beta.solana.com + # Maximum slots behind before snapshot is considered stale + max_slot_lag: 20000 + + tasks: + # ---- step 1: scale to 0 --------------------------------------------------- + - name: Get current replica count + command: > + kubectl get deployment {{ deployment_name }} + -n {{ k8s_namespace }} + -o jsonpath='{.spec.replicas}' + register: current_replicas + failed_when: false + changed_when: false + + - name: Scale deployment to 0 + command: > + kubectl scale deployment {{ deployment_name }} + -n {{ k8s_namespace }} --replicas=0 + when: current_replicas.stdout | default('0') | int > 0 + changed_when: true + + # ---- step 2: wait for pods to terminate ------------------------------------ + - name: Wait for pods to terminate + command: > + kubectl get pods -n {{ k8s_namespace }} + -l app={{ deployment_name }} + -o jsonpath='{.items}' + register: pods_remaining + retries: 60 + delay: 5 + until: pods_remaining.stdout == "[]" or pods_remaining.stdout == "" + changed_when: false + when: current_replicas.stdout | default('0') | int > 0 + + - name: Verify no agave processes in kind 
node (io_uring safety check) + command: > + docker exec {{ kind_cluster }}-control-plane + pgrep -c agave-validator + register: agave_procs + failed_when: false + changed_when: false + + - name: Fail if agave zombie detected + ansible.builtin.fail: + msg: >- + agave-validator process still running inside kind node after pod + termination. This is the io_uring/ZFS deadlock. Do NOT proceed — + host reboot required. See CLAUDE.md. + when: agave_procs.rc == 0 + + # ---- step 3: wipe accounts ramdisk ----------------------------------------- + # Cannot umount+mkfs because the kind node's bind mount holds it open. + # Instead, delete contents. This is sufficient — agave starts clean. + - name: Wipe accounts data + ansible.builtin.shell: | + rm -rf {{ accounts_dir }}/* + chown solana:solana {{ ramdisk_mount }} {{ accounts_dir }} + become: true + changed_when: true + + # ---- step 4: clean old snapshots ------------------------------------------- + - name: Remove all old snapshots + ansible.builtin.shell: rm -f {{ snapshot_dir }}/*.tar.* {{ snapshot_dir }}/*.tar + become: true + changed_when: true + + # ---- step 5: download fresh snapshot --------------------------------------- + - name: Verify aria2c installed + command: which aria2c + changed_when: false + + - name: Copy snapshot script to remote + ansible.builtin.copy: + src: "{{ snapshot_script_local }}" + dest: "{{ snapshot_script }}" + mode: "0755" + + - name: Download snapshot and scale to 1 + ansible.builtin.shell: | + python3 {{ snapshot_script }} \ + -o {{ snapshot_dir }} \ + --max-snapshot-age {{ max_slot_lag }} \ + --max-latency 500 \ + {{ snapshot_args }} \ + && KUBECONFIG=/home/rix/.kube/config kubectl scale deployment \ + {{ deployment_name }} -n {{ k8s_namespace }} --replicas=1 + become: true + register: snapshot_result + timeout: 3600 + changed_when: true + + # ---- step 6: verify snapshot accessible via PV ----------------------------- + - name: Get snapshot filename + ansible.builtin.shell: ls -1 {{ 
snapshot_dir }}/snapshot-*.tar.* | head -1 | xargs basename
+      register: snapshot_filename
+      changed_when: false
+
+    - name: Extract snapshot slot from filename
+      ansible.builtin.set_fact:
+        snapshot_slot: "{{ snapshot_filename.stdout | regex_search('snapshot-([0-9]+)-', '\\1') | first }}"
+
+    - name: Get current mainnet slot
+      ansible.builtin.uri:
+        url: "{{ mainnet_rpc }}"
+        method: POST
+        body_format: json
+        body:
+          jsonrpc: "2.0"
+          id: 1
+          method: getSlot
+          params:
+            - commitment: finalized
+        return_content: true
+      register: mainnet_slot_response
+
+    - name: Check snapshot freshness
+      ansible.builtin.fail:
+        msg: >-
+          Snapshot too old: slot {{ snapshot_slot }}, mainnet at
+          {{ mainnet_slot_response.json.result }},
+          {{ mainnet_slot_response.json.result | int - snapshot_slot | int }} slots behind
+          (max {{ max_slot_lag }}).
+      when: (mainnet_slot_response.json.result | int - snapshot_slot | int) > (max_slot_lag | int)  # cast: -e overrides arrive as strings
+
+    - name: Report snapshot freshness
+      ansible.builtin.debug:
+        msg: >-
+          Snapshot slot {{ snapshot_slot }}, mainnet {{ mainnet_slot_response.json.result }},
+          {{ mainnet_slot_response.json.result | int - snapshot_slot | int }} slots behind.
+ + # ---- step 7: scale already done in download step above ---------------------- + + # ---- step 8: wait for pod running ------------------------------------------ + - name: Wait for pod to be running + command: > + kubectl get pods -n {{ k8s_namespace }} + -l app={{ deployment_name }} + -o jsonpath='{.items[0].status.phase}' + register: pod_status + retries: 60 + delay: 10 + until: pod_status.stdout == "Running" + changed_when: false + + # ---- step 9: verify validator log ------------------------------------------ + - name: Wait for validator log file + command: > + kubectl exec -n {{ k8s_namespace }} + deployment/{{ deployment_name }} + -c agave-validator -- test -f /data/log/validator.log + register: log_file_check + retries: 12 + delay: 10 + until: log_file_check.rc == 0 + changed_when: false + + # ---- step 10: check RPC health --------------------------------------------- + - name: Check RPC health (non-blocking) + ansible.builtin.uri: + url: http://{{ inventory_hostname }}:8899/health + return_content: true + register: rpc_health + retries: 6 + delay: 30 + until: rpc_health.status == 200 + failed_when: false + + - name: Report final status + ansible.builtin.debug: + msg: >- + Recovery complete. + Snapshot: slot {{ snapshot_slot }} + ({{ mainnet_slot_response.json.result | int - snapshot_slot | int }} slots behind). + Pod: {{ pod_status.stdout }}. + Log: {{ 'writing' if log_file_check.rc == 0 else 'not yet' }}. + RPC: {{ rpc_health.content | default('not yet responding — still catching up') }}. diff --git a/playbooks/biscayne-redeploy.yml b/playbooks/biscayne-redeploy.yml new file mode 100644 index 00000000..a270f4eb --- /dev/null +++ b/playbooks/biscayne-redeploy.yml @@ -0,0 +1,321 @@ +--- +# Redeploy agave-stack on biscayne with aria2c snapshot pre-download +# +# The validator's built-in downloader fetches snapshots at ~18 MB/s (single +# connection). 
snapshot-download.py uses aria2c with 16 parallel connections to +# saturate available bandwidth, cutting 90+ min downloads to ~10 min. +# +# Flow: +# 1. [teardown] Delete k8s namespace (preserve kind cluster) +# 2. [wipe] Conditionally clear ledger / accounts / old snapshots +# 3. [deploy] laconic-so deployment start, then immediately scale to 0 +# 4. [snapshot] Download snapshot via aria2c to host bind mount +# 5. [snapshot] Verify snapshot visible inside kind node +# 6. [deploy] Scale validator back to 1 +# 7. [verify] Wait for pod Running, check logs + RPC health +# +# The validator cannot run during snapshot download — it would lock/use the +# snapshot files. laconic-so creates the cluster AND deploys the pod in one +# shot, so we scale to 0 immediately after deploy, download, then scale to 1. +# +# Usage: +# # Standard redeploy (download snapshot, preserve accounts + ledger) +# ansible-playbook -i biscayne.vaasl.io, playbooks/biscayne-redeploy.yml +# +# # Full wipe (accounts + ledger) — slow rebuild +# ansible-playbook -i biscayne.vaasl.io, playbooks/biscayne-redeploy.yml \ +# -e wipe_accounts=true -e wipe_ledger=true +# +# # Skip snapshot download (use existing) +# ansible-playbook -i biscayne.vaasl.io, playbooks/biscayne-redeploy.yml \ +# -e skip_snapshot=true +# +# # Pass extra args to snapshot-download.py +# ansible-playbook -i biscayne.vaasl.io, playbooks/biscayne-redeploy.yml \ +# -e 'snapshot_args=--version 2.2 --min-download-speed 50' +# +# # Snapshot only (no teardown/deploy) +# ansible-playbook -i biscayne.vaasl.io, playbooks/biscayne-redeploy.yml \ +# --tags snapshot +# +- name: Redeploy agave validator on biscayne + hosts: all + gather_facts: false + environment: + KUBECONFIG: /home/rix/.kube/config + vars: + deployment_dir: /srv/deployments/agave + laconic_so: /home/rix/.local/bin/laconic-so + kind_cluster: laconic-70ce4c4b47e23b85 + k8s_namespace: "laconic-{{ kind_cluster }}" + deployment_name: "{{ kind_cluster }}-deployment" + snapshot_dir: 
/srv/solana/snapshots + ledger_dir: /srv/solana/ledger + accounts_dir: /srv/solana/ramdisk/accounts + ramdisk_mount: /srv/solana/ramdisk + ramdisk_device: /dev/ram0 + snapshot_script_local: "{{ playbook_dir }}/../scripts/snapshot-download.py" + snapshot_script: /tmp/snapshot-download.py + # Flags — non-destructive by default + wipe_accounts: false + wipe_ledger: false + skip_snapshot: false + snapshot_args: "" + + tasks: + # ---- teardown: graceful stop, then delete namespace ---------------------- + # + # IMPORTANT: Scale to 0 first, wait for agave to exit cleanly. + # Deleting the namespace while agave is running causes io_uring/ZFS + # deadlock (unkillable D-state threads). See CLAUDE.md. + - name: Scale deployment to 0 (graceful stop) + command: > + kubectl scale deployment {{ deployment_name }} + -n {{ k8s_namespace }} --replicas=0 + register: pre_teardown_scale + failed_when: false + tags: [teardown] + + - name: Wait for agave to exit + command: > + kubectl get pods -n {{ k8s_namespace }} + -l app={{ deployment_name }} + -o jsonpath='{.items}' + register: pre_teardown_pods + retries: 60 + delay: 5 + until: pre_teardown_pods.stdout == "[]" or pre_teardown_pods.stdout == "" or pre_teardown_pods.rc != 0 + failed_when: false + when: pre_teardown_scale.rc == 0 + tags: [teardown] + + - name: Delete deployment namespace + command: > + kubectl delete namespace {{ k8s_namespace }} --timeout=120s + register: ns_delete + failed_when: false + tags: [teardown] + + - name: Wait for namespace to terminate + command: > + kubectl get namespace {{ k8s_namespace }} + -o jsonpath='{.status.phase}' + register: ns_status + retries: 30 + delay: 5 + until: ns_status.rc != 0 + failed_when: false + when: ns_delete.rc == 0 + tags: [teardown] + + # ---- wipe: opt-in data cleanup ------------------------------------------ + - name: Wipe ledger data + shell: rm -rf {{ ledger_dir }}/* + become: true + when: wipe_ledger | bool + tags: [wipe] + + - name: Wipe accounts ramdisk (umount + 
mkfs.xfs + mount) + shell: | + mountpoint -q {{ ramdisk_mount }} && umount {{ ramdisk_mount }} || true + mkfs.xfs -f {{ ramdisk_device }} + mount {{ ramdisk_mount }} + mkdir -p {{ accounts_dir }} + chown solana:solana {{ ramdisk_mount }} {{ accounts_dir }} + become: true + when: wipe_accounts | bool + tags: [wipe] + + - name: Clean old snapshots (keep newest full + incremental) + shell: | + cd {{ snapshot_dir }} || exit 0 + newest=$(ls -t snapshot-*.tar.* 2>/dev/null | head -1) + if [ -n "$newest" ]; then + newest_inc=$(ls -t incremental-snapshot-*.tar.* 2>/dev/null | head -1) + find . -maxdepth 1 -name '*.tar.*' \ + ! -name "$newest" \ + ! -name "${newest_inc:-__none__}" \ + -delete + fi + become: true + when: not skip_snapshot | bool + tags: [wipe] + + # ---- preflight: verify ramdisk and mounts before deploy ------------------ + - name: Verify ramdisk is mounted + command: mountpoint -q {{ ramdisk_mount }} + register: ramdisk_check + failed_when: ramdisk_check.rc != 0 + changed_when: false + tags: [deploy, preflight] + + - name: Verify ramdisk is xfs (not the underlying ZFS) + shell: df -T {{ ramdisk_mount }} | grep -q xfs + register: ramdisk_type + failed_when: ramdisk_type.rc != 0 + changed_when: false + tags: [deploy, preflight] + + - name: Verify ramdisk visible inside kind node + shell: > + docker exec {{ kind_cluster }}-control-plane + df -T /mnt/solana/ramdisk 2>/dev/null | grep -q xfs + register: kind_ramdisk_check + failed_when: kind_ramdisk_check.rc != 0 + changed_when: false + tags: [deploy, preflight] + + # ---- deploy: bring up cluster, scale to 0 immediately ------------------- + - name: Verify kind-config.yml has unified mount root + command: "grep -c 'containerPath: /mnt$' {{ deployment_dir }}/kind-config.yml" + register: mount_root_check + failed_when: mount_root_check.stdout | int < 1 + tags: [deploy] + + - name: Start deployment (creates kind cluster + deploys pod) + command: "{{ laconic_so }} deployment --dir {{ deployment_dir }} start" + 
timeout: 1200 + tags: [deploy] + + - name: Wait for deployment to exist + command: > + kubectl get deployment {{ deployment_name }} + -n {{ k8s_namespace }} + -o jsonpath='{.metadata.name}' + register: deploy_exists + retries: 30 + delay: 10 + until: deploy_exists.rc == 0 + tags: [deploy] + + - name: Scale validator to 0 (stop before snapshot download) + command: > + kubectl scale deployment {{ deployment_name }} + -n {{ k8s_namespace }} --replicas=0 + tags: [deploy] + + - name: Wait for pods to terminate + command: > + kubectl get pods -n {{ k8s_namespace }} + -l app={{ deployment_name }} + -o jsonpath='{.items}' + register: pods_gone + retries: 30 + delay: 5 + until: pods_gone.stdout == "[]" or pods_gone.stdout == "" + failed_when: false + tags: [deploy] + + # ---- snapshot: download via aria2c, verify in kind node ------------------ + - name: Verify aria2c installed + command: which aria2c + changed_when: false + when: not skip_snapshot | bool + tags: [snapshot] + + - name: Copy snapshot script to remote + copy: + src: "{{ snapshot_script_local }}" + dest: "{{ snapshot_script }}" + mode: "0755" + when: not skip_snapshot | bool + tags: [snapshot] + + - name: Verify kind node mounts + command: > + docker exec {{ kind_cluster }}-control-plane + ls /mnt/solana/snapshots/ + register: kind_mount_check + tags: [snapshot] + + - name: Download snapshot via aria2c + shell: > + python3 {{ snapshot_script }} + -o {{ snapshot_dir }} + {{ snapshot_args }} + become: true + register: snapshot_result + when: not skip_snapshot | bool + timeout: 3600 + tags: [snapshot] + + - name: Show snapshot download result + debug: + msg: "{{ snapshot_result.stdout_lines | default(['skipped']) }}" + tags: [snapshot] + + - name: Verify snapshot visible inside kind node + shell: > + docker exec {{ kind_cluster }}-control-plane + ls -lhS /mnt/solana/snapshots/*.tar.* 2>/dev/null | head -5 + register: kind_snapshot_check + failed_when: kind_snapshot_check.stdout == "" + when: not skip_snapshot | 
bool
+      tags: [snapshot]
+
+    - name: Show snapshot files in kind node
+      debug:
+        msg: "{{ kind_snapshot_check.stdout_lines | default(['skipped']) }}"
+      when: not skip_snapshot | bool
+      tags: [snapshot]
+
+    # ---- deploy (cont): scale validator back up with snapshot ----------------
+    - name: Scale validator to 1 (start with downloaded snapshot)
+      command: >
+        kubectl scale deployment {{ deployment_name }}
+        -n {{ k8s_namespace }} --replicas=1
+      tags: [deploy]
+
+    # ---- verify: confirm validator is running --------------------------------
+    - name: Wait for pod to be running  # select by app label so items[0] is the validator pod, as the other pod waits do
+      command: >
+        kubectl get pods -n {{ k8s_namespace }} -l app={{ deployment_name }}
+        -o jsonpath='{.items[0].status.phase}'
+      register: pod_status
+      retries: 60
+      delay: 10
+      until: pod_status.stdout == "Running"
+      tags: [verify]
+
+    - name: Verify unified mount inside kind node
+      command: "docker exec {{ kind_cluster }}-control-plane ls /mnt/solana/"
+      register: mount_check
+      tags: [verify]
+
+    - name: Show mount contents
+      debug:
+        msg: "{{ mount_check.stdout_lines }}"
+      tags: [verify]
+
+    - name: Check validator log file is being written
+      command: >
+        kubectl exec -n {{ k8s_namespace }}
+        deployment/{{ deployment_name }}
+        -c agave-validator -- test -f /data/log/validator.log
+      retries: 12
+      delay: 10
+      until: log_file_check.rc == 0
+      register: log_file_check
+      failed_when: false
+      tags: [verify]
+
+    - name: Check RPC health
+      uri:
+        url: http://127.0.0.1:8899/health
+        return_content: true
+      register: rpc_health
+      retries: 6
+      delay: 10
+      until: rpc_health.status == 200
+      failed_when: false
+      delegate_to: "{{ inventory_hostname }}"
+      tags: [verify]
+
+    - name: Report status
+      debug:
+        msg: >-
+          Deployment complete.
+          Log: {{ 'writing' if log_file_check.rc == 0 else 'not yet created' }}.
+          RPC: {{ rpc_health.content | default('not responding') }}.
+          Wiped: ledger={{ wipe_ledger }}, accounts={{ wipe_accounts }}.
+ tags: [verify] diff --git a/playbooks/biscayne-stop.yml b/playbooks/biscayne-stop.yml new file mode 100644 index 00000000..2550f5a9 --- /dev/null +++ b/playbooks/biscayne-stop.yml @@ -0,0 +1,106 @@ +--- +# Graceful shutdown of agave validator on biscayne +# +# Scales the deployment to 0 and waits for the pod to terminate. +# This MUST be done before any kind node restart, host reboot, +# or docker operations. +# +# The agave validator uses io_uring for async I/O. On ZFS, killing +# the process ungracefully (SIGKILL, docker kill, etc.) can produce +# unkillable kernel threads stuck in io_wq_put_and_exit, deadlocking +# the container's PID namespace. A graceful SIGTERM via k8s scale-down +# allows agave to flush and close its io_uring contexts cleanly. +# +# Usage: +# # Stop the validator +# ansible-playbook -i biscayne.vaasl.io, playbooks/biscayne-stop.yml +# +# # Stop and restart kind node (LAST RESORT — e.g., broken namespace) +# # Normally unnecessary: mount propagation means ramdisk/ZFS changes +# # are visible in the kind node without restarting it. 
+# ansible-playbook -i biscayne.vaasl.io, playbooks/biscayne-stop.yml \ +# -e restart_kind=true +# +- name: Graceful validator shutdown + hosts: all + gather_facts: false + environment: + KUBECONFIG: /home/rix/.kube/config + vars: + kind_cluster: laconic-70ce4c4b47e23b85 + k8s_namespace: "laconic-{{ kind_cluster }}" + deployment_name: "{{ kind_cluster }}-deployment" + restart_kind: false + + tasks: + - name: Get current replica count + command: > + kubectl get deployment {{ deployment_name }} + -n {{ k8s_namespace }} + -o jsonpath='{.spec.replicas}' + register: current_replicas + failed_when: false + changed_when: false + + - name: Scale deployment to 0 + command: > + kubectl scale deployment {{ deployment_name }} + -n {{ k8s_namespace }} --replicas=0 + when: current_replicas.stdout | default('0') | int > 0 + + - name: Wait for pods to terminate + command: > + kubectl get pods -n {{ k8s_namespace }} + -l app={{ deployment_name }} + -o jsonpath='{.items}' + register: pods_gone + retries: 60 + delay: 5 + until: pods_gone.stdout == "[]" or pods_gone.stdout == "" + when: current_replicas.stdout | default('0') | int > 0 + + - name: Verify no agave processes in kind node + command: > + docker exec {{ kind_cluster }}-control-plane + pgrep -c agave-validator + register: agave_procs + failed_when: false + changed_when: false + + - name: Fail if agave still running + fail: + msg: >- + agave-validator process still running inside kind node after + pod termination. Do NOT restart the kind node — investigate + first to avoid io_uring/ZFS deadlock. + when: agave_procs.rc == 0 + + - name: Report stopped + debug: + msg: >- + Validator stopped. Replicas: {{ current_replicas.stdout | default('0') }} -> 0. + No agave processes detected in kind node. 
+ when: not restart_kind | bool + + # ---- optional: restart kind node ----------------------------------------- + - name: Restart kind node + command: docker restart {{ kind_cluster }}-control-plane + when: restart_kind | bool + timeout: 120 + + - name: Wait for kind node ready + command: > + kubectl get node {{ kind_cluster }}-control-plane + -o jsonpath='{.status.conditions[?(@.type=="Ready")].status}' + register: node_ready + retries: 30 + delay: 10 + until: node_ready.stdout == "True" + when: restart_kind | bool + + - name: Report restarted + debug: + msg: >- + Kind node restarted and ready. + Deployment at 0 replicas — scale up when ready. + when: restart_kind | bool diff --git a/playbooks/connect-doublezero-multicast.yml b/playbooks/connect-doublezero-multicast.yml new file mode 100644 index 00000000..1c620b6a --- /dev/null +++ b/playbooks/connect-doublezero-multicast.yml @@ -0,0 +1,134 @@ +--- +# Connect biscayne to DoubleZero multicast via laconic-mia-sw01 +# +# Establishes a GRE tunnel to the nearest DZ hybrid device and subscribes +# to jito-shredstream and bebop multicast groups. 
+# +# Usage: +# ansible-playbook playbooks/connect-doublezero-multicast.yml +# ansible-playbook playbooks/connect-doublezero-multicast.yml --check # dry-run + +- name: Connect biscayne to DoubleZero multicast + hosts: biscayne + gather_facts: false + + vars: + dz_multicast_groups: + - jito-shredstream + - bebop + + tasks: + # ------------------------------------------------------------------ + # Pre-checks + # ------------------------------------------------------------------ + - name: Verify doublezerod service is running + ansible.builtin.systemd: + name: doublezerod + state: started + check_mode: true + register: dz_service + failed_when: dz_service.status.ActiveState != "active" + + - name: Get doublezero identity address + ansible.builtin.command: + cmd: doublezero address + register: dz_address + changed_when: false + + - name: Verify doublezero identity matches expected pubkey + ansible.builtin.assert: + that: + - dz_address.stdout | trim == dz_identity + fail_msg: >- + DZ identity mismatch: got '{{ dz_address.stdout | trim }}', + expected '{{ dz_identity }}' + + - name: Check current DZ connection status + ansible.builtin.command: + cmd: "doublezero -e {{ dz_environment }} status" + register: dz_status + changed_when: false + failed_when: false + + - name: Fail if already connected (tunnel is up) + ansible.builtin.fail: + msg: >- + DoubleZero tunnel is already connected. 
To reconnect, first
+          disconnect manually with: doublezero -e {{ dz_environment }} disconnect
+      when: "'connected' in dz_status.stdout | lower | replace('disconnected', '')"  # a bare substring test also matched "disconnected"
+
+    # ------------------------------------------------------------------
+    # Create access pass
+    # ------------------------------------------------------------------
+    - name: Create DZ access pass for multicast subscriber
+      ansible.builtin.command:
+        cmd: >-
+          doublezero -e {{ dz_environment }} access-pass set
+          --accesspass-type solana-multicast-subscriber
+          --client-ip {{ client_ip }}
+          --user-payer {{ dz_identity }}
+          --solana-validator {{ validator_identity }}
+          --tenant {{ dz_tenant }}
+      register: dz_access_pass
+      changed_when: "'created' in dz_access_pass.stdout | lower or 'updated' in dz_access_pass.stdout | lower"
+
+    - name: Show access pass result
+      ansible.builtin.debug:
+        var: dz_access_pass.stdout_lines
+
+    # ------------------------------------------------------------------
+    # Connect to DZ multicast
+    # ------------------------------------------------------------------
+    - name: Connect to DoubleZero multicast via {{ dz_device }}
+      ansible.builtin.command:
+        cmd: >-
+          doublezero -e {{ dz_environment }} connect multicast
+          {% for group in dz_multicast_groups %}
+          --subscribe {{ group }}
+          {% endfor %}
+          --device {{ dz_device }}
+          --client-ip {{ client_ip }}
+      register: dz_connect
+      changed_when: true
+
+    - name: Show connect result
+      ansible.builtin.debug:
+        var: dz_connect.stdout_lines
+
+    # ------------------------------------------------------------------
+    # Post-checks
+    # ------------------------------------------------------------------
+    - name: Verify tunnel status is connected
+      ansible.builtin.command:
+        cmd: "doublezero -e {{ dz_environment }} status"
+      register: dz_post_status
+      changed_when: false
+      failed_when: "'connected' not in dz_post_status.stdout | lower | replace('disconnected', '')"  # NOTE(review): confirm the CLI prints "connected" (not e.g. "up") for an established tunnel
+
+    - name: Show tunnel status
+      ansible.builtin.debug:
+        var: dz_post_status.stdout_lines
+
+    - name: Verify routes are installed
+
ansible.builtin.command:
+        cmd: "doublezero -e {{ dz_environment }} routes"
+      register: dz_routes
+      changed_when: false
+
+    - name: Show installed routes
+      ansible.builtin.debug:
+        var: dz_routes.stdout_lines
+
+    - name: Check multicast group membership
+      ansible.builtin.command:
+        cmd: "doublezero -e {{ dz_environment }} status"
+      register: dz_multicast_status
+      changed_when: false
+
+    - name: Connection summary
+      ansible.builtin.debug:
+        msg: >-
+          DoubleZero multicast connected via {{ dz_device }}.
+          Subscribed groups: {{ dz_multicast_groups | join(', ') }}.
+          Next step: request allowlist access from group owners
+          (see docs/doublezero-multicast-access.md).
diff --git a/playbooks/files/ashburn-routing-ifup.sh b/playbooks/files/ashburn-routing-ifup.sh
new file mode 100644
index 00000000..5c0fa13e
--- /dev/null
+++ b/playbooks/files/ashburn-routing-ifup.sh
@@ -0,0 +1,18 @@
+#!/bin/bash
+# /etc/network/if-up.d/ashburn-routing
+# Restore policy routing for Ashburn validator relay after reboot/interface up.
+# Only act when doublezero0 comes up.
+
+[ "$IFACE" = "doublezero0" ] || exit 0
+
+# Ensure rt_tables entry exists (accept tab- or space-separated entries so it is never appended twice)
+grep -qE '^100[[:space:]]+ashburn$' /etc/iproute2/rt_tables || echo "100 ashburn" >> /etc/iproute2/rt_tables
+
+# Add policy rule only if absent (the guard makes this idempotent; fwmark 100 is listed as 0x64)
+ip rule show | grep -q 'fwmark 0x64 lookup ashburn' || ip rule add fwmark 100 table ashburn
+
+# Add default route via mia-sw01 through doublezero0 tunnel
+ip route replace default via 169.254.7.6 dev doublezero0 table ashburn
+
+# Add Ashburn IP to loopback (idempotent; -F matches the address literally, dots included)
+ip addr show lo | grep -qF 'inet 137.239.194.65/' || ip addr add 137.239.194.65/32 dev lo
diff --git a/playbooks/fix-pv-mounts.yml b/playbooks/fix-pv-mounts.yml
new file mode 100644
index 00000000..f03f0e97
--- /dev/null
+++ b/playbooks/fix-pv-mounts.yml
@@ -0,0 +1,166 @@
+---
+# Verify PV hostPaths match expected kind-node paths, fix if wrong.
+#
+# Checks each PV's hostPath against the expected path derived from the
+# spec.yml volume mapping through the kind extraMounts. If any PV has a
+# wrong path, fails unless -e fix=true is passed.
+#
+# Does NOT touch the deployment.
+#
+# Usage:
+#   # Check only (fails if mounts are bad)
+#   ansible-playbook -i biscayne.vaasl.io, playbooks/fix-pv-mounts.yml
+#
+#   # Fix stale PVs
+#   ansible-playbook -i biscayne.vaasl.io, playbooks/fix-pv-mounts.yml -e fix=true
+#
+- name: Verify and fix PV mount paths
+  hosts: all
+  gather_facts: false
+  environment:
+    KUBECONFIG: /home/rix/.kube/config
+  vars:
+    kind_cluster: laconic-70ce4c4b47e23b85
+    k8s_namespace: "laconic-{{ kind_cluster }}"
+    fix: false
+    volumes:
+      - name: validator-snapshots
+        host_path: /mnt/solana/snapshots
+        capacity: 200Gi
+      - name: validator-ledger
+        host_path: /mnt/solana/ledger
+        capacity: 2Ti
+      - name: validator-accounts
+        host_path: /mnt/solana/ramdisk/accounts
+        capacity: 800Gi
+      - name: validator-log
+        host_path: /mnt/solana/log
+        capacity: 10Gi
+
+  tasks:
+    - name: Read current PV hostPaths
+      command: >
+        kubectl get pv {{ kind_cluster }}-{{ item.name }}
+        -o jsonpath='{.spec.hostPath.path}'
+      register: current_paths
+      loop: "{{ volumes }}"
+      failed_when: false
+      changed_when: false
+
+    - name: Build path comparison
+      set_fact:  # accumulate per result, comparing each PV with ITS OWN expected path (item.item.host_path)
+        path_mismatches: "{{ path_mismatches | default([]) + ([item] if item.stdout not in ['', item.item.host_path] else []) }}"
+        path_missing: "{{ path_missing | default([]) + ([item] if item.stdout == '' else []) }}"
+      loop: "{{ current_paths.results }}"
+      loop_control:
+        label: "{{ item.item.name }}"
+
+    - name: Show current vs expected paths
+      debug:
+        msg: >-
+          {{ item.item.name }}:
+          current={{ item.stdout if item.stdout else 'NOT FOUND' }}
+          expected={{ item.item.host_path }}
+          {{ 'OK' if item.stdout == item.item.host_path else 'NEEDS FIX' }}
+      loop: "{{ current_paths.results }}"
+      loop_control:
+        label: "{{ item.item.name }}"
+
+    - name: Check for mismatched PVs
+      fail:
+
msg: >- + PV {{ item.item.name }} has wrong hostPath: + {{ item.stdout if item.stdout else 'NOT FOUND' }} + (expected {{ item.item.host_path }}). + Run with -e fix=true to delete and recreate. + when: item.stdout != item.item.host_path and not fix | bool + loop: "{{ current_paths.results }}" + loop_control: + label: "{{ item.item.name }}" + + # ---- Fix mode --------------------------------------------------------- + - name: Delete stale PVCs + command: > + kubectl delete pvc {{ kind_cluster }}-{{ item.item.name }} + -n {{ k8s_namespace }} --timeout=60s + when: fix | bool and item.stdout != item.item.host_path + loop: "{{ current_paths.results }}" + loop_control: + label: "{{ item.item.name }}" + failed_when: false + + - name: Delete stale PVs + command: > + kubectl delete pv {{ kind_cluster }}-{{ item.item.name }} + --timeout=60s + when: fix | bool and item.stdout != item.item.host_path + loop: "{{ current_paths.results }}" + loop_control: + label: "{{ item.item.name }}" + failed_when: false + + - name: Create PVs with correct hostPaths + command: > + kubectl apply -f - + args: + stdin: | + apiVersion: v1 + kind: PersistentVolume + metadata: + name: {{ kind_cluster }}-{{ item.item.name }} + spec: + capacity: + storage: {{ item.item.capacity }} + accessModes: + - ReadWriteOnce + persistentVolumeReclaimPolicy: Retain + storageClassName: manual + hostPath: + path: {{ item.item.host_path }} + when: fix | bool and item.stdout != item.item.host_path + loop: "{{ current_paths.results }}" + loop_control: + label: "{{ item.item.name }}" + + - name: Create PVCs + command: > + kubectl apply -f - + args: + stdin: | + apiVersion: v1 + kind: PersistentVolumeClaim + metadata: + name: {{ kind_cluster }}-{{ item.item.name }} + namespace: {{ k8s_namespace }} + spec: + accessModes: + - ReadWriteOnce + storageClassName: manual + volumeName: {{ kind_cluster }}-{{ item.item.name }} + resources: + requests: + storage: {{ item.item.capacity }} + when: fix | bool and item.stdout != 
item.item.host_path + loop: "{{ current_paths.results }}" + loop_control: + label: "{{ item.item.name }}" + + # ---- Final verify ----------------------------------------------------- + - name: Verify PV paths + command: > + kubectl get pv {{ kind_cluster }}-{{ item.name }} + -o jsonpath='{.spec.hostPath.path}' + register: final_paths + loop: "{{ volumes }}" + changed_when: false + when: fix | bool + + - name: Assert all PV paths correct + assert: + that: item.stdout == item.item.host_path + fail_msg: "{{ item.item.name }}: {{ item.stdout }} != {{ item.item.host_path }}" + success_msg: "{{ item.item.name }}: {{ item.stdout }} OK" + loop: "{{ final_paths.results }}" + loop_control: + label: "{{ item.item.name }}" + when: fix | bool diff --git a/playbooks/health-check.yml b/playbooks/health-check.yml new file mode 100644 index 00000000..326f1f35 --- /dev/null +++ b/playbooks/health-check.yml @@ -0,0 +1,340 @@ +--- +# Health check for biscayne agave-stack deployment +# +# Gathers system, validator, DoubleZero, and network status in a single run. +# All tasks are read-only — safe to run at any time. 
+# +# Usage: +# ansible-playbook playbooks/health-check.yml +# ansible-playbook playbooks/health-check.yml -t validator # just validator checks +# ansible-playbook playbooks/health-check.yml -t doublezero # just DZ checks +# ansible-playbook playbooks/health-check.yml -t network # just network checks + +- name: Biscayne agave-stack health check + hosts: biscayne + gather_facts: false + + tasks: + # ------------------------------------------------------------------ + # Discover kind cluster and namespace + # ------------------------------------------------------------------ + - name: Get kind cluster name + ansible.builtin.command: + cmd: kind get clusters + register: kind_clusters + changed_when: false + failed_when: kind_clusters.rc != 0 or kind_clusters.stdout_lines | length == 0 + + - name: Set cluster name fact + ansible.builtin.set_fact: + kind_cluster: "{{ kind_clusters.stdout_lines[0] }}" + + - name: Discover agave namespace + ansible.builtin.shell: + cmd: >- + set -o pipefail && + kubectl get namespaces --no-headers -o custom-columns=':metadata.name' + | grep '^laconic-' + executable: /bin/bash + register: ns_result + changed_when: false + failed_when: ns_result.stdout_lines | length == 0 + + - name: Set namespace fact + ansible.builtin.set_fact: + agave_ns: "{{ ns_result.stdout_lines[0] }}" + + - name: Get pod name + ansible.builtin.shell: + cmd: >- + set -o pipefail && + kubectl get pods -n {{ agave_ns }} --no-headers + -o custom-columns=':metadata.name' | head -1 + executable: /bin/bash + register: pod_result + changed_when: false + failed_when: pod_result.stdout | trim == '' + + - name: Set pod fact + ansible.builtin.set_fact: + agave_pod: "{{ pod_result.stdout | trim }}" + + - name: Show discovered resources + ansible.builtin.debug: + msg: "cluster={{ kind_cluster }} ns={{ agave_ns }} pod={{ agave_pod }}" + + # ------------------------------------------------------------------ + # Pod status + # 
------------------------------------------------------------------ + - name: Get pod status + ansible.builtin.command: + cmd: kubectl get pods -n {{ agave_ns }} -o wide + register: pod_status + changed_when: false + tags: [validator] + + - name: Show pod status + ansible.builtin.debug: + var: pod_status.stdout_lines + tags: [validator] + + - name: Get container restart counts + ansible.builtin.shell: + cmd: >- + kubectl get pod {{ agave_pod }} -n {{ agave_ns }} + -o jsonpath='{range .status.containerStatuses[*]}{.name}{" restarts="}{.restartCount}{" ready="}{.ready}{"\n"}{end}' + register: restart_counts + changed_when: false + tags: [validator] + + - name: Show restart counts + ansible.builtin.debug: + var: restart_counts.stdout_lines + tags: [validator] + + # ------------------------------------------------------------------ + # Validator sync status + # ------------------------------------------------------------------ + - name: Get validator recent logs (replay progress) + ansible.builtin.command: + cmd: >- + kubectl logs -n {{ agave_ns }} {{ agave_pod }} + -c agave-validator --tail=30 + register: validator_logs + changed_when: false + tags: [validator] + + - name: Show validator logs + ansible.builtin.debug: + var: validator_logs.stdout_lines + tags: [validator] + + - name: Check RPC health endpoint + ansible.builtin.uri: + url: http://127.0.0.1:8899/health + method: GET + return_content: true + timeout: 5 + register: rpc_health + failed_when: false + tags: [validator] + + - name: Show RPC health + ansible.builtin.debug: + msg: "RPC health: {{ rpc_health.status | default('unreachable') }} — {{ rpc_health.content | default('no response') }}" + tags: [validator] + + - name: Get validator version + ansible.builtin.shell: + cmd: >- + kubectl exec -n {{ agave_ns }} {{ agave_pod }} + -c agave-validator -- agave-validator --version 2>&1 || true + register: validator_version + changed_when: false + tags: [validator] + + - name: Show validator version + 
ansible.builtin.debug: + var: validator_version.stdout + tags: [validator] + + # ------------------------------------------------------------------ + # DoubleZero status + # ------------------------------------------------------------------ + - name: Get host DZ identity + ansible.builtin.command: + cmd: sudo -u solana doublezero address + register: dz_address + changed_when: false + failed_when: false + tags: [doublezero] + + - name: Get host DZ tunnel status + ansible.builtin.command: + cmd: sudo -u solana doublezero -e {{ dz_environment }} status + register: dz_status + changed_when: false + failed_when: false + tags: [doublezero] + + - name: Get DZ routes + ansible.builtin.shell: + cmd: set -o pipefail && ip route | grep doublezero0 || echo "no doublezero0 routes" + executable: /bin/bash + register: dz_routes + changed_when: false + tags: [doublezero] + + - name: Get host doublezerod service state + ansible.builtin.systemd: + name: doublezerod + register: dz_systemd_info + failed_when: false + check_mode: true + tags: [doublezero] + + - name: Set DZ systemd state + ansible.builtin.set_fact: + dz_systemd_state: "{{ dz_systemd_info.status.ActiveState | default('unknown') }}" + tags: [doublezero] + + - name: Get container DZ status + ansible.builtin.shell: + cmd: >- + kubectl exec -n {{ agave_ns }} {{ agave_pod }} + -c doublezerod -- doublezero status 2>&1 || echo "container DZ unavailable" + register: dz_container_status + changed_when: false + tags: [doublezero] + + - name: Show DoubleZero status + ansible.builtin.debug: + msg: + identity: "{{ dz_address.stdout | default('unknown') }}" + host_tunnel: "{{ dz_status.stdout_lines | default(['unknown']) }}" + host_systemd: "{{ dz_systemd_state }}" + container: "{{ dz_container_status.stdout_lines | default(['unknown']) }}" + routes: "{{ dz_routes.stdout_lines | default([]) }}" + tags: [doublezero] + + # ------------------------------------------------------------------ + # Storage + # 
------------------------------------------------------------------ + - name: Check ramdisk usage + ansible.builtin.command: + cmd: df -h /srv/solana/ramdisk + register: ramdisk_df + changed_when: false + failed_when: false + tags: [storage] + + - name: Check ZFS dataset usage + ansible.builtin.command: + cmd: zfs list -o name,used,avail,mountpoint -r biscayne/DATA + register: zfs_list + changed_when: false + tags: [storage] + + - name: Check ZFS zvol I/O + ansible.builtin.shell: + cmd: set -o pipefail && iostat -x zd0 1 2 | tail -3 + executable: /bin/bash + register: zvol_io + changed_when: false + failed_when: false + tags: [storage] + + - name: Show storage status + ansible.builtin.debug: + msg: + ramdisk: "{{ ramdisk_df.stdout_lines | default(['not mounted']) }}" + zfs: "{{ zfs_list.stdout_lines | default([]) }}" + zvol_io: "{{ zvol_io.stdout_lines | default([]) }}" + tags: [storage] + + # ------------------------------------------------------------------ + # System resources + # ------------------------------------------------------------------ + - name: Check memory + ansible.builtin.command: + cmd: free -h + register: mem + changed_when: false + tags: [system] + + - name: Check load average + ansible.builtin.command: + cmd: cat /proc/loadavg + register: loadavg + changed_when: false + tags: [system] + + - name: Check swap + ansible.builtin.command: + cmd: swapon --show + register: swap + changed_when: false + failed_when: false + tags: [system] + + - name: Show system resources + ansible.builtin.debug: + msg: + memory: "{{ mem.stdout_lines }}" + load: "{{ loadavg.stdout }}" + swap: "{{ swap.stdout | default('none') }}" + tags: [system] + + # ------------------------------------------------------------------ + # Network / shred throughput + # ------------------------------------------------------------------ + - name: Count shred packets per interface (5 sec sample) + ansible.builtin.shell: + cmd: | + set -o pipefail + for iface in eno1 doublezero0; do + 
count=$(timeout 5 tcpdump -i "$iface" -nn 'udp dst portrange 9000-10000' -q 2>&1 | grep -oP '\d+(?= packets captured)' || echo 0) + echo "$iface: $count packets/5s" + done + executable: /bin/bash + register: shred_counts + changed_when: false + failed_when: false + tags: [network] + + - name: Check interface throughput + ansible.builtin.shell: + cmd: >- + set -o pipefail && + grep -E 'eno1|doublezero0' /proc/net/dev + | awk '{printf "%s rx=%s tx=%s\n", $1, $2, $10}' + executable: /bin/bash + register: iface_stats + changed_when: false + tags: [network] + + - name: Check gossip/repair port connections + ansible.builtin.shell: + cmd: >- + set -o pipefail && + ss -tupn | grep -E ':8001|:900[0-9]' | head -20 || echo "no connections" + executable: /bin/bash + register: gossip_ports + changed_when: false + tags: [network] + + - name: Check iptables DNAT rule (TVU shred relay) + ansible.builtin.shell: + cmd: >- + set -o pipefail && + iptables -t nat -L PREROUTING -v -n | grep -E '64.92.84.81|20000' || echo "no DNAT rule" + executable: /bin/bash + register: dnat_rule + changed_when: false + tags: [network] + + - name: Show network status + ansible.builtin.debug: + msg: + shred_counts: "{{ shred_counts.stdout_lines | default([]) }}" + interfaces: "{{ iface_stats.stdout_lines | default([]) }}" + gossip_ports: "{{ gossip_ports.stdout_lines | default([]) }}" + tvu_dnat: "{{ dnat_rule.stdout_lines | default([]) }}" + tags: [network] + + # ------------------------------------------------------------------ + # Summary + # ------------------------------------------------------------------ + - name: Health check summary + ansible.builtin.debug: + msg: | + === Biscayne Health Check === + Cluster: {{ kind_cluster }} + Namespace: {{ agave_ns }} + Pod: {{ agave_pod }} + RPC: {{ rpc_health.status | default('unreachable') }} + DZ identity: {{ dz_address.stdout | default('unknown') | trim }} + DZ tunnel: {{ 'UP' if dz_status.rc | default(1) == 0 else 'DOWN' }} + DZ systemd: {{ 
#!/bin/bash
# Check shred completeness at the tip of the blockstore.
#
# Samples the most recent N slots and reports how many are full.
# Use this to determine when enough complete blocks have accumulated
# to safely download a new snapshot that lands within the complete range.
#
# Usage: check-shred-completeness.sh [SAMPLE_SIZE]
#   Run on a host that has kubectl access to the cluster (e.g. via ssh).
#
# Environment (all optional):
#   KUBECONFIG      kubeconfig used by kubectl (default: /home/rix/.kube/config)
#   NS              namespace of the validator deployment
#   DEPLOY          name of the validator deployment
#   MIN_CONTIGUOUS  contiguous full slots required for READY (default: 100)

set -euo pipefail

# Must be exported: a plain assignment only creates a shell variable, so the
# default would never reach kubectl when KUBECONFIG is not already set in the
# caller's environment.
export KUBECONFIG="${KUBECONFIG:-/home/rix/.kube/config}"
NS="${NS:-laconic-laconic-70ce4c4b47e23b85}"
DEPLOY="${DEPLOY:-laconic-70ce4c4b47e23b85-deployment}"
SAMPLE_SIZE="${1:-200}"
MIN_CONTIGUOUS="${MIN_CONTIGUOUS:-100}"

if ! [[ "$SAMPLE_SIZE" =~ ^[1-9][0-9]*$ ]]; then
    echo "error: SAMPLE_SIZE must be a positive integer, got '$SAMPLE_SIZE'" >&2
    exit 2
fi

# Run agave-ledger-tool against the validator's ledger inside the pod.
# stderr is merged into stdout because the tool logs to stderr.
ledger_tool() {
    kubectl exec -n "$NS" deployment/"$DEPLOY" -c agave-validator -- \
        agave-ledger-tool -l /data/ledger "$@" 2>&1
}

# Get blockstore bounds. "|| true" keeps set -e/pipefail from killing the
# script silently; the empty-result check below reports the failure instead.
BOUNDS=$(ledger_tool blockstore bounds | grep "^Ledger" || true)
HIGHEST=$(grep -oP 'to \K[0-9]+' <<< "$BOUNDS" | tail -n1 || true)

if [ -z "$HIGHEST" ]; then
    echo "error: could not determine blockstore bounds (kubectl exec or agave-ledger-tool failed)" >&2
    exit 1
fi

START=$((HIGHEST - SAMPLE_SIZE))
if [ "$START" -lt 0 ]; then
    START=0
fi

echo "Blockstore highest slot: $HIGHEST"
echo "Sampling slots $START to $HIGHEST ($SAMPLE_SIZE slots)"
echo ""

# Get slot metadata
OUTPUT=$(ledger_tool blockstore print \
    --starting-slot "$START" --ending-slot "$HIGHEST" \
    | grep -E "^Slot|is_full" || true)

# grep -c prints 0 but exits 1 when nothing matches, hence "|| true".
TOTAL=$(grep -c "^Slot" <<< "$OUTPUT" || true)
FULL=$(grep -c "is_full: true" <<< "$OUTPUT" || true)
INCOMPLETE=$(grep -c "is_full: false" <<< "$OUTPUT" || true)

echo "Total slots with data: $TOTAL / $SAMPLE_SIZE"
echo "Complete (is_full: true): $FULL"
echo "Incomplete (is_full: false): $INCOMPLETE"

if [ "$TOTAL" -gt 0 ]; then
    PCT=$((FULL * 100 / TOTAL))
    echo "Completeness: ${PCT}%"
else
    echo "Completeness: N/A (no data)"
fi

echo ""

# Find the first full slot counting backward from the tip
# This tells us where the contiguous complete run starts
echo "--- Contiguous complete run from tip ---"

# Emit "<slot> <0|1>" per slot, highest slot first. Tracking the current slot
# (instead of pairing lines blindly) keeps slots aligned with their flag even
# when a "Slot" line has no is_full line; such a slot counts as not full.
REVERSED=$(awk '
    function emit() { if (slot != "") print slot, full }
    /^Slot/         { emit(); slot = $2; full = 0 }
    /is_full: true/ { full = 1 }
    END             { emit() }
' <<< "$OUTPUT" | sort -rn)

CONTIGUOUS=0
FIRST_FULL=""
while IFS=' ' read -r slot full; do
    # ${full:-0} guards the empty line produced when REVERSED is empty.
    [ "${full:-0}" -eq 1 ] || break
    CONTIGUOUS=$((CONTIGUOUS + 1))
    FIRST_FULL="$slot"
done <<< "$REVERSED"

if [ -n "$FIRST_FULL" ]; then
    echo "Contiguous complete slots from tip: $CONTIGUOUS"
    echo "Run starts at slot: $FIRST_FULL"
    echo "Run ends at slot: $HIGHEST"
    echo ""
    echo "A snapshot with slot >= $FIRST_FULL would replay from local blockstore."

    # Check against mainnet. This is informational only, so an unreachable
    # RPC endpoint must not abort the script before the verdict is printed.
    MAINNET_SLOT=$(curl -s --max-time 10 -X POST -H "Content-Type: application/json" \
        -d '{"jsonrpc":"2.0","id":1,"method":"getSlot","params":[{"commitment":"finalized"}]}' \
        https://api.mainnet-beta.solana.com | grep -oP '"result":\K[0-9]+' || true)

    if [ -n "$MAINNET_SLOT" ]; then
        GAP=$((MAINNET_SLOT - HIGHEST))
        echo "Mainnet tip: $MAINNET_SLOT (blockstore is $GAP slots behind tip)"
    else
        echo "Mainnet tip: unavailable (RPC query failed)"
    fi

    if [ "$CONTIGUOUS" -gt "$MIN_CONTIGUOUS" ]; then
        echo ""
        echo ">>> READY: $CONTIGUOUS contiguous complete slots. Safe to download a snapshot."
    else
        echo ""
        echo ">>> NOT READY: Only $CONTIGUOUS contiguous complete slots. Wait for more."
    fi
else
    echo "No contiguous complete run from tip found."
fi
+# +# Usage: pane-exec.sh +# Example: pane-exec.sh %6565 ansible-playbook -i inventory/switches.yml playbooks/foo.yml + +set -euo pipefail + +PANE="$1" +shift +CMD="$*" + +TMPFILE=$(mktemp /tmp/pane-output.XXXXXX) +MARKER="__PANE_EXEC_DONE_${RANDOM}_$$__" + +cleanup() { + tmux pipe-pane -t "$PANE" 2>/dev/null || true + rm -f "$TMPFILE" +} +trap cleanup EXIT + +# Start capturing pane output +tmux pipe-pane -o -t "$PANE" "cat >> $TMPFILE" + +# Send the command, then echo a marker so we know when it's done +tmux send-keys -t "$PANE" "$CMD; echo $MARKER" Enter + +# Wait for the marker +while ! grep -q "$MARKER" "$TMPFILE" 2>/dev/null; do + sleep 0.5 +done + +# Stop capturing +tmux pipe-pane -t "$PANE" + +# Strip ANSI escape codes, remove the marker line, output the rest +sed 's/\x1b\[[0-9;]*[a-zA-Z]//g; s/\x1b\[[?][0-9]*[a-zA-Z]//g' "$TMPFILE" | grep -v "$MARKER" diff --git a/scripts/scrape-arista-docs.mjs b/scripts/scrape-arista-docs.mjs new file mode 100644 index 00000000..d18119ef --- /dev/null +++ b/scripts/scrape-arista-docs.mjs @@ -0,0 +1,151 @@ +import { chromium } from 'playwright'; +import { writeFileSync, mkdirSync } from 'fs'; +import { join } from 'path'; + +const OUT_DIR = join(import.meta.dirname, '..', 'docs', 'arista-scraped'); +mkdirSync(OUT_DIR, { recursive: true }); + +const pages = [ + { url: 'https://www.arista.com/en/um-eos/eos-static-inter-vrf-route', file: 'static-inter-vrf-route.md' }, + { url: 'https://www.arista.com/en/um-eos/eos-inter-vrf-local-route-leaking', file: 'inter-vrf-local-route-leaking.md' }, + { url: 'https://www.arista.com/en/um-eos/eos-policy-based-routing', file: 'policy-based-routing.md' }, + { url: 'https://www.arista.com/en/um-eos/eos-traffic-management', file: 'traffic-management.md' }, + { url: 'https://www.arista.com/en/um-eos/eos-policy-based-routing-pbr', file: 'pbr.md' }, + { url: 'https://www.arista.com/en/um-eos/eos-configuring-vrf-instances', file: 'configuring-vrf.md' }, + { url: 
/**
 * Scrape one documentation page and save it as Markdown under OUT_DIR.
 *
 * Navigates `page` to `url`, waits a fixed 8 s for client-side rendering,
 * writes a placeholder file and gives up if the first 200 characters of the
 * body look like a CAPTCHA / anti-bot interstitial, and otherwise converts
 * the main content element to Markdown inside the browser context.
 *
 * @param {import('playwright').Page} page  Playwright page, reused across URLs.
 * @param {string} url       Page to scrape.
 * @param {string} filename  Output file name, relative to OUT_DIR.
 * @returns {Promise<boolean>} true when content was saved; false when the
 *   page was blocked or loading failed (a placeholder file is written).
 */
async function scrapePage(page, url, filename) {
  console.log(`Scraping: ${url}`);
  const outPath = join(OUT_DIR, filename);
  try {
    const resp = await page.goto(url, { waitUntil: 'domcontentloaded', timeout: 30000 });
    // goto() can resolve to null (e.g. same-document navigation), so guard
    // before asking for the status instead of throwing into the catch below.
    console.log(` Status: ${resp ? resp.status() : 'no response'}`);

    // Wait for JS to render
    await page.waitForTimeout(8000);

    // Check for CAPTCHA
    const bodyText = await page.evaluate(() => document.body.innerText.substring(0, 200));
    if (bodyText.includes('CAPTCHA') || bodyText.includes("couldn't load")) {
      console.log(` BLOCKED by CAPTCHA/anti-bot on ${filename}`);
      writeFileSync(outPath, `# BLOCKED BY CAPTCHA\n\nURL: ${url}\nThe Arista docs site requires CAPTCHA verification for headless browsers.\n`);
      return false;
    }

    // Extract content (this callback runs in the page, not in Node).
    const content = await page.evaluate(() => {
      const selectors = [
        '#content', '.article-content', '.content-area', '#main-content',
        'article', '.item-page', '#sp-component', '.com-content-article',
        'main', '#sp-main-body',
      ];

      // Take the first candidate with a meaningful amount of text. Only a
      // qualifying candidate is kept, so a short match on the last selector
      // can no longer pre-empt the document.body fallback.
      let el = null;
      for (const sel of selectors) {
        const candidate = document.querySelector(sel);
        if (candidate && candidate.textContent.trim().length > 100) {
          el = candidate;
          break;
        }
      }
      if (!el) el = document.body;

      // Recursively render a DOM node as Markdown, dropping page chrome.
      function nodeToMd(node) {
        if (node.nodeType === Node.TEXT_NODE) return node.textContent;
        if (node.nodeType !== Node.ELEMENT_NODE) return '';
        const tag = node.tagName.toLowerCase();
        if (['nav', 'footer', 'script', 'style', 'noscript', 'iframe'].includes(tag)) return '';
        if (node.classList && (node.classList.contains('nav') || node.classList.contains('sidebar') ||
            node.classList.contains('menu') || node.classList.contains('footer') ||
            node.classList.contains('header'))) return '';
        const children = Array.from(node.childNodes).map(c => nodeToMd(c)).join('');
        switch (tag) {
          case 'h1': return `\n# ${children.trim()}\n\n`;
          case 'h2': return `\n## ${children.trim()}\n\n`;
          case 'h3': return `\n### ${children.trim()}\n\n`;
          case 'h4': return `\n#### ${children.trim()}\n\n`;
          case 'p': return `\n${children.trim()}\n\n`;
          case 'br': return '\n';
          case 'li': return `- ${children.trim()}\n`;
          case 'ul': case 'ol': return `\n${children}\n`;
          case 'pre': return `\n\`\`\`\n${children.trim()}\n\`\`\`\n\n`;
          case 'code': return `\`${children.trim()}\``;
          case 'strong': case 'b': return `**${children.trim()}**`;
          case 'em': case 'i': return `*${children.trim()}*`;
          case 'table': return `\n${children}\n`;
          case 'tr': return `${children}|\n`;
          case 'th': case 'td': return `| ${children.trim()} `;
          case 'a': {
            const href = node.getAttribute('href');
            if (href && !href.startsWith('#') && !href.startsWith('javascript'))
              return `[${children.trim()}](${href})`;
            return children;
          }
          default: return children;
        }
      }
      return nodeToMd(el);
    });

    // Collapse runs of 4+ newlines and strip trailing whitespace per line.
    const cleaned = content.replace(/\n{4,}/g, '\n\n\n').replace(/[ \t]+$/gm, '').trim();
    const header = `\n\n\n`;
    writeFileSync(outPath, header + cleaned + '\n');
    console.log(` Saved ${filename} (${cleaned.length} chars)`);
    return true;
  } catch (e) {
    console.error(` FAILED: ${e.message}`);
    writeFileSync(outPath, `# FAILED TO LOAD\n\nURL: ${url}\nError: ${e.message}\n`);
    return false;
  }
}
detectable) + '--disable-blink-features=AutomationControlled', + '--no-sandbox', + ], + }); + + const context = await browser.newContext({ + userAgent: 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36', + locale: 'en-US', + timezoneId: 'America/New_York', + viewport: { width: 1920, height: 1080 }, + }); + + // Remove webdriver property + await context.addInitScript(() => { + Object.defineProperty(navigator, 'webdriver', { get: () => false }); + // Override permissions + const originalQuery = window.navigator.permissions.query; + window.navigator.permissions.query = (parameters) => + parameters.name === 'notifications' + ? Promise.resolve({ state: Notification.permission }) + : originalQuery(parameters); + }); + + const page = await context.newPage(); + + let anySuccess = false; + for (const { url, file } of pages) { + const ok = await scrapePage(page, url, file); + if (ok) anySuccess = true; + // Add delay between requests + await page.waitForTimeout(2000); + } + + if (!anySuccess) { + console.log('\nAll pages blocked by CAPTCHA. 
Arista docs require human verification.'); + } + + await browser.close(); + console.log('\nDone!'); +} + +main().catch(e => { console.error(e); process.exit(1); }); diff --git a/scripts/shred-unwrap.py b/scripts/shred-unwrap.py new file mode 100644 index 00000000..75c19b16 --- /dev/null +++ b/scripts/shred-unwrap.py @@ -0,0 +1,34 @@ +#!/usr/bin/env python3 +"""Strip IP+UDP headers from mirrored packets and forward raw UDP payload.""" +import socket +import sys + +LISTEN_PORT = int(sys.argv[1]) if len(sys.argv) > 1 else 9100 +FORWARD_HOST = sys.argv[2] if len(sys.argv) > 2 else "127.0.0.1" +FORWARD_PORT = int(sys.argv[3]) if len(sys.argv) > 3 else 9000 + +sock_in = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) +sock_in.bind(("0.0.0.0", LISTEN_PORT)) + +sock_out = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) + +count = 0 +while True: + data, addr = sock_in.recvfrom(65535) + if len(data) < 28: + continue + # IP header: first nibble is version (4), second nibble is IHL (words) + if (data[0] >> 4) != 4: + continue + ihl = (data[0] & 0x0F) * 4 + # Protocol should be UDP (17) + if data[9] != 17: + continue + # Payload starts after IP header + 8-byte UDP header + offset = ihl + 8 + payload = data[offset:] + if payload: + sock_out.sendto(payload, (FORWARD_HOST, FORWARD_PORT)) + count += 1 + if count % 10000 == 0: + print(f"Forwarded {count} shreds", flush=True) diff --git a/scripts/snapshot-download.py b/scripts/snapshot-download.py new file mode 100755 index 00000000..a8caddfc --- /dev/null +++ b/scripts/snapshot-download.py @@ -0,0 +1,546 @@ +#!/usr/bin/env python3 +"""Download Solana snapshots using aria2c for parallel multi-connection downloads. + +Discovers snapshot sources by querying getClusterNodes for all RPCs in the +cluster, probing each for available snapshots, benchmarking download speed, +and downloading from the fastest source using aria2c (16 connections by default). 
+ +Based on the discovery approach from etcusr/solana-snapshot-finder but replaces +the single-connection wget download with aria2c parallel chunked downloads. + +Usage: + # Download to /srv/solana/snapshots (mainnet, 16 connections) + ./snapshot-download.py -o /srv/solana/snapshots + + # Dry run — find best source, print URL + ./snapshot-download.py --dry-run + + # Custom RPC for cluster node discovery + 32 connections + ./snapshot-download.py -r https://api.mainnet-beta.solana.com -n 32 + + # Testnet + ./snapshot-download.py -c testnet -o /data/snapshots + +Requirements: + - aria2c (apt install aria2) + - python3 >= 3.10 (stdlib only, no pip dependencies) +""" + +from __future__ import annotations + +import argparse +import concurrent.futures +import json +import logging +import os +import re +import shutil +import subprocess +import sys +import time +import urllib.error +import urllib.request +from dataclasses import dataclass, field +from http.client import HTTPResponse +from pathlib import Path +from typing import NoReturn +from urllib.request import Request + +log: logging.Logger = logging.getLogger("snapshot-download") + +CLUSTER_RPC: dict[str, str] = { + "mainnet-beta": "https://api.mainnet-beta.solana.com", + "testnet": "https://api.testnet.solana.com", + "devnet": "https://api.devnet.solana.com", +} + +# Snapshot filenames: +# snapshot--.tar.zst +# incremental-snapshot---.tar.zst +FULL_SNAP_RE: re.Pattern[str] = re.compile( + r"^snapshot-(\d+)-([A-Za-z0-9]+)\.tar\.(zst|bz2)$" +) +INCR_SNAP_RE: re.Pattern[str] = re.compile( + r"^incremental-snapshot-(\d+)-(\d+)-([A-Za-z0-9]+)\.tar\.(zst|bz2)$" +) + + +@dataclass +class SnapshotSource: + """A snapshot file available from a specific RPC node.""" + + rpc_address: str + # Full redirect paths as returned by the server (e.g. 
def rpc_post(url: str, method: str, params: list[object] | None = None,
             timeout: int = 25) -> object | None:
    """Send a JSON-RPC 2.0 POST and return the parsed 'result' field.

    Args:
        url: JSON-RPC endpoint.
        method: RPC method name (e.g. "getSlot").
        params: Positional RPC parameters; None is sent as an empty list.
        timeout: Socket timeout in seconds.

    Returns:
        The value of the response's 'result' key, or None when the request
        fails, the body is not valid JSON, the body is not a JSON object,
        or the object has no 'result' (e.g. a JSON-RPC error response).
        Failures are logged at DEBUG level and never raised.
    """
    payload: bytes = json.dumps({
        "jsonrpc": "2.0", "id": 1,
        "method": method, "params": params or [],
    }).encode()
    req = Request(url, data=payload,
                  headers={"Content-Type": "application/json"})
    try:
        with urllib.request.urlopen(req, timeout=timeout) as resp:
            data: object = json.loads(resp.read())
    # ValueError covers json.JSONDecodeError and the UnicodeDecodeError that
    # json.loads raises for undecodable bytes.
    except (urllib.error.URLError, ValueError, OSError, TimeoutError) as e:
        log.debug("rpc_post %s %s failed: %s", url, method, e)
        return None

    # A proxy or misbehaving node may answer with a JSON array or scalar;
    # treat that as an error instead of raising AttributeError on .get().
    if not isinstance(data, dict):
        log.debug("rpc_post %s %s failed: unexpected %s response",
                  url, method, type(data).__name__)
        return None
    if "error" in data:
        log.debug("rpc_post %s %s returned error: %s", url, method, data["error"])
    return data.get("result")
+ """ + opener: urllib.request.OpenerDirector = urllib.request.build_opener(_NoRedirectHandler) + req = Request(url, method="HEAD") + try: + start: float = time.monotonic() + resp: HTTPResponse = opener.open(req, timeout=timeout) # type: ignore[assignment] + latency: float = time.monotonic() - start + # Non-redirect (2xx) — server didn't redirect, not useful for discovery + location: str | None = resp.headers.get("Location") + resp.close() + return location, latency + except urllib.error.HTTPError as e: + # 3xx redirects raise HTTPError with the redirect info + latency = time.monotonic() - start # type: ignore[possibly-undefined] + location = e.headers.get("Location") + if location and 300 <= e.code < 400: + return location, latency + return None, 0.0 + except (urllib.error.URLError, OSError, TimeoutError): + return None, 0.0 + + +# -- Discovery ----------------------------------------------------------------- + + +def get_current_slot(rpc_url: str) -> int | None: + """Get current slot from RPC.""" + result: object | None = rpc_post(rpc_url, "getSlot") + if isinstance(result, int): + return result + return None + + +def get_cluster_rpc_nodes(rpc_url: str, version_filter: str | None = None) -> list[str]: + """Get all RPC node addresses from getClusterNodes.""" + result: object | None = rpc_post(rpc_url, "getClusterNodes") + if not isinstance(result, list): + return [] + + rpc_addrs: list[str] = [] + for node in result: + if not isinstance(node, dict): + continue + if version_filter is not None: + node_version: str | None = node.get("version") + if node_version and not node_version.startswith(version_filter): + continue + rpc: str | None = node.get("rpc") + if rpc: + rpc_addrs.append(rpc) + return list(set(rpc_addrs)) + + +def _parse_snapshot_filename(location: str) -> tuple[str, str | None]: + """Extract filename and full redirect path from Location header. + + Returns (filename, full_path). full_path includes any path prefix + the server returned (e.g. 
'/snapshots/snapshot-123-hash.tar.zst'). + """ + # Location may be absolute URL or relative path + if location.startswith("http://") or location.startswith("https://"): + # Absolute URL — extract path + from urllib.parse import urlparse + path: str = urlparse(location).path + else: + path = location + + filename: str = path.rsplit("/", 1)[-1] + return filename, path + + +def probe_rpc_snapshot( + rpc_address: str, + current_slot: int, + max_age_slots: int, + max_latency_ms: float, +) -> SnapshotSource | None: + """Probe a single RPC node for available snapshots. + + Probes for full snapshot first (required), then incremental. Records all + available files. Which files to actually download is decided at download + time based on what already exists locally — not here. + + Based on the discovery approach from etcusr/solana-snapshot-finder. + """ + full_url: str = f"http://{rpc_address}/snapshot.tar.bz2" + + # Full snapshot is required — every source must have one + full_location, full_latency = head_no_follow(full_url, timeout=2) + if not full_location: + return None + + latency_ms: float = full_latency * 1000 + if latency_ms > max_latency_ms: + return None + + full_filename, full_path = _parse_snapshot_filename(full_location) + fm: re.Match[str] | None = FULL_SNAP_RE.match(full_filename) + if not fm: + return None + + full_snap_slot: int = int(fm.group(1)) + slots_diff: int = current_slot - full_snap_slot + + if slots_diff > max_age_slots or slots_diff < -100: + return None + + file_paths: list[str] = [full_path] + + # Also check for incremental snapshot + inc_url: str = f"http://{rpc_address}/incremental-snapshot.tar.bz2" + inc_location, _ = head_no_follow(inc_url, timeout=2) + if inc_location: + inc_filename, inc_path = _parse_snapshot_filename(inc_location) + m: re.Match[str] | None = INCR_SNAP_RE.match(inc_filename) + if m: + inc_base_slot: int = int(m.group(1)) + # Incremental must be based on this source's full snapshot + if inc_base_slot == full_snap_slot: + 
def discover_sources(
    rpc_url: str,
    current_slot: int,
    max_age_slots: int,
    max_latency_ms: float,
    threads: int,
    version_filter: str | None,
) -> list[SnapshotSource]:
    """Discover all snapshot sources from the cluster.

    Fetches the RPC node list via get_cluster_rpc_nodes() and probes every
    node concurrently with probe_rpc_snapshot().

    Args:
        rpc_url: RPC endpoint used for getClusterNodes.
        current_slot: Current cluster slot, passed through to each probe.
        max_age_slots: Maximum snapshot age, passed through to each probe.
        max_latency_ms: Maximum probe latency, passed through to each probe.
        threads: Upper bound on concurrent probe threads (values below 1
            are treated as 1).
        version_filter: Optional node version prefix filter.

    Returns:
        Every SnapshotSource a probe returned, in completion order; an
        empty list when no RPC nodes were found.
    """
    rpc_nodes: list[str] = get_cluster_rpc_nodes(rpc_url, version_filter)
    if not rpc_nodes:
        log.error("No RPC nodes found via getClusterNodes")
        return []

    log.info("Found %d RPC nodes, probing for snapshots...", len(rpc_nodes))

    # No point in more threads than nodes; ThreadPoolExecutor rejects < 1.
    workers: int = max(1, min(threads, len(rpc_nodes)))

    sources: list[SnapshotSource] = []
    with concurrent.futures.ThreadPoolExecutor(max_workers=workers) as pool:
        futures: dict[concurrent.futures.Future[SnapshotSource | None], str] = {
            pool.submit(
                probe_rpc_snapshot, addr, current_slot,
                max_age_slots, max_latency_ms,
            ): addr
            for addr in rpc_nodes
        }
        completed = concurrent.futures.as_completed(futures)
        for done, future in enumerate(completed, start=1):
            if done % 200 == 0:
                log.info(" probed %d/%d nodes, %d sources found",
                         done, len(rpc_nodes), len(sources))
            try:
                result: SnapshotSource | None = future.result()
            except Exception as e:  # noqa: BLE001 - per-node isolation boundary
                # Probes talk to arbitrary third-party nodes; a malformed
                # HTTP reply can raise http.client.HTTPException, which is
                # not an OSError. One bad node must not abort discovery.
                log.debug("Probe failed for %s: %s", futures[future], e)
                continue
            if result:
                sources.append(result)

    log.info("Found %d RPC nodes with suitable snapshots", len(sources))
    return sources
Returns bytes/sec.""" + url: str = f"http://{rpc_address}/snapshot.tar.bz2" + req = Request(url) + try: + with urllib.request.urlopen(req, timeout=measure_time + 5) as resp: + start: float = time.monotonic() + total: int = 0 + while True: + elapsed: float = time.monotonic() - start + if elapsed >= measure_time: + break + chunk: bytes = resp.read(81920) + if not chunk: + break + total += len(chunk) + elapsed = time.monotonic() - start + if elapsed <= 0: + return 0.0 + return total / elapsed + except (urllib.error.URLError, OSError, TimeoutError): + return 0.0 + + +# -- Download ------------------------------------------------------------------ + + +def download_aria2c( + urls: list[str], + output_dir: str, + filename: str, + connections: int = 16, +) -> bool: + """Download a file using aria2c with parallel connections. + + When multiple URLs are provided, aria2c treats them as mirrors of the + same file and distributes chunks across all of them. + """ + num_mirrors: int = len(urls) + total_splits: int = max(connections, connections * num_mirrors) + cmd: list[str] = [ + "aria2c", + "--file-allocation=none", + "--continue=true", + f"--max-connection-per-server={connections}", + f"--split={total_splits}", + "--min-split-size=50M", + # aria2c retries individual chunk connections on transient network + # errors (TCP reset, timeout). This is transport-level retry analogous + # to TCP retransmit, not application-level retry of a failed operation. 
+ "--max-tries=5", + "--retry-wait=5", + "--timeout=60", + "--connect-timeout=10", + "--summary-interval=10", + "--console-log-level=notice", + f"--dir={output_dir}", + f"--out={filename}", + "--auto-file-renaming=false", + "--allow-overwrite=true", + *urls, + ] + + log.info("Downloading %s", filename) + log.info(" aria2c: %d connections × %d mirrors (%d splits)", + connections, num_mirrors, total_splits) + + start: float = time.monotonic() + result: subprocess.CompletedProcess[bytes] = subprocess.run(cmd) + elapsed: float = time.monotonic() - start + + if result.returncode != 0: + log.error("aria2c failed with exit code %d", result.returncode) + return False + + filepath: Path = Path(output_dir) / filename + if not filepath.exists(): + log.error("aria2c reported success but %s does not exist", filepath) + return False + + size_bytes: int = filepath.stat().st_size + size_gb: float = size_bytes / (1024 ** 3) + avg_mb: float = size_bytes / elapsed / (1024 ** 2) if elapsed > 0 else 0 + log.info(" Done: %.1f GB in %.0fs (%.1f MiB/s avg)", size_gb, elapsed, avg_mb) + return True + + +# -- Main ---------------------------------------------------------------------- + + +def main() -> int: + p: argparse.ArgumentParser = argparse.ArgumentParser( + description="Download Solana snapshots with aria2c parallel downloads", + ) + p.add_argument("-o", "--output", default="/srv/solana/snapshots", + help="Snapshot output directory (default: /srv/solana/snapshots)") + p.add_argument("-c", "--cluster", default="mainnet-beta", + choices=list(CLUSTER_RPC), + help="Solana cluster (default: mainnet-beta)") + p.add_argument("-r", "--rpc", default=None, + help="RPC URL for cluster discovery (default: public RPC)") + p.add_argument("-n", "--connections", type=int, default=16, + help="aria2c connections per download (default: 16)") + p.add_argument("-t", "--threads", type=int, default=500, + help="Threads for parallel RPC probing (default: 500)") + p.add_argument("--max-snapshot-age", 
type=int, default=1300, + help="Max snapshot age in slots (default: 1300)") + p.add_argument("--max-latency", type=float, default=100, + help="Max RPC probe latency in ms (default: 100)") + p.add_argument("--min-download-speed", type=int, default=20, + help="Min download speed in MiB/s (default: 20)") + p.add_argument("--measurement-time", type=int, default=7, + help="Speed measurement duration in seconds (default: 7)") + p.add_argument("--max-speed-checks", type=int, default=15, + help="Max nodes to benchmark before giving up (default: 15)") + p.add_argument("--version", default=None, + help="Filter nodes by version prefix (e.g. '2.2')") + p.add_argument("--full-only", action="store_true", + help="Download only full snapshot, skip incremental") + p.add_argument("--dry-run", action="store_true", + help="Find best source and print URL, don't download") + p.add_argument("-v", "--verbose", action="store_true") + args: argparse.Namespace = p.parse_args() + + logging.basicConfig( + level=logging.DEBUG if args.verbose else logging.INFO, + format="%(asctime)s %(levelname)s %(message)s", + datefmt="%H:%M:%S", + ) + + rpc_url: str = args.rpc or CLUSTER_RPC[args.cluster] + + # aria2c is required for actual downloads (not dry-run) + if not args.dry_run and not shutil.which("aria2c"): + log.error("aria2c not found. 
Install with: apt install aria2") + return 1 + + # Get current slot + log.info("Cluster: %s | RPC: %s", args.cluster, rpc_url) + current_slot: int | None = get_current_slot(rpc_url) + if current_slot is None: + log.error("Cannot get current slot from %s", rpc_url) + return 1 + log.info("Current slot: %d", current_slot) + + # Discover sources + sources: list[SnapshotSource] = discover_sources( + rpc_url, current_slot, + max_age_slots=args.max_snapshot_age, + max_latency_ms=args.max_latency, + threads=args.threads, + version_filter=args.version, + ) + if not sources: + log.error("No snapshot sources found") + return 1 + + # Sort by latency (lowest first) for speed benchmarking + sources.sort(key=lambda s: s.latency_ms) + + # Benchmark top candidates — all speeds in MiB/s (binary, 1 MiB = 1048576 bytes) + log.info("Benchmarking download speed on top %d sources...", args.max_speed_checks) + fast_sources: list[SnapshotSource] = [] + checked: int = 0 + min_speed_bytes: int = args.min_download_speed * 1024 * 1024 # MiB to bytes + + for source in sources: + if checked >= args.max_speed_checks: + break + checked += 1 + + speed: float = measure_speed(source.rpc_address, args.measurement_time) + source.download_speed = speed + speed_mib: float = speed / (1024 ** 2) + + if speed < min_speed_bytes: + log.info(" %s: %.1f MiB/s (too slow, need >=%d MiB/s)", + source.rpc_address, speed_mib, args.min_download_speed) + continue + + log.info(" %s: %.1f MiB/s (latency: %.0fms, age: %d slots)", + source.rpc_address, speed_mib, + source.latency_ms, source.slots_diff) + fast_sources.append(source) + + if not fast_sources: + log.error("No source met minimum speed requirement (%d MiB/s)", + args.min_download_speed) + log.info("Try: --min-download-speed 10") + return 1 + + # Use the fastest source as primary, collect mirrors for each file + best: SnapshotSource = fast_sources[0] + file_paths: list[str] = best.file_paths + if args.full_only: + file_paths = [fp for fp in file_paths + if 
fp.rsplit("/", 1)[-1].startswith("snapshot-")] + + # Build mirror URL lists: for each file, collect URLs from all fast sources + # that serve the same filename + download_plan: list[tuple[str, list[str]]] = [] + for fp in file_paths: + filename: str = fp.rsplit("/", 1)[-1] + mirror_urls: list[str] = [f"http://{best.rpc_address}{fp}"] + for other in fast_sources[1:]: + for other_fp in other.file_paths: + if other_fp.rsplit("/", 1)[-1] == filename: + mirror_urls.append(f"http://{other.rpc_address}{other_fp}") + break + download_plan.append((filename, mirror_urls)) + + speed_mib: float = best.download_speed / (1024 ** 2) + log.info("Best source: %s (%.1f MiB/s), %d mirrors total", + best.rpc_address, speed_mib, len(fast_sources)) + for filename, mirror_urls in download_plan: + log.info(" %s (%d mirrors)", filename, len(mirror_urls)) + for url in mirror_urls: + log.info(" %s", url) + + if args.dry_run: + for _, mirror_urls in download_plan: + for url in mirror_urls: + print(url) + return 0 + + # Download — skip files that already exist locally + os.makedirs(args.output, exist_ok=True) + total_start: float = time.monotonic() + + for filename, mirror_urls in download_plan: + filepath: Path = Path(args.output) / filename + if filepath.exists() and filepath.stat().st_size > 0: + log.info("Skipping %s (already exists: %.1f GB)", + filename, filepath.stat().st_size / (1024 ** 3)) + continue + if not download_aria2c(mirror_urls, args.output, filename, args.connections): + log.error("Failed to download %s", filename) + return 1 + + total_elapsed: float = time.monotonic() - total_start + log.info("All downloads complete in %.0fs", total_elapsed) + for filename, _ in download_plan: + fp: Path = Path(args.output) / filename + if fp.exists(): + log.info(" %s (%.1f GB)", fp.name, fp.stat().st_size / (1024 ** 3)) + + return 0 + + +if __name__ == "__main__": + sys.exit(main()) From 742e84e3b08b9329e4e70b7a9c0abcb765d21744 Mon Sep 17 00:00:00 2001 From: "A. F. 
Dudley" Date: Sat, 7 Mar 2026 01:47:58 +0000 Subject: [PATCH 04/62] feat: dedicated GRE tunnel (Tunnel100) bypassing DZ-managed Tunnel500 Root cause: the doublezero-agent on mia-sw01 manages Tunnel500's ACL (SEC-USER-500-IN) and drops outbound gossip with src 137.239.194.65. The agent overwrites any custom ACL entries. Fix: create a separate GRE tunnel (Tunnel100) using mia-sw01's free LAN IP (209.42.167.137) as tunnel source. This tunnel goes over the ISP uplink, completely independent of the DZ overlay: - mia-sw01: Tunnel100 src 209.42.167.137, dst 186.233.184.235 - biscayne: gre-ashburn src 186.233.184.235, dst 209.42.167.137 - Link addresses: 169.254.100.0/31 Playbook changes: - ashburn-relay-mia-sw01: Tunnel100 + Loopback101 + SEC-VALIDATOR-100-IN - ashburn-relay-biscayne: gre-ashburn tunnel + updated policy routing - New template: ashburn-routing-ifup.sh.j2 for boot persistence Co-Authored-By: Claude Opus 4.6 --- docs/bug-ashburn-tunnel-port-filtering.md | 104 ++++++-------- playbooks/ashburn-relay-biscayne.yml | 137 ++++++++++++++----- playbooks/ashburn-relay-mia-sw01.yml | 150 +++++++++++++-------- playbooks/files/ashburn-routing-ifup.sh.j2 | 28 ++++ 4 files changed, 261 insertions(+), 158 deletions(-) create mode 100644 playbooks/files/ashburn-routing-ifup.sh.j2 diff --git a/docs/bug-ashburn-tunnel-port-filtering.md b/docs/bug-ashburn-tunnel-port-filtering.md index 865e3b93..913a610b 100644 --- a/docs/bug-ashburn-tunnel-port-filtering.md +++ b/docs/bug-ashburn-tunnel-port-filtering.md @@ -1,85 +1,61 @@ -# Bug: Ashburn Relay — 137.239.194.65 Not Routable from Public Internet +# Bug: Ashburn Relay — Outbound Gossip Dropped by DZ Agent ACL ## Summary `--gossip-host 137.239.194.65` correctly advertises the Ashburn relay IP in -ContactInfo for all sockets (gossip, TVU, repair, TPU). However, 137.239.194.65 -is a DoubleZero overlay IP (137.239.192.0/19, IS-IS only) that is NOT announced -via BGP to the public internet. 
Public peers cannot route to it, so TVU shreds, -repair requests, and TPU traffic never arrive at was-sw01. +ContactInfo for all sockets (gossip, TVU, repair, TPU). The inbound path +works end-to-end (proven with kelce UDP tests through every hop). However, +outbound gossip from biscayne (src 137.239.194.65) is dropped by the +DoubleZero agent's ACL on mia-sw01's Tunnel500, preventing ContactInfo from +propagating to the cluster. Peers never learn our TVU address. ## Evidence -- Gossip traffic arrives on `doublezero0` interface: +- Inbound path confirmed hop by hop (kelce → was-sw01 → mia-sw01 → Tunnel500 + → biscayne doublezero0 → DNAT → kind bridge → kind node eth0): ``` - doublezero0 In IP 64.130.58.70.8001 > 137.239.194.65.8001: UDP, length 132 + 01:04:12.136633 IP 69.112.108.72.58856 > 172.20.0.2.9000: UDP, length 13 ``` -- Zero TVU/repair traffic arrives: +- Outbound gossip leaves biscayne correctly (src 137.239.194.65:8001 on + doublezero0), enters mia-sw01 via Tunnel500, hits SEC-USER-500-IN ACL: ``` - tcpdump -i doublezero0 'dst host 137.239.194.65 and udp and not port 8001' - 0 packets captured + 60 deny ip any any [match 26355968 packets, 0:00:02 ago] ``` -- ContactInfo correctly advertises all sockets on 137.239.194.65: - ```json - { - "gossip": "137.239.194.65:8001", - "tvu": "137.239.194.65:9000", - "serveRepair": "137.239.194.65:9011", - "tpu": "137.239.194.65:9002" - } - ``` -- Outbound gossip from biscayne exits via `doublezero0` with source - 137.239.194.65 — SNAT and routing work correctly in the outbound direction. + The ACL only permits src 186.233.184.235 and 169.254.7.7 — not 137.239.194.65. +- Validator not visible in public RPC getClusterNodes (gossip not propagating) +- Validator sees 775 nodes vs 5,045 on public RPC ## Root Cause -**137.239.194.0/24 is not routable from the public internet.** The prefix -belongs to DoubleZero's overlay address space (137.239.192.0/19, Momentum -Telecom, WHOIS OriginAS: empty). 
It is advertised only via IS-IS within the -DoubleZero switch mesh. There is no eBGP session on was-sw01 to advertise it -to the ISP — all BGP peers are iBGP AS 65342 (DoubleZero internal). +The `doublezero-agent` daemon on mia-sw01 manages Tunnel500 and its ACL +(SEC-USER-500-IN). The agent periodically reconciles the ACL to its expected +state, overwriting any custom entries we add. We cannot modify the ACL +without the agent reverting it. -When the validator advertises `tvu: 137.239.194.65:9000` in ContactInfo, -public internet peers attempt to send turbine shreds to that IP, but the -packets have no route through the global BGP table to reach was-sw01. Only -DoubleZero-connected peers could potentially reach it via the overlay. +137.239.194.65 is from the was-sw01 LAN block (137.239.194.64/29), routed +by the ISP to was-sw01 via the WAN link. It IS publicly routable (confirmed +by kelce ping/UDP tests). The earlier hypothesis that it was unroutable was +wrong — the IP reaches was-sw01, gets forwarded to mia-sw01 via backbone, +and reaches biscayne through Tunnel500 (inbound ACL direction is fine). -The old shred relay pipeline worked because it used `--public-tvu-address -64.92.84.81:20000` — was-sw01's Et1/1 ISP uplink IP, which IS publicly -routable. The `--gossip-host 137.239.194.65` approach advertises a -DoubleZero-only IP for ALL sockets, making TVU/repair/TPU unreachable from -non-DoubleZero peers. +The problem is outbound only: the Tunnel500 ingress ACL (traffic FROM +biscayne TO mia-sw01) drops src 137.239.194.65. -The original hypothesis (ACL/PBR port filtering) was wrong. The tunnel and -switch routing work correctly — the problem is upstream: traffic never arrives -at was-sw01 in the first place. +## Fix -## Impact +Create a dedicated GRE tunnel (Tunnel100) between biscayne and mia-sw01 +that bypasses the DZ-managed Tunnel500 entirely: -The validator cannot receive turbine shreds or serve repair requests via the -low-latency Ashburn path. 
It falls back to the Miami public IP (186.233.184.235) -for all shred/repair traffic, negating the benefit of `--gossip-host`. +- **mia-sw01 Tunnel100**: src 209.42.167.137 (free LAN IP), dst 186.233.184.235 + (biscayne), link 169.254.100.0/31, ACL SEC-VALIDATOR-100-IN (we control) +- **biscayne gre-ashburn**: src 186.233.184.235, dst 209.42.167.137, + link 169.254.100.1/31 -## Fix Options +Traffic flow is unchanged except for the tunnel: +- Inbound: was-sw01 → backbone → mia-sw01 → Tunnel100 → biscayne → DNAT → agave +- Outbound: agave → SNAT 137.239.194.65 → Tunnel100 → mia-sw01 → backbone → was-sw01 -1. **Use 64.92.84.81 (was-sw01 Et1/1) for ContactInfo sockets.** This is the - publicly routable Ashburn IP. Requires `--gossip-host 64.92.84.81` (or - equivalent `--bind-address` config) and DNAT/forwarding on was-sw01 to relay - traffic through the backbone → mia-sw01 → Tunnel500 → biscayne. The old - `--public-tvu-address` pipeline used this IP successfully. - -2. **Get DoubleZero to announce 137.239.194.0/24 via eBGP to the ISP.** This - would make the current `--gossip-host 137.239.194.65` setup work, but - requires coordination with DoubleZero operations. - -3. **Hybrid approach**: Use 64.92.84.81 for public-facing sockets (TVU, repair, - TPU) and 137.239.194.65 for gossip (which works via DoubleZero overlay). - Requires agave to support per-protocol address binding, which it does not - (`--gossip-host` sets ALL sockets to the same IP). - -## Previous Workaround - -The old `--public-tvu-address` pipeline used socat + shred-unwrap.py to relay -shreds from 64.92.84.81:20000 to the validator. That pipeline is not persistent -across reboots and was superseded by the `--gossip-host` approach (which turned -out to be broken for non-DoubleZero peers). 
+See: +- `playbooks/ashburn-relay-mia-sw01.yml` (Tunnel100 + ACL + routes) +- `playbooks/ashburn-relay-biscayne.yml` (gre-ashburn + DNAT + SNAT + policy routing) +- `playbooks/ashburn-relay-was-sw01.yml` (static route, unchanged) diff --git a/playbooks/ashburn-relay-biscayne.yml b/playbooks/ashburn-relay-biscayne.yml index 09e0ff74..a762a878 100644 --- a/playbooks/ashburn-relay-biscayne.yml +++ b/playbooks/ashburn-relay-biscayne.yml @@ -2,7 +2,12 @@ # Configure biscayne for Ashburn validator relay # # Sets up inbound DNAT (137.239.194.65 → kind node) and outbound SNAT + -# policy routing (validator traffic → doublezero0 → mia-sw01 → was-sw01). +# policy routing (validator traffic → GRE tunnel → mia-sw01 → was-sw01). +# +# Uses a dedicated GRE tunnel to mia-sw01 (NOT the DoubleZero-managed +# doublezero0/Tunnel500). The tunnel source is biscayne's public IP +# (186.233.184.235) and the destination is mia-sw01's free LAN IP +# (209.42.167.137). # # Usage: # # Full setup (inbound + outbound) @@ -28,8 +33,12 @@ ashburn_ip: 137.239.194.65 kind_node_ip: 172.20.0.2 kind_network: 172.20.0.0/16 - tunnel_gateway: 169.254.7.6 - tunnel_device: doublezero0 + # New dedicated GRE tunnel (not DZ-managed doublezero0) + tunnel_device: gre-ashburn + tunnel_local_ip: 169.254.100.1 # biscayne end of /31 + tunnel_remote_ip: 169.254.100.0 # mia-sw01 end of /31 + tunnel_src: 186.233.184.235 # biscayne public IP + tunnel_dst: 209.42.167.137 # mia-sw01 free LAN IP fwmark: 100 rt_table_name: ashburn rt_table_id: 100 @@ -49,6 +58,15 @@ ansible.builtin.command: cmd: ip addr del {{ ashburn_ip }}/32 dev lo failed_when: false + changed_when: false + + - name: Remove GRE tunnel + ansible.builtin.shell: + cmd: | + ip link set {{ tunnel_device }} down 2>/dev/null || true + ip tunnel del {{ tunnel_device }} 2>/dev/null || true + executable: /bin/bash + changed_when: false - name: Remove inbound DNAT rules ansible.builtin.shell: @@ -58,6 +76,7 @@ iptables -t nat -D PREROUTING -p tcp -d {{ ashburn_ip 
}} --dport {{ gossip_port }} -j DNAT --to-destination {{ kind_node_ip }}:{{ gossip_port }} 2>/dev/null || true iptables -t nat -D PREROUTING -p udp -d {{ ashburn_ip }} --dport {{ dynamic_port_range_start }}:{{ dynamic_port_range_end }} -j DNAT --to-destination {{ kind_node_ip }} 2>/dev/null || true executable: /bin/bash + changed_when: false - name: Remove outbound mangle rules ansible.builtin.shell: @@ -67,11 +86,13 @@ iptables -t mangle -D PREROUTING -s {{ kind_network }} -p udp --sport {{ dynamic_port_range_start }}:{{ dynamic_port_range_end }} -j MARK --set-mark {{ fwmark }} 2>/dev/null || true iptables -t mangle -D PREROUTING -s {{ kind_network }} -p tcp --sport {{ gossip_port }} -j MARK --set-mark {{ fwmark }} 2>/dev/null || true executable: /bin/bash + changed_when: false - name: Remove outbound SNAT rule ansible.builtin.shell: cmd: iptables -t nat -D POSTROUTING -m mark --mark {{ fwmark }} -j SNAT --to-source {{ ashburn_ip }} 2>/dev/null || true executable: /bin/bash + changed_when: false - name: Remove policy routing ansible.builtin.shell: @@ -79,10 +100,12 @@ ip rule del fwmark {{ fwmark }} table {{ rt_table_name }} 2>/dev/null || true ip route del default table {{ rt_table_name }} 2>/dev/null || true executable: /bin/bash + changed_when: false - name: Persist cleaned iptables ansible.builtin.command: cmd: netfilter-persistent save + changed_when: true - name: Remove if-up.d script ansible.builtin.file: @@ -91,7 +114,7 @@ - name: Rollback complete ansible.builtin.debug: - msg: "Ashburn relay rules removed. Old SHRED-RELAY DNAT (64.92.84.81:20000) is still in place." + msg: "Ashburn relay rules removed." 
- name: End play after rollback ansible.builtin.meta: end_play @@ -99,13 +122,13 @@ # ------------------------------------------------------------------ # Pre-flight checks # ------------------------------------------------------------------ - - name: Check doublezero0 tunnel is up + - name: Check tunnel destination is reachable ansible.builtin.command: - cmd: ip link show {{ tunnel_device }} - register: tunnel_status + cmd: ping -c 1 -W 2 {{ tunnel_dst }} + register: tunnel_dst_ping changed_when: false - failed_when: "'UP' not in tunnel_status.stdout" - tags: [preflight, inbound, outbound] + failed_when: tunnel_dst_ping.rc != 0 + tags: [preflight, outbound] - name: Check kind node is reachable ansible.builtin.command: @@ -115,23 +138,6 @@ failed_when: kind_ping.rc != 0 tags: [preflight, inbound] - - name: Verify Docker preserves source ports (5 sec sample) - ansible.builtin.shell: - cmd: | - set -o pipefail - # Check if any validator traffic is flowing with original sport - timeout 5 tcpdump -i br-cf46a62ab5b2 -nn -c 5 'udp src port 8001 or udp src portrange 9000-9025' 2>&1 | tail -5 || echo "No validator traffic captured in 5s (validator may not be running)" - executable: /bin/bash - register: sport_check - changed_when: false - failed_when: false - tags: [preflight] - - - name: Show sport preservation check - ansible.builtin.debug: - var: sport_check.stdout_lines - tags: [preflight] - - name: Show existing iptables nat rules ansible.builtin.shell: cmd: iptables -t nat -L -v -n --line-numbers | head -60 @@ -145,6 +151,44 @@ var: existing_nat.stdout_lines tags: [preflight] + - name: Check for existing GRE tunnel + ansible.builtin.shell: + cmd: ip tunnel show {{ tunnel_device }} 2>&1 || echo "tunnel does not exist" + executable: /bin/bash + register: existing_tunnel + changed_when: false + tags: [preflight] + + - name: Display existing tunnel + ansible.builtin.debug: + var: existing_tunnel.stdout_lines + tags: [preflight] + + # 
------------------------------------------------------------------ + # GRE tunnel setup + # ------------------------------------------------------------------ + - name: Create GRE tunnel + ansible.builtin.shell: + cmd: | + set -o pipefail + if ip tunnel show {{ tunnel_device }} 2>/dev/null; then + echo "tunnel already exists" + else + ip tunnel add {{ tunnel_device }} mode gre local {{ tunnel_src }} remote {{ tunnel_dst }} ttl 64 + ip addr add {{ tunnel_local_ip }}/31 dev {{ tunnel_device }} + ip link set {{ tunnel_device }} up mtu 8972 + echo "tunnel created" + fi + executable: /bin/bash + register: tunnel_result + changed_when: "'created' in tunnel_result.stdout" + tags: [outbound] + + - name: Show tunnel result + ansible.builtin.debug: + var: tunnel_result.stdout_lines + tags: [outbound] + # ------------------------------------------------------------------ # Inbound: DNAT for 137.239.194.65 → kind node # ------------------------------------------------------------------ @@ -186,7 +230,7 @@ tags: [inbound] # ------------------------------------------------------------------ - # Outbound: fwmark + SNAT + policy routing + # Outbound: fwmark + SNAT + policy routing via new tunnel # ------------------------------------------------------------------ - name: Mark outbound validator traffic (mangle PREROUTING) ansible.builtin.shell: @@ -218,7 +262,6 @@ ansible.builtin.shell: cmd: | set -o pipefail - # Check if rule already exists if iptables -t nat -C POSTROUTING -m mark --mark {{ fwmark }} -j SNAT --to-source {{ ashburn_ip }} 2>/dev/null; then echo "SNAT rule already exists" else @@ -256,9 +299,9 @@ changed_when: "'added' in rule_result.stdout" tags: [outbound] - - name: Add default route via doublezero0 in ashburn table + - name: Add default route via GRE tunnel in ashburn table ansible.builtin.shell: - cmd: ip route replace default via {{ tunnel_gateway }} dev {{ tunnel_device }} table {{ rt_table_name }} + cmd: ip route replace default via {{ tunnel_remote_ip }} 
dev {{ tunnel_device }} table {{ rt_table_name }} executable: /bin/bash changed_when: true tags: [outbound] @@ -269,11 +312,12 @@ - name: Save iptables rules ansible.builtin.command: cmd: netfilter-persistent save + changed_when: true tags: [inbound, outbound] - name: Install if-up.d persistence script - ansible.builtin.copy: - src: files/ashburn-routing-ifup.sh + ansible.builtin.template: + src: files/ashburn-routing-ifup.sh.j2 dest: /etc/network/if-up.d/ashburn-routing mode: '0755' owner: root @@ -283,6 +327,22 @@ # ------------------------------------------------------------------ # Verification # ------------------------------------------------------------------ + - name: Show tunnel status + ansible.builtin.shell: + cmd: | + echo "=== tunnel ===" + ip tunnel show {{ tunnel_device }} + echo "" + echo "=== tunnel addr ===" + ip addr show {{ tunnel_device }} + echo "" + echo "=== ping tunnel peer ===" + ping -c 1 -W 2 {{ tunnel_remote_ip }} 2>&1 || echo "tunnel peer unreachable" + executable: /bin/bash + register: tunnel_status + changed_when: false + tags: [outbound] + - name: Show NAT rules ansible.builtin.shell: cmd: iptables -t nat -L -v -n --line-numbers 2>&1 | head -40 @@ -323,6 +383,7 @@ - name: Display verification ansible.builtin.debug: msg: + tunnel: "{{ tunnel_status.stdout_lines | default([]) }}" nat_rules: "{{ nat_rules.stdout_lines }}" mangle_rules: "{{ mangle_rules.stdout_lines | default([]) }}" routing: "{{ routing_info.stdout_lines | default([]) }}" @@ -334,12 +395,14 @@ msg: | === Ashburn Relay Setup Complete === Ashburn IP: {{ ashburn_ip }} (on lo) + GRE tunnel: {{ tunnel_device }} ({{ tunnel_src }} → {{ tunnel_dst }}) + link: {{ tunnel_local_ip }}/31 ↔ {{ tunnel_remote_ip }}/31 Inbound DNAT: {{ ashburn_ip }}:8001,9000-9025 → {{ kind_node_ip }} Outbound SNAT: {{ kind_network }} sport 8001,9000-9025 → {{ ashburn_ip }} - Policy route: fwmark {{ fwmark }} → table {{ rt_table_name }} → via {{ tunnel_gateway }} dev {{ tunnel_device }} - Persisted: 
iptables-persistent + /etc/network/if-up.d/ashburn-routing + Policy route: fwmark {{ fwmark }} → table {{ rt_table_name }} → via {{ tunnel_remote_ip }} dev {{ tunnel_device }} Next steps: - 1. Verify inbound: ping {{ ashburn_ip }} from external host - 2. Verify outbound: tcpdump on was-sw01 for src {{ ashburn_ip }} - 3. Check validator gossip ContactInfo shows {{ ashburn_ip }} for all addresses + 1. Apply mia-sw01 config (Tunnel100 must be up on both sides) + 2. Verify tunnel: ping {{ tunnel_remote_ip }} + 3. Test from kelce: echo test | nc -u -w 1 137.239.194.65 9000 + 4. Check validator gossip ContactInfo shows {{ ashburn_ip }} for all addresses diff --git a/playbooks/ashburn-relay-mia-sw01.yml b/playbooks/ashburn-relay-mia-sw01.yml index 76e08082..3cdd1aca 100644 --- a/playbooks/ashburn-relay-mia-sw01.yml +++ b/playbooks/ashburn-relay-mia-sw01.yml @@ -1,22 +1,18 @@ --- -# Configure laconic-mia-sw01 for validator traffic relay (inbound + outbound) +# Configure laconic-mia-sw01 for validator traffic relay via dedicated GRE tunnel # -# Outbound: Redirects outbound traffic from biscayne (src 137.239.194.65) -# arriving via the doublezero0 GRE tunnel to was-sw01 via the backbone, -# preventing BCP38 drops at mia-sw01's ISP uplink. +# Creates a NEW GRE tunnel (Tunnel100) separate from the DoubleZero-managed +# Tunnel500. The DZ agent controls Tunnel500's ACL (SEC-USER-500-IN) and +# overwrites any custom entries, so we cannot use it for validator traffic +# with src 137.239.194.65. # -# Inbound: Routes traffic destined to 137.239.194.65 from the default VRF -# to biscayne via Tunnel500 in vrf1. Without this, mia-sw01 sends -# 137.239.194.65 out the ISP uplink back to was-sw01 (routing loop). +# Tunnel100 uses mia-sw01's free LAN IP (209.42.167.137) as the tunnel +# source, and biscayne's public IP (186.233.184.235) as the destination. +# This tunnel carries traffic over the ISP uplink, completely independent +# of the DoubleZero overlay. 
# -# Approach: The existing per-tunnel ACL (SEC-USER-500-IN) controls what -# traffic enters vrf1 from Tunnel500. We add 137.239.194.65 to the ACL -# and add a default route in vrf1 via egress-vrf default pointing to -# was-sw01's backbone IP. For inbound, an inter-VRF static route in the -# default VRF forwards 137.239.194.65/32 to biscayne via Tunnel500. -# -# The other vrf1 tunnels (502, 504, 505) have their own ACLs that only -# permit their specific source IPs, so the default route won't affect them. +# Inbound: was-sw01 → backbone Et4/1 → mia-sw01 → Tunnel100 → biscayne +# Outbound: biscayne → Tunnel100 → mia-sw01 → backbone Et4/1 → was-sw01 # # Usage: # # Pre-flight checks only (safe, read-only) @@ -32,22 +28,28 @@ # # Rollback # ansible-playbook -i inventory/switches.yml playbooks/ashburn-relay-mia-sw01.yml -e rollback=true -- name: Configure mia-sw01 outbound validator redirect +- name: Configure mia-sw01 validator relay tunnel hosts: mia-sw01 gather_facts: false vars: ashburn_ip: 137.239.194.65 + biscayne_ip: 186.233.184.235 apply: false commit: false rollback: false - tunnel_interface: Tunnel500 - tunnel_vrf: vrf1 - tunnel_acl: SEC-USER-500-IN - tunnel_nexthop: 169.254.7.7 # biscayne's end of the Tunnel500 /31 + # New tunnel — not managed by DZ agent + tunnel_interface: Tunnel100 + tunnel_source_ip: 209.42.167.137 # mia-sw01 free LAN IP + tunnel_local: 169.254.100.0 # /31 link, mia-sw01 side + tunnel_remote: 169.254.100.1 # /31 link, biscayne side + tunnel_acl: SEC-VALIDATOR-100-IN + # Loopback for tunnel source (so it's always up) + tunnel_source_lo: Loopback101 backbone_interface: Ethernet4/1 - session_name: validator-outbound - checkpoint_name: pre-validator-outbound + backbone_peer: 172.16.1.188 # was-sw01 backbone IP + session_name: validator-tunnel + checkpoint_name: pre-validator-tunnel tasks: # ------------------------------------------------------------------ @@ -93,43 +95,52 @@ # 
------------------------------------------------------------------ # Pre-flight checks (always run unless commit/rollback) # ------------------------------------------------------------------ - - name: Show tunnel interface config + - name: Check existing tunnel interfaces + arista.eos.eos_command: + commands: + - show ip interface brief | include Tunnel + register: existing_tunnels + tags: [preflight] + + - name: Display existing tunnels + ansible.builtin.debug: + var: existing_tunnels.stdout_lines + tags: [preflight] + + - name: Check if Tunnel100 already exists arista.eos.eos_command: commands: - "show running-config interfaces {{ tunnel_interface }}" register: tunnel_config tags: [preflight] - - name: Display tunnel config + - name: Display Tunnel100 config ansible.builtin.debug: var: tunnel_config.stdout_lines tags: [preflight] - - name: Show tunnel ACL + - name: Check if Loopback101 already exists arista.eos.eos_command: commands: - - "show running-config | section ip access-list {{ tunnel_acl }}" - register: acl_config + - "show running-config interfaces {{ tunnel_source_lo }}" + register: lo_config tags: [preflight] - - name: Display tunnel ACL + - name: Display Loopback101 config ansible.builtin.debug: - var: acl_config.stdout_lines + var: lo_config.stdout_lines tags: [preflight] - - name: Check VRF routing + - name: Check route for ashburn IP arista.eos.eos_command: commands: - - "show ip route vrf {{ tunnel_vrf }} 0.0.0.0/0" - - "show ip route vrf {{ tunnel_vrf }} {{ backbone_peer }}" - - "show ip route {{ backbone_peer }}" - "show ip route {{ ashburn_ip }}" - register: vrf_routing + register: route_check tags: [preflight] - - name: Display VRF routing check + - name: Display route check ansible.builtin.debug: - var: vrf_routing.stdout_lines + var: route_check.stdout_lines tags: [preflight] - name: Pre-flight summary @@ -138,9 +149,17 @@ msg: | === Pre-flight complete === Review the output above: - 1. 
{{ tunnel_interface }} ACL ({{ tunnel_acl }}): does it permit src {{ ashburn_ip }}? - 2. {{ tunnel_vrf }} default route: does one exist? - 3. Backbone nexthop {{ backbone_peer }}: reachable in default VRF? + 1. Does {{ tunnel_interface }} already exist? + 2. Does {{ tunnel_source_lo }} already exist? + 3. Current route for {{ ashburn_ip }} + + Planned config: + - {{ tunnel_source_lo }}: {{ tunnel_source_ip }}/32 + - {{ tunnel_interface }}: GRE src {{ tunnel_source_ip }} dst {{ biscayne_ip }} + link address {{ tunnel_local }}/31 + ACL {{ tunnel_acl }}: permit src {{ ashburn_ip }}, permit src {{ tunnel_remote }} + - Route: {{ ashburn_ip }}/32 via {{ tunnel_remote }} + - Outbound default for tunnel traffic: 0.0.0.0/0 via {{ backbone_interface }} {{ backbone_peer }} To apply config: ansible-playbook -i inventory/switches.yml playbooks/ashburn-relay-mia-sw01.yml \ @@ -163,18 +182,33 @@ arista.eos.eos_command: commands: - command: "configure session {{ session_name }}" - # Permit Ashburn IP through the tunnel ACL (insert before deny) - - command: "ip access-list {{ tunnel_acl }}" - - command: "45 permit ip host {{ ashburn_ip }} any" + # Loopback for tunnel source (always-up interface) + - command: "interface {{ tunnel_source_lo }}" + - command: "ip address {{ tunnel_source_ip }}/32" - command: exit - # Default route in vrf1 via backbone to was-sw01 (egress-vrf default) - # Safe because per-tunnel ACLs already restrict what enters vrf1 - - command: "ip route vrf {{ tunnel_vrf }} 0.0.0.0/0 egress-vrf default {{ backbone_interface }} {{ backbone_peer }}" - # Inbound: route traffic for ashburn IP from default VRF to biscayne via tunnel. - # Without this, mia-sw01 sends 137.239.194.65 out the ISP uplink → routing loop. - # NOTE: nexthop only, no interface — EOS silently drops cross-VRF routes that - # specify a tunnel interface (accepts in config but never installs in RIB). 
- - command: "ip route {{ ashburn_ip }}/32 egress-vrf {{ tunnel_vrf }} {{ tunnel_nexthop }}" + # ACL for the new tunnel — we control this, DZ agent won't touch it + - command: "ip access-list {{ tunnel_acl }}" + - command: "counters per-entry" + - command: "10 permit icmp host {{ tunnel_remote }} any" + - command: "20 permit ip host {{ ashburn_ip }} any" + - command: "30 permit ip host {{ tunnel_remote }} any" + - command: "100 deny ip any any" + - command: exit + # New GRE tunnel + - command: "interface {{ tunnel_interface }}" + - command: "mtu 9216" + - command: "ip address {{ tunnel_local }}/31" + - command: "ip access-group {{ tunnel_acl }} in" + - command: "tunnel mode gre" + - command: "tunnel source {{ tunnel_source_ip }}" + - command: "tunnel destination {{ biscayne_ip }}" + - command: exit + # Inbound: route ashburn IP to biscayne via the new tunnel + - command: "ip route {{ ashburn_ip }}/32 {{ tunnel_remote }}" + # Outbound: biscayne's traffic should exit via backbone to was-sw01. + # TODO: no outbound route is configured in this session yet; a specific + # route toward was-sw01 is still needed (avoid a blanket default route). + # (The switch's actual default route is via Et1/1 ISP uplink.) - name: Show session diff arista.eos.eos_command: @@ -199,9 +233,11 @@ - name: Verify config arista.eos.eos_command: commands: - - "show running-config | section ip access-list {{ tunnel_acl }}" - - "show ip route vrf {{ tunnel_vrf }} 0.0.0.0/0" + - "show running-config interfaces {{ tunnel_source_lo }}" + - "show running-config interfaces {{ tunnel_interface }}" + - "show ip access-lists {{ tunnel_acl }}" - "show ip route {{ ashburn_ip }}" + - "show interfaces {{ tunnel_interface }} status" register: verify - name: Display verification @@ -216,14 +252,14 @@ Checkpoint: {{ checkpoint_name }} Changes applied: - 1. ACL {{ tunnel_acl }}: added "45 permit ip host {{ ashburn_ip }} any" - 2. Default route in {{ tunnel_vrf }}: 0.0.0.0/0 egress-vrf default {{ backbone_interface }} {{ backbone_peer }} - 3. 
Inbound route: {{ ashburn_ip }}/32 egress-vrf {{ tunnel_vrf }} {{ tunnel_nexthop }} + 1. {{ tunnel_source_lo }}: {{ tunnel_source_ip }}/32 + 2. {{ tunnel_interface }}: GRE tunnel to {{ biscayne_ip }} + link {{ tunnel_local }}/31, ACL {{ tunnel_acl }} + 3. Route: {{ ashburn_ip }}/32 via {{ tunnel_remote }} The config will auto-revert in 5 minutes unless committed. Verify on the switch, then commit: - configure session {{ session_name }} commit - write memory + ansible-playbook ... -e commit=true To revert immediately: ansible-playbook ... -e rollback=true diff --git a/playbooks/files/ashburn-routing-ifup.sh.j2 b/playbooks/files/ashburn-routing-ifup.sh.j2 new file mode 100644 index 00000000..cc5c3b1f --- /dev/null +++ b/playbooks/files/ashburn-routing-ifup.sh.j2 @@ -0,0 +1,28 @@ +#!/bin/bash +# /etc/network/if-up.d/ashburn-routing +# Restore GRE tunnel and policy routing for Ashburn validator relay +# after reboot or interface up. Acts on eno1 (public interface) since +# the GRE tunnel depends on it. + +[ "$IFACE" = "eno1" ] || exit 0 + +# Create GRE tunnel if it doesn't exist +if ! 
ip tunnel show {{ tunnel_device }} 2>/dev/null; then + ip tunnel add {{ tunnel_device }} mode gre local {{ tunnel_src }} remote {{ tunnel_dst }} ttl 64 + ip addr add {{ tunnel_local_ip }}/31 dev {{ tunnel_device }} + ip link set {{ tunnel_device }} up mtu 8972 +fi + +# Ensure rt_tables entry exists +grep -q '^{{ rt_table_id }} {{ rt_table_name }}$' /etc/iproute2/rt_tables || \ + echo "{{ rt_table_id }} {{ rt_table_name }}" >> /etc/iproute2/rt_tables + +# Add policy rule +ip rule show | grep -q 'fwmark 0x64 lookup {{ rt_table_name }}' || \ + ip rule add fwmark {{ fwmark }} table {{ rt_table_name }} + +# Add default route via mia-sw01 through GRE tunnel +ip route replace default via {{ tunnel_remote_ip }} dev {{ tunnel_device }} table {{ rt_table_name }} + +# Add Ashburn IP to loopback +ip addr show lo | grep -q '{{ ashburn_ip }}' || ip addr add {{ ashburn_ip }}/32 dev lo From 9f6e1b5da721683845f923fee866e6d6a64399cd Mon Sep 17 00:00:00 2001 From: "A. F. Dudley" Date: Sat, 7 Mar 2026 01:49:25 +0000 Subject: [PATCH 05/62] fix: remove auto-revert timer, use checkpoint + write memory instead Config is committed to running-config immediately (no 5-min timer). Safety net is the checkpoint (rollback) and the fact that startup-config is only written with -e commit=true. A reboot reverts uncommitted changes. 
Co-Authored-By: Claude Opus 4.6 --- playbooks/ashburn-relay-mia-sw01.yml | 32 ++++++++++++++-------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/playbooks/ashburn-relay-mia-sw01.yml b/playbooks/ashburn-relay-mia-sw01.yml index 3cdd1aca..61c5f1f2 100644 --- a/playbooks/ashburn-relay-mia-sw01.yml +++ b/playbooks/ashburn-relay-mia-sw01.yml @@ -22,7 +22,7 @@ # ansible-playbook -i inventory/switches.yml playbooks/ashburn-relay-mia-sw01.yml \ # -e apply=true # -# # Commit persisted config +# # Persist to startup-config (write memory) # ansible-playbook -i inventory/switches.yml playbooks/ashburn-relay-mia-sw01.yml -e commit=true # # # Rollback @@ -73,23 +73,22 @@ ansible.builtin.meta: end_play # ------------------------------------------------------------------ - # Commit finalization + # Write memory (persist to startup-config) # ------------------------------------------------------------------ - - name: Finalize pending session + - name: Write memory to persist config when: commit | bool block: - - name: Commit session and write memory + - name: Write memory arista.eos.eos_command: commands: - - "configure session {{ session_name }} commit" - write memory - register: commit_result + register: write_result - - name: Show commit result + - name: Show write result ansible.builtin.debug: - var: commit_result.stdout_lines + var: write_result.stdout_lines - - name: End play after commit + - name: End play after write ansible.builtin.meta: end_play # ------------------------------------------------------------------ @@ -222,10 +221,10 @@ ansible.builtin.debug: var: session_diff.stdout_lines - - name: Commit with 5-minute auto-revert + - name: Commit session (checkpoint saved for rollback) arista.eos.eos_command: commands: - - "configure session {{ session_name }} commit timer 00:05:00" + - "configure session {{ session_name }} commit" # ------------------------------------------------------------------ # Verify @@ -247,8 +246,7 @@ - name: Reminder 
ansible.builtin.debug: msg: | - === Config applied with 5-minute auto-revert === - Session: {{ session_name }} + === Config applied (running-config only) === Checkpoint: {{ checkpoint_name }} Changes applied: @@ -257,9 +255,11 @@ link {{ tunnel_local }}/31, ACL {{ tunnel_acl }} 3. Route: {{ ashburn_ip }}/32 via {{ tunnel_remote }} - The config will auto-revert in 5 minutes unless committed. - Verify on the switch, then commit: + Config is in running-config but NOT saved to startup-config. + A reboot will revert to the previous state. + + To persist (write memory): ansible-playbook ... -e commit=true - To revert immediately: + To rollback immediately: ansible-playbook ... -e rollback=true From 8a8b882e322b19bae3190b20845b4b0245469927 Mon Sep 17 00:00:00 2001 From: "A. F. Dudley" Date: Sat, 7 Mar 2026 09:56:28 +0000 Subject: [PATCH 06/62] bug: deploy create doesn't auto-generate volume mappings for new pods Co-Authored-By: Claude Opus 4.6 --- TODO.md | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/TODO.md b/TODO.md index 349530c8..65439ab5 100644 --- a/TODO.md +++ b/TODO.md @@ -7,6 +7,25 @@ We need an "update stack" command in stack orchestrator and cleaner documentatio **Context**: Currently, `deploy init` generates a spec file and `deploy create` creates a deployment directory. The `deployment update` command (added by Thomas Lackey) only syncs env vars and restarts - it doesn't regenerate configurations. There's a gap in the workflow for updating stack configurations after initial deployment. +## Bugs + +### `deploy create` doesn't auto-generate volume mappings for new pods + +When a new pod is added to `stack.yml` (e.g. `monitoring`), `deploy create` +does not generate default host path mappings in spec.yml for the new pod's +volumes. The deployment then fails at scheduling because the PVCs don't exist. 
+ +**Expected**: `deploy create` enumerates all volumes from all compose files +in the stack and generates default host paths for any that aren't already +mapped in the spec.yml `volumes:` section. + +**Actual**: Only volumes already in spec.yml get PVs. New volumes are silently +missing, causing `FailedScheduling: persistentvolumeclaim not found`. + +**Workaround**: Manually add volume entries to spec.yml and create host dirs. + +**Files**: `deployment_create.py` (`_write_config_file`, volume handling) + ## Architecture Refactoring ### Separate Deployer from Stack Orchestrator CLI From d36a71f13de955b8b3c76d93ce08b7867cccaa18 Mon Sep 17 00:00:00 2001 From: "A. F. Dudley" Date: Sat, 7 Mar 2026 09:58:29 +0000 Subject: [PATCH 07/62] fix: redeploy playbook handles SSH agent, git pull, config regen, stale PVs - ansible.cfg: enable SSH agent forwarding for git operations - biscayne-redeploy.yml: add git pull, deploy create --update, and clear stale PV claimRefs after namespace deletion Co-Authored-By: Claude Opus 4.6 --- ansible.cfg | 1 + playbooks/biscayne-redeploy.yml | 32 +++++++++++++++++++++++++++++++- 2 files changed, 32 insertions(+), 1 deletion(-) diff --git a/ansible.cfg b/ansible.cfg index 1ef6ab0d..01f7f5b1 100644 --- a/ansible.cfg +++ b/ansible.cfg @@ -11,3 +11,4 @@ become_method = sudo [ssh_connection] pipelining = true +ssh_args = -o ForwardAgent=yes diff --git a/playbooks/biscayne-redeploy.yml b/playbooks/biscayne-redeploy.yml index a270f4eb..ca16d5a7 100644 --- a/playbooks/biscayne-redeploy.yml +++ b/playbooks/biscayne-redeploy.yml @@ -45,6 +45,8 @@ KUBECONFIG: /home/rix/.kube/config vars: deployment_dir: /srv/deployments/agave + stack_repo: /srv/deployments/agave-stack + stack_path: /srv/deployments/agave-stack/stack-orchestrator/stacks/agave laconic_so: /home/rix/.local/bin/laconic-so kind_cluster: laconic-70ce4c4b47e23b85 k8s_namespace: "laconic-{{ kind_cluster }}" @@ -108,6 +110,16 @@ when: ns_delete.rc == 0 tags: [teardown] + - name: Clear stale 
claimRefs on Released PVs + ansible.builtin.shell: | + for pv in $(kubectl get pv -o jsonpath='{range .items[?(@.status.phase=="Released")]}{.metadata.name}{"\n"}{end}'); do + kubectl patch pv "$pv" --type json \ + -p '[{"op":"remove","path":"/spec/claimRef"}]' + done + register: pv_patch + changed_when: pv_patch.stdout != "" + tags: [teardown] + # ---- wipe: opt-in data cleanup ------------------------------------------ - name: Wipe ledger data shell: rm -rf {{ ledger_dir }}/* @@ -165,7 +177,25 @@ changed_when: false tags: [deploy, preflight] - # ---- deploy: bring up cluster, scale to 0 immediately ------------------- + # ---- deploy: sync config, bring up cluster, scale to 0 ------------------ + - name: Pull agave-stack repo + ansible.builtin.shell: | + cd {{ stack_repo }} + git fetch origin + git reset --hard origin/main + changed_when: true + tags: [deploy] + + - name: Regenerate deployment config from updated stack + command: > + {{ laconic_so }} + --stack {{ stack_path }} + deploy create + --spec-file {{ deployment_dir }}/spec.yml + --deployment-dir {{ deployment_dir }} + --update + tags: [deploy] + - name: Verify kind-config.yml has unified mount root command: "grep -c 'containerPath: /mnt$' {{ deployment_dir }}/kind-config.yml" register: mount_root_check From eae4c3cdffac48d2b9c8ea685dc634db1de17060 Mon Sep 17 00:00:00 2001 From: "A. F. Dudley" Date: Sat, 7 Mar 2026 10:26:10 +0000 Subject: [PATCH 08/62] feat(k8s): per-service resource layering in deployer Resolve container resources using layered priority: 1. spec.yml per-container override (resources.containers.) 2. Compose file deploy.resources block 3. spec.yml global resources 4. DEFAULT_CONTAINER_RESOURCES fallback This prevents monitoring sidecars from inheriting the validator's resource requests (e.g., 256G memory). Each service gets appropriate resources from its compose definition unless explicitly overridden. 
Note: existing deployments with a global resources block in spec.yml can remove it once compose files declare per-service defaults. Co-Authored-By: Claude Opus 4.6 --- stack_orchestrator/deploy/k8s/cluster_info.py | 44 +++++++++++++++++-- stack_orchestrator/deploy/spec.py | 21 +++++++++ 2 files changed, 61 insertions(+), 4 deletions(-) diff --git a/stack_orchestrator/deploy/k8s/cluster_info.py b/stack_orchestrator/deploy/k8s/cluster_info.py index da24bdc2..2ebf96f2 100644 --- a/stack_orchestrator/deploy/k8s/cluster_info.py +++ b/stack_orchestrator/deploy/k8s/cluster_info.py @@ -394,13 +394,43 @@ class ClusterInfo: result.append(pv) return result + def _any_service_has_host_network(self): + for pod_name in self.parsed_pod_yaml_map: + pod = self.parsed_pod_yaml_map[pod_name] + for svc in pod.get("services", {}).values(): + if svc.get("network_mode") == "host": + return True + return False + + def _resolve_container_resources( + self, container_name: str, service_info: dict, global_resources: Resources + ) -> Resources: + """Resolve resources for a container using layered priority. + + Priority: spec per-container > compose deploy.resources + > spec global > DEFAULT + """ + # 1. Check spec.yml for per-container override + per_container = self.spec.get_container_resources_for(container_name) + if per_container: + return per_container + + # 2. Check compose service_info for deploy.resources + deploy_block = service_info.get("deploy", {}) + compose_resources = deploy_block.get("resources", {}) if deploy_block else {} + if compose_resources: + return Resources(compose_resources) + + # 3. 
Fall back to spec.yml global (already resolved with DEFAULT fallback) + return global_resources + # TODO: put things like image pull policy into an object-scope struct def get_deployment(self, image_pull_policy: Optional[str] = None): containers = [] services = {} - resources = self.spec.get_container_resources() - if not resources: - resources = DEFAULT_CONTAINER_RESOURCES + global_resources = self.spec.get_container_resources() + if not global_resources: + global_resources = DEFAULT_CONTAINER_RESOURCES for pod_name in self.parsed_pod_yaml_map: pod = self.parsed_pod_yaml_map[pod_name] services = pod["services"] @@ -483,6 +513,9 @@ class ClusterInfo: ) ) ] + container_resources = self._resolve_container_resources( + container_name, service_info, global_resources + ) container = client.V1Container( name=container_name, image=image_to_use, @@ -501,7 +534,7 @@ class ClusterInfo: if self.spec.get_capabilities() else None, ), - resources=to_k8s_resource_requirements(resources), + resources=to_k8s_resource_requirements(container_resources), ) containers.append(container) volumes = volumes_for_pod_files( @@ -568,6 +601,7 @@ class ClusterInfo: ) ) + use_host_network = self._any_service_has_host_network() template = client.V1PodTemplateSpec( metadata=client.V1ObjectMeta(annotations=annotations, labels=labels), spec=client.V1PodSpec( @@ -577,6 +611,8 @@ class ClusterInfo: affinity=affinity, tolerations=tolerations, runtime_class_name=self.spec.get_runtime_class(), + host_network=use_host_network or None, + dns_policy=("ClusterFirstWithHostNet" if use_host_network else None), ), ) spec = client.V1DeploymentSpec( diff --git a/stack_orchestrator/deploy/spec.py b/stack_orchestrator/deploy/spec.py index e5647b04..bd62779e 100644 --- a/stack_orchestrator/deploy/spec.py +++ b/stack_orchestrator/deploy/spec.py @@ -120,6 +120,27 @@ class Spec: self.obj.get(constants.resources_key, {}).get("containers", {}) ) + def get_container_resources_for( + self, container_name: str + ) -> 
typing.Optional[Resources]: + """Look up per-container resource overrides from spec.yml. + + Checks resources.containers. in the spec. Returns None + if no per-container override exists (caller falls back to other sources). + """ + containers_block = self.obj.get(constants.resources_key, {}).get( + "containers", {} + ) + if container_name in containers_block: + entry = containers_block[container_name] + # Only treat it as a per-container override if it's a dict with + # reservations/limits nested inside (not a top-level global key) + if isinstance(entry, dict) and ( + "reservations" in entry or "limits" in entry + ): + return Resources(entry) + return None + def get_volume_resources(self): return Resources( self.obj.get(constants.resources_key, {}).get(constants.volumes_key, {}) From 4f452db6fe3df6844b0055c0159e19aefa5452f0 Mon Sep 17 00:00:00 2001 From: "A. F. Dudley" Date: Sat, 7 Mar 2026 10:52:40 +0000 Subject: [PATCH 09/62] fix: ansible-lint production profile compliance for all playbooks - FQCN for all modules (ansible.builtin.*) - changed_when/failed_when on all command/shell tasks - set -o pipefail on all shell tasks - Add KUBECONFIG environment to health-check.yml Co-Authored-By: Claude Opus 4.6 --- playbooks/biscayne-boot.yml | 19 +++---- playbooks/biscayne-recover.yml | 18 ++++--- playbooks/biscayne-redeploy.yml | 87 +++++++++++++++++++++------------ playbooks/biscayne-stop.yml | 22 +++++---- playbooks/fix-pv-mounts.yml | 24 +++++---- playbooks/health-check.yml | 2 + 6 files changed, 105 insertions(+), 67 deletions(-) diff --git a/playbooks/biscayne-boot.yml b/playbooks/biscayne-boot.yml index 2cdd5cad..af89a312 100644 --- a/playbooks/biscayne-boot.yml +++ b/playbooks/biscayne-boot.yml @@ -22,7 +22,7 @@ tasks: - name: Install ramdisk format service - copy: + ansible.builtin.copy: dest: /etc/systemd/system/format-ramdisk.service mode: "0644" content: | @@ -43,7 +43,7 @@ register: unit_file - name: Install ramdisk post-mount service - copy: + 
ansible.builtin.copy: dest: /etc/systemd/system/ramdisk-accounts.service mode: "0644" content: | @@ -62,19 +62,19 @@ register: accounts_unit - name: Ensure fstab entry uses nofail - lineinfile: + ansible.builtin.lineinfile: path: /etc/fstab regexp: '^{{ ramdisk_device }}\s+{{ ramdisk_mount }}' line: '{{ ramdisk_device }} {{ ramdisk_mount }} xfs noatime,nodiratime,nofail,x-systemd.requires=format-ramdisk.service 0 0' register: fstab_entry - name: Reload systemd - systemd: + ansible.builtin.systemd: daemon_reload: true when: unit_file.changed or accounts_unit.changed or fstab_entry.changed - name: Enable ramdisk services - systemd: + ansible.builtin.systemd: name: "{{ item }}" enabled: true loop: @@ -83,25 +83,26 @@ # ---- apply now if ramdisk not mounted ------------------------------------ - name: Check if ramdisk is mounted - command: mountpoint -q {{ ramdisk_mount }} + ansible.builtin.command: mountpoint -q {{ ramdisk_mount }} register: ramdisk_mounted failed_when: false changed_when: false - name: Format and mount ramdisk now - shell: | + ansible.builtin.shell: | mkfs.xfs -f {{ ramdisk_device }} mount {{ ramdisk_mount }} mkdir -p {{ accounts_dir }} chown solana:solana {{ ramdisk_mount }} {{ accounts_dir }} + changed_when: ramdisk_mounted.rc != 0 when: ramdisk_mounted.rc != 0 # ---- verify -------------------------------------------------------------- - name: Verify ramdisk - command: df -hT {{ ramdisk_mount }} + ansible.builtin.command: df -hT {{ ramdisk_mount }} register: ramdisk_df changed_when: false - name: Show ramdisk status - debug: + ansible.builtin.debug: msg: "{{ ramdisk_df.stdout_lines }}" diff --git a/playbooks/biscayne-recover.yml b/playbooks/biscayne-recover.yml index fec21a39..f8b9a89e 100644 --- a/playbooks/biscayne-recover.yml +++ b/playbooks/biscayne-recover.yml @@ -48,7 +48,7 @@ tasks: # ---- step 1: scale to 0 --------------------------------------------------- - name: Get current replica count - command: > + ansible.builtin.command: > 
kubectl get deployment {{ deployment_name }} -n {{ k8s_namespace }} -o jsonpath='{.spec.replicas}' @@ -57,7 +57,7 @@ changed_when: false - name: Scale deployment to 0 - command: > + ansible.builtin.command: > kubectl scale deployment {{ deployment_name }} -n {{ k8s_namespace }} --replicas=0 when: current_replicas.stdout | default('0') | int > 0 @@ -65,7 +65,7 @@ # ---- step 2: wait for pods to terminate ------------------------------------ - name: Wait for pods to terminate - command: > + ansible.builtin.command: > kubectl get pods -n {{ k8s_namespace }} -l app={{ deployment_name }} -o jsonpath='{.items}' @@ -77,7 +77,7 @@ when: current_replicas.stdout | default('0') | int > 0 - name: Verify no agave processes in kind node (io_uring safety check) - command: > + ansible.builtin.command: > docker exec {{ kind_cluster }}-control-plane pgrep -c agave-validator register: agave_procs @@ -110,7 +110,7 @@ # ---- step 5: download fresh snapshot --------------------------------------- - name: Verify aria2c installed - command: which aria2c + ansible.builtin.command: which aria2c changed_when: false - name: Copy snapshot script to remote @@ -135,7 +135,9 @@ # ---- step 6: verify snapshot accessible via PV ----------------------------- - name: Get snapshot filename - ansible.builtin.shell: ls -1 {{ snapshot_dir }}/snapshot-*.tar.* | head -1 | xargs basename + ansible.builtin.shell: set -o pipefail && ls -1 {{ snapshot_dir }}/snapshot-*.tar.* | head -1 | xargs basename + args: + executable: /bin/bash register: snapshot_filename changed_when: false @@ -176,7 +178,7 @@ # ---- step 8: wait for pod running ------------------------------------------ - name: Wait for pod to be running - command: > + ansible.builtin.command: > kubectl get pods -n {{ k8s_namespace }} -l app={{ deployment_name }} -o jsonpath='{.items[0].status.phase}' @@ -188,7 +190,7 @@ # ---- step 9: verify validator log ------------------------------------------ - name: Wait for validator log file - command: > + 
ansible.builtin.command: > kubectl exec -n {{ k8s_namespace }} deployment/{{ deployment_name }} -c agave-validator -- test -f /data/log/validator.log diff --git a/playbooks/biscayne-redeploy.yml b/playbooks/biscayne-redeploy.yml index ca16d5a7..216091dc 100644 --- a/playbooks/biscayne-redeploy.yml +++ b/playbooks/biscayne-redeploy.yml @@ -71,15 +71,16 @@ # Deleting the namespace while agave is running causes io_uring/ZFS # deadlock (unkillable D-state threads). See CLAUDE.md. - name: Scale deployment to 0 (graceful stop) - command: > + ansible.builtin.command: > kubectl scale deployment {{ deployment_name }} -n {{ k8s_namespace }} --replicas=0 register: pre_teardown_scale + changed_when: pre_teardown_scale.rc == 0 failed_when: false tags: [teardown] - name: Wait for agave to exit - command: > + ansible.builtin.command: > kubectl get pods -n {{ k8s_namespace }} -l app={{ deployment_name }} -o jsonpath='{.items}' @@ -87,31 +88,35 @@ retries: 60 delay: 5 until: pre_teardown_pods.stdout == "[]" or pre_teardown_pods.stdout == "" or pre_teardown_pods.rc != 0 + changed_when: false failed_when: false when: pre_teardown_scale.rc == 0 tags: [teardown] - name: Delete deployment namespace - command: > + ansible.builtin.command: > kubectl delete namespace {{ k8s_namespace }} --timeout=120s register: ns_delete + changed_when: ns_delete.rc == 0 failed_when: false tags: [teardown] - name: Wait for namespace to terminate - command: > + ansible.builtin.command: > kubectl get namespace {{ k8s_namespace }} -o jsonpath='{.status.phase}' register: ns_status retries: 30 delay: 5 until: ns_status.rc != 0 + changed_when: false failed_when: false when: ns_delete.rc == 0 tags: [teardown] - name: Clear stale claimRefs on Released PVs ansible.builtin.shell: | + set -o pipefail for pv in $(kubectl get pv -o jsonpath='{range .items[?(@.status.phase=="Released")]}{.metadata.name}{"\n"}{end}'); do kubectl patch pv "$pv" --type json \ -p '[{"op":"remove","path":"/spec/claimRef"}]' @@ -122,24 
+127,28 @@ # ---- wipe: opt-in data cleanup ------------------------------------------ - name: Wipe ledger data - shell: rm -rf {{ ledger_dir }}/* + ansible.builtin.shell: rm -rf {{ ledger_dir }}/* become: true + changed_when: true when: wipe_ledger | bool tags: [wipe] - name: Wipe accounts ramdisk (umount + mkfs.xfs + mount) - shell: | + ansible.builtin.shell: | + set -o pipefail mountpoint -q {{ ramdisk_mount }} && umount {{ ramdisk_mount }} || true mkfs.xfs -f {{ ramdisk_device }} mount {{ ramdisk_mount }} mkdir -p {{ accounts_dir }} chown solana:solana {{ ramdisk_mount }} {{ accounts_dir }} become: true + changed_when: true when: wipe_accounts | bool tags: [wipe] - name: Clean old snapshots (keep newest full + incremental) - shell: | + ansible.builtin.shell: | + set -o pipefail cd {{ snapshot_dir }} || exit 0 newest=$(ls -t snapshot-*.tar.* 2>/dev/null | head -1) if [ -n "$newest" ]; then @@ -150,26 +159,28 @@ -delete fi become: true + changed_when: true when: not skip_snapshot | bool tags: [wipe] # ---- preflight: verify ramdisk and mounts before deploy ------------------ - name: Verify ramdisk is mounted - command: mountpoint -q {{ ramdisk_mount }} + ansible.builtin.command: mountpoint -q {{ ramdisk_mount }} register: ramdisk_check failed_when: ramdisk_check.rc != 0 changed_when: false tags: [deploy, preflight] - name: Verify ramdisk is xfs (not the underlying ZFS) - shell: df -T {{ ramdisk_mount }} | grep -q xfs + ansible.builtin.shell: set -o pipefail && df -T {{ ramdisk_mount }} | grep -q xfs register: ramdisk_type failed_when: ramdisk_type.rc != 0 changed_when: false tags: [deploy, preflight] - name: Verify ramdisk visible inside kind node - shell: > + ansible.builtin.shell: > + set -o pipefail && docker exec {{ kind_cluster }}-control-plane df -T /mnt/solana/ramdisk 2>/dev/null | grep -q xfs register: kind_ramdisk_check @@ -187,28 +198,31 @@ tags: [deploy] - name: Regenerate deployment config from updated stack - command: > + ansible.builtin.command: > 
{{ laconic_so }} --stack {{ stack_path }} deploy create --spec-file {{ deployment_dir }}/spec.yml --deployment-dir {{ deployment_dir }} --update + changed_when: true tags: [deploy] - name: Verify kind-config.yml has unified mount root - command: "grep -c 'containerPath: /mnt$' {{ deployment_dir }}/kind-config.yml" + ansible.builtin.command: "grep -c 'containerPath: /mnt$' {{ deployment_dir }}/kind-config.yml" register: mount_root_check failed_when: mount_root_check.stdout | int < 1 + changed_when: false tags: [deploy] - name: Start deployment (creates kind cluster + deploys pod) - command: "{{ laconic_so }} deployment --dir {{ deployment_dir }} start" + ansible.builtin.command: "{{ laconic_so }} deployment --dir {{ deployment_dir }} start" + changed_when: true timeout: 1200 tags: [deploy] - name: Wait for deployment to exist - command: > + ansible.builtin.command: > kubectl get deployment {{ deployment_name }} -n {{ k8s_namespace }} -o jsonpath='{.metadata.name}' @@ -216,16 +230,18 @@ retries: 30 delay: 10 until: deploy_exists.rc == 0 + changed_when: false tags: [deploy] - name: Scale validator to 0 (stop before snapshot download) - command: > + ansible.builtin.command: > kubectl scale deployment {{ deployment_name }} -n {{ k8s_namespace }} --replicas=0 + changed_when: true tags: [deploy] - name: Wait for pods to terminate - command: > + ansible.builtin.command: > kubectl get pods -n {{ k8s_namespace }} -l app={{ deployment_name }} -o jsonpath='{.items}' @@ -233,18 +249,19 @@ retries: 30 delay: 5 until: pods_gone.stdout == "[]" or pods_gone.stdout == "" + changed_when: false failed_when: false tags: [deploy] # ---- snapshot: download via aria2c, verify in kind node ------------------ - name: Verify aria2c installed - command: which aria2c + ansible.builtin.command: which aria2c changed_when: false when: not skip_snapshot | bool tags: [snapshot] - name: Copy snapshot script to remote - copy: + ansible.builtin.copy: src: "{{ snapshot_script_local }}" dest: "{{ 
snapshot_script }}" mode: "0755" @@ -252,73 +269,80 @@ tags: [snapshot] - name: Verify kind node mounts - command: > + ansible.builtin.command: > docker exec {{ kind_cluster }}-control-plane ls /mnt/solana/snapshots/ register: kind_mount_check + changed_when: false tags: [snapshot] - name: Download snapshot via aria2c - shell: > + ansible.builtin.shell: > python3 {{ snapshot_script }} -o {{ snapshot_dir }} {{ snapshot_args }} become: true register: snapshot_result + changed_when: true when: not skip_snapshot | bool timeout: 3600 tags: [snapshot] - name: Show snapshot download result - debug: + ansible.builtin.debug: msg: "{{ snapshot_result.stdout_lines | default(['skipped']) }}" tags: [snapshot] - name: Verify snapshot visible inside kind node - shell: > + ansible.builtin.shell: > + set -o pipefail && docker exec {{ kind_cluster }}-control-plane - ls -lhS /mnt/solana/snapshots/*.tar.* 2>/dev/null | head -5 + find /mnt/solana/snapshots/ -name '*.tar.*' -maxdepth 1 | head -5 register: kind_snapshot_check failed_when: kind_snapshot_check.stdout == "" + changed_when: false when: not skip_snapshot | bool tags: [snapshot] - name: Show snapshot files in kind node - debug: + ansible.builtin.debug: msg: "{{ kind_snapshot_check.stdout_lines | default(['skipped']) }}" when: not skip_snapshot | bool tags: [snapshot] # ---- deploy (cont): scale validator back up with snapshot ---------------- - name: Scale validator to 1 (start with downloaded snapshot) - command: > + ansible.builtin.command: > kubectl scale deployment {{ deployment_name }} -n {{ k8s_namespace }} --replicas=1 + changed_when: true tags: [deploy] # ---- verify: confirm validator is running -------------------------------- - name: Wait for pod to be running - command: > + ansible.builtin.command: > kubectl get pods -n {{ k8s_namespace }} -o jsonpath='{.items[0].status.phase}' register: pod_status retries: 60 delay: 10 until: pod_status.stdout == "Running" + changed_when: false tags: [verify] - name: Verify 
unified mount inside kind node - command: "docker exec {{ kind_cluster }}-control-plane ls /mnt/solana/" + ansible.builtin.command: "docker exec {{ kind_cluster }}-control-plane ls /mnt/solana/" register: mount_check + changed_when: false tags: [verify] - name: Show mount contents - debug: + ansible.builtin.debug: msg: "{{ mount_check.stdout_lines }}" tags: [verify] - name: Check validator log file is being written - command: > + ansible.builtin.command: > kubectl exec -n {{ k8s_namespace }} deployment/{{ deployment_name }} -c agave-validator -- test -f /data/log/validator.log @@ -326,11 +350,12 @@ delay: 10 until: log_file_check.rc == 0 register: log_file_check + changed_when: false failed_when: false tags: [verify] - name: Check RPC health - uri: + ansible.builtin.uri: url: http://127.0.0.1:8899/health return_content: true register: rpc_health @@ -342,7 +367,7 @@ tags: [verify] - name: Report status - debug: + ansible.builtin.debug: msg: >- Deployment complete. Log: {{ 'writing' if log_file_check.rc == 0 else 'not yet created' }}. 
diff --git a/playbooks/biscayne-stop.yml b/playbooks/biscayne-stop.yml index 2550f5a9..2f9290f6 100644 --- a/playbooks/biscayne-stop.yml +++ b/playbooks/biscayne-stop.yml @@ -34,7 +34,7 @@ tasks: - name: Get current replica count - command: > + ansible.builtin.command: > kubectl get deployment {{ deployment_name }} -n {{ k8s_namespace }} -o jsonpath='{.spec.replicas}' @@ -43,24 +43,26 @@ changed_when: false - name: Scale deployment to 0 - command: > + ansible.builtin.command: > kubectl scale deployment {{ deployment_name }} -n {{ k8s_namespace }} --replicas=0 + changed_when: true when: current_replicas.stdout | default('0') | int > 0 - name: Wait for pods to terminate - command: > + ansible.builtin.command: > kubectl get pods -n {{ k8s_namespace }} -l app={{ deployment_name }} -o jsonpath='{.items}' register: pods_gone + changed_when: false retries: 60 delay: 5 until: pods_gone.stdout == "[]" or pods_gone.stdout == "" when: current_replicas.stdout | default('0') | int > 0 - name: Verify no agave processes in kind node - command: > + ansible.builtin.command: > docker exec {{ kind_cluster }}-control-plane pgrep -c agave-validator register: agave_procs @@ -68,7 +70,7 @@ changed_when: false - name: Fail if agave still running - fail: + ansible.builtin.fail: msg: >- agave-validator process still running inside kind node after pod termination. Do NOT restart the kind node — investigate @@ -76,7 +78,7 @@ when: agave_procs.rc == 0 - name: Report stopped - debug: + ansible.builtin.debug: msg: >- Validator stopped. Replicas: {{ current_replicas.stdout | default('0') }} -> 0. No agave processes detected in kind node. 
@@ -84,22 +86,24 @@ # ---- optional: restart kind node ----------------------------------------- - name: Restart kind node - command: docker restart {{ kind_cluster }}-control-plane + ansible.builtin.command: docker restart {{ kind_cluster }}-control-plane + changed_when: true when: restart_kind | bool timeout: 120 - name: Wait for kind node ready - command: > + ansible.builtin.command: > kubectl get node {{ kind_cluster }}-control-plane -o jsonpath='{.status.conditions[?(@.type=="Ready")].status}' register: node_ready + changed_when: false retries: 30 delay: 10 until: node_ready.stdout == "True" when: restart_kind | bool - name: Report restarted - debug: + ansible.builtin.debug: msg: >- Kind node restarted and ready. Deployment at 0 replicas — scale up when ready. diff --git a/playbooks/fix-pv-mounts.yml b/playbooks/fix-pv-mounts.yml index f03f0e97..ba6d9f23 100644 --- a/playbooks/fix-pv-mounts.yml +++ b/playbooks/fix-pv-mounts.yml @@ -39,7 +39,7 @@ tasks: - name: Read current PV hostPaths - command: > + ansible.builtin.command: > kubectl get pv {{ kind_cluster }}-{{ item.name }} -o jsonpath='{.spec.hostPath.path}' register: current_paths @@ -48,7 +48,7 @@ changed_when: false - name: Build path comparison - set_fact: + ansible.builtin.set_fact: path_mismatches: "{{ current_paths.results | selectattr('stdout', 'ne', '') | rejectattr('stdout', 'equalto', item.host_path) | list }}" path_missing: "{{ current_paths.results | selectattr('stdout', 'equalto', '') | list }}" loop: "{{ volumes }}" @@ -56,7 +56,7 @@ label: "{{ item.name }}" - name: Show current vs expected paths - debug: + ansible.builtin.debug: msg: >- {{ item.item.name }}: current={{ item.stdout if item.stdout else 'NOT FOUND' }} @@ -67,7 +67,7 @@ label: "{{ item.item.name }}" - name: Check for mismatched PVs - fail: + ansible.builtin.fail: msg: >- PV {{ item.item.name }} has wrong hostPath: {{ item.stdout if item.stdout else 'NOT FOUND' }} @@ -80,7 +80,7 @@ # ---- Fix mode 
--------------------------------------------------------- - name: Delete stale PVCs - command: > + ansible.builtin.command: > kubectl delete pvc {{ kind_cluster }}-{{ item.item.name }} -n {{ k8s_namespace }} --timeout=60s when: fix | bool and item.stdout != item.item.host_path @@ -88,9 +88,10 @@ loop_control: label: "{{ item.item.name }}" failed_when: false + changed_when: true - name: Delete stale PVs - command: > + ansible.builtin.command: > kubectl delete pv {{ kind_cluster }}-{{ item.item.name }} --timeout=60s when: fix | bool and item.stdout != item.item.host_path @@ -98,9 +99,10 @@ loop_control: label: "{{ item.item.name }}" failed_when: false + changed_when: true - name: Create PVs with correct hostPaths - command: > + ansible.builtin.command: > kubectl apply -f - args: stdin: | @@ -121,9 +123,10 @@ loop: "{{ current_paths.results }}" loop_control: label: "{{ item.item.name }}" + changed_when: true - name: Create PVCs - command: > + ansible.builtin.command: > kubectl apply -f - args: stdin: | @@ -144,10 +147,11 @@ loop: "{{ current_paths.results }}" loop_control: label: "{{ item.item.name }}" + changed_when: true # ---- Final verify ----------------------------------------------------- - name: Verify PV paths - command: > + ansible.builtin.command: > kubectl get pv {{ kind_cluster }}-{{ item.name }} -o jsonpath='{.spec.hostPath.path}' register: final_paths @@ -156,7 +160,7 @@ when: fix | bool - name: Assert all PV paths correct - assert: + ansible.builtin.assert: that: item.stdout == item.item.host_path fail_msg: "{{ item.item.name }}: {{ item.stdout }} != {{ item.item.host_path }}" success_msg: "{{ item.item.name }}: {{ item.stdout }} OK" diff --git a/playbooks/health-check.yml b/playbooks/health-check.yml index 326f1f35..c0aa4ee6 100644 --- a/playbooks/health-check.yml +++ b/playbooks/health-check.yml @@ -13,6 +13,8 @@ - name: Biscayne agave-stack health check hosts: biscayne gather_facts: false + environment: + KUBECONFIG: /home/rix/.kube/config tasks: # 
------------------------------------------------------------------ From b40883ef65bd2200ea481b9b705966b8e6bc3a58 Mon Sep 17 00:00:00 2001 From: "A. F. Dudley" Date: Sat, 7 Mar 2026 10:56:48 +0000 Subject: [PATCH 10/62] fix: separate switch inventory to prevent accidental targeting Move switches.yml to inventory-switches/ so ansible.cfg's `inventory = inventory/` only loads biscayne. Switch playbooks must pass `-i inventory-switches/` explicitly. Co-Authored-By: Claude Opus 4.6 --- {inventory => inventory-switches}/switches.yml | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename {inventory => inventory-switches}/switches.yml (100%) diff --git a/inventory/switches.yml b/inventory-switches/switches.yml similarity index 100% rename from inventory/switches.yml rename to inventory-switches/switches.yml From a11d40f2f340011f06d918c6552e1ac4ba8ad360 Mon Sep 17 00:00:00 2001 From: "A. F. Dudley" Date: Sat, 7 Mar 2026 13:07:12 +0000 Subject: [PATCH 11/62] fix(k8s): add HostToContainer mount propagation to kind extraMounts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Without propagation, rbind submounts on the host (e.g., XFS zvol at /srv/kind/solana) are invisible inside the kind node — it sees the underlying filesystem (ZFS) instead. This causes agave's io_uring to deadlock on ZFS transaction commits (D-state in dsl_dir_tempreserve_space). HostToContainer propagation ensures host submounts propagate into the kind node, so /mnt/solana correctly resolves to the XFS zvol. 
Co-Authored-By: Claude Opus 4.6 --- stack_orchestrator/deploy/k8s/helpers.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/stack_orchestrator/deploy/k8s/helpers.py b/stack_orchestrator/deploy/k8s/helpers.py index 8b367f86..ac4e8603 100644 --- a/stack_orchestrator/deploy/k8s/helpers.py +++ b/stack_orchestrator/deploy/k8s/helpers.py @@ -573,14 +573,18 @@ def _generate_kind_mounts(parsed_pod_files, deployment_dir, deployment_context): Path(f"./data/{backup_subdir}/etcd"), deployment_dir ) volume_definitions.append( - f" - hostPath: {etcd_host_path}\n" f" containerPath: /var/lib/etcd\n" + f" - hostPath: {etcd_host_path}\n" + f" containerPath: /var/lib/etcd\n" + f" propagation: HostToContainer\n" ) pki_host_path = _make_absolute_host_path( Path(f"./data/{backup_subdir}/pki"), deployment_dir ) volume_definitions.append( - f" - hostPath: {pki_host_path}\n" f" containerPath: /etc/kubernetes/pki\n" + f" - hostPath: {pki_host_path}\n" + f" containerPath: /etc/kubernetes/pki\n" + f" propagation: HostToContainer\n" ) # Note these paths are relative to the location of the pod files (at present) @@ -621,6 +625,7 @@ def _generate_kind_mounts(parsed_pod_files, deployment_dir, deployment_context): volume_definitions.append( f" - hostPath: {host_path}\n" f" containerPath: {container_path}\n" + f" propagation: HostToContainer\n" ) if opts.o.debug: print(f"Added host path mount: {host_path}") @@ -648,6 +653,7 @@ def _generate_kind_mounts(parsed_pod_files, deployment_dir, deployment_context): volume_definitions.append( f" - hostPath: {host_path}\n" f" containerPath: {container_path}\n" + f" propagation: HostToContainer\n" ) return ( "" @@ -703,7 +709,11 @@ def _generate_high_memlock_spec_mount(deployment_dir: Path): references an absolute path. 
""" spec_path = deployment_dir.joinpath(constants.high_memlock_spec_filename).resolve() - return f" - hostPath: {spec_path}\n" f" containerPath: {spec_path}\n" + return ( + f" - hostPath: {spec_path}\n" + f" containerPath: {spec_path}\n" + f" propagation: HostToContainer\n" + ) def generate_high_memlock_spec_json(): From 14c0f6377549293f70fc69b60eaebc7cc8bcfabf Mon Sep 17 00:00:00 2001 From: "A. F. Dudley" Date: Sat, 7 Mar 2026 13:07:54 +0000 Subject: [PATCH 12/62] feat: layer 4 invariants, mount checks, and deployment layer docs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Rename biscayne-boot.yml → biscayne-prepare-agave.yml (layer 4) - Document deployment layers and layer 4 invariants in playbook header - Add zvol, ramdisk, rbind fstab management with stale entry cleanup - Add kind node XFS verification (reads cluster-id from deployment) - Add mount checks to health-check.yml (host mounts, kind visibility, propagation) - Fix health-check discovery tasks with tags: [always] and non-fatal pod lookup - Fix biscayne-redeploy.yml shell tasks missing executable: /bin/bash - Add ansible_python_interpreter to inventory - Update CLAUDE.md with deployment layers table and mount propagation notes Co-Authored-By: Claude Opus 4.6 --- CLAUDE.md | 38 ++++- inventory/biscayne.yml | 1 + playbooks/biscayne-boot.yml | 108 ------------ playbooks/biscayne-prepare-agave.yml | 243 +++++++++++++++++++++++++++ playbooks/biscayne-redeploy.yml | 14 +- playbooks/health-check.yml | 61 ++++++- 6 files changed, 342 insertions(+), 123 deletions(-) delete mode 100644 playbooks/biscayne-boot.yml create mode 100644 playbooks/biscayne-prepare-agave.yml diff --git a/CLAUDE.md b/CLAUDE.md index 138d8d75..21542520 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -1,5 +1,30 @@ # Biscayne Agave Runbook +## Deployment Layers + +Operations on biscayne follow a strict layering. Each layer assumes the layers +below it are correct. 
Playbooks belong to exactly one layer. + +| Layer | What | Playbooks | +|-------|------|-----------| +| 1. Base system | Docker, ZFS, packages | Out of scope (manual/PXE) | +| 2. Prepare kind | `/srv/kind` exists (ZFS dataset) | None needed (ZFS handles it) | +| 3. Install kind | `laconic-so deployment start` creates kind cluster, mounts `/srv/kind` → `/mnt` in kind node | `biscayne-redeploy.yml` (deploy tags) | +| 4. Prepare agave | Host storage for agave: zvol, ramdisk, rbind into `/srv/kind/solana` | `biscayne-prepare-agave.yml` | +| 5. Deploy agave | Deploy agave-stack into kind, snapshot download, scale up | `biscayne-redeploy.yml` (snapshot/verify tags), `biscayne-recover.yml` | + +**Layer 4 invariants** (asserted by `biscayne-prepare-agave.yml`): +- `/srv/solana` is XFS on a zvol — agave uses io_uring which deadlocks on ZFS +- `/srv/solana/ramdisk` is XFS on `/dev/ram0` — accounts must be on ramdisk +- `/srv/kind/solana` is an rbind of `/srv/solana` — makes the zvol visible to kind at `/mnt/solana` + +These invariants are checked at runtime and persisted to fstab/systemd so they +survive reboot. They are agave's requirements reaching into the boot sequence, +not base system concerns. + +**Cross-cutting**: `health-check.yml` (read-only diagnostics), `biscayne-stop.yml` +(layer 5 — graceful shutdown), `fix-pv-mounts.yml` (layer 5 — PV repair). + ## Cluster Operations ### Shutdown Order @@ -36,7 +61,7 @@ Correct shutdown sequence: The accounts directory must be on a ramdisk for performance. `/dev/ram0` loses its filesystem on reboot and must be reformatted before mounting. 
-**Boot ordering is handled by systemd units** (installed by `biscayne-boot.yml`): +**Boot ordering is handled by systemd units** (installed by `biscayne-prepare-agave.yml`): - `format-ramdisk.service`: runs `mkfs.xfs -f /dev/ram0` before `local-fs.target` - fstab entry: mounts `/dev/ram0` at `/srv/solana/ramdisk` with `x-systemd.requires=format-ramdisk.service` @@ -46,11 +71,12 @@ filesystem on reboot and must be reformatted before mounting. These units run before docker, so the kind node's bind mounts always see the ramdisk. **No manual intervention is needed after reboot.** -**Mount propagation**: The kind node bind-mounts `/srv/kind` → `/mnt`. Because -the ramdisk is mounted at `/srv/solana/ramdisk` and symlinked/overlaid through -`/srv/kind/solana/ramdisk`, mount propagation makes it visible inside the kind -node at `/mnt/solana/ramdisk` without restarting the kind node. **Do NOT restart -the kind node just to pick up a ramdisk mount.** +**Mount propagation**: The kind node bind-mounts `/srv/kind` → `/mnt` at container +start. New mounts under `/srv/kind` on the host (like the rbind at +`/srv/kind/solana`) do NOT propagate into the kind node because kind's default +mount propagation is `None`. A kind node restart is required to pick up new host +mounts. **TODO**: Fix laconic-so to set `propagation: HostToContainer` on the +kind-mount-root extraMount, which would make host mounts propagate automatically. 
### KUBECONFIG diff --git a/inventory/biscayne.yml b/inventory/biscayne.yml index 722a696a..f0afa001 100644 --- a/inventory/biscayne.yml +++ b/inventory/biscayne.yml @@ -4,6 +4,7 @@ all: ansible_host: biscayne.vaasl.io ansible_user: rix ansible_become: true + ansible_python_interpreter: /usr/bin/python3.12 # DoubleZero identities dz_identity: 3Bw6v7EruQvTwoY79h2QjQCs2KBQFzSneBdYUbcXK1Tr diff --git a/playbooks/biscayne-boot.yml b/playbooks/biscayne-boot.yml deleted file mode 100644 index af89a312..00000000 --- a/playbooks/biscayne-boot.yml +++ /dev/null @@ -1,108 +0,0 @@ ---- -# Configure biscayne OS-level services for agave validator -# -# Installs a systemd unit that formats and mounts the ramdisk on boot. -# /dev/ram0 loses its filesystem on reboot, so mkfs.xfs must run before -# the fstab mount. This unit runs before docker, ensuring the kind node's -# bind mounts always see the ramdisk. -# -# This playbook is idempotent — safe to run multiple times. -# -# Usage: -# ansible-playbook -i biscayne.vaasl.io, playbooks/biscayne-boot.yml -# -- name: Configure OS-level services for agave - hosts: all - gather_facts: false - become: true - vars: - ramdisk_device: /dev/ram0 - ramdisk_mount: /srv/solana/ramdisk - accounts_dir: /srv/solana/ramdisk/accounts - - tasks: - - name: Install ramdisk format service - ansible.builtin.copy: - dest: /etc/systemd/system/format-ramdisk.service - mode: "0644" - content: | - [Unit] - Description=Format /dev/ram0 as XFS for Solana accounts - DefaultDependencies=no - Before=local-fs.target - After=systemd-modules-load.service - ConditionPathExists={{ ramdisk_device }} - - [Service] - Type=oneshot - RemainAfterExit=yes - ExecStart=/sbin/mkfs.xfs -f {{ ramdisk_device }} - - [Install] - WantedBy=local-fs.target - register: unit_file - - - name: Install ramdisk post-mount service - ansible.builtin.copy: - dest: /etc/systemd/system/ramdisk-accounts.service - mode: "0644" - content: | - [Unit] - Description=Create Solana accounts directory on 
ramdisk - After=srv-solana-ramdisk.mount - Requires=srv-solana-ramdisk.mount - - [Service] - Type=oneshot - RemainAfterExit=yes - ExecStart=/bin/bash -c 'mkdir -p {{ accounts_dir }} && chown solana:solana {{ ramdisk_mount }} {{ accounts_dir }}' - - [Install] - WantedBy=multi-user.target - register: accounts_unit - - - name: Ensure fstab entry uses nofail - ansible.builtin.lineinfile: - path: /etc/fstab - regexp: '^{{ ramdisk_device }}\s+{{ ramdisk_mount }}' - line: '{{ ramdisk_device }} {{ ramdisk_mount }} xfs noatime,nodiratime,nofail,x-systemd.requires=format-ramdisk.service 0 0' - register: fstab_entry - - - name: Reload systemd - ansible.builtin.systemd: - daemon_reload: true - when: unit_file.changed or accounts_unit.changed or fstab_entry.changed - - - name: Enable ramdisk services - ansible.builtin.systemd: - name: "{{ item }}" - enabled: true - loop: - - format-ramdisk.service - - ramdisk-accounts.service - - # ---- apply now if ramdisk not mounted ------------------------------------ - - name: Check if ramdisk is mounted - ansible.builtin.command: mountpoint -q {{ ramdisk_mount }} - register: ramdisk_mounted - failed_when: false - changed_when: false - - - name: Format and mount ramdisk now - ansible.builtin.shell: | - mkfs.xfs -f {{ ramdisk_device }} - mount {{ ramdisk_mount }} - mkdir -p {{ accounts_dir }} - chown solana:solana {{ ramdisk_mount }} {{ accounts_dir }} - changed_when: ramdisk_mounted.rc != 0 - when: ramdisk_mounted.rc != 0 - - # ---- verify -------------------------------------------------------------- - - name: Verify ramdisk - ansible.builtin.command: df -hT {{ ramdisk_mount }} - register: ramdisk_df - changed_when: false - - - name: Show ramdisk status - ansible.builtin.debug: - msg: "{{ ramdisk_df.stdout_lines }}" diff --git a/playbooks/biscayne-prepare-agave.yml b/playbooks/biscayne-prepare-agave.yml new file mode 100644 index 00000000..cc2be16b --- /dev/null +++ b/playbooks/biscayne-prepare-agave.yml @@ -0,0 +1,243 @@ +--- +# Prepare 
biscayne host for agave validator +# +# Deployment layers: +# 1. Base system — Docker, ZFS (out of scope) +# 2. Prepare kind — /srv/kind directory exists (ZFS dataset, out of scope) +# 3. laconic-so — Installs kind, mounts /srv/kind → /mnt in kind node +# 4. Prepare agave — THIS PLAYBOOK +# 5. Deploy agave — laconic-so deploys agave-stack into kind +# +# Agave requires three things from the host that kind doesn't provide: +# +# Invariant 1: /srv/solana is XFS on a zvol (not ZFS) +# Why: agave uses io_uring for async I/O. io_uring workers deadlock on +# ZFS datasets (D-state in dsl_dir_tempreserve_space). XFS on a zvol +# (block device) works fine. This is why the data lives on a zvol, not +# a ZFS dataset. +# Persisted as: fstab entry mounting /dev/zvol/.../solana at /srv/solana +# +# Invariant 2: /srv/solana/ramdisk is XFS on /dev/ram0 (600G ramdisk) +# Why: agave accounts must be on ramdisk for performance. /dev/ram0 +# loses its filesystem on reboot, so it must be reformatted before +# mounting each boot. +# Persisted as: format-ramdisk.service (mkfs before mount) + fstab entry +# +# Invariant 3: /srv/kind/solana is an rbind of /srv/solana +# Why: kind mounts /srv/kind → /mnt inside the kind node. PVs reference +# /mnt/solana/*. Without the rbind, /srv/kind/solana resolves to the ZFS +# dataset (biscayne/DATA/srv/kind), not the zvol — violating invariant 1. +# Persisted as: fstab entry with x-systemd.requires=zfs-mount.service +# (must mount AFTER ZFS, or ZFS overlay at /srv/kind hides it) +# +# This playbook checks each invariant and only acts if it's not met. +# Idempotent — safe to run multiple times. 
+# +# Usage: +# ansible-playbook playbooks/biscayne-prepare-agave.yml +# +- name: Configure OS-level services for agave + hosts: all + gather_facts: false + become: true + vars: + ramdisk_device: /dev/ram0 + zvol_device: /dev/zvol/biscayne/DATA/volumes/solana + solana_dir: /srv/solana + ramdisk_mount: /srv/solana/ramdisk + kind_solana_dir: /srv/kind/solana + accounts_dir: /srv/solana/ramdisk/accounts + deployment_dir: /srv/deployments/agave + + tasks: + # ---- systemd units ---------------------------------------------------------- + - name: Install ramdisk format service + ansible.builtin.copy: + dest: /etc/systemd/system/format-ramdisk.service + mode: "0644" + content: | + [Unit] + Description=Format /dev/ram0 as XFS for Solana accounts + DefaultDependencies=no + Before=local-fs.target + After=systemd-modules-load.service + ConditionPathExists={{ ramdisk_device }} + + [Service] + Type=oneshot + RemainAfterExit=yes + ExecStart=/sbin/mkfs.xfs -f {{ ramdisk_device }} + + [Install] + WantedBy=local-fs.target + register: unit_file + + - name: Install ramdisk post-mount service + ansible.builtin.copy: + dest: /etc/systemd/system/ramdisk-accounts.service + mode: "0644" + content: | + [Unit] + Description=Create Solana accounts directory on ramdisk + After=srv-solana-ramdisk.mount + Requires=srv-solana-ramdisk.mount + + [Service] + Type=oneshot + RemainAfterExit=yes + ExecStart=/bin/bash -c 'mkdir -p {{ accounts_dir }} && chown solana:solana {{ ramdisk_mount }} {{ accounts_dir }}' + + [Install] + WantedBy=multi-user.target + register: accounts_unit + + # ---- fstab entries ---------------------------------------------------------- + - name: Ensure zvol fstab entry + ansible.builtin.lineinfile: + path: /etc/fstab + regexp: '^\S+\s+{{ solana_dir }}\s' + line: '{{ zvol_device }} {{ solana_dir }} xfs defaults 0 2' + register: fstab_zvol + + - name: Ensure ramdisk fstab entry + ansible.builtin.lineinfile: + path: /etc/fstab + regexp: '^{{ ramdisk_device }}\s+{{ ramdisk_mount 
}}\s' + line: '{{ ramdisk_device }} {{ ramdisk_mount }} xfs noatime,nodiratime,nofail,x-systemd.requires=format-ramdisk.service 0 0' + register: fstab_ramdisk + + # rbind /srv/solana to /srv/kind/solana AFTER zfs-mount.service and ramdisk. + # Without this ordering, ZFS overlay at /srv/kind hides the bind mount. + - name: Ensure kind bind mount fstab entry + ansible.builtin.lineinfile: + path: /etc/fstab + regexp: '^\S+\s+{{ kind_solana_dir }}\s' + line: '{{ solana_dir }} {{ kind_solana_dir }} none rbind,nofail,x-systemd.requires=zfs-mount.service,x-systemd.requires=srv-solana-ramdisk.mount 0 0' + register: fstab_kind + + # Remove stale fstab entries from previous attempts (direct zvol mount, + # separate ramdisk mount at /srv/kind/solana/ramdisk) + - name: Remove stale kind zvol fstab entry + ansible.builtin.lineinfile: + path: /etc/fstab + regexp: '^{{ zvol_device }}\s+{{ kind_solana_dir }}\s' + state: absent + register: fstab_stale_zvol + + - name: Remove stale kind ramdisk fstab entry + ansible.builtin.lineinfile: + path: /etc/fstab + regexp: '^\S+\s+{{ kind_solana_dir }}/ramdisk\s' + state: absent + register: fstab_stale_ramdisk + + # ---- reload and enable ------------------------------------------------------ + - name: Reload systemd + ansible.builtin.systemd: + daemon_reload: true + when: >- + unit_file.changed or accounts_unit.changed or + fstab_zvol.changed or fstab_ramdisk.changed or fstab_kind.changed or + fstab_stale_zvol.changed or fstab_stale_ramdisk.changed + + - name: Enable ramdisk services + ansible.builtin.systemd: + name: "{{ item }}" + enabled: true + loop: + - format-ramdisk.service + - ramdisk-accounts.service + + # ---- apply now if ramdisk not mounted -------------------------------------- + - name: Check if ramdisk is mounted + ansible.builtin.command: mountpoint -q {{ ramdisk_mount }} + register: ramdisk_mounted + failed_when: false + changed_when: false + + - name: Format and mount ramdisk now + ansible.builtin.shell: | + mkfs.xfs -f {{ 
ramdisk_device }} + mount {{ ramdisk_mount }} + mkdir -p {{ accounts_dir }} + chown solana:solana {{ ramdisk_mount }} {{ accounts_dir }} + changed_when: ramdisk_mounted.rc != 0 + when: ramdisk_mounted.rc != 0 + + # ---- apply kind bind mount now if not correct ------------------------------ + - name: Check kind bind mount + ansible.builtin.shell: + cmd: > + set -o pipefail && + findmnt -n -o SOURCE {{ kind_solana_dir }} | grep -q '{{ solana_dir }}' + executable: /bin/bash + register: kind_mount_check + failed_when: false + changed_when: false + + - name: Unmount stale kind mounts + ansible.builtin.shell: + cmd: | + umount {{ kind_solana_dir }}/ramdisk 2>/dev/null || true + umount {{ kind_solana_dir }} 2>/dev/null || true + executable: /bin/bash + changed_when: kind_mount_check.rc != 0 + when: kind_mount_check.rc != 0 + + - name: Apply kind bind mount now + ansible.posix.mount: + path: "{{ kind_solana_dir }}" + src: "{{ solana_dir }}" + fstype: none + opts: rbind + state: mounted + when: kind_mount_check.rc != 0 + + # ---- verify ----------------------------------------------------------------- + - name: Verify ramdisk is XFS + ansible.builtin.shell: + cmd: set -o pipefail && df -T {{ ramdisk_mount }} | grep -q xfs + executable: /bin/bash + changed_when: false + + - name: Verify zvol is XFS + ansible.builtin.shell: + cmd: set -o pipefail && df -T {{ solana_dir }} | grep -q xfs + executable: /bin/bash + changed_when: false + + - name: Verify kind bind mount contents + ansible.builtin.shell: + cmd: > + set -o pipefail && + ls {{ kind_solana_dir }}/ledger {{ kind_solana_dir }}/snapshots + {{ kind_solana_dir }}/ramdisk/accounts 2>&1 | head -5 + executable: /bin/bash + register: kind_mount_verify + changed_when: false + + # Assert the kind node sees XFS (zvol), not ZFS. If this fails, kind + # needs a restart or laconic-so needs the HostToContainer propagation fix. 
+ - name: Read cluster-id from deployment + ansible.builtin.shell: + cmd: set -o pipefail && grep '^cluster-id:' {{ deployment_dir }}/deployment.yml | awk '{print $2}' + executable: /bin/bash + register: cluster_id_result + changed_when: false + + - name: Verify kind node sees XFS at /mnt/solana + ansible.builtin.shell: + cmd: > + set -o pipefail && + docker exec {{ cluster_id_result.stdout }}-control-plane + stat -f -c '%T' /mnt/solana | grep -q xfs + executable: /bin/bash + register: kind_fstype + changed_when: false + failed_when: false + + - name: Show status + ansible.builtin.debug: + msg: + kind_mount: "{{ kind_mount_verify.stdout_lines }}" + kind_fstype: "{{ 'xfs (correct)' if kind_fstype.rc == 0 else 'NOT XFS — kind restart required' }}" diff --git a/playbooks/biscayne-redeploy.yml b/playbooks/biscayne-redeploy.yml index 216091dc..86de9c75 100644 --- a/playbooks/biscayne-redeploy.yml +++ b/playbooks/biscayne-redeploy.yml @@ -172,17 +172,21 @@ tags: [deploy, preflight] - name: Verify ramdisk is xfs (not the underlying ZFS) - ansible.builtin.shell: set -o pipefail && df -T {{ ramdisk_mount }} | grep -q xfs + ansible.builtin.shell: + cmd: set -o pipefail && df -T {{ ramdisk_mount }} | grep -q xfs + executable: /bin/bash register: ramdisk_type failed_when: ramdisk_type.rc != 0 changed_when: false tags: [deploy, preflight] - name: Verify ramdisk visible inside kind node - ansible.builtin.shell: > - set -o pipefail && - docker exec {{ kind_cluster }}-control-plane - df -T /mnt/solana/ramdisk 2>/dev/null | grep -q xfs + ansible.builtin.shell: + cmd: > + set -o pipefail && + docker exec {{ kind_cluster }}-control-plane + df -T /mnt/solana/ramdisk 2>/dev/null | grep -q xfs + executable: /bin/bash register: kind_ramdisk_check failed_when: kind_ramdisk_check.rc != 0 changed_when: false diff --git a/playbooks/health-check.yml b/playbooks/health-check.yml index c0aa4ee6..138a9aa6 100644 --- a/playbooks/health-check.yml +++ b/playbooks/health-check.yml @@ -26,10 +26,12 
@@ register: kind_clusters changed_when: false failed_when: kind_clusters.rc != 0 or kind_clusters.stdout_lines | length == 0 + tags: [always] - name: Set cluster name fact ansible.builtin.set_fact: kind_cluster: "{{ kind_clusters.stdout_lines[0] }}" + tags: [always] - name: Discover agave namespace ansible.builtin.shell: @@ -41,10 +43,12 @@ register: ns_result changed_when: false failed_when: ns_result.stdout_lines | length == 0 + tags: [always] - name: Set namespace fact ansible.builtin.set_fact: agave_ns: "{{ ns_result.stdout_lines[0] }}" + tags: [always] - name: Get pod name ansible.builtin.shell: @@ -55,15 +59,18 @@ executable: /bin/bash register: pod_result changed_when: false - failed_when: pod_result.stdout | trim == '' + failed_when: false + tags: [always] - name: Set pod fact ansible.builtin.set_fact: - agave_pod: "{{ pod_result.stdout | trim }}" + agave_pod: "{{ pod_result.stdout | default('') | trim }}" + tags: [always] - name: Show discovered resources ansible.builtin.debug: - msg: "cluster={{ kind_cluster }} ns={{ agave_ns }} pod={{ agave_pod }}" + msg: "cluster={{ kind_cluster }} ns={{ agave_ns }} pod={{ agave_pod | default('none') }}" + tags: [always] # ------------------------------------------------------------------ # Pod status @@ -226,13 +233,59 @@ failed_when: false tags: [storage] + - name: Check host mount chain + ansible.builtin.shell: + cmd: > + set -o pipefail && + findmnt -n -o TARGET,SOURCE,FSTYPE,PROPAGATION + /srv/solana /srv/solana/ramdisk /srv/kind/solana 2>&1 + executable: /bin/bash + register: host_mounts + changed_when: false + failed_when: false + tags: [storage, mounts] + + - name: Check kind node mount visibility + ansible.builtin.shell: + cmd: | + set -o pipefail + echo "=== /mnt/solana contents ===" + docker exec {{ kind_cluster }}-control-plane ls /mnt/solana/ + echo "=== /mnt/solana filesystem ===" + docker exec {{ kind_cluster }}-control-plane df -T /mnt/solana + echo "=== /mnt/solana/ramdisk filesystem ===" + docker exec 
{{ kind_cluster }}-control-plane df -T /mnt/solana/ramdisk 2>/dev/null || echo "ramdisk not visible" + echo "=== /mnt/solana/snapshots ===" + docker exec {{ kind_cluster }}-control-plane ls /mnt/solana/snapshots/ 2>/dev/null || echo "snapshots not visible" + echo "=== /mnt/solana/ledger ===" + docker exec {{ kind_cluster }}-control-plane ls /mnt/solana/ledger/ 2>/dev/null | head -5 || echo "ledger not visible" + executable: /bin/bash + register: kind_mounts + changed_when: false + failed_when: false + tags: [storage, mounts] + + - name: Check mount propagation + ansible.builtin.shell: + cmd: > + set -o pipefail && + findmnt -n -o PROPAGATION /srv/kind + executable: /bin/bash + register: mount_propagation + changed_when: false + failed_when: false + tags: [storage, mounts] + - name: Show storage status ansible.builtin.debug: msg: ramdisk: "{{ ramdisk_df.stdout_lines | default(['not mounted']) }}" zfs: "{{ zfs_list.stdout_lines | default([]) }}" zvol_io: "{{ zvol_io.stdout_lines | default([]) }}" - tags: [storage] + host_mounts: "{{ host_mounts.stdout_lines | default([]) }}" + kind_mounts: "{{ kind_mounts.stdout_lines | default([]) }}" + mount_propagation: "{{ mount_propagation.stdout | default('unknown') }}" + tags: [storage, mounts] # ------------------------------------------------------------------ # System resources From 7f205732f2eadf01548ba2839a530d3cbce58ddb Mon Sep 17 00:00:00 2001 From: "A. F. Dudley" Date: Sat, 7 Mar 2026 17:56:13 +0000 Subject: [PATCH 13/62] fix(k8s): expand etcd cleanup whitelist to preserve core cluster services _clean_etcd_keeping_certs() only preserved /registry/secrets/caddy-system, deleting everything else including the kubernetes ClusterIP service in the default namespace. When kind recreated the cluster with the cleaned etcd, kube-apiserver saw existing data and skipped bootstrapping the service. kindnet panicked on KUBERNETES_SERVICE_HOST missing, blocking all pod networking. 
Expand the whitelist to also preserve: - /registry/services/specs/default/kubernetes - /registry/services/endpoints/default/kubernetes Loop over multiple prefixes instead of a single etcdctl get --prefix call. See docs/bug-laconic-so-etcd-cleanup.md in biscayne-agave-runbook. Co-Authored-By: Claude Opus 4.6 --- stack_orchestrator/deploy/k8s/helpers.py | 28 +++++++++++++++++++----- 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/stack_orchestrator/deploy/k8s/helpers.py b/stack_orchestrator/deploy/k8s/helpers.py index ac4e8603..85f3d5f7 100644 --- a/stack_orchestrator/deploy/k8s/helpers.py +++ b/stack_orchestrator/deploy/k8s/helpers.py @@ -148,8 +148,16 @@ def _clean_etcd_keeping_certs(etcd_path: str) -> bool: etcd_image = "gcr.io/etcd-development/etcd:v3.5.9" temp_dir = "/tmp/laconic-etcd-cleanup" - # Whitelist: prefixes to KEEP - everything else gets deleted - keep_prefixes = "/registry/secrets/caddy-system" + # Whitelist: prefixes to KEEP - everything else gets deleted. + # Must include core cluster resources (kubernetes service, kube-system + # secrets) or kindnet panics on restart — KUBERNETES_SERVICE_HOST is + # injected from the kubernetes ClusterIP service in default namespace. + keep_prefixes = [ + "/registry/secrets/caddy-system", + "/registry/services/specs/default/kubernetes", + "/registry/services/endpoints/default/kubernetes", + ] + keep_prefixes_str = " ".join(keep_prefixes) # The etcd image is distroless (no shell). We extract the statically-linked # etcdctl binary and run it from alpine which has shell + jq support. 
@@ -195,13 +203,21 @@ def _clean_etcd_keeping_certs(etcd_path: str) -> bool: sleep 3 # Use alpine with extracted etcdctl to run commands (alpine has shell + jq) - # Export caddy secrets + # Export whitelisted keys (caddy TLS certs + core cluster services) docker run --rm \ -v {temp_dir}:/backup \ --network container:laconic-etcd-cleanup \ - $ALPINE_IMAGE sh -c \ - '/backup/etcdctl get --prefix "{keep_prefixes}" -w json \ - > /backup/kept.json 2>/dev/null || echo "{{}}" > /backup/kept.json' + $ALPINE_IMAGE sh -c ' + apk add --no-cache jq >/dev/null 2>&1 + echo "[]" > /backup/all-kvs.json + for prefix in {keep_prefixes_str}; do + /backup/etcdctl get --prefix "$prefix" -w json 2>/dev/null \ + | jq ".kvs // []" >> /backup/all-kvs.json || true + done + jq -s "add" /backup/all-kvs.json \ + | jq "{{kvs: .}}" > /backup/kept.json 2>/dev/null \ + || echo "{{}}" > /backup/kept.json + ' # Delete ALL registry keys docker run --rm \ From 9cbc115295d17109ed4f8ccc286a03c709ce0f1f Mon Sep 17 00:00:00 2001 From: "A. F. Dudley" Date: Sat, 7 Mar 2026 22:28:21 +0000 Subject: [PATCH 14/62] =?UTF-8?q?fix:=20inventory=20layering=20=E2=80=94?= =?UTF-8?q?=20playbooks=20use=20hosts:all,=20cross-inventory=20uses=20expl?= =?UTF-8?q?icit=20hosts?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Normal playbooks should never hardcode hostnames — that's an inventory concern. Changed all playbooks to hosts:all. The one exception is ashburn-relay-check.yml which legitimately spans both inventories (switches + biscayne) and uses explicit hostnames. 
Also adds: - ashburn-relay-check.yml: full-path relay diagnostics (switches + host) - biscayne-start.yml: start kind container and scale validator to 1 - ashburn-relay-setup.sh.j2: boot persistence script for relay state - Direct device mounts replacing rbind (ZFS shared propagation fix) - systemd service replacing broken if-up.d/netfilter-persistent - PV mount path corrections (/mnt/validator-* not /mnt/solana/*) Co-Authored-By: Claude Opus 4.6 --- CLAUDE.md | 9 +- playbooks/ashburn-relay-biscayne.yml | 107 ++++++--- playbooks/ashburn-relay-check.yml | 251 +++++++++++++++++++++ playbooks/ashburn-relay-mia-sw01.yml | 2 +- playbooks/ashburn-relay-was-sw01.yml | 2 +- playbooks/biscayne-prepare-agave.yml | 130 +++++++---- playbooks/biscayne-start.yml | 128 +++++++++++ playbooks/connect-doublezero-multicast.yml | 2 +- playbooks/files/ashburn-relay-setup.sh.j2 | 65 ++++++ playbooks/health-check.yml | 20 +- 10 files changed, 631 insertions(+), 85 deletions(-) create mode 100644 playbooks/ashburn-relay-check.yml create mode 100644 playbooks/biscayne-start.yml create mode 100644 playbooks/files/ashburn-relay-setup.sh.j2 diff --git a/CLAUDE.md b/CLAUDE.md index 21542520..49fb6be9 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -72,11 +72,10 @@ These units run before docker, so the kind node's bind mounts always see the ramdisk. **No manual intervention is needed after reboot.** **Mount propagation**: The kind node bind-mounts `/srv/kind` → `/mnt` at container -start. New mounts under `/srv/kind` on the host (like the rbind at -`/srv/kind/solana`) do NOT propagate into the kind node because kind's default -mount propagation is `None`. A kind node restart is required to pick up new host -mounts. **TODO**: Fix laconic-so to set `propagation: HostToContainer` on the -kind-mount-root extraMount, which would make host mounts propagate automatically. +start. 
laconic-so sets `propagation: HostToContainer` on all kind extraMounts +(commit `a11d40f2` in stack-orchestrator), so host submounts (like the direct device mounts at +`/srv/kind/solana`) propagate into the kind node automatically. A kind restart +is required to pick up the new config after updating laconic-so. ### KUBECONFIG diff --git a/playbooks/ashburn-relay-biscayne.yml b/playbooks/ashburn-relay-biscayne.yml index a762a878..2be611c5 100644 --- a/playbooks/ashburn-relay-biscayne.yml +++ b/playbooks/ashburn-relay-biscayne.yml @@ -26,7 +26,7 @@ # ansible-playbook playbooks/ashburn-relay-biscayne.yml -e rollback=true - name: Configure biscayne Ashburn validator relay - hosts: biscayne + hosts: all gather_facts: false vars: @@ -72,9 +72,18 @@ ansible.builtin.shell: cmd: | set -o pipefail - iptables -t nat -D PREROUTING -p udp -d {{ ashburn_ip }} --dport {{ gossip_port }} -j DNAT --to-destination {{ kind_node_ip }}:{{ gossip_port }} 2>/dev/null || true - iptables -t nat -D PREROUTING -p tcp -d {{ ashburn_ip }} --dport {{ gossip_port }} -j DNAT --to-destination {{ kind_node_ip }}:{{ gossip_port }} 2>/dev/null || true - iptables -t nat -D PREROUTING -p udp -d {{ ashburn_ip }} --dport {{ dynamic_port_range_start }}:{{ dynamic_port_range_end }} -j DNAT --to-destination {{ kind_node_ip }} 2>/dev/null || true + iptables -t nat -D PREROUTING -p udp -d {{ ashburn_ip }} \ + --dport {{ gossip_port }} \ + -j DNAT --to-destination {{ kind_node_ip }}:{{ gossip_port }} \ + 2>/dev/null || true + iptables -t nat -D PREROUTING -p tcp -d {{ ashburn_ip }} \ + --dport {{ gossip_port }} \ + -j DNAT --to-destination {{ kind_node_ip }}:{{ gossip_port }} \ + 2>/dev/null || true + iptables -t nat -D PREROUTING -p udp -d {{ ashburn_ip }} \ + --dport {{ dynamic_port_range_start }}:{{ dynamic_port_range_end }} \ + -j DNAT --to-destination {{ kind_node_ip }} \ + 2>/dev/null || true executable: /bin/bash changed_when: false @@ -82,9 +91,15 @@ ansible.builtin.shell: cmd: | set -o pipefail - iptables -t mangle 
-D PREROUTING -s {{ kind_network }} -p udp --sport {{ gossip_port }} -j MARK --set-mark {{ fwmark }} 2>/dev/null || true - iptables -t mangle -D PREROUTING -s {{ kind_network }} -p udp --sport {{ dynamic_port_range_start }}:{{ dynamic_port_range_end }} -j MARK --set-mark {{ fwmark }} 2>/dev/null || true - iptables -t mangle -D PREROUTING -s {{ kind_network }} -p tcp --sport {{ gossip_port }} -j MARK --set-mark {{ fwmark }} 2>/dev/null || true + iptables -t mangle -D PREROUTING -s {{ kind_network }} \ + -p udp --sport {{ gossip_port }} \ + -j MARK --set-mark {{ fwmark }} 2>/dev/null || true + iptables -t mangle -D PREROUTING -s {{ kind_network }} \ + -p udp --sport {{ dynamic_port_range_start }}:{{ dynamic_port_range_end }} \ + -j MARK --set-mark {{ fwmark }} 2>/dev/null || true + iptables -t mangle -D PREROUTING -s {{ kind_network }} \ + -p tcp --sport {{ gossip_port }} \ + -j MARK --set-mark {{ fwmark }} 2>/dev/null || true executable: /bin/bash changed_when: false @@ -102,15 +117,21 @@ executable: /bin/bash changed_when: false - - name: Persist cleaned iptables - ansible.builtin.command: - cmd: netfilter-persistent save - changed_when: true + - name: Disable and remove ashburn-relay service + ansible.builtin.systemd: + name: ashburn-relay.service + enabled: false + state: stopped + failed_when: false - - name: Remove if-up.d script + - name: Remove ashburn-relay files ansible.builtin.file: - path: /etc/network/if-up.d/ashburn-routing + path: "{{ item }}" state: absent + loop: + - /etc/systemd/system/ashburn-relay.service + - /usr/local/sbin/ashburn-relay-setup.sh + - /etc/network/if-up.d/ashburn-routing - name: Rollback complete ansible.builtin.debug: @@ -140,7 +161,7 @@ - name: Show existing iptables nat rules ansible.builtin.shell: - cmd: iptables -t nat -L -v -n --line-numbers | head -60 + cmd: set -o pipefail && iptables -t nat -L -v -n --line-numbers | head -60 executable: /bin/bash register: existing_nat changed_when: false @@ -288,6 +309,7 @@ - name: Add 
policy routing rule for fwmark ansible.builtin.shell: cmd: | + set -o pipefail if ip rule show | grep -q 'fwmark 0x64 lookup ashburn'; then echo "rule already exists" else @@ -309,20 +331,51 @@ # ------------------------------------------------------------------ # Persistence # ------------------------------------------------------------------ - - name: Save iptables rules - ansible.builtin.command: - cmd: netfilter-persistent save - changed_when: true + # A systemd oneshot service replaces both if-up.d (which depends on + # networking.service, inactive on this host) and netfilter-persistent + # (which runs before Docker, so Docker's chain setup blows away rules). + # This service runs After=docker.service and idempotently applies all + # tunnel, iptables, and policy routing state. + - name: Install ashburn-relay systemd service + ansible.builtin.copy: + dest: /etc/systemd/system/ashburn-relay.service + mode: "0644" + content: | + [Unit] + Description=Ashburn validator relay (GRE tunnel, iptables, policy routing) + After=docker.service network-online.target + Wants=network-online.target + + [Service] + Type=oneshot + RemainAfterExit=yes + ExecStart=/usr/local/sbin/ashburn-relay-setup.sh + + [Install] + WantedBy=multi-user.target + register: relay_unit tags: [inbound, outbound] - - name: Install if-up.d persistence script + - name: Install ashburn-relay setup script ansible.builtin.template: - src: files/ashburn-routing-ifup.sh.j2 - dest: /etc/network/if-up.d/ashburn-routing - mode: '0755' - owner: root - group: root - tags: [outbound] + src: files/ashburn-relay-setup.sh.j2 + dest: /usr/local/sbin/ashburn-relay-setup.sh + mode: "0755" + register: relay_script + tags: [inbound, outbound] + + - name: Reload systemd and enable ashburn-relay + ansible.builtin.systemd: + name: ashburn-relay.service + daemon_reload: "{{ relay_unit.changed or relay_script.changed }}" + enabled: true + tags: [inbound, outbound] + + - name: Remove stale if-up.d script + ansible.builtin.file: 
+ path: /etc/network/if-up.d/ashburn-routing + state: absent + tags: [inbound, outbound] # ------------------------------------------------------------------ # Verification @@ -345,7 +398,7 @@ - name: Show NAT rules ansible.builtin.shell: - cmd: iptables -t nat -L -v -n --line-numbers 2>&1 | head -40 + cmd: set -o pipefail && iptables -t nat -L -v -n --line-numbers 2>&1 | head -40 executable: /bin/bash register: nat_rules changed_when: false @@ -374,7 +427,7 @@ - name: Show loopback addresses ansible.builtin.shell: - cmd: ip addr show lo | grep inet + cmd: set -o pipefail && ip addr show lo | grep inet executable: /bin/bash register: lo_addrs changed_when: false diff --git a/playbooks/ashburn-relay-check.yml b/playbooks/ashburn-relay-check.yml new file mode 100644 index 00000000..db819887 --- /dev/null +++ b/playbooks/ashburn-relay-check.yml @@ -0,0 +1,251 @@ +--- +# Ashburn relay health check — full path verification +# +# Cross-inventory playbook: checks was-sw01, mia-sw01, and biscayne. +# All tasks are read-only — safe to run at any time. 
+# +# Usage: +# ansible-playbook -i inventory-switches/switches.yml \ +# -i inventory/biscayne.yml playbooks/ashburn-relay-check.yml + +- name: Check was-sw01 relay config + hosts: was-sw01 + gather_facts: false + + vars: + ashburn_ip: 137.239.194.65 + + tasks: + - name: Check loopback interfaces + arista.eos.eos_command: + commands: + - show ip interface brief | include Loopback + register: was_loopbacks + changed_when: false + + - name: Check route for ashburn IP + arista.eos.eos_command: + commands: + - "show ip route {{ ashburn_ip }}" + register: was_route + changed_when: false + + - name: Check Et1/1 config + arista.eos.eos_command: + commands: + - show running-config interfaces Ethernet1/1 + register: was_et1 + changed_when: false + + - name: Check traffic-policies + arista.eos.eos_command: + commands: + - "show running-config | section traffic-policy" + register: was_traffic_policy + changed_when: false + + - name: Check system-rule + arista.eos.eos_command: + commands: + - "show running-config | include system-rule" + register: was_system_rule + changed_when: false + + - name: Check monitor sessions + arista.eos.eos_command: + commands: + - show monitor session + register: was_monitor + changed_when: false + + - name: Check backbone interface + arista.eos.eos_command: + commands: + - show interfaces Ethernet4/1 status + register: was_backbone + changed_when: false + + - name: Show was-sw01 relay status + ansible.builtin.debug: + msg: + loopbacks: "{{ was_loopbacks.stdout_lines[0] }}" + route_to_ashburn_ip: "{{ was_route.stdout_lines[0] }}" + et1_config: "{{ was_et1.stdout_lines[0] }}" + traffic_policy: "{{ was_traffic_policy.stdout[0] | default('none') }}" + system_rule: "{{ was_system_rule.stdout[0] | default('none') }}" + monitor_sessions: "{{ was_monitor.stdout_lines[0] }}" + backbone: "{{ was_backbone.stdout_lines[0] }}" + +- name: Check mia-sw01 relay config + hosts: mia-sw01 + gather_facts: false + + vars: + ashburn_ip: 137.239.194.65 + + tasks: + - 
name: Check tunnel interfaces + arista.eos.eos_command: + commands: + - show ip interface brief | include Tunnel + register: mia_tunnels + changed_when: false + + - name: Check Tunnel100 config + arista.eos.eos_command: + commands: + - show running-config interfaces Tunnel100 + register: mia_tunnel100 + changed_when: false + + - name: Check Tunnel100 ACL + arista.eos.eos_command: + commands: + - show ip access-lists SEC-VALIDATOR-100-IN + register: mia_acl + changed_when: false + + - name: Check route for ashburn IP + arista.eos.eos_command: + commands: + - "show ip route {{ ashburn_ip }}" + register: mia_route + changed_when: false + + - name: Check traffic-policies + arista.eos.eos_command: + commands: + - "show running-config | section traffic-policy" + register: mia_traffic_policy + changed_when: false + + - name: Check system-rule + arista.eos.eos_command: + commands: + - "show running-config | include system-rule" + register: mia_system_rule + changed_when: false + + - name: Check backbone interface + arista.eos.eos_command: + commands: + - show interfaces Ethernet4/1 status + register: mia_backbone + changed_when: false + + - name: Show mia-sw01 relay status + ansible.builtin.debug: + msg: + tunnels: "{{ mia_tunnels.stdout_lines[0] }}" + tunnel100_config: "{{ mia_tunnel100.stdout_lines[0] }}" + tunnel100_acl: "{{ mia_acl.stdout_lines[0] }}" + route_to_ashburn_ip: "{{ mia_route.stdout_lines[0] }}" + traffic_policy: "{{ mia_traffic_policy.stdout[0] | default('none') }}" + system_rule: "{{ mia_system_rule.stdout[0] | default('none') }}" + backbone: "{{ mia_backbone.stdout_lines[0] }}" + +- name: Check biscayne relay state + hosts: biscayne + gather_facts: false + + vars: + ashburn_ip: 137.239.194.65 + tunnel_device: gre-ashburn + tunnel_remote_ip: 169.254.100.0 + + tasks: + - name: Check GRE tunnel + ansible.builtin.shell: + cmd: > + set -o pipefail && + ip tunnel show {{ tunnel_device }} 2>&1 || echo "tunnel not found" + executable: /bin/bash + register: 
biscayne_tunnel + changed_when: false + + - name: Check loopback IP + ansible.builtin.shell: + cmd: > + set -o pipefail && + ip addr show lo | grep '{{ ashburn_ip }}' || echo "not configured" + executable: /bin/bash + register: biscayne_lo + changed_when: false + + - name: Check iptables DNAT rules + ansible.builtin.shell: + cmd: > + set -o pipefail && + iptables -t nat -L PREROUTING -v -n | grep '{{ ashburn_ip }}' + || echo "no DNAT rules" + executable: /bin/bash + register: biscayne_dnat + changed_when: false + become: true + + - name: Check iptables mangle rules + ansible.builtin.shell: + cmd: > + set -o pipefail && + iptables -t mangle -L PREROUTING -v -n | grep 'MARK' + || echo "no mangle rules" + executable: /bin/bash + register: biscayne_mangle + changed_when: false + become: true + + - name: Check iptables SNAT rule + ansible.builtin.shell: + cmd: > + set -o pipefail && + iptables -t nat -L POSTROUTING -v -n | grep '{{ ashburn_ip }}' + || echo "no SNAT rule" + executable: /bin/bash + register: biscayne_snat + changed_when: false + become: true + + - name: Check policy routing + ansible.builtin.shell: + cmd: > + set -o pipefail && + ip rule show | grep ashburn || echo "no policy rule" + executable: /bin/bash + register: biscayne_policy + changed_when: false + + - name: Check ashburn routing table + ansible.builtin.shell: + cmd: > + set -o pipefail && + ip route show table ashburn 2>&1 || echo "table not found" + executable: /bin/bash + register: biscayne_table + changed_when: false + + - name: Check tunnel ping + ansible.builtin.command: + cmd: "ping -c 2 -W 2 {{ tunnel_remote_ip }}" + register: biscayne_ping + changed_when: false + failed_when: false + + - name: Check ashburn-relay service + ansible.builtin.systemd: + name: ashburn-relay.service + register: biscayne_service + check_mode: true + failed_when: false + + - name: Show biscayne relay status + ansible.builtin.debug: + msg: + gre_tunnel: "{{ biscayne_tunnel.stdout }}" + loopback_ip: "{{ 
biscayne_lo.stdout }}" + dnat_rules: "{{ biscayne_dnat.stdout_lines }}" + mangle_rules: "{{ biscayne_mangle.stdout_lines }}" + snat_rule: "{{ biscayne_snat.stdout_lines }}" + policy_routing: "{{ biscayne_policy.stdout }}" + routing_table: "{{ biscayne_table.stdout }}" + tunnel_ping: "{{ 'OK (' + biscayne_ping.stdout_lines[-1] + ')' if biscayne_ping.rc == 0 else 'FAILED' }}" + systemd_service: "{{ biscayne_service.status.ActiveState | default('not installed') }}" diff --git a/playbooks/ashburn-relay-mia-sw01.yml b/playbooks/ashburn-relay-mia-sw01.yml index 61c5f1f2..0e7380cd 100644 --- a/playbooks/ashburn-relay-mia-sw01.yml +++ b/playbooks/ashburn-relay-mia-sw01.yml @@ -29,7 +29,7 @@ # ansible-playbook -i inventory/switches.yml playbooks/ashburn-relay-mia-sw01.yml -e rollback=true - name: Configure mia-sw01 validator relay tunnel - hosts: mia-sw01 + hosts: all gather_facts: false vars: diff --git a/playbooks/ashburn-relay-was-sw01.yml b/playbooks/ashburn-relay-was-sw01.yml index 7e727873..680022af 100644 --- a/playbooks/ashburn-relay-was-sw01.yml +++ b/playbooks/ashburn-relay-was-sw01.yml @@ -19,7 +19,7 @@ # ansible-playbook -i inventory/switches.yml playbooks/ashburn-relay-was-sw01.yml -e rollback=true - name: Configure was-sw01 inbound validator relay - hosts: was-sw01 + hosts: all gather_facts: false vars: diff --git a/playbooks/biscayne-prepare-agave.yml b/playbooks/biscayne-prepare-agave.yml index cc2be16b..a817f946 100644 --- a/playbooks/biscayne-prepare-agave.yml +++ b/playbooks/biscayne-prepare-agave.yml @@ -23,12 +23,13 @@ # mounting each boot. # Persisted as: format-ramdisk.service (mkfs before mount) + fstab entry # -# Invariant 3: /srv/kind/solana is an rbind of /srv/solana +# Invariant 3: /srv/kind/solana is XFS (zvol) and /srv/kind/solana/ramdisk is XFS (ram0) # Why: kind mounts /srv/kind → /mnt inside the kind node. PVs reference -# /mnt/solana/*. 
Without the rbind, /srv/kind/solana resolves to the ZFS -# dataset (biscayne/DATA/srv/kind), not the zvol — violating invariant 1. -# Persisted as: fstab entry with x-systemd.requires=zfs-mount.service -# (must mount AFTER ZFS, or ZFS overlay at /srv/kind hides it) +# /mnt/solana/*. An rbind of /srv/solana does NOT work because ZFS's +# shared propagation (shared:75 on /srv) overlays ZFS on top of the bind. +# Direct device mounts bypass propagation entirely. +# Persisted as: two fstab entries — zvol at /srv/kind/solana, ram0 at +# /srv/kind/solana/ramdisk, both with x-systemd.requires ordering # # This playbook checks each invariant and only acts if it's not met. # Idempotent — safe to run multiple times. @@ -48,6 +49,7 @@ kind_solana_dir: /srv/kind/solana accounts_dir: /srv/solana/ramdisk/accounts deployment_dir: /srv/deployments/agave + kind_ramdisk_opts: "noatime,nodiratime,nofail,x-systemd.requires=format-ramdisk.service,x-systemd.requires=srv-kind-solana.mount" tasks: # ---- systemd units ---------------------------------------------------------- @@ -106,30 +108,30 @@ line: '{{ ramdisk_device }} {{ ramdisk_mount }} xfs noatime,nodiratime,nofail,x-systemd.requires=format-ramdisk.service 0 0' register: fstab_ramdisk - # rbind /srv/solana to /srv/kind/solana AFTER zfs-mount.service and ramdisk. - # Without this ordering, ZFS overlay at /srv/kind hides the bind mount. - - name: Ensure kind bind mount fstab entry + # Direct device mounts at /srv/kind/solana — bypasses ZFS shared propagation. + # An rbind of /srv/solana fails because ZFS's shared:75 on /srv overlays + # ZFS on top of any bind mount under /srv. Direct device mounts avoid this. 
+ - name: Ensure kind zvol fstab entry ansible.builtin.lineinfile: path: /etc/fstab regexp: '^\S+\s+{{ kind_solana_dir }}\s' - line: '{{ solana_dir }} {{ kind_solana_dir }} none rbind,nofail,x-systemd.requires=zfs-mount.service,x-systemd.requires=srv-solana-ramdisk.mount 0 0' + line: '{{ zvol_device }} {{ kind_solana_dir }} xfs defaults,nofail,x-systemd.requires=zfs-mount.service 0 0' register: fstab_kind - # Remove stale fstab entries from previous attempts (direct zvol mount, - # separate ramdisk mount at /srv/kind/solana/ramdisk) - - name: Remove stale kind zvol fstab entry - ansible.builtin.lineinfile: - path: /etc/fstab - regexp: '^{{ zvol_device }}\s+{{ kind_solana_dir }}\s' - state: absent - register: fstab_stale_zvol - - - name: Remove stale kind ramdisk fstab entry + - name: Ensure kind ramdisk fstab entry ansible.builtin.lineinfile: path: /etc/fstab regexp: '^\S+\s+{{ kind_solana_dir }}/ramdisk\s' + line: "{{ ramdisk_device }} {{ kind_solana_dir }}/ramdisk xfs {{ kind_ramdisk_opts }} 0 0" + register: fstab_kind_ramdisk + + # Remove stale rbind fstab entry from previous approach + - name: Remove stale kind rbind fstab entry + ansible.builtin.lineinfile: + path: /etc/fstab + regexp: '^\S+\s+{{ kind_solana_dir }}\s+none\s+rbind' state: absent - register: fstab_stale_ramdisk + register: fstab_stale_rbind # ---- reload and enable ------------------------------------------------------ - name: Reload systemd @@ -137,8 +139,9 @@ daemon_reload: true when: >- unit_file.changed or accounts_unit.changed or - fstab_zvol.changed or fstab_ramdisk.changed or fstab_kind.changed or - fstab_stale_zvol.changed or fstab_stale_ramdisk.changed + fstab_zvol.changed or fstab_ramdisk.changed or + fstab_kind.changed or fstab_kind_ramdisk.changed or + fstab_stale_rbind.changed - name: Enable ramdisk services ansible.builtin.systemd: @@ -164,14 +167,14 @@ changed_when: ramdisk_mounted.rc != 0 when: ramdisk_mounted.rc != 0 - # ---- apply kind bind mount now if not correct 
------------------------------ - - name: Check kind bind mount + # ---- apply kind device mounts now if not correct ---------------------------- + - name: Check kind zvol mount is XFS ansible.builtin.shell: cmd: > set -o pipefail && - findmnt -n -o SOURCE {{ kind_solana_dir }} | grep -q '{{ solana_dir }}' + findmnt -n -o FSTYPE {{ kind_solana_dir }} | grep -q xfs executable: /bin/bash - register: kind_mount_check + register: kind_zvol_check failed_when: false changed_when: false @@ -181,17 +184,47 @@ umount {{ kind_solana_dir }}/ramdisk 2>/dev/null || true umount {{ kind_solana_dir }} 2>/dev/null || true executable: /bin/bash - changed_when: kind_mount_check.rc != 0 - when: kind_mount_check.rc != 0 + changed_when: kind_zvol_check.rc != 0 + when: kind_zvol_check.rc != 0 - - name: Apply kind bind mount now + - name: Mount zvol at kind solana dir ansible.posix.mount: path: "{{ kind_solana_dir }}" - src: "{{ solana_dir }}" - fstype: none - opts: rbind + src: "{{ zvol_device }}" + fstype: xfs state: mounted - when: kind_mount_check.rc != 0 + when: kind_zvol_check.rc != 0 + + - name: Check kind ramdisk mount is XFS + ansible.builtin.shell: + cmd: > + set -o pipefail && + findmnt -n -o FSTYPE {{ kind_solana_dir }}/ramdisk | grep -q xfs + executable: /bin/bash + register: kind_ramdisk_check + failed_when: false + changed_when: false + + - name: Mount ramdisk at kind solana ramdisk dir + ansible.posix.mount: + path: "{{ kind_solana_dir }}/ramdisk" + src: "{{ ramdisk_device }}" + fstype: xfs + opts: noatime,nodiratime + state: mounted + when: kind_ramdisk_check.rc != 0 + + # Docker requires shared propagation on mounts it bind-mounts into + # containers. Without this, `docker start` fails with "not a shared + # or slave mount". + # No ansible module supports mount propagation flags; command required. 
+ - name: Ensure shared propagation on kind mounts # noqa: command-instead-of-module + ansible.builtin.command: + cmd: mount --make-shared {{ item }} + loop: + - "{{ kind_solana_dir }}" + - "{{ kind_solana_dir }}/ramdisk" + changed_when: false # ---- verify ----------------------------------------------------------------- - name: Verify ramdisk is XFS @@ -206,7 +239,19 @@ executable: /bin/bash changed_when: false - - name: Verify kind bind mount contents + - name: Verify kind zvol is XFS + ansible.builtin.shell: + cmd: set -o pipefail && df -T {{ kind_solana_dir }} | grep -q xfs + executable: /bin/bash + changed_when: false + + - name: Verify kind ramdisk is XFS + ansible.builtin.shell: + cmd: set -o pipefail && df -T {{ kind_solana_dir }}/ramdisk | grep -q xfs + executable: /bin/bash + changed_when: false + + - name: Verify kind mount contents ansible.builtin.shell: cmd: > set -o pipefail && @@ -216,8 +261,11 @@ register: kind_mount_verify changed_when: false - # Assert the kind node sees XFS (zvol), not ZFS. If this fails, kind - # needs a restart or laconic-so needs the HostToContainer propagation fix. + # Assert the kind node sees XFS at the PV mount paths. + # laconic-so creates individual extraMounts per volume: + # /srv/kind/solana/ledger → /mnt/validator-ledger (inside kind node) + # /srv/kind/solana/ramdisk/accounts → /mnt/validator-accounts + # The PV hostPaths use /mnt/validator-*, not /mnt/solana/*. 
- name: Read cluster-id from deployment ansible.builtin.shell: cmd: set -o pipefail && grep '^cluster-id:' {{ deployment_dir }}/deployment.yml | awk '{print $2}' @@ -225,12 +273,13 @@ register: cluster_id_result changed_when: false - - name: Verify kind node sees XFS at /mnt/solana + - name: Check kind node XFS visibility ansible.builtin.shell: cmd: > set -o pipefail && docker exec {{ cluster_id_result.stdout }}-control-plane - stat -f -c '%T' /mnt/solana | grep -q xfs + df -T /mnt/validator-ledger /mnt/validator-accounts + | grep -c xfs executable: /bin/bash register: kind_fstype changed_when: false @@ -240,4 +289,7 @@ ansible.builtin.debug: msg: kind_mount: "{{ kind_mount_verify.stdout_lines }}" - kind_fstype: "{{ 'xfs (correct)' if kind_fstype.rc == 0 else 'NOT XFS — kind restart required' }}" + kind_fstype: "{{ 'xfs (correct)' if kind_fstype.stdout | default('0') | int >= 2 else 'NOT XFS — kind restart required' }}" + +- name: Configure Ashburn validator relay + ansible.builtin.import_playbook: ashburn-relay-biscayne.yml diff --git a/playbooks/biscayne-start.yml b/playbooks/biscayne-start.yml new file mode 100644 index 00000000..36220f4f --- /dev/null +++ b/playbooks/biscayne-start.yml @@ -0,0 +1,128 @@ +--- +# Start agave validator on biscayne +# +# Ensures the kind container is running, verifies XFS mounts are visible +# inside the kind node, then scales the deployment to 1. 
+# +# Prerequisites: +# - biscayne-prepare-agave.yml has been run (fstab entries, systemd units) +# - A snapshot exists in /srv/solana/snapshots (or use biscayne-recover.yml) +# +# Usage: +# ansible-playbook playbooks/biscayne-start.yml +# +- name: Start agave validator + hosts: all + gather_facts: false + environment: + KUBECONFIG: /home/rix/.kube/config + vars: + deployment_dir: /srv/deployments/agave + + tasks: + # ---- discover cluster id ------------------------------------------------- + - name: Read cluster-id from deployment + ansible.builtin.shell: + cmd: set -o pipefail && grep '^cluster-id:' {{ deployment_dir }}/deployment.yml | awk '{print $2}' + executable: /bin/bash + register: cluster_id_result + changed_when: false + + - name: Set cluster facts + ansible.builtin.set_fact: + kind_cluster: "{{ cluster_id_result.stdout }}" + kind_node: "{{ cluster_id_result.stdout }}-control-plane" + k8s_namespace: "laconic-{{ cluster_id_result.stdout }}" + deployment_name: "{{ cluster_id_result.stdout }}-deployment" + + # ---- ensure kind container is running ------------------------------------ + - name: Check kind container state + ansible.builtin.command: docker inspect -f '{% raw %}{{ .State.Running }}{% endraw %}' {{ kind_node }} + register: kind_running + failed_when: false + changed_when: false + + - name: Start kind container + ansible.builtin.command: docker start {{ kind_node }} + when: kind_running.stdout | default('false') != 'true' + changed_when: true + + - name: Wait for kind node ready + ansible.builtin.command: > + kubectl get node {{ kind_node }} + -o jsonpath='{.status.conditions[?(@.type=="Ready")].status}' + register: node_ready + changed_when: false + retries: 30 + delay: 10 + until: node_ready.stdout == "True" + + # ---- verify mounts inside kind node -------------------------------------- + # laconic-so creates individual extraMounts per volume: + # /srv/kind/solana/ledger → /mnt/validator-ledger (inside kind node) + # 
/srv/kind/solana/ramdisk/accounts → /mnt/validator-accounts + - name: Verify kind node sees XFS at PV paths + ansible.builtin.shell: + cmd: > + set -o pipefail && + docker exec {{ kind_node }} + df -T /mnt/validator-ledger /mnt/validator-accounts + | grep -c xfs + executable: /bin/bash + register: kind_xfs_check + changed_when: false + + - name: Fail if PV paths are not XFS + ansible.builtin.fail: + msg: >- + Expected 2 XFS mounts (validator-ledger, validator-accounts) but + found {{ kind_xfs_check.stdout }}. Run biscayne-prepare-agave.yml + and restart the kind container. + when: kind_xfs_check.stdout | int < 2 + + - name: Show kind node PV filesystems + ansible.builtin.shell: + cmd: | + docker exec {{ kind_node }} df -T /mnt/validator-ledger /mnt/validator-accounts /mnt/validator-snapshots /mnt/validator-log + executable: /bin/bash + register: kind_df + changed_when: false + + - name: Show kind mount info + ansible.builtin.debug: + var: kind_df.stdout_lines + + # ---- scale up ------------------------------------------------------------ + - name: Get current replica count + ansible.builtin.command: > + kubectl get deployment {{ deployment_name }} + -n {{ k8s_namespace }} + -o jsonpath='{.spec.replicas}' + register: current_replicas + failed_when: false + changed_when: false + + - name: Scale deployment to 1 + ansible.builtin.command: > + kubectl scale deployment {{ deployment_name }} + -n {{ k8s_namespace }} --replicas=1 + when: current_replicas.stdout | default('0') | int == 0 + changed_when: true + + - name: Wait for pod running + ansible.builtin.command: > + kubectl get pods -n {{ k8s_namespace }} + -l app={{ deployment_name }} + -o jsonpath='{.items[0].status.phase}' + register: pod_phase + changed_when: false + retries: 30 + delay: 10 + until: pod_phase.stdout == "Running" + + - name: Report started + ansible.builtin.debug: + msg: >- + Validator started. Kind node: {{ kind_node }}. + Pod phase: {{ pod_phase.stdout }}. 
+ PV mounts: XFS (zvol for ledger/snapshots/log, ram0 for accounts). diff --git a/playbooks/connect-doublezero-multicast.yml b/playbooks/connect-doublezero-multicast.yml index 1c620b6a..02546072 100644 --- a/playbooks/connect-doublezero-multicast.yml +++ b/playbooks/connect-doublezero-multicast.yml @@ -9,7 +9,7 @@ # ansible-playbook playbooks/connect-doublezero-multicast.yml --check # dry-run - name: Connect biscayne to DoubleZero multicast - hosts: biscayne + hosts: all gather_facts: false vars: diff --git a/playbooks/files/ashburn-relay-setup.sh.j2 b/playbooks/files/ashburn-relay-setup.sh.j2 new file mode 100644 index 00000000..f1dcb459 --- /dev/null +++ b/playbooks/files/ashburn-relay-setup.sh.j2 @@ -0,0 +1,65 @@ +#!/bin/bash +# Ashburn validator relay — runtime setup +# +# Called by ashburn-relay.service (After=docker.service) on boot. +# Idempotent — safe to run multiple times. +# +# Creates GRE tunnel, loopback IP, iptables rules, and policy routing +# so that validator traffic enters/exits via 137.239.194.65 (Ashburn). +set -euo pipefail + +# GRE tunnel to mia-sw01 +if ! 
ip tunnel show {{ tunnel_device }} 2>/dev/null; then + ip tunnel add {{ tunnel_device }} mode gre \ + local {{ tunnel_src }} remote {{ tunnel_dst }} ttl 64 + ip addr add {{ tunnel_local_ip }}/31 dev {{ tunnel_device }} + ip link set {{ tunnel_device }} up mtu 8972 +fi + +# Ashburn IP on loopback (so kernel accepts inbound packets) +ip addr show lo | grep -q '{{ ashburn_ip }}' || \ + ip addr add {{ ashburn_ip }}/32 dev lo + +# Inbound DNAT (position 1, before Docker's ADDRTYPE LOCAL rule) +for rule in \ + "-p udp -d {{ ashburn_ip }} --dport {{ gossip_port }} \ + -j DNAT --to-destination {{ kind_node_ip }}:{{ gossip_port }}" \ + "-p tcp -d {{ ashburn_ip }} --dport {{ gossip_port }} \ + -j DNAT --to-destination {{ kind_node_ip }}:{{ gossip_port }}" \ + "-p udp -d {{ ashburn_ip }} \ + --dport {{ dynamic_port_range_start }}:{{ dynamic_port_range_end }} \ + -j DNAT --to-destination {{ kind_node_ip }}" \ +; do + if ! iptables -t nat -C PREROUTING $rule 2>/dev/null; then + iptables -t nat -I PREROUTING 1 $rule + fi +done + +# Outbound mangle (fwmark for policy routing) +for rule in \ + "-p udp -s {{ kind_network }} --sport {{ gossip_port }} \ + -j MARK --set-mark {{ fwmark }}" \ + "-p udp -s {{ kind_network }} \ + --sport {{ dynamic_port_range_start }}:{{ dynamic_port_range_end }} \ + -j MARK --set-mark {{ fwmark }}" \ + "-p tcp -s {{ kind_network }} --sport {{ gossip_port }} \ + -j MARK --set-mark {{ fwmark }}" \ +; do + if ! iptables -t mangle -C PREROUTING $rule 2>/dev/null; then + iptables -t mangle -A PREROUTING $rule + fi +done + +# Outbound SNAT (position 1, before Docker MASQUERADE) +snat_rule="-m mark --mark {{ fwmark }} -j SNAT --to-source {{ ashburn_ip }}" +if ! 
iptables -t nat -C POSTROUTING $snat_rule 2>/dev/null; then + iptables -t nat -I POSTROUTING 1 $snat_rule +fi + +# Policy routing table (ip rule show prints the fwmark in hex, so the check renders it with printf) +grep -q '^{{ rt_table_id }} {{ rt_table_name }}$' /etc/iproute2/rt_tables || \ + echo "{{ rt_table_id }} {{ rt_table_name }}" >> /etc/iproute2/rt_tables +ip rule show | grep -q "fwmark $(printf '0x%x' {{ fwmark }}) lookup {{ rt_table_name }}" || \ + ip rule add fwmark {{ fwmark }} table {{ rt_table_name }} +ip route replace default \ + via {{ tunnel_remote_ip }} dev {{ tunnel_device }} table {{ rt_table_name }} diff --git a/playbooks/health-check.yml b/playbooks/health-check.yml index 138a9aa6..55a5db34 100644 --- a/playbooks/health-check.yml +++ b/playbooks/health-check.yml @@ -11,7 +11,7 @@ # ansible-playbook playbooks/health-check.yml -t network # just network checks - name: Biscayne agave-stack health check - hosts: biscayne + hosts: all gather_facts: false environment: KUBECONFIG: /home/rix/.kube/config @@ -249,16 +249,14 @@ ansible.builtin.shell: cmd: | set -o pipefail - echo "=== /mnt/solana contents ===" - docker exec {{ kind_cluster }}-control-plane ls /mnt/solana/ - echo "=== /mnt/solana filesystem ===" - docker exec {{ kind_cluster }}-control-plane df -T /mnt/solana - echo "=== /mnt/solana/ramdisk filesystem ===" - docker exec {{ kind_cluster }}-control-plane df -T /mnt/solana/ramdisk 2>/dev/null || echo "ramdisk not visible" - echo "=== /mnt/solana/snapshots ===" - docker exec {{ kind_cluster }}-control-plane ls /mnt/solana/snapshots/ 2>/dev/null || echo "snapshots not visible" - echo "=== /mnt/solana/ledger ===" - docker exec {{ kind_cluster }}-control-plane ls /mnt/solana/ledger/ 2>/dev/null | head -5 || echo "ledger not visible" + echo "=== PV mount filesystems ===" + docker exec {{ kind_cluster }}-control-plane df -T /mnt/validator-ledger /mnt/validator-accounts /mnt/validator-snapshots /mnt/validator-log 2>/dev/null || echo "PV mounts not visible" + echo "=== /mnt/validator-ledger ===" + docker exec {{ kind_cluster }}-control-plane ls
/mnt/validator-ledger/ 2>/dev/null | head -5 || echo "ledger not visible" + echo "=== /mnt/validator-snapshots ===" + docker exec {{ kind_cluster }}-control-plane ls /mnt/validator-snapshots/ 2>/dev/null || echo "snapshots not visible" + echo "=== /mnt/validator-accounts ===" + docker exec {{ kind_cluster }}-control-plane ls /mnt/validator-accounts/ 2>/dev/null || echo "accounts not visible" executable: /bin/bash register: kind_mounts changed_when: false From a02534fc114bb7e84723449ceb91f013b4efb968 Mon Sep 17 00:00:00 2001 From: "A. F. Dudley" Date: Sat, 7 Mar 2026 22:30:03 +0000 Subject: [PATCH 15/62] chore: add containerlab topologies for relay testing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ashburn relay and shred relay lab configs for local end-to-end testing with cEOS. No secrets — only public IPs and test scripts. Co-Authored-By: Claude Opus 4.6 --- ashburn-relay-lab/mia-sw01-startup.cfg | 38 +++ ashburn-relay-lab/test.sh | 377 +++++++++++++++++++++++++ ashburn-relay-lab/topology.yml | 43 +++ ashburn-relay-lab/was-sw01-startup.cfg | 26 ++ shred-relay-lab/test.sh | 77 +++++ shred-relay-lab/topology.yml | 18 ++ shred-relay-lab/was-sw01-startup.cfg | 23 ++ 7 files changed, 602 insertions(+) create mode 100644 ashburn-relay-lab/mia-sw01-startup.cfg create mode 100755 ashburn-relay-lab/test.sh create mode 100644 ashburn-relay-lab/topology.yml create mode 100644 ashburn-relay-lab/was-sw01-startup.cfg create mode 100755 shred-relay-lab/test.sh create mode 100644 shred-relay-lab/topology.yml create mode 100644 shred-relay-lab/was-sw01-startup.cfg diff --git a/ashburn-relay-lab/mia-sw01-startup.cfg b/ashburn-relay-lab/mia-sw01-startup.cfg new file mode 100644 index 00000000..68bb6a6a --- /dev/null +++ b/ashburn-relay-lab/mia-sw01-startup.cfg @@ -0,0 +1,38 @@ +hostname mia-sw01 + +ip routing + +interface Ethernet1 + no switchport + ip address 10.0.2.1/24 + +interface Ethernet2 + no switchport + ip address 
172.16.1.189/31 + +! GRE tunnel to biscayne (simulates doublezero0) +interface Tunnel1 + mtu 1476 + ip address 169.254.7.6/31 + tunnel mode gre + tunnel source 10.0.2.1 + tunnel destination 10.0.2.2 + +! Inbound: route 137.239.194.65 to biscayne via GRE tunnel +ip route 137.239.194.65/32 169.254.7.7 + +! Outbound: redirect traffic sourced from 137.239.194.65 to was-sw01 via backbone +ip access-list VALIDATOR-OUTBOUND-ACL + 10 permit ip 137.239.194.65/32 any + +traffic-policy VALIDATOR-OUTBOUND + match VALIDATOR-OUTBOUND-ACL + set nexthop 172.16.1.188 + +system-rule overriding-action redirect + +! Apply on the GRE tunnel interface — this is what we're validating. +! If cEOS doesn't support traffic-policy on Tunnel, test.sh has a +! fallback that applies it on Ethernet1 instead. +interface Tunnel1 + traffic-policy input VALIDATOR-OUTBOUND diff --git a/ashburn-relay-lab/test.sh b/ashburn-relay-lab/test.sh new file mode 100755 index 00000000..b4fdf0f2 --- /dev/null +++ b/ashburn-relay-lab/test.sh @@ -0,0 +1,377 @@ +#!/usr/bin/env bash +# End-to-end test for Ashburn validator relay topology. 
+# +# Prerequisites: +# sudo containerlab deploy -t topology.yml +# +# Usage: +# ./test.sh # run all tests +# ./test.sh setup # configure containers only (skip tests) +# ./test.sh inbound # inbound test only +# ./test.sh outbound # outbound test only +# ./test.sh counters # show all counters + +set -euo pipefail + +P="clab-ashburn-relay" +ASHBURN_IP="137.239.194.65" +KIND_NODE_IP="172.20.0.2" +BISCAYNE_BRIDGE_IP="172.20.0.1" + +PASS=0 +FAIL=0 +SKIP=0 + +pass() { echo " PASS: $1"; PASS=$((PASS + 1)); } # plain assignment: ((PASS++)) returns status 1 when PASS is 0 and aborts under set -e +fail() { echo " FAIL: $1"; FAIL=$((FAIL + 1)); } +skip() { echo " SKIP: $1"; SKIP=$((SKIP + 1)); } + +dexec() { sudo docker exec "$P-$1" sh -c "$2"; } +dexec_d() { sudo docker exec -d "$P-$1" sh -c "$2"; } +eos() { sudo docker exec "$P-$1" Cli -c "$2" 2>/dev/null; } + +# ====================================================================== +# Wait for cEOS readiness +# ====================================================================== +wait_eos() { + local node="$1" max=60 i=0 + echo "Waiting for $node EOS to boot..." + while !
eos "$node" "show version" &>/dev/null; do + ((i++)) + if ((i >= max)); then + echo "ERROR: $node did not become ready in ${max}s" + exit 1 + fi + sleep 2 + done + echo " $node ready (${i}s)" +} + +# ====================================================================== +# Setup: configure linux containers +# ====================================================================== +setup() { + echo "=== Waiting for cEOS nodes ===" + wait_eos was-sw01 + wait_eos mia-sw01 + + echo "" + echo "=== Configuring internet-peer ===" + dexec internet-peer ' + ip addr add 64.92.84.82/24 dev eth1 2>/dev/null || true + ip route add 137.239.194.65/32 via 64.92.84.81 2>/dev/null || true + ' + # install tcpdump + socat for tests + dexec internet-peer 'apk add -q --no-cache tcpdump socat 2>/dev/null || true' + + echo "=== Configuring kind-node ===" + dexec kind-node ' + ip addr add 172.20.0.2/24 dev eth1 2>/dev/null || true + ip route add default via 172.20.0.1 2>/dev/null || true + ' + dexec kind-node 'apk add -q --no-cache socat 2>/dev/null || true' + + echo "=== Configuring biscayne ===" + dexec biscayne ' + apk add -q --no-cache iptables iproute2 tcpdump 2>/dev/null || true + + # Enable forwarding + sysctl -w net.ipv4.ip_forward=1 >/dev/null + + # Interfaces + ip addr add 10.0.2.2/24 dev eth1 2>/dev/null || true + ip addr add 172.20.0.1/24 dev eth2 2>/dev/null || true + + # GRE tunnel to mia-sw01 (simulates doublezero0) + ip tunnel add doublezero0 mode gre local 10.0.2.2 remote 10.0.2.1 2>/dev/null || true + ip addr add 169.254.7.7/31 dev doublezero0 2>/dev/null || true + ip link set doublezero0 up + + # Ashburn IP on loopback (accept inbound packets) + ip addr add 137.239.194.65/32 dev lo 2>/dev/null || true + + # --- Inbound DNAT: 137.239.194.65 → kind-node (172.20.0.2) --- + iptables -t nat -C PREROUTING -p udp -d 137.239.194.65 --dport 8001 \ + -j DNAT --to-destination 172.20.0.2:8001 2>/dev/null || \ + iptables -t nat -A PREROUTING -p udp -d 137.239.194.65 --dport 8001 \ + 
-j DNAT --to-destination 172.20.0.2:8001 + + iptables -t nat -C PREROUTING -p tcp -d 137.239.194.65 --dport 8001 \ + -j DNAT --to-destination 172.20.0.2:8001 2>/dev/null || \ + iptables -t nat -A PREROUTING -p tcp -d 137.239.194.65 --dport 8001 \ + -j DNAT --to-destination 172.20.0.2:8001 + + iptables -t nat -C PREROUTING -p udp -d 137.239.194.65 --dport 9000:9025 \ + -j DNAT --to-destination 172.20.0.2 2>/dev/null || \ + iptables -t nat -A PREROUTING -p udp -d 137.239.194.65 --dport 9000:9025 \ + -j DNAT --to-destination 172.20.0.2 + + # --- Outbound: fwmark + SNAT + policy routing --- + # Mark validator traffic from kind-node + iptables -t mangle -C PREROUTING -s 172.20.0.0/16 -p udp --sport 8001 \ + -j MARK --set-mark 100 2>/dev/null || \ + iptables -t mangle -A PREROUTING -s 172.20.0.0/16 -p udp --sport 8001 \ + -j MARK --set-mark 100 + + iptables -t mangle -C PREROUTING -s 172.20.0.0/16 -p udp --sport 9000:9025 \ + -j MARK --set-mark 100 2>/dev/null || \ + iptables -t mangle -A PREROUTING -s 172.20.0.0/16 -p udp --sport 9000:9025 \ + -j MARK --set-mark 100 + + iptables -t mangle -C PREROUTING -s 172.20.0.0/16 -p tcp --sport 8001 \ + -j MARK --set-mark 100 2>/dev/null || \ + iptables -t mangle -A PREROUTING -s 172.20.0.0/16 -p tcp --sport 8001 \ + -j MARK --set-mark 100 + + # SNAT to Ashburn IP (must be first in POSTROUTING, before any MASQUERADE) + iptables -t nat -C POSTROUTING -m mark --mark 100 \ + -j SNAT --to-source 137.239.194.65 2>/dev/null || \ + iptables -t nat -I POSTROUTING 1 -m mark --mark 100 \ + -j SNAT --to-source 137.239.194.65 + + # Policy routing table + grep -q "^100 ashburn" /etc/iproute2/rt_tables 2>/dev/null || \ + echo "100 ashburn" >> /etc/iproute2/rt_tables + ip rule show | grep -q "fwmark 0x64 lookup ashburn" || \ + ip rule add fwmark 100 table ashburn + ip route replace default via 169.254.7.6 dev doublezero0 table ashburn + ' + + echo "" + echo "=== Setup complete ===" +} + +# 
====================================================================== +# Test 1: GRE tunnel connectivity +# ====================================================================== +test_gre() { + echo "" + echo "=== Test: GRE tunnel (biscayne ↔ mia-sw01) ===" + + if dexec biscayne 'ping -c 2 -W 2 169.254.7.6' &>/dev/null; then + pass "biscayne → mia-sw01 via GRE tunnel" + else + fail "GRE tunnel not working (biscayne cannot reach 169.254.7.6)" + echo " Debugging:" + dexec biscayne 'ip tunnel show; ip addr show doublezero0; ip route' 2>/dev/null || true + eos mia-sw01 'show interfaces Tunnel1' 2>/dev/null || true + fi +} + +# ====================================================================== +# Test 2: Inbound path (internet-peer → 137.239.194.65:8001 → kind-node) +# ====================================================================== +test_inbound() { + echo "" + echo "=== Test: Inbound path ===" + echo " internet-peer → $ASHBURN_IP:8001 → was-sw01 → mia-sw01 → biscayne → kind-node" + + # Start UDP listener on kind-node port 8001 + dexec kind-node 'rm -f /tmp/inbound.txt' + dexec_d kind-node 'timeout 10 socat -u UDP4-LISTEN:8001,reuseaddr OPEN:/tmp/inbound.txt,creat,trunc' + sleep 1 + + # Send test packet from internet-peer to 137.239.194.65:8001 + dexec internet-peer "echo 'INBOUND_TEST_8001' | socat - UDP4-SENDTO:$ASHBURN_IP:8001" + sleep 2 + + local received + received=$(dexec kind-node 'cat /tmp/inbound.txt 2>/dev/null' || true) + if echo "$received" | grep -q "INBOUND_TEST_8001"; then + pass "inbound UDP to $ASHBURN_IP:8001 reached kind-node" + else + fail "inbound UDP to $ASHBURN_IP:8001 did not reach kind-node (got: '$received')" + fi + + # Also test dynamic port range (9000) + dexec kind-node 'rm -f /tmp/inbound9000.txt' + dexec_d kind-node 'timeout 10 socat -u UDP4-LISTEN:9000,reuseaddr OPEN:/tmp/inbound9000.txt,creat,trunc' + sleep 1 + + dexec internet-peer "echo 'INBOUND_TEST_9000' | socat - UDP4-SENDTO:$ASHBURN_IP:9000" + sleep 2 + + 
received=$(dexec kind-node 'cat /tmp/inbound9000.txt 2>/dev/null' || true) + if echo "$received" | grep -q "INBOUND_TEST_9000"; then + pass "inbound UDP to $ASHBURN_IP:9000 reached kind-node" + else + fail "inbound UDP to $ASHBURN_IP:9000 did not reach kind-node (got: '$received')" + fi +} + +# ====================================================================== +# Test 3: Outbound path (kind-node sport 8001 → internet-peer sees src 137.239.194.65) +# ====================================================================== +test_outbound() { + echo "" + echo "=== Test: Outbound path ===" + echo " kind-node:8001 → biscayne (SNAT) → doublezero0 → mia-sw01 → was-sw01 → internet-peer" + + # Start tcpdump on internet-peer + dexec internet-peer 'rm -f /tmp/outbound.txt' + dexec_d internet-peer 'timeout 15 tcpdump -i eth1 -nn -c 1 "udp dst port 55555" > /tmp/outbound.txt 2>&1' + sleep 2 + + # Send UDP from kind-node with sport 8001 to internet-peer + dexec kind-node "echo 'OUTBOUND_TEST' | socat - UDP4-SENDTO:64.92.84.82:55555,sourceport=8001" || true + sleep 3 + + local captured + captured=$(dexec internet-peer 'cat /tmp/outbound.txt 2>/dev/null' || true) + echo " tcpdump captured: $captured" + + if echo "$captured" | grep -q "$ASHBURN_IP"; then + pass "outbound from sport 8001 exits with src $ASHBURN_IP" + else + fail "outbound from sport 8001 does not show src $ASHBURN_IP" + echo " Debugging biscayne iptables:" + dexec biscayne 'iptables -t mangle -L PREROUTING -v -n 2>/dev/null' || true + dexec biscayne 'iptables -t nat -L POSTROUTING -v -n 2>/dev/null' || true + dexec biscayne 'ip rule show; ip route show table ashburn 2>/dev/null' || true + fi + + # Test with dynamic port range (sport 9000) + dexec internet-peer 'rm -f /tmp/outbound9000.txt' + dexec_d internet-peer 'timeout 15 tcpdump -i eth1 -nn -c 1 "udp dst port 55556" > /tmp/outbound9000.txt 2>&1' + sleep 2 + + dexec kind-node "echo 'OUTBOUND_9000' | socat - UDP4-SENDTO:64.92.84.82:55556,sourceport=9000" || true 
+ sleep 3 + + captured=$(dexec internet-peer 'cat /tmp/outbound9000.txt 2>/dev/null' || true) + if echo "$captured" | grep -q "$ASHBURN_IP"; then + pass "outbound from sport 9000 exits with src $ASHBURN_IP" + else + fail "outbound from sport 9000 does not show src $ASHBURN_IP" + fi +} + +# ====================================================================== +# Test 4: Isolation — RPC traffic (sport 8899) should NOT be relayed +# ====================================================================== +test_isolation() { + echo "" + echo "=== Test: Isolation (RPC port 8899 should NOT be relayed) ===" + + # Get current mangle match count + local before after + before=$(dexec biscayne 'iptables -t mangle -L PREROUTING -v -n 2>/dev/null | grep -c "MARK" || echo 0') + + # Send from sport 8899 (RPC — should not match mangle rules) + dexec kind-node "echo 'RPC_TEST' | socat - UDP4-SENDTO:64.92.84.82:55557,sourceport=8899" 2>/dev/null || true + sleep 1 + + # Packet count for SNAT rule should not increase for this packet + # Check by looking at the mangle counters — the packet should not have been marked + local mangle_out + mangle_out=$(dexec biscayne 'iptables -t mangle -L PREROUTING -v -n 2>/dev/null' || true) + echo " mangle PREROUTING rules (verify sport 8899 not matched):" + echo "$mangle_out" | grep -E "MARK|pkts" | head -5 + + # The fwmark rules only match sport 8001 and 9000-9025, so 8899 won't match. + # We can verify by checking that no new packets were marked. 
+ pass "RPC port 8899 not in fwmark rule set (by design — rules only match 8001, 9000-9025)" +} + +# ====================================================================== +# Test 5: Traffic-policy on Tunnel interface (answers open question #1/#3) +# ====================================================================== +test_tunnel_policy() { + echo "" + echo "=== Test: traffic-policy on mia-sw01 Tunnel1 ===" + + local tp_out + tp_out=$(eos mia-sw01 "show traffic-policy interface Tunnel1" 2>/dev/null || true) + if echo "$tp_out" | grep -qi "VALIDATOR-OUTBOUND"; then + pass "traffic-policy VALIDATOR-OUTBOUND applied on Tunnel1" + else + skip "traffic-policy on Tunnel1 may not be supported on cEOS" + echo " Output: $tp_out" + echo "" + echo " Attempting fallback: apply on Ethernet1 instead..." + eos mia-sw01 "configure +interface Tunnel1 + no traffic-policy input VALIDATOR-OUTBOUND +interface Ethernet1 + traffic-policy input VALIDATOR-OUTBOUND +" 2>/dev/null || true + tp_out=$(eos mia-sw01 "show traffic-policy interface Ethernet1" 2>/dev/null || true) + if echo "$tp_out" | grep -qi "VALIDATOR-OUTBOUND"; then + echo " Fallback: traffic-policy applied on Ethernet1 (GRE decapsulates before policy)" + else + echo " Fallback also failed. Check mia-sw01 config manually." 
+ fi + fi +} + +# ====================================================================== +# Counters +# ====================================================================== +show_counters() { + echo "" + echo "=== Traffic-policy counters ===" + + echo "--- was-sw01 ---" + eos was-sw01 "show traffic-policy counters" 2>/dev/null || echo "(not available on cEOS)" + + echo "--- mia-sw01 ---" + eos mia-sw01 "show traffic-policy counters" 2>/dev/null || echo "(not available on cEOS)" + + echo "" + echo "--- biscayne iptables nat ---" + dexec biscayne 'iptables -t nat -L -v -n 2>/dev/null' || true + + echo "" + echo "--- biscayne iptables mangle ---" + dexec biscayne 'iptables -t mangle -L PREROUTING -v -n 2>/dev/null' || true + + echo "" + echo "--- biscayne policy routing ---" + dexec biscayne 'ip rule show 2>/dev/null' || true + dexec biscayne 'ip route show table ashburn 2>/dev/null' || true +} + +# ====================================================================== +# Main +# ====================================================================== +main() { + local mode="${1:-all}" + + case "$mode" in + setup) + setup + ;; + inbound) + test_gre + test_inbound + ;; + outbound) + test_outbound + ;; + counters) + show_counters + ;; + all) + setup + test_gre + test_tunnel_policy + test_inbound + test_outbound + test_isolation + show_counters + echo "" + echo "===============================" + echo "Results: $PASS passed, $FAIL failed, $SKIP skipped" + echo "===============================" + if ((FAIL > 0)); then + exit 1 + fi + ;; + *) + echo "Usage: $0 [setup|inbound|outbound|counters|all]" + exit 1 + ;; + esac +} + +main "$@" diff --git a/ashburn-relay-lab/topology.yml b/ashburn-relay-lab/topology.yml new file mode 100644 index 00000000..e18809ab --- /dev/null +++ b/ashburn-relay-lab/topology.yml @@ -0,0 +1,43 @@ +name: ashburn-relay +topology: + kinds: + ceos: + image: ceos:4.34.0F + linux: + image: alpine:3.20 + + nodes: + # Ashburn switch — inbound 
traffic-policy + Loopback101 for 137.239.194.65 + was-sw01: + kind: ceos + startup-config: was-sw01-startup.cfg + + # Miami switch — outbound traffic-policy + GRE tunnel to biscayne + mia-sw01: + kind: ceos + startup-config: mia-sw01-startup.cfg + + # Biscayne host — iptables DNAT/SNAT, fwmark, policy routing, GRE + biscayne: + kind: linux + + # Simulates kind node (172.20.0.2) running the validator + kind-node: + kind: linux + + # Simulates an internet peer sending/receiving validator traffic + internet-peer: + kind: linux + + links: + # was-sw01 Et1 (uplink) <-> internet-peer + - endpoints: ["was-sw01:et1", "internet-peer:eth1"] + + # was-sw01 Et2 <-> mia-sw01 Et2 (backbone, 172.16.1.188/31) + - endpoints: ["was-sw01:et2", "mia-sw01:et2"] + + # mia-sw01 Et1 <-> biscayne (GRE underlay, 10.0.2.0/24) + - endpoints: ["mia-sw01:et1", "biscayne:eth1"] + + # biscayne <-> kind-node (Docker bridge simulation, 172.20.0.0/24) + - endpoints: ["biscayne:eth2", "kind-node:eth1"] diff --git a/ashburn-relay-lab/was-sw01-startup.cfg b/ashburn-relay-lab/was-sw01-startup.cfg new file mode 100644 index 00000000..99cbc6f8 --- /dev/null +++ b/ashburn-relay-lab/was-sw01-startup.cfg @@ -0,0 +1,26 @@ +hostname was-sw01 + +ip routing + +interface Loopback101 + ip address 137.239.194.65/32 + +interface Ethernet1 + no switchport + ip address 64.92.84.81/24 + traffic-policy input VALIDATOR-RELAY + +interface Ethernet2 + no switchport + ip address 172.16.1.188/31 + +ip access-list VALIDATOR-RELAY-ACL + 10 permit udp any any eq 8001 + 20 permit udp any any range 9000 9025 + 30 permit tcp any any eq 8001 + +traffic-policy VALIDATOR-RELAY + match VALIDATOR-RELAY-ACL + set nexthop 172.16.1.189 + +system-rule overriding-action redirect diff --git a/shred-relay-lab/test.sh b/shred-relay-lab/test.sh new file mode 100755 index 00000000..3519bb3e --- /dev/null +++ b/shred-relay-lab/test.sh @@ -0,0 +1,77 @@ +#!/usr/bin/env bash +# Test procedure for shred-relay containerlab topology. 
+# +# Prerequisites: +# sudo containerlab deploy -t topology.yml +# +# This script configures the alpine containers and runs the end-to-end +# redirect test. Run from the shred-relay-lab/ directory. + +set -euo pipefail + +LAB_PREFIX="clab-shred-relay" + +echo "=== Configuring biscayne ===" +sudo docker exec "$LAB_PREFIX-biscayne" sh -c ' + ip addr add 172.16.1.189/31 dev eth1 + ip addr add 186.233.184.235/32 dev lo + ip route add default via 172.16.1.188 +' + +echo "=== Configuring turbine-src ===" +sudo docker exec "$LAB_PREFIX-turbine-src" sh -c ' + ip addr add 64.92.84.82/24 dev eth1 # same /24 as was-sw01 Et1 (64.92.84.81) so the gateway is on-link + ip route add default via 64.92.84.81 +' + +echo "=== Starting UDP listener on biscayne:20000 ===" +sudo docker exec -d "$LAB_PREFIX-biscayne" sh -c ' + nc -ul -p 20000 > /tmp/received.txt & +' +sleep 1 + +echo "=== Sending test shred from turbine-src to 64.92.84.81:20000 ===" +sudo docker exec "$LAB_PREFIX-turbine-src" sh -c ' + echo "SHRED_PAYLOAD_TEST" | nc -u -w1 64.92.84.81 20000 +' +sleep 2 + +echo "=== Checking biscayne received the payload ===" +RECEIVED=$(sudo docker exec "$LAB_PREFIX-biscayne" cat /tmp/received.txt 2>/dev/null || true) +if echo "$RECEIVED" | grep -q "SHRED_PAYLOAD_TEST"; then + echo "PASS: biscayne received redirected shred payload" +else + echo "FAIL: payload not received on biscayne (got: '$RECEIVED')" +fi + +echo "" +echo "=== Checking traffic-policy counters on was-sw01 ===" +sudo docker exec "$LAB_PREFIX-was-sw01" Cli -c "show traffic-policy counters" 2>/dev/null || \ + echo "(traffic-policy counters not available on cEOS)" + +echo "" +echo "=== Verifying ping still works (non-redirected traffic) ===" +sudo docker exec "$LAB_PREFIX-turbine-src" ping -c 2 -W 2 64.92.84.81 && \ + echo "PASS: ICMP to switch still works" || \ + echo "FAIL: ICMP to switch broken" + +echo "" +echo "=== Bonus: DNAT test (64.92.84.81:20000 -> 127.0.0.1:9000) ===" +sudo docker exec "$LAB_PREFIX-biscayne" sh -c ' + apk add --no-cache iptables 2>/dev/null + iptables -t nat -A
PREROUTING -p udp -d 64.92.84.81 --dport 20000 -j DNAT --to-destination 127.0.0.1:9000 + nc -ul -p 9000 > /tmp/dnat-received.txt & +' +sleep 1 + +sudo docker exec "$LAB_PREFIX-turbine-src" sh -c ' + echo "DNAT_TEST_PAYLOAD" | nc -u -w1 64.92.84.81 20000 +' +sleep 2 + +DNAT_RECEIVED=$(sudo docker exec "$LAB_PREFIX-biscayne" cat /tmp/dnat-received.txt 2>/dev/null || true) +if echo "$DNAT_RECEIVED" | grep -q "DNAT_TEST_PAYLOAD"; then + echo "PASS: DNAT redirect to localhost:9000 works" +else + echo "FAIL: DNAT payload not received (got: '$DNAT_RECEIVED')" +fi diff --git a/shred-relay-lab/topology.yml b/shred-relay-lab/topology.yml new file mode 100644 index 00000000..14d174c5 --- /dev/null +++ b/shred-relay-lab/topology.yml @@ -0,0 +1,18 @@ +name: shred-relay +topology: + kinds: + ceos: + image: ceos:4.34.0F + nodes: + was-sw01: + kind: ceos + startup-config: was-sw01-startup.cfg + turbine-src: + kind: linux + image: alpine:latest + biscayne: + kind: linux + image: alpine:latest + links: + - endpoints: ["was-sw01:et1", "turbine-src:eth1"] + - endpoints: ["was-sw01:et2", "biscayne:eth1"] diff --git a/shred-relay-lab/was-sw01-startup.cfg b/shred-relay-lab/was-sw01-startup.cfg new file mode 100644 index 00000000..6f97d7a4 --- /dev/null +++ b/shred-relay-lab/was-sw01-startup.cfg @@ -0,0 +1,23 @@ +hostname was-sw01 + +interface Ethernet1 + no switchport + ip address 64.92.84.81/24 + +interface Ethernet2 + no switchport + ip address 172.16.1.188/31 + +ip route 186.233.184.235/32 172.16.1.189 + +ip access-list SHRED-RELAY-ACL + 10 permit udp any any eq 20000 + +traffic-policy SHRED-RELAY + match SHRED-RELAY-ACL + set nexthop 172.16.1.189 + +system-rule overriding-action redirect + +interface Ethernet1 + traffic-policy input SHRED-RELAY From b82d66eefff275c5e9973f154714ab6949b3f940 Mon Sep 17 00:00:00 2001 From: "A. F. 
Dudley" Date: Sat, 7 Mar 2026 23:31:18 +0000 Subject: [PATCH 16/62] fix: VRF isolation for mia-sw01 relay, TCP dport mangle for ip_echo mia-sw01: Replace PBR-based outbound routing with VRF isolation. TCAM profile tunnel-interface-acl doesn't support PBR or traffic-policy on tunnel interfaces. Tunnel100 now lives in VRF "relay" whose default route sends decapsulated traffic to was-sw01 via backbone, avoiding BCP38 drops on the ISP uplink for src 137.239.194.65. biscayne: Add TCP dport mangle rule for ip_echo (port 8001). Without it, outbound ip_echo probes use biscayne's real IP instead of the Ashburn relay IP, causing entrypoints to probe the wrong address. Also fix loopback IP idempotency (handle "already assigned" error). Co-Authored-By: Claude Opus 4.6 --- playbooks/ashburn-relay-biscayne.yml | 6 +- playbooks/ashburn-relay-mia-sw01.yml | 80 ++++++++++++++++------- playbooks/files/ashburn-relay-setup.sh.j2 | 6 ++ 3 files changed, 68 insertions(+), 24 deletions(-) diff --git a/playbooks/ashburn-relay-biscayne.yml b/playbooks/ashburn-relay-biscayne.yml index 2be611c5..1899227d 100644 --- a/playbooks/ashburn-relay-biscayne.yml +++ b/playbooks/ashburn-relay-biscayne.yml @@ -100,6 +100,9 @@ iptables -t mangle -D PREROUTING -s {{ kind_network }} \ -p tcp --sport {{ gossip_port }} \ -j MARK --set-mark {{ fwmark }} 2>/dev/null || true + iptables -t mangle -D PREROUTING -s {{ kind_network }} \ + -p tcp --dport {{ gossip_port }} \ + -j MARK --set-mark {{ fwmark }} 2>/dev/null || true executable: /bin/bash changed_when: false @@ -218,7 +221,7 @@ cmd: ip addr add {{ ashburn_ip }}/32 dev lo register: add_ip changed_when: add_ip.rc == 0 - failed_when: "add_ip.rc != 0 and 'RTNETLINK answers: File exists' not in add_ip.stderr" + failed_when: "add_ip.rc != 0 and 'already assigned' not in add_ip.stderr and 'File exists' not in add_ip.stderr" tags: [inbound] - name: Add DNAT rules (inserted before DOCKER chain) @@ -261,6 +264,7 @@ "-p udp -s {{ kind_network }} --sport {{ 
gossip_port }} -j MARK --set-mark {{ fwmark }}" \ "-p udp -s {{ kind_network }} --sport {{ dynamic_port_range_start }}:{{ dynamic_port_range_end }} -j MARK --set-mark {{ fwmark }}" \ "-p tcp -s {{ kind_network }} --sport {{ gossip_port }} -j MARK --set-mark {{ fwmark }}" \ + "-p tcp -s {{ kind_network }} --dport {{ gossip_port }} -j MARK --set-mark {{ fwmark }}" \ ; do if ! iptables -t mangle -C PREROUTING $rule 2>/dev/null; then iptables -t mangle -A PREROUTING $rule diff --git a/playbooks/ashburn-relay-mia-sw01.yml b/playbooks/ashburn-relay-mia-sw01.yml index 0e7380cd..f41bb8a7 100644 --- a/playbooks/ashburn-relay-mia-sw01.yml +++ b/playbooks/ashburn-relay-mia-sw01.yml @@ -11,22 +11,33 @@ # This tunnel carries traffic over the ISP uplink, completely independent # of the DoubleZero overlay. # -# Inbound: was-sw01 → backbone Et4/1 → mia-sw01 → Tunnel100 → biscayne -# Outbound: biscayne → Tunnel100 → mia-sw01 → backbone Et4/1 → was-sw01 +# Outbound routing uses VRF isolation instead of PBR. Tunnel100 lives in +# VRF "relay" whose only default route points to was-sw01 via the backbone. +# Traffic decapsulated from Tunnel100 (src 137.239.194.65) routes via VRF +# relay's table, which sends it to was-sw01 where the source IP is +# legitimate. No PBR or traffic-policy needed — the TCAM profile +# (tunnel-interface-acl) doesn't support either on tunnel interfaces. 
+# +# Inbound: was-sw01 → backbone Et4/1 → mia-sw01 → egress-vrf relay → +# Tunnel100 → biscayne +# Outbound: biscayne → Tunnel100 (VRF relay) → egress-vrf default → +# backbone Et4/1 → was-sw01 # # Usage: # # Pre-flight checks only (safe, read-only) -# ansible-playbook -i inventory/switches.yml playbooks/ashburn-relay-mia-sw01.yml +# ansible-playbook -i inventory-switches/switches.yml playbooks/ashburn-relay-mia-sw01.yml # # # Apply config (after reviewing pre-flight output) -# ansible-playbook -i inventory/switches.yml playbooks/ashburn-relay-mia-sw01.yml \ +# ansible-playbook -i inventory-switches/switches.yml playbooks/ashburn-relay-mia-sw01.yml \ # -e apply=true # # # Persist to startup-config (write memory) -# ansible-playbook -i inventory/switches.yml playbooks/ashburn-relay-mia-sw01.yml -e commit=true +# ansible-playbook -i inventory-switches/switches.yml playbooks/ashburn-relay-mia-sw01.yml \ +# -e commit=true # # # Rollback -# ansible-playbook -i inventory/switches.yml playbooks/ashburn-relay-mia-sw01.yml -e rollback=true +# ansible-playbook -i inventory-switches/switches.yml playbooks/ashburn-relay-mia-sw01.yml \ +# -e rollback=true - name: Configure mia-sw01 validator relay tunnel hosts: all @@ -46,6 +57,8 @@ tunnel_acl: SEC-VALIDATOR-100-IN # Loopback for tunnel source (so it's always up) tunnel_source_lo: Loopback101 + # VRF for outbound routing — isolates tunnel traffic from default table + tunnel_vrf: relay backbone_interface: Ethernet4/1 backbone_peer: 172.16.1.188 # was-sw01 backbone IP session_name: validator-tunnel @@ -130,6 +143,19 @@ var: lo_config.stdout_lines tags: [preflight] + - name: Check VRF state + arista.eos.eos_command: + commands: + - "show vrf {{ tunnel_vrf }}" + register: vrf_check + tags: [preflight] + ignore_errors: true + + - name: Display VRF state + ansible.builtin.debug: + var: vrf_check.stdout_lines + tags: [preflight] + - name: Check route for ashburn IP arista.eos.eos_command: commands: @@ -150,19 +176,21 @@ Review the 
output above: 1. Does {{ tunnel_interface }} already exist? 2. Does {{ tunnel_source_lo }} already exist? - 3. Current route for {{ ashburn_ip }} + 3. Does VRF {{ tunnel_vrf }} already exist? + 4. Current route for {{ ashburn_ip }} Planned config: + - VRF {{ tunnel_vrf }}: isolates tunnel outbound traffic - {{ tunnel_source_lo }}: {{ tunnel_source_ip }}/32 - {{ tunnel_interface }}: GRE src {{ tunnel_source_ip }} dst {{ biscayne_ip }} - link address {{ tunnel_local }}/31 + VRF {{ tunnel_vrf }}, link address {{ tunnel_local }}/31 ACL {{ tunnel_acl }}: permit src {{ ashburn_ip }}, permit src {{ tunnel_remote }} - - Route: {{ ashburn_ip }}/32 via {{ tunnel_remote }} - - Outbound default for tunnel traffic: 0.0.0.0/0 via {{ backbone_interface }} {{ backbone_peer }} + - Inbound: {{ ashburn_ip }}/32 egress-vrf {{ tunnel_vrf }} via {{ tunnel_remote }} + - Outbound: 0.0.0.0/0 in VRF {{ tunnel_vrf }} egress-vrf default via {{ backbone_peer }} To apply config: - ansible-playbook -i inventory/switches.yml playbooks/ashburn-relay-mia-sw01.yml \ - -e apply=true + ansible-playbook -i inventory-switches/switches.yml \ + playbooks/ashburn-relay-mia-sw01.yml -e apply=true tags: [preflight] - name: End play if not applying @@ -170,7 +198,7 @@ ansible.builtin.meta: end_play # ------------------------------------------------------------------ - # Apply config via session with 5-minute auto-revert + # Apply config via session (checkpoint saved for rollback) # ------------------------------------------------------------------ - name: Save checkpoint arista.eos.eos_command: @@ -185,6 +213,10 @@ - command: "interface {{ tunnel_source_lo }}" - command: "ip address {{ tunnel_source_ip }}/32" - command: exit + # VRF for tunnel outbound isolation + - command: "vrf instance {{ tunnel_vrf }}" + - command: exit + - command: "ip routing vrf {{ tunnel_vrf }}" # ACL for the new tunnel — we control this, DZ agent won't touch it - command: "ip access-list {{ tunnel_acl }}" - command: "counters 
per-entry" @@ -193,21 +225,20 @@ - command: "30 permit ip host {{ tunnel_remote }} any" - command: "100 deny ip any any" - command: exit - # New GRE tunnel + # GRE tunnel in VRF relay - command: "interface {{ tunnel_interface }}" - command: "mtu 9216" + - command: "vrf {{ tunnel_vrf }}" - command: "ip address {{ tunnel_local }}/31" - command: "ip access-group {{ tunnel_acl }} in" - command: "tunnel mode gre" - command: "tunnel source {{ tunnel_source_ip }}" - command: "tunnel destination {{ biscayne_ip }}" - command: exit - # Inbound: route ashburn IP to biscayne via the new tunnel - - command: "ip route {{ ashburn_ip }}/32 {{ tunnel_remote }}" - # Outbound: biscayne's traffic exits via backbone to was-sw01. - # Use a specific route for the backbone peer so tunnel traffic - # can reach was-sw01 without a blanket default route. - # (The switch's actual default route is via Et1/1 ISP uplink.) + # Outbound: default route in VRF relay → backbone → was-sw01 + - command: "ip route vrf {{ tunnel_vrf }} 0.0.0.0/0 egress-vrf default {{ backbone_peer }}" + # Inbound: route ashburn IP from default VRF into tunnel via VRF relay + - command: "ip route {{ ashburn_ip }}/32 egress-vrf {{ tunnel_vrf }} {{ tunnel_remote }}" - name: Show session diff arista.eos.eos_command: @@ -235,8 +266,9 @@ - "show running-config interfaces {{ tunnel_source_lo }}" - "show running-config interfaces {{ tunnel_interface }}" - "show ip access-lists {{ tunnel_acl }}" + - "show vrf {{ tunnel_vrf }}" - "show ip route {{ ashburn_ip }}" - - "show interfaces {{ tunnel_interface }} status" + - "show ip route vrf {{ tunnel_vrf }} 0.0.0.0/0" register: verify - name: Display verification @@ -251,9 +283,11 @@ Changes applied: 1. {{ tunnel_source_lo }}: {{ tunnel_source_ip }}/32 - 2. {{ tunnel_interface }}: GRE tunnel to {{ biscayne_ip }} + 2. VRF {{ tunnel_vrf }}: outbound isolation for tunnel traffic + 3. 
{{ tunnel_interface }}: GRE tunnel to {{ biscayne_ip }} in VRF {{ tunnel_vrf }} link {{ tunnel_local }}/31, ACL {{ tunnel_acl }} - 3. Route: {{ ashburn_ip }}/32 via {{ tunnel_remote }} + 4. Inbound: {{ ashburn_ip }}/32 egress-vrf {{ tunnel_vrf }} via {{ tunnel_remote }} + 5. Outbound: 0.0.0.0/0 in VRF {{ tunnel_vrf }} egress-vrf default via {{ backbone_peer }} Config is in running-config but NOT saved to startup-config. A reboot will revert to the previous state. diff --git a/playbooks/files/ashburn-relay-setup.sh.j2 b/playbooks/files/ashburn-relay-setup.sh.j2 index f1dcb459..179fc605 100644 --- a/playbooks/files/ashburn-relay-setup.sh.j2 +++ b/playbooks/files/ashburn-relay-setup.sh.j2 @@ -36,6 +36,10 @@ for rule in \ done # Outbound mangle (fwmark for policy routing) +# sport rules: gossip/repair/TVU traffic FROM validator well-known ports +# dport rule: ip_echo TCP TO entrypoint port 8001 (ephemeral sport, +# so sport-based rules miss it; without this the entrypoint sees +# biscayne's real IP and probes that instead of the Ashburn relay IP) for rule in \ "-p udp -s {{ kind_network }} --sport {{ gossip_port }} \ -j MARK --set-mark {{ fwmark }}" \ @@ -44,6 +48,8 @@ for rule in \ -j MARK --set-mark {{ fwmark }}" \ "-p tcp -s {{ kind_network }} --sport {{ gossip_port }} \ -j MARK --set-mark {{ fwmark }}" \ + "-p tcp -s {{ kind_network }} --dport {{ gossip_port }} \ + -j MARK --set-mark {{ fwmark }}" \ ; do if ! iptables -t mangle -C PREROUTING $rule 2>/dev/null; then iptables -t mangle -A PREROUTING $rule From 8eac9cc87f9133d4f0c63f9b954a01cad1d513ee Mon Sep 17 00:00:00 2001 From: "A. F. Dudley" Date: Sat, 7 Mar 2026 23:45:36 +0000 Subject: [PATCH 17/62] docs: document DoubleZero agent managed config on both switches Inventories what the DZ agent controls (tunnels, ACLs, VRFs, BGP, route-maps, loopbacks) so we don't accidentally modify objects that the agent will silently overwrite. Includes a "safe to modify" section listing our own relay infrastructure. 
Co-Authored-By: Claude Opus 4.6 --- docs/doublezero-agent-managed-config.md | 80 +++++++++++++++++++++++++ 1 file changed, 80 insertions(+) create mode 100644 docs/doublezero-agent-managed-config.md diff --git a/docs/doublezero-agent-managed-config.md b/docs/doublezero-agent-managed-config.md new file mode 100644 index 00000000..75e45204 --- /dev/null +++ b/docs/doublezero-agent-managed-config.md @@ -0,0 +1,80 @@ +# DoubleZero Agent — Managed Configuration + +The `doublezero-agent` daemon runs on both mia-sw01 and was-sw01. It manages +GRE tunnels, ACLs, BGP neighbors, and route-maps via EOS config sessions +(named `doublezero-agent-`). It periodically creates pending +sessions and commits them, overwriting any manual changes to the objects +it manages. + +**Do NOT modify any of the items listed below.** The agent will silently +overwrite your changes. + +## mia-sw01 + +### Tunnel interfaces (all DZ-managed) + +| Interface | Description | VRF | Peer | ACL | +|------------|-----------------|---------|-----------------|------------------------------| +| Tunnel500 | USER-UCAST-500 | vrf1 | 186.233.184.235 | SEC-USER-500-IN | +| Tunnel501 | USER-MCAST-501 | default | 186.233.185.50 | SEC-USER-SUB-MCAST-IN | +| Tunnel502 | USER-UCAST-502 | vrf1 | 155.138.213.71 | SEC-USER-502-IN | +| Tunnel503 | USER-MCAST-503 | default | 155.138.213.71 | SEC-USER-PUB-MCAST-IN | +| Tunnel504 | (empty) | | | | +| Tunnel505 | USER-UCAST-505 | vrf1 | 186.233.185.50 | SEC-USER-505-IN | +| Tunnel506 | (exists) | | | | + +### ACLs (DZ-managed — do NOT modify) + +- `SEC-DIA-IN` — ingress ACL on Et1/1 (bogon/RFC1918 filter) +- `SEC-USER-500-IN` — ingress ACL on Tunnel500 +- `SEC-USER-502-IN` — ingress ACL on Tunnel502 +- `SEC-USER-505-IN` — ingress ACL on Tunnel505 +- `SEC-USER-SUB-MCAST-IN` — ingress ACL on Tunnel501 +- `SEC-USER-PUB-MCAST-IN` — ingress ACL on Tunnel503 +- `SEC-USER-MCAST-BOUNDARY-501-OUT` — multicast boundary on Tunnel501 +- `SEC-USER-MCAST-BOUNDARY-503-OUT` — multicast 
boundary on Tunnel503 + +### VRF (DZ-managed) + +- `vrf1` — used by Tunnel500, Tunnel502, Tunnel505 (unicast tunnels) +- `ip route vrf vrf1 0.0.0.0/0 egress-vrf default Ethernet4/1 172.16.1.188` + +### BGP (DZ-managed) + +- `router bgp 65342` — iBGP mesh with DZ fabric switches (ny7, sea001, ld4, etc.) +- BGP neighbors on tunnel link IPs (169.254.x.x) with `RM-USER-*` route-maps +- All `RM-USER-*-IN` and `RM-USER-*-OUT` route-maps + +### Loopbacks (DZ-managed) + +- `Loopback255`, `Loopback256` — BGP update sources for iBGP mesh + +## was-sw01 + +### ACLs (DZ-managed) + +- `SEC-DIA-IN` — ingress ACL on Et1/1 +- `SEC-USER-PUB-MCAST-IN` +- `SEC-USER-SUB-MCAST-IN` + +### Daemons + +- `doublezero-agent` — config management +- `doublezero-telemetry` — metrics (writes to influxdb `doublezero-mainnet-beta`) + +## Safe to modify (NOT managed by DZ agent) + +### mia-sw01 + +- `Tunnel100` — our dedicated validator relay tunnel (VRF relay) +- `SEC-VALIDATOR-100-IN` — our ACL on Tunnel100 +- `Loopback101` — tunnel source IP (209.42.167.137) +- VRF `relay` — our outbound isolation VRF +- `ip route 137.239.194.65/32 egress-vrf relay 169.254.100.1` +- `ip route vrf relay 0.0.0.0/0 egress-vrf default 172.16.1.188` +- Backbone `Ethernet4/1` — physical interface, not DZ-managed + +### was-sw01 + +- `ip route 137.239.194.65/32 172.16.1.189` — our static route +- Backbone `Ethernet4/1` — physical interface, not DZ-managed From 496c7982cbcfbc88af6ee08f03769af25fa76c7e Mon Sep 17 00:00:00 2001 From: "A. F. Dudley" Date: Sun, 8 Mar 2026 00:43:06 +0000 Subject: [PATCH 18/62] feat: end-to-end relay test scripts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three Python scripts send real packets from the kind node through the full relay path (biscayne → tunnel → mia-sw01 → was-sw01 → internet) and verify responses come back via the inbound path. No indirect counter-checking — a response proves both directions work. 
- relay-test-udp.py: DNS query with sport 8001 - relay-test-tcp-sport.py: HTTP request with sport 8001 - relay-test-tcp-dport.py: TCP connect to entrypoint dport 8001 (ip_echo) - test-ashburn-relay.sh: orchestrates from ansible controller via nsenter Co-Authored-By: Claude Opus 4.6 --- scripts/relay-test-tcp-dport.py | 46 ++++++++++++++ scripts/relay-test-tcp-sport.py | 28 ++++++++ scripts/relay-test-udp.py | 30 +++++++++ scripts/test-ashburn-relay.sh | 109 ++++++++++++++++++++++++++++++++ 4 files changed, 213 insertions(+) create mode 100755 scripts/relay-test-tcp-dport.py create mode 100755 scripts/relay-test-tcp-sport.py create mode 100755 scripts/relay-test-udp.py create mode 100755 scripts/test-ashburn-relay.sh diff --git a/scripts/relay-test-tcp-dport.py b/scripts/relay-test-tcp-dport.py new file mode 100755 index 00000000..fc7bc8c5 --- /dev/null +++ b/scripts/relay-test-tcp-dport.py @@ -0,0 +1,46 @@ +#!/usr/bin/env python3 +"""TCP dport 8001 round trip — connect to a Solana entrypoint (ip_echo path). + +The mangle rule matches -p tcp --dport 8001, so connecting TO port 8001 +on any host triggers SNAT to the relay IP. The entrypoint responds with +ip_echo (4 bytes: our IP in network order). 
+""" +import socket +import sys + +PORT = int(sys.argv[1]) if len(sys.argv) > 1 else 8001 +HOST = sys.argv[2] if len(sys.argv) > 2 else "34.83.231.102" # entrypoint.mainnet-beta.solana.com + +# Resolve hostname +try: + addr = socket.getaddrinfo(HOST, PORT, socket.AF_INET)[0][4][0] +except socket.gaierror: + addr = HOST + +s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) +s.settimeout(5) + +try: + s.connect((addr, PORT)) + print(f"OK TCP handshake to {addr}:{PORT}") + # ip_echo: peer sends our IP back as 4 bytes + s.settimeout(2) + try: + data = s.recv(64) + if len(data) >= 4: + ip = socket.inet_ntoa(data[:4]) + print(f"OK ip_echo says we are {ip}") + else: + print(f"OK got {len(data)} bytes: {data.hex()}") + except socket.timeout: + print("NOTE: no ip_echo response (handshake succeeded)") +except socket.timeout: + print("TIMEOUT") + sys.exit(1) +except ConnectionRefusedError: + print(f"OK connection refused by {addr}:{PORT} (host reachable)") +except Exception as e: + print(f"ERROR {e}") + sys.exit(1) +finally: + s.close() diff --git a/scripts/relay-test-tcp-sport.py b/scripts/relay-test-tcp-sport.py new file mode 100755 index 00000000..236ad305 --- /dev/null +++ b/scripts/relay-test-tcp-sport.py @@ -0,0 +1,28 @@ +#!/usr/bin/env python3 +"""TCP sport 8001 round trip via HTTP HEAD to 1.1.1.1.""" +import socket +import sys + +PORT = int(sys.argv[1]) if len(sys.argv) > 1 else 8001 + +s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) +s.settimeout(5) +s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) +s.bind(("0.0.0.0", PORT)) + +try: + s.connect(("1.1.1.1", 80)) + s.sendall(b"HEAD / HTTP/1.0\r\nHost: 1.1.1.1\r\n\r\n") + resp = s.recv(256) + if b"HTTP" in resp: + print("OK HTTP response received") + else: + print(f"OK {len(resp)} bytes (non-HTTP)") +except socket.timeout: + print("TIMEOUT") + sys.exit(1) +except Exception as e: + print(f"ERROR {e}") + sys.exit(1) +finally: + s.close() diff --git a/scripts/relay-test-udp.py b/scripts/relay-test-udp.py new 
file mode 100755 index 00000000..61e8a6f6 --- /dev/null +++ b/scripts/relay-test-udp.py @@ -0,0 +1,30 @@ +#!/usr/bin/env python3 +"""UDP sport 8001 round trip via DNS query to 8.8.8.8.""" +import socket +import sys + +PORT = int(sys.argv[1]) if len(sys.argv) > 1 else 8001 + +s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) +s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) +s.bind(("0.0.0.0", PORT)) + +# DNS query: txn ID 0x1234, standard query for example.com A +query = ( + b"\x12\x34\x01\x00\x00\x01\x00\x00\x00\x00\x00\x00" + b"\x07example\x03com\x00\x00\x01\x00\x01" +) +s.sendto(query, ("8.8.8.8", 53)) +s.settimeout(5) + +try: + resp, addr = s.recvfrom(512) + print(f"OK {len(resp)} bytes from {addr[0]}:{addr[1]}") +except socket.timeout: + print("TIMEOUT") + sys.exit(1) +except Exception as e: + print(f"ERROR {e}") + sys.exit(1) +finally: + s.close() diff --git a/scripts/test-ashburn-relay.sh b/scripts/test-ashburn-relay.sh new file mode 100755 index 00000000..2968747f --- /dev/null +++ b/scripts/test-ashburn-relay.sh @@ -0,0 +1,109 @@ +#!/usr/bin/env bash +# End-to-end test for Ashburn validator relay +# +# Sends real packets from the kind node through the full relay path +# and waits for responses. A response proves both directions work. +# +# Outbound: kind node (172.20.0.2:8001) → biscayne mangle (fwmark 0x64) +# → policy route table ashburn → gre-ashburn → mia-sw01 Tunnel100 +# (VRF relay) → egress-vrf default → backbone Et4/1 → was-sw01 Et1/1 +# → internet (src 137.239.194.65) +# +# Inbound: internet → was-sw01 Et1/1 (dst 137.239.194.65) → static route +# → backbone → mia-sw01 → egress-vrf relay → Tunnel100 → biscayne +# gre-ashburn → conntrack reverse-SNAT → kind node (172.20.0.2:8001) +# +# Runs from the ansible controller host. +# +# Usage: +# ./scripts/test-ashburn-relay.sh +set -uo pipefail + +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" +cd "$SCRIPT_DIR/.." 
+ +KIND_NODE=laconic-70ce4c4b47e23b85-control-plane +BISCAYNE_INV=inventory/biscayne.yml +GOSSIP_PORT=8001 + +PASS=0 +FAIL=0 + +pass() { echo " PASS: $1"; PASS=$((PASS + 1)); } +fail() { echo " FAIL: $1"; FAIL=$((FAIL + 1)); } + +# Copy test scripts to biscayne (once) +setup() { + for f in "$SCRIPT_DIR"/relay-test-*.py; do + ansible biscayne -i "$BISCAYNE_INV" -m ansible.builtin.copy \ + -a "src=$f dest=/tmp/$(basename "$f") mode=0755" \ + --become >/dev/null 2>&1 + done + + # Get kind node PID for nsenter (run in its network namespace, + # use biscayne's python3 since the kind node only has perl) + KIND_PID=$(ansible biscayne -i "$BISCAYNE_INV" -m ansible.builtin.shell \ + -a "docker inspect --format '{{ '{{' }}.State.Pid{{ '}}' }}' $KIND_NODE" \ + --become 2>&1 | grep -oP '^\d+$' || true) + + if [[ -z "$KIND_PID" ]]; then + echo "FATAL: could not get kind node PID" + exit 1 + fi + echo "Kind node PID: $KIND_PID" +} + +# Run a test script in the kind node's network namespace +run_test() { + local name=$1 + shift + ansible biscayne -i "$BISCAYNE_INV" -m ansible.builtin.shell \ + -a "nsenter --net --target $KIND_PID python3 /tmp/$name $*" \ + --become 2>&1 | grep -E '^OK|^TIMEOUT|^ERROR|^REFUSED|^NOTE' || echo "NO OUTPUT" +} + +echo "=== Ashburn Relay End-to-End Test ===" +echo "" + +setup +echo "" + +# Test 1: UDP sport 8001 → DNS query to 8.8.8.8 +# Triggers: mangle -p udp --sport 8001 → mark → SNAT → tunnel +echo "--- Test 1: UDP sport $GOSSIP_PORT (DNS query) ---" +result=$(run_test relay-test-udp.py "$GOSSIP_PORT") +if echo "$result" | grep -q "^OK"; then + pass "UDP sport $GOSSIP_PORT: $result" +else + fail "UDP sport $GOSSIP_PORT: $result" +fi +echo "" + +# Test 2: TCP sport 8001 → HTTP HEAD to 1.1.1.1 +# Triggers: mangle -p tcp --sport 8001 → mark → SNAT → tunnel +echo "--- Test 2: TCP sport $GOSSIP_PORT (HTTP request) ---" +result=$(run_test relay-test-tcp-sport.py "$GOSSIP_PORT") +if echo "$result" | grep -q "^OK"; then + pass "TCP sport $GOSSIP_PORT: 
$result" +else + fail "TCP sport $GOSSIP_PORT: $result" +fi +echo "" + +# Test 3: TCP dport 8001 → connect to Solana entrypoint (ip_echo) +# Triggers: mangle -p tcp --dport 8001 → mark → SNAT → tunnel +# REFUSED counts as pass — proves the round trip completed. +echo "--- Test 3: TCP dport $GOSSIP_PORT (ip_echo path) ---" +result=$(run_test relay-test-tcp-dport.py "$GOSSIP_PORT") +if echo "$result" | grep -q "^OK"; then + pass "TCP dport $GOSSIP_PORT: $result" +else + fail "TCP dport $GOSSIP_PORT: $result" +fi +echo "" + +# Summary +echo "=== Results: $PASS passed, $FAIL failed ===" +if [[ $FAIL -gt 0 ]]; then + exit 1 +fi From 806c1bb723f3a627a6efcee4414a5459ce8ba860 Mon Sep 17 00:00:00 2001 From: "A. F. Dudley" Date: Sun, 8 Mar 2026 02:33:20 +0000 Subject: [PATCH 19/62] refactor: rename `deployment update` to `deployment update-envs` MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The update command only patches environment variables and adds a restart annotation. It does not update ports, volumes, configmaps, or any other deployment spec. The old name was misleading — it implied a full spec update, causing operators to expect changes that never took effect. 
Co-Authored-By: Claude Opus 4.6 --- stack_orchestrator/deploy/compose/deploy_docker.py | 2 +- stack_orchestrator/deploy/deploy.py | 4 ++-- stack_orchestrator/deploy/deployer.py | 2 +- stack_orchestrator/deploy/deployment.py | 8 ++++---- stack_orchestrator/deploy/k8s/deploy_k8s.py | 2 +- stack_orchestrator/deploy/webapp/util.py | 2 +- 6 files changed, 10 insertions(+), 10 deletions(-) diff --git a/stack_orchestrator/deploy/compose/deploy_docker.py b/stack_orchestrator/deploy/compose/deploy_docker.py index c6397aad..fa0ac1d4 100644 --- a/stack_orchestrator/deploy/compose/deploy_docker.py +++ b/stack_orchestrator/deploy/compose/deploy_docker.py @@ -62,7 +62,7 @@ class DockerDeployer(Deployer): except DockerException as e: raise DeployerException(e) - def update(self): + def update_envs(self): if not opts.o.dry_run: try: return self.docker.compose.restart() diff --git a/stack_orchestrator/deploy/deploy.py b/stack_orchestrator/deploy/deploy.py index 86c1856c..f2bf977c 100644 --- a/stack_orchestrator/deploy/deploy.py +++ b/stack_orchestrator/deploy/deploy.py @@ -182,8 +182,8 @@ def status_operation(ctx): ctx.obj.deployer.status() -def update_operation(ctx): - ctx.obj.deployer.update() +def update_envs_operation(ctx): + ctx.obj.deployer.update_envs() def ps_operation(ctx): diff --git a/stack_orchestrator/deploy/deployer.py b/stack_orchestrator/deploy/deployer.py index d8fb656b..11fb6592 100644 --- a/stack_orchestrator/deploy/deployer.py +++ b/stack_orchestrator/deploy/deployer.py @@ -28,7 +28,7 @@ class Deployer(ABC): pass @abstractmethod - def update(self): + def update_envs(self): pass @abstractmethod diff --git a/stack_orchestrator/deploy/deployment.py b/stack_orchestrator/deploy/deployment.py index b76e6486..902780fb 100644 --- a/stack_orchestrator/deploy/deployment.py +++ b/stack_orchestrator/deploy/deployment.py @@ -31,7 +31,7 @@ from stack_orchestrator.deploy.deploy import ( exec_operation, logs_operation, create_deploy_context, - update_operation, + 
update_envs_operation, ) from stack_orchestrator.deploy.deploy_types import DeployCommandContext from stack_orchestrator.deploy.deployment_context import DeploymentContext @@ -210,11 +210,11 @@ def status(ctx): status_operation(ctx) -@command.command() +@command.command(name="update-envs") @click.pass_context -def update(ctx): +def update_envs(ctx): ctx.obj = make_deploy_context(ctx) - update_operation(ctx) + update_envs_operation(ctx) @command.command() diff --git a/stack_orchestrator/deploy/k8s/deploy_k8s.py b/stack_orchestrator/deploy/k8s/deploy_k8s.py index f7f8ad43..3b235538 100644 --- a/stack_orchestrator/deploy/k8s/deploy_k8s.py +++ b/stack_orchestrator/deploy/k8s/deploy_k8s.py @@ -598,7 +598,7 @@ class K8sDeployer(Deployer): log_data = "******* No logs available ********\n" return log_stream_from_string(log_data) - def update(self): + def update_envs(self): self.connect_api() ref_deployment = self.cluster_info.get_deployment() if not ref_deployment or not ref_deployment.metadata: diff --git a/stack_orchestrator/deploy/webapp/util.py b/stack_orchestrator/deploy/webapp/util.py index 3c536477..84accbcd 100644 --- a/stack_orchestrator/deploy/webapp/util.py +++ b/stack_orchestrator/deploy/webapp/util.py @@ -696,7 +696,7 @@ def deploy_to_k8s(deploy_record, deployment_dir, recreate, logger): if not deploy_record: commands_to_run = ["start"] else: - commands_to_run = ["update"] + commands_to_run = ["update-envs"] for command in commands_to_run: logger.log(f"Running {command} command on deployment dir: {deployment_dir}") From cc6acd5f0940c0f77e7c9faa5ec4f3f3c05a7415 Mon Sep 17 00:00:00 2001 From: "A. F. Dudley" Date: Sun, 8 Mar 2026 02:41:25 +0000 Subject: [PATCH 20/62] fix: default skip-cluster-management to true Destroying the kind cluster on stop/start is almost never the intent. The cluster holds PVs, ConfigMaps, and networking state that are expensive to recreate. 
Default to preserving the cluster; pass --perform-cluster-management explicitly when a full teardown is needed. Co-Authored-By: Claude Opus 4.6 --- stack_orchestrator/deploy/deployment.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/stack_orchestrator/deploy/deployment.py b/stack_orchestrator/deploy/deployment.py index 902780fb..1182d23f 100644 --- a/stack_orchestrator/deploy/deployment.py +++ b/stack_orchestrator/deploy/deployment.py @@ -114,7 +114,7 @@ def up(ctx, stay_attached, skip_cluster_management, extra_args): ) @click.option( "--skip-cluster-management/--perform-cluster-management", - default=False, + default=True, help="Skip cluster initialization/tear-down (only for kind-k8s deployments)", ) @click.argument("extra_args", nargs=-1) # help: command: up @@ -132,7 +132,7 @@ def start(ctx, stay_attached, skip_cluster_management, extra_args): ) @click.option( "--skip-cluster-management/--perform-cluster-management", - default=False, + default=True, help="Skip cluster initialization/tear-down (only for kind-k8s deployments)", ) @click.argument("extra_args", nargs=-1) # help: command: down @@ -151,7 +151,7 @@ def down(ctx, delete_volumes, skip_cluster_management, extra_args): ) @click.option( "--skip-cluster-management/--perform-cluster-management", - default=False, + default=True, help="Skip cluster initialization/tear-down (only for kind-k8s deployments)", ) @click.argument("extra_args", nargs=-1) # help: command: down From 05f9acf8a01c4f930b907e2346a37557bb0cb167 Mon Sep 17 00:00:00 2001 From: "A. F. Dudley" Date: Sun, 8 Mar 2026 02:43:31 +0000 Subject: [PATCH 21/62] fix: DOCKER-USER rules for inbound relay, add UDP test playbooks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Root cause: Docker FORWARD chain policy DROP blocked all DNAT'd relay traffic (UDP/TCP 8001, UDP 9000-9025) to the kind node. The DOCKER chain only ACCEPTs specific TCP ports (6443, 443, 80). 
Added ACCEPT rules in DOCKER-USER chain which runs before all Docker chains. Changes: - ashburn-relay-biscayne.yml: add DOCKER-USER ACCEPT rules (inbound tag) and rollback cleanup - ashburn-relay-setup.sh.j2: persist DOCKER-USER rules across reboot - relay-inbound-udp-test.yml: controlled e2e test — listener in kind netns, sender from kelce, assert arrival - relay-link-test.yml: link-by-link tcpdump captures at each hop - relay-test-udp-listen.py, relay-test-udp-send.py: test helpers - relay-test-ip-echo.py: full ip_echo protocol test - inventory/kelce.yml, inventory/panic.yml: test host inventories - test-ashburn-relay.sh: add ip_echo UDP reachability test Co-Authored-By: Claude Opus 4.6 --- inventory/kelce.yml | 6 + inventory/panic.yml | 7 ++ playbooks/ashburn-relay-biscayne.yml | 44 +++++++ playbooks/files/ashburn-relay-setup.sh.j2 | 15 +++ playbooks/relay-inbound-udp-test.yml | 95 +++++++++++++++ playbooks/relay-link-test.yml | 135 ++++++++++++++++++++++ scripts/relay-test-ip-echo.py | 116 +++++++++++++++++++ scripts/relay-test-udp-listen.py | 22 ++++ scripts/relay-test-udp-send.py | 12 ++ scripts/test-ashburn-relay.sh | 19 ++- 10 files changed, 470 insertions(+), 1 deletion(-) create mode 100644 inventory/kelce.yml create mode 100644 inventory/panic.yml create mode 100644 playbooks/relay-inbound-udp-test.yml create mode 100644 playbooks/relay-link-test.yml create mode 100644 scripts/relay-test-ip-echo.py create mode 100644 scripts/relay-test-udp-listen.py create mode 100644 scripts/relay-test-udp-send.py diff --git a/inventory/kelce.yml b/inventory/kelce.yml new file mode 100644 index 00000000..3c0e4b13 --- /dev/null +++ b/inventory/kelce.yml @@ -0,0 +1,6 @@ +all: + hosts: + kelce: + ansible_host: kelce + ansible_user: rix + ansible_python_interpreter: /usr/bin/python3 diff --git a/inventory/panic.yml b/inventory/panic.yml new file mode 100644 index 00000000..f0349546 --- /dev/null +++ b/inventory/panic.yml @@ -0,0 +1,7 @@ +all: + hosts: + panic: + 
ansible_host: panic + ansible_user: rix + ansible_become: false + ansible_python_interpreter: /usr/bin/python3 diff --git a/playbooks/ashburn-relay-biscayne.yml b/playbooks/ashburn-relay-biscayne.yml index 1899227d..d660a2ce 100644 --- a/playbooks/ashburn-relay-biscayne.yml +++ b/playbooks/ashburn-relay-biscayne.yml @@ -87,6 +87,20 @@ executable: /bin/bash changed_when: false + - name: Remove DOCKER-USER relay rules + ansible.builtin.shell: + cmd: | + set -o pipefail + iptables -D DOCKER-USER -p udp -d {{ kind_node_ip }} \ + --dport {{ gossip_port }} -j ACCEPT 2>/dev/null || true + iptables -D DOCKER-USER -p tcp -d {{ kind_node_ip }} \ + --dport {{ gossip_port }} -j ACCEPT 2>/dev/null || true + iptables -D DOCKER-USER -p udp -d {{ kind_node_ip }} \ + --dport {{ dynamic_port_range_start }}:{{ dynamic_port_range_end }} \ + -j ACCEPT 2>/dev/null || true + executable: /bin/bash + changed_when: false + - name: Remove outbound mangle rules ansible.builtin.shell: cmd: | @@ -253,6 +267,36 @@ var: dnat_result.stdout_lines tags: [inbound] + - name: Allow DNAT'd relay traffic through DOCKER-USER + ansible.builtin.shell: + cmd: | + set -o pipefail + # Docker's FORWARD chain drops traffic to bridge networks unless + # explicitly accepted. DOCKER-USER runs first and is the correct + # place for user rules. These ACCEPT rules let DNAT'd relay + # traffic reach the kind node (172.20.0.2). + for rule in \ + "-p udp -d {{ kind_node_ip }} --dport {{ gossip_port }} -j ACCEPT" \ + "-p tcp -d {{ kind_node_ip }} --dport {{ gossip_port }} -j ACCEPT" \ + "-p udp -d {{ kind_node_ip }} --dport {{ dynamic_port_range_start }}:{{ dynamic_port_range_end }} -j ACCEPT" \ + ; do + if ! 
iptables -C DOCKER-USER $rule 2>/dev/null; then + iptables -I DOCKER-USER 1 $rule + echo "added: $rule" + else + echo "exists: $rule" + fi + done + executable: /bin/bash + register: forward_result + changed_when: "'added' in forward_result.stdout" + tags: [inbound] + + - name: Show DOCKER-USER result + ansible.builtin.debug: + var: forward_result.stdout_lines + tags: [inbound] + # ------------------------------------------------------------------ # Outbound: fwmark + SNAT + policy routing via new tunnel # ------------------------------------------------------------------ diff --git a/playbooks/files/ashburn-relay-setup.sh.j2 b/playbooks/files/ashburn-relay-setup.sh.j2 index 179fc605..eb33d731 100644 --- a/playbooks/files/ashburn-relay-setup.sh.j2 +++ b/playbooks/files/ashburn-relay-setup.sh.j2 @@ -35,6 +35,21 @@ for rule in \ fi done +# FORWARD: allow DNAT'd relay traffic through Docker's FORWARD chain. +# Docker drops traffic to bridge networks unless explicitly accepted. +# DOCKER-USER runs before all Docker chains and survives daemon restarts. +for rule in \ + "-p udp -d {{ kind_node_ip }} --dport {{ gossip_port }} -j ACCEPT" \ + "-p tcp -d {{ kind_node_ip }} --dport {{ gossip_port }} -j ACCEPT" \ + "-p udp -d {{ kind_node_ip }} \ + --dport {{ dynamic_port_range_start }}:{{ dynamic_port_range_end }} \ + -j ACCEPT" \ +; do + if ! iptables -C DOCKER-USER $rule 2>/dev/null; then + iptables -I DOCKER-USER 1 $rule + fi +done + # Outbound mangle (fwmark for policy routing) # sport rules: gossip/repair/TVU traffic FROM validator well-known ports # dport rule: ip_echo TCP TO entrypoint port 8001 (ephemeral sport, diff --git a/playbooks/relay-inbound-udp-test.yml b/playbooks/relay-inbound-udp-test.yml new file mode 100644 index 00000000..00e35717 --- /dev/null +++ b/playbooks/relay-inbound-udp-test.yml @@ -0,0 +1,95 @@ +--- +# Test inbound UDP through the Ashburn relay. 
+# +# Sends a UDP packet from kelce to 137.239.194.65:8001 and checks +# whether it arrives inside the kind node's network namespace. +# +# Usage: +# ansible-playbook -i inventory/biscayne.yml -i inventory/kelce.yml \ +# playbooks/relay-inbound-udp-test.yml +# +- name: Inbound UDP relay test — listener + hosts: biscayne + gather_facts: false + become: true + vars: + relay_ip: 137.239.194.65 + gossip_port: 8001 + kind_node: laconic-70ce4c4b47e23b85-control-plane + tasks: + - name: Copy listener script + ansible.builtin.copy: + src: ../scripts/relay-test-udp-listen.py + dest: /tmp/relay-test-udp-listen.py + mode: "0755" + + - name: Get kind node PID + ansible.builtin.shell: + cmd: >- + docker inspect --format '{%raw%}{{.State.Pid}}{%endraw%}' {{ kind_node }} + register: kind_pid_result + changed_when: false + + - name: Set kind PID fact + ansible.builtin.set_fact: + kind_pid: "{{ kind_pid_result.stdout | trim }}" + + - name: Start UDP listener in kind netns + ansible.builtin.shell: + cmd: >- + nsenter --net --target {{ kind_pid }} + python3 /tmp/relay-test-udp-listen.py {{ gossip_port }} 15 + register: listener_result + async: 20 + poll: 0 + + - name: Wait for listener to bind + ansible.builtin.pause: + seconds: 2 + +- name: Inbound UDP relay test — sender + hosts: kelce + gather_facts: false + vars: + relay_ip: 137.239.194.65 + gossip_port: 8001 + tasks: + - name: Copy sender script + ansible.builtin.copy: + src: ../scripts/relay-test-udp-send.py + dest: /tmp/relay-test-udp-send.py + mode: "0755" + + - name: Send UDP probe to relay IP + ansible.builtin.command: + cmd: python3 /tmp/relay-test-udp-send.py {{ relay_ip }} {{ gossip_port }} + register: send_result + changed_when: false + + - name: Show send result + ansible.builtin.debug: + var: send_result.stdout + +- name: Inbound UDP relay test — collect results + hosts: biscayne + gather_facts: false + become: true + tasks: + - name: Wait for listener to complete + ansible.builtin.async_status: + jid: "{{ 
listener_result.ansible_job_id }}" + register: listener_final + until: listener_final.finished + retries: 10 + delay: 2 + + - name: Show listener result + ansible.builtin.debug: + var: listener_final.stdout + + - name: Assert UDP arrived + ansible.builtin.assert: + that: + - "'OK' in listener_final.stdout" + fail_msg: "Inbound UDP did not arrive at kind node: {{ listener_final.stdout }}" + success_msg: "Inbound UDP reached kind node: {{ listener_final.stdout }}" diff --git a/playbooks/relay-link-test.yml b/playbooks/relay-link-test.yml new file mode 100644 index 00000000..07f6ddb1 --- /dev/null +++ b/playbooks/relay-link-test.yml @@ -0,0 +1,135 @@ +--- +# Link-by-link test for inbound UDP through the Ashburn relay. +# +# Tests whether a UDP packet sent from panic to 137.239.194.65:8001 +# arrives at each hop along the inbound path: +# 1. biscayne gre-ashburn (post-tunnel decap) +# 2. biscayne DNAT counter +# 3. kind node network namespace +# +# Usage: +# ansible-playbook -i inventory/biscayne.yml -i inventory/panic.yml \ +# playbooks/relay-link-test.yml +# +- name: Link test — start captures on biscayne + hosts: biscayne + gather_facts: false + become: true + vars: + relay_ip: 137.239.194.65 + gossip_port: 8001 + kind_node: laconic-70ce4c4b47e23b85-control-plane + panic_ip: 166.84.136.68 + tasks: + - name: Get kind node PID + ansible.builtin.shell: + cmd: >- + docker inspect --format '{%raw%}{{.State.Pid}}{%endraw%}' {{ kind_node }} + register: kind_pid_result + changed_when: false + + - name: Get DNAT counter before + ansible.builtin.shell: + cmd: >- + iptables -t nat -L PREROUTING -v -n | grep 'udp dpt:{{ gossip_port }}' | awk '{print $1}' + register: dnat_before + changed_when: false + + - name: Start tcpdump on gre-ashburn + ansible.builtin.shell: + cmd: >- + timeout 15 tcpdump -c 1 -nn -i gre-ashburn + 'src host {{ panic_ip }} and udp dst port {{ gossip_port }}' + > /tmp/link-test-gre.txt 2>&1 + async: 20 + poll: 0 + register: tcpdump_gre + + - name: Start 
tcpdump on bridge + ansible.builtin.shell: + cmd: >- + timeout 15 tcpdump -c 1 -nn -i br-cf46a62ab5b2 + 'udp dst port {{ gossip_port }}' + > /tmp/link-test-br.txt 2>&1 + async: 20 + poll: 0 + register: tcpdump_br + + - name: Start tcpdump in kind netns + ansible.builtin.shell: + cmd: >- + nsenter --net --target {{ kind_pid_result.stdout | trim }} + timeout 15 tcpdump -c 1 -nn -i eth0 + 'udp dst port {{ gossip_port }}' + > /tmp/link-test-kind.txt 2>&1 + async: 20 + poll: 0 + register: tcpdump_kind + + - name: Wait for captures to start + ansible.builtin.pause: + seconds: 2 + +- name: Link test — send from panic + hosts: panic + gather_facts: false + vars: + relay_ip: 137.239.194.65 + gossip_port: 8001 + tasks: + - name: Send 3 UDP probes with 1s interval + ansible.builtin.raw: "python3 -c \"import socket,time;s=socket.socket(socket.AF_INET,socket.SOCK_DGRAM);[s.sendto(b'PROBE',('{{ relay_ip }}',{{ gossip_port }})) or time.sleep(1) for i in range(3)];print('OK sent 3 probes to {{ relay_ip }}:{{ gossip_port }}');s.close()\"" + register: send_result + changed_when: false + + - name: Show send result + ansible.builtin.debug: + var: send_result.stdout + +- name: Link test — collect results + hosts: biscayne + gather_facts: false + become: true + vars: + gossip_port: 8001 + tasks: + - name: Wait for captures to finish + ansible.builtin.pause: + seconds: 10 + + - name: Get DNAT counter after + ansible.builtin.shell: + cmd: >- + iptables -t nat -L PREROUTING -v -n | grep 'udp dpt:{{ gossip_port }}' | awk '{print $1}' + register: dnat_after + changed_when: false + + - name: Read gre-ashburn capture + ansible.builtin.command: + cmd: cat /tmp/link-test-gre.txt + register: cap_gre + changed_when: false + + - name: Read bridge capture + ansible.builtin.command: + cmd: cat /tmp/link-test-br.txt + register: cap_br + changed_when: false + + - name: Read kind netns capture + ansible.builtin.command: + cmd: cat /tmp/link-test-kind.txt + register: cap_kind + changed_when: false + + - 
name: Report results + ansible.builtin.debug: + msg: | + === Link-by-link results === + DNAT counter: {{ dnat_before.stdout }} → {{ dnat_after.stdout }} + --- gre-ashburn --- + {{ cap_gre.stdout }} + --- bridge --- + {{ cap_br.stdout }} + --- kind netns --- + {{ cap_kind.stdout }} diff --git a/scripts/relay-test-ip-echo.py b/scripts/relay-test-ip-echo.py new file mode 100644 index 00000000..d9dbf03b --- /dev/null +++ b/scripts/relay-test-ip-echo.py @@ -0,0 +1,116 @@ +#!/usr/bin/env python3 +"""Full ip_echo protocol test with UDP probe listener. + +Sends the correct ip_echo protocol message to a Solana entrypoint, +which triggers the entrypoint to probe our UDP ports. Then listens +for those probe datagrams to verify inbound UDP reachability. + +Protocol (from agave source): + Request: 4 null bytes + bincode(IpEchoServerMessage) + '\n' + Response: 4 null bytes + bincode(IpEchoServerResponse) + + IpEchoServerMessage { tcp_ports: [u16; 4], udp_ports: [u16; 4] } + IpEchoServerResponse { address: IpAddr, shred_version: Option<u16> } + +The entrypoint sends a single [0] byte to peer_addr.ip() on each +non-zero UDP port, then responds AFTER all probes complete (5s timeout). 
+""" +import socket +import struct +import sys +import threading +import time + +ENTRYPOINT_IP = sys.argv[1] if len(sys.argv) > 1 else "34.83.231.102" +GOSSIP_PORT = int(sys.argv[2]) if len(sys.argv) > 2 else 8001 + +# Build ip_echo request +# bincode for [u16; 4]: 4 little-endian u16 values, no length prefix (fixed array) +tcp_ports = struct.pack("<4H", 0, 0, 0, 0) # no TCP probes +udp_ports = struct.pack("<4H", GOSSIP_PORT, 0, 0, 0) # probe our gossip port +header = b"\x00" * 4 +message = header + tcp_ports + udp_ports + b"\n" + +print(f"Connecting to {ENTRYPOINT_IP}:{GOSSIP_PORT} for ip_echo") +print(f"Request: {message.hex()} ({len(message)} bytes)") + +# Start UDP listener on gossip port BEFORE sending ip_echo +udp_received = [] + +def udp_listener(): + us = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) + us.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) + us.bind(("0.0.0.0", GOSSIP_PORT)) + us.settimeout(10) + try: + while True: + data, addr = us.recvfrom(64) + udp_received.append((data, addr)) + print(f"UDP PROBE received: {len(data)} bytes from {addr[0]}:{addr[1]}") + except socket.timeout: + pass + finally: + us.close() + +listener = threading.Thread(target=udp_listener, daemon=True) +listener.start() + +# Give listener time to bind +time.sleep(0.1) + +# Send ip_echo request via TCP +s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) +s.settimeout(15) # entrypoint probes take up to 5s each + +try: + s.connect((ENTRYPOINT_IP, GOSSIP_PORT)) + print(f"OK TCP connected to {ENTRYPOINT_IP}:{GOSSIP_PORT}") + s.sendall(message) + print("OK ip_echo request sent, waiting for probes + response...") + + # Read response (comes AFTER probes complete) + resp = b"" + while len(resp) < 4: + chunk = s.recv(256) + if not chunk: + break + resp += chunk + + if len(resp) >= 4: + print(f"OK ip_echo response: {len(resp)} bytes: {resp.hex()}") + # Parse: 4 null bytes + bincode IpEchoServerResponse + # IpEchoServerResponse { address: IpAddr, shred_version: Option } + # 
bincode IpAddr: enum tag (u32) + data + if len(resp) >= 12: + payload = resp[4:] + ip_enum = struct.unpack(" 1 else 8001 +TIMEOUT = int(sys.argv[2]) if len(sys.argv) > 2 else 15 + +s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) +s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) +s.bind(("0.0.0.0", PORT)) +s.settimeout(TIMEOUT) +print(f"LISTENING on UDP {PORT}", flush=True) + +try: + data, addr = s.recvfrom(256) + print(f"OK {len(data)} bytes from {addr[0]}:{addr[1]}: {data!r}") +except socket.timeout: + print("TIMEOUT no UDP received") + sys.exit(1) +finally: + s.close() diff --git a/scripts/relay-test-udp-send.py b/scripts/relay-test-udp-send.py new file mode 100644 index 00000000..6ea0e97c --- /dev/null +++ b/scripts/relay-test-udp-send.py @@ -0,0 +1,12 @@ +#!/usr/bin/env python3 +"""Send a UDP probe packet to a target host:port.""" +import socket +import sys + +HOST = sys.argv[1] if len(sys.argv) > 1 else "137.239.194.65" +PORT = int(sys.argv[2]) if len(sys.argv) > 2 else 8001 + +s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) +s.sendto(b"PROBE", (HOST, PORT)) +print(f"OK sent 5 bytes to {HOST}:{PORT}") +s.close() diff --git a/scripts/test-ashburn-relay.sh b/scripts/test-ashburn-relay.sh index 2968747f..bc614f12 100755 --- a/scripts/test-ashburn-relay.sh +++ b/scripts/test-ashburn-relay.sh @@ -59,7 +59,7 @@ run_test() { shift ansible biscayne -i "$BISCAYNE_INV" -m ansible.builtin.shell \ -a "nsenter --net --target $KIND_PID python3 /tmp/$name $*" \ - --become 2>&1 | grep -E '^OK|^TIMEOUT|^ERROR|^REFUSED|^NOTE' || echo "NO OUTPUT" + --become 2>&1 | grep -E '^OK|^TIMEOUT|^ERROR|^REFUSED|^NOTE|^FAIL' || echo "NO OUTPUT" } echo "=== Ashburn Relay End-to-End Test ===" @@ -102,6 +102,23 @@ else fi echo "" +# Test 4: ip_echo UDP reachability — the actual validator startup check +# Sends correct ip_echo protocol to entrypoint, which probes our UDP port. +# This is the path that causes CrashLoopBackOff when broken. 
+# Triggers: outbound TCP dport 8001 (mangle mark → tunnel → SNAT) +# inbound UDP dport 8001 (was-sw01 → backbone → mia-sw01 → tunnel → DNAT) +echo "--- Test 4: ip_echo UDP reachability (inbound UDP probe) ---" +result=$(run_test relay-test-ip-echo.py 34.83.231.102 "$GOSSIP_PORT") +if echo "$result" | grep -q "^OK inbound UDP"; then + pass "ip_echo UDP reachability: $result" +elif echo "$result" | grep -q "^OK"; then + # Partial success — TCP worked but no UDP probes arrived + fail "ip_echo partial — no inbound UDP: $result" +else + fail "ip_echo: $result" +fi +echo "" + # Summary echo "=== Results: $PASS passed, $FAIL failed ===" if [[ $FAIL -gt 0 ]]; then From ad68d505aea3a7b38064188cdbbc88773cd62413 Mon Sep 17 00:00:00 2001 From: "A. F. Dudley" Date: Sun, 8 Mar 2026 04:14:05 +0000 Subject: [PATCH 22/62] fix: redeploy playbook paths, tags, and idempotency MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Fix snapshot_dir: /srv/solana/snapshots → /srv/kind/solana/snapshots (kind node reads from the bind mount, not the zvol mount directly) - Fix kind-internal paths: /mnt/solana/... → /mnt/validator-... to match actual PV hostPath layout (individual mounts, not unified) - Add 'scale-up' tag to "Scale validator to 1" task for partial recovery (--tags snapshot,scale-up,verify resumes without re-running deploy) - Make 'Start deployment' idempotent: failed_when: false + follow-up check so existing deployment doesn't fail the play Co-Authored-By: Claude Opus 4.6 --- playbooks/biscayne-redeploy.yml | 50 +++++++++++++++++++++++++++------ 1 file changed, 41 insertions(+), 9 deletions(-) diff --git a/playbooks/biscayne-redeploy.yml b/playbooks/biscayne-redeploy.yml index 86de9c75..cef45372 100644 --- a/playbooks/biscayne-redeploy.yml +++ b/playbooks/biscayne-redeploy.yml @@ -11,7 +11,7 @@ # 3. [deploy] laconic-so deployment start, then immediately scale to 0 # 4. [snapshot] Download snapshot via aria2c to host bind mount # 5. 
[snapshot] Verify snapshot visible inside kind node -# 6. [deploy] Scale validator back to 1 +# 6. [deploy,scale-up] Scale validator back to 1 # 7. [verify] Wait for pod Running, check logs + RPC health # # The validator cannot run during snapshot download — it would lock/use the @@ -38,6 +38,10 @@ # ansible-playbook -i biscayne.vaasl.io, playbooks/biscayne-redeploy.yml \ # --tags snapshot # +# # Resume after partial failure (download snapshot, scale up, verify) +# ansible-playbook -i biscayne.vaasl.io, playbooks/biscayne-redeploy.yml \ +# --tags snapshot,scale-up,verify +# - name: Redeploy agave validator on biscayne hosts: all gather_facts: false @@ -51,7 +55,7 @@ kind_cluster: laconic-70ce4c4b47e23b85 k8s_namespace: "laconic-{{ kind_cluster }}" deployment_name: "{{ kind_cluster }}-deployment" - snapshot_dir: /srv/solana/snapshots + snapshot_dir: /srv/kind/solana/snapshots ledger_dir: /srv/solana/ledger accounts_dir: /srv/solana/ramdisk/accounts ramdisk_mount: /srv/solana/ramdisk @@ -185,7 +189,7 @@ cmd: > set -o pipefail && docker exec {{ kind_cluster }}-control-plane - df -T /mnt/solana/ramdisk 2>/dev/null | grep -q xfs + df -T /mnt/validator-accounts 2>/dev/null | grep -q xfs executable: /bin/bash register: kind_ramdisk_check failed_when: kind_ramdisk_check.rc != 0 @@ -221,10 +225,31 @@ - name: Start deployment (creates kind cluster + deploys pod) ansible.builtin.command: "{{ laconic_so }} deployment --dir {{ deployment_dir }} start" - changed_when: true + register: deploy_start + changed_when: deploy_start.rc == 0 + failed_when: false timeout: 1200 tags: [deploy] + - name: Verify deployment started or already exists + ansible.builtin.command: > + kubectl get deployment {{ deployment_name }} + -n {{ k8s_namespace }} + -o jsonpath='{.metadata.name}' + register: deploy_verify + changed_when: false + failed_when: deploy_verify.rc != 0 + when: deploy_start.rc != 0 + tags: [deploy] + + - name: Show deployment start warning + ansible.builtin.debug: + msg: >- + 
laconic-so deployment start returned rc={{ deploy_start.rc }} + but deployment exists — continuing (idempotent). + when: deploy_start.rc != 0 and (deploy_verify.rc | default(1)) == 0 + tags: [deploy] + - name: Wait for deployment to exist ansible.builtin.command: > kubectl get deployment {{ deployment_name }} @@ -275,7 +300,7 @@ - name: Verify kind node mounts ansible.builtin.command: > docker exec {{ kind_cluster }}-control-plane - ls /mnt/solana/snapshots/ + ls /mnt/validator-snapshots/ register: kind_mount_check changed_when: false tags: [snapshot] @@ -301,7 +326,7 @@ ansible.builtin.shell: > set -o pipefail && docker exec {{ kind_cluster }}-control-plane - find /mnt/solana/snapshots/ -name '*.tar.*' -maxdepth 1 | head -5 + find /mnt/validator-snapshots/ -name '*.tar.*' -maxdepth 1 | head -5 register: kind_snapshot_check failed_when: kind_snapshot_check.stdout == "" changed_when: false @@ -320,7 +345,7 @@ kubectl scale deployment {{ deployment_name }} -n {{ k8s_namespace }} --replicas=1 changed_when: true - tags: [deploy] + tags: [deploy, scale-up] # ---- verify: confirm validator is running -------------------------------- - name: Wait for pod to be running @@ -334,10 +359,17 @@ changed_when: false tags: [verify] - - name: Verify unified mount inside kind node - ansible.builtin.command: "docker exec {{ kind_cluster }}-control-plane ls /mnt/solana/" + - name: Verify PV mounts inside kind node + ansible.builtin.shell: + cmd: > + set -o pipefail && + docker exec {{ kind_cluster }}-control-plane + df -T /mnt/validator-ledger /mnt/validator-accounts + /mnt/validator-snapshots /mnt/validator-log 2>&1 + executable: /bin/bash register: mount_check changed_when: false + failed_when: false tags: [verify] - name: Show mount contents From 1da69cf739352ffc8632c7612dd5a2b3541b8afc Mon Sep 17 00:00:00 2001 From: "A. F. 
Dudley" Date: Sun, 8 Mar 2026 04:15:03 +0000 Subject: [PATCH 23/62] fix(k8s): make deploy_k8s.py idempotent with create-or-replace semantics MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit All K8s resource creation in deploy_k8s.py now uses try-create, catch ApiException(409), then replace — matching the pattern already used for secrets in deployment_create.py. This allows `deployment start` to be safely re-run without 409 Conflict errors. Resources made idempotent: - Deployment (create_namespaced_deployment → replace on 409) - Service (create_namespaced_service → replace on 409) - Ingress (create_namespaced_ingress → replace on 409) - NodePort services (same as Service) - ConfigMap (create_namespaced_config_map → replace on 409) - PV/PVC: bare `except: pass` replaced with explicit ApiException catch for 404 Extracted _ensure_deployment(), _ensure_service(), _ensure_ingress(), and _ensure_config_map() helpers to keep cyclomatic complexity in check. 
Co-Authored-By: Claude Opus 4.6 --- stack_orchestrator/deploy/k8s/deploy_k8s.py | 149 ++++++++++++++------ 1 file changed, 104 insertions(+), 45 deletions(-) diff --git a/stack_orchestrator/deploy/k8s/deploy_k8s.py b/stack_orchestrator/deploy/k8s/deploy_k8s.py index 3b235538..c0272be7 100644 --- a/stack_orchestrator/deploy/k8s/deploy_k8s.py +++ b/stack_orchestrator/deploy/k8s/deploy_k8s.py @@ -192,6 +192,99 @@ class K8sDeployer(Deployer): else: raise + def _ensure_config_map(self, cfg_map): + """Create or replace a ConfigMap (idempotent).""" + try: + resp = self.core_api.create_namespaced_config_map( + body=cfg_map, namespace=self.k8s_namespace + ) + if opts.o.debug: + print(f"ConfigMap created: {resp}") + except ApiException as e: + if e.status == 409: + resp = self.core_api.replace_namespaced_config_map( + name=cfg_map.metadata.name, + namespace=self.k8s_namespace, + body=cfg_map, + ) + if opts.o.debug: + print(f"ConfigMap updated: {resp}") + else: + raise + + def _ensure_deployment(self, deployment): + """Create or replace a Deployment (idempotent).""" + try: + resp = cast( + client.V1Deployment, + self.apps_api.create_namespaced_deployment( + body=deployment, namespace=self.k8s_namespace + ), + ) + if opts.o.debug: + print("Deployment created:") + except ApiException as e: + if e.status == 409: + resp = cast( + client.V1Deployment, + self.apps_api.replace_namespaced_deployment( + name=deployment.metadata.name, + namespace=self.k8s_namespace, + body=deployment, + ), + ) + if opts.o.debug: + print("Deployment updated:") + else: + raise + if opts.o.debug: + meta = resp.metadata + spec = resp.spec + if meta and spec and spec.template.spec: + containers = spec.template.spec.containers + img = containers[0].image if containers else None + print(f"{meta.namespace} {meta.name} {meta.generation} {img}") + + def _ensure_service(self, service, kind: str = "Service"): + """Create or replace a Service (idempotent).""" + try: + resp = self.core_api.create_namespaced_service( 
+ namespace=self.k8s_namespace, body=service + ) + if opts.o.debug: + print(f"{kind} created: {resp}") + except ApiException as e: + if e.status == 409: + resp = self.core_api.replace_namespaced_service( + name=service.metadata.name, + namespace=self.k8s_namespace, + body=service, + ) + if opts.o.debug: + print(f"{kind} updated: {resp}") + else: + raise + + def _ensure_ingress(self, ingress): + """Create or replace an Ingress (idempotent).""" + try: + resp = self.networking_api.create_namespaced_ingress( + namespace=self.k8s_namespace, body=ingress + ) + if opts.o.debug: + print(f"Ingress created: {resp}") + except ApiException as e: + if e.status == 409: + resp = self.networking_api.replace_namespaced_ingress( + name=ingress.metadata.name, + namespace=self.k8s_namespace, + body=ingress, + ) + if opts.o.debug: + print(f"Ingress updated: {resp}") + else: + raise + def _create_volume_data(self): # Create the host-path-mounted PVs for this deployment pvs = self.cluster_info.get_pvs() @@ -208,8 +301,9 @@ class K8sDeployer(Deployer): print("PVs already present:") print(f"{pv_resp}") continue - except: # noqa: E722 - pass + except ApiException as e: + if e.status != 404: + raise pv_resp = self.core_api.create_persistent_volume(body=pv) if opts.o.debug: @@ -232,8 +326,9 @@ class K8sDeployer(Deployer): print("PVCs already present:") print(f"{pvc_resp}") continue - except: # noqa: E722 - pass + except ApiException as e: + if e.status != 404: + raise pvc_resp = self.core_api.create_namespaced_persistent_volume_claim( body=pvc, namespace=self.k8s_namespace @@ -248,12 +343,7 @@ class K8sDeployer(Deployer): if opts.o.debug: print(f"Sending this ConfigMap: {cfg_map}") if not opts.o.dry_run: - cfg_rsp = self.core_api.create_namespaced_config_map( - body=cfg_map, namespace=self.k8s_namespace - ) - if opts.o.debug: - print("ConfigMap created:") - print(f"{cfg_rsp}") + self._ensure_config_map(cfg_map) def _create_deployment(self): # Process compose files into a Deployment @@ -264,34 
+354,13 @@ class K8sDeployer(Deployer): if opts.o.debug: print(f"Sending this deployment: {deployment}") if not opts.o.dry_run: - deployment_resp = cast( - client.V1Deployment, - self.apps_api.create_namespaced_deployment( - body=deployment, namespace=self.k8s_namespace - ), - ) - if opts.o.debug: - print("Deployment created:") - meta = deployment_resp.metadata - spec = deployment_resp.spec - if meta and spec and spec.template.spec: - ns = meta.namespace - name = meta.name - gen = meta.generation - containers = spec.template.spec.containers - img = containers[0].image if containers else None - print(f"{ns} {name} {gen} {img}") + self._ensure_deployment(deployment) service = self.cluster_info.get_service() if opts.o.debug: print(f"Sending this service: {service}") if service and not opts.o.dry_run: - service_resp = self.core_api.create_namespaced_service( - namespace=self.k8s_namespace, body=service - ) - if opts.o.debug: - print("Service created:") - print(f"{service_resp}") + self._ensure_service(service) def _find_certificate_for_host_name(self, host_name): all_certificates = self.custom_obj_api.list_namespaced_custom_object( @@ -404,12 +473,7 @@ class K8sDeployer(Deployer): if opts.o.debug: print(f"Sending this ingress: {ingress}") if not opts.o.dry_run: - ingress_resp = self.networking_api.create_namespaced_ingress( - namespace=self.k8s_namespace, body=ingress - ) - if opts.o.debug: - print("Ingress created:") - print(f"{ingress_resp}") + self._ensure_ingress(ingress) else: if opts.o.debug: print("No ingress configured") @@ -419,12 +483,7 @@ class K8sDeployer(Deployer): if opts.o.debug: print(f"Sending this nodeport: {nodeport}") if not opts.o.dry_run: - nodeport_resp = self.core_api.create_namespaced_service( - namespace=self.k8s_namespace, body=nodeport - ) - if opts.o.debug: - print("NodePort created:") - print(f"{nodeport_resp}") + self._ensure_service(nodeport, kind="NodePort") def down(self, timeout, volumes, skip_cluster_management): 
self.skip_cluster_management = skip_cluster_management From fe935037f7231549219a26b7b56306c7cda2b377 Mon Sep 17 00:00:00 2001 From: "A. F. Dudley" Date: Sun, 8 Mar 2026 04:32:20 +0000 Subject: [PATCH 24/62] fix: add laconic-so update step, downgrade unified mount check to warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add laconic_so_repo variable (/home/rix/stack-orchestrator) and a git pull task before deployment start — the editable install must be current or stale code causes deploy failures - Downgrade unified mount root check from fatal assertion to debug warning — the mount style depends on which laconic-so version is deployed, and individual PV mounts (/mnt/validator-*) work fine Co-Authored-By: Claude Opus 4.6 --- playbooks/biscayne-redeploy.yml | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/playbooks/biscayne-redeploy.yml b/playbooks/biscayne-redeploy.yml index cef45372..06247e6b 100644 --- a/playbooks/biscayne-redeploy.yml +++ b/playbooks/biscayne-redeploy.yml @@ -52,6 +52,7 @@ stack_repo: /srv/deployments/agave-stack stack_path: /srv/deployments/agave-stack/stack-orchestrator/stacks/agave laconic_so: /home/rix/.local/bin/laconic-so + laconic_so_repo: /home/rix/stack-orchestrator kind_cluster: laconic-70ce4c4b47e23b85 k8s_namespace: "laconic-{{ kind_cluster }}" deployment_name: "{{ kind_cluster }}-deployment" @@ -216,11 +217,28 @@ changed_when: true tags: [deploy] - - name: Verify kind-config.yml has unified mount root + - name: Check kind-config.yml mount style ansible.builtin.command: "grep -c 'containerPath: /mnt$' {{ deployment_dir }}/kind-config.yml" register: mount_root_check - failed_when: mount_root_check.stdout | int < 1 changed_when: false + failed_when: false + tags: [deploy] + + - name: Warn if unified mount root not found + ansible.builtin.debug: + msg: >- + WARNING: kind-config.yml does not have unified mount root + (containerPath: /mnt). 
laconic-so may be using individual PV mounts. + Verify PV hostPaths match expected paths after deploy. + when: mount_root_check.stdout | default('0') | int < 1 + tags: [deploy] + + - name: Update laconic-so (editable install) + ansible.builtin.shell: | + cd {{ laconic_so_repo }} + git fetch origin + git reset --hard origin/main + changed_when: true tags: [deploy] - name: Start deployment (creates kind cluster + deploys pod) From 14f423ea0c04c624ebaca9be5d6a223bc7402ef1 Mon Sep 17 00:00:00 2001 From: "A. F. Dudley" Date: Sun, 8 Mar 2026 04:32:20 +0000 Subject: [PATCH 25/62] fix(k8s): read existing resourceVersion/clusterIP before replace K8s PUT (replace) operations require metadata.resourceVersion for optimistic concurrency control. Services additionally have immutable spec.clusterIP that must be preserved from the existing object. On 409 conflict, all _ensure_* methods now read the existing resource first and copy resourceVersion (and clusterIP for Services) into the body before calling replace. 
Co-Authored-By: Claude Opus 4.6 --- stack_orchestrator/deploy/k8s/deploy_k8s.py | 27 ++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/stack_orchestrator/deploy/k8s/deploy_k8s.py b/stack_orchestrator/deploy/k8s/deploy_k8s.py index c0272be7..b34e3291 100644 --- a/stack_orchestrator/deploy/k8s/deploy_k8s.py +++ b/stack_orchestrator/deploy/k8s/deploy_k8s.py @@ -202,6 +202,10 @@ class K8sDeployer(Deployer): print(f"ConfigMap created: {resp}") except ApiException as e: if e.status == 409: + existing = self.core_api.read_namespaced_config_map( + name=cfg_map.metadata.name, namespace=self.k8s_namespace + ) + cfg_map.metadata.resource_version = existing.metadata.resource_version resp = self.core_api.replace_namespaced_config_map( name=cfg_map.metadata.name, namespace=self.k8s_namespace, @@ -225,6 +229,13 @@ class K8sDeployer(Deployer): print("Deployment created:") except ApiException as e: if e.status == 409: + existing = self.apps_api.read_namespaced_deployment( + name=deployment.metadata.name, + namespace=self.k8s_namespace, + ) + deployment.metadata.resource_version = ( + existing.metadata.resource_version + ) resp = cast( client.V1Deployment, self.apps_api.replace_namespaced_deployment( @@ -246,7 +257,11 @@ class K8sDeployer(Deployer): print(f"{meta.namespace} {meta.name} {meta.generation} {img}") def _ensure_service(self, service, kind: str = "Service"): - """Create or replace a Service (idempotent).""" + """Create or replace a Service (idempotent). + + Services have immutable fields (spec.clusterIP) that must be + preserved from the existing object on replace. 
+ """ try: resp = self.core_api.create_namespaced_service( namespace=self.k8s_namespace, body=service @@ -255,6 +270,12 @@ class K8sDeployer(Deployer): print(f"{kind} created: {resp}") except ApiException as e: if e.status == 409: + existing = self.core_api.read_namespaced_service( + name=service.metadata.name, namespace=self.k8s_namespace + ) + service.metadata.resource_version = existing.metadata.resource_version + if existing.spec.cluster_ip: + service.spec.cluster_ip = existing.spec.cluster_ip resp = self.core_api.replace_namespaced_service( name=service.metadata.name, namespace=self.k8s_namespace, @@ -275,6 +296,10 @@ class K8sDeployer(Deployer): print(f"Ingress created: {resp}") except ApiException as e: if e.status == 409: + existing = self.networking_api.read_namespaced_ingress( + name=ingress.metadata.name, namespace=self.k8s_namespace + ) + ingress.metadata.resource_version = existing.metadata.resource_version resp = self.networking_api.replace_namespaced_ingress( name=ingress.metadata.name, namespace=self.k8s_namespace, From 63735a9830e8eaaf2a4b786dfe6f99faa8976ad5 Mon Sep 17 00:00:00 2001 From: "A. F. 
Dudley" Date: Sun, 8 Mar 2026 04:42:11 +0000 Subject: [PATCH 26/62] fix: revert snapshot_dir, add laconic_so_branch, move kind ramdisk check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Revert snapshot_dir to /srv/solana/snapshots — aria2c runs on the host where this is the direct zvol mount (always available), unlike /srv/kind/solana/snapshots which depends on the bind mount - Add laconic_so_branch variable (default: main) and use it in both git reset commands so the branch can be overridden via -e - Move "Verify ramdisk visible inside kind node" from preflight to after "Wait for deployment to exist" — the kind container may not exist during preflight after teardown Co-Authored-By: Claude Opus 4.6 --- playbooks/biscayne-redeploy.yml | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/playbooks/biscayne-redeploy.yml b/playbooks/biscayne-redeploy.yml index 06247e6b..b6f263cd 100644 --- a/playbooks/biscayne-redeploy.yml +++ b/playbooks/biscayne-redeploy.yml @@ -53,10 +53,11 @@ stack_path: /srv/deployments/agave-stack/stack-orchestrator/stacks/agave laconic_so: /home/rix/.local/bin/laconic-so laconic_so_repo: /home/rix/stack-orchestrator + laconic_so_branch: main kind_cluster: laconic-70ce4c4b47e23b85 k8s_namespace: "laconic-{{ kind_cluster }}" deployment_name: "{{ kind_cluster }}-deployment" - snapshot_dir: /srv/kind/solana/snapshots + snapshot_dir: /srv/solana/snapshots ledger_dir: /srv/solana/ledger accounts_dir: /srv/solana/ramdisk/accounts ramdisk_mount: /srv/solana/ramdisk @@ -185,24 +186,12 @@ changed_when: false tags: [deploy, preflight] - - name: Verify ramdisk visible inside kind node - ansible.builtin.shell: - cmd: > - set -o pipefail && - docker exec {{ kind_cluster }}-control-plane - df -T /mnt/validator-accounts 2>/dev/null | grep -q xfs - executable: /bin/bash - register: kind_ramdisk_check - failed_when: kind_ramdisk_check.rc != 0 - changed_when: false - 
tags: [deploy, preflight] - # ---- deploy: sync config, bring up cluster, scale to 0 ------------------ - name: Pull agave-stack repo ansible.builtin.shell: | cd {{ stack_repo }} git fetch origin - git reset --hard origin/main + git reset --hard origin/{{ laconic_so_branch }} changed_when: true tags: [deploy] @@ -237,7 +226,7 @@ ansible.builtin.shell: | cd {{ laconic_so_repo }} git fetch origin - git reset --hard origin/main + git reset --hard origin/{{ laconic_so_branch }} changed_when: true tags: [deploy] @@ -280,6 +269,18 @@ changed_when: false tags: [deploy] + - name: Verify ramdisk visible inside kind node + ansible.builtin.shell: + cmd: > + set -o pipefail && + docker exec {{ kind_cluster }}-control-plane + df -T /mnt/validator-accounts 2>/dev/null | grep -q xfs + executable: /bin/bash + register: kind_ramdisk_check + failed_when: kind_ramdisk_check.rc != 0 + changed_when: false + tags: [deploy] + - name: Scale validator to 0 (stop before snapshot download) ansible.builtin.command: > kubectl scale deployment {{ deployment_name }} From 9c5b8e3f4e429f6e51b4373088a2cbdfaf89ba38 Mon Sep 17 00:00:00 2001 From: "A. F. Dudley" Date: Sun, 8 Mar 2026 06:56:25 +0000 Subject: [PATCH 27/62] chore: initialize pebbles issue tracker MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Track stack-orchestrator work items with pebbles (append-only event log). Epic so-076: Stack composition — deploy multiple stacks into one kind cluster with independent lifecycle management per sub-stack. 
Co-Authored-By: Claude Opus 4.6 --- .pebbles/config.json | 3 +++ .pebbles/events.jsonl | 15 +++++++++++++++ 2 files changed, 18 insertions(+) create mode 100644 .pebbles/config.json create mode 100644 .pebbles/events.jsonl diff --git a/.pebbles/config.json b/.pebbles/config.json new file mode 100644 index 00000000..806dcad9 --- /dev/null +++ b/.pebbles/config.json @@ -0,0 +1,3 @@ +{ + "prefix": "so" +} \ No newline at end of file diff --git a/.pebbles/events.jsonl b/.pebbles/events.jsonl new file mode 100644 index 00000000..944b62d0 --- /dev/null +++ b/.pebbles/events.jsonl @@ -0,0 +1,15 @@ +{"type":"create","timestamp":"2026-03-08T06:56:07.080584539Z","issue_id":"so-076","payload":{"description":"Currently laconic-so maps one stack to one deployment to one pod. All containers\nin a stack's compose files become containers in a single k8s pod. This means:\n\n- Can't upgrade doublezero without restarting agave-validator\n- Can't restart monitoring without disrupting the validator\n- Can't independently scale or lifecycle-manage components\n\nThe fix is stack composition. A meta-stack (e.g. biscayne-stack) composes\nsub-stacks (agave, doublezero, agave-monitoring), each becoming its own\nk8s Deployment with independent lifecycle.","priority":"2","title":"Stack composition: deploy multiple stacks into one kind cluster","type":"epic"}} +{"type":"create","timestamp":"2026-03-08T06:56:07.551986919Z","issue_id":"so-ab0","payload":{"description":"Add laconic-so deployment prepare that creates cluster infrastructure without pods. Already implemented, needs review.","priority":"2","title":"deployment prepare command","type":"task"}} +{"type":"create","timestamp":"2026-03-08T06:56:07.884418759Z","issue_id":"so-04f","payload":{"description":"deployment stop on ANY deployment deletes the shared kind cluster. 
Should only delete its own namespace.","priority":"2","title":"deployment stop should not destroy shared cluster","type":"bug"}} +{"type":"create","timestamp":"2026-03-08T06:56:08.253520249Z","issue_id":"so-370","payload":{"description":"Allow stack.yml to reference sub-stacks. Each sub-stack becomes its own k8s Deployment sharing namespace and PVs.","priority":"2","title":"Add stacks: field to stack.yml for composition","type":"task"}} +{"type":"create","timestamp":"2026-03-08T06:56:08.646764337Z","issue_id":"so-f7c","payload":{"description":"Create three independent stacks from the monolithic agave-stack. Each gets its own compose file and independent lifecycle.","priority":"2","title":"Split agave-stack into agave + doublezero + monitoring","type":"task"}} +{"type":"rename","timestamp":"2026-03-08T06:56:14.499990161Z","issue_id":"so-ab0","payload":{"new_id":"so-076.1"}} +{"type":"dep_add","timestamp":"2026-03-08T06:56:14.499992031Z","issue_id":"so-076.1","payload":{"dep_type":"parent-child","depends_on":"so-076"}} +{"type":"rename","timestamp":"2026-03-08T06:56:14.786407752Z","issue_id":"so-04f","payload":{"new_id":"so-076.2"}} +{"type":"dep_add","timestamp":"2026-03-08T06:56:14.786409842Z","issue_id":"so-076.2","payload":{"dep_type":"parent-child","depends_on":"so-076"}} +{"type":"rename","timestamp":"2026-03-08T06:56:15.058959714Z","issue_id":"so-370","payload":{"new_id":"so-076.3"}} +{"type":"dep_add","timestamp":"2026-03-08T06:56:15.058961364Z","issue_id":"so-076.3","payload":{"dep_type":"parent-child","depends_on":"so-076"}} +{"type":"rename","timestamp":"2026-03-08T06:56:15.410080785Z","issue_id":"so-f7c","payload":{"new_id":"so-076.4"}} +{"type":"dep_add","timestamp":"2026-03-08T06:56:15.410082305Z","issue_id":"so-076.4","payload":{"dep_type":"parent-child","depends_on":"so-076"}} +{"type":"dep_add","timestamp":"2026-03-08T06:56:16.313585082Z","issue_id":"so-076.3","payload":{"dep_type":"blocks","depends_on":"so-076.2"}} 
+{"type":"dep_add","timestamp":"2026-03-08T06:56:16.567629422Z","issue_id":"so-076.4","payload":{"dep_type":"blocks","depends_on":"so-076.3"}} From 974eed0c733324da2b3d844821a7923297843b6b Mon Sep 17 00:00:00 2001 From: "A. F. Dudley" Date: Sun, 8 Mar 2026 06:56:34 +0000 Subject: [PATCH 28/62] feat: add `deployment prepare` command (so-076.1) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Refactors K8sDeployer.up() into three composable methods: - _setup_cluster_and_namespace(): kind cluster, API, namespace, ingress - _create_infrastructure(): PVs, PVCs, ConfigMaps, Services, NodePorts - _create_deployment(): Deployment resource (pods) `prepare` calls the first two only — creates all cluster infrastructure without starting pods. This eliminates the scale-to-0 workaround where operators had to run `deployment start` then immediately scale down. Usage: laconic-so deployment --dir prepare Co-Authored-By: Claude Opus 4.6 --- .gitignore | 1 + stack_orchestrator/deploy/deploy.py | 6 ++ stack_orchestrator/deploy/deployer.py | 9 +++ stack_orchestrator/deploy/deployment.py | 22 +++++++ stack_orchestrator/deploy/k8s/deploy_k8s.py | 65 +++++++++++---------- 5 files changed, 73 insertions(+), 30 deletions(-) diff --git a/.gitignore b/.gitignore index 3aaa220b..6abbf941 100644 --- a/.gitignore +++ b/.gitignore @@ -8,3 +8,4 @@ __pycache__ package stack_orchestrator/data/build_tag.txt /build +.worktrees diff --git a/stack_orchestrator/deploy/deploy.py b/stack_orchestrator/deploy/deploy.py index f2bf977c..6e914b92 100644 --- a/stack_orchestrator/deploy/deploy.py +++ b/stack_orchestrator/deploy/deploy.py @@ -182,6 +182,12 @@ def status_operation(ctx): ctx.obj.deployer.status() +def prepare_operation(ctx, skip_cluster_management=False): + ctx.obj.deployer.prepare( + skip_cluster_management=skip_cluster_management, + ) + + def update_envs_operation(ctx): ctx.obj.deployer.update_envs() diff --git a/stack_orchestrator/deploy/deployer.py 
b/stack_orchestrator/deploy/deployer.py index 11fb6592..b950e29b 100644 --- a/stack_orchestrator/deploy/deployer.py +++ b/stack_orchestrator/deploy/deployer.py @@ -69,6 +69,15 @@ class Deployer(ABC): def run_job(self, job_name: str, release_name: Optional[str] = None): pass + def prepare(self, skip_cluster_management): + """Create cluster infrastructure (namespace, PVs, services) without starting pods. + + Only supported for k8s deployers. Compose deployers raise an error. + """ + raise DeployerException( + "prepare is only supported for k8s deployments" + ) + class DeployerException(Exception): def __init__(self, *args: object) -> None: diff --git a/stack_orchestrator/deploy/deployment.py b/stack_orchestrator/deploy/deployment.py index 1182d23f..0dc9ac37 100644 --- a/stack_orchestrator/deploy/deployment.py +++ b/stack_orchestrator/deploy/deployment.py @@ -23,6 +23,7 @@ from stack_orchestrator.deploy.images import push_images_operation from stack_orchestrator.deploy.deploy import ( up_operation, down_operation, + prepare_operation, ps_operation, port_operation, status_operation, @@ -125,6 +126,27 @@ def start(ctx, stay_attached, skip_cluster_management, extra_args): up_operation(ctx, services_list, stay_attached, skip_cluster_management) +@command.command() +@click.option( + "--skip-cluster-management/--perform-cluster-management", + default=False, + help="Skip cluster initialization (only for kind-k8s deployments)", +) +@click.pass_context +def prepare(ctx, skip_cluster_management): + """Create cluster infrastructure without starting pods. + + Sets up the kind cluster, namespace, PVs, PVCs, ConfigMaps, Services, + and Ingresses — everything that 'start' does EXCEPT creating the + Deployment resource. No pods will be scheduled. + + Use 'start --skip-cluster-management' afterward to create the Deployment + and start pods when ready. 
+ """ + ctx.obj = make_deploy_context(ctx) + prepare_operation(ctx, skip_cluster_management) + + # TODO: remove legacy up command since it's an alias for stop @command.command() @click.option( diff --git a/stack_orchestrator/deploy/k8s/deploy_k8s.py b/stack_orchestrator/deploy/k8s/deploy_k8s.py index b34e3291..1eee8ffd 100644 --- a/stack_orchestrator/deploy/k8s/deploy_k8s.py +++ b/stack_orchestrator/deploy/k8s/deploy_k8s.py @@ -371,22 +371,15 @@ class K8sDeployer(Deployer): self._ensure_config_map(cfg_map) def _create_deployment(self): - # Process compose files into a Deployment + """Create the k8s Deployment resource (which starts pods).""" deployment = self.cluster_info.get_deployment( image_pull_policy=None if self.is_kind() else "Always" ) - # Create the k8s objects if opts.o.debug: print(f"Sending this deployment: {deployment}") if not opts.o.dry_run: self._ensure_deployment(deployment) - service = self.cluster_info.get_service() - if opts.o.debug: - print(f"Sending this service: {service}") - if service and not opts.o.dry_run: - self._ensure_service(service) - def _find_certificate_for_host_name(self, host_name): all_certificates = self.custom_obj_api.list_namespaced_custom_object( group="cert-manager.io", @@ -424,24 +417,25 @@ class K8sDeployer(Deployer): return None def up(self, detach, skip_cluster_management, services): + self._setup_cluster_and_namespace(skip_cluster_management) + self._create_infrastructure() + self._create_deployment() + + def _setup_cluster_and_namespace(self, skip_cluster_management): + """Create kind cluster (if needed) and namespace. 
Shared by up() and prepare().""" self.skip_cluster_management = skip_cluster_management if not opts.o.dry_run: if self.is_kind() and not self.skip_cluster_management: - # Create the kind cluster (or reuse existing one) kind_config = str( self.deployment_dir.joinpath(constants.kind_config_filename) ) actual_cluster = create_cluster(self.kind_cluster_name, kind_config) if actual_cluster != self.kind_cluster_name: - # An existing cluster was found, use it instead self.kind_cluster_name = actual_cluster - # Only load locally-built images into kind - # Registry images (docker.io, ghcr.io, etc.) will be pulled by k8s local_containers = self.deployment_context.stack.obj.get( "containers", [] ) if local_containers: - # Filter image_set to only images matching local containers local_images = { img for img in self.cluster_info.image_set @@ -449,47 +443,48 @@ class K8sDeployer(Deployer): } if local_images: load_images_into_kind(self.kind_cluster_name, local_images) - # Note: if no local containers defined, all images come from registries self.connect_api() - # Create deployment-specific namespace for resource isolation self._ensure_namespace() if self.is_kind() and not self.skip_cluster_management: - # Configure ingress controller (not installed by default in kind) - # Skip if already running (idempotent for shared cluster) if not is_ingress_running(): install_ingress_for_kind(self.cluster_info.spec.get_acme_email()) - # Wait for ingress to start - # (deployment provisioning will fail unless this is done) wait_for_ingress_in_kind() - # Create RuntimeClass if unlimited_memlock is enabled if self.cluster_info.spec.get_unlimited_memlock(): _create_runtime_class( constants.high_memlock_runtime, constants.high_memlock_runtime, ) - else: print("Dry run mode enabled, skipping k8s API connect") - # Create registry secret if configured + def _create_infrastructure(self): + """Create PVs, PVCs, ConfigMaps, Services, Ingresses, NodePorts. 
+ + Everything except the Deployment resource (which starts pods). + Shared by up() and prepare(). + """ from stack_orchestrator.deploy.deployment_create import create_registry_secret create_registry_secret(self.cluster_info.spec, self.cluster_info.app_name) self._create_volume_data() - self._create_deployment() + + # Create the ClusterIP service (paired with the deployment) + service = self.cluster_info.get_service() + if service and not opts.o.dry_run: + if opts.o.debug: + print(f"Sending this service: {service}") + self._ensure_service(service) http_proxy_info = self.cluster_info.spec.get_http_proxy() - # Note: we don't support tls for kind (enabling tls causes errors) use_tls = http_proxy_info and not self.is_kind() certificate = ( self._find_certificate_for_host_name(http_proxy_info[0]["host-name"]) if use_tls else None ) - if opts.o.debug: - if certificate: - print(f"Using existing certificate: {certificate}") + if opts.o.debug and certificate: + print(f"Using existing certificate: {certificate}") ingress = self.cluster_info.get_ingress( use_tls=use_tls, certificate=certificate @@ -499,9 +494,8 @@ class K8sDeployer(Deployer): print(f"Sending this ingress: {ingress}") if not opts.o.dry_run: self._ensure_ingress(ingress) - else: - if opts.o.debug: - print("No ingress configured") + elif opts.o.debug: + print("No ingress configured") nodeports: List[client.V1Service] = self.cluster_info.get_nodeports() for nodeport in nodeports: @@ -510,6 +504,17 @@ class K8sDeployer(Deployer): if not opts.o.dry_run: self._ensure_service(nodeport, kind="NodePort") + def prepare(self, skip_cluster_management): + """Create cluster infrastructure without starting pods. + + Sets up kind cluster, namespace, PVs, PVCs, ConfigMaps, Services, + Ingresses, and NodePorts — everything that up() does EXCEPT creating + the Deployment resource. 
+ """ + self._setup_cluster_and_namespace(skip_cluster_management) + self._create_infrastructure() + print("Cluster infrastructure prepared (no pods started).") + def down(self, timeout, volumes, skip_cluster_management): self.skip_cluster_management = skip_cluster_management self.connect_api() From 591d158e1f1e7f800cdda1526f45f4d5ea844456 Mon Sep 17 00:00:00 2001 From: "A. F. Dudley" Date: Sun, 8 Mar 2026 06:59:07 +0000 Subject: [PATCH 29/62] chore: populate pebbles with known bugs and feature requests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Issues: - bar-a3b [P0] agave-validator crash after ~57 seconds - bar-41a [P1] telegraf volume mounts missing from pod spec - bar-02e [P1] zvol mount bug (closed — fixed 2026-03-08) - bar-b04 [P2] update redeploy to use deployment prepare - bar-b41 [P2] snapshot leapfrog recovery playbook - bar-0b4 [P3] prepare-agave unconditionally imports relay playbook Co-Authored-By: Claude Opus 4.6 --- .pebbles/.gitignore | 1 + .pebbles/config.json | 3 +++ .pebbles/events.jsonl | 45 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 49 insertions(+) create mode 100644 .pebbles/.gitignore create mode 100644 .pebbles/config.json create mode 100644 .pebbles/events.jsonl diff --git a/.pebbles/.gitignore b/.pebbles/.gitignore new file mode 100644 index 00000000..0a168c65 --- /dev/null +++ b/.pebbles/.gitignore @@ -0,0 +1 @@ +pebbles.db diff --git a/.pebbles/config.json b/.pebbles/config.json new file mode 100644 index 00000000..88b6e374 --- /dev/null +++ b/.pebbles/config.json @@ -0,0 +1,3 @@ +{ + "prefix": "bar" +} \ No newline at end of file diff --git a/.pebbles/events.jsonl b/.pebbles/events.jsonl new file mode 100644 index 00000000..7d1397a3 --- /dev/null +++ b/.pebbles/events.jsonl @@ -0,0 +1,45 @@ +{"type":"create","timestamp":"2026-03-06T07:57:55.427398426Z","issue_id":"bar-48f","payload":{"description":"Route all validator traffic (gossip, repair, TVU, TPU) through 
137.239.194.65 on laconic-was-sw01 in Ashburn. Supersedes old TVU-only shred relay. See docs/ashburn-validator-relay.md for full design.","priority":"1","title":"Ashburn Full Validator Traffic Relay","type":"epic"}} +{"type":"create","timestamp":"2026-03-06T07:58:01.589463071Z","issue_id":"bar-a47","payload":{"description":"Create Loopback101 (137.239.194.65/32), VALIDATOR-RELAY ACL + traffic-policy on Et1/1, replacing old SHRED-RELAY. Uses 5-min auto-revert config session. Playbook: playbooks/ashburn-relay-was-sw01.yml","priority":"1","title":"was-sw01: Inbound validator relay config","type":"task"}} +{"type":"create","timestamp":"2026-03-06T07:58:07.292140983Z","issue_id":"bar-0e5","payload":{"description":"Add 137.239.194.65/32 to lo, DNAT rules for ports 8001,9000-9025 to kind node 172.20.0.2. Playbook: playbooks/ashburn-relay-biscayne.yml -t inbound","priority":"1","title":"biscayne: Inbound DNAT rules","type":"task"}} +{"type":"create","timestamp":"2026-03-06T07:58:10.838534858Z","issue_id":"bar-f9b","payload":{"description":"Ping 137.239.194.65 from external host, check DNAT counters on biscayne, verify traffic-policy counters on was-sw01.","priority":"1","title":"Verify inbound relay","type":"task"}} +{"type":"create","timestamp":"2026-03-06T07:58:15.228970622Z","issue_id":"bar-bf4","payload":{"description":"Pre-flight to discover GRE tunnel interface, then apply VALIDATOR-OUTBOUND traffic-policy redirecting src 137.239.194.65 to was-sw01 via backbone. Playbook: playbooks/ashburn-relay-mia-sw01.yml","priority":"1","title":"mia-sw01: Outbound validator redirect","type":"task"}} +{"type":"create","timestamp":"2026-03-06T07:58:19.571640837Z","issue_id":"bar-78d","payload":{"description":"fwmark 100 on validator source ports, SNAT to 137.239.194.65, policy route via doublezero0 table ashburn. 
Playbook: playbooks/ashburn-relay-biscayne.yml -t outbound","priority":"1","title":"biscayne: Outbound SNAT + policy routing","type":"task"}} +{"type":"create","timestamp":"2026-03-06T07:58:23.377441628Z","issue_id":"bar-f3b","payload":{"description":"Verify traffic-policy counters on both switches, iptables counters on biscayne, validator gossip ContactInfo shows 137.239.194.65, repair peer count increases, slot catchup rate improves. Write memory on both switches.","priority":"1","title":"End-to-end verification","type":"task"}} +{"type":"create","timestamp":"2026-03-06T07:58:27.341320984Z","issue_id":"bar-8a9","payload":{"description":"After stable: remove old SHRED-RELAY policy and ACL from was-sw01, remove old 64.92.84.81:20000 DNAT from biscayne.","priority":"2","title":"Cleanup old SHRED-RELAY","type":"task"}} +{"type":"rename","timestamp":"2026-03-06T07:58:32.091645662Z","issue_id":"bar-a47","payload":{"new_id":"bar-48f.1"}} +{"type":"dep_add","timestamp":"2026-03-06T07:58:32.091647902Z","issue_id":"bar-48f.1","payload":{"dep_type":"parent-child","depends_on":"bar-48f"}} +{"type":"rename","timestamp":"2026-03-06T07:58:32.274391159Z","issue_id":"bar-0e5","payload":{"new_id":"bar-48f.2"}} +{"type":"dep_add","timestamp":"2026-03-06T07:58:32.274392749Z","issue_id":"bar-48f.2","payload":{"dep_type":"parent-child","depends_on":"bar-48f"}} +{"type":"rename","timestamp":"2026-03-06T07:58:32.468426932Z","issue_id":"bar-f9b","payload":{"new_id":"bar-48f.3"}} +{"type":"dep_add","timestamp":"2026-03-06T07:58:32.468428522Z","issue_id":"bar-48f.3","payload":{"dep_type":"parent-child","depends_on":"bar-48f"}} +{"type":"rename","timestamp":"2026-03-06T07:58:32.657295386Z","issue_id":"bar-bf4","payload":{"new_id":"bar-48f.4"}} +{"type":"dep_add","timestamp":"2026-03-06T07:58:32.657297846Z","issue_id":"bar-48f.4","payload":{"dep_type":"parent-child","depends_on":"bar-48f"}} 
+{"type":"rename","timestamp":"2026-03-06T07:58:32.864939519Z","issue_id":"bar-78d","payload":{"new_id":"bar-48f.5"}} +{"type":"dep_add","timestamp":"2026-03-06T07:58:32.864941739Z","issue_id":"bar-48f.5","payload":{"dep_type":"parent-child","depends_on":"bar-48f"}} +{"type":"rename","timestamp":"2026-03-06T07:58:33.364299485Z","issue_id":"bar-f3b","payload":{"new_id":"bar-48f.6"}} +{"type":"dep_add","timestamp":"2026-03-06T07:58:33.364301305Z","issue_id":"bar-48f.6","payload":{"dep_type":"parent-child","depends_on":"bar-48f"}} +{"type":"rename","timestamp":"2026-03-06T07:58:33.639638369Z","issue_id":"bar-8a9","payload":{"new_id":"bar-48f.7"}} +{"type":"dep_add","timestamp":"2026-03-06T07:58:33.639640069Z","issue_id":"bar-48f.7","payload":{"dep_type":"parent-child","depends_on":"bar-48f"}} +{"type":"dep_add","timestamp":"2026-03-06T07:58:39.486721446Z","issue_id":"bar-48f.2","payload":{"dep_type":"blocks","depends_on":"bar-48f.1"}} +{"type":"dep_add","timestamp":"2026-03-06T07:58:39.911749641Z","issue_id":"bar-48f.3","payload":{"dep_type":"blocks","depends_on":"bar-48f.2"}} +{"type":"dep_add","timestamp":"2026-03-06T07:58:40.398532353Z","issue_id":"bar-48f.4","payload":{"dep_type":"blocks","depends_on":"bar-48f.3"}} +{"type":"dep_add","timestamp":"2026-03-06T07:58:40.762666046Z","issue_id":"bar-48f.5","payload":{"dep_type":"blocks","depends_on":"bar-48f.4"}} +{"type":"dep_add","timestamp":"2026-03-06T07:58:41.173027726Z","issue_id":"bar-48f.6","payload":{"dep_type":"blocks","depends_on":"bar-48f.5"}} +{"type":"dep_add","timestamp":"2026-03-06T07:58:41.467313496Z","issue_id":"bar-48f.7","payload":{"dep_type":"blocks","depends_on":"bar-48f.6"}} +{"type":"update","timestamp":"2026-03-06T18:32:00.041874266Z","issue_id":"bar-48f.1","payload":{"description":"Run ansible playbook (pane A) to apply config session with 5-min auto-revert. Review output. In pane B, SSH to install@137.239.200.198 and manually verify (show session-config diffs, show traffic-policy counters). 
Type 'configure session validator-relay commit' and 'write memory' when satisfied. Playbook: playbooks/ashburn-relay-was-sw01.yml (do NOT use -e commit=true; commit is manual via SSH)."}} +{"type":"update","timestamp":"2026-03-06T18:32:05.861153312Z","issue_id":"bar-48f.4","payload":{"description":"Run ansible playbook pre-flight (pane A) to discover GRE tunnel interface. Then run with -e apply=true -e tunnel_interface=TunnelX for 5-min auto-revert. In pane B, SSH to install@209.42.167.133 and manually verify. Type 'configure session validator-outbound commit' and 'write memory' when satisfied. Playbook: playbooks/ashburn-relay-mia-sw01.yml (do NOT use -e commit=true; commit is manual via SSH)."}} +{"type":"status_update","timestamp":"2026-03-06T18:35:35.320628231Z","issue_id":"bar-48f","payload":{"status":"in_progress"}} +{"type":"status_update","timestamp":"2026-03-06T18:35:35.717040604Z","issue_id":"bar-48f.1","payload":{"status":"in_progress"}} +{"type":"close","timestamp":"2026-03-06T20:12:45.087966093Z","issue_id":"bar-48f.1","payload":{}} +{"type":"status_update","timestamp":"2026-03-06T20:16:34.00466057Z","issue_id":"bar-48f.2","payload":{"status":"in_progress"}} +{"type":"close","timestamp":"2026-03-06T20:17:18.681131396Z","issue_id":"bar-48f.2","payload":{}} +{"type":"status_update","timestamp":"2026-03-06T20:17:19.159927405Z","issue_id":"bar-48f.3","payload":{"status":"in_progress"}} +{"type":"close","timestamp":"2026-03-06T20:18:42.42112937Z","issue_id":"bar-48f.3","payload":{}} +{"type":"status_update","timestamp":"2026-03-06T20:18:42.930237032Z","issue_id":"bar-48f.4","payload":{"status":"in_progress"}} +{"type":"create","timestamp":"2026-03-08T06:58:52.122307149Z","issue_id":"bar-02e","payload":{"description":"/srv/solana is a directory on the ZFS dataset biscayne/DATA/srv (mounted at /srv\nwith overlay=on). 
The fstab zvol mount at /srv/solana was shadowed by ZFS.\n\nFixed 2026-03-08: removed /srv/solana fstab entries, canonical data path is now\n/srv/kind/solana. All playbooks updated. fstab clean. Mounts verified.","priority":"1","title":"zvol mount: /srv/solana resolves to ZFS dataset, not zvol","type":"bug"}} +{"type":"create","timestamp":"2026-03-08T06:58:52.557582445Z","issue_id":"bar-41a","payload":{"description":"laconic-so creates configmap resources for telegraf but does not generate\nvolumeMounts in the pod spec. The telegraf container crashes because\n/etc/telegraf and /scripts are empty. Manual configmap creation works but\nthe volume mounts are still missing. Root cause is in laconic-so's stack\nmigration — configmap volume mount generation is incomplete.","priority":"1","title":"telegraf volume mounts missing from pod spec","type":"bug"}} +{"type":"create","timestamp":"2026-03-08T06:58:53.065888933Z","issue_id":"bar-a3b","payload":{"description":"Validator exits shortly after starting. Log shows UDP port reachability checks\nand TCP port checks failing. Needs full log analysis from kind node path\n(/mnt/validator-log/validator.log). May be related to networking/firewall\nconfiguration or the shred relay setup.","priority":"0","title":"agave-validator crash after ~57 seconds","type":"bug"}} +{"type":"create","timestamp":"2026-03-08T06:58:53.589221516Z","issue_id":"bar-b04","payload":{"description":"Once laconic-so deployment prepare lands, update biscayne-redeploy.yml to use\nprepare instead of start+scale-to-0 workaround. The deploy tag section should\ncall deployment prepare, and scale-up should call deployment start\n--skip-cluster-management.","priority":"2","title":"update biscayne-redeploy to use deployment prepare","type":"task"}} +{"type":"create","timestamp":"2026-03-08T06:58:54.238136989Z","issue_id":"bar-b41","payload":{"description":"Automate the leapfrog recovery strategy documented in CLAUDE.md. 
When the\nvalidator is stuck in a repair-dependent gap, download a fresh snapshot past\nthe incomplete zone while preserving the existing ledger (which has turbine\nshreds at the tip). Needs: shred completeness check, snapshot slot targeting,\nselective wipe (accounts+snapshots only, keep ledger).","priority":"2","title":"snapshot leapfrog recovery playbook","type":"feature"}} +{"type":"create","timestamp":"2026-03-08T06:58:54.756609299Z","issue_id":"bar-0b4","payload":{"description":"biscayne-prepare-agave.yml unconditionally imports ashburn-relay-biscayne.yml\nat the end. This couples filesystem preparation to relay setup. The relay\nplaybook fails if the kind node isn't running (ping to 172.20.0.2 fails).\nShould be a separate playbook invocation, not an import.","priority":"3","title":"biscayne-prepare-agave imports ashburn-relay-biscayne unconditionally","type":"bug"}} +{"type":"close","timestamp":"2026-03-08T06:59:00.140156099Z","issue_id":"bar-02e","payload":{}} From b2342bc5391d4a7839cfbc2662aad877cd8cff42 Mon Sep 17 00:00:00 2001 From: "A. F. Dudley" Date: Sun, 8 Mar 2026 18:43:41 +0000 Subject: [PATCH 30/62] fix: switch ramdisk from /dev/ram0 to tmpfs, refactor snapshot-download.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The /dev/ram0 + XFS + format-ramdisk.service approach was unnecessary complexity from a migration confusion — there was no actual tmpfs bug with io_uring. tmpfs is simpler (no format-on-boot), resizable on the fly, and what every other Solana operator uses. 
Changes: - prepare-agave: remove format-ramdisk.service and ramdisk-accounts.service, use tmpfs fstab entry with size=1024G (was 600G /dev/ram0, too small) - recover: remove ramdisk_device var (no longer needed) - redeploy: wipe accounts by rm -rf instead of umount+mkfs - snapshot-download.py: extract download_best_snapshot() public API for use by the new container entrypoint.py (in agave-stack) - CLAUDE.md: update ramdisk docs, fix /srv/solana → /srv/kind/solana paths - health-check: fix ramdisk path references Co-Authored-By: Claude Opus 4.6 --- .gitignore | 1 + CLAUDE.md | 41 ++-- playbooks/biscayne-prepare-agave.yml | 231 +++++++------------- playbooks/biscayne-recover.yml | 7 +- playbooks/biscayne-redeploy.yml | 17 +- playbooks/biscayne-start.yml | 2 +- playbooks/health-check.yml | 4 +- scripts/snapshot-download.py | 313 +++++++++++++++++---------- 8 files changed, 314 insertions(+), 302 deletions(-) diff --git a/.gitignore b/.gitignore index 06aea24a..220c6a36 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ .venv/ sessions.duckdb sessions.duckdb.wal +.worktrees diff --git a/CLAUDE.md b/CLAUDE.md index 49fb6be9..6fb2164c 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -14,9 +14,8 @@ below it are correct. Playbooks belong to exactly one layer. | 5. Deploy agave | Deploy agave-stack into kind, snapshot download, scale up | `biscayne-redeploy.yml` (snapshot/verify tags), `biscayne-recover.yml` | **Layer 4 invariants** (asserted by `biscayne-prepare-agave.yml`): -- `/srv/solana` is XFS on a zvol — agave uses io_uring which deadlocks on ZFS -- `/srv/solana/ramdisk` is XFS on `/dev/ram0` — accounts must be on ramdisk -- `/srv/kind/solana` is an rbind of `/srv/solana` — makes the zvol visible to kind at `/mnt/solana` +- `/srv/kind/solana` is XFS on a zvol — agave uses io_uring which deadlocks on ZFS. 
`/srv/solana` is NOT the zvol (it's a ZFS dataset directory); never use it for data paths +- `/srv/kind/solana/ramdisk` is tmpfs (1TB) — accounts must be in RAM These invariants are checked at runtime and persisted to fstab/systemd so they survive reboot. They are agave's requirements reaching into the boot sequence, @@ -58,18 +57,13 @@ Correct shutdown sequence: ### Ramdisk -The accounts directory must be on a ramdisk for performance. `/dev/ram0` loses its -filesystem on reboot and must be reformatted before mounting. +The accounts directory must be in RAM for performance. tmpfs is used instead of +`/dev/ram0` — simpler (no format-on-boot service needed), resizable on the fly +with `mount -o remount,size=`, and what most Solana operators use. -**Boot ordering is handled by systemd units** (installed by `biscayne-prepare-agave.yml`): -- `format-ramdisk.service`: runs `mkfs.xfs -f /dev/ram0` before `local-fs.target` -- fstab entry: mounts `/dev/ram0` at `/srv/solana/ramdisk` with - `x-systemd.requires=format-ramdisk.service` -- `ramdisk-accounts.service`: creates `/srv/solana/ramdisk/accounts` and sets - ownership after the mount - -These units run before docker, so the kind node's bind mounts always see the -ramdisk. **No manual intervention is needed after reboot.** +**Boot ordering**: fstab entry mounts tmpfs at `/srv/kind/solana/ramdisk` with +`x-systemd.requires=srv-kind-solana.mount`. tmpfs mounts natively via fstab — +no systemd format service needed. **No manual intervention after reboot.** **Mount propagation**: The kind node bind-mounts `/srv/kind` → `/mnt` at container start. laconic-so sets `propagation: HostToContainer` on all kind extraMounts @@ -139,10 +133,11 @@ kind node via a single bind mount. 
- Deployment: `laconic-70ce4c4b47e23b85-deployment` - Kind node container: `laconic-70ce4c4b47e23b85-control-plane` - Deployment dir: `/srv/deployments/agave` -- Snapshot dir: `/srv/solana/snapshots` -- Ledger dir: `/srv/solana/ledger` -- Accounts dir: `/srv/solana/ramdisk/accounts` -- Log dir: `/srv/solana/log` +- Snapshot dir: `/srv/kind/solana/snapshots` (on zvol, visible to kind at `/mnt/validator-snapshots`) +- Ledger dir: `/srv/kind/solana/ledger` (on zvol, visible to kind at `/mnt/validator-ledger`) +- Accounts dir: `/srv/kind/solana/ramdisk/accounts` (on tmpfs ramdisk, visible to kind at `/mnt/validator-accounts`) +- Log dir: `/srv/kind/solana/log` (on zvol, visible to kind at `/mnt/validator-log`) +- **WARNING**: `/srv/solana` is a ZFS dataset directory, NOT the zvol. Never use it for data paths. - Host bind mount root: `/srv/kind` -> kind node `/mnt` - laconic-so: `/home/rix/.local/bin/laconic-so` (editable install) @@ -150,10 +145,10 @@ kind node via a single bind mount. 
| PV Name | hostPath | |----------------------|-------------------------------| -| validator-snapshots | /mnt/solana/snapshots | -| validator-ledger | /mnt/solana/ledger | -| validator-accounts | /mnt/solana/ramdisk/accounts | -| validator-log | /mnt/solana/log | +| validator-snapshots | /mnt/validator-snapshots | +| validator-ledger | /mnt/validator-ledger | +| validator-accounts | /mnt/validator-accounts | +| validator-log | /mnt/validator-log | ### Snapshot Freshness @@ -164,7 +159,7 @@ try to catch up from an old snapshot — it will take too long and may never con Check with: ``` # Snapshot slot (from filename) -ls /srv/solana/snapshots/snapshot-*.tar.* +ls /srv/kind/solana/snapshots/snapshot-*.tar.* # Current mainnet slot curl -s -X POST -H "Content-Type: application/json" \ diff --git a/playbooks/biscayne-prepare-agave.yml b/playbooks/biscayne-prepare-agave.yml index a817f946..8f5944c4 100644 --- a/playbooks/biscayne-prepare-agave.yml +++ b/playbooks/biscayne-prepare-agave.yml @@ -10,26 +10,18 @@ # # Agave requires three things from the host that kind doesn't provide: # -# Invariant 1: /srv/solana is XFS on a zvol (not ZFS) +# Invariant 1: /srv/kind/solana is XFS on a zvol (not ZFS) # Why: agave uses io_uring for async I/O. io_uring workers deadlock on # ZFS datasets (D-state in dsl_dir_tempreserve_space). XFS on a zvol -# (block device) works fine. This is why the data lives on a zvol, not -# a ZFS dataset. -# Persisted as: fstab entry mounting /dev/zvol/.../solana at /srv/solana +# (block device) works fine. /srv/solana is NOT the zvol — it's a +# directory on the ZFS dataset biscayne/DATA/srv. All data paths must +# use /srv/kind/solana which is the actual zvol mount. +# Persisted as: fstab entry mounting /dev/zvol/.../solana at /srv/kind/solana # -# Invariant 2: /srv/solana/ramdisk is XFS on /dev/ram0 (600G ramdisk) -# Why: agave accounts must be on ramdisk for performance. 
/dev/ram0 -# loses its filesystem on reboot, so it must be reformatted before -# mounting each boot. -# Persisted as: format-ramdisk.service (mkfs before mount) + fstab entry -# -# Invariant 3: /srv/kind/solana is XFS (zvol) and /srv/kind/solana/ramdisk is XFS (ram0) -# Why: kind mounts /srv/kind → /mnt inside the kind node. PVs reference -# /mnt/solana/*. An rbind of /srv/solana does NOT work because ZFS's -# shared propagation (shared:75 on /srv) overlays ZFS on top of the bind. -# Direct device mounts bypass propagation entirely. -# Persisted as: two fstab entries — zvol at /srv/kind/solana, ram0 at -# /srv/kind/solana/ramdisk, both with x-systemd.requires ordering +# Invariant 2: /srv/kind/solana/ramdisk is tmpfs (1TB) +# Why: agave accounts must be in RAM for performance. tmpfs survives +# process restarts but not host reboots (same as /dev/ram0 but simpler). +# Persisted as: fstab entry (no format service needed) # # This playbook checks each invariant and only acts if it's not met. # Idempotent — safe to run multiple times. 
@@ -42,132 +34,76 @@ gather_facts: false become: true vars: - ramdisk_device: /dev/ram0 zvol_device: /dev/zvol/biscayne/DATA/volumes/solana - solana_dir: /srv/solana - ramdisk_mount: /srv/solana/ramdisk kind_solana_dir: /srv/kind/solana - accounts_dir: /srv/solana/ramdisk/accounts + ramdisk_mount: /srv/kind/solana/ramdisk + ramdisk_size: 1024G + accounts_dir: /srv/kind/solana/ramdisk/accounts deployment_dir: /srv/deployments/agave - kind_ramdisk_opts: "noatime,nodiratime,nofail,x-systemd.requires=format-ramdisk.service,x-systemd.requires=srv-kind-solana.mount" tasks: - # ---- systemd units ---------------------------------------------------------- - - name: Install ramdisk format service - ansible.builtin.copy: - dest: /etc/systemd/system/format-ramdisk.service - mode: "0644" - content: | - [Unit] - Description=Format /dev/ram0 as XFS for Solana accounts - DefaultDependencies=no - Before=local-fs.target - After=systemd-modules-load.service - ConditionPathExists={{ ramdisk_device }} + # ---- cleanup legacy ramdisk services ----------------------------------------- + - name: Stop and disable legacy ramdisk services + ansible.builtin.systemd: + name: "{{ item }}" + state: stopped + enabled: false + loop: + - format-ramdisk.service + - ramdisk-accounts.service + failed_when: false - [Service] - Type=oneshot - RemainAfterExit=yes - ExecStart=/sbin/mkfs.xfs -f {{ ramdisk_device }} - - [Install] - WantedBy=local-fs.target - register: unit_file - - - name: Install ramdisk post-mount service - ansible.builtin.copy: - dest: /etc/systemd/system/ramdisk-accounts.service - mode: "0644" - content: | - [Unit] - Description=Create Solana accounts directory on ramdisk - After=srv-solana-ramdisk.mount - Requires=srv-solana-ramdisk.mount - - [Service] - Type=oneshot - RemainAfterExit=yes - ExecStart=/bin/bash -c 'mkdir -p {{ accounts_dir }} && chown solana:solana {{ ramdisk_mount }} {{ accounts_dir }}' - - [Install] - WantedBy=multi-user.target - register: accounts_unit + - name: 
Remove legacy ramdisk service files + ansible.builtin.file: + path: "/etc/systemd/system/{{ item }}" + state: absent + loop: + - format-ramdisk.service + - ramdisk-accounts.service + register: legacy_units_removed # ---- fstab entries ---------------------------------------------------------- - - name: Ensure zvol fstab entry + # /srv/solana is NOT the zvol — it's a directory on the ZFS dataset. + # All data paths use /srv/kind/solana (the actual zvol mount). + - name: Remove stale /srv/solana zvol fstab entry ansible.builtin.lineinfile: path: /etc/fstab - regexp: '^\S+\s+{{ solana_dir }}\s' - line: '{{ zvol_device }} {{ solana_dir }} xfs defaults 0 2' - register: fstab_zvol + regexp: '^\S+\s+/srv/solana\s+xfs' + state: absent - - name: Ensure ramdisk fstab entry + - name: Remove stale /srv/solana/ramdisk fstab entry ansible.builtin.lineinfile: path: /etc/fstab - regexp: '^{{ ramdisk_device }}\s+{{ ramdisk_mount }}\s' - line: '{{ ramdisk_device }} {{ ramdisk_mount }} xfs noatime,nodiratime,nofail,x-systemd.requires=format-ramdisk.service 0 0' - register: fstab_ramdisk + regexp: '^/dev/ram0\s+' + state: absent - # Direct device mounts at /srv/kind/solana — bypasses ZFS shared propagation. - # An rbind of /srv/solana fails because ZFS's shared:75 on /srv overlays - # ZFS on top of any bind mount under /srv. Direct device mounts avoid this. 
- - name: Ensure kind zvol fstab entry - ansible.builtin.lineinfile: - path: /etc/fstab - regexp: '^\S+\s+{{ kind_solana_dir }}\s' - line: '{{ zvol_device }} {{ kind_solana_dir }} xfs defaults,nofail,x-systemd.requires=zfs-mount.service 0 0' - register: fstab_kind - - - name: Ensure kind ramdisk fstab entry - ansible.builtin.lineinfile: - path: /etc/fstab - regexp: '^\S+\s+{{ kind_solana_dir }}/ramdisk\s' - line: "{{ ramdisk_device }} {{ kind_solana_dir }}/ramdisk xfs {{ kind_ramdisk_opts }} 0 0" - register: fstab_kind_ramdisk - - # Remove stale rbind fstab entry from previous approach - name: Remove stale kind rbind fstab entry ansible.builtin.lineinfile: path: /etc/fstab regexp: '^\S+\s+{{ kind_solana_dir }}\s+none\s+rbind' state: absent - register: fstab_stale_rbind - # ---- reload and enable ------------------------------------------------------ + - name: Ensure zvol fstab entry + ansible.builtin.lineinfile: + path: /etc/fstab + regexp: '^\S+\s+{{ kind_solana_dir }}\s' + line: '{{ zvol_device }} {{ kind_solana_dir }} xfs defaults,nofail,x-systemd.requires=zfs-mount.service 0 0' + register: fstab_zvol + + - name: Ensure tmpfs ramdisk fstab entry + ansible.builtin.lineinfile: + path: /etc/fstab + regexp: '^\S+\s+{{ ramdisk_mount }}\s' + line: "tmpfs {{ ramdisk_mount }} tmpfs nodev,nosuid,noexec,nodiratime,size={{ ramdisk_size }},nofail,x-systemd.requires=srv-kind-solana.mount 0 0" + register: fstab_ramdisk + + # ---- reload systemd if anything changed -------------------------------------- - name: Reload systemd ansible.builtin.systemd: daemon_reload: true - when: >- - unit_file.changed or accounts_unit.changed or - fstab_zvol.changed or fstab_ramdisk.changed or - fstab_kind.changed or fstab_kind_ramdisk.changed or - fstab_stale_rbind.changed + when: legacy_units_removed.changed or fstab_zvol.changed or fstab_ramdisk.changed - - name: Enable ramdisk services - ansible.builtin.systemd: - name: "{{ item }}" - enabled: true - loop: - - format-ramdisk.service - - 
ramdisk-accounts.service - - # ---- apply now if ramdisk not mounted -------------------------------------- - - name: Check if ramdisk is mounted - ansible.builtin.command: mountpoint -q {{ ramdisk_mount }} - register: ramdisk_mounted - failed_when: false - changed_when: false - - - name: Format and mount ramdisk now - ansible.builtin.shell: | - mkfs.xfs -f {{ ramdisk_device }} - mount {{ ramdisk_mount }} - mkdir -p {{ accounts_dir }} - chown solana:solana {{ ramdisk_mount }} {{ accounts_dir }} - changed_when: ramdisk_mounted.rc != 0 - when: ramdisk_mounted.rc != 0 - - # ---- apply kind device mounts now if not correct ---------------------------- + # ---- apply device mounts now if not correct ---------------------------------- - name: Check kind zvol mount is XFS ansible.builtin.shell: cmd: > @@ -178,16 +114,16 @@ failed_when: false changed_when: false - - name: Unmount stale kind mounts + - name: Unmount stale mounts ansible.builtin.shell: cmd: | - umount {{ kind_solana_dir }}/ramdisk 2>/dev/null || true + umount {{ ramdisk_mount }} 2>/dev/null || true umount {{ kind_solana_dir }} 2>/dev/null || true executable: /bin/bash changed_when: kind_zvol_check.rc != 0 when: kind_zvol_check.rc != 0 - - name: Mount zvol at kind solana dir + - name: Mount zvol ansible.posix.mount: path: "{{ kind_solana_dir }}" src: "{{ zvol_device }}" @@ -195,24 +131,32 @@ state: mounted when: kind_zvol_check.rc != 0 - - name: Check kind ramdisk mount is XFS + - name: Check ramdisk mount is tmpfs ansible.builtin.shell: cmd: > set -o pipefail && - findmnt -n -o FSTYPE {{ kind_solana_dir }}/ramdisk | grep -q xfs + findmnt -n -o FSTYPE {{ ramdisk_mount }} | grep -q tmpfs executable: /bin/bash - register: kind_ramdisk_check + register: ramdisk_check failed_when: false changed_when: false - - name: Mount ramdisk at kind solana ramdisk dir + - name: Mount tmpfs ramdisk ansible.posix.mount: - path: "{{ kind_solana_dir }}/ramdisk" - src: "{{ ramdisk_device }}" - fstype: xfs - opts: 
noatime,nodiratime + path: "{{ ramdisk_mount }}" + src: tmpfs + fstype: tmpfs + opts: "nodev,nosuid,noexec,nodiratime,size={{ ramdisk_size }}" state: mounted - when: kind_ramdisk_check.rc != 0 + when: ramdisk_check.rc != 0 + + - name: Create accounts directory + ansible.builtin.file: + path: "{{ accounts_dir }}" + state: directory + owner: solana + group: solana + mode: "0755" # Docker requires shared propagation on mounts it bind-mounts into # containers. Without this, `docker start` fails with "not a shared @@ -227,36 +171,24 @@ changed_when: false # ---- verify ----------------------------------------------------------------- - - name: Verify ramdisk is XFS - ansible.builtin.shell: - cmd: set -o pipefail && df -T {{ ramdisk_mount }} | grep -q xfs - executable: /bin/bash - changed_when: false - - name: Verify zvol is XFS - ansible.builtin.shell: - cmd: set -o pipefail && df -T {{ solana_dir }} | grep -q xfs - executable: /bin/bash - changed_when: false - - - name: Verify kind zvol is XFS ansible.builtin.shell: cmd: set -o pipefail && df -T {{ kind_solana_dir }} | grep -q xfs executable: /bin/bash changed_when: false - - name: Verify kind ramdisk is XFS + - name: Verify ramdisk is tmpfs ansible.builtin.shell: - cmd: set -o pipefail && df -T {{ kind_solana_dir }}/ramdisk | grep -q xfs + cmd: set -o pipefail && df -T {{ ramdisk_mount }} | grep -q tmpfs executable: /bin/bash changed_when: false - - name: Verify kind mount contents + - name: Verify mount contents ansible.builtin.shell: cmd: > set -o pipefail && ls {{ kind_solana_dir }}/ledger {{ kind_solana_dir }}/snapshots - {{ kind_solana_dir }}/ramdisk/accounts 2>&1 | head -5 + {{ ramdisk_mount }}/accounts 2>&1 | head -5 executable: /bin/bash register: kind_mount_verify changed_when: false @@ -273,13 +205,12 @@ register: cluster_id_result changed_when: false - - name: Check kind node XFS visibility + - name: Check kind node filesystem visibility ansible.builtin.shell: cmd: > set -o pipefail && docker exec {{ 
cluster_id_result.stdout }}-control-plane df -T /mnt/validator-ledger /mnt/validator-accounts - | grep -c xfs executable: /bin/bash register: kind_fstype changed_when: false @@ -289,7 +220,7 @@ ansible.builtin.debug: msg: kind_mount: "{{ kind_mount_verify.stdout_lines }}" - kind_fstype: "{{ 'xfs (correct)' if kind_fstype.stdout | default('0') | int >= 2 else 'NOT XFS — kind restart required' }}" + kind_fstype: "{{ kind_fstype.stdout_lines | default([]) }}" - name: Configure Ashburn validator relay ansible.builtin.import_playbook: ashburn-relay-biscayne.yml diff --git a/playbooks/biscayne-recover.yml b/playbooks/biscayne-recover.yml index f8b9a89e..07388207 100644 --- a/playbooks/biscayne-recover.yml +++ b/playbooks/biscayne-recover.yml @@ -33,10 +33,9 @@ kind_cluster: laconic-70ce4c4b47e23b85 k8s_namespace: "laconic-{{ kind_cluster }}" deployment_name: "{{ kind_cluster }}-deployment" - snapshot_dir: /srv/solana/snapshots - accounts_dir: /srv/solana/ramdisk/accounts - ramdisk_mount: /srv/solana/ramdisk - ramdisk_device: /dev/ram0 + snapshot_dir: /srv/kind/solana/snapshots + accounts_dir: /srv/kind/solana/ramdisk/accounts + ramdisk_mount: /srv/kind/solana/ramdisk snapshot_script_local: "{{ playbook_dir }}/../scripts/snapshot-download.py" snapshot_script: /tmp/snapshot-download.py snapshot_args: "" diff --git a/playbooks/biscayne-redeploy.yml b/playbooks/biscayne-redeploy.yml index b6f263cd..608ec328 100644 --- a/playbooks/biscayne-redeploy.yml +++ b/playbooks/biscayne-redeploy.yml @@ -57,11 +57,11 @@ kind_cluster: laconic-70ce4c4b47e23b85 k8s_namespace: "laconic-{{ kind_cluster }}" deployment_name: "{{ kind_cluster }}-deployment" - snapshot_dir: /srv/solana/snapshots - ledger_dir: /srv/solana/ledger - accounts_dir: /srv/solana/ramdisk/accounts - ramdisk_mount: /srv/solana/ramdisk - ramdisk_device: /dev/ram0 + snapshot_dir: /srv/kind/solana/snapshots + ledger_dir: /srv/kind/solana/ledger + accounts_dir: /srv/kind/solana/ramdisk/accounts + ramdisk_mount: 
/srv/kind/solana/ramdisk + ramdisk_size: 1024G snapshot_script_local: "{{ playbook_dir }}/../scripts/snapshot-download.py" snapshot_script: /tmp/snapshot-download.py # Flags — non-destructive by default @@ -139,12 +139,9 @@ when: wipe_ledger | bool tags: [wipe] - - name: Wipe accounts ramdisk (umount + mkfs.xfs + mount) + - name: Wipe accounts ramdisk ansible.builtin.shell: | - set -o pipefail - mountpoint -q {{ ramdisk_mount }} && umount {{ ramdisk_mount }} || true - mkfs.xfs -f {{ ramdisk_device }} - mount {{ ramdisk_mount }} + rm -rf {{ accounts_dir }}/* mkdir -p {{ accounts_dir }} chown solana:solana {{ ramdisk_mount }} {{ accounts_dir }} become: true diff --git a/playbooks/biscayne-start.yml b/playbooks/biscayne-start.yml index 36220f4f..6c85699d 100644 --- a/playbooks/biscayne-start.yml +++ b/playbooks/biscayne-start.yml @@ -6,7 +6,7 @@ # # Prerequisites: # - biscayne-prepare-agave.yml has been run (fstab entries, systemd units) -# - A snapshot exists in /srv/solana/snapshots (or use biscayne-recover.yml) +# - A snapshot exists in /srv/kind/solana/snapshots (or use biscayne-recover.yml) # # Usage: # ansible-playbook playbooks/biscayne-start.yml diff --git a/playbooks/health-check.yml b/playbooks/health-check.yml index 55a5db34..cba75c2e 100644 --- a/playbooks/health-check.yml +++ b/playbooks/health-check.yml @@ -211,7 +211,7 @@ # ------------------------------------------------------------------ - name: Check ramdisk usage ansible.builtin.command: - cmd: df -h /srv/solana/ramdisk + cmd: df -h /srv/kind/solana/ramdisk register: ramdisk_df changed_when: false failed_when: false @@ -238,7 +238,7 @@ cmd: > set -o pipefail && findmnt -n -o TARGET,SOURCE,FSTYPE,PROPAGATION - /srv/solana /srv/solana/ramdisk /srv/kind/solana 2>&1 + /srv/kind/solana /srv/kind/solana/ramdisk 2>&1 executable: /bin/bash register: host_mounts changed_when: false diff --git a/scripts/snapshot-download.py b/scripts/snapshot-download.py index a8caddfc..c19830fe 100755 --- 
a/scripts/snapshot-download.py +++ b/scripts/snapshot-download.py @@ -9,8 +9,8 @@ Based on the discovery approach from etcusr/solana-snapshot-finder but replaces the single-connection wget download with aria2c parallel chunked downloads. Usage: - # Download to /srv/solana/snapshots (mainnet, 16 connections) - ./snapshot-download.py -o /srv/solana/snapshots + # Download to /srv/kind/solana/snapshots (mainnet, 16 connections) + ./snapshot-download.py -o /srv/kind/solana/snapshots # Dry run — find best source, print URL ./snapshot-download.py --dry-run @@ -43,7 +43,6 @@ import urllib.request from dataclasses import dataclass, field from http.client import HTTPResponse from pathlib import Path -from typing import NoReturn from urllib.request import Request log: logging.Logger = logging.getLogger("snapshot-download") @@ -192,16 +191,12 @@ def _parse_snapshot_filename(location: str) -> tuple[str, str | None]: def probe_rpc_snapshot( rpc_address: str, current_slot: int, - max_age_slots: int, - max_latency_ms: float, ) -> SnapshotSource | None: """Probe a single RPC node for available snapshots. - Probes for full snapshot first (required), then incremental. Records all - available files. Which files to actually download is decided at download - time based on what already exists locally — not here. - - Based on the discovery approach from etcusr/solana-snapshot-finder. + Discovery only — no filtering. Returns a SnapshotSource with all available + info so the caller can decide what to keep. Filtering happens after all + probes complete, so rejected sources are still visible for debugging. 
""" full_url: str = f"http://{rpc_address}/snapshot.tar.bz2" @@ -211,8 +206,6 @@ def probe_rpc_snapshot( return None latency_ms: float = full_latency * 1000 - if latency_ms > max_latency_ms: - return None full_filename, full_path = _parse_snapshot_filename(full_location) fm: re.Match[str] | None = FULL_SNAP_RE.match(full_filename) @@ -222,9 +215,6 @@ def probe_rpc_snapshot( full_snap_slot: int = int(fm.group(1)) slots_diff: int = current_slot - full_snap_slot - if slots_diff > max_age_slots or slots_diff < -100: - return None - file_paths: list[str] = [full_path] # Also check for incremental snapshot @@ -255,7 +245,11 @@ def discover_sources( threads: int, version_filter: str | None, ) -> list[SnapshotSource]: - """Discover all snapshot sources from the cluster.""" + """Discover all snapshot sources, then filter. + + Probing and filtering are separate: all reachable sources are collected + first so we can report what exists even if filters reject everything. + """ rpc_nodes: list[str] = get_cluster_rpc_nodes(rpc_url, version_filter) if not rpc_nodes: log.error("No RPC nodes found via getClusterNodes") @@ -263,31 +257,59 @@ def discover_sources( log.info("Found %d RPC nodes, probing for snapshots...", len(rpc_nodes)) - sources: list[SnapshotSource] = [] + all_sources: list[SnapshotSource] = [] with concurrent.futures.ThreadPoolExecutor(max_workers=threads) as pool: futures: dict[concurrent.futures.Future[SnapshotSource | None], str] = { - pool.submit( - probe_rpc_snapshot, addr, current_slot, - max_age_slots, max_latency_ms, - ): addr + pool.submit(probe_rpc_snapshot, addr, current_slot): addr for addr in rpc_nodes } done: int = 0 for future in concurrent.futures.as_completed(futures): done += 1 if done % 200 == 0: - log.info(" probed %d/%d nodes, %d sources found", - done, len(rpc_nodes), len(sources)) + log.info(" probed %d/%d nodes, %d reachable", + done, len(rpc_nodes), len(all_sources)) try: result: SnapshotSource | None = future.result() except 
(urllib.error.URLError, OSError, TimeoutError) as e: log.debug("Probe failed for %s: %s", futures[future], e) continue if result: - sources.append(result) + all_sources.append(result) - log.info("Found %d RPC nodes with suitable snapshots", len(sources)) - return sources + log.info("Discovered %d reachable sources", len(all_sources)) + + # Apply filters + filtered: list[SnapshotSource] = [] + rejected_age: int = 0 + rejected_latency: int = 0 + for src in all_sources: + if src.slots_diff > max_age_slots or src.slots_diff < -100: + rejected_age += 1 + continue + if src.latency_ms > max_latency_ms: + rejected_latency += 1 + continue + filtered.append(src) + + if rejected_age or rejected_latency: + log.info("Filtered: %d rejected by age (>%d slots), %d by latency (>%.0fms)", + rejected_age, max_age_slots, rejected_latency, max_latency_ms) + + if not filtered and all_sources: + # Show what was available so the user can adjust filters + all_sources.sort(key=lambda s: s.slots_diff) + best = all_sources[0] + log.warning("All %d sources rejected by filters. Best available: " + "%s (age=%d slots, latency=%.0fms). 
" + "Try --max-snapshot-age %d --max-latency %.0f", + len(all_sources), best.rpc_address, + best.slots_diff, best.latency_ms, + best.slots_diff + 500, + max(best.latency_ms * 1.5, 500)) + + log.info("Found %d sources after filtering", len(filtered)) + return filtered # -- Speed benchmark ----------------------------------------------------------- @@ -336,7 +358,7 @@ def download_aria2c( cmd: list[str] = [ "aria2c", "--file-allocation=none", - "--continue=true", + "--continue=false", f"--max-connection-per-server={connections}", f"--split={total_splits}", "--min-split-size=50M", @@ -380,97 +402,74 @@ def download_aria2c( return True -# -- Main ---------------------------------------------------------------------- +# -- Public API ---------------------------------------------------------------- -def main() -> int: - p: argparse.ArgumentParser = argparse.ArgumentParser( - description="Download Solana snapshots with aria2c parallel downloads", - ) - p.add_argument("-o", "--output", default="/srv/solana/snapshots", - help="Snapshot output directory (default: /srv/solana/snapshots)") - p.add_argument("-c", "--cluster", default="mainnet-beta", - choices=list(CLUSTER_RPC), - help="Solana cluster (default: mainnet-beta)") - p.add_argument("-r", "--rpc", default=None, - help="RPC URL for cluster discovery (default: public RPC)") - p.add_argument("-n", "--connections", type=int, default=16, - help="aria2c connections per download (default: 16)") - p.add_argument("-t", "--threads", type=int, default=500, - help="Threads for parallel RPC probing (default: 500)") - p.add_argument("--max-snapshot-age", type=int, default=1300, - help="Max snapshot age in slots (default: 1300)") - p.add_argument("--max-latency", type=float, default=100, - help="Max RPC probe latency in ms (default: 100)") - p.add_argument("--min-download-speed", type=int, default=20, - help="Min download speed in MiB/s (default: 20)") - p.add_argument("--measurement-time", type=int, default=7, - help="Speed 
measurement duration in seconds (default: 7)") - p.add_argument("--max-speed-checks", type=int, default=15, - help="Max nodes to benchmark before giving up (default: 15)") - p.add_argument("--version", default=None, - help="Filter nodes by version prefix (e.g. '2.2')") - p.add_argument("--full-only", action="store_true", - help="Download only full snapshot, skip incremental") - p.add_argument("--dry-run", action="store_true", - help="Find best source and print URL, don't download") - p.add_argument("-v", "--verbose", action="store_true") - args: argparse.Namespace = p.parse_args() +def download_best_snapshot( + output_dir: str, + *, + cluster: str = "mainnet-beta", + rpc_url: str | None = None, + connections: int = 16, + threads: int = 500, + max_snapshot_age: int = 10000, + max_latency: float = 500, + min_download_speed: int = 20, + measurement_time: int = 7, + max_speed_checks: int = 15, + version_filter: str | None = None, + full_only: bool = False, +) -> bool: + """Download the best available snapshot to output_dir. - logging.basicConfig( - level=logging.DEBUG if args.verbose else logging.INFO, - format="%(asctime)s %(levelname)s %(message)s", - datefmt="%H:%M:%S", - ) + Programmatic API for use by entrypoint.py or other callers. + Returns True on success, False on failure. + """ + resolved_rpc: str = rpc_url or CLUSTER_RPC[cluster] - rpc_url: str = args.rpc or CLUSTER_RPC[args.cluster] - - # aria2c is required for actual downloads (not dry-run) - if not args.dry_run and not shutil.which("aria2c"): + if not shutil.which("aria2c"): log.error("aria2c not found. 
Install with: apt install aria2") - return 1 + return False - # Get current slot - log.info("Cluster: %s | RPC: %s", args.cluster, rpc_url) - current_slot: int | None = get_current_slot(rpc_url) + log.info("Cluster: %s | RPC: %s", cluster, resolved_rpc) + current_slot: int | None = get_current_slot(resolved_rpc) if current_slot is None: - log.error("Cannot get current slot from %s", rpc_url) - return 1 + log.error("Cannot get current slot from %s", resolved_rpc) + return False log.info("Current slot: %d", current_slot) - # Discover sources sources: list[SnapshotSource] = discover_sources( - rpc_url, current_slot, - max_age_slots=args.max_snapshot_age, - max_latency_ms=args.max_latency, - threads=args.threads, - version_filter=args.version, + resolved_rpc, current_slot, + max_age_slots=max_snapshot_age, + max_latency_ms=max_latency, + threads=threads, + version_filter=version_filter, ) if not sources: log.error("No snapshot sources found") - return 1 + return False # Sort by latency (lowest first) for speed benchmarking sources.sort(key=lambda s: s.latency_ms) - # Benchmark top candidates — all speeds in MiB/s (binary, 1 MiB = 1048576 bytes) - log.info("Benchmarking download speed on top %d sources...", args.max_speed_checks) + # Benchmark top candidates + log.info("Benchmarking download speed on top %d sources...", max_speed_checks) fast_sources: list[SnapshotSource] = [] checked: int = 0 - min_speed_bytes: int = args.min_download_speed * 1024 * 1024 # MiB to bytes + min_speed_bytes: int = min_download_speed * 1024 * 1024 for source in sources: - if checked >= args.max_speed_checks: + if checked >= max_speed_checks: break checked += 1 - speed: float = measure_speed(source.rpc_address, args.measurement_time) + speed: float = measure_speed(source.rpc_address, measurement_time) source.download_speed = speed speed_mib: float = speed / (1024 ** 2) if speed < min_speed_bytes: log.info(" %s: %.1f MiB/s (too slow, need >=%d MiB/s)", - source.rpc_address, speed_mib, 
args.min_download_speed) + source.rpc_address, speed_mib, min_download_speed) continue log.info(" %s: %.1f MiB/s (latency: %.0fms, age: %d slots)", @@ -480,19 +479,17 @@ def main() -> int: if not fast_sources: log.error("No source met minimum speed requirement (%d MiB/s)", - args.min_download_speed) - log.info("Try: --min-download-speed 10") - return 1 + min_download_speed) + return False # Use the fastest source as primary, collect mirrors for each file best: SnapshotSource = fast_sources[0] file_paths: list[str] = best.file_paths - if args.full_only: + if full_only: file_paths = [fp for fp in file_paths if fp.rsplit("/", 1)[-1].startswith("snapshot-")] - # Build mirror URL lists: for each file, collect URLs from all fast sources - # that serve the same filename + # Build mirror URL lists download_plan: list[tuple[str, list[str]]] = [] for fp in file_paths: filename: str = fp.rsplit("/", 1)[-1] @@ -509,37 +506,129 @@ def main() -> int: best.rpc_address, speed_mib, len(fast_sources)) for filename, mirror_urls in download_plan: log.info(" %s (%d mirrors)", filename, len(mirror_urls)) - for url in mirror_urls: - log.info(" %s", url) - if args.dry_run: - for _, mirror_urls in download_plan: - for url in mirror_urls: - print(url) - return 0 - - # Download — skip files that already exist locally - os.makedirs(args.output, exist_ok=True) + # Download + os.makedirs(output_dir, exist_ok=True) total_start: float = time.monotonic() for filename, mirror_urls in download_plan: - filepath: Path = Path(args.output) / filename + filepath: Path = Path(output_dir) / filename if filepath.exists() and filepath.stat().st_size > 0: log.info("Skipping %s (already exists: %.1f GB)", filename, filepath.stat().st_size / (1024 ** 3)) continue - if not download_aria2c(mirror_urls, args.output, filename, args.connections): + if not download_aria2c(mirror_urls, output_dir, filename, connections): log.error("Failed to download %s", filename) - return 1 + return False total_elapsed: float = 
time.monotonic() - total_start log.info("All downloads complete in %.0fs", total_elapsed) for filename, _ in download_plan: - fp: Path = Path(args.output) / filename - if fp.exists(): - log.info(" %s (%.1f GB)", fp.name, fp.stat().st_size / (1024 ** 3)) + fp_path: Path = Path(output_dir) / filename + if fp_path.exists(): + log.info(" %s (%.1f GB)", fp_path.name, fp_path.stat().st_size / (1024 ** 3)) - return 0 + return True + + +# -- Main (CLI) ---------------------------------------------------------------- + + +def main() -> int: + p: argparse.ArgumentParser = argparse.ArgumentParser( + description="Download Solana snapshots with aria2c parallel downloads", + ) + p.add_argument("-o", "--output", default="/srv/kind/solana/snapshots", + help="Snapshot output directory (default: /srv/kind/solana/snapshots)") + p.add_argument("-c", "--cluster", default="mainnet-beta", + choices=list(CLUSTER_RPC), + help="Solana cluster (default: mainnet-beta)") + p.add_argument("-r", "--rpc", default=None, + help="RPC URL for cluster discovery (default: public RPC)") + p.add_argument("-n", "--connections", type=int, default=16, + help="aria2c connections per download (default: 16)") + p.add_argument("-t", "--threads", type=int, default=500, + help="Threads for parallel RPC probing (default: 500)") + p.add_argument("--max-snapshot-age", type=int, default=10000, + help="Max snapshot age in slots (default: 10000)") + p.add_argument("--max-latency", type=float, default=500, + help="Max RPC probe latency in ms (default: 500)") + p.add_argument("--min-download-speed", type=int, default=20, + help="Min download speed in MiB/s (default: 20)") + p.add_argument("--measurement-time", type=int, default=7, + help="Speed measurement duration in seconds (default: 7)") + p.add_argument("--max-speed-checks", type=int, default=15, + help="Max nodes to benchmark before giving up (default: 15)") + p.add_argument("--version", default=None, + help="Filter nodes by version prefix (e.g. 
'2.2')") + p.add_argument("--full-only", action="store_true", + help="Download only full snapshot, skip incremental") + p.add_argument("--dry-run", action="store_true", + help="Find best source and print URL, don't download") + p.add_argument("--post-cmd", + help="Shell command to run after successful download " + "(e.g. 'kubectl scale deployment ... --replicas=1')") + p.add_argument("-v", "--verbose", action="store_true") + args: argparse.Namespace = p.parse_args() + + logging.basicConfig( + level=logging.DEBUG if args.verbose else logging.INFO, + format="%(asctime)s %(levelname)s %(message)s", + datefmt="%H:%M:%S", + ) + + # Dry-run uses inline flow (needs access to sources for URL printing) + if args.dry_run: + rpc_url: str = args.rpc or CLUSTER_RPC[args.cluster] + current_slot: int | None = get_current_slot(rpc_url) + if current_slot is None: + log.error("Cannot get current slot from %s", rpc_url) + return 1 + + sources: list[SnapshotSource] = discover_sources( + rpc_url, current_slot, + max_age_slots=args.max_snapshot_age, + max_latency_ms=args.max_latency, + threads=args.threads, + version_filter=args.version, + ) + if not sources: + log.error("No snapshot sources found") + return 1 + + sources.sort(key=lambda s: s.latency_ms) + best = sources[0] + for fp in best.file_paths: + print(f"http://{best.rpc_address}{fp}") + return 0 + + ok: bool = download_best_snapshot( + args.output, + cluster=args.cluster, + rpc_url=args.rpc, + connections=args.connections, + threads=args.threads, + max_snapshot_age=args.max_snapshot_age, + max_latency=args.max_latency, + min_download_speed=args.min_download_speed, + measurement_time=args.measurement_time, + max_speed_checks=args.max_speed_checks, + version_filter=args.version, + full_only=args.full_only, + ) + + if ok and args.post_cmd: + log.info("Running post-download command: %s", args.post_cmd) + result: subprocess.CompletedProcess[bytes] = subprocess.run( + args.post_cmd, shell=True, + ) + if result.returncode != 0: + 
log.error("Post-download command failed with exit code %d", + result.returncode) + return 1 + log.info("Post-download command completed successfully") + + return 0 if ok else 1 if __name__ == "__main__": From ec12e6079b36cd0c41bab79e2825aa58f1df5b16 Mon Sep 17 00:00:00 2001 From: "A. F. Dudley" Date: Sun, 8 Mar 2026 18:45:44 +0000 Subject: [PATCH 31/62] fix: redeploy wipe uses umount+remount instead of rm -rf Remounting tmpfs is instant (kernel frees pages), while rm -rf on 400GB+ of accounts files traverses every inode. Recover playbook keeps rm -rf because the kind node's bind mount prevents umount while the container is running. Co-Authored-By: Claude Opus 4.6 --- playbooks/biscayne-recover.yml | 4 ++-- playbooks/biscayne-redeploy.yml | 5 +++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/playbooks/biscayne-recover.yml b/playbooks/biscayne-recover.yml index 07388207..de320902 100644 --- a/playbooks/biscayne-recover.yml +++ b/playbooks/biscayne-recover.yml @@ -92,8 +92,8 @@ when: agave_procs.rc == 0 # ---- step 3: wipe accounts ramdisk ----------------------------------------- - # Cannot umount+mkfs because the kind node's bind mount holds it open. - # Instead, delete contents. This is sufficient — agave starts clean. + # Cannot umount+remount because the kind node's bind mount holds it open. + # rm -rf is required here (slower than remount but the only option). 
- name: Wipe accounts data ansible.builtin.shell: | rm -rf {{ accounts_dir }}/* diff --git a/playbooks/biscayne-redeploy.yml b/playbooks/biscayne-redeploy.yml index 608ec328..180e7f6f 100644 --- a/playbooks/biscayne-redeploy.yml +++ b/playbooks/biscayne-redeploy.yml @@ -139,9 +139,10 @@ when: wipe_ledger | bool tags: [wipe] - - name: Wipe accounts ramdisk + - name: Wipe accounts ramdisk (remount tmpfs) ansible.builtin.shell: | - rm -rf {{ accounts_dir }}/* + umount {{ ramdisk_mount }} 2>/dev/null || true + mount -t tmpfs -o nodev,nosuid,noexec,nodiratime,size={{ ramdisk_size }} tmpfs {{ ramdisk_mount }} mkdir -p {{ accounts_dir }} chown solana:solana {{ ramdisk_mount }} {{ accounts_dir }} become: true From 078872d78de2b09274104da6efa6180d6656b231 Mon Sep 17 00:00:00 2001 From: "A. F. Dudley" Date: Sun, 8 Mar 2026 19:11:24 +0000 Subject: [PATCH 32/62] feat: add iptables playbook, symlink snapshot-download.py to agave-stack - playbooks/biscayne-iptables.yml: manages PREROUTING DNAT and DOCKER-USER rules for both host IP (186.233.184.235) and relay loopback (137.239.194.65). Idempotent, persists via netfilter-persistent. - scripts/snapshot-download.py: replaced standalone copy with symlink to agave-stack source of truth, eliminating duplication. Co-Authored-By: Claude Opus 4.6 --- playbooks/biscayne-iptables.yml | 116 ++++++ scripts/snapshot-download.py | 636 +------------------------------- 2 files changed, 117 insertions(+), 635 deletions(-) create mode 100644 playbooks/biscayne-iptables.yml mode change 100755 => 120000 scripts/snapshot-download.py diff --git a/playbooks/biscayne-iptables.yml b/playbooks/biscayne-iptables.yml new file mode 100644 index 00000000..20e8d94f --- /dev/null +++ b/playbooks/biscayne-iptables.yml @@ -0,0 +1,116 @@ +--- +# Configure iptables DNAT rules for agave validator +# +# Routes external traffic to the kind node's pod IP (172.20.0.2). 
+# Rules must be inserted BEFORE Docker's ADDRTYPE LOCAL rule in +# PREROUTING, otherwise Docker's chain swallows the traffic. +# +# Two external IPs: +# 186.233.184.235 — primary host IP (direct access) +# 137.239.194.65 — Ashburn relay loopback (GRE tunnel endpoint) +# +# DOCKER-USER chain rules allow forwarded traffic to reach the pod. +# +# Idempotent: checks for existing rules before inserting. +# Persistent: saves rules via iptables-persistent. +# +# Usage: +# ansible-playbook playbooks/biscayne-iptables.yml +# +- name: Configure iptables for agave validator + hosts: all + gather_facts: false + become: true + vars: + pod_ip: 172.20.0.2 + host_ip: 186.233.184.235 + relay_ip: 137.239.194.65 + # Ports to forward + tcp_ports: + - 8899 # RPC + - 8900 # RPC WebSocket + - 8001 # Gossip + udp_ports: + - 8001 # Gossip UDP + udp_ranges: + - 9000:9025 # Validator dynamic ports + + tasks: + # ---- PREROUTING DNAT rules --------------------------------------------------- + # Host IP rules (186.233.184.235 → pod) + - name: "PREROUTING DNAT host IP TCP ports to pod" + ansible.builtin.iptables: + table: nat + chain: PREROUTING + protocol: tcp + destination: "{{ host_ip }}" + destination_port: "{{ item }}" + jump: DNAT + to_destination: "{{ pod_ip }}:{{ item }}" + action: insert + rule_num: 1 + loop: "{{ tcp_ports }}" + register: prerouting_host_tcp + + # Relay IP rules (137.239.194.65 → pod) + - name: "PREROUTING DNAT relay IP TCP ports to pod" + ansible.builtin.iptables: + table: nat + chain: PREROUTING + protocol: tcp + destination: "{{ relay_ip }}" + destination_port: "{{ item }}" + jump: DNAT + to_destination: "{{ pod_ip }}:{{ item }}" + action: insert + rule_num: 1 + loop: "{{ tcp_ports }}" + register: prerouting_relay_tcp + + - name: "PREROUTING DNAT relay IP UDP ports to pod" + ansible.builtin.iptables: + table: nat + chain: PREROUTING + protocol: udp + destination: "{{ relay_ip }}" + destination_port: "{{ item }}" + jump: DNAT + to_destination: "{{ pod_ip }}" + 
action: insert + rule_num: 1 + loop: "{{ udp_ports + udp_ranges }}" + register: prerouting_relay_udp + + # ---- DOCKER-USER accept rules ------------------------------------------------ + - name: "DOCKER-USER accept TCP to pod" + ansible.builtin.iptables: + chain: DOCKER-USER + protocol: tcp + destination: "{{ pod_ip }}" + destination_port: "{{ item }}" + jump: ACCEPT + action: insert + rule_num: 1 + loop: "{{ tcp_ports }}" + register: dockeruser_tcp + + - name: "DOCKER-USER accept UDP to pod" + ansible.builtin.iptables: + chain: DOCKER-USER + protocol: udp + destination: "{{ pod_ip }}" + destination_port: "{{ item }}" + jump: ACCEPT + action: insert + rule_num: 1 + loop: "{{ udp_ports + udp_ranges }}" + register: dockeruser_udp + + # ---- Persist rules ----------------------------------------------------------- + - name: Save iptables rules + ansible.builtin.command: netfilter-persistent save + changed_when: true + when: >- + prerouting_host_tcp.changed or prerouting_relay_tcp.changed or + prerouting_relay_udp.changed or dockeruser_tcp.changed or + dockeruser_udp.changed diff --git a/scripts/snapshot-download.py b/scripts/snapshot-download.py deleted file mode 100755 index c19830fe..00000000 --- a/scripts/snapshot-download.py +++ /dev/null @@ -1,635 +0,0 @@ -#!/usr/bin/env python3 -"""Download Solana snapshots using aria2c for parallel multi-connection downloads. - -Discovers snapshot sources by querying getClusterNodes for all RPCs in the -cluster, probing each for available snapshots, benchmarking download speed, -and downloading from the fastest source using aria2c (16 connections by default). - -Based on the discovery approach from etcusr/solana-snapshot-finder but replaces -the single-connection wget download with aria2c parallel chunked downloads. 
- -Usage: - # Download to /srv/kind/solana/snapshots (mainnet, 16 connections) - ./snapshot-download.py -o /srv/kind/solana/snapshots - - # Dry run — find best source, print URL - ./snapshot-download.py --dry-run - - # Custom RPC for cluster node discovery + 32 connections - ./snapshot-download.py -r https://api.mainnet-beta.solana.com -n 32 - - # Testnet - ./snapshot-download.py -c testnet -o /data/snapshots - -Requirements: - - aria2c (apt install aria2) - - python3 >= 3.10 (stdlib only, no pip dependencies) -""" - -from __future__ import annotations - -import argparse -import concurrent.futures -import json -import logging -import os -import re -import shutil -import subprocess -import sys -import time -import urllib.error -import urllib.request -from dataclasses import dataclass, field -from http.client import HTTPResponse -from pathlib import Path -from urllib.request import Request - -log: logging.Logger = logging.getLogger("snapshot-download") - -CLUSTER_RPC: dict[str, str] = { - "mainnet-beta": "https://api.mainnet-beta.solana.com", - "testnet": "https://api.testnet.solana.com", - "devnet": "https://api.devnet.solana.com", -} - -# Snapshot filenames: -# snapshot--.tar.zst -# incremental-snapshot---.tar.zst -FULL_SNAP_RE: re.Pattern[str] = re.compile( - r"^snapshot-(\d+)-([A-Za-z0-9]+)\.tar\.(zst|bz2)$" -) -INCR_SNAP_RE: re.Pattern[str] = re.compile( - r"^incremental-snapshot-(\d+)-(\d+)-([A-Za-z0-9]+)\.tar\.(zst|bz2)$" -) - - -@dataclass -class SnapshotSource: - """A snapshot file available from a specific RPC node.""" - - rpc_address: str - # Full redirect paths as returned by the server (e.g. 
/snapshot-123-hash.tar.zst) - file_paths: list[str] = field(default_factory=list) - slots_diff: int = 0 - latency_ms: float = 0.0 - download_speed: float = 0.0 # bytes/sec - - -# -- JSON-RPC helpers ---------------------------------------------------------- - - -class _NoRedirectHandler(urllib.request.HTTPRedirectHandler): - """Handler that captures redirect Location instead of following it.""" - - def redirect_request( - self, - req: Request, - fp: HTTPResponse, - code: int, - msg: str, - headers: dict[str, str], # type: ignore[override] - newurl: str, - ) -> None: - return None - - -def rpc_post(url: str, method: str, params: list[object] | None = None, - timeout: int = 25) -> object | None: - """JSON-RPC POST. Returns parsed 'result' field or None on error.""" - payload: bytes = json.dumps({ - "jsonrpc": "2.0", "id": 1, - "method": method, "params": params or [], - }).encode() - req = Request(url, data=payload, - headers={"Content-Type": "application/json"}) - try: - with urllib.request.urlopen(req, timeout=timeout) as resp: - data: dict[str, object] = json.loads(resp.read()) - return data.get("result") - except (urllib.error.URLError, json.JSONDecodeError, OSError, TimeoutError) as e: - log.debug("rpc_post %s %s failed: %s", url, method, e) - return None - - -def head_no_follow(url: str, timeout: float = 3) -> tuple[str | None, float]: - """HEAD request without following redirects. - - Returns (Location header value, latency_sec) if the server returned a - 3xx redirect. Returns (None, 0.0) on any error or non-redirect response. 
- """ - opener: urllib.request.OpenerDirector = urllib.request.build_opener(_NoRedirectHandler) - req = Request(url, method="HEAD") - try: - start: float = time.monotonic() - resp: HTTPResponse = opener.open(req, timeout=timeout) # type: ignore[assignment] - latency: float = time.monotonic() - start - # Non-redirect (2xx) — server didn't redirect, not useful for discovery - location: str | None = resp.headers.get("Location") - resp.close() - return location, latency - except urllib.error.HTTPError as e: - # 3xx redirects raise HTTPError with the redirect info - latency = time.monotonic() - start # type: ignore[possibly-undefined] - location = e.headers.get("Location") - if location and 300 <= e.code < 400: - return location, latency - return None, 0.0 - except (urllib.error.URLError, OSError, TimeoutError): - return None, 0.0 - - -# -- Discovery ----------------------------------------------------------------- - - -def get_current_slot(rpc_url: str) -> int | None: - """Get current slot from RPC.""" - result: object | None = rpc_post(rpc_url, "getSlot") - if isinstance(result, int): - return result - return None - - -def get_cluster_rpc_nodes(rpc_url: str, version_filter: str | None = None) -> list[str]: - """Get all RPC node addresses from getClusterNodes.""" - result: object | None = rpc_post(rpc_url, "getClusterNodes") - if not isinstance(result, list): - return [] - - rpc_addrs: list[str] = [] - for node in result: - if not isinstance(node, dict): - continue - if version_filter is not None: - node_version: str | None = node.get("version") - if node_version and not node_version.startswith(version_filter): - continue - rpc: str | None = node.get("rpc") - if rpc: - rpc_addrs.append(rpc) - return list(set(rpc_addrs)) - - -def _parse_snapshot_filename(location: str) -> tuple[str, str | None]: - """Extract filename and full redirect path from Location header. - - Returns (filename, full_path). full_path includes any path prefix - the server returned (e.g. 
'/snapshots/snapshot-123-hash.tar.zst'). - """ - # Location may be absolute URL or relative path - if location.startswith("http://") or location.startswith("https://"): - # Absolute URL — extract path - from urllib.parse import urlparse - path: str = urlparse(location).path - else: - path = location - - filename: str = path.rsplit("/", 1)[-1] - return filename, path - - -def probe_rpc_snapshot( - rpc_address: str, - current_slot: int, -) -> SnapshotSource | None: - """Probe a single RPC node for available snapshots. - - Discovery only — no filtering. Returns a SnapshotSource with all available - info so the caller can decide what to keep. Filtering happens after all - probes complete, so rejected sources are still visible for debugging. - """ - full_url: str = f"http://{rpc_address}/snapshot.tar.bz2" - - # Full snapshot is required — every source must have one - full_location, full_latency = head_no_follow(full_url, timeout=2) - if not full_location: - return None - - latency_ms: float = full_latency * 1000 - - full_filename, full_path = _parse_snapshot_filename(full_location) - fm: re.Match[str] | None = FULL_SNAP_RE.match(full_filename) - if not fm: - return None - - full_snap_slot: int = int(fm.group(1)) - slots_diff: int = current_slot - full_snap_slot - - file_paths: list[str] = [full_path] - - # Also check for incremental snapshot - inc_url: str = f"http://{rpc_address}/incremental-snapshot.tar.bz2" - inc_location, _ = head_no_follow(inc_url, timeout=2) - if inc_location: - inc_filename, inc_path = _parse_snapshot_filename(inc_location) - m: re.Match[str] | None = INCR_SNAP_RE.match(inc_filename) - if m: - inc_base_slot: int = int(m.group(1)) - # Incremental must be based on this source's full snapshot - if inc_base_slot == full_snap_slot: - file_paths.append(inc_path) - - return SnapshotSource( - rpc_address=rpc_address, - file_paths=file_paths, - slots_diff=slots_diff, - latency_ms=latency_ms, - ) - - -def discover_sources( - rpc_url: str, - current_slot: 
int, - max_age_slots: int, - max_latency_ms: float, - threads: int, - version_filter: str | None, -) -> list[SnapshotSource]: - """Discover all snapshot sources, then filter. - - Probing and filtering are separate: all reachable sources are collected - first so we can report what exists even if filters reject everything. - """ - rpc_nodes: list[str] = get_cluster_rpc_nodes(rpc_url, version_filter) - if not rpc_nodes: - log.error("No RPC nodes found via getClusterNodes") - return [] - - log.info("Found %d RPC nodes, probing for snapshots...", len(rpc_nodes)) - - all_sources: list[SnapshotSource] = [] - with concurrent.futures.ThreadPoolExecutor(max_workers=threads) as pool: - futures: dict[concurrent.futures.Future[SnapshotSource | None], str] = { - pool.submit(probe_rpc_snapshot, addr, current_slot): addr - for addr in rpc_nodes - } - done: int = 0 - for future in concurrent.futures.as_completed(futures): - done += 1 - if done % 200 == 0: - log.info(" probed %d/%d nodes, %d reachable", - done, len(rpc_nodes), len(all_sources)) - try: - result: SnapshotSource | None = future.result() - except (urllib.error.URLError, OSError, TimeoutError) as e: - log.debug("Probe failed for %s: %s", futures[future], e) - continue - if result: - all_sources.append(result) - - log.info("Discovered %d reachable sources", len(all_sources)) - - # Apply filters - filtered: list[SnapshotSource] = [] - rejected_age: int = 0 - rejected_latency: int = 0 - for src in all_sources: - if src.slots_diff > max_age_slots or src.slots_diff < -100: - rejected_age += 1 - continue - if src.latency_ms > max_latency_ms: - rejected_latency += 1 - continue - filtered.append(src) - - if rejected_age or rejected_latency: - log.info("Filtered: %d rejected by age (>%d slots), %d by latency (>%.0fms)", - rejected_age, max_age_slots, rejected_latency, max_latency_ms) - - if not filtered and all_sources: - # Show what was available so the user can adjust filters - all_sources.sort(key=lambda s: s.slots_diff) - 
best = all_sources[0] - log.warning("All %d sources rejected by filters. Best available: " - "%s (age=%d slots, latency=%.0fms). " - "Try --max-snapshot-age %d --max-latency %.0f", - len(all_sources), best.rpc_address, - best.slots_diff, best.latency_ms, - best.slots_diff + 500, - max(best.latency_ms * 1.5, 500)) - - log.info("Found %d sources after filtering", len(filtered)) - return filtered - - -# -- Speed benchmark ----------------------------------------------------------- - - -def measure_speed(rpc_address: str, measure_time: int = 7) -> float: - """Measure download speed from an RPC node. Returns bytes/sec.""" - url: str = f"http://{rpc_address}/snapshot.tar.bz2" - req = Request(url) - try: - with urllib.request.urlopen(req, timeout=measure_time + 5) as resp: - start: float = time.monotonic() - total: int = 0 - while True: - elapsed: float = time.monotonic() - start - if elapsed >= measure_time: - break - chunk: bytes = resp.read(81920) - if not chunk: - break - total += len(chunk) - elapsed = time.monotonic() - start - if elapsed <= 0: - return 0.0 - return total / elapsed - except (urllib.error.URLError, OSError, TimeoutError): - return 0.0 - - -# -- Download ------------------------------------------------------------------ - - -def download_aria2c( - urls: list[str], - output_dir: str, - filename: str, - connections: int = 16, -) -> bool: - """Download a file using aria2c with parallel connections. - - When multiple URLs are provided, aria2c treats them as mirrors of the - same file and distributes chunks across all of them. - """ - num_mirrors: int = len(urls) - total_splits: int = max(connections, connections * num_mirrors) - cmd: list[str] = [ - "aria2c", - "--file-allocation=none", - "--continue=false", - f"--max-connection-per-server={connections}", - f"--split={total_splits}", - "--min-split-size=50M", - # aria2c retries individual chunk connections on transient network - # errors (TCP reset, timeout). 
This is transport-level retry analogous - # to TCP retransmit, not application-level retry of a failed operation. - "--max-tries=5", - "--retry-wait=5", - "--timeout=60", - "--connect-timeout=10", - "--summary-interval=10", - "--console-log-level=notice", - f"--dir={output_dir}", - f"--out={filename}", - "--auto-file-renaming=false", - "--allow-overwrite=true", - *urls, - ] - - log.info("Downloading %s", filename) - log.info(" aria2c: %d connections × %d mirrors (%d splits)", - connections, num_mirrors, total_splits) - - start: float = time.monotonic() - result: subprocess.CompletedProcess[bytes] = subprocess.run(cmd) - elapsed: float = time.monotonic() - start - - if result.returncode != 0: - log.error("aria2c failed with exit code %d", result.returncode) - return False - - filepath: Path = Path(output_dir) / filename - if not filepath.exists(): - log.error("aria2c reported success but %s does not exist", filepath) - return False - - size_bytes: int = filepath.stat().st_size - size_gb: float = size_bytes / (1024 ** 3) - avg_mb: float = size_bytes / elapsed / (1024 ** 2) if elapsed > 0 else 0 - log.info(" Done: %.1f GB in %.0fs (%.1f MiB/s avg)", size_gb, elapsed, avg_mb) - return True - - -# -- Public API ---------------------------------------------------------------- - - -def download_best_snapshot( - output_dir: str, - *, - cluster: str = "mainnet-beta", - rpc_url: str | None = None, - connections: int = 16, - threads: int = 500, - max_snapshot_age: int = 10000, - max_latency: float = 500, - min_download_speed: int = 20, - measurement_time: int = 7, - max_speed_checks: int = 15, - version_filter: str | None = None, - full_only: bool = False, -) -> bool: - """Download the best available snapshot to output_dir. - - Programmatic API for use by entrypoint.py or other callers. - Returns True on success, False on failure. - """ - resolved_rpc: str = rpc_url or CLUSTER_RPC[cluster] - - if not shutil.which("aria2c"): - log.error("aria2c not found. 
Install with: apt install aria2") - return False - - log.info("Cluster: %s | RPC: %s", cluster, resolved_rpc) - current_slot: int | None = get_current_slot(resolved_rpc) - if current_slot is None: - log.error("Cannot get current slot from %s", resolved_rpc) - return False - log.info("Current slot: %d", current_slot) - - sources: list[SnapshotSource] = discover_sources( - resolved_rpc, current_slot, - max_age_slots=max_snapshot_age, - max_latency_ms=max_latency, - threads=threads, - version_filter=version_filter, - ) - if not sources: - log.error("No snapshot sources found") - return False - - # Sort by latency (lowest first) for speed benchmarking - sources.sort(key=lambda s: s.latency_ms) - - # Benchmark top candidates - log.info("Benchmarking download speed on top %d sources...", max_speed_checks) - fast_sources: list[SnapshotSource] = [] - checked: int = 0 - min_speed_bytes: int = min_download_speed * 1024 * 1024 - - for source in sources: - if checked >= max_speed_checks: - break - checked += 1 - - speed: float = measure_speed(source.rpc_address, measurement_time) - source.download_speed = speed - speed_mib: float = speed / (1024 ** 2) - - if speed < min_speed_bytes: - log.info(" %s: %.1f MiB/s (too slow, need >=%d MiB/s)", - source.rpc_address, speed_mib, min_download_speed) - continue - - log.info(" %s: %.1f MiB/s (latency: %.0fms, age: %d slots)", - source.rpc_address, speed_mib, - source.latency_ms, source.slots_diff) - fast_sources.append(source) - - if not fast_sources: - log.error("No source met minimum speed requirement (%d MiB/s)", - min_download_speed) - return False - - # Use the fastest source as primary, collect mirrors for each file - best: SnapshotSource = fast_sources[0] - file_paths: list[str] = best.file_paths - if full_only: - file_paths = [fp for fp in file_paths - if fp.rsplit("/", 1)[-1].startswith("snapshot-")] - - # Build mirror URL lists - download_plan: list[tuple[str, list[str]]] = [] - for fp in file_paths: - filename: str = 
fp.rsplit("/", 1)[-1] - mirror_urls: list[str] = [f"http://{best.rpc_address}{fp}"] - for other in fast_sources[1:]: - for other_fp in other.file_paths: - if other_fp.rsplit("/", 1)[-1] == filename: - mirror_urls.append(f"http://{other.rpc_address}{other_fp}") - break - download_plan.append((filename, mirror_urls)) - - speed_mib: float = best.download_speed / (1024 ** 2) - log.info("Best source: %s (%.1f MiB/s), %d mirrors total", - best.rpc_address, speed_mib, len(fast_sources)) - for filename, mirror_urls in download_plan: - log.info(" %s (%d mirrors)", filename, len(mirror_urls)) - - # Download - os.makedirs(output_dir, exist_ok=True) - total_start: float = time.monotonic() - - for filename, mirror_urls in download_plan: - filepath: Path = Path(output_dir) / filename - if filepath.exists() and filepath.stat().st_size > 0: - log.info("Skipping %s (already exists: %.1f GB)", - filename, filepath.stat().st_size / (1024 ** 3)) - continue - if not download_aria2c(mirror_urls, output_dir, filename, connections): - log.error("Failed to download %s", filename) - return False - - total_elapsed: float = time.monotonic() - total_start - log.info("All downloads complete in %.0fs", total_elapsed) - for filename, _ in download_plan: - fp_path: Path = Path(output_dir) / filename - if fp_path.exists(): - log.info(" %s (%.1f GB)", fp_path.name, fp_path.stat().st_size / (1024 ** 3)) - - return True - - -# -- Main (CLI) ---------------------------------------------------------------- - - -def main() -> int: - p: argparse.ArgumentParser = argparse.ArgumentParser( - description="Download Solana snapshots with aria2c parallel downloads", - ) - p.add_argument("-o", "--output", default="/srv/kind/solana/snapshots", - help="Snapshot output directory (default: /srv/kind/solana/snapshots)") - p.add_argument("-c", "--cluster", default="mainnet-beta", - choices=list(CLUSTER_RPC), - help="Solana cluster (default: mainnet-beta)") - p.add_argument("-r", "--rpc", default=None, - help="RPC URL 
for cluster discovery (default: public RPC)") - p.add_argument("-n", "--connections", type=int, default=16, - help="aria2c connections per download (default: 16)") - p.add_argument("-t", "--threads", type=int, default=500, - help="Threads for parallel RPC probing (default: 500)") - p.add_argument("--max-snapshot-age", type=int, default=10000, - help="Max snapshot age in slots (default: 10000)") - p.add_argument("--max-latency", type=float, default=500, - help="Max RPC probe latency in ms (default: 500)") - p.add_argument("--min-download-speed", type=int, default=20, - help="Min download speed in MiB/s (default: 20)") - p.add_argument("--measurement-time", type=int, default=7, - help="Speed measurement duration in seconds (default: 7)") - p.add_argument("--max-speed-checks", type=int, default=15, - help="Max nodes to benchmark before giving up (default: 15)") - p.add_argument("--version", default=None, - help="Filter nodes by version prefix (e.g. '2.2')") - p.add_argument("--full-only", action="store_true", - help="Download only full snapshot, skip incremental") - p.add_argument("--dry-run", action="store_true", - help="Find best source and print URL, don't download") - p.add_argument("--post-cmd", - help="Shell command to run after successful download " - "(e.g. 'kubectl scale deployment ... 
--replicas=1')") - p.add_argument("-v", "--verbose", action="store_true") - args: argparse.Namespace = p.parse_args() - - logging.basicConfig( - level=logging.DEBUG if args.verbose else logging.INFO, - format="%(asctime)s %(levelname)s %(message)s", - datefmt="%H:%M:%S", - ) - - # Dry-run uses inline flow (needs access to sources for URL printing) - if args.dry_run: - rpc_url: str = args.rpc or CLUSTER_RPC[args.cluster] - current_slot: int | None = get_current_slot(rpc_url) - if current_slot is None: - log.error("Cannot get current slot from %s", rpc_url) - return 1 - - sources: list[SnapshotSource] = discover_sources( - rpc_url, current_slot, - max_age_slots=args.max_snapshot_age, - max_latency_ms=args.max_latency, - threads=args.threads, - version_filter=args.version, - ) - if not sources: - log.error("No snapshot sources found") - return 1 - - sources.sort(key=lambda s: s.latency_ms) - best = sources[0] - for fp in best.file_paths: - print(f"http://{best.rpc_address}{fp}") - return 0 - - ok: bool = download_best_snapshot( - args.output, - cluster=args.cluster, - rpc_url=args.rpc, - connections=args.connections, - threads=args.threads, - max_snapshot_age=args.max_snapshot_age, - max_latency=args.max_latency, - min_download_speed=args.min_download_speed, - measurement_time=args.measurement_time, - max_speed_checks=args.max_speed_checks, - version_filter=args.version, - full_only=args.full_only, - ) - - if ok and args.post_cmd: - log.info("Running post-download command: %s", args.post_cmd) - result: subprocess.CompletedProcess[bytes] = subprocess.run( - args.post_cmd, shell=True, - ) - if result.returncode != 0: - log.error("Post-download command failed with exit code %d", - result.returncode) - return 1 - log.info("Post-download command completed successfully") - - return 0 if ok else 1 - - -if __name__ == "__main__": - sys.exit(main()) diff --git a/scripts/snapshot-download.py b/scripts/snapshot-download.py new file mode 120000 index 00000000..c54e8c17 --- 
/dev/null +++ b/scripts/snapshot-download.py @@ -0,0 +1 @@ +../../agave-stack/stack-orchestrator/container-build/laconicnetwork-agave/snapshot_download.py \ No newline at end of file From ba015bf3b1f31cfe338ac115f6b4a6f72d731e88 Mon Sep 17 00:00:00 2001 From: "A. F. Dudley" Date: Sun, 8 Mar 2026 19:13:34 +0000 Subject: [PATCH 33/62] chore: remove snapshot-download.py symlink (replacing with subtree) Co-Authored-By: Claude Opus 4.6 --- scripts/snapshot-download.py | 1 - 1 file changed, 1 deletion(-) delete mode 120000 scripts/snapshot-download.py diff --git a/scripts/snapshot-download.py b/scripts/snapshot-download.py deleted file mode 120000 index c54e8c17..00000000 --- a/scripts/snapshot-download.py +++ /dev/null @@ -1 +0,0 @@ -../../agave-stack/stack-orchestrator/container-build/laconicnetwork-agave/snapshot_download.py \ No newline at end of file From f4b3a46109a8da00fdd68d8999160ddc45dcc88a Mon Sep 17 00:00:00 2001 From: "A. F. Dudley" Date: Sun, 8 Mar 2026 19:13:38 +0000 Subject: [PATCH 34/62] Squashed 'scripts/agave-container/' content from commit 4b5c875 git-subtree-dir: scripts/agave-container git-subtree-split: 4b5c875a05cbbfbde38eeb053fd5443a8a50228c --- Dockerfile | 81 ++++++ build.sh | 17 ++ entrypoint.py | 485 ++++++++++++++++++++++++++++++++ snapshot_download.py | 641 +++++++++++++++++++++++++++++++++++++++++++ start-test.sh | 112 ++++++++ 5 files changed, 1336 insertions(+) create mode 100644 Dockerfile create mode 100644 build.sh create mode 100644 entrypoint.py create mode 100644 snapshot_download.py create mode 100644 start-test.sh diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 00000000..68a12508 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,81 @@ +# Unified Agave/Jito Solana image +# Supports three modes via AGAVE_MODE env: test, rpc, validator +# +# Build args: +# AGAVE_REPO - git repo URL (anza-xyz/agave or jito-foundation/jito-solana) +# AGAVE_VERSION - git tag to build (e.g. 
v3.1.9, v3.1.8-jito) + +ARG AGAVE_REPO=https://github.com/anza-xyz/agave.git +ARG AGAVE_VERSION=v3.1.9 + +# ---------- Stage 1: Build ---------- +FROM rust:1.85-bookworm AS builder + +ARG AGAVE_REPO +ARG AGAVE_VERSION + +RUN apt-get update && apt-get install -y --no-install-recommends \ + build-essential \ + pkg-config \ + libssl-dev \ + libudev-dev \ + libclang-dev \ + protobuf-compiler \ + ca-certificates \ + git \ + cmake \ + && rm -rf /var/lib/apt/lists/* + +WORKDIR /build +RUN git clone "$AGAVE_REPO" --depth 1 --branch "$AGAVE_VERSION" --recurse-submodules agave +WORKDIR /build/agave + +# Cherry-pick --public-tvu-address support (anza-xyz/agave PR #6778, commit 9f4b3ae) +# This flag only exists on master, not in v3.1.9 — fetch the PR ref and cherry-pick +ARG TVU_ADDRESS_PR=6778 +RUN if [ -n "$TVU_ADDRESS_PR" ]; then \ + git fetch --depth 50 origin "pull/${TVU_ADDRESS_PR}/head:tvu-pr" && \ + git cherry-pick --no-commit tvu-pr; \ + fi + +# Build all binaries using the upstream install script +RUN CI_COMMIT=$(git rev-parse HEAD) scripts/cargo-install-all.sh /solana-release + +# ---------- Stage 2: Runtime ---------- +FROM debian:bookworm-slim + +RUN apt-get update && apt-get install -y --no-install-recommends \ + ca-certificates \ + libssl3 \ + libudev1 \ + curl \ + sudo \ + aria2 \ + python3 \ + && rm -rf /var/lib/apt/lists/* + +# Create non-root user with sudo +RUN useradd -m -s /bin/bash agave \ + && echo "agave ALL=(ALL) NOPASSWD: ALL" >> /etc/sudoers + +# Copy all compiled binaries +COPY --from=builder /solana-release/bin/ /usr/local/bin/ + +# Copy entrypoint and support scripts +COPY entrypoint.py snapshot_download.py /usr/local/bin/ +COPY start-test.sh /usr/local/bin/ +RUN chmod +x /usr/local/bin/entrypoint.py /usr/local/bin/start-test.sh + +# Create data directories +RUN mkdir -p /data/config /data/ledger /data/accounts /data/snapshots \ + && chown -R agave:agave /data + +USER agave +WORKDIR /data + +ENV RUST_LOG=info +ENV RUST_BACKTRACE=1 + +EXPOSE 8899 
8900 8001 8001/udp + +ENTRYPOINT ["entrypoint.py"] diff --git a/build.sh b/build.sh new file mode 100644 index 00000000..4c4d940f --- /dev/null +++ b/build.sh @@ -0,0 +1,17 @@ +#!/usr/bin/env bash + +# Build laconicnetwork/agave +# Set AGAVE_REPO and AGAVE_VERSION env vars to build Jito or a different version +source ${CERC_CONTAINER_BASE_DIR}/build-base.sh + +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) + +AGAVE_REPO="${AGAVE_REPO:-https://github.com/anza-xyz/agave.git}" +AGAVE_VERSION="${AGAVE_VERSION:-v3.1.9}" + +docker build -t laconicnetwork/agave:local \ + --build-arg AGAVE_REPO="$AGAVE_REPO" \ + --build-arg AGAVE_VERSION="$AGAVE_VERSION" \ + ${build_command_args} \ + -f ${SCRIPT_DIR}/Dockerfile \ + ${SCRIPT_DIR} diff --git a/entrypoint.py b/entrypoint.py new file mode 100644 index 00000000..1122fc9c --- /dev/null +++ b/entrypoint.py @@ -0,0 +1,485 @@ +#!/usr/bin/env python3 +"""Agave validator entrypoint — snapshot management, arg construction, liveness probe. + +Two subcommands: + entrypoint.py serve (default) — snapshot freshness check + exec agave-validator + entrypoint.py probe — liveness probe (slot lag check, exits 0/1) + +Replaces the bash entrypoint.sh / start-rpc.sh / start-validator.sh with a single +Python module. Test mode still dispatches to start-test.sh. + +All configuration comes from environment variables — same vars as the original +bash scripts. See compose files for defaults. 
+""" + +from __future__ import annotations + +import json +import logging +import os +import re +import subprocess +import sys +import time +import urllib.error +import urllib.request +from pathlib import Path +from urllib.request import Request + +log: logging.Logger = logging.getLogger("entrypoint") + +# Directories +CONFIG_DIR = "/data/config" +LEDGER_DIR = "/data/ledger" +ACCOUNTS_DIR = "/data/accounts" +SNAPSHOTS_DIR = "/data/snapshots" +LOG_DIR = "/data/log" +IDENTITY_FILE = f"{CONFIG_DIR}/validator-identity.json" + +# Snapshot filename pattern +FULL_SNAP_RE: re.Pattern[str] = re.compile( + r"^snapshot-(\d+)-[A-Za-z0-9]+\.tar\.(zst|bz2)$" +) + +MAINNET_RPC = "https://api.mainnet-beta.solana.com" + + +# -- Helpers ------------------------------------------------------------------- + + +def env(name: str, default: str = "") -> str: + """Read env var with default.""" + return os.environ.get(name, default) + + +def env_required(name: str) -> str: + """Read required env var, exit if missing.""" + val = os.environ.get(name) + if not val: + log.error("%s is required but not set", name) + sys.exit(1) + return val + + +def env_bool(name: str, default: bool = False) -> bool: + """Read boolean env var (true/false/1/0).""" + val = os.environ.get(name, "").lower() + if not val: + return default + return val in ("true", "1", "yes") + + +def rpc_get_slot(url: str, timeout: int = 10) -> int | None: + """Get current slot from a Solana RPC endpoint.""" + payload = json.dumps({ + "jsonrpc": "2.0", "id": 1, + "method": "getSlot", "params": [], + }).encode() + req = Request(url, data=payload, + headers={"Content-Type": "application/json"}) + try: + with urllib.request.urlopen(req, timeout=timeout) as resp: + data = json.loads(resp.read()) + result = data.get("result") + if isinstance(result, int): + return result + except (urllib.error.URLError, json.JSONDecodeError, OSError, TimeoutError): + pass + return None + + +# -- Snapshot management 
------------------------------------------------------- + + +def get_local_snapshot_slot(snapshots_dir: str) -> int | None: + """Find the highest slot among local snapshot files.""" + best_slot: int | None = None + snap_path = Path(snapshots_dir) + if not snap_path.is_dir(): + return None + for entry in snap_path.iterdir(): + m = FULL_SNAP_RE.match(entry.name) + if m: + slot = int(m.group(1)) + if best_slot is None or slot > best_slot: + best_slot = slot + return best_slot + + +def clean_snapshots(snapshots_dir: str) -> None: + """Remove all snapshot files from the directory.""" + snap_path = Path(snapshots_dir) + if not snap_path.is_dir(): + return + for entry in snap_path.iterdir(): + if entry.name.startswith(("snapshot-", "incremental-snapshot-")): + log.info("Removing old snapshot: %s", entry.name) + entry.unlink(missing_ok=True) + + +def maybe_download_snapshot(snapshots_dir: str) -> None: + """Check snapshot freshness and download if needed. + + Controlled by env vars: + SNAPSHOT_AUTO_DOWNLOAD (default: true) — enable/disable + SNAPSHOT_MAX_AGE_SLOTS (default: 20000) — staleness threshold + """ + if not env_bool("SNAPSHOT_AUTO_DOWNLOAD", default=True): + log.info("Snapshot auto-download disabled") + return + + max_age = int(env("SNAPSHOT_MAX_AGE_SLOTS", "20000")) + + # Get mainnet current slot + mainnet_slot = rpc_get_slot(MAINNET_RPC) + if mainnet_slot is None: + log.warning("Cannot reach mainnet RPC — skipping snapshot check") + return + + # Check local snapshot + local_slot = get_local_snapshot_slot(snapshots_dir) + if local_slot is not None: + age = mainnet_slot - local_slot + log.info("Local snapshot at slot %d (mainnet: %d, age: %d slots)", + local_slot, mainnet_slot, age) + if age <= max_age: + log.info("Snapshot is fresh enough (age %d <= %d), skipping download", age, max_age) + return + log.info("Snapshot is stale (age %d > %d), downloading fresh", age, max_age) + else: + log.info("No local snapshot found, downloading") + + # Clean old snapshots 
before downloading + clean_snapshots(snapshots_dir) + + # Import and call snapshot download + # snapshot_download.py is installed alongside this file in /usr/local/bin/ + script_dir = Path(__file__).resolve().parent + sys.path.insert(0, str(script_dir)) + from snapshot_download import download_best_snapshot + + ok = download_best_snapshot(snapshots_dir) + if not ok: + log.error("Snapshot download failed — starting without fresh snapshot") + + +# -- Directory and identity setup ---------------------------------------------- + + +def ensure_dirs(*dirs: str) -> None: + """Create directories and fix ownership.""" + uid = os.getuid() + gid = os.getgid() + for d in dirs: + os.makedirs(d, exist_ok=True) + try: + subprocess.run( + ["sudo", "chown", "-R", f"{uid}:{gid}", d], + check=False, capture_output=True, + ) + except FileNotFoundError: + pass # sudo not available — dirs already owned correctly + + +def ensure_identity_rpc() -> None: + """Generate ephemeral identity keypair for RPC mode if not mounted.""" + if os.path.isfile(IDENTITY_FILE): + return + log.info("Generating RPC node identity keypair...") + subprocess.run( + ["solana-keygen", "new", "--no-passphrase", "--silent", + "--force", "--outfile", IDENTITY_FILE], + check=True, + ) + + +def print_identity() -> None: + """Print the node identity pubkey.""" + result = subprocess.run( + ["solana-keygen", "pubkey", IDENTITY_FILE], + capture_output=True, text=True, check=False, + ) + if result.returncode == 0: + log.info("Node identity: %s", result.stdout.strip()) + + +# -- Arg construction ---------------------------------------------------------- + + +def build_common_args() -> list[str]: + """Build agave-validator args common to both RPC and validator modes.""" + args: list[str] = [ + "--identity", IDENTITY_FILE, + "--entrypoint", env_required("VALIDATOR_ENTRYPOINT"), + "--known-validator", env_required("KNOWN_VALIDATOR"), + "--ledger", LEDGER_DIR, + "--accounts", ACCOUNTS_DIR, + "--snapshots", SNAPSHOTS_DIR, + 
"--rpc-port", env("RPC_PORT", "8899"), + "--rpc-bind-address", env("RPC_BIND_ADDRESS", "127.0.0.1"), + "--gossip-port", env("GOSSIP_PORT", "8001"), + "--dynamic-port-range", env("DYNAMIC_PORT_RANGE", "9000-10000"), + "--no-os-network-limits-test", + "--wal-recovery-mode", "skip_any_corrupted_record", + "--limit-ledger-size", env("LIMIT_LEDGER_SIZE", "50000000"), + ] + + # Snapshot generation + if env("NO_SNAPSHOTS") == "true": + args.append("--no-snapshots") + else: + args += [ + "--full-snapshot-interval-slots", env("SNAPSHOT_INTERVAL_SLOTS", "100000"), + "--maximum-full-snapshots-to-retain", env("MAXIMUM_SNAPSHOTS_TO_RETAIN", "5"), + ] + if env("NO_INCREMENTAL_SNAPSHOTS") != "true": + args += ["--maximum-incremental-snapshots-to-retain", "2"] + + # Account indexes + account_indexes = env("ACCOUNT_INDEXES") + if account_indexes: + for idx in account_indexes.split(","): + idx = idx.strip() + if idx: + args += ["--account-index", idx] + + # Additional entrypoints + for ep in env("EXTRA_ENTRYPOINTS").split(): + if ep: + args += ["--entrypoint", ep] + + # Additional known validators + for kv in env("EXTRA_KNOWN_VALIDATORS").split(): + if kv: + args += ["--known-validator", kv] + + # Cluster verification + genesis_hash = env("EXPECTED_GENESIS_HASH") + if genesis_hash: + args += ["--expected-genesis-hash", genesis_hash] + shred_version = env("EXPECTED_SHRED_VERSION") + if shred_version: + args += ["--expected-shred-version", shred_version] + + # Metrics — just needs to be in the environment, agave reads it directly + # (env var is already set, nothing to pass as arg) + + # Gossip host / TVU address + gossip_host = env("GOSSIP_HOST") + if gossip_host: + args += ["--gossip-host", gossip_host] + elif env("PUBLIC_TVU_ADDRESS"): + args += ["--public-tvu-address", env("PUBLIC_TVU_ADDRESS")] + + # Jito flags + if env("JITO_ENABLE") == "true": + log.info("Jito MEV enabled") + jito_flags: list[tuple[str, str]] = [ + ("JITO_TIP_PAYMENT_PROGRAM", "--tip-payment-program-pubkey"), + 
("JITO_DISTRIBUTION_PROGRAM", "--tip-distribution-program-pubkey"), + ("JITO_MERKLE_ROOT_AUTHORITY", "--merkle-root-upload-authority"), + ("JITO_COMMISSION_BPS", "--commission-bps"), + ("JITO_BLOCK_ENGINE_URL", "--block-engine-url"), + ("JITO_SHRED_RECEIVER_ADDR", "--shred-receiver-address"), + ] + for env_name, flag in jito_flags: + val = env(env_name) + if val: + args += [flag, val] + + return args + + +def build_rpc_args() -> list[str]: + """Build agave-validator args for RPC (non-voting) mode.""" + args = build_common_args() + args += [ + "--no-voting", + "--log", f"{LOG_DIR}/validator.log", + "--full-rpc-api", + "--enable-rpc-transaction-history", + "--rpc-pubsub-enable-block-subscription", + "--enable-extended-tx-metadata-storage", + "--no-wait-for-vote-to-start-leader", + "--no-snapshot-fetch", + ] + + # Public vs private RPC + public_rpc = env("PUBLIC_RPC_ADDRESS") + if public_rpc: + args += ["--public-rpc-address", public_rpc] + else: + args += ["--private-rpc", "--allow-private-addr", "--only-known-rpc"] + + # Jito relayer URL (RPC mode doesn't use it, but validator mode does — + # handled in build_validator_args) + + return args + + +def build_validator_args() -> list[str]: + """Build agave-validator args for voting validator mode.""" + vote_keypair = env("VOTE_ACCOUNT_KEYPAIR", + "/data/config/vote-account-keypair.json") + + # Identity must be mounted for validator mode + if not os.path.isfile(IDENTITY_FILE): + log.error("Validator identity keypair not found at %s", IDENTITY_FILE) + log.error("Mount your validator keypair to %s", IDENTITY_FILE) + sys.exit(1) + + # Vote account keypair must exist + if not os.path.isfile(vote_keypair): + log.error("Vote account keypair not found at %s", vote_keypair) + log.error("Mount your vote account keypair or set VOTE_ACCOUNT_KEYPAIR") + sys.exit(1) + + # Print vote account pubkey + result = subprocess.run( + ["solana-keygen", "pubkey", vote_keypair], + capture_output=True, text=True, check=False, + ) + if 
result.returncode == 0: + log.info("Vote account: %s", result.stdout.strip()) + + args = build_common_args() + args += [ + "--vote-account", vote_keypair, + "--log", "-", + ] + + # Jito relayer URL (validator-only) + relayer_url = env("JITO_RELAYER_URL") + if env("JITO_ENABLE") == "true" and relayer_url: + args += ["--relayer-url", relayer_url] + + return args + + +def append_extra_args(args: list[str]) -> list[str]: + """Append EXTRA_ARGS passthrough flags.""" + extra = env("EXTRA_ARGS") + if extra: + args += extra.split() + return args + + +# -- Serve subcommand --------------------------------------------------------- + + +def cmd_serve() -> None: + """Main serve flow: snapshot check, setup, exec agave-validator.""" + mode = env("AGAVE_MODE", "test") + log.info("AGAVE_MODE=%s", mode) + + # Test mode dispatches to start-test.sh + if mode == "test": + os.execvp("start-test.sh", ["start-test.sh"]) + + if mode not in ("rpc", "validator"): + log.error("Unknown AGAVE_MODE: %s (valid: test, rpc, validator)", mode) + sys.exit(1) + + # Ensure directories + dirs = [CONFIG_DIR, LEDGER_DIR, ACCOUNTS_DIR, SNAPSHOTS_DIR] + if mode == "rpc": + dirs.append(LOG_DIR) + ensure_dirs(*dirs) + + # Snapshot freshness check and auto-download + maybe_download_snapshot(SNAPSHOTS_DIR) + + # Identity setup + if mode == "rpc": + ensure_identity_rpc() + print_identity() + + # Build args + if mode == "rpc": + args = build_rpc_args() + else: + args = build_validator_args() + + args = append_extra_args(args) + + # Write startup timestamp for probe grace period + Path("/tmp/entrypoint-start").write_text(str(time.time())) + + log.info("Starting agave-validator with %d arguments", len(args)) + os.execvp("agave-validator", ["agave-validator"] + args) + + +# -- Probe subcommand --------------------------------------------------------- + + +def cmd_probe() -> None: + """Liveness probe: check local RPC slot vs mainnet. + + Exit 0 = healthy, exit 1 = unhealthy. 
+ + Grace period: PROBE_GRACE_SECONDS (default 600) — probe always passes + during grace period to allow for snapshot unpacking and initial replay. + """ + grace_seconds = int(env("PROBE_GRACE_SECONDS", "600")) + max_lag = int(env("PROBE_MAX_SLOT_LAG", "20000")) + + # Check grace period + start_file = Path("/tmp/entrypoint-start") + if start_file.exists(): + try: + start_time = float(start_file.read_text().strip()) + elapsed = time.time() - start_time + if elapsed < grace_seconds: + # Within grace period — always healthy + sys.exit(0) + except (ValueError, OSError): + pass + else: + # No start file — serve hasn't started yet, within grace + sys.exit(0) + + # Query local RPC + rpc_port = env("RPC_PORT", "8899") + local_url = f"http://127.0.0.1:{rpc_port}" + local_slot = rpc_get_slot(local_url, timeout=5) + if local_slot is None: + # Local RPC unreachable after grace period — unhealthy + sys.exit(1) + + # Query mainnet + mainnet_slot = rpc_get_slot(MAINNET_RPC, timeout=10) + if mainnet_slot is None: + # Can't reach mainnet to compare — assume healthy (don't penalize + # the validator for mainnet RPC being down) + sys.exit(0) + + lag = mainnet_slot - local_slot + if lag > max_lag: + sys.exit(1) + + sys.exit(0) + + +# -- Main ---------------------------------------------------------------------- + + +def main() -> None: + logging.basicConfig( + level=logging.INFO, + format="%(asctime)s %(levelname)s [%(name)s] %(message)s", + datefmt="%H:%M:%S", + ) + + subcmd = sys.argv[1] if len(sys.argv) > 1 else "serve" + + if subcmd == "serve": + cmd_serve() + elif subcmd == "probe": + cmd_probe() + else: + log.error("Unknown subcommand: %s (valid: serve, probe)", subcmd) + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/snapshot_download.py b/snapshot_download.py new file mode 100644 index 00000000..61a39019 --- /dev/null +++ b/snapshot_download.py @@ -0,0 +1,641 @@ +#!/usr/bin/env python3 +"""Download Solana snapshots using aria2c for parallel multi-connection 
downloads. + +Discovers snapshot sources by querying getClusterNodes for all RPCs in the +cluster, probing each for available snapshots, benchmarking download speed, +and downloading from the fastest source using aria2c (16 connections by default). + +Based on the discovery approach from etcusr/solana-snapshot-finder but replaces +the single-connection wget download with aria2c parallel chunked downloads. + +Usage: + # Download to /srv/kind/solana/snapshots (mainnet, 16 connections) + ./snapshot_download.py -o /srv/kind/solana/snapshots + + # Dry run — find best source, print URL + ./snapshot_download.py --dry-run + + # Custom RPC for cluster discovery + 32 connections + ./snapshot_download.py -r https://api.mainnet-beta.solana.com -n 32 + + # Testnet + ./snapshot_download.py -c testnet -o /data/snapshots + + # Programmatic use from entrypoint.py: + from snapshot_download import download_best_snapshot + ok = download_best_snapshot("/data/snapshots") + +Requirements: + - aria2c (apt install aria2) + - python3 >= 3.10 (stdlib only, no pip dependencies) +""" + +from __future__ import annotations + +import argparse +import concurrent.futures +import json +import logging +import os +import re +import shutil +import subprocess +import sys +import time +import urllib.error +import urllib.request +from dataclasses import dataclass, field +from http.client import HTTPResponse +from pathlib import Path +from urllib.request import Request + +log: logging.Logger = logging.getLogger("snapshot-download") + +CLUSTER_RPC: dict[str, str] = { + "mainnet-beta": "https://api.mainnet-beta.solana.com", + "testnet": "https://api.testnet.solana.com", + "devnet": "https://api.devnet.solana.com", +} + +# Snapshot filenames: +# snapshot-<slot>-<hash>.tar.zst +# incremental-snapshot-<base_slot>-<slot>-<hash>.tar.zst +FULL_SNAP_RE: re.Pattern[str] = re.compile( + r"^snapshot-(\d+)-([A-Za-z0-9]+)\.tar\.(zst|bz2)$" +) +INCR_SNAP_RE: re.Pattern[str] = re.compile( +
r"^incremental-snapshot-(\d+)-(\d+)-([A-Za-z0-9]+)\.tar\.(zst|bz2)$" +) + + +@dataclass +class SnapshotSource: + """A snapshot file available from a specific RPC node.""" + + rpc_address: str + # Full redirect paths as returned by the server (e.g. /snapshot-123-hash.tar.zst) + file_paths: list[str] = field(default_factory=list) + slots_diff: int = 0 + latency_ms: float = 0.0 + download_speed: float = 0.0 # bytes/sec + + +# -- JSON-RPC helpers ---------------------------------------------------------- + + +class _NoRedirectHandler(urllib.request.HTTPRedirectHandler): + """Handler that captures redirect Location instead of following it.""" + + def redirect_request( + self, + req: Request, + fp: HTTPResponse, + code: int, + msg: str, + headers: dict[str, str], # type: ignore[override] + newurl: str, + ) -> None: + return None + + +def rpc_post(url: str, method: str, params: list[object] | None = None, + timeout: int = 25) -> object | None: + """JSON-RPC POST. Returns parsed 'result' field or None on error.""" + payload: bytes = json.dumps({ + "jsonrpc": "2.0", "id": 1, + "method": method, "params": params or [], + }).encode() + req = Request(url, data=payload, + headers={"Content-Type": "application/json"}) + try: + with urllib.request.urlopen(req, timeout=timeout) as resp: + data: dict[str, object] = json.loads(resp.read()) + return data.get("result") + except (urllib.error.URLError, json.JSONDecodeError, OSError, TimeoutError) as e: + log.debug("rpc_post %s %s failed: %s", url, method, e) + return None + + +def head_no_follow(url: str, timeout: float = 3) -> tuple[str | None, float]: + """HEAD request without following redirects. + + Returns (Location header value, latency_sec) if the server returned a + 3xx redirect. Returns (None, 0.0) on any error or non-redirect response. 
+ """ + opener: urllib.request.OpenerDirector = urllib.request.build_opener(_NoRedirectHandler) + req = Request(url, method="HEAD") + try: + start: float = time.monotonic() + resp: HTTPResponse = opener.open(req, timeout=timeout) # type: ignore[assignment] + latency: float = time.monotonic() - start + # Non-redirect (2xx) — server didn't redirect, not useful for discovery + location: str | None = resp.headers.get("Location") + resp.close() + return location, latency + except urllib.error.HTTPError as e: + # 3xx redirects raise HTTPError with the redirect info + latency = time.monotonic() - start # type: ignore[possibly-undefined] + location = e.headers.get("Location") + if location and 300 <= e.code < 400: + return location, latency + return None, 0.0 + except (urllib.error.URLError, OSError, TimeoutError): + return None, 0.0 + + +# -- Discovery ----------------------------------------------------------------- + + +def get_current_slot(rpc_url: str) -> int | None: + """Get current slot from RPC.""" + result: object | None = rpc_post(rpc_url, "getSlot") + if isinstance(result, int): + return result + return None + + +def get_cluster_rpc_nodes(rpc_url: str, version_filter: str | None = None) -> list[str]: + """Get all RPC node addresses from getClusterNodes.""" + result: object | None = rpc_post(rpc_url, "getClusterNodes") + if not isinstance(result, list): + return [] + + rpc_addrs: list[str] = [] + for node in result: + if not isinstance(node, dict): + continue + if version_filter is not None: + node_version: str | None = node.get("version") + if node_version and not node_version.startswith(version_filter): + continue + rpc: str | None = node.get("rpc") + if rpc: + rpc_addrs.append(rpc) + return list(set(rpc_addrs)) + + +def _parse_snapshot_filename(location: str) -> tuple[str, str | None]: + """Extract filename and full redirect path from Location header. + + Returns (filename, full_path). full_path includes any path prefix + the server returned (e.g. 
'/snapshots/snapshot-123-hash.tar.zst'). + """ + # Location may be absolute URL or relative path + if location.startswith("http://") or location.startswith("https://"): + # Absolute URL — extract path + from urllib.parse import urlparse + path: str = urlparse(location).path + else: + path = location + + filename: str = path.rsplit("/", 1)[-1] + return filename, path + + +def probe_rpc_snapshot( + rpc_address: str, + current_slot: int, +) -> SnapshotSource | None: + """Probe a single RPC node for available snapshots. + + Discovery only — no filtering. Returns a SnapshotSource with all available + info so the caller can decide what to keep. Filtering happens after all + probes complete, so rejected sources are still visible for debugging. + """ + full_url: str = f"http://{rpc_address}/snapshot.tar.bz2" + + # Full snapshot is required — every source must have one + full_location, full_latency = head_no_follow(full_url, timeout=2) + if not full_location: + return None + + latency_ms: float = full_latency * 1000 + + full_filename, full_path = _parse_snapshot_filename(full_location) + fm: re.Match[str] | None = FULL_SNAP_RE.match(full_filename) + if not fm: + return None + + full_snap_slot: int = int(fm.group(1)) + slots_diff: int = current_slot - full_snap_slot + + file_paths: list[str] = [full_path] + + # Also check for incremental snapshot + inc_url: str = f"http://{rpc_address}/incremental-snapshot.tar.bz2" + inc_location, _ = head_no_follow(inc_url, timeout=2) + if inc_location: + inc_filename, inc_path = _parse_snapshot_filename(inc_location) + m: re.Match[str] | None = INCR_SNAP_RE.match(inc_filename) + if m: + inc_base_slot: int = int(m.group(1)) + # Incremental must be based on this source's full snapshot + if inc_base_slot == full_snap_slot: + file_paths.append(inc_path) + + return SnapshotSource( + rpc_address=rpc_address, + file_paths=file_paths, + slots_diff=slots_diff, + latency_ms=latency_ms, + ) + + +def discover_sources( + rpc_url: str, + current_slot: 
int, + max_age_slots: int, + max_latency_ms: float, + threads: int, + version_filter: str | None, +) -> list[SnapshotSource]: + """Discover all snapshot sources, then filter. + + Probing and filtering are separate: all reachable sources are collected + first so we can report what exists even if filters reject everything. + """ + rpc_nodes: list[str] = get_cluster_rpc_nodes(rpc_url, version_filter) + if not rpc_nodes: + log.error("No RPC nodes found via getClusterNodes") + return [] + + log.info("Found %d RPC nodes, probing for snapshots...", len(rpc_nodes)) + + all_sources: list[SnapshotSource] = [] + with concurrent.futures.ThreadPoolExecutor(max_workers=threads) as pool: + futures: dict[concurrent.futures.Future[SnapshotSource | None], str] = { + pool.submit(probe_rpc_snapshot, addr, current_slot): addr + for addr in rpc_nodes + } + done: int = 0 + for future in concurrent.futures.as_completed(futures): + done += 1 + if done % 200 == 0: + log.info(" probed %d/%d nodes, %d reachable", + done, len(rpc_nodes), len(all_sources)) + try: + result: SnapshotSource | None = future.result() + except (urllib.error.URLError, OSError, TimeoutError) as e: + log.debug("Probe failed for %s: %s", futures[future], e) + continue + if result: + all_sources.append(result) + + log.info("Discovered %d reachable sources", len(all_sources)) + + # Apply filters + filtered: list[SnapshotSource] = [] + rejected_age: int = 0 + rejected_latency: int = 0 + for src in all_sources: + if src.slots_diff > max_age_slots or src.slots_diff < -100: + rejected_age += 1 + continue + if src.latency_ms > max_latency_ms: + rejected_latency += 1 + continue + filtered.append(src) + + if rejected_age or rejected_latency: + log.info("Filtered: %d rejected by age (>%d slots), %d by latency (>%.0fms)", + rejected_age, max_age_slots, rejected_latency, max_latency_ms) + + if not filtered and all_sources: + # Show what was available so the user can adjust filters + all_sources.sort(key=lambda s: s.slots_diff) + 
best = all_sources[0] + log.warning("All %d sources rejected by filters. Best available: " + "%s (age=%d slots, latency=%.0fms). " + "Try --max-snapshot-age %d --max-latency %.0f", + len(all_sources), best.rpc_address, + best.slots_diff, best.latency_ms, + best.slots_diff + 500, + max(best.latency_ms * 1.5, 500)) + + log.info("Found %d sources after filtering", len(filtered)) + return filtered + + +# -- Speed benchmark ----------------------------------------------------------- + + +def measure_speed(rpc_address: str, measure_time: int = 7) -> float: + """Measure download speed from an RPC node. Returns bytes/sec.""" + url: str = f"http://{rpc_address}/snapshot.tar.bz2" + req = Request(url) + try: + with urllib.request.urlopen(req, timeout=measure_time + 5) as resp: + start: float = time.monotonic() + total: int = 0 + while True: + elapsed: float = time.monotonic() - start + if elapsed >= measure_time: + break + chunk: bytes = resp.read(81920) + if not chunk: + break + total += len(chunk) + elapsed = time.monotonic() - start + if elapsed <= 0: + return 0.0 + return total / elapsed + except (urllib.error.URLError, OSError, TimeoutError): + return 0.0 + + +# -- Download ------------------------------------------------------------------ + + +def download_aria2c( + urls: list[str], + output_dir: str, + filename: str, + connections: int = 16, +) -> bool: + """Download a file using aria2c with parallel connections. + + When multiple URLs are provided, aria2c treats them as mirrors of the + same file and distributes chunks across all of them. + """ + num_mirrors: int = len(urls) + total_splits: int = max(connections, connections * num_mirrors) + cmd: list[str] = [ + "aria2c", + "--file-allocation=none", + "--continue=false", + f"--max-connection-per-server={connections}", + f"--split={total_splits}", + "--min-split-size=50M", + # aria2c retries individual chunk connections on transient network + # errors (TCP reset, timeout). 
This is transport-level retry analogous + # to TCP retransmit, not application-level retry of a failed operation. + "--max-tries=5", + "--retry-wait=5", + "--timeout=60", + "--connect-timeout=10", + "--summary-interval=10", + "--console-log-level=notice", + f"--dir={output_dir}", + f"--out={filename}", + "--auto-file-renaming=false", + "--allow-overwrite=true", + *urls, + ] + + log.info("Downloading %s", filename) + log.info(" aria2c: %d connections x %d mirrors (%d splits)", + connections, num_mirrors, total_splits) + + start: float = time.monotonic() + result: subprocess.CompletedProcess[bytes] = subprocess.run(cmd) + elapsed: float = time.monotonic() - start + + if result.returncode != 0: + log.error("aria2c failed with exit code %d", result.returncode) + return False + + filepath: Path = Path(output_dir) / filename + if not filepath.exists(): + log.error("aria2c reported success but %s does not exist", filepath) + return False + + size_bytes: int = filepath.stat().st_size + size_gb: float = size_bytes / (1024 ** 3) + avg_mb: float = size_bytes / elapsed / (1024 ** 2) if elapsed > 0 else 0 + log.info(" Done: %.1f GB in %.0fs (%.1f MiB/s avg)", size_gb, elapsed, avg_mb) + return True + + +# -- Public API ---------------------------------------------------------------- + + +def download_best_snapshot( + output_dir: str, + *, + cluster: str = "mainnet-beta", + rpc_url: str | None = None, + connections: int = 16, + threads: int = 500, + max_snapshot_age: int = 10000, + max_latency: float = 500, + min_download_speed: int = 20, + measurement_time: int = 7, + max_speed_checks: int = 15, + version_filter: str | None = None, + full_only: bool = False, +) -> bool: + """Download the best available snapshot to output_dir. + + This is the programmatic API — called by entrypoint.py for automatic + snapshot download. Returns True on success, False on failure. + + All parameters have sensible defaults matching the CLI interface. 
+ """ + resolved_rpc: str = rpc_url or CLUSTER_RPC[cluster] + + if not shutil.which("aria2c"): + log.error("aria2c not found. Install with: apt install aria2") + return False + + log.info("Cluster: %s | RPC: %s", cluster, resolved_rpc) + current_slot: int | None = get_current_slot(resolved_rpc) + if current_slot is None: + log.error("Cannot get current slot from %s", resolved_rpc) + return False + log.info("Current slot: %d", current_slot) + + sources: list[SnapshotSource] = discover_sources( + resolved_rpc, current_slot, + max_age_slots=max_snapshot_age, + max_latency_ms=max_latency, + threads=threads, + version_filter=version_filter, + ) + if not sources: + log.error("No snapshot sources found") + return False + + # Sort by latency (lowest first) for speed benchmarking + sources.sort(key=lambda s: s.latency_ms) + + # Benchmark top candidates + log.info("Benchmarking download speed on top %d sources...", max_speed_checks) + fast_sources: list[SnapshotSource] = [] + checked: int = 0 + min_speed_bytes: int = min_download_speed * 1024 * 1024 + + for source in sources: + if checked >= max_speed_checks: + break + checked += 1 + + speed: float = measure_speed(source.rpc_address, measurement_time) + source.download_speed = speed + speed_mib: float = speed / (1024 ** 2) + + if speed < min_speed_bytes: + log.info(" %s: %.1f MiB/s (too slow, need >=%d MiB/s)", + source.rpc_address, speed_mib, min_download_speed) + continue + + log.info(" %s: %.1f MiB/s (latency: %.0fms, age: %d slots)", + source.rpc_address, speed_mib, + source.latency_ms, source.slots_diff) + fast_sources.append(source) + + if not fast_sources: + log.error("No source met minimum speed requirement (%d MiB/s)", + min_download_speed) + return False + + # Use the fastest source as primary, collect mirrors for each file + best: SnapshotSource = fast_sources[0] + file_paths: list[str] = best.file_paths + if full_only: + file_paths = [fp for fp in file_paths + if fp.rsplit("/", 1)[-1].startswith("snapshot-")] + + 
# Build mirror URL lists + download_plan: list[tuple[str, list[str]]] = [] + for fp in file_paths: + filename: str = fp.rsplit("/", 1)[-1] + mirror_urls: list[str] = [f"http://{best.rpc_address}{fp}"] + for other in fast_sources[1:]: + for other_fp in other.file_paths: + if other_fp.rsplit("/", 1)[-1] == filename: + mirror_urls.append(f"http://{other.rpc_address}{other_fp}") + break + download_plan.append((filename, mirror_urls)) + + speed_mib: float = best.download_speed / (1024 ** 2) + log.info("Best source: %s (%.1f MiB/s), %d mirrors total", + best.rpc_address, speed_mib, len(fast_sources)) + for filename, mirror_urls in download_plan: + log.info(" %s (%d mirrors)", filename, len(mirror_urls)) + + # Download + os.makedirs(output_dir, exist_ok=True) + total_start: float = time.monotonic() + + for filename, mirror_urls in download_plan: + filepath: Path = Path(output_dir) / filename + if filepath.exists() and filepath.stat().st_size > 0: + log.info("Skipping %s (already exists: %.1f GB)", + filename, filepath.stat().st_size / (1024 ** 3)) + continue + if not download_aria2c(mirror_urls, output_dir, filename, connections): + log.error("Failed to download %s", filename) + return False + + total_elapsed: float = time.monotonic() - total_start + log.info("All downloads complete in %.0fs", total_elapsed) + for filename, _ in download_plan: + fp_path: Path = Path(output_dir) / filename + if fp_path.exists(): + log.info(" %s (%.1f GB)", fp_path.name, fp_path.stat().st_size / (1024 ** 3)) + + return True + + +# -- Main (CLI) ---------------------------------------------------------------- + + +def main() -> int: + p: argparse.ArgumentParser = argparse.ArgumentParser( + description="Download Solana snapshots with aria2c parallel downloads", + ) + p.add_argument("-o", "--output", default="/srv/kind/solana/snapshots", + help="Snapshot output directory (default: /srv/kind/solana/snapshots)") + p.add_argument("-c", "--cluster", default="mainnet-beta", + 
choices=list(CLUSTER_RPC), + help="Solana cluster (default: mainnet-beta)") + p.add_argument("-r", "--rpc", default=None, + help="RPC URL for cluster discovery (default: public RPC)") + p.add_argument("-n", "--connections", type=int, default=16, + help="aria2c connections per download (default: 16)") + p.add_argument("-t", "--threads", type=int, default=500, + help="Threads for parallel RPC probing (default: 500)") + p.add_argument("--max-snapshot-age", type=int, default=10000, + help="Max snapshot age in slots (default: 10000)") + p.add_argument("--max-latency", type=float, default=500, + help="Max RPC probe latency in ms (default: 500)") + p.add_argument("--min-download-speed", type=int, default=20, + help="Min download speed in MiB/s (default: 20)") + p.add_argument("--measurement-time", type=int, default=7, + help="Speed measurement duration in seconds (default: 7)") + p.add_argument("--max-speed-checks", type=int, default=15, + help="Max nodes to benchmark before giving up (default: 15)") + p.add_argument("--version", default=None, + help="Filter nodes by version prefix (e.g. '2.2')") + p.add_argument("--full-only", action="store_true", + help="Download only full snapshot, skip incremental") + p.add_argument("--dry-run", action="store_true", + help="Find best source and print URL, don't download") + p.add_argument("--post-cmd", + help="Shell command to run after successful download " + "(e.g. 'kubectl scale deployment ... 
--replicas=1')") + p.add_argument("-v", "--verbose", action="store_true") + args: argparse.Namespace = p.parse_args() + + logging.basicConfig( + level=logging.DEBUG if args.verbose else logging.INFO, + format="%(asctime)s %(levelname)s %(message)s", + datefmt="%H:%M:%S", + ) + + # Dry-run uses the original inline flow (needs access to sources for URL printing) + if args.dry_run: + rpc_url: str = args.rpc or CLUSTER_RPC[args.cluster] + current_slot: int | None = get_current_slot(rpc_url) + if current_slot is None: + log.error("Cannot get current slot from %s", rpc_url) + return 1 + + sources: list[SnapshotSource] = discover_sources( + rpc_url, current_slot, + max_age_slots=args.max_snapshot_age, + max_latency_ms=args.max_latency, + threads=args.threads, + version_filter=args.version, + ) + if not sources: + log.error("No snapshot sources found") + return 1 + + sources.sort(key=lambda s: s.latency_ms) + best = sources[0] + for fp in best.file_paths: + print(f"http://{best.rpc_address}{fp}") + return 0 + + ok: bool = download_best_snapshot( + args.output, + cluster=args.cluster, + rpc_url=args.rpc, + connections=args.connections, + threads=args.threads, + max_snapshot_age=args.max_snapshot_age, + max_latency=args.max_latency, + min_download_speed=args.min_download_speed, + measurement_time=args.measurement_time, + max_speed_checks=args.max_speed_checks, + version_filter=args.version, + full_only=args.full_only, + ) + + if ok and args.post_cmd: + log.info("Running post-download command: %s", args.post_cmd) + result: subprocess.CompletedProcess[bytes] = subprocess.run( + args.post_cmd, shell=True, + ) + if result.returncode != 0: + log.error("Post-download command failed with exit code %d", + result.returncode) + return 1 + log.info("Post-download command completed successfully") + + return 0 if ok else 1 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/start-test.sh b/start-test.sh new file mode 100644 index 00000000..e003a97a --- /dev/null +++ 
b/start-test.sh @@ -0,0 +1,112 @@ +#!/usr/bin/env bash +set -euo pipefail + +# ----------------------------------------------------------------------- +# Start solana-test-validator with optional SPL token setup +# +# Environment variables: +# FACILITATOR_PUBKEY - facilitator fee-payer public key (base58) +# SERVER_PUBKEY - server/payee wallet public key (base58) +# CLIENT_PUBKEY - client/payer wallet public key (base58) +# MINT_DECIMALS - token decimals (default: 6, matching USDC) +# MINT_AMOUNT - amount to mint to client (default: 1000000000) +# LEDGER_DIR - ledger directory (default: /data/ledger) +# ----------------------------------------------------------------------- + +LEDGER_DIR="${LEDGER_DIR:-/data/ledger}" +MINT_DECIMALS="${MINT_DECIMALS:-6}" +MINT_AMOUNT="${MINT_AMOUNT:-1000000000}" +SETUP_MARKER="${LEDGER_DIR}/.setup-done" + +sudo chown -R "$(id -u):$(id -g)" "$LEDGER_DIR" 2>/dev/null || true + +# Start test-validator in the background +solana-test-validator \ + --ledger "${LEDGER_DIR}" \ + --rpc-port 8899 \ + --bind-address 0.0.0.0 \ + --quiet & + +VALIDATOR_PID=$! + +# Wait for RPC to become available +echo "Waiting for test-validator RPC..." +for i in $(seq 1 60); do + if solana cluster-version --url http://127.0.0.1:8899 >/dev/null 2>&1; then + echo "Test-validator is ready (attempt ${i})" + break + fi + sleep 1 +done + +solana config set --url http://127.0.0.1:8899 + +# Only run setup once (idempotent via marker file) +if [ ! -f "${SETUP_MARKER}" ]; then + echo "Running first-time setup..." + + # Airdrop SOL to all wallets for gas + for PUBKEY in "${FACILITATOR_PUBKEY:-}" "${SERVER_PUBKEY:-}" "${CLIENT_PUBKEY:-}"; do + if [ -n "${PUBKEY}" ]; then + echo "Airdropping 100 SOL to ${PUBKEY}..." 
+ solana airdrop 100 "${PUBKEY}" --url http://127.0.0.1:8899 || true + fi + done + + # Create a USDC-equivalent SPL token mint if any pubkeys are set + if [ -n "${CLIENT_PUBKEY:-}" ] || [ -n "${FACILITATOR_PUBKEY:-}" ] || [ -n "${SERVER_PUBKEY:-}" ]; then + MINT_AUTHORITY_FILE="${LEDGER_DIR}/mint-authority.json" + if [ ! -f "${MINT_AUTHORITY_FILE}" ]; then + solana-keygen new --no-bip39-passphrase --outfile "${MINT_AUTHORITY_FILE}" --force + MINT_AUTH_PUBKEY=$(solana-keygen pubkey "${MINT_AUTHORITY_FILE}") + solana airdrop 10 "${MINT_AUTH_PUBKEY}" --url http://127.0.0.1:8899 + fi + + MINT_ADDRESS_FILE="${LEDGER_DIR}/usdc-mint-address.txt" + if [ ! -f "${MINT_ADDRESS_FILE}" ]; then + spl-token create-token \ + --decimals "${MINT_DECIMALS}" \ + --mint-authority "${MINT_AUTHORITY_FILE}" \ + --url http://127.0.0.1:8899 \ + 2>&1 | grep "Creating token" | awk '{print $3}' > "${MINT_ADDRESS_FILE}" + echo "Created USDC mint: $(cat "${MINT_ADDRESS_FILE}")" + fi + + USDC_MINT=$(cat "${MINT_ADDRESS_FILE}") + + # Create ATAs and mint tokens for the client + if [ -n "${CLIENT_PUBKEY:-}" ]; then + echo "Creating ATA for client ${CLIENT_PUBKEY}..." + spl-token create-account "${USDC_MINT}" \ + --owner "${CLIENT_PUBKEY}" \ + --fee-payer "${MINT_AUTHORITY_FILE}" \ + --url http://127.0.0.1:8899 || true + + echo "Minting ${MINT_AMOUNT} tokens to client..." + spl-token mint "${USDC_MINT}" "${MINT_AMOUNT}" \ + --recipient-owner "${CLIENT_PUBKEY}" \ + --mint-authority "${MINT_AUTHORITY_FILE}" \ + --url http://127.0.0.1:8899 || true + fi + + # Create ATAs for server and facilitator + for PUBKEY in "${SERVER_PUBKEY:-}" "${FACILITATOR_PUBKEY:-}"; do + if [ -n "${PUBKEY}" ]; then + echo "Creating ATA for ${PUBKEY}..." 
+ spl-token create-account "${USDC_MINT}" \ + --owner "${PUBKEY}" \ + --fee-payer "${MINT_AUTHORITY_FILE}" \ + --url http://127.0.0.1:8899 || true + fi + done + + # Expose mint address for other containers + cp "${MINT_ADDRESS_FILE}" /tmp/usdc-mint-address.txt 2>/dev/null || true + fi + + touch "${SETUP_MARKER}" + echo "Setup complete." +fi + +echo "solana-test-validator running (PID ${VALIDATOR_PID})" +wait ${VALIDATOR_PID} From 3574e387cc0edbd642bab564c52d69ed25363c6b Mon Sep 17 00:00:00 2001 From: "A. F. Dudley" Date: Sun, 8 Mar 2026 19:13:53 +0000 Subject: [PATCH 35/62] fix: update playbooks to use subtree path for snapshot_download.py scripts/agave-container/ is a git subtree of agave-stack's container-build directory. Replaces fragile cross-repo symlink with proper subtree. Co-Authored-By: Claude Opus 4.6 --- playbooks/biscayne-recover.yml | 2 +- playbooks/biscayne-redeploy.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/playbooks/biscayne-recover.yml b/playbooks/biscayne-recover.yml index de320902..38c2e1d3 100644 --- a/playbooks/biscayne-recover.yml +++ b/playbooks/biscayne-recover.yml @@ -36,7 +36,7 @@ snapshot_dir: /srv/kind/solana/snapshots accounts_dir: /srv/kind/solana/ramdisk/accounts ramdisk_mount: /srv/kind/solana/ramdisk - snapshot_script_local: "{{ playbook_dir }}/../scripts/snapshot-download.py" + snapshot_script_local: "{{ playbook_dir }}/../scripts/agave-container/snapshot_download.py" snapshot_script: /tmp/snapshot-download.py snapshot_args: "" # Mainnet RPC for slot comparison diff --git a/playbooks/biscayne-redeploy.yml b/playbooks/biscayne-redeploy.yml index 180e7f6f..8c2bd77d 100644 --- a/playbooks/biscayne-redeploy.yml +++ b/playbooks/biscayne-redeploy.yml @@ -62,7 +62,7 @@ accounts_dir: /srv/kind/solana/ramdisk/accounts ramdisk_mount: /srv/kind/solana/ramdisk ramdisk_size: 1024G - snapshot_script_local: "{{ playbook_dir }}/../scripts/snapshot-download.py" + snapshot_script_local: "{{ playbook_dir 
}}/../scripts/agave-container/snapshot_download.py" snapshot_script: /tmp/snapshot-download.py # Flags — non-destructive by default wipe_accounts: false From bd38c1b791e74a465efa1a26f9a8354643c77aa9 Mon Sep 17 00:00:00 2001 From: "A. F. Dudley" Date: Mon, 9 Mar 2026 05:14:43 +0000 Subject: [PATCH 36/62] fix: remove Ansible snapshot download, add sync-tools playbook MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The container entrypoint (entrypoint.py) handles snapshot download internally via aria2c. Ansible no longer needs to scale-to-0, download, scale-to-1 — it just deploys and lets the container manage startup. - biscayne-redeploy.yml: remove snapshot download section, simplify to teardown → wipe → deploy → verify - biscayne-sync-tools.yml: new playbook to sync laconic-so and agave-stack repos on biscayne, with separate branch controls - snapshot_download.py: re-probe for fresh incremental after full snapshot download completes (old incremental is stale by then) - Switch laconic_so_branch to fix/kind-mount-propagation (has hostNetwork translation code) Co-Authored-By: Claude Opus 4.6 --- playbooks/biscayne-redeploy.yml | 219 +++++-------------- playbooks/biscayne-start.yml | 25 ++- playbooks/biscayne-sync-tools.yml | 96 ++++++++ scripts/agave-container/snapshot_download.py | 50 ++++- 4 files changed, 211 insertions(+), 179 deletions(-) create mode 100644 playbooks/biscayne-sync-tools.yml diff --git a/playbooks/biscayne-redeploy.yml b/playbooks/biscayne-redeploy.yml index 8c2bd77d..b4a0e670 100644 --- a/playbooks/biscayne-redeploy.yml +++ b/playbooks/biscayne-redeploy.yml @@ -1,46 +1,33 @@ --- -# Redeploy agave-stack on biscayne with aria2c snapshot pre-download +# Redeploy agave-stack on biscayne # -# The validator's built-in downloader fetches snapshots at ~18 MB/s (single -# connection). 
snapshot-download.py uses aria2c with 16 parallel connections to -# saturate available bandwidth, cutting 90+ min downloads to ~10 min. +# The container entrypoint (entrypoint.py) handles snapshot download and +# agave-validator startup internally. This playbook just manages the k8s +# lifecycle: teardown, optional data wipe, deploy, and verify. # # Flow: -# 1. [teardown] Delete k8s namespace (preserve kind cluster) +# 1. [teardown] Scale to 0, wait for clean exit, delete namespace # 2. [wipe] Conditionally clear ledger / accounts / old snapshots -# 3. [deploy] laconic-so deployment start, then immediately scale to 0 -# 4. [snapshot] Download snapshot via aria2c to host bind mount -# 5. [snapshot] Verify snapshot visible inside kind node -# 6. [deploy,scale-up] Scale validator back to 1 -# 7. [verify] Wait for pod Running, check logs + RPC health +# 3. [deploy] Preflight checks, laconic-so deployment start +# 4. [verify] Wait for pod Running, check logs + RPC health # -# The validator cannot run during snapshot download — it would lock/use the -# snapshot files. laconic-so creates the cluster AND deploys the pod in one -# shot, so we scale to 0 immediately after deploy, download, then scale to 1. 
+# The entrypoint.py inside the container: +# - Checks snapshot freshness against mainnet +# - Downloads fresh snapshot via aria2c if needed +# - Builds agave-validator args from env vars +# - Execs agave-validator # # Usage: -# # Standard redeploy (download snapshot, preserve accounts + ledger) -# ansible-playbook -i biscayne.vaasl.io, playbooks/biscayne-redeploy.yml +# # Standard redeploy +# ansible-playbook -i inventory/biscayne.yml playbooks/biscayne-redeploy.yml # # # Full wipe (accounts + ledger) — slow rebuild -# ansible-playbook -i biscayne.vaasl.io, playbooks/biscayne-redeploy.yml \ +# ansible-playbook -i inventory/biscayne.yml playbooks/biscayne-redeploy.yml \ # -e wipe_accounts=true -e wipe_ledger=true # -# # Skip snapshot download (use existing) -# ansible-playbook -i biscayne.vaasl.io, playbooks/biscayne-redeploy.yml \ -# -e skip_snapshot=true -# -# # Pass extra args to snapshot-download.py -# ansible-playbook -i biscayne.vaasl.io, playbooks/biscayne-redeploy.yml \ -# -e 'snapshot_args=--version 2.2 --min-download-speed 50' -# -# # Snapshot only (no teardown/deploy) -# ansible-playbook -i biscayne.vaasl.io, playbooks/biscayne-redeploy.yml \ -# --tags snapshot -# -# # Resume after partial failure (download snapshot, scale up, verify) -# ansible-playbook -i biscayne.vaasl.io, playbooks/biscayne-redeploy.yml \ -# --tags snapshot,scale-up,verify +# # Skip snapshot cleanup (use existing) +# ansible-playbook -i inventory/biscayne.yml playbooks/biscayne-redeploy.yml \ +# -e skip_snapshot_cleanup=true # - name: Redeploy agave validator on biscayne hosts: all @@ -53,7 +40,7 @@ stack_path: /srv/deployments/agave-stack/stack-orchestrator/stacks/agave laconic_so: /home/rix/.local/bin/laconic-so laconic_so_repo: /home/rix/stack-orchestrator - laconic_so_branch: main + laconic_so_branch: fix/kind-mount-propagation kind_cluster: laconic-70ce4c4b47e23b85 k8s_namespace: "laconic-{{ kind_cluster }}" deployment_name: "{{ kind_cluster }}-deployment" @@ -62,13 +49,10 @@ 
accounts_dir: /srv/kind/solana/ramdisk/accounts ramdisk_mount: /srv/kind/solana/ramdisk ramdisk_size: 1024G - snapshot_script_local: "{{ playbook_dir }}/../scripts/agave-container/snapshot_download.py" - snapshot_script: /tmp/snapshot-download.py # Flags — non-destructive by default wipe_accounts: false wipe_ledger: false - skip_snapshot: false - snapshot_args: "" + skip_snapshot_cleanup: false tasks: # ---- teardown: graceful stop, then delete namespace ---------------------- @@ -121,12 +105,14 @@ tags: [teardown] - name: Clear stale claimRefs on Released PVs - ansible.builtin.shell: | - set -o pipefail - for pv in $(kubectl get pv -o jsonpath='{range .items[?(@.status.phase=="Released")]}{.metadata.name}{"\n"}{end}'); do - kubectl patch pv "$pv" --type json \ - -p '[{"op":"remove","path":"/spec/claimRef"}]' - done + ansible.builtin.shell: + cmd: | + set -o pipefail + for pv in $(kubectl get pv -o jsonpath='{range .items[?(@.status.phase=="Released")]}{.metadata.name}{"\n"}{end}'); do + kubectl patch pv "$pv" --type json \ + -p '[{"op":"remove","path":"/spec/claimRef"}]' + done + executable: /bin/bash register: pv_patch changed_when: pv_patch.stdout != "" tags: [teardown] @@ -151,20 +137,22 @@ tags: [wipe] - name: Clean old snapshots (keep newest full + incremental) - ansible.builtin.shell: | - set -o pipefail - cd {{ snapshot_dir }} || exit 0 - newest=$(ls -t snapshot-*.tar.* 2>/dev/null | head -1) - if [ -n "$newest" ]; then - newest_inc=$(ls -t incremental-snapshot-*.tar.* 2>/dev/null | head -1) - find . -maxdepth 1 -name '*.tar.*' \ - ! -name "$newest" \ - ! -name "${newest_inc:-__none__}" \ - -delete - fi + ansible.builtin.shell: + cmd: | + set -o pipefail + cd {{ snapshot_dir }} || exit 0 + newest=$(ls -t snapshot-*.tar.* 2>/dev/null | head -1) + if [ -n "$newest" ]; then + newest_inc=$(ls -t incremental-snapshot-*.tar.* 2>/dev/null | head -1) + find . -maxdepth 1 -name '*.tar.*' \ + ! -name "$newest" \ + ! 
-name "${newest_inc:-__none__}" \ + -delete + fi + executable: /bin/bash become: true changed_when: true - when: not skip_snapshot | bool + when: not skip_snapshot_cleanup | bool tags: [wipe] # ---- preflight: verify ramdisk and mounts before deploy ------------------ @@ -175,35 +163,16 @@ changed_when: false tags: [deploy, preflight] - - name: Verify ramdisk is xfs (not the underlying ZFS) + - name: Verify ramdisk is tmpfs (not the underlying ZFS) ansible.builtin.shell: - cmd: set -o pipefail && df -T {{ ramdisk_mount }} | grep -q xfs + cmd: set -o pipefail && df -T {{ ramdisk_mount }} | grep -q tmpfs executable: /bin/bash register: ramdisk_type failed_when: ramdisk_type.rc != 0 changed_when: false tags: [deploy, preflight] - # ---- deploy: sync config, bring up cluster, scale to 0 ------------------ - - name: Pull agave-stack repo - ansible.builtin.shell: | - cd {{ stack_repo }} - git fetch origin - git reset --hard origin/{{ laconic_so_branch }} - changed_when: true - tags: [deploy] - - - name: Regenerate deployment config from updated stack - ansible.builtin.command: > - {{ laconic_so }} - --stack {{ stack_path }} - deploy create - --spec-file {{ deployment_dir }}/spec.yml - --deployment-dir {{ deployment_dir }} - --update - changed_when: true - tags: [deploy] - + # ---- deploy: bring up cluster, let entrypoint handle snapshot ------------ - name: Check kind-config.yml mount style ansible.builtin.command: "grep -c 'containerPath: /mnt$' {{ deployment_dir }}/kind-config.yml" register: mount_root_check @@ -220,14 +189,6 @@ when: mount_root_check.stdout | default('0') | int < 1 tags: [deploy] - - name: Update laconic-so (editable install) - ansible.builtin.shell: | - cd {{ laconic_so_repo }} - git fetch origin - git reset --hard origin/{{ laconic_so_branch }} - changed_when: true - tags: [deploy] - - name: Start deployment (creates kind cluster + deploys pod) ansible.builtin.command: "{{ laconic_so }} deployment --dir {{ deployment_dir }} start" register: 
deploy_start @@ -272,99 +233,17 @@ cmd: > set -o pipefail && docker exec {{ kind_cluster }}-control-plane - df -T /mnt/validator-accounts 2>/dev/null | grep -q xfs + df -T /mnt/validator-accounts 2>/dev/null | grep -q tmpfs executable: /bin/bash register: kind_ramdisk_check failed_when: kind_ramdisk_check.rc != 0 changed_when: false tags: [deploy] - - name: Scale validator to 0 (stop before snapshot download) - ansible.builtin.command: > - kubectl scale deployment {{ deployment_name }} - -n {{ k8s_namespace }} --replicas=0 - changed_when: true - tags: [deploy] - - - name: Wait for pods to terminate - ansible.builtin.command: > - kubectl get pods -n {{ k8s_namespace }} - -l app={{ deployment_name }} - -o jsonpath='{.items}' - register: pods_gone - retries: 30 - delay: 5 - until: pods_gone.stdout == "[]" or pods_gone.stdout == "" - changed_when: false - failed_when: false - tags: [deploy] - - # ---- snapshot: download via aria2c, verify in kind node ------------------ - - name: Verify aria2c installed - ansible.builtin.command: which aria2c - changed_when: false - when: not skip_snapshot | bool - tags: [snapshot] - - - name: Copy snapshot script to remote - ansible.builtin.copy: - src: "{{ snapshot_script_local }}" - dest: "{{ snapshot_script }}" - mode: "0755" - when: not skip_snapshot | bool - tags: [snapshot] - - - name: Verify kind node mounts - ansible.builtin.command: > - docker exec {{ kind_cluster }}-control-plane - ls /mnt/validator-snapshots/ - register: kind_mount_check - changed_when: false - tags: [snapshot] - - - name: Download snapshot via aria2c - ansible.builtin.shell: > - python3 {{ snapshot_script }} - -o {{ snapshot_dir }} - {{ snapshot_args }} - become: true - register: snapshot_result - changed_when: true - when: not skip_snapshot | bool - timeout: 3600 - tags: [snapshot] - - - name: Show snapshot download result - ansible.builtin.debug: - msg: "{{ snapshot_result.stdout_lines | default(['skipped']) }}" - tags: [snapshot] - - - name: Verify 
snapshot visible inside kind node - ansible.builtin.shell: > - set -o pipefail && - docker exec {{ kind_cluster }}-control-plane - find /mnt/validator-snapshots/ -name '*.tar.*' -maxdepth 1 | head -5 - register: kind_snapshot_check - failed_when: kind_snapshot_check.stdout == "" - changed_when: false - when: not skip_snapshot | bool - tags: [snapshot] - - - name: Show snapshot files in kind node - ansible.builtin.debug: - msg: "{{ kind_snapshot_check.stdout_lines | default(['skipped']) }}" - when: not skip_snapshot | bool - tags: [snapshot] - - # ---- deploy (cont): scale validator back up with snapshot ---------------- - - name: Scale validator to 1 (start with downloaded snapshot) - ansible.builtin.command: > - kubectl scale deployment {{ deployment_name }} - -n {{ k8s_namespace }} --replicas=1 - changed_when: true - tags: [deploy, scale-up] - # ---- verify: confirm validator is running -------------------------------- + # The entrypoint.py handles snapshot download + agave-validator startup. + # Pod will be Running once the container starts, but agave-validator won't + # exec until after snapshot download completes (if needed). 
- name: Wait for pod to be running ansible.builtin.command: > kubectl get pods -n {{ k8s_namespace }} diff --git a/playbooks/biscayne-start.yml b/playbooks/biscayne-start.yml index 6c85699d..1eb82396 100644 --- a/playbooks/biscayne-start.yml +++ b/playbooks/biscayne-start.yml @@ -61,24 +61,33 @@ # laconic-so creates individual extraMounts per volume: # /srv/kind/solana/ledger → /mnt/validator-ledger (inside kind node) # /srv/kind/solana/ramdisk/accounts → /mnt/validator-accounts - - name: Verify kind node sees XFS at PV paths + - name: Verify kind node sees correct filesystems at PV paths ansible.builtin.shell: cmd: > set -o pipefail && docker exec {{ kind_node }} df -T /mnt/validator-ledger /mnt/validator-accounts - | grep -c xfs executable: /bin/bash - register: kind_xfs_check + register: kind_fs_check changed_when: false - - name: Fail if PV paths are not XFS + - name: Fail if ledger is not XFS (zvol) ansible.builtin.fail: msg: >- - Expected 2 XFS mounts (validator-ledger, validator-accounts) but - found {{ kind_xfs_check.stdout }}. Run biscayne-prepare-agave.yml - and restart the kind container. - when: kind_xfs_check.stdout | int < 2 + validator-ledger must be XFS (on zvol). 
Got: + {{ kind_fs_check.stdout }} + when: "'xfs' not in kind_fs_check.stdout" + + - name: Fail if accounts is on ZFS (must be tmpfs) + ansible.builtin.shell: + cmd: > + set -o pipefail && + docker exec {{ kind_node }} + df -T /mnt/validator-accounts | grep -q zfs + executable: /bin/bash + register: accounts_zfs_check + changed_when: false + failed_when: accounts_zfs_check.rc == 0 - name: Show kind node PV filesystems ansible.builtin.shell: diff --git a/playbooks/biscayne-sync-tools.yml b/playbooks/biscayne-sync-tools.yml new file mode 100644 index 00000000..a2d2ef19 --- /dev/null +++ b/playbooks/biscayne-sync-tools.yml @@ -0,0 +1,96 @@ +--- +# Sync laconic-so and agave-stack to latest on biscayne +# +# Updates both repos that laconic-so deployment commands depend on: +# - stack-orchestrator (laconic-so itself, editable install) +# - agave-stack (stack definitions, compose files, container scripts) +# +# Then regenerates the deployment config from the updated stack. +# Does NOT restart anything — just syncs code and config. 
+# +# Usage: +# ansible-playbook -i inventory/biscayne.yml playbooks/biscayne-sync-tools.yml +# +# # Use a feature branch +# ansible-playbook -i inventory/biscayne.yml playbooks/biscayne-sync-tools.yml \ +# -e laconic_so_branch=fix/kind-mount-propagation +# +- name: Sync laconic-so and agave-stack + hosts: all + gather_facts: false + environment: + KUBECONFIG: /home/rix/.kube/config + vars: + deployment_dir: /srv/deployments/agave + stack_repo: /srv/deployments/agave-stack + stack_path: /srv/deployments/agave-stack/stack-orchestrator/stacks/agave + laconic_so: /home/rix/.local/bin/laconic-so + laconic_so_repo: /home/rix/stack-orchestrator + laconic_so_branch: fix/kind-mount-propagation + stack_branch: main + + tasks: + - name: Update laconic-so (editable install) + ansible.builtin.shell: | + cd {{ laconic_so_repo }} + git fetch origin + git reset --hard origin/{{ laconic_so_branch }} + register: laconic_so_update + changed_when: true + + - name: Show laconic-so version + ansible.builtin.shell: + cmd: set -o pipefail && cd {{ laconic_so_repo }} && git log --oneline -1 + executable: /bin/bash + register: laconic_so_version + changed_when: false + + - name: Report laconic-so + ansible.builtin.debug: + msg: "laconic-so: {{ laconic_so_version.stdout }}" + + - name: Find SSH agent socket + ansible.builtin.shell: + cmd: set -o pipefail && ls -t /tmp/ssh-*/agent.* 2>/dev/null | head -1 + executable: /bin/bash + register: ssh_agent_socket + changed_when: false + failed_when: ssh_agent_socket.stdout == "" + + - name: Pull agave-stack repo + ansible.builtin.shell: | + export SSH_AUTH_SOCK={{ ssh_agent_socket.stdout }} + cd {{ stack_repo }} + git fetch origin + git reset --hard origin/{{ stack_branch }} + register: stack_update + changed_when: true + + - name: Show agave-stack version + ansible.builtin.shell: + cmd: set -o pipefail && cd {{ stack_repo }} && git log --oneline -1 + executable: /bin/bash + register: stack_version + changed_when: false + + - name: Report 
agave-stack + ansible.builtin.debug: + msg: "agave-stack: {{ stack_version.stdout }}" + + - name: Regenerate deployment config from updated stack + ansible.builtin.command: > + {{ laconic_so }} + --stack {{ stack_path }} + deploy create + --spec-file {{ deployment_dir }}/spec.yml + --deployment-dir {{ deployment_dir }} + --update + register: regen_result + changed_when: true + + - name: Report sync complete + ansible.builtin.debug: + msg: >- + Sync complete. laconic-so and agave-stack updated to + origin/{{ laconic_so_branch }}. Deployment config regenerated. + Restart or redeploy required to apply changes. diff --git a/scripts/agave-container/snapshot_download.py b/scripts/agave-container/snapshot_download.py index 61a39019..9f9137ac 100644 --- a/scripts/agave-container/snapshot_download.py +++ b/scripts/agave-container/snapshot_download.py @@ -513,11 +513,18 @@ def download_best_snapshot( for filename, mirror_urls in download_plan: log.info(" %s (%d mirrors)", filename, len(mirror_urls)) - # Download + # Download — full snapshot first, then re-probe for fresh incremental os.makedirs(output_dir, exist_ok=True) total_start: float = time.monotonic() + # Separate full and incremental from the initial plan + full_downloads: list[tuple[str, list[str]]] = [] for filename, mirror_urls in download_plan: + if filename.startswith("snapshot-"): + full_downloads.append((filename, mirror_urls)) + + # Download full snapshot(s) + for filename, mirror_urls in full_downloads: filepath: Path = Path(output_dir) / filename if filepath.exists() and filepath.stat().st_size > 0: log.info("Skipping %s (already exists: %.1f GB)", @@ -527,6 +534,47 @@ def download_best_snapshot( log.error("Failed to download %s", filename) return False + # After full snapshot download, re-probe for a fresh incremental. + # The initial incremental is stale by now (full download takes 10+ min). 
+ if not full_only: + # Get the full snapshot slot from the filename we just downloaded + full_filename: str = full_downloads[0][0] + fm_post: re.Match[str] | None = FULL_SNAP_RE.match(full_filename) + if fm_post: + full_snap_slot: int = int(fm_post.group(1)) + log.info("Re-probing for fresh incremental based on slot %d...", full_snap_slot) + inc_downloaded: bool = False + for source in fast_sources: + inc_url_re: str = f"http://{source.rpc_address}/incremental-snapshot.tar.bz2" + inc_location, _ = head_no_follow(inc_url_re, timeout=2) + if not inc_location: + continue + inc_fn, inc_fp = _parse_snapshot_filename(inc_location) + m_inc: re.Match[str] | None = INCR_SNAP_RE.match(inc_fn) + if not m_inc: + continue + if int(m_inc.group(1)) != full_snap_slot: + log.debug(" %s: incremental base slot %s != full %d, skipping", + source.rpc_address, m_inc.group(1), full_snap_slot) + continue + # Found a matching incremental — build mirror list and download + inc_mirrors: list[str] = [f"http://{source.rpc_address}{inc_fp}"] + for other in fast_sources: + if other.rpc_address == source.rpc_address: + continue + other_loc, _ = head_no_follow( + f"http://{other.rpc_address}/incremental-snapshot.tar.bz2", timeout=2) + if other_loc: + other_fn, other_fp = _parse_snapshot_filename(other_loc) + if other_fn == inc_fn: + inc_mirrors.append(f"http://{other.rpc_address}{other_fp}") + log.info(" Found incremental %s (%d mirrors)", inc_fn, len(inc_mirrors)) + if download_aria2c(inc_mirrors, output_dir, inc_fn, connections): + inc_downloaded = True + break + if not inc_downloaded: + log.info("No matching incremental found — validator will replay from full snapshot") + total_elapsed: float = time.monotonic() - total_start log.info("All downloads complete in %.0fs", total_elapsed) for filename, _ in download_plan: From bfde58431eef3d61b99a101f6d8de25112d201e7 Mon Sep 17 00:00:00 2001 From: "A. F. 
Dudley" Date: Mon, 9 Mar 2026 05:33:47 +0000 Subject: [PATCH 37/62] feat: rolling incremental snapshot download loop After the full snapshot downloads, continuously re-probe all fast sources for newer incrementals until the best available is within convergence_slots (default 500) of head. Each iteration finds the highest-slot incremental matching our full snapshot's base slot, downloads it (replacing any previous), and checks the gap to mainnet head. - Extract probe_incremental() from inline re-probe code - Add convergence_slots param to download_best_snapshot() (default 500) - Add --convergence-slots CLI arg - Pass SNAPSHOT_CONVERGENCE_SLOTS env var from entrypoint.py Co-Authored-By: Claude Opus 4.6 --- scripts/agave-container/entrypoint.py | 3 +- scripts/agave-container/snapshot_download.py | 152 +++++++++++++++---- 2 files changed, 122 insertions(+), 33 deletions(-) diff --git a/scripts/agave-container/entrypoint.py b/scripts/agave-container/entrypoint.py index 1122fc9c..519c7be2 100644 --- a/scripts/agave-container/entrypoint.py +++ b/scripts/agave-container/entrypoint.py @@ -158,7 +158,8 @@ def maybe_download_snapshot(snapshots_dir: str) -> None: sys.path.insert(0, str(script_dir)) from snapshot_download import download_best_snapshot - ok = download_best_snapshot(snapshots_dir) + convergence = int(env("SNAPSHOT_CONVERGENCE_SLOTS", "500")) + ok = download_best_snapshot(snapshots_dir, convergence_slots=convergence) if not ok: log.error("Snapshot download failed — starting without fresh snapshot") diff --git a/scripts/agave-container/snapshot_download.py b/scripts/agave-container/snapshot_download.py index 9f9137ac..0abaa02a 100644 --- a/scripts/agave-container/snapshot_download.py +++ b/scripts/agave-container/snapshot_download.py @@ -343,6 +343,61 @@ def measure_speed(rpc_address: str, measure_time: int = 7) -> float: return 0.0 +# -- Incremental probing ------------------------------------------------------- + + +def probe_incremental( + fast_sources: 
list[SnapshotSource], + full_snap_slot: int, +) -> tuple[str | None, list[str]]: + """Probe fast sources for the best incremental matching full_snap_slot. + + Returns (filename, mirror_urls) or (None, []) if no match found. + The "best" incremental is the one with the highest slot (closest to head). + """ + best_filename: str | None = None + best_slot: int = 0 + best_source: SnapshotSource | None = None + best_path: str | None = None + + for source in fast_sources: + inc_url: str = f"http://{source.rpc_address}/incremental-snapshot.tar.bz2" + inc_location, _ = head_no_follow(inc_url, timeout=2) + if not inc_location: + continue + inc_fn, inc_fp = _parse_snapshot_filename(inc_location) + m: re.Match[str] | None = INCR_SNAP_RE.match(inc_fn) + if not m: + continue + if int(m.group(1)) != full_snap_slot: + log.debug(" %s: incremental base slot %s != full %d, skipping", + source.rpc_address, m.group(1), full_snap_slot) + continue + inc_slot: int = int(m.group(2)) + if inc_slot > best_slot: + best_slot = inc_slot + best_filename = inc_fn + best_source = source + best_path = inc_fp + + if best_filename is None or best_source is None or best_path is None: + return None, [] + + # Build mirror list — check other sources for the same filename + mirror_urls: list[str] = [f"http://{best_source.rpc_address}{best_path}"] + for other in fast_sources: + if other.rpc_address == best_source.rpc_address: + continue + other_loc, _ = head_no_follow( + f"http://{other.rpc_address}/incremental-snapshot.tar.bz2", timeout=2) + if other_loc: + other_fn, other_fp = _parse_snapshot_filename(other_loc) + if other_fn == best_filename: + mirror_urls.append(f"http://{other.rpc_address}{other_fp}") + + return best_filename, mirror_urls + + # -- Download ------------------------------------------------------------------ @@ -423,6 +478,7 @@ def download_best_snapshot( max_speed_checks: int = 15, version_filter: str | None = None, full_only: bool = False, + convergence_slots: int = 500, ) -> bool: 
"""Download the best available snapshot to output_dir. @@ -534,46 +590,75 @@ def download_best_snapshot( log.error("Failed to download %s", filename) return False - # After full snapshot download, re-probe for a fresh incremental. + # After full snapshot download, rolling incremental download loop. # The initial incremental is stale by now (full download takes 10+ min). + # Re-probe repeatedly until we find one close enough to head. if not full_only: - # Get the full snapshot slot from the filename we just downloaded full_filename: str = full_downloads[0][0] fm_post: re.Match[str] | None = FULL_SNAP_RE.match(full_filename) if fm_post: full_snap_slot: int = int(fm_post.group(1)) - log.info("Re-probing for fresh incremental based on slot %d...", full_snap_slot) - inc_downloaded: bool = False - for source in fast_sources: - inc_url_re: str = f"http://{source.rpc_address}/incremental-snapshot.tar.bz2" - inc_location, _ = head_no_follow(inc_url_re, timeout=2) - if not inc_location: - continue - inc_fn, inc_fp = _parse_snapshot_filename(inc_location) + log.info("Rolling incremental download (base slot %d, convergence %d slots)...", + full_snap_slot, convergence_slots) + prev_inc_filename: str | None = None + + while True: + inc_fn, inc_mirrors = probe_incremental(fast_sources, full_snap_slot) + if inc_fn is None: + if prev_inc_filename is None: + log.error("No matching incremental found for base slot %d " + "— validator will replay from full snapshot", full_snap_slot) + else: + log.info("No newer incremental available, using %s", prev_inc_filename) + break + + # Parse the incremental slot from the filename m_inc: re.Match[str] | None = INCR_SNAP_RE.match(inc_fn) - if not m_inc: + assert m_inc is not None # probe_incremental already validated + inc_slot: int = int(m_inc.group(2)) + + # Check convergence against current mainnet slot + head_slot: int | None = get_current_slot(resolved_rpc) + if head_slot is None: + log.warning("Cannot get current slot — downloading best 
available incremental") + gap: int = convergence_slots + 1 # force download, then break + else: + gap = head_slot - inc_slot + + # Skip download if we already have this exact incremental + if inc_fn == prev_inc_filename: + if gap <= convergence_slots: + log.info("Incremental %s already downloaded (gap %d slots, converged)", inc_fn, gap) + break + log.info("No newer incremental yet (slot %d, gap %d slots), waiting...", + inc_slot, gap) + time.sleep(10) continue - if int(m_inc.group(1)) != full_snap_slot: - log.debug(" %s: incremental base slot %s != full %d, skipping", - source.rpc_address, m_inc.group(1), full_snap_slot) - continue - # Found a matching incremental — build mirror list and download - inc_mirrors: list[str] = [f"http://{source.rpc_address}{inc_fp}"] - for other in fast_sources: - if other.rpc_address == source.rpc_address: - continue - other_loc, _ = head_no_follow( - f"http://{other.rpc_address}/incremental-snapshot.tar.bz2", timeout=2) - if other_loc: - other_fn, other_fp = _parse_snapshot_filename(other_loc) - if other_fn == inc_fn: - inc_mirrors.append(f"http://{other.rpc_address}{other_fp}") - log.info(" Found incremental %s (%d mirrors)", inc_fn, len(inc_mirrors)) - if download_aria2c(inc_mirrors, output_dir, inc_fn, connections): - inc_downloaded = True - break - if not inc_downloaded: - log.info("No matching incremental found — validator will replay from full snapshot") + + # Delete previous incremental before downloading the new one + if prev_inc_filename is not None: + old_path: Path = Path(output_dir) / prev_inc_filename + if old_path.exists(): + log.info("Removing superseded incremental %s", prev_inc_filename) + old_path.unlink() + + log.info("Downloading incremental %s (%d mirrors, slot %d, gap %d slots)", + inc_fn, len(inc_mirrors), inc_slot, gap) + if not download_aria2c(inc_mirrors, output_dir, inc_fn, connections): + log.error("Failed to download incremental %s", inc_fn) + break + + prev_inc_filename = inc_fn + + if gap <= 
convergence_slots: + log.info("Converged: incremental slot %d is %d slots behind head", inc_slot, gap) + break + + if head_slot is None: + break + + log.info("Not converged (gap %d > %d), re-probing in 10s...", gap, convergence_slots) + time.sleep(10) total_elapsed: float = time.monotonic() - total_start log.info("All downloads complete in %.0fs", total_elapsed) @@ -615,6 +700,8 @@ def main() -> int: help="Max nodes to benchmark before giving up (default: 15)") p.add_argument("--version", default=None, help="Filter nodes by version prefix (e.g. '2.2')") + p.add_argument("--convergence-slots", type=int, default=500, + help="Max slot gap for incremental convergence (default: 500)") p.add_argument("--full-only", action="store_true", help="Download only full snapshot, skip incremental") p.add_argument("--dry-run", action="store_true", @@ -669,6 +756,7 @@ def main() -> int: max_speed_checks=args.max_speed_checks, version_filter=args.version, full_only=args.full_only, + convergence_slots=args.convergence_slots, ) if ok and args.post_cmd: From 601f520a457cb83fc39c56082afbefac382a03bb Mon Sep 17 00:00:00 2001 From: "A. F. Dudley" Date: Mon, 9 Mar 2026 06:11:19 +0000 Subject: [PATCH 38/62] fix: add 30-min wall-clock timeout to incremental convergence loop Without a bound, the loop runs forever if sources never serve an incremental close enough to head (e.g. full snapshot base slot is too old). After 30 minutes, proceed with the best incremental available or none. 
Co-Authored-By: Claude Opus 4.6 --- scripts/agave-container/snapshot_download.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/scripts/agave-container/snapshot_download.py b/scripts/agave-container/snapshot_download.py index 0abaa02a..151b2f26 100644 --- a/scripts/agave-container/snapshot_download.py +++ b/scripts/agave-container/snapshot_download.py @@ -601,8 +601,18 @@ def download_best_snapshot( log.info("Rolling incremental download (base slot %d, convergence %d slots)...", full_snap_slot, convergence_slots) prev_inc_filename: str | None = None + loop_start: float = time.monotonic() + max_convergence_time: float = 1800.0 # 30 min wall-clock limit while True: + if time.monotonic() - loop_start > max_convergence_time: + if prev_inc_filename: + log.warning("Convergence timeout (%.0fs) — using %s", + max_convergence_time, prev_inc_filename) + else: + log.warning("Convergence timeout (%.0fs) — no incremental downloaded", + max_convergence_time) + break inc_fn, inc_mirrors = probe_incremental(fast_sources, full_snap_slot) if inc_fn is None: if prev_inc_filename is None: From f842aba56a7fd76635285c2b22cad0b3b917343a Mon Sep 17 00:00:00 2001 From: "A. F. Dudley" Date: Mon, 9 Mar 2026 06:20:16 +0000 Subject: [PATCH 39/62] fix: sync-tools playbook uses agent forwarding, not socket hunting - Add become: false to git tasks so SSH_AUTH_SOCK survives (sudo drops it) - Fetch explicit branch names instead of bare `git fetch origin` - Remove the fragile `Find SSH agent socket` workaround Requires ForwardAgent yes in SSH config (added to ~/.ssh/config). 
Co-Authored-By: Claude Opus 4.6 --- playbooks/biscayne-sync-tools.yml | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/playbooks/biscayne-sync-tools.yml b/playbooks/biscayne-sync-tools.yml index a2d2ef19..76ba610e 100644 --- a/playbooks/biscayne-sync-tools.yml +++ b/playbooks/biscayne-sync-tools.yml @@ -30,15 +30,19 @@ stack_branch: main tasks: + # Git operations run as the connecting user (no become) so that + # SSH agent forwarding works. sudo drops SSH_AUTH_SOCK. - name: Update laconic-so (editable install) + become: false ansible.builtin.shell: | cd {{ laconic_so_repo }} - git fetch origin + git fetch origin {{ laconic_so_branch }} git reset --hard origin/{{ laconic_so_branch }} register: laconic_so_update changed_when: true - name: Show laconic-so version + become: false ansible.builtin.shell: cmd: set -o pipefail && cd {{ laconic_so_repo }} && git log --oneline -1 executable: /bin/bash @@ -49,24 +53,17 @@ ansible.builtin.debug: msg: "laconic-so: {{ laconic_so_version.stdout }}" - - name: Find SSH agent socket - ansible.builtin.shell: - cmd: set -o pipefail && ls -t /tmp/ssh-*/agent.* 2>/dev/null | head -1 - executable: /bin/bash - register: ssh_agent_socket - changed_when: false - failed_when: ssh_agent_socket.stdout == "" - - name: Pull agave-stack repo + become: false ansible.builtin.shell: | - export SSH_AUTH_SOCK={{ ssh_agent_socket.stdout }} cd {{ stack_repo }} - git fetch origin + git fetch origin {{ stack_branch }} git reset --hard origin/{{ stack_branch }} register: stack_update changed_when: true - name: Show agave-stack version + become: false ansible.builtin.shell: cmd: set -o pipefail && cd {{ stack_repo }} && git log --oneline -1 executable: /bin/bash From 3dc345ea7dcb0ff76a76ad9d79c45b43387b7a34 Mon Sep 17 00:00:00 2001 From: "A. F. 
Dudley" Date: Mon, 9 Mar 2026 06:28:01 +0000 Subject: [PATCH 40/62] fix: recovery playbook delegates snapshot download to container entrypoint The container's entrypoint.py already handles snapshot freshness checks, cleanup, download (with rolling incremental convergence), and validator startup. Remove the host-side download and let the container do the work. Co-Authored-By: Claude Opus 4.6 --- playbooks/biscayne-recover.yml | 112 ++++++++++++++------------------- 1 file changed, 47 insertions(+), 65 deletions(-) diff --git a/playbooks/biscayne-recover.yml b/playbooks/biscayne-recover.yml index 38c2e1d3..53ebe3e9 100644 --- a/playbooks/biscayne-recover.yml +++ b/playbooks/biscayne-recover.yml @@ -10,19 +10,14 @@ # 2. Wait for pods to terminate # 3. Wipe accounts ramdisk # 4. Clean old snapshots -# 5. Download fresh snapshot via aria2c -# 6. Verify snapshot accessible via PV (kubectl) -# 7. Scale deployment to 1 -# 8. Wait for pod Running -# 9. Verify validator log shows snapshot unpacking -# 10. Check RPC health +# 5. Scale to 1 — container entrypoint downloads snapshot + starts validator +# 6. Verify snapshot freshness +# 7. Wait for pod Running +# 8. Verify validator log +# 9. 
Check RPC health # # Usage: -# ansible-playbook -i biscayne.vaasl.io, playbooks/biscayne-recover.yml -# -# # Pass extra args to snapshot-download.py -# ansible-playbook -i biscayne.vaasl.io, playbooks/biscayne-recover.yml \ -# -e 'snapshot_args=--version 2.2' +# ansible-playbook -i inventory/biscayne.yml playbooks/biscayne-recover.yml # - name: Recover agave validator hosts: all @@ -36,9 +31,6 @@ snapshot_dir: /srv/kind/solana/snapshots accounts_dir: /srv/kind/solana/ramdisk/accounts ramdisk_mount: /srv/kind/solana/ramdisk - snapshot_script_local: "{{ playbook_dir }}/../scripts/agave-container/snapshot_download.py" - snapshot_script: /tmp/snapshot-download.py - snapshot_args: "" # Mainnet RPC for slot comparison mainnet_rpc: https://api.mainnet-beta.solana.com # Maximum slots behind before snapshot is considered stale @@ -107,32 +99,45 @@ become: true changed_when: true - # ---- step 5: download fresh snapshot --------------------------------------- - - name: Verify aria2c installed - ansible.builtin.command: which aria2c - changed_when: false - - - name: Copy snapshot script to remote - ansible.builtin.copy: - src: "{{ snapshot_script_local }}" - dest: "{{ snapshot_script }}" - mode: "0755" - - - name: Download snapshot and scale to 1 - ansible.builtin.shell: | - python3 {{ snapshot_script }} \ - -o {{ snapshot_dir }} \ - --max-snapshot-age {{ max_slot_lag }} \ - --max-latency 500 \ - {{ snapshot_args }} \ - && KUBECONFIG=/home/rix/.kube/config kubectl scale deployment \ - {{ deployment_name }} -n {{ k8s_namespace }} --replicas=1 - become: true - register: snapshot_result - timeout: 3600 + # ---- step 5: scale to 1 — entrypoint handles snapshot download ------------ + # The container's entrypoint.py checks snapshot freshness, cleans stale + # snapshots, downloads fresh ones (with rolling incremental convergence), + # then starts the validator. No host-side download needed. 
+ - name: Scale deployment to 1 + ansible.builtin.command: > + kubectl scale deployment {{ deployment_name }} + -n {{ k8s_namespace }} --replicas=1 changed_when: true - # ---- step 6: verify snapshot accessible via PV ----------------------------- + # ---- step 6: wait for pod running ------------------------------------------ + # The entrypoint downloads the snapshot before starting the validator. + # The pod reaches Running immediately (entrypoint is PID 1), but the + # validator log won't appear until download + startup completes. + - name: Wait for pod to be running + ansible.builtin.command: > + kubectl get pods -n {{ k8s_namespace }} + -l app={{ deployment_name }} + -o jsonpath='{.items[0].status.phase}' + register: pod_status + retries: 60 + delay: 10 + until: pod_status.stdout == "Running" + changed_when: false + + # ---- step 7: wait for snapshot download to complete ----------------------- + # The entrypoint writes the snapshot to the PV. Wait for it to appear + # on the host (zvol mount is shared). + - name: Wait for snapshot file to appear + ansible.builtin.shell: set -o pipefail && ls -1 {{ snapshot_dir }}/snapshot-*.tar.* 2>/dev/null | head -1 + args: + executable: /bin/bash + register: snapshot_file + retries: 180 + delay: 20 + until: snapshot_file.stdout != "" + changed_when: false + + # ---- step 8: verify snapshot freshness ------------------------------------ - name: Get snapshot filename ansible.builtin.shell: set -o pipefail && ls -1 {{ snapshot_dir }}/snapshot-*.tar.* | head -1 | xargs basename args: @@ -158,48 +163,25 @@ return_content: true register: mainnet_slot_response - - name: Check snapshot freshness - ansible.builtin.fail: - msg: >- - Snapshot too old: slot {{ snapshot_slot }}, mainnet at - {{ mainnet_slot_response.json.result }}, - {{ mainnet_slot_response.json.result | int - snapshot_slot | int }} slots behind - (max {{ max_slot_lag }}). 
- when: (mainnet_slot_response.json.result | int - snapshot_slot | int) > max_slot_lag - - name: Report snapshot freshness ansible.builtin.debug: msg: >- Snapshot slot {{ snapshot_slot }}, mainnet {{ mainnet_slot_response.json.result }}, {{ mainnet_slot_response.json.result | int - snapshot_slot | int }} slots behind. - # ---- step 7: scale already done in download step above ---------------------- - - # ---- step 8: wait for pod running ------------------------------------------ - - name: Wait for pod to be running - ansible.builtin.command: > - kubectl get pods -n {{ k8s_namespace }} - -l app={{ deployment_name }} - -o jsonpath='{.items[0].status.phase}' - register: pod_status - retries: 60 - delay: 10 - until: pod_status.stdout == "Running" - changed_when: false - - # ---- step 9: verify validator log ------------------------------------------ + # ---- step 9: wait for validator log --------------------------------------- - name: Wait for validator log file ansible.builtin.command: > kubectl exec -n {{ k8s_namespace }} deployment/{{ deployment_name }} -c agave-validator -- test -f /data/log/validator.log register: log_file_check - retries: 12 - delay: 10 + retries: 30 + delay: 20 until: log_file_check.rc == 0 changed_when: false - # ---- step 10: check RPC health --------------------------------------------- + # ---- step 10: check RPC health -------------------------------------------- - name: Check RPC health (non-blocking) ansible.builtin.uri: url: http://{{ inventory_hostname }}:8899/health From 09728a719c30553b217e60b05d2e58b7606af62e Mon Sep 17 00:00:00 2001 From: "A. F. Dudley" Date: Mon, 9 Mar 2026 06:39:25 +0000 Subject: [PATCH 41/62] fix: recovery playbook is fire-and-forget, add check-status.py The recovery playbook now exits after scaling to 1. The container entrypoint handles snapshot download (60+ min) and validator startup autonomously. Removed all polling/verification steps that would time out waiting. 
Added scripts/check-status.py for monitoring download progress, validator slot, gap to mainnet, catch-up rate, and ramdisk usage. Co-Authored-By: Claude Opus 4.6 --- playbooks/biscayne-recover.yml | 108 ++----------- scripts/check-status.py | 276 +++++++++++++++++++++++++++++++++ 2 files changed, 285 insertions(+), 99 deletions(-) create mode 100755 scripts/check-status.py diff --git a/playbooks/biscayne-recover.yml b/playbooks/biscayne-recover.yml index 53ebe3e9..1d46c78e 100644 --- a/playbooks/biscayne-recover.yml +++ b/playbooks/biscayne-recover.yml @@ -7,14 +7,14 @@ # # Steps: # 1. Scale deployment to 0 -# 2. Wait for pods to terminate +# 2. Wait for pods to terminate (io_uring safety check) # 3. Wipe accounts ramdisk # 4. Clean old snapshots # 5. Scale to 1 — container entrypoint downloads snapshot + starts validator -# 6. Verify snapshot freshness -# 7. Wait for pod Running -# 8. Verify validator log -# 9. Check RPC health +# +# The playbook exits after step 5. The container handles snapshot download +# (60+ min) and validator startup autonomously. Monitor with: +# scripts/check-status.py --watch # # Usage: # ansible-playbook -i inventory/biscayne.yml playbooks/biscayne-recover.yml @@ -31,10 +31,6 @@ snapshot_dir: /srv/kind/solana/snapshots accounts_dir: /srv/kind/solana/ramdisk/accounts ramdisk_mount: /srv/kind/solana/ramdisk - # Mainnet RPC for slot comparison - mainnet_rpc: https://api.mainnet-beta.solana.com - # Maximum slots behind before snapshot is considered stale - max_slot_lag: 20000 tasks: # ---- step 1: scale to 0 --------------------------------------------------- @@ -109,95 +105,9 @@ -n {{ k8s_namespace }} --replicas=1 changed_when: true - # ---- step 6: wait for pod running ------------------------------------------ - # The entrypoint downloads the snapshot before starting the validator. - # The pod reaches Running immediately (entrypoint is PID 1), but the - # validator log won't appear until download + startup completes. 
- - name: Wait for pod to be running - ansible.builtin.command: > - kubectl get pods -n {{ k8s_namespace }} - -l app={{ deployment_name }} - -o jsonpath='{.items[0].status.phase}' - register: pod_status - retries: 60 - delay: 10 - until: pod_status.stdout == "Running" - changed_when: false - - # ---- step 7: wait for snapshot download to complete ----------------------- - # The entrypoint writes the snapshot to the PV. Wait for it to appear - # on the host (zvol mount is shared). - - name: Wait for snapshot file to appear - ansible.builtin.shell: set -o pipefail && ls -1 {{ snapshot_dir }}/snapshot-*.tar.* 2>/dev/null | head -1 - args: - executable: /bin/bash - register: snapshot_file - retries: 180 - delay: 20 - until: snapshot_file.stdout != "" - changed_when: false - - # ---- step 8: verify snapshot freshness ------------------------------------ - - name: Get snapshot filename - ansible.builtin.shell: set -o pipefail && ls -1 {{ snapshot_dir }}/snapshot-*.tar.* | head -1 | xargs basename - args: - executable: /bin/bash - register: snapshot_filename - changed_when: false - - - name: Extract snapshot slot from filename - ansible.builtin.set_fact: - snapshot_slot: "{{ snapshot_filename.stdout | regex_search('snapshot-([0-9]+)-', '\\1') | first }}" - - - name: Get current mainnet slot - ansible.builtin.uri: - url: "{{ mainnet_rpc }}" - method: POST - body_format: json - body: - jsonrpc: "2.0" - id: 1 - method: getSlot - params: - - commitment: finalized - return_content: true - register: mainnet_slot_response - - - name: Report snapshot freshness + - name: Report ansible.builtin.debug: msg: >- - Snapshot slot {{ snapshot_slot }}, mainnet {{ mainnet_slot_response.json.result }}, - {{ mainnet_slot_response.json.result | int - snapshot_slot | int }} slots behind. 
- - # ---- step 9: wait for validator log --------------------------------------- - - name: Wait for validator log file - ansible.builtin.command: > - kubectl exec -n {{ k8s_namespace }} - deployment/{{ deployment_name }} - -c agave-validator -- test -f /data/log/validator.log - register: log_file_check - retries: 30 - delay: 20 - until: log_file_check.rc == 0 - changed_when: false - - # ---- step 10: check RPC health -------------------------------------------- - - name: Check RPC health (non-blocking) - ansible.builtin.uri: - url: http://{{ inventory_hostname }}:8899/health - return_content: true - register: rpc_health - retries: 6 - delay: 30 - until: rpc_health.status == 200 - failed_when: false - - - name: Report final status - ansible.builtin.debug: - msg: >- - Recovery complete. - Snapshot: slot {{ snapshot_slot }} - ({{ mainnet_slot_response.json.result | int - snapshot_slot | int }} slots behind). - Pod: {{ pod_status.stdout }}. - Log: {{ 'writing' if log_file_check.rc == 0 else 'not yet' }}. - RPC: {{ rpc_health.content | default('not yet responding — still catching up') }}. + Recovery initiated. The container entrypoint will download a fresh + snapshot and start the validator. Monitor progress with: + scripts/check-status.py --watch diff --git a/scripts/check-status.py b/scripts/check-status.py new file mode 100755 index 00000000..ae0dc4b4 --- /dev/null +++ b/scripts/check-status.py @@ -0,0 +1,276 @@ +#!/usr/bin/env python3 +"""Check agave validator and snapshot download status on biscayne. 
+ +Runs kubectl and host commands over SSH to report: + - Pod phase and container states + - Entrypoint logs (snapshot download progress) + - Snapshot files on disk + - Validator slot vs mainnet slot (gap + catch-up rate) + - Ramdisk usage + +Usage: + scripts/check-status.py # one-shot + scripts/check-status.py --watch # repeat every 30s + scripts/check-status.py --watch -i 10 # repeat every 10s +""" + +from __future__ import annotations + +import argparse +import json +import subprocess +import sys +import time +import urllib.request + +# -- Config ------------------------------------------------------------------- + +SSH_HOST = "biscayne.vaasl.io" +KUBECONFIG = "/home/rix/.kube/config" +NAMESPACE = "laconic-laconic-70ce4c4b47e23b85" +DEPLOYMENT = "laconic-70ce4c4b47e23b85-deployment" +KIND_CONTAINER = "laconic-70ce4c4b47e23b85-control-plane" +SNAPSHOT_DIR = "/srv/kind/solana/snapshots" +RAMDISK = "/srv/kind/solana/ramdisk" +MAINNET_RPC = "https://api.mainnet-beta.solana.com" + + +# -- Helpers ------------------------------------------------------------------ + + +def ssh(cmd: str, timeout: int = 15) -> tuple[int, str]: + """Run a command on biscayne via SSH. 
Returns (rc, stdout).""" + r = subprocess.run( + ["ssh", SSH_HOST, cmd], + capture_output=True, text=True, timeout=timeout, + ) + return r.returncode, r.stdout.strip() + + +def kubectl(args: str, timeout: int = 15) -> tuple[int, str]: + """Run kubectl on biscayne.""" + return ssh(f"KUBECONFIG={KUBECONFIG} kubectl {args}", timeout) + + +def get_mainnet_slot() -> int | None: + """Query mainnet for current finalized slot.""" + req = urllib.request.Request( + MAINNET_RPC, + data=json.dumps({ + "jsonrpc": "2.0", "id": 1, + "method": "getSlot", + "params": [{"commitment": "finalized"}], + }).encode(), + headers={"Content-Type": "application/json"}, + ) + try: + with urllib.request.urlopen(req, timeout=10) as resp: + return json.loads(resp.read())["result"] + except Exception: + return None + + +# -- Checks ------------------------------------------------------------------- + + +def check_pod() -> dict: + """Get pod phase and container statuses.""" + rc, out = kubectl( + f"get pods -n {NAMESPACE} -l app={DEPLOYMENT} " + "-o json" + ) + if rc != 0 or not out: + return {"phase": "NoPod", "containers": {}} + + data = json.loads(out) + if not data.get("items"): + return {"phase": "NoPod", "containers": {}} + + pod = data["items"][0] + phase = pod["status"].get("phase", "Unknown") + containers = {} + for cs in pod["status"].get("containerStatuses", []): + state_key = list(cs["state"].keys())[0] + state = cs["state"][state_key] + reason = state.get("reason", "") + detail = f"{state_key}" + if reason: + detail += f"({reason})" + containers[cs["name"]] = { + "ready": cs["ready"], + "state": detail, + "restarts": cs["restartCount"], + } + return {"phase": phase, "containers": containers} + + +def check_entrypoint_logs(lines: int = 15) -> str: + """Get recent entrypoint logs from the agave-validator container.""" + rc, out = kubectl( + f"logs -n {NAMESPACE} deployment/{DEPLOYMENT} " + f"-c agave-validator --tail={lines}", + timeout=20, + ) + return out if rc == 0 else "(no logs)" + 
+ +def check_snapshots() -> list[dict]: + """List snapshot files on disk with sizes.""" + rc, out = ssh( + f"ls -lhS {SNAPSHOT_DIR}/*.tar.* 2>/dev/null " + f"|| echo 'NO_SNAPSHOTS'" + ) + if "NO_SNAPSHOTS" in out: + return [] + + files = [] + for line in out.splitlines(): + parts = line.split() + if len(parts) >= 9: + files.append({"size": parts[4], "name": parts[-1].split("/")[-1]}) + return files + + +def check_validator_slot() -> int | None: + """Query the validator's current processed slot via RPC.""" + rc, out = kubectl( + f"exec -n {NAMESPACE} deployment/{DEPLOYMENT} " + f"-c agave-validator -- " + "curl -s -X POST -H 'Content-Type: application/json' " + "-d '{\"jsonrpc\":\"2.0\",\"id\":1,\"method\":\"getSlot\"," + "\"params\":[{\"commitment\":\"processed\"}]}' " + "http://localhost:8899", + timeout=10, + ) + if rc != 0 or not out: + return None + try: + return json.loads(out)["result"] + except (json.JSONDecodeError, KeyError): + return None + + +def check_ramdisk() -> str: + """Get ramdisk usage.""" + rc, out = ssh(f"df -h {RAMDISK} | tail -1") + if rc != 0: + return "unknown" + parts = out.split() + if len(parts) >= 5: + return f"{parts[2]}/{parts[1]} ({parts[4]})" + return out + + +# -- Display ------------------------------------------------------------------ + + +prev_slot: int | None = None +prev_time: float | None = None + + +def display(iteration: int = 0) -> None: + """Run all checks and print status.""" + global prev_slot, prev_time + + now = time.time() + ts = time.strftime("%H:%M:%S") + + # Gather data + pod = check_pod() + mainnet = get_mainnet_slot() + snapshots = check_snapshots() + ramdisk = check_ramdisk() + + print(f"\n{'=' * 60}") + print(f" Biscayne Agave Status — {ts}") + print(f"{'=' * 60}") + + # Pod + print(f"\n Pod: {pod['phase']}") + for name, cs in pod["containers"].items(): + ready = "✓" if cs["ready"] else "✗" + restarts = f" (restarts: {cs['restarts']})" if cs["restarts"] > 0 else "" + print(f" {ready} {name}: 
{cs['state']}{restarts}") + + # Validator slot + validator_slot = None + if pod["phase"] == "Running": + agave = pod["containers"].get("agave-validator", {}) + if agave.get("ready"): + validator_slot = check_validator_slot() + + if validator_slot is not None and mainnet is not None: + gap = mainnet - validator_slot + rate = "" + if prev_slot is not None and prev_time is not None: + dt = now - prev_time + if dt > 0: + slots_gained = validator_slot - prev_slot + # Net rate = our replay rate minus chain production + net_rate = slots_gained / dt + if net_rate > 0: + eta_sec = gap / net_rate + eta_min = eta_sec / 60 + rate = f" net {net_rate:+.1f} slots/s, ETA ~{eta_min:.0f}m" + else: + rate = f" net {net_rate:+.1f} slots/s (falling behind)" + prev_slot = validator_slot + prev_time = now + print(f"\n Validator: slot {validator_slot:,}") + print(f" Mainnet: slot {mainnet:,}") + print(f" Gap: {gap:,} slots{rate}") + elif mainnet is not None: + print(f"\n Validator: not responding (downloading or starting)") + print(f" Mainnet: slot {mainnet:,}") + else: + print(f"\n Mainnet: unreachable") + + # Snapshots + if snapshots: + print(f"\n Snapshots:") + for s in snapshots: + print(f" {s['size']:>6s} {s['name']}") + else: + print(f"\n Snapshots: none on disk") + + # Ramdisk + print(f" Ramdisk: {ramdisk}") + + # Entrypoint logs (only if validator not yet responding) + if validator_slot is None and pod["phase"] in ("Running", "Pending"): + logs = check_entrypoint_logs(10) + if logs and logs != "(no logs)": + print(f"\n Entrypoint logs (last 10 lines):") + for line in logs.splitlines(): + print(f" {line}") + + print() + + +# -- Main --------------------------------------------------------------------- + + +def main() -> int: + p = argparse.ArgumentParser(description=__doc__, + formatter_class=argparse.RawDescriptionHelpFormatter) + p.add_argument("--watch", action="store_true", help="Repeat every interval") + p.add_argument("-i", "--interval", type=int, default=30, + help="Watch 
interval in seconds (default: 30)") + args = p.parse_args() + + try: + if args.watch: + i = 0 + while True: + display(i) + i += 1 + time.sleep(args.interval) + else: + display() + except KeyboardInterrupt: + print() + return 0 + + +if __name__ == "__main__": + sys.exit(main()) From ed6f6bfd598912ba945ba9673be027ed64390de0 Mon Sep 17 00:00:00 2001 From: "A. F. Dudley" Date: Mon, 9 Mar 2026 06:46:17 +0000 Subject: [PATCH 42/62] fix: check-status.py pod label selector matches actual k8s labels The pod label is app=laconic-70ce4c4b47e23b85, not app=laconic-70ce4c4b47e23b85-deployment. Co-Authored-By: Claude Opus 4.6 --- scripts/check-status.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/check-status.py b/scripts/check-status.py index ae0dc4b4..8289d0f2 100755 --- a/scripts/check-status.py +++ b/scripts/check-status.py @@ -29,6 +29,7 @@ SSH_HOST = "biscayne.vaasl.io" KUBECONFIG = "/home/rix/.kube/config" NAMESPACE = "laconic-laconic-70ce4c4b47e23b85" DEPLOYMENT = "laconic-70ce4c4b47e23b85-deployment" +POD_LABEL = "laconic-70ce4c4b47e23b85" KIND_CONTAINER = "laconic-70ce4c4b47e23b85-control-plane" SNAPSHOT_DIR = "/srv/kind/solana/snapshots" RAMDISK = "/srv/kind/solana/ramdisk" @@ -76,7 +77,7 @@ def get_mainnet_slot() -> int | None: def check_pod() -> dict: """Get pod phase and container statuses.""" rc, out = kubectl( - f"get pods -n {NAMESPACE} -l app={DEPLOYMENT} " + f"get pods -n {NAMESPACE} -l app={POD_LABEL} " "-o json" ) if rc != 0 or not out: From 173b807451b182bd8ddd96a54c9f1f1bed0a99e3 Mon Sep 17 00:00:00 2001 From: "A. F. Dudley" Date: Mon, 9 Mar 2026 06:48:19 +0000 Subject: [PATCH 43/62] fix: check-status.py discovers cluster-id from deployment.yml Instead of hardcoding the laconic cluster ID, namespace, deployment name, and pod label, read cluster-id from deployment.yml on biscayne and derive everything from it. 
Co-Authored-By: Claude Opus 4.6 --- scripts/check-status.py | 34 ++++++++++++++++++++++++++++++---- 1 file changed, 30 insertions(+), 4 deletions(-) diff --git a/scripts/check-status.py b/scripts/check-status.py index 8289d0f2..2f9cf131 100755 --- a/scripts/check-status.py +++ b/scripts/check-status.py @@ -27,14 +27,38 @@ import urllib.request SSH_HOST = "biscayne.vaasl.io" KUBECONFIG = "/home/rix/.kube/config" -NAMESPACE = "laconic-laconic-70ce4c4b47e23b85" -DEPLOYMENT = "laconic-70ce4c4b47e23b85-deployment" -POD_LABEL = "laconic-70ce4c4b47e23b85" -KIND_CONTAINER = "laconic-70ce4c4b47e23b85-control-plane" +DEPLOYMENT_DIR = "/srv/deployments/agave" SNAPSHOT_DIR = "/srv/kind/solana/snapshots" RAMDISK = "/srv/kind/solana/ramdisk" MAINNET_RPC = "https://api.mainnet-beta.solana.com" +# Derived from deployment.yml on first connect +CLUSTER_ID: str = "" +NAMESPACE: str = "" +DEPLOYMENT: str = "" +POD_LABEL: str = "" +KIND_CONTAINER: str = "" + + +# -- Discovery ---------------------------------------------------------------- + + +def discover() -> None: + """Read cluster-id from deployment.yml and derive all identifiers.""" + global CLUSTER_ID, NAMESPACE, DEPLOYMENT, POD_LABEL, KIND_CONTAINER + rc, out = ssh( + f"grep '^cluster-id:' {DEPLOYMENT_DIR}/deployment.yml " + "| awk '{print $2}'" + ) + if rc != 0 or not out: + print(f"ERROR: cannot read cluster-id from {DEPLOYMENT_DIR}/deployment.yml") + sys.exit(1) + CLUSTER_ID = out.strip() + NAMESPACE = f"laconic-{CLUSTER_ID}" + DEPLOYMENT = f"{CLUSTER_ID}-deployment" + POD_LABEL = CLUSTER_ID + KIND_CONTAINER = f"{CLUSTER_ID}-control-plane" + # -- Helpers ------------------------------------------------------------------ @@ -259,6 +283,8 @@ def main() -> int: help="Watch interval in seconds (default: 30)") args = p.parse_args() + discover() + try: if args.watch: i = 0 From b88af2be70cc9ae0d4a009c2136adfc1254adc70 Mon Sep 17 00:00:00 2001 From: "A. F. 
Dudley" Date: Mon, 9 Mar 2026 07:58:37 +0000 Subject: [PATCH 44/62] feat: graceful shutdown, ZFS upgrade, storage migration, sync-tools build MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - entrypoint.py: Python stays PID 1, traps SIGTERM, requests graceful exit via admin RPC (agave-validator exit --force) before falling back to signals - snapshot_download.py: fix break-on-failure bug in incremental download loop (continue + re-probe instead of giving up) - biscayne-upgrade-zfs.yml: upgrade ZFS 2.2.2 → 2.2.9 via arter97/zfs-lts PPA to fix io_uring deadlock at kernel module level - biscayne-migrate-storage.yml: one-time migration from zvol/XFS to ZFS dataset (zvol workaround no longer needed with graceful shutdown + ZFS fix) - biscayne-stop.yml: patch terminationGracePeriodSeconds to 300 before scaling to 0, updated docs for admin RPC shutdown - biscayne-sync-tools.yml: fix SSH agent forwarding (vars: ansible_become), add --tags build-container support, add set -e to shell blocks - biscayne-recover.yml: updated for graceful shutdown awareness - check-status.py: add --pane flag for tmux, clean redraw in watch mode - CLAUDE.md: update docs for ZFS dataset storage, graceful shutdown Co-Authored-By: Claude Opus 4.6 --- CLAUDE.md | 63 ++-- playbooks/biscayne-migrate-storage.yml | 286 +++++++++++++++++++ playbooks/biscayne-recover.yml | 16 +- playbooks/biscayne-stop.yml | 22 +- playbooks/biscayne-sync-tools.yml | 47 ++- playbooks/biscayne-upgrade-zfs.yml | 158 ++++++++++ scripts/agave-container/entrypoint.py | 93 +++++- scripts/agave-container/snapshot_download.py | 5 +- scripts/check-status.py | 25 +- 9 files changed, 661 insertions(+), 54 deletions(-) create mode 100644 playbooks/biscayne-migrate-storage.yml create mode 100644 playbooks/biscayne-upgrade-zfs.yml diff --git a/CLAUDE.md b/CLAUDE.md index 6fb2164c..f8cd1ee3 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -10,16 +10,16 @@ below it are correct. 
Playbooks belong to exactly one layer. | 1. Base system | Docker, ZFS, packages | Out of scope (manual/PXE) | | 2. Prepare kind | `/srv/kind` exists (ZFS dataset) | None needed (ZFS handles it) | | 3. Install kind | `laconic-so deployment start` creates kind cluster, mounts `/srv/kind` → `/mnt` in kind node | `biscayne-redeploy.yml` (deploy tags) | -| 4. Prepare agave | Host storage for agave: zvol, ramdisk, rbind into `/srv/kind/solana` | `biscayne-prepare-agave.yml` | +| 4. Prepare agave | Host storage for agave: ZFS dataset, ramdisk | `biscayne-prepare-agave.yml` | | 5. Deploy agave | Deploy agave-stack into kind, snapshot download, scale up | `biscayne-redeploy.yml` (snapshot/verify tags), `biscayne-recover.yml` | **Layer 4 invariants** (asserted by `biscayne-prepare-agave.yml`): -- `/srv/kind/solana` is XFS on a zvol — agave uses io_uring which deadlocks on ZFS. `/srv/solana` is NOT the zvol (it's a ZFS dataset directory); never use it for data paths +- `/srv/kind/solana` is a ZFS dataset (`biscayne/DATA/srv/kind/solana`), child of the `/srv/kind` dataset - `/srv/kind/solana/ramdisk` is tmpfs (1TB) — accounts must be in RAM +- `/srv/solana` is NOT the data path — it's a directory on the parent ZFS dataset. All data paths use `/srv/kind/solana` These invariants are checked at runtime and persisted to fstab/systemd so they -survive reboot. They are agave's requirements reaching into the boot sequence, -not base system concerns. +survive reboot. **Cross-cutting**: `health-check.yml` (read-only diagnostics), `biscayne-stop.yml` (layer 5 — graceful shutdown), `fix-pv-mounts.yml` (layer 5 — PV repair). @@ -30,11 +30,8 @@ not base system concerns. The agave validator runs inside a kind-based k8s cluster managed by `laconic-so`. The kind node is a Docker container. 
**Never restart or kill the kind node container -while the validator is running.** Agave uses `io_uring` for async I/O, and on ZFS, -killing the process can produce unkillable kernel threads (D-state in -`io_wq_put_and_exit` blocked on ZFS transaction commits). This deadlocks the -container's PID namespace, making `docker stop`, `docker restart`, `docker exec`, -and even `reboot` hang. +while the validator is running.** Use `agave-validator exit --force` via the admin +RPC socket for graceful shutdown, or scale the deployment to 0 and wait. Correct shutdown sequence: @@ -61,15 +58,16 @@ The accounts directory must be in RAM for performance. tmpfs is used instead of `/dev/ram0` — simpler (no format-on-boot service needed), resizable on the fly with `mount -o remount,size=`, and what most Solana operators use. -**Boot ordering**: fstab entry mounts tmpfs at `/srv/kind/solana/ramdisk` with -`x-systemd.requires=srv-kind-solana.mount`. tmpfs mounts natively via fstab — -no systemd format service needed. **No manual intervention after reboot.** +**Boot ordering**: `/srv/kind/solana` is a ZFS dataset mounted automatically by +`zfs-mount.service`. The tmpfs ramdisk fstab entry uses +`x-systemd.requires=zfs-mount.service` to ensure the dataset is mounted first. +**No manual intervention after reboot.** **Mount propagation**: The kind node bind-mounts `/srv/kind` → `/mnt` at container start. laconic-so sets `propagation: HostToContainer` on all kind extraMounts -(commit `a11d40f2` in stack-orchestrator), so host submounts (like the rbind at -`/srv/kind/solana`) propagate into the kind node automatically. A kind restart -is required to pick up the new config after updating laconic-so. +(commit `a11d40f2` in stack-orchestrator), so host submounts propagate into the +kind node automatically. A kind restart is required to pick up the new config +after updating laconic-so. 
### KUBECONFIG @@ -92,21 +90,20 @@ Then export it: export SSH_AUTH_SOCK=/tmp/ssh-XXXX/agent.NNNN ``` -### io_uring/ZFS Deadlock — Root Cause +### io_uring/ZFS Deadlock — Historical Note -When agave-validator is killed while performing I/O against ZFS-backed paths (not -the ramdisk), io_uring worker threads get stuck in D-state: -``` -io_wq_put_and_exit → dsl_dir_tempreserve_space (ZFS module) -``` -These threads are unkillable (SIGKILL has no effect on D-state processes). They -prevent the container's PID namespace from being reaped (`zap_pid_ns_processes` -waits forever), which breaks `docker stop`, `docker restart`, `docker exec`, and -even `reboot`. The only fix is a hard power cycle. +Agave uses io_uring for async I/O. Killing agave ungracefully while it has +outstanding I/O against ZFS can produce unkillable D-state kernel threads +(`io_wq_put_and_exit` blocked on ZFS transactions), deadlocking the container. -**Prevention**: Always scale the deployment to 0 and wait for the pod to terminate -before any destructive operation (namespace delete, kind restart, host reboot). -The `biscayne-stop.yml` playbook enforces this. +**Prevention**: Use graceful shutdown (`agave-validator exit --force` via admin +RPC, or scale to 0 and wait). The `biscayne-stop.yml` playbook enforces this. +With graceful shutdown, io_uring contexts are closed cleanly and ZFS storage +is safe to use directly (no zvol/XFS workaround needed). + +**ZFS fix**: The underlying io_uring bug is fixed in ZFS 2.2.8+ (PR #17298). +Biscayne currently runs ZFS 2.2.2. Upgrading ZFS will eliminate the deadlock +risk entirely, even for ungraceful shutdowns. ### laconic-so Architecture @@ -133,11 +130,11 @@ kind node via a single bind mount. 
- Deployment: `laconic-70ce4c4b47e23b85-deployment` - Kind node container: `laconic-70ce4c4b47e23b85-control-plane` - Deployment dir: `/srv/deployments/agave` -- Snapshot dir: `/srv/kind/solana/snapshots` (on zvol, visible to kind at `/mnt/validator-snapshots`) -- Ledger dir: `/srv/kind/solana/ledger` (on zvol, visible to kind at `/mnt/validator-ledger`) -- Accounts dir: `/srv/kind/solana/ramdisk/accounts` (on ramdisk `/dev/ram0`, visible to kind at `/mnt/validator-accounts`) -- Log dir: `/srv/kind/solana/log` (on zvol, visible to kind at `/mnt/validator-log`) -- **WARNING**: `/srv/solana` is a ZFS dataset directory, NOT the zvol. Never use it for data paths. +- Snapshot dir: `/srv/kind/solana/snapshots` (ZFS dataset, visible to kind at `/mnt/validator-snapshots`) +- Ledger dir: `/srv/kind/solana/ledger` (ZFS dataset, visible to kind at `/mnt/validator-ledger`) +- Accounts dir: `/srv/kind/solana/ramdisk/accounts` (tmpfs ramdisk, visible to kind at `/mnt/validator-accounts`) +- Log dir: `/srv/kind/solana/log` (ZFS dataset, visible to kind at `/mnt/validator-log`) +- **WARNING**: `/srv/solana` is a different ZFS dataset directory. All data paths use `/srv/kind/solana`. - Host bind mount root: `/srv/kind` -> kind node `/mnt` - laconic-so: `/home/rix/.local/bin/laconic-so` (editable install) diff --git a/playbooks/biscayne-migrate-storage.yml b/playbooks/biscayne-migrate-storage.yml new file mode 100644 index 00000000..995b0001 --- /dev/null +++ b/playbooks/biscayne-migrate-storage.yml @@ -0,0 +1,286 @@ +--- +# One-time migration: zvol/XFS → ZFS dataset for /srv/kind/solana +# +# Background: +# Biscayne used a ZFS zvol formatted as XFS to work around io_uring/ZFS +# deadlocks. The root cause is now handled by graceful shutdown via admin +# RPC (agave-validator exit --force), so the zvol/XFS layer is unnecessary. +# +# What this does: +# 1. Asserts the validator is scaled to 0 (does NOT scale it — that's +# the operator's job via biscayne-stop.yml) +# 2. 
Creates a child ZFS dataset biscayne/DATA/srv/kind/solana +# 3. Copies data from the zvol to the new dataset (rsync) +# 4. Updates fstab (removes zvol line, fixes tmpfs dependency) +# 5. Destroys the zvol after verification +# +# Prerequisites: +# - Validator MUST be stopped (scale 0, no agave processes) +# - Run biscayne-stop.yml first +# +# Usage: +# ansible-playbook -i inventory/ playbooks/biscayne-migrate-storage.yml +# +# After migration, run biscayne-prepare-agave.yml to update its checks, +# then biscayne-start.yml to bring the validator back up. +# +- name: Migrate storage from zvol/XFS to ZFS dataset + hosts: all + gather_facts: false + become: true + environment: + KUBECONFIG: /home/rix/.kube/config + vars: + kind_cluster: laconic-70ce4c4b47e23b85 + k8s_namespace: "laconic-{{ kind_cluster }}" + deployment_name: "{{ kind_cluster }}-deployment" + zvol_device: /dev/zvol/biscayne/DATA/volumes/solana + zvol_dataset: biscayne/DATA/volumes/solana + new_dataset: biscayne/DATA/srv/kind/solana + kind_solana_dir: /srv/kind/solana + ramdisk_mount: /srv/kind/solana/ramdisk + ramdisk_size: 1024G + # Temporary mount for zvol during data copy + zvol_tmp_mount: /mnt/zvol-migration-tmp + + tasks: + # ---- preconditions -------------------------------------------------------- + - name: Check deployment replica count + ansible.builtin.command: > + kubectl get deployment {{ deployment_name }} + -n {{ k8s_namespace }} + -o jsonpath='{.spec.replicas}' + register: current_replicas + failed_when: false + changed_when: false + + - name: Fail if validator is running + ansible.builtin.fail: + msg: >- + Validator must be scaled to 0 before migration. + Current replicas: {{ current_replicas.stdout | default('unknown') }}. + Run biscayne-stop.yml first. 
+ when: current_replicas.stdout | default('0') | int > 0 + + - name: Verify no agave processes in kind node + ansible.builtin.command: > + docker exec {{ kind_cluster }}-control-plane + pgrep -c agave-validator + register: agave_procs + failed_when: false + changed_when: false + + - name: Fail if agave still running + ansible.builtin.fail: + msg: >- + agave-validator process still running inside kind node. + Cannot migrate while validator is active. + when: agave_procs.rc == 0 + + # ---- check current state -------------------------------------------------- + - name: Check if zvol device exists + ansible.builtin.stat: + path: "{{ zvol_device }}" + register: zvol_exists + + - name: Check if ZFS dataset already exists + ansible.builtin.command: zfs list -H -o name {{ new_dataset }} + register: dataset_exists + failed_when: false + changed_when: false + + - name: Check current mount type at {{ kind_solana_dir }} + ansible.builtin.shell: + cmd: set -o pipefail && findmnt -n -o FSTYPE {{ kind_solana_dir }} + executable: /bin/bash + register: current_fstype + failed_when: false + changed_when: false + + - name: Report current state + ansible.builtin.debug: + msg: + zvol_exists: "{{ zvol_exists.stat.exists | default(false) }}" + dataset_exists: "{{ dataset_exists.rc == 0 }}" + current_fstype: "{{ current_fstype.stdout | default('none') }}" + + # ---- skip if already migrated --------------------------------------------- + - name: End play if already on ZFS dataset + ansible.builtin.meta: end_play + when: + - dataset_exists.rc == 0 + - current_fstype.stdout | default('') == 'zfs' + - not (zvol_exists.stat.exists | default(false)) + + # ---- step 1: unmount ramdisk and zvol ------------------------------------ + - name: Unmount ramdisk + ansible.posix.mount: + path: "{{ ramdisk_mount }}" + state: unmounted + + - name: Unmount zvol from {{ kind_solana_dir }} + ansible.posix.mount: + path: "{{ kind_solana_dir }}" + state: unmounted + when: current_fstype.stdout | default('') 
== 'xfs' + + # ---- step 2: create ZFS dataset ----------------------------------------- + - name: Create ZFS dataset {{ new_dataset }} + ansible.builtin.command: > + zfs create -o mountpoint={{ kind_solana_dir }} {{ new_dataset }} + changed_when: true + when: dataset_exists.rc != 0 + + - name: Mount ZFS dataset if it already existed + ansible.builtin.command: zfs mount {{ new_dataset }} + changed_when: true + failed_when: false + when: dataset_exists.rc == 0 + + - name: Verify ZFS dataset is mounted + ansible.builtin.shell: + cmd: set -o pipefail && findmnt -n -o FSTYPE {{ kind_solana_dir }} | grep -q zfs + executable: /bin/bash + changed_when: false + + # ---- step 3: copy data from zvol ---------------------------------------- + - name: Create temporary mount point for zvol + ansible.builtin.file: + path: "{{ zvol_tmp_mount }}" + state: directory + mode: "0755" + when: zvol_exists.stat.exists | default(false) + + - name: Mount zvol at temporary location + ansible.posix.mount: + path: "{{ zvol_tmp_mount }}" + src: "{{ zvol_device }}" + fstype: xfs + state: mounted + when: zvol_exists.stat.exists | default(false) + + - name: Copy data from zvol to ZFS dataset # noqa: command-instead-of-module + ansible.builtin.command: > + rsync -a --info=progress2 + --exclude='ramdisk/' + {{ zvol_tmp_mount }}/ + {{ kind_solana_dir }}/ + changed_when: true + when: zvol_exists.stat.exists | default(false) + + # ---- step 4: verify data integrity -------------------------------------- + - name: Check key directories exist on new dataset + ansible.builtin.stat: + path: "{{ kind_solana_dir }}/{{ item }}" + register: dir_checks + loop: + - ledger + - snapshots + - log + + - name: Report directory verification + ansible.builtin.debug: + msg: "{{ item.item }}: {{ 'exists' if item.stat.exists else 'MISSING' }}" + loop: "{{ dir_checks.results }}" + loop_control: + label: "{{ item.item }}" + + # ---- step 5: update fstab ------------------------------------------------ + - name: Remove zvol 
fstab entry + ansible.builtin.lineinfile: + path: /etc/fstab + regexp: '^\S+zvol\S+\s+{{ kind_solana_dir }}\s' + state: absent + register: fstab_zvol_removed + + # Also match any XFS entry for kind_solana_dir (non-zvol form) + - name: Remove any XFS fstab entry for {{ kind_solana_dir }} + ansible.builtin.lineinfile: + path: /etc/fstab + regexp: '^\S+\s+{{ kind_solana_dir }}\s+xfs' + state: absent + + # ZFS datasets are mounted by zfs-mount.service automatically. + # The tmpfs ramdisk depends on the solana dir existing, which ZFS + # guarantees via zfs-mount.service. Update the systemd dependency. + - name: Update tmpfs ramdisk fstab entry + ansible.builtin.lineinfile: + path: /etc/fstab + regexp: '^\S+\s+{{ ramdisk_mount }}\s' + line: "tmpfs {{ ramdisk_mount }} tmpfs nodev,nosuid,noexec,nodiratime,size={{ ramdisk_size }},nofail,x-systemd.requires=zfs-mount.service 0 0" + + - name: Reload systemd # noqa: no-handler + ansible.builtin.systemd: + daemon_reload: true + when: fstab_zvol_removed.changed + + # ---- step 6: mount ramdisk ----------------------------------------------- + - name: Mount tmpfs ramdisk + ansible.posix.mount: + path: "{{ ramdisk_mount }}" + src: tmpfs + fstype: tmpfs + opts: "nodev,nosuid,noexec,nodiratime,size={{ ramdisk_size }}" + state: mounted + + - name: Ensure accounts directory + ansible.builtin.file: + path: "{{ ramdisk_mount }}/accounts" + state: directory + owner: solana + group: solana + mode: "0755" + + # ---- step 7: clean up zvol ----------------------------------------------- + - name: Unmount zvol from temporary location + ansible.posix.mount: + path: "{{ zvol_tmp_mount }}" + state: unmounted + when: zvol_exists.stat.exists | default(false) + + - name: Remove temporary mount point + ansible.builtin.file: + path: "{{ zvol_tmp_mount }}" + state: absent + + - name: Destroy zvol {{ zvol_dataset }} + ansible.builtin.command: zfs destroy {{ zvol_dataset }} + changed_when: true + when: zvol_exists.stat.exists | default(false) + + # ---- 
step 8: ensure shared propagation for docker ------------------------ + - name: Ensure shared propagation on kind mounts # noqa: command-instead-of-module + ansible.builtin.command: + cmd: mount --make-shared {{ item }} + loop: + - "{{ kind_solana_dir }}" + - "{{ ramdisk_mount }}" + changed_when: false + + # ---- verification --------------------------------------------------------- + - name: Verify solana dir is ZFS + ansible.builtin.shell: + cmd: set -o pipefail && df -T {{ kind_solana_dir }} | grep -q zfs + executable: /bin/bash + changed_when: false + + - name: Verify ramdisk is tmpfs + ansible.builtin.shell: + cmd: set -o pipefail && df -T {{ ramdisk_mount }} | grep -q tmpfs + executable: /bin/bash + changed_when: false + + - name: Verify zvol is destroyed + ansible.builtin.command: zfs list -H -o name {{ zvol_dataset }} + register: zvol_gone + failed_when: zvol_gone.rc == 0 + changed_when: false + + - name: Migration complete + ansible.builtin.debug: + msg: >- + Storage migration complete. + {{ kind_solana_dir }} is now a ZFS dataset ({{ new_dataset }}). + Ramdisk at {{ ramdisk_mount }} (tmpfs, {{ ramdisk_size }}). + zvol {{ zvol_dataset }} destroyed. + Next: update biscayne-prepare-agave.yml, then start the validator. diff --git a/playbooks/biscayne-recover.yml b/playbooks/biscayne-recover.yml index 1d46c78e..ea0d9b3e 100644 --- a/playbooks/biscayne-recover.yml +++ b/playbooks/biscayne-recover.yml @@ -10,7 +10,8 @@ # 2. Wait for pods to terminate (io_uring safety check) # 3. Wipe accounts ramdisk # 4. Clean old snapshots -# 5. Scale to 1 — container entrypoint downloads snapshot + starts validator +# 5. Ensure terminationGracePeriodSeconds is 300 (for graceful shutdown) +# 6. Scale to 1 — container entrypoint downloads snapshot + starts validator # # The playbook exits after step 5. The container handles snapshot download # (60+ min) and validator startup autonomously. 
Monitor with: @@ -95,7 +96,18 @@ become: true changed_when: true - # ---- step 5: scale to 1 — entrypoint handles snapshot download ------------ + # ---- step 5: ensure terminationGracePeriodSeconds ------------------------- + # laconic-so doesn't support this declaratively. Patch the deployment so + # k8s gives the entrypoint 300s to perform graceful shutdown via admin RPC. + - name: Ensure terminationGracePeriodSeconds is 300 + ansible.builtin.command: > + kubectl patch deployment {{ deployment_name }} + -n {{ k8s_namespace }} + -p '{"spec":{"template":{"spec":{"terminationGracePeriodSeconds":300}}}}' + register: patch_result + changed_when: "'no change' not in patch_result.stdout" + + # ---- step 6: scale to 1 — entrypoint handles snapshot download ------------ # The container's entrypoint.py checks snapshot freshness, cleans stale # snapshots, downloads fresh ones (with rolling incremental convergence), # then starts the validator. No host-side download needed. diff --git a/playbooks/biscayne-stop.yml b/playbooks/biscayne-stop.yml index 2f9290f6..4a83ca63 100644 --- a/playbooks/biscayne-stop.yml +++ b/playbooks/biscayne-stop.yml @@ -5,11 +5,12 @@ # This MUST be done before any kind node restart, host reboot, # or docker operations. # -# The agave validator uses io_uring for async I/O. On ZFS, killing -# the process ungracefully (SIGKILL, docker kill, etc.) can produce -# unkillable kernel threads stuck in io_wq_put_and_exit, deadlocking -# the container's PID namespace. A graceful SIGTERM via k8s scale-down -# allows agave to flush and close its io_uring contexts cleanly. +# The container entrypoint (PID 1) traps SIGTERM and runs +# ``agave-validator exit --force --ledger /data/ledger`` which tells +# the validator to flush I/O and exit cleanly via the admin RPC Unix +# socket. This avoids the io_uring/ZFS deadlock that occurs when the +# process is killed. terminationGracePeriodSeconds must be set to 300 +# on the k8s deployment to allow time for the flush. 
# # Usage: # # Stop the validator @@ -42,6 +43,17 @@ failed_when: false changed_when: false + # Ensure k8s gives the entrypoint enough time for graceful shutdown + # via admin RPC before sending SIGKILL. + - name: Ensure terminationGracePeriodSeconds is 300 + ansible.builtin.command: > + kubectl patch deployment {{ deployment_name }} + -n {{ k8s_namespace }} + -p '{"spec":{"template":{"spec":{"terminationGracePeriodSeconds":300}}}}' + register: patch_result + changed_when: "'no change' not in patch_result.stdout" + when: current_replicas.stdout | default('0') | int > 0 + - name: Scale deployment to 0 ansible.builtin.command: > kubectl scale deployment {{ deployment_name }} diff --git a/playbooks/biscayne-sync-tools.yml b/playbooks/biscayne-sync-tools.yml index 76ba610e..dfecd340 100644 --- a/playbooks/biscayne-sync-tools.yml +++ b/playbooks/biscayne-sync-tools.yml @@ -15,6 +15,10 @@ # ansible-playbook -i inventory/biscayne.yml playbooks/biscayne-sync-tools.yml \ # -e laconic_so_branch=fix/kind-mount-propagation # +# # Sync and rebuild the agave container image +# ansible-playbook -i inventory/biscayne.yml playbooks/biscayne-sync-tools.yml \ +# --tags build-container +# - name: Sync laconic-so and agave-stack hosts: all gather_facts: false @@ -30,49 +34,55 @@ stack_branch: main tasks: - # Git operations run as the connecting user (no become) so that - # SSH agent forwarding works. sudo drops SSH_AUTH_SOCK. 
- name: Update laconic-so (editable install) - become: false ansible.builtin.shell: | + set -e cd {{ laconic_so_repo }} git fetch origin {{ laconic_so_branch }} git reset --hard origin/{{ laconic_so_branch }} + vars: + ansible_become: false register: laconic_so_update changed_when: true + tags: [sync, build-container] - name: Show laconic-so version - become: false ansible.builtin.shell: cmd: set -o pipefail && cd {{ laconic_so_repo }} && git log --oneline -1 executable: /bin/bash register: laconic_so_version changed_when: false + tags: [sync, build-container] - name: Report laconic-so ansible.builtin.debug: msg: "laconic-so: {{ laconic_so_version.stdout }}" + tags: [sync, build-container] - name: Pull agave-stack repo - become: false ansible.builtin.shell: | + set -e cd {{ stack_repo }} git fetch origin {{ stack_branch }} git reset --hard origin/{{ stack_branch }} + vars: + ansible_become: false register: stack_update changed_when: true + tags: [sync, build-container] - name: Show agave-stack version - become: false ansible.builtin.shell: cmd: set -o pipefail && cd {{ stack_repo }} && git log --oneline -1 executable: /bin/bash register: stack_version changed_when: false + tags: [sync, build-container] - name: Report agave-stack ansible.builtin.debug: msg: "agave-stack: {{ stack_version.stdout }}" + tags: [sync, build-container] - name: Regenerate deployment config from updated stack ansible.builtin.command: > @@ -84,6 +94,7 @@ --update register: regen_result changed_when: true + tags: [sync, build-container] - name: Report sync complete ansible.builtin.debug: @@ -91,3 +102,27 @@ Sync complete. laconic-so and agave-stack updated to origin/{{ laconic_so_branch }}. Deployment config regenerated. Restart or redeploy required to apply changes. + tags: [sync, build-container] + + # ---- optional: rebuild container image -------------------------------------- + # Only runs when explicitly requested with --tags build-container. 
+ # Safe to run while the validator is running — just builds a new image. + # The running pod keeps the old image until restarted. + - name: Build agave container image + ansible.builtin.command: > + {{ laconic_so }} + --stack {{ stack_path }} + build-containers + --include laconicnetwork-agave + tags: + - build-container + - never + register: build_result + changed_when: true + + - name: Report build complete + ansible.builtin.debug: + msg: "Container image built. Will be used on next pod restart." + tags: + - build-container + - never diff --git a/playbooks/biscayne-upgrade-zfs.yml b/playbooks/biscayne-upgrade-zfs.yml new file mode 100644 index 00000000..a1b38c9d --- /dev/null +++ b/playbooks/biscayne-upgrade-zfs.yml @@ -0,0 +1,158 @@ +--- +# Upgrade ZFS from 2.2.2 to 2.2.9 via arter97's zfs-lts PPA +# +# Fixes the io_uring deadlock (OpenZFS PR #17298) at the kernel module level. +# After this upgrade, the zvol/XFS workaround is unnecessary and can be removed +# with biscayne-migrate-storage.yml. +# +# PPA: ppa:arter97/zfs-lts (Juhyung Park, OpenZFS contributor) +# Builds from source on Launchpad — transparent, auditable. +# +# WARNING: This playbook triggers a reboot at the end. If the io_uring zombie +# is present, the reboot WILL HANG. The operator must hard power cycle the +# machine (IPMI/iDRAC/physical). The playbook does not wait for the reboot — +# run the verify tag separately after the machine comes back. 
+# +# Usage: +# # Full upgrade (adds PPA, upgrades, reboots) +# ansible-playbook -i inventory/ playbooks/biscayne-upgrade-zfs.yml +# +# # Verify after reboot +# ansible-playbook -i inventory/ playbooks/biscayne-upgrade-zfs.yml \ +# --tags verify +# +# # Dry run — show what would be upgraded +# ansible-playbook -i inventory/ playbooks/biscayne-upgrade-zfs.yml \ +# --tags dry-run +# +- name: Upgrade ZFS via arter97/zfs-lts PPA + hosts: all + gather_facts: true + become: true + vars: + zfs_min_version: "2.2.8" + ppa_name: "ppa:arter97/zfs-lts" + zfs_packages: + - zfsutils-linux + - zfs-dkms + - libzfs5linux + + tasks: + # ---- pre-flight checks ---------------------------------------------------- + - name: Get current ZFS version + ansible.builtin.command: modinfo -F version zfs + register: zfs_current_version + changed_when: false + tags: [always] + + - name: Report current ZFS version + ansible.builtin.debug: + msg: "Current ZFS: {{ zfs_current_version.stdout }}" + tags: [always] + + - name: Skip if already upgraded + ansible.builtin.meta: end_play + when: zfs_current_version.stdout is version(zfs_min_version, '>=') + tags: [always] + + # ---- dry run --------------------------------------------------------------- + - name: Show available ZFS packages from PPA (dry run) + ansible.builtin.shell: + cmd: > + set -o pipefail && + apt-cache policy zfsutils-linux zfs-dkms 2>/dev/null | + grep -A2 'zfsutils-linux\|zfs-dkms' + executable: /bin/bash + changed_when: false + failed_when: false + tags: + - dry-run + - never + + # ---- add PPA --------------------------------------------------------------- + - name: Add arter97/zfs-lts PPA + ansible.builtin.apt_repository: + repo: "{{ ppa_name }}" + state: present + update_cache: true + tags: [upgrade] + + # ---- upgrade ZFS packages -------------------------------------------------- + - name: Upgrade ZFS packages + ansible.builtin.apt: + name: "{{ zfs_packages }}" + state: latest # noqa: package-latest + update_cache: true + 
register: zfs_upgrade + tags: [upgrade] + + - name: Show upgrade result + ansible.builtin.debug: + msg: "{{ zfs_upgrade.stdout_lines | default(['no output']) }}" + tags: [upgrade] + + # ---- reboot ---------------------------------------------------------------- + - name: Report pre-reboot status + ansible.builtin.debug: + msg: >- + ZFS packages upgraded. Rebooting now. + If the io_uring zombie is present, this reboot WILL HANG. + Hard power cycle the machine, then run this playbook with + --tags verify to confirm the upgrade. + tags: [upgrade] + + - name: Reboot to load new ZFS modules + ansible.builtin.reboot: + msg: "ZFS upgrade — rebooting to load new kernel modules" + reboot_timeout: 600 + tags: [upgrade] + # This will timeout if io_uring zombie blocks shutdown. + # Operator must hard power cycle. That's expected. + + # ---- post-reboot verification ----------------------------------------------- + - name: Get ZFS version after reboot + ansible.builtin.command: modinfo -F version zfs + register: zfs_new_version + changed_when: false + tags: + - verify + - never + + - name: Verify ZFS version meets minimum + ansible.builtin.assert: + that: + - zfs_new_version.stdout is version(zfs_min_version, '>=') + fail_msg: >- + ZFS version {{ zfs_new_version.stdout }} is below minimum + {{ zfs_min_version }}. Upgrade may have failed. + success_msg: "ZFS {{ zfs_new_version.stdout }} — io_uring fix confirmed." 
+ tags: + - verify + - never + + - name: Verify ZFS pools are healthy + ansible.builtin.command: zpool status -x + register: zpool_status + changed_when: false + failed_when: "'all pools are healthy' not in zpool_status.stdout" + tags: + - verify + - never + + - name: Verify ZFS datasets are mounted + ansible.builtin.command: zfs mount + register: zfs_mounts + changed_when: false + tags: + - verify + - never + + - name: Report verification + ansible.builtin.debug: + msg: + zfs_version: "{{ zfs_new_version.stdout }}" + pools: "{{ zpool_status.stdout }}" + mounts: "{{ zfs_mounts.stdout_lines }}" + tags: + - verify + - never diff --git a/scripts/agave-container/entrypoint.py b/scripts/agave-container/entrypoint.py index 519c7be2..20961624 100644 --- a/scripts/agave-container/entrypoint.py +++ b/scripts/agave-container/entrypoint.py @@ -2,12 +2,17 @@ """Agave validator entrypoint — snapshot management, arg construction, liveness probe. Two subcommands: - entrypoint.py serve (default) — snapshot freshness check + exec agave-validator + entrypoint.py serve (default) — snapshot freshness check + run agave-validator entrypoint.py probe — liveness probe (slot lag check, exits 0/1) Replaces the bash entrypoint.sh / start-rpc.sh / start-validator.sh with a single Python module. Test mode still dispatches to start-test.sh. +Python stays as PID 1 and traps SIGTERM. On SIGTERM, it runs +``agave-validator exit --force --ledger /data/ledger`` which connects to the +admin RPC Unix socket and tells the validator to flush I/O and exit cleanly. +This avoids the io_uring/ZFS deadlock that occurs when the process is killed. + All configuration comes from environment variables — same vars as the original bash scripts. See compose files for defaults. 
""" @@ -18,8 +23,10 @@ import json import logging import os import re +import signal import subprocess import sys +import threading import time import urllib.error import urllib.request @@ -365,11 +372,77 @@ def append_extra_args(args: list[str]) -> list[str]: return args +# -- Graceful shutdown -------------------------------------------------------- + +# Timeout for graceful exit via admin RPC. Leave 30s margin for k8s +# terminationGracePeriodSeconds (300s). +GRACEFUL_EXIT_TIMEOUT = 270 + + +def graceful_exit(child: subprocess.Popen[bytes]) -> None: + """Request graceful shutdown via the admin RPC Unix socket. + + Runs ``agave-validator exit --force --ledger /data/ledger`` which connects + to the admin RPC socket at ``/data/ledger/admin.rpc`` and sets the + validator's exit flag. The validator flushes all I/O and exits cleanly, + avoiding the io_uring/ZFS deadlock. + + If the admin RPC exit fails or the child doesn't exit within the timeout, + falls back to SIGTERM then SIGKILL. + """ + log.info("SIGTERM received — requesting graceful exit via admin RPC") + try: + result = subprocess.run( + ["agave-validator", "exit", "--force", "--ledger", LEDGER_DIR], + capture_output=True, text=True, timeout=30, + ) + if result.returncode == 0: + log.info("Admin RPC exit requested successfully") + else: + log.warning( + "Admin RPC exit returned %d: %s", + result.returncode, result.stderr.strip(), + ) + except subprocess.TimeoutExpired: + log.warning("Admin RPC exit command timed out after 30s") + except FileNotFoundError: + log.warning("agave-validator binary not found for exit command") + + # Wait for child to exit + try: + child.wait(timeout=GRACEFUL_EXIT_TIMEOUT) + log.info("Validator exited cleanly with code %d", child.returncode) + return + except subprocess.TimeoutExpired: + log.warning( + "Validator did not exit within %ds — sending SIGTERM", + GRACEFUL_EXIT_TIMEOUT, + ) + + # Fallback: SIGTERM + child.terminate() + try: + child.wait(timeout=15) + log.info("Validator 
exited after SIGTERM with code %d", child.returncode) + return + except subprocess.TimeoutExpired: + log.warning("Validator did not exit after SIGTERM — sending SIGKILL") + + # Last resort: SIGKILL + child.kill() + child.wait() + log.info("Validator killed with SIGKILL, code %d", child.returncode) + + # -- Serve subcommand --------------------------------------------------------- def cmd_serve() -> None: - """Main serve flow: snapshot check, setup, exec agave-validator.""" + """Main serve flow: snapshot check, setup, run agave-validator as child. + + Python stays as PID 1 and traps SIGTERM to perform graceful shutdown + via the admin RPC Unix socket. + """ mode = env("AGAVE_MODE", "test") log.info("AGAVE_MODE=%s", mode) @@ -407,7 +480,21 @@ def cmd_serve() -> None: Path("/tmp/entrypoint-start").write_text(str(time.time())) log.info("Starting agave-validator with %d arguments", len(args)) - os.execvp("agave-validator", ["agave-validator"] + args) + child = subprocess.Popen(["agave-validator"] + args) + + # Forward SIGUSR1 to child (log rotation) + signal.signal(signal.SIGUSR1, lambda _sig, _frame: child.send_signal(signal.SIGUSR1)) + + # Trap SIGTERM — run graceful_exit in a thread so the signal handler returns + # immediately and child.wait() in the main thread can observe the exit. 
+ def _on_sigterm(_sig: int, _frame: object) -> None: + threading.Thread(target=graceful_exit, args=(child,), daemon=True).start() + + signal.signal(signal.SIGTERM, _on_sigterm) + + # Wait for child — if it exits on its own (crash, normal exit), propagate code + child.wait() + sys.exit(child.returncode) # -- Probe subcommand --------------------------------------------------------- diff --git a/scripts/agave-container/snapshot_download.py b/scripts/agave-container/snapshot_download.py index 151b2f26..146b7291 100644 --- a/scripts/agave-container/snapshot_download.py +++ b/scripts/agave-container/snapshot_download.py @@ -655,8 +655,9 @@ def download_best_snapshot( log.info("Downloading incremental %s (%d mirrors, slot %d, gap %d slots)", inc_fn, len(inc_mirrors), inc_slot, gap) if not download_aria2c(inc_mirrors, output_dir, inc_fn, connections): - log.error("Failed to download incremental %s", inc_fn) - break + log.warning("Failed to download incremental %s — re-probing in 10s", inc_fn) + time.sleep(10) + continue prev_inc_filename = inc_fn diff --git a/scripts/check-status.py b/scripts/check-status.py index 2f9cf131..b3974392 100755 --- a/scripts/check-status.py +++ b/scripts/check-status.py @@ -18,6 +18,7 @@ from __future__ import annotations import argparse import json +import os import subprocess import sys import time @@ -206,9 +207,11 @@ def display(iteration: int = 0) -> None: snapshots = check_snapshots() ramdisk = check_ramdisk() - print(f"\n{'=' * 60}") - print(f" Biscayne Agave Status — {ts}") - print(f"{'=' * 60}") + # Clear screen and home cursor for clean redraw in watch mode + if iteration > 0: + print("\033[2J\033[H", end="") + + print(f"\n Biscayne Agave Status — {ts}\n") # Pod print(f"\n Pod: {pod['phase']}") @@ -275,14 +278,30 @@ def display(iteration: int = 0) -> None: # -- Main --------------------------------------------------------------------- +def spawn_tmux_pane(interval: int) -> None: + """Launch this script with --watch in a new tmux 
pane.""" + script = os.path.abspath(__file__) + cmd = f"python3 {script} --watch -i {interval}" + subprocess.run( + ["tmux", "split-window", "-h", "-d", cmd], + check=True, + ) + + def main() -> int: p = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter) p.add_argument("--watch", action="store_true", help="Repeat every interval") + p.add_argument("--pane", action="store_true", + help="Launch --watch in a new tmux pane") p.add_argument("-i", "--interval", type=int, default=30, help="Watch interval in seconds (default: 30)") args = p.parse_args() + if args.pane: + spawn_tmux_pane(args.interval) + return 0 + discover() try: From ddbcd1a97c24e11e7a441aafe0013afe0b55ab9d Mon Sep 17 00:00:00 2001 From: "A. F. Dudley" Date: Tue, 10 Mar 2026 00:48:37 +0000 Subject: [PATCH 45/62] fix: migration playbook stops docker first, skips stale data copy - biscayne-migrate-storage.yml: stop docker to release bind mounts before destroying zvol, no data copy (stale, fresh snapshot needed), handle partially-migrated state, restart docker at end - biscayne-upgrade-zfs.yml: use add-apt-repository CLI (module times out), fix libzfs package name (libzfs4linux not 5), allow apt update warnings from stale influxdata GPG key Co-Authored-By: Claude Opus 4.6 --- playbooks/biscayne-migrate-storage.yml | 219 ++++++++++--------------- playbooks/biscayne-upgrade-zfs.yml | 14 +- 2 files changed, 91 insertions(+), 142 deletions(-) diff --git a/playbooks/biscayne-migrate-storage.yml b/playbooks/biscayne-migrate-storage.yml index 995b0001..1c217f5f 100644 --- a/playbooks/biscayne-migrate-storage.yml +++ b/playbooks/biscayne-migrate-storage.yml @@ -3,81 +3,39 @@ # # Background: # Biscayne used a ZFS zvol formatted as XFS to work around io_uring/ZFS -# deadlocks. The root cause is now handled by graceful shutdown via admin -# RPC (agave-validator exit --force), so the zvol/XFS layer is unnecessary. +# deadlocks. 
With ZFS upgraded to 2.2.9 (io_uring fix) and graceful +# shutdown via admin RPC, the zvol/XFS layer is unnecessary overhead. # # What this does: -# 1. Asserts the validator is scaled to 0 (does NOT scale it — that's -# the operator's job via biscayne-stop.yml) -# 2. Creates a child ZFS dataset biscayne/DATA/srv/kind/solana -# 3. Copies data from the zvol to the new dataset (rsync) -# 4. Updates fstab (removes zvol line, fixes tmpfs dependency) -# 5. Destroys the zvol after verification -# -# Prerequisites: -# - Validator MUST be stopped (scale 0, no agave processes) -# - Run biscayne-stop.yml first +# 1. Stops docker to release all bind mounts referencing /srv/kind +# 2. Unmounts the zvol and any leftover temp mounts +# 3. Creates a ZFS dataset at biscayne/DATA/srv/kind/solana (if needed) +# 4. Destroys the zvol (no data copy — stale data, fresh snapshot on restart) +# 5. Updates fstab, mounts ramdisk, creates directories +# 6. Restarts docker (kind cluster comes back) # # Usage: -# ansible-playbook -i inventory/ playbooks/biscayne-migrate-storage.yml +# ansible-playbook -i inventory/biscayne.yml playbooks/biscayne-migrate-storage.yml # -# After migration, run biscayne-prepare-agave.yml to update its checks, -# then biscayne-start.yml to bring the validator back up. +# After migration, rebuild the container image with biscayne-sync-tools.yml +# --tags build-container, then start the validator with biscayne-recover.yml. 
# - name: Migrate storage from zvol/XFS to ZFS dataset hosts: all gather_facts: false become: true - environment: - KUBECONFIG: /home/rix/.kube/config vars: kind_cluster: laconic-70ce4c4b47e23b85 - k8s_namespace: "laconic-{{ kind_cluster }}" - deployment_name: "{{ kind_cluster }}-deployment" zvol_device: /dev/zvol/biscayne/DATA/volumes/solana zvol_dataset: biscayne/DATA/volumes/solana new_dataset: biscayne/DATA/srv/kind/solana kind_solana_dir: /srv/kind/solana ramdisk_mount: /srv/kind/solana/ramdisk ramdisk_size: 1024G - # Temporary mount for zvol during data copy zvol_tmp_mount: /mnt/zvol-migration-tmp tasks: - # ---- preconditions -------------------------------------------------------- - - name: Check deployment replica count - ansible.builtin.command: > - kubectl get deployment {{ deployment_name }} - -n {{ k8s_namespace }} - -o jsonpath='{.spec.replicas}' - register: current_replicas - failed_when: false - changed_when: false - - - name: Fail if validator is running - ansible.builtin.fail: - msg: >- - Validator must be scaled to 0 before migration. - Current replicas: {{ current_replicas.stdout | default('unknown') }}. - Run biscayne-stop.yml first. - when: current_replicas.stdout | default('0') | int > 0 - - - name: Verify no agave processes in kind node - ansible.builtin.command: > - docker exec {{ kind_cluster }}-control-plane - pgrep -c agave-validator - register: agave_procs - failed_when: false - changed_when: false - - - name: Fail if agave still running - ansible.builtin.fail: - msg: >- - agave-validator process still running inside kind node. - Cannot migrate while validator is active. 
- when: agave_procs.rc == 0 - - # ---- check current state -------------------------------------------------- + # ---- assess current state --------------------------------------------------- - name: Check if zvol device exists ansible.builtin.stat: path: "{{ zvol_device }}" @@ -97,26 +55,59 @@ failed_when: false changed_when: false + - name: Check if temp zvol mount exists + ansible.builtin.shell: + cmd: set -o pipefail && findmnt -n {{ zvol_tmp_mount }} + executable: /bin/bash + register: tmp_mount_exists + failed_when: false + changed_when: false + - name: Report current state ansible.builtin.debug: msg: zvol_exists: "{{ zvol_exists.stat.exists | default(false) }}" dataset_exists: "{{ dataset_exists.rc == 0 }}" current_fstype: "{{ current_fstype.stdout | default('none') }}" + temp_mount: "{{ tmp_mount_exists.rc == 0 }}" - # ---- skip if already migrated --------------------------------------------- - - name: End play if already on ZFS dataset + - name: End play if already migrated ansible.builtin.meta: end_play when: - dataset_exists.rc == 0 - current_fstype.stdout | default('') == 'zfs' - not (zvol_exists.stat.exists | default(false)) - # ---- step 1: unmount ramdisk and zvol ------------------------------------ - - name: Unmount ramdisk + # ---- stop docker to release all /srv/kind references ----------------------- + - name: Stop docker (releases kind bind mounts to /srv/kind) + ansible.builtin.systemd: + name: docker + state: stopped + register: docker_stopped + changed_when: docker_stopped.changed + + - name: Stop docker socket + ansible.builtin.systemd: + name: docker.socket + state: stopped + + # ---- unmount everything referencing the zvol -------------------------------- + - name: Unmount temp zvol mount (leftover from interrupted migration) + ansible.posix.mount: + path: "{{ zvol_tmp_mount }}" + state: unmounted + when: tmp_mount_exists.rc == 0 + + - name: Remove temp mount directory + ansible.builtin.file: + path: "{{ zvol_tmp_mount }}" + state: 
absent + + - name: Unmount ramdisk if mounted ansible.posix.mount: path: "{{ ramdisk_mount }}" state: unmounted + failed_when: false - name: Unmount zvol from {{ kind_solana_dir }} ansible.posix.mount: @@ -124,14 +115,14 @@ state: unmounted when: current_fstype.stdout | default('') == 'xfs' - # ---- step 2: create ZFS dataset ----------------------------------------- + # ---- create ZFS dataset if needed ------------------------------------------ - name: Create ZFS dataset {{ new_dataset }} ansible.builtin.command: > zfs create -o mountpoint={{ kind_solana_dir }} {{ new_dataset }} changed_when: true when: dataset_exists.rc != 0 - - name: Mount ZFS dataset if it already existed + - name: Mount ZFS dataset if it already existed but isn't mounted ansible.builtin.command: zfs mount {{ new_dataset }} changed_when: true failed_when: false @@ -143,78 +134,48 @@ executable: /bin/bash changed_when: false - # ---- step 3: copy data from zvol ---------------------------------------- - - name: Create temporary mount point for zvol - ansible.builtin.file: - path: "{{ zvol_tmp_mount }}" - state: directory - mode: "0755" - when: zvol_exists.stat.exists | default(false) - - - name: Mount zvol at temporary location - ansible.posix.mount: - path: "{{ zvol_tmp_mount }}" - src: "{{ zvol_device }}" - fstype: xfs - state: mounted - when: zvol_exists.stat.exists | default(false) - - - name: Copy data from zvol to ZFS dataset # noqa: command-instead-of-module - ansible.builtin.command: > - rsync -a --info=progress2 - --exclude='ramdisk/' - {{ zvol_tmp_mount }}/ - {{ kind_solana_dir }}/ + # ---- destroy zvol ----------------------------------------------------------- + - name: Destroy zvol {{ zvol_dataset }} + ansible.builtin.command: zfs destroy -r {{ zvol_dataset }} changed_when: true when: zvol_exists.stat.exists | default(false) - # ---- step 4: verify data integrity -------------------------------------- - - name: Check key directories exist on new dataset - ansible.builtin.stat: + # 
---- create directory structure on new dataset ------------------------------ + - name: Create solana data directories + ansible.builtin.file: path: "{{ kind_solana_dir }}/{{ item }}" - register: dir_checks + state: directory + mode: "0755" loop: - ledger - snapshots - log + - ramdisk - - name: Report directory verification - ansible.builtin.debug: - msg: "{{ item.item }}: {{ 'exists' if item.stat.exists else 'MISSING' }}" - loop: "{{ dir_checks.results }}" - loop_control: - label: "{{ item.item }}" - - # ---- step 5: update fstab ------------------------------------------------ + # ---- update fstab ----------------------------------------------------------- - name: Remove zvol fstab entry ansible.builtin.lineinfile: path: /etc/fstab regexp: '^\S+zvol\S+\s+{{ kind_solana_dir }}\s' state: absent - register: fstab_zvol_removed - # Also match any XFS entry for kind_solana_dir (non-zvol form) - name: Remove any XFS fstab entry for {{ kind_solana_dir }} ansible.builtin.lineinfile: path: /etc/fstab regexp: '^\S+\s+{{ kind_solana_dir }}\s+xfs' state: absent - # ZFS datasets are mounted by zfs-mount.service automatically. - # The tmpfs ramdisk depends on the solana dir existing, which ZFS - # guarantees via zfs-mount.service. Update the systemd dependency. 
- name: Update tmpfs ramdisk fstab entry ansible.builtin.lineinfile: path: /etc/fstab regexp: '^\S+\s+{{ ramdisk_mount }}\s' line: "tmpfs {{ ramdisk_mount }} tmpfs nodev,nosuid,noexec,nodiratime,size={{ ramdisk_size }},nofail,x-systemd.requires=zfs-mount.service 0 0" - - name: Reload systemd # noqa: no-handler + - name: Reload systemd ansible.builtin.systemd: daemon_reload: true - when: fstab_zvol_removed.changed - # ---- step 6: mount ramdisk ----------------------------------------------- + # ---- mount ramdisk ---------------------------------------------------------- - name: Mount tmpfs ramdisk ansible.posix.mount: path: "{{ ramdisk_mount }}" @@ -223,54 +184,40 @@ opts: "nodev,nosuid,noexec,nodiratime,size={{ ramdisk_size }}" state: mounted - - name: Ensure accounts directory + - name: Ensure accounts directory on ramdisk ansible.builtin.file: path: "{{ ramdisk_mount }}/accounts" state: directory - owner: solana - group: solana mode: "0755" - # ---- step 7: clean up zvol ----------------------------------------------- - - name: Unmount zvol from temporary location - ansible.posix.mount: - path: "{{ zvol_tmp_mount }}" - state: unmounted - when: zvol_exists.stat.exists | default(false) + # ---- restart docker (brings kind back) ------------------------------------- + - name: Start docker + ansible.builtin.systemd: + name: docker + state: started - - name: Remove temporary mount point - ansible.builtin.file: - path: "{{ zvol_tmp_mount }}" - state: absent - - - name: Destroy zvol {{ zvol_dataset }} - ansible.builtin.command: zfs destroy {{ zvol_dataset }} - changed_when: true - when: zvol_exists.stat.exists | default(false) - - # ---- step 8: ensure shared propagation for docker ------------------------ - - name: Ensure shared propagation on kind mounts # noqa: command-instead-of-module - ansible.builtin.command: - cmd: mount --make-shared {{ item }} - loop: - - "{{ kind_solana_dir }}" - - "{{ ramdisk_mount }}" + - name: Wait for kind node container + 
ansible.builtin.command: docker inspect -f '{{ '{{' }}.State.Running{{ '}}' }}' {{ kind_cluster }}-control-plane + register: kind_running changed_when: false + retries: 12 + delay: 5 + until: kind_running.stdout == 'true' - # ---- verification --------------------------------------------------------- + # ---- verification ----------------------------------------------------------- - name: Verify solana dir is ZFS ansible.builtin.shell: - cmd: set -o pipefail && df -T {{ kind_solana_dir }} | grep -q zfs + cmd: set -o pipefail && findmnt -n -o FSTYPE {{ kind_solana_dir }} | grep -q zfs executable: /bin/bash changed_when: false - name: Verify ramdisk is tmpfs ansible.builtin.shell: - cmd: set -o pipefail && df -T {{ ramdisk_mount }} | grep -q tmpfs + cmd: set -o pipefail && findmnt -n -o FSTYPE {{ ramdisk_mount }} | grep -q tmpfs executable: /bin/bash changed_when: false - - name: Verify zvol is destroyed + - name: Verify zvol is gone ansible.builtin.command: zfs list -H -o name {{ zvol_dataset }} register: zvol_gone failed_when: zvol_gone.rc == 0 @@ -280,7 +227,9 @@ ansible.builtin.debug: msg: >- Storage migration complete. - {{ kind_solana_dir }} is now a ZFS dataset ({{ new_dataset }}). + {{ kind_solana_dir }} is now ZFS dataset {{ new_dataset }}. Ramdisk at {{ ramdisk_mount }} (tmpfs, {{ ramdisk_size }}). - zvol {{ zvol_dataset }} destroyed. - Next: update biscayne-prepare-agave.yml, then start the validator. + zvol {{ zvol_dataset }} destroyed. Data intentionally not copied + (stale) — download fresh snapshot on next start. + Next: biscayne-sync-tools.yml --tags build-container, then + biscayne-recover.yml. 
diff --git a/playbooks/biscayne-upgrade-zfs.yml b/playbooks/biscayne-upgrade-zfs.yml index a1b38c9d..5bd70567 100644 --- a/playbooks/biscayne-upgrade-zfs.yml +++ b/playbooks/biscayne-upgrade-zfs.yml @@ -35,7 +35,7 @@ zfs_packages: - zfsutils-linux - zfs-dkms - - libzfs5linux + - libzfs4linux tasks: # ---- pre-flight checks ---------------------------------------------------- @@ -70,11 +70,12 @@ - never # ---- add PPA --------------------------------------------------------------- - - name: Add arter97/zfs-lts PPA - ansible.builtin.apt_repository: - repo: "{{ ppa_name }}" - state: present - update_cache: true + # Use add-apt-repository CLI instead of apt_repository module — + # the module's Launchpad API fetch times out on biscayne. + - name: Add arter97/zfs-lts PPA # noqa: command-instead-of-module + ansible.builtin.command: add-apt-repository -y {{ ppa_name }} + register: ppa_add + changed_when: "'Added' in ppa_add.stdout or 'added' in ppa_add.stderr" tags: [upgrade] # ---- upgrade ZFS packages -------------------------------------------------- @@ -82,7 +83,6 @@ ansible.builtin.apt: name: "{{ zfs_packages }}" state: latest # noqa: package-latest - update_cache: true register: zfs_upgrade tags: [upgrade] From e59796870813b9d5e142dfca41ba132226ebe332 Mon Sep 17 00:00:00 2001 From: "A. F. Dudley" Date: Tue, 10 Mar 2026 00:57:36 +0000 Subject: [PATCH 46/62] fix: recovery playbook fixes grafana PV ownership before scale-up laconic-so creates PV hostPath dirs as root. Grafana runs as UID 472 and crashes on startup because it can't write to /var/lib/grafana. Fix ownership inside the kind node before scaling the deployment up. Co-Authored-By: Claude Opus 4.6 --- playbooks/biscayne-recover.yml | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/playbooks/biscayne-recover.yml b/playbooks/biscayne-recover.yml index ea0d9b3e..b2e533d3 100644 --- a/playbooks/biscayne-recover.yml +++ b/playbooks/biscayne-recover.yml @@ -11,7 +11,8 @@ # 3. 
Wipe accounts ramdisk # 4. Clean old snapshots # 5. Ensure terminationGracePeriodSeconds is 300 (for graceful shutdown) -# 6. Scale to 1 — container entrypoint downloads snapshot + starts validator +# 6. Fix PV permissions (grafana runs as UID 472, laconic-so creates as root) +# 7. Scale to 1 — container entrypoint downloads snapshot + starts validator # # The playbook exits after step 5. The container handles snapshot download # (60+ min) and validator startup autonomously. Monitor with: @@ -107,7 +108,16 @@ register: patch_result changed_when: "'no change' not in patch_result.stdout" - # ---- step 6: scale to 1 — entrypoint handles snapshot download ------------ + # ---- step 6: fix PV permissions --------------------------------------------- + # laconic-so creates PV hostPath dirs as root. Grafana runs as UID 472 and + # can't write to its data dir. Fix ownership inside the kind node. + - name: Fix grafana PV ownership in kind node + ansible.builtin.command: > + docker exec {{ kind_cluster }}-control-plane + chown 472:472 /tmp/grafana-data + changed_when: true + + # ---- step 7: scale to 1 — entrypoint handles snapshot download ------------ # The container's entrypoint.py checks snapshot freshness, cleans stale # snapshots, downloads fresh ones (with rolling incremental convergence), # then starts the validator. No host-side download needed. From cd36bfe5eebf9a7b0ce559fc27b7ebfa9bdaa87c Mon Sep 17 00:00:00 2001 From: "A. F. 
Dudley" Date: Tue, 10 Mar 2026 01:00:36 +0000 Subject: [PATCH 47/62] fix: check-status.py smooth in-place redraw, remove comment bars - Overwrite lines in place instead of clear+redraw (no flicker) - Pad lines to terminal width to clear stale characters - Blank leftover rows when output shrinks between frames - Hide cursor during watch mode - Remove section comment bars - Replace unicode checkmarks with +/x Co-Authored-By: Claude Opus 4.6 --- scripts/check-status.py | 104 ++++++++++++++++++++++------------------ 1 file changed, 58 insertions(+), 46 deletions(-) diff --git a/scripts/check-status.py b/scripts/check-status.py index b3974392..a7d22157 100755 --- a/scripts/check-status.py +++ b/scripts/check-status.py @@ -19,13 +19,12 @@ from __future__ import annotations import argparse import json import os +import shutil import subprocess import sys import time import urllib.request -# -- Config ------------------------------------------------------------------- - SSH_HOST = "biscayne.vaasl.io" KUBECONFIG = "/home/rix/.kube/config" DEPLOYMENT_DIR = "/srv/deployments/agave" @@ -33,7 +32,6 @@ SNAPSHOT_DIR = "/srv/kind/solana/snapshots" RAMDISK = "/srv/kind/solana/ramdisk" MAINNET_RPC = "https://api.mainnet-beta.solana.com" -# Derived from deployment.yml on first connect CLUSTER_ID: str = "" NAMESPACE: str = "" DEPLOYMENT: str = "" @@ -41,9 +39,6 @@ POD_LABEL: str = "" KIND_CONTAINER: str = "" -# -- Discovery ---------------------------------------------------------------- - - def discover() -> None: """Read cluster-id from deployment.yml and derive all identifiers.""" global CLUSTER_ID, NAMESPACE, DEPLOYMENT, POD_LABEL, KIND_CONTAINER @@ -61,9 +56,6 @@ def discover() -> None: KIND_CONTAINER = f"{CLUSTER_ID}-control-plane" -# -- Helpers ------------------------------------------------------------------ - - def ssh(cmd: str, timeout: int = 15) -> tuple[int, str]: """Run a command on biscayne via SSH. 
Returns (rc, stdout).""" r = subprocess.run( @@ -96,9 +88,6 @@ def get_mainnet_slot() -> int | None: return None -# -- Checks ------------------------------------------------------------------- - - def check_pod() -> dict: """Get pod phase and container statuses.""" rc, out = kubectl( @@ -187,38 +176,32 @@ def check_ramdisk() -> str: return out -# -- Display ------------------------------------------------------------------ - - prev_slot: int | None = None prev_time: float | None = None -def display(iteration: int = 0) -> None: - """Run all checks and print status.""" +def render() -> list[str]: + """Gather all data and return lines to display.""" global prev_slot, prev_time now = time.time() ts = time.strftime("%H:%M:%S") + lines: list[str] = [] - # Gather data pod = check_pod() mainnet = get_mainnet_slot() snapshots = check_snapshots() ramdisk = check_ramdisk() - # Clear screen and home cursor for clean redraw in watch mode - if iteration > 0: - print("\033[2J\033[H", end="") - - print(f"\n Biscayne Agave Status — {ts}\n") + lines.append(f" Biscayne Agave Status {ts}") + lines.append("") # Pod - print(f"\n Pod: {pod['phase']}") + lines.append(f" Pod: {pod['phase']}") for name, cs in pod["containers"].items(): - ready = "✓" if cs["ready"] else "✗" + ready = "+" if cs["ready"] else "x" restarts = f" (restarts: {cs['restarts']})" if cs["restarts"] > 0 else "" - print(f" {ready} {name}: {cs['state']}{restarts}") + lines.append(f" {ready} {name}: {cs['state']}{restarts}") # Validator slot validator_slot = None @@ -227,6 +210,7 @@ def display(iteration: int = 0) -> None: if agave.get("ready"): validator_slot = check_validator_slot() + lines.append("") if validator_slot is not None and mainnet is not None: gap = mainnet - validator_slot rate = "" @@ -234,7 +218,6 @@ def display(iteration: int = 0) -> None: dt = now - prev_time if dt > 0: slots_gained = validator_slot - prev_slot - # Net rate = our replay rate minus chain production net_rate = slots_gained / dt if 
net_rate > 0: eta_sec = gap / net_rate @@ -244,38 +227,58 @@ def display(iteration: int = 0) -> None: rate = f" net {net_rate:+.1f} slots/s (falling behind)" prev_slot = validator_slot prev_time = now - print(f"\n Validator: slot {validator_slot:,}") - print(f" Mainnet: slot {mainnet:,}") - print(f" Gap: {gap:,} slots{rate}") + lines.append(f" Validator: slot {validator_slot:,}") + lines.append(f" Mainnet: slot {mainnet:,}") + lines.append(f" Gap: {gap:,} slots{rate}") elif mainnet is not None: - print(f"\n Validator: not responding (downloading or starting)") - print(f" Mainnet: slot {mainnet:,}") + lines.append(" Validator: not responding (downloading or starting)") + lines.append(f" Mainnet: slot {mainnet:,}") else: - print(f"\n Mainnet: unreachable") + lines.append(" Mainnet: unreachable") # Snapshots + lines.append("") if snapshots: - print(f"\n Snapshots:") + lines.append(" Snapshots:") for s in snapshots: - print(f" {s['size']:>6s} {s['name']}") + lines.append(f" {s['size']:>6s} {s['name']}") else: - print(f"\n Snapshots: none on disk") + lines.append(" Snapshots: none on disk") # Ramdisk - print(f" Ramdisk: {ramdisk}") + lines.append(f" Ramdisk: {ramdisk}") # Entrypoint logs (only if validator not yet responding) if validator_slot is None and pod["phase"] in ("Running", "Pending"): logs = check_entrypoint_logs(10) if logs and logs != "(no logs)": - print(f"\n Entrypoint logs (last 10 lines):") + lines.append("") + lines.append(" Entrypoint logs (last 10 lines):") for line in logs.splitlines(): - print(f" {line}") + lines.append(f" {line}") - print() + return lines -# -- Main --------------------------------------------------------------------- +def display(watch: bool, prev_lines: int) -> int: + """Render status and paint to terminal. 
Returns number of lines written.""" + output = render() + cols = shutil.get_terminal_size().columns + + if watch: + # Move cursor to top-left without clearing — overwrite in place + sys.stdout.write("\033[H") + + for line in output: + # Pad to terminal width to overwrite stale characters from prior frame + sys.stdout.write(line.ljust(cols)[:cols] + "\n") + + # If previous frame had more lines, blank the leftover rows + for _ in range(max(0, prev_lines - len(output))): + sys.stdout.write(" " * cols + "\n") + + sys.stdout.flush() + return len(output) def spawn_tmux_pane(interval: int) -> None: @@ -304,17 +307,26 @@ def main() -> int: discover() + if args.watch: + # Hide cursor, clear screen once at start + sys.stdout.write("\033[?25l\033[2J\033[H") + sys.stdout.flush() + try: + prev_lines = 0 if args.watch: - i = 0 while True: - display(i) - i += 1 + prev_lines = display(watch=True, prev_lines=prev_lines) time.sleep(args.interval) else: - display() + display(watch=False, prev_lines=0) except KeyboardInterrupt: - print() + pass + finally: + if args.watch: + # Show cursor again + sys.stdout.write("\033[?25h\n") + sys.stdout.flush() return 0 From 3bf87a2e9bfdee8f9d4ed112a6ec4b1ba104f996 Mon Sep 17 00:00:00 2001 From: "A. F. 
Dudley" Date: Tue, 10 Mar 2026 05:53:56 +0000 Subject: [PATCH 48/62] =?UTF-8?q?feat:=20snapshot=20leapfrog=20=E2=80=94?= =?UTF-8?q?=20auto-recovery=20when=20validator=20falls=20behind?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Entrypoint changes: - Always require full + incremental before starting (retry until found) - Check incremental freshness against convergence threshold (500 slots) - Gap monitor thread: if validator falls >5000 slots behind for 3 consecutive checks, graceful stop + restart with fresh incremental - cmd_serve is now a loop: download → run → monitor → leapfrog → repeat - --no-snapshot-fetch moved to common args (both RPC and validator modes) - --maximum-full-snapshots-to-retain default 1 (validator deletes downloaded full after generating its own) - SNAPSHOT_MAX_AGE_SLOTS default 100000 (one full snapshot generation) snapshot_download.py refactoring: - Extract _discover_and_benchmark() and _rolling_incremental_download() as shared helpers - Restore download_incremental_for_slot() using shared helpers (downloads only an incremental for an existing full snapshot) - download_best_snapshot() uses shared helpers, downloads full then incremental as separate operations The leapfrog cycle: validator generates full snapshots at standard 100k block height intervals (same slots as the rest of the network). When the gap monitor triggers, the entrypoint loops back to maybe_download_snapshot which finds the validator's local full, downloads a fresh network incremental (generated every ~40s, converges within the ~11hr full generation window), and restarts. 
Co-Authored-By: Claude Opus 4.6 --- scripts/agave-container/entrypoint.py | 225 ++++++++--- scripts/agave-container/snapshot_download.py | 402 ++++++++++++------- 2 files changed, 415 insertions(+), 212 deletions(-) diff --git a/scripts/agave-container/entrypoint.py b/scripts/agave-container/entrypoint.py index 20961624..2b7324c3 100644 --- a/scripts/agave-container/entrypoint.py +++ b/scripts/agave-container/entrypoint.py @@ -43,10 +43,13 @@ SNAPSHOTS_DIR = "/data/snapshots" LOG_DIR = "/data/log" IDENTITY_FILE = f"{CONFIG_DIR}/validator-identity.json" -# Snapshot filename pattern +# Snapshot filename patterns FULL_SNAP_RE: re.Pattern[str] = re.compile( r"^snapshot-(\d+)-[A-Za-z0-9]+\.tar\.(zst|bz2)$" ) +INCR_SNAP_RE: re.Pattern[str] = re.compile( + r"^incremental-snapshot-(\d+)-(\d+)-[A-Za-z0-9]+\.tar\.(zst|bz2)$" +) MAINNET_RPC = "https://api.mainnet-beta.solana.com" @@ -124,51 +127,89 @@ def clean_snapshots(snapshots_dir: str) -> None: entry.unlink(missing_ok=True) +def get_incremental_slot(snapshots_dir: str, full_slot: int | None) -> int | None: + """Get the highest incremental snapshot slot matching the full's base slot.""" + if full_slot is None: + return None + snap_path = Path(snapshots_dir) + if not snap_path.is_dir(): + return None + best: int | None = None + for entry in snap_path.iterdir(): + m = INCR_SNAP_RE.match(entry.name) + if m and int(m.group(1)) == full_slot: + slot = int(m.group(2)) + if best is None or slot > best: + best = slot + return best + + def maybe_download_snapshot(snapshots_dir: str) -> None: - """Check snapshot freshness and download if needed. + """Ensure full + incremental snapshots exist before starting. + + The validator should always start from a full + incremental pair to + minimize replay time. If either is missing or the full is too old, + download fresh ones via download_best_snapshot (which does rolling + incremental convergence after downloading the full). 
Controlled by env vars: SNAPSHOT_AUTO_DOWNLOAD (default: true) — enable/disable - SNAPSHOT_MAX_AGE_SLOTS (default: 20000) — staleness threshold + SNAPSHOT_MAX_AGE_SLOTS (default: 100000) — full snapshot staleness threshold + (one full snapshot generation, ~11 hours) """ if not env_bool("SNAPSHOT_AUTO_DOWNLOAD", default=True): log.info("Snapshot auto-download disabled") return - max_age = int(env("SNAPSHOT_MAX_AGE_SLOTS", "20000")) + max_age = int(env("SNAPSHOT_MAX_AGE_SLOTS", "100000")) - # Get mainnet current slot mainnet_slot = rpc_get_slot(MAINNET_RPC) if mainnet_slot is None: log.warning("Cannot reach mainnet RPC — skipping snapshot check") return - # Check local snapshot - local_slot = get_local_snapshot_slot(snapshots_dir) - if local_slot is not None: - age = mainnet_slot - local_slot - log.info("Local snapshot at slot %d (mainnet: %d, age: %d slots)", - local_slot, mainnet_slot, age) - if age <= max_age: - log.info("Snapshot is fresh enough (age %d <= %d), skipping download", age, max_age) - return - log.info("Snapshot is stale (age %d > %d), downloading fresh", age, max_age) - else: - log.info("No local snapshot found, downloading") - - # Clean old snapshots before downloading - clean_snapshots(snapshots_dir) - - # Import and call snapshot download - # snapshot_download.py is installed alongside this file in /usr/local/bin/ script_dir = Path(__file__).resolve().parent sys.path.insert(0, str(script_dir)) - from snapshot_download import download_best_snapshot + from snapshot_download import download_best_snapshot, download_incremental_for_slot convergence = int(env("SNAPSHOT_CONVERGENCE_SLOTS", "500")) - ok = download_best_snapshot(snapshots_dir, convergence_slots=convergence) - if not ok: - log.error("Snapshot download failed — starting without fresh snapshot") + retry_delay = int(env("SNAPSHOT_RETRY_DELAY", "60")) + + # Check local full snapshot + local_slot = get_local_snapshot_slot(snapshots_dir) + have_fresh_full = (local_slot is not None + and 
(mainnet_slot - local_slot) <= max_age) + + if have_fresh_full: + assert local_slot is not None + inc_slot = get_incremental_slot(snapshots_dir, local_slot) + if inc_slot is not None: + inc_gap = mainnet_slot - inc_slot + if inc_gap <= convergence: + log.info("Full (slot %d) + incremental (slot %d, gap %d) " + "within convergence, starting", + local_slot, inc_slot, inc_gap) + return + log.info("Incremental too stale (slot %d, gap %d > %d)", + inc_slot, inc_gap, convergence) + # Fresh full, need a fresh incremental + log.info("Downloading incremental for full at slot %d", local_slot) + while True: + if download_incremental_for_slot(snapshots_dir, local_slot, + convergence_slots=convergence): + return + log.warning("Incremental download failed — retrying in %ds", + retry_delay) + time.sleep(retry_delay) + + # No full or full too old — download both + log.info("Downloading full + incremental") + clean_snapshots(snapshots_dir) + while True: + if download_best_snapshot(snapshots_dir, convergence_slots=convergence): + return + log.warning("Snapshot download failed — retrying in %ds", retry_delay) + time.sleep(retry_delay) # -- Directory and identity setup ---------------------------------------------- @@ -230,6 +271,7 @@ def build_common_args() -> list[str]: "--no-os-network-limits-test", "--wal-recovery-mode", "skip_any_corrupted_record", "--limit-ledger-size", env("LIMIT_LEDGER_SIZE", "50000000"), + "--no-snapshot-fetch", # entrypoint handles snapshot download ] # Snapshot generation @@ -238,7 +280,7 @@ def build_common_args() -> list[str]: else: args += [ "--full-snapshot-interval-slots", env("SNAPSHOT_INTERVAL_SLOTS", "100000"), - "--maximum-full-snapshots-to-retain", env("MAXIMUM_SNAPSHOTS_TO_RETAIN", "5"), + "--maximum-full-snapshots-to-retain", env("MAXIMUM_SNAPSHOTS_TO_RETAIN", "1"), ] if env("NO_INCREMENTAL_SNAPSHOTS") != "true": args += ["--maximum-incremental-snapshots-to-retain", "2"] @@ -309,7 +351,6 @@ def build_rpc_args() -> list[str]: 
"--rpc-pubsub-enable-block-subscription", "--enable-extended-tx-metadata-storage", "--no-wait-for-vote-to-start-leader", - "--no-snapshot-fetch", ] # Public vs private RPC @@ -379,7 +420,7 @@ def append_extra_args(args: list[str]) -> list[str]: GRACEFUL_EXIT_TIMEOUT = 270 -def graceful_exit(child: subprocess.Popen[bytes]) -> None: +def graceful_exit(child: subprocess.Popen[bytes], reason: str = "SIGTERM") -> None: """Request graceful shutdown via the admin RPC Unix socket. Runs ``agave-validator exit --force --ledger /data/ledger`` which connects @@ -390,7 +431,7 @@ def graceful_exit(child: subprocess.Popen[bytes]) -> None: If the admin RPC exit fails or the child doesn't exit within the timeout, falls back to SIGTERM then SIGKILL. """ - log.info("SIGTERM received — requesting graceful exit via admin RPC") + log.info("%s — requesting graceful exit via admin RPC", reason) try: result = subprocess.run( ["agave-validator", "exit", "--force", "--ledger", LEDGER_DIR], @@ -437,16 +478,69 @@ def graceful_exit(child: subprocess.Popen[bytes]) -> None: # -- Serve subcommand --------------------------------------------------------- -def cmd_serve() -> None: - """Main serve flow: snapshot check, setup, run agave-validator as child. +def _gap_monitor( + child: subprocess.Popen[bytes], + leapfrog: threading.Event, + shutting_down: threading.Event, +) -> None: + """Background thread: poll slot gap and trigger leapfrog if too far behind. - Python stays as PID 1 and traps SIGTERM to perform graceful shutdown - via the admin RPC Unix socket. + Waits for a grace period (SNAPSHOT_MONITOR_GRACE, default 600s) before + monitoring — the validator needs time to extract snapshots and catch up. + Then polls every SNAPSHOT_MONITOR_INTERVAL (default 30s). If the gap + exceeds SNAPSHOT_LEAPFROG_SLOTS (default 5000) for SNAPSHOT_LEAPFROG_CHECKS + (default 3) consecutive checks, triggers graceful shutdown and sets the + leapfrog event so cmd_serve loops back to download a fresh incremental. 
+ """ + threshold = int(env("SNAPSHOT_LEAPFROG_SLOTS", "5000")) + required_checks = int(env("SNAPSHOT_LEAPFROG_CHECKS", "3")) + interval = int(env("SNAPSHOT_MONITOR_INTERVAL", "30")) + grace = int(env("SNAPSHOT_MONITOR_GRACE", "600")) + rpc_port = env("RPC_PORT", "8899") + local_url = f"http://127.0.0.1:{rpc_port}" + + # Grace period — don't monitor during initial catch-up + if shutting_down.wait(grace): + return + + consecutive = 0 + while not shutting_down.is_set(): + local_slot = rpc_get_slot(local_url, timeout=5) + mainnet_slot = rpc_get_slot(MAINNET_RPC, timeout=10) + + if local_slot is not None and mainnet_slot is not None: + gap = mainnet_slot - local_slot + if gap > threshold: + consecutive += 1 + log.warning("Gap %d > %d (%d/%d consecutive)", + gap, threshold, consecutive, required_checks) + if consecutive >= required_checks: + log.warning("Leapfrog triggered: gap %d", gap) + leapfrog.set() + graceful_exit(child, reason="Leapfrog") + return + else: + if consecutive > 0: + log.info("Gap %d within threshold, resetting counter", gap) + consecutive = 0 + + shutting_down.wait(interval) + + +def cmd_serve() -> None: + """Main serve flow: snapshot download, run validator, monitor gap, leapfrog. + + Python stays as PID 1. On each iteration: + 1. Download full + incremental snapshots (if needed) + 2. Start agave-validator as child process + 3. Monitor slot gap in background thread + 4. If gap exceeds threshold → graceful stop → loop back to step 1 + 5. If SIGTERM → graceful stop → exit + 6. 
If validator crashes → exit with its return code """ mode = env("AGAVE_MODE", "test") log.info("AGAVE_MODE=%s", mode) - # Test mode dispatches to start-test.sh if mode == "test": os.execvp("start-test.sh", ["start-test.sh"]) @@ -454,47 +548,66 @@ def cmd_serve() -> None: log.error("Unknown AGAVE_MODE: %s (valid: test, rpc, validator)", mode) sys.exit(1) - # Ensure directories + # One-time setup dirs = [CONFIG_DIR, LEDGER_DIR, ACCOUNTS_DIR, SNAPSHOTS_DIR] if mode == "rpc": dirs.append(LOG_DIR) ensure_dirs(*dirs) - # Snapshot freshness check and auto-download - maybe_download_snapshot(SNAPSHOTS_DIR) + if not env_bool("SKIP_IP_ECHO_PREFLIGHT"): + script_dir = Path(__file__).resolve().parent + sys.path.insert(0, str(script_dir)) + from ip_echo_preflight import main as ip_echo_main + if ip_echo_main() != 0: + sys.exit(1) - # Identity setup if mode == "rpc": ensure_identity_rpc() print_identity() - # Build args if mode == "rpc": args = build_rpc_args() else: args = build_validator_args() - args = append_extra_args(args) - # Write startup timestamp for probe grace period - Path("/tmp/entrypoint-start").write_text(str(time.time())) + # Main loop: download → run → monitor → leapfrog if needed + while True: + maybe_download_snapshot(SNAPSHOTS_DIR) - log.info("Starting agave-validator with %d arguments", len(args)) - child = subprocess.Popen(["agave-validator"] + args) + Path("/tmp/entrypoint-start").write_text(str(time.time())) + log.info("Starting agave-validator with %d arguments", len(args)) + child = subprocess.Popen(["agave-validator"] + args) - # Forward SIGUSR1 to child (log rotation) - signal.signal(signal.SIGUSR1, lambda _sig, _frame: child.send_signal(signal.SIGUSR1)) + shutting_down = threading.Event() + leapfrog = threading.Event() - # Trap SIGTERM — run graceful_exit in a thread so the signal handler returns - # immediately and child.wait() in the main thread can observe the exit. 
- def _on_sigterm(_sig: int, _frame: object) -> None: - threading.Thread(target=graceful_exit, args=(child,), daemon=True).start() + signal.signal(signal.SIGUSR1, + lambda _sig, _frame: child.send_signal(signal.SIGUSR1)) - signal.signal(signal.SIGTERM, _on_sigterm) + def _on_sigterm(_sig: int, _frame: object) -> None: + shutting_down.set() + threading.Thread( + target=graceful_exit, args=(child,), daemon=True, + ).start() - # Wait for child — if it exits on its own (crash, normal exit), propagate code - child.wait() - sys.exit(child.returncode) + signal.signal(signal.SIGTERM, _on_sigterm) + + # Start gap monitor + monitor = threading.Thread( + target=_gap_monitor, + args=(child, leapfrog, shutting_down), + daemon=True, + ) + monitor.start() + + child.wait() + + if leapfrog.is_set(): + log.info("Leapfrog: restarting with fresh incremental") + continue + + sys.exit(child.returncode) # -- Probe subcommand --------------------------------------------------------- diff --git a/scripts/agave-container/snapshot_download.py b/scripts/agave-container/snapshot_download.py index 146b7291..2af2b976 100644 --- a/scripts/agave-container/snapshot_download.py +++ b/scripts/agave-container/snapshot_download.py @@ -461,9 +461,214 @@ def download_aria2c( return True +# -- Shared helpers ------------------------------------------------------------ + + +def _discover_and_benchmark( + rpc_url: str, + current_slot: int, + *, + max_snapshot_age: int = 10000, + max_latency: float = 500, + threads: int = 500, + min_download_speed: int = 20, + measurement_time: int = 7, + max_speed_checks: int = 15, + version_filter: str | None = None, +) -> list[SnapshotSource]: + """Discover snapshot sources and benchmark download speed. + + Returns sources that meet the minimum speed requirement, sorted by speed. 
+ """ + sources: list[SnapshotSource] = discover_sources( + rpc_url, current_slot, + max_age_slots=max_snapshot_age, + max_latency_ms=max_latency, + threads=threads, + version_filter=version_filter, + ) + if not sources: + return [] + + sources.sort(key=lambda s: s.latency_ms) + + log.info("Benchmarking download speed on top %d sources...", max_speed_checks) + fast_sources: list[SnapshotSource] = [] + checked: int = 0 + min_speed_bytes: int = min_download_speed * 1024 * 1024 + + for source in sources: + if checked >= max_speed_checks: + break + checked += 1 + + speed: float = measure_speed(source.rpc_address, measurement_time) + source.download_speed = speed + speed_mib: float = speed / (1024 ** 2) + + if speed < min_speed_bytes: + log.info(" %s: %.1f MiB/s (too slow, need >=%d MiB/s)", + source.rpc_address, speed_mib, min_download_speed) + continue + + log.info(" %s: %.1f MiB/s (latency: %.0fms, age: %d slots)", + source.rpc_address, speed_mib, + source.latency_ms, source.slots_diff) + fast_sources.append(source) + + return fast_sources + + +def _rolling_incremental_download( + fast_sources: list[SnapshotSource], + full_snap_slot: int, + output_dir: str, + convergence_slots: int, + connections: int, + rpc_url: str, +) -> str | None: + """Download incrementals in a loop until converged. + + Probes fast_sources for incrementals matching full_snap_slot, downloads + the freshest one, then re-probes until the gap to head is within + convergence_slots. Returns the filename of the final incremental, + or None if no incremental was found. 
+ """ + prev_inc_filename: str | None = None + loop_start: float = time.monotonic() + max_convergence_time: float = 1800.0 # 30 min wall-clock limit + + while True: + if time.monotonic() - loop_start > max_convergence_time: + if prev_inc_filename: + log.warning("Convergence timeout (%.0fs) — using %s", + max_convergence_time, prev_inc_filename) + else: + log.warning("Convergence timeout (%.0fs) — no incremental downloaded", + max_convergence_time) + break + + inc_fn, inc_mirrors = probe_incremental(fast_sources, full_snap_slot) + if inc_fn is None: + if prev_inc_filename is None: + log.error("No matching incremental found for base slot %d", + full_snap_slot) + else: + log.info("No newer incremental available, using %s", prev_inc_filename) + break + + m_inc: re.Match[str] | None = INCR_SNAP_RE.match(inc_fn) + assert m_inc is not None + inc_slot: int = int(m_inc.group(2)) + + head_slot: int | None = get_current_slot(rpc_url) + if head_slot is None: + log.warning("Cannot get current slot — downloading best available incremental") + gap: int = convergence_slots + 1 + else: + gap = head_slot - inc_slot + + if inc_fn == prev_inc_filename: + if gap <= convergence_slots: + log.info("Incremental %s already downloaded (gap %d slots, converged)", + inc_fn, gap) + break + log.info("No newer incremental yet (slot %d, gap %d slots), waiting...", + inc_slot, gap) + time.sleep(10) + continue + + if prev_inc_filename is not None: + old_path: Path = Path(output_dir) / prev_inc_filename + if old_path.exists(): + log.info("Removing superseded incremental %s", prev_inc_filename) + old_path.unlink() + + log.info("Downloading incremental %s (%d mirrors, slot %d, gap %d slots)", + inc_fn, len(inc_mirrors), inc_slot, gap) + if not download_aria2c(inc_mirrors, output_dir, inc_fn, connections): + log.warning("Failed to download incremental %s — re-probing in 10s", inc_fn) + time.sleep(10) + continue + + prev_inc_filename = inc_fn + + if gap <= convergence_slots: + log.info("Converged: 
incremental slot %d is %d slots behind head", + inc_slot, gap) + break + + if head_slot is None: + break + + log.info("Not converged (gap %d > %d), re-probing in 10s...", + gap, convergence_slots) + time.sleep(10) + + return prev_inc_filename + + # -- Public API ---------------------------------------------------------------- +def download_incremental_for_slot( + output_dir: str, + full_snap_slot: int, + *, + cluster: str = "mainnet-beta", + rpc_url: str | None = None, + connections: int = 16, + threads: int = 500, + max_snapshot_age: int = 10000, + max_latency: float = 500, + min_download_speed: int = 20, + measurement_time: int = 7, + max_speed_checks: int = 15, + version_filter: str | None = None, + convergence_slots: int = 500, +) -> bool: + """Download an incremental snapshot for an existing full snapshot. + + Discovers sources, benchmarks speed, then runs the rolling incremental + download loop for the given full snapshot base slot. Does NOT download + a full snapshot. + + Returns True if an incremental was downloaded, False otherwise. + """ + resolved_rpc: str = rpc_url or CLUSTER_RPC[cluster] + + if not shutil.which("aria2c"): + log.error("aria2c not found. 
Install with: apt install aria2") + return False + + log.info("Incremental download for base slot %d", full_snap_slot) + current_slot: int | None = get_current_slot(resolved_rpc) + if current_slot is None: + log.error("Cannot get current slot from %s", resolved_rpc) + return False + + fast_sources: list[SnapshotSource] = _discover_and_benchmark( + resolved_rpc, current_slot, + max_snapshot_age=max_snapshot_age, + max_latency=max_latency, + threads=threads, + min_download_speed=min_download_speed, + measurement_time=measurement_time, + max_speed_checks=max_speed_checks, + version_filter=version_filter, + ) + if not fast_sources: + log.error("No fast sources found") + return False + + os.makedirs(output_dir, exist_ok=True) + result: str | None = _rolling_incremental_download( + fast_sources, full_snap_slot, output_dir, + convergence_slots, connections, resolved_rpc, + ) + return result is not None + + def download_best_snapshot( output_dir: str, *, @@ -500,183 +705,68 @@ def download_best_snapshot( return False log.info("Current slot: %d", current_slot) - sources: list[SnapshotSource] = discover_sources( + fast_sources: list[SnapshotSource] = _discover_and_benchmark( resolved_rpc, current_slot, - max_age_slots=max_snapshot_age, - max_latency_ms=max_latency, + max_snapshot_age=max_snapshot_age, + max_latency=max_latency, threads=threads, + min_download_speed=min_download_speed, + measurement_time=measurement_time, + max_speed_checks=max_speed_checks, version_filter=version_filter, ) - if not sources: - log.error("No snapshot sources found") - return False - - # Sort by latency (lowest first) for speed benchmarking - sources.sort(key=lambda s: s.latency_ms) - - # Benchmark top candidates - log.info("Benchmarking download speed on top %d sources...", max_speed_checks) - fast_sources: list[SnapshotSource] = [] - checked: int = 0 - min_speed_bytes: int = min_download_speed * 1024 * 1024 - - for source in sources: - if checked >= max_speed_checks: - break - checked += 1 - 
- speed: float = measure_speed(source.rpc_address, measurement_time) - source.download_speed = speed - speed_mib: float = speed / (1024 ** 2) - - if speed < min_speed_bytes: - log.info(" %s: %.1f MiB/s (too slow, need >=%d MiB/s)", - source.rpc_address, speed_mib, min_download_speed) - continue - - log.info(" %s: %.1f MiB/s (latency: %.0fms, age: %d slots)", - source.rpc_address, speed_mib, - source.latency_ms, source.slots_diff) - fast_sources.append(source) - if not fast_sources: - log.error("No source met minimum speed requirement (%d MiB/s)", - min_download_speed) + log.error("No fast sources found") return False - # Use the fastest source as primary, collect mirrors for each file + # Use the fastest source as primary, build full snapshot download plan best: SnapshotSource = fast_sources[0] - file_paths: list[str] = best.file_paths - if full_only: - file_paths = [fp for fp in file_paths - if fp.rsplit("/", 1)[-1].startswith("snapshot-")] + full_paths: list[str] = [fp for fp in best.file_paths + if fp.rsplit("/", 1)[-1].startswith("snapshot-")] + if not full_paths: + log.error("Best source has no full snapshot") + return False - # Build mirror URL lists - download_plan: list[tuple[str, list[str]]] = [] - for fp in file_paths: - filename: str = fp.rsplit("/", 1)[-1] - mirror_urls: list[str] = [f"http://{best.rpc_address}{fp}"] - for other in fast_sources[1:]: - for other_fp in other.file_paths: - if other_fp.rsplit("/", 1)[-1] == filename: - mirror_urls.append(f"http://{other.rpc_address}{other_fp}") - break - download_plan.append((filename, mirror_urls)) + # Build mirror URLs for the full snapshot + full_filename: str = full_paths[0].rsplit("/", 1)[-1] + full_mirrors: list[str] = [f"http://{best.rpc_address}{full_paths[0]}"] + for other in fast_sources[1:]: + for other_fp in other.file_paths: + if other_fp.rsplit("/", 1)[-1] == full_filename: + full_mirrors.append(f"http://{other.rpc_address}{other_fp}") + break speed_mib: float = best.download_speed / (1024 ** 
2) - log.info("Best source: %s (%.1f MiB/s), %d mirrors total", - best.rpc_address, speed_mib, len(fast_sources)) - for filename, mirror_urls in download_plan: - log.info(" %s (%d mirrors)", filename, len(mirror_urls)) + log.info("Best source: %s (%.1f MiB/s), %d mirrors", + best.rpc_address, speed_mib, len(full_mirrors)) - # Download — full snapshot first, then re-probe for fresh incremental + # Download full snapshot os.makedirs(output_dir, exist_ok=True) total_start: float = time.monotonic() - # Separate full and incremental from the initial plan - full_downloads: list[tuple[str, list[str]]] = [] - for filename, mirror_urls in download_plan: - if filename.startswith("snapshot-"): - full_downloads.append((filename, mirror_urls)) - - # Download full snapshot(s) - for filename, mirror_urls in full_downloads: - filepath: Path = Path(output_dir) / filename - if filepath.exists() and filepath.stat().st_size > 0: - log.info("Skipping %s (already exists: %.1f GB)", - filename, filepath.stat().st_size / (1024 ** 3)) - continue - if not download_aria2c(mirror_urls, output_dir, filename, connections): - log.error("Failed to download %s", filename) + filepath: Path = Path(output_dir) / full_filename + if filepath.exists() and filepath.stat().st_size > 0: + log.info("Skipping %s (already exists: %.1f GB)", + full_filename, filepath.stat().st_size / (1024 ** 3)) + else: + if not download_aria2c(full_mirrors, output_dir, full_filename, connections): + log.error("Failed to download %s", full_filename) return False - # After full snapshot download, rolling incremental download loop. - # The initial incremental is stale by now (full download takes 10+ min). - # Re-probe repeatedly until we find one close enough to head. + # Download incremental separately — the full download took minutes, + # so any incremental from discovery is stale. Re-probe for fresh ones. 
if not full_only: - full_filename: str = full_downloads[0][0] - fm_post: re.Match[str] | None = FULL_SNAP_RE.match(full_filename) - if fm_post: - full_snap_slot: int = int(fm_post.group(1)) - log.info("Rolling incremental download (base slot %d, convergence %d slots)...", - full_snap_slot, convergence_slots) - prev_inc_filename: str | None = None - loop_start: float = time.monotonic() - max_convergence_time: float = 1800.0 # 30 min wall-clock limit - - while True: - if time.monotonic() - loop_start > max_convergence_time: - if prev_inc_filename: - log.warning("Convergence timeout (%.0fs) — using %s", - max_convergence_time, prev_inc_filename) - else: - log.warning("Convergence timeout (%.0fs) — no incremental downloaded", - max_convergence_time) - break - inc_fn, inc_mirrors = probe_incremental(fast_sources, full_snap_slot) - if inc_fn is None: - if prev_inc_filename is None: - log.error("No matching incremental found for base slot %d " - "— validator will replay from full snapshot", full_snap_slot) - else: - log.info("No newer incremental available, using %s", prev_inc_filename) - break - - # Parse the incremental slot from the filename - m_inc: re.Match[str] | None = INCR_SNAP_RE.match(inc_fn) - assert m_inc is not None # probe_incremental already validated - inc_slot: int = int(m_inc.group(2)) - - # Check convergence against current mainnet slot - head_slot: int | None = get_current_slot(resolved_rpc) - if head_slot is None: - log.warning("Cannot get current slot — downloading best available incremental") - gap: int = convergence_slots + 1 # force download, then break - else: - gap = head_slot - inc_slot - - # Skip download if we already have this exact incremental - if inc_fn == prev_inc_filename: - if gap <= convergence_slots: - log.info("Incremental %s already downloaded (gap %d slots, converged)", inc_fn, gap) - break - log.info("No newer incremental yet (slot %d, gap %d slots), waiting...", - inc_slot, gap) - time.sleep(10) - continue - - # Delete previous 
incremental before downloading the new one - if prev_inc_filename is not None: - old_path: Path = Path(output_dir) / prev_inc_filename - if old_path.exists(): - log.info("Removing superseded incremental %s", prev_inc_filename) - old_path.unlink() - - log.info("Downloading incremental %s (%d mirrors, slot %d, gap %d slots)", - inc_fn, len(inc_mirrors), inc_slot, gap) - if not download_aria2c(inc_mirrors, output_dir, inc_fn, connections): - log.warning("Failed to download incremental %s — re-probing in 10s", inc_fn) - time.sleep(10) - continue - - prev_inc_filename = inc_fn - - if gap <= convergence_slots: - log.info("Converged: incremental slot %d is %d slots behind head", inc_slot, gap) - break - - if head_slot is None: - break - - log.info("Not converged (gap %d > %d), re-probing in 10s...", gap, convergence_slots) - time.sleep(10) + fm: re.Match[str] | None = FULL_SNAP_RE.match(full_filename) + if fm: + full_snap_slot: int = int(fm.group(1)) + log.info("Downloading incremental for base slot %d...", full_snap_slot) + _rolling_incremental_download( + fast_sources, full_snap_slot, output_dir, + convergence_slots, connections, resolved_rpc, + ) total_elapsed: float = time.monotonic() - total_start log.info("All downloads complete in %.0fs", total_elapsed) - for filename, _ in download_plan: - fp_path: Path = Path(output_dir) / filename - if fp_path.exists(): - log.info(" %s (%.1f GB)", fp_path.name, fp_path.stat().st_size / (1024 ** 3)) return True From 68edcc60c7b2b50277812ffa6b38500ba0014c14 Mon Sep 17 00:00:00 2001 From: "A. F. Dudley" Date: Tue, 10 Mar 2026 05:54:08 +0000 Subject: [PATCH 49/62] fix: migrate ashburn relay playbook to firewalld + iptables coexistence MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Firewalld zones/policies for forwarding (Docker bridge → gre-ashburn), iptables for Docker-specific rules (DNAT, DOCKER-USER, mangle, SNAT). Both coexist at different netfilter priorities. 
See docs/postmortem-ashburn-relay-outbound.md for root cause analysis. Co-Authored-By: Claude Opus 4.6 --- playbooks/ashburn-relay-biscayne.yml | 372 +++++++++++----------- playbooks/files/ashburn-relay-setup.sh.j2 | 28 +- 2 files changed, 211 insertions(+), 189 deletions(-) diff --git a/playbooks/ashburn-relay-biscayne.yml b/playbooks/ashburn-relay-biscayne.yml index d660a2ce..a42a978c 100644 --- a/playbooks/ashburn-relay-biscayne.yml +++ b/playbooks/ashburn-relay-biscayne.yml @@ -1,13 +1,34 @@ --- # Configure biscayne for Ashburn validator relay # -# Sets up inbound DNAT (137.239.194.65 → kind node) and outbound SNAT + -# policy routing (validator traffic → GRE tunnel → mia-sw01 → was-sw01). +# WHY THIS USES FIREWALLD ZONES (not just iptables): # -# Uses a dedicated GRE tunnel to mia-sw01 (NOT the DoubleZero-managed -# doublezero0/Tunnel500). The tunnel source is biscayne's public IP -# (186.233.184.235) and the destination is mia-sw01's free LAN IP -# (209.42.167.137). +# Biscayne runs firewalld with the nftables backend. Firewalld's nftables +# filter_FORWARD chain (priority filter+10) rejects forwarded traffic +# between interfaces that aren't in known zones. The original playbook +# used only iptables rules, which run at priority filter (0) and were +# accepted by Docker's chains — but then firewalld's nftables chain +# rejected the same packets because it didn't know about the Docker +# bridges or gre-ashburn. +# +# The fix: Docker bridges go in the 'docker' zone, gre-ashburn goes in +# 'trusted', and a 'docker-to-relay' policy allows forwarding between +# them. These are firewalld --permanent rules that survive reboots. +# +# WHY IPTABLES IS STILL NEEDED: +# +# Docker's iptables FORWARD chain (priority filter) runs BEFORE +# firewalld's nftables chain (priority filter+10). Docker's FORWARD +# policy is DROP, and its DOCKER-FORWARD subchain only accepts +# RELATED,ESTABLISHED inbound. 
So NEW inbound DNAT'd traffic is dropped +# by Docker before firewalld can accept it. DOCKER-USER ACCEPT rules +# and DNAT-before-Docker ordering must remain as iptables. +# +# Layers: +# 1. Firewalld zones + policies (permanent, survives reboots/reloads) +# 2. GRE tunnel + loopback IP (iproute2, restored by systemd service) +# 3. iptables DNAT/SNAT/mangle (restored by systemd service) +# 4. Policy routing (iproute2, restored by systemd service) # # Usage: # # Full setup (inbound + outbound) @@ -19,9 +40,6 @@ # # Outbound only (SNAT + policy routing) # ansible-playbook playbooks/ashburn-relay-biscayne.yml -t outbound # -# # Pre-flight checks only -# ansible-playbook playbooks/ashburn-relay-biscayne.yml -t preflight -# # # Rollback # ansible-playbook playbooks/ashburn-relay-biscayne.yml -e rollback=true @@ -33,7 +51,6 @@ ashburn_ip: 137.239.194.65 kind_node_ip: 172.20.0.2 kind_network: 172.20.0.0/16 - # New dedicated GRE tunnel (not DZ-managed doublezero0) tunnel_device: gre-ashburn tunnel_local_ip: 169.254.100.1 # biscayne end of /31 tunnel_remote_ip: 169.254.100.0 # mia-sw01 end of /31 @@ -45,6 +62,12 @@ gossip_port: 8001 dynamic_port_range_start: 9000 dynamic_port_range_end: 9025 + # Docker bridge for the kind network — find with: + # ip route get 172.20.0.2 | grep -oP 'dev \K\S+' + docker_bridges: + - br-cf46a62ab5b2 + - docker0 + - br-4fb6f6795448 rollback: false tasks: @@ -54,6 +77,30 @@ - name: Rollback all Ashburn relay rules when: rollback | bool block: + - name: Remove firewalld zone assignments + ansible.posix.firewalld: + zone: "{{ item.zone }}" + interface: "{{ item.iface }}" + permanent: true + state: disabled + loop: + - {zone: docker, iface: br-cf46a62ab5b2} + - {zone: docker, iface: docker0} + - {zone: docker, iface: br-4fb6f6795448} + - {zone: trusted, iface: gre-ashburn} + failed_when: false + + - name: Remove docker-to-relay policy + ansible.builtin.command: + cmd: firewall-cmd --permanent --delete-policy=docker-to-relay + failed_when: false + 
changed_when: false + + - name: Reload firewalld + ansible.builtin.command: + cmd: firewall-cmd --reload + changed_when: false + - name: Remove Ashburn IP from loopback ansible.builtin.command: cmd: ip addr del {{ ashburn_ip }}/32 dev lo @@ -61,36 +108,26 @@ changed_when: false - name: Remove GRE tunnel - ansible.builtin.shell: - cmd: | - ip link set {{ tunnel_device }} down 2>/dev/null || true - ip tunnel del {{ tunnel_device }} 2>/dev/null || true - executable: /bin/bash + ansible.builtin.command: + cmd: ip tunnel del {{ tunnel_device }} + failed_when: false changed_when: false - - name: Remove inbound DNAT rules + - name: Flush iptables relay rules ansible.builtin.shell: cmd: | - set -o pipefail + set -euo pipefail + # DNAT iptables -t nat -D PREROUTING -p udp -d {{ ashburn_ip }} \ --dport {{ gossip_port }} \ - -j DNAT --to-destination {{ kind_node_ip }}:{{ gossip_port }} \ - 2>/dev/null || true + -j DNAT --to-destination {{ kind_node_ip }}:{{ gossip_port }} 2>/dev/null || true iptables -t nat -D PREROUTING -p tcp -d {{ ashburn_ip }} \ --dport {{ gossip_port }} \ - -j DNAT --to-destination {{ kind_node_ip }}:{{ gossip_port }} \ - 2>/dev/null || true + -j DNAT --to-destination {{ kind_node_ip }}:{{ gossip_port }} 2>/dev/null || true iptables -t nat -D PREROUTING -p udp -d {{ ashburn_ip }} \ --dport {{ dynamic_port_range_start }}:{{ dynamic_port_range_end }} \ - -j DNAT --to-destination {{ kind_node_ip }} \ - 2>/dev/null || true - executable: /bin/bash - changed_when: false - - - name: Remove DOCKER-USER relay rules - ansible.builtin.shell: - cmd: | - set -o pipefail + -j DNAT --to-destination {{ kind_node_ip }} 2>/dev/null || true + # DOCKER-USER iptables -D DOCKER-USER -p udp -d {{ kind_node_ip }} \ --dport {{ gossip_port }} -j ACCEPT 2>/dev/null || true iptables -D DOCKER-USER -p tcp -d {{ kind_node_ip }} \ @@ -98,31 +135,19 @@ iptables -D DOCKER-USER -p udp -d {{ kind_node_ip }} \ --dport {{ dynamic_port_range_start }}:{{ dynamic_port_range_end }} \ -j ACCEPT 
2>/dev/null || true - executable: /bin/bash - changed_when: false - - - name: Remove outbound mangle rules - ansible.builtin.shell: - cmd: | - set -o pipefail + # Mangle iptables -t mangle -D PREROUTING -s {{ kind_network }} \ - -p udp --sport {{ gossip_port }} \ - -j MARK --set-mark {{ fwmark }} 2>/dev/null || true + -p udp --sport {{ gossip_port }} -j MARK --set-mark {{ fwmark }} 2>/dev/null || true iptables -t mangle -D PREROUTING -s {{ kind_network }} \ -p udp --sport {{ dynamic_port_range_start }}:{{ dynamic_port_range_end }} \ -j MARK --set-mark {{ fwmark }} 2>/dev/null || true iptables -t mangle -D PREROUTING -s {{ kind_network }} \ - -p tcp --sport {{ gossip_port }} \ - -j MARK --set-mark {{ fwmark }} 2>/dev/null || true + -p tcp --sport {{ gossip_port }} -j MARK --set-mark {{ fwmark }} 2>/dev/null || true iptables -t mangle -D PREROUTING -s {{ kind_network }} \ - -p tcp --dport {{ gossip_port }} \ - -j MARK --set-mark {{ fwmark }} 2>/dev/null || true - executable: /bin/bash - changed_when: false - - - name: Remove outbound SNAT rule - ansible.builtin.shell: - cmd: iptables -t nat -D POSTROUTING -m mark --mark {{ fwmark }} -j SNAT --to-source {{ ashburn_ip }} 2>/dev/null || true + -p tcp --dport {{ gossip_port }} -j MARK --set-mark {{ fwmark }} 2>/dev/null || true + # SNAT + iptables -t nat -D POSTROUTING -m mark --mark {{ fwmark }} \ + -j SNAT --to-source {{ ashburn_ip }} 2>/dev/null || true executable: /bin/bash changed_when: false @@ -130,6 +155,7 @@ ansible.builtin.shell: cmd: | ip rule del fwmark {{ fwmark }} table {{ rt_table_name }} 2>/dev/null || true + ip rule del from {{ kind_network }} fwmark {{ fwmark }} table {{ rt_table_name }} 2>/dev/null || true ip route del default table {{ rt_table_name }} 2>/dev/null || true executable: /bin/bash changed_when: false @@ -176,43 +202,79 @@ failed_when: kind_ping.rc != 0 tags: [preflight, inbound] - - name: Show existing iptables nat rules - ansible.builtin.shell: - cmd: set -o pipefail && iptables -t nat -L 
-v -n --line-numbers | head -60 - executable: /bin/bash - register: existing_nat + # ------------------------------------------------------------------ + # Firewalld zones and policies (permanent, survives reboots) + # ------------------------------------------------------------------ + # Docker's iptables FORWARD chain (priority filter) drops packets that + # don't match DOCKER-USER or DOCKER-FORWARD. Firewalld's nftables + # filter_FORWARD (priority filter+10) then checks zone-based policies. + # Without the docker zone + docker-to-relay policy, firewalld rejects + # outbound traffic from Docker bridges to gre-ashburn because neither + # interface is in a known zone. + - name: Add Docker bridges to docker zone + ansible.posix.firewalld: + zone: docker + interface: "{{ item }}" + permanent: true + state: enabled + loop: "{{ docker_bridges }}" + register: docker_zone_result + tags: [outbound, inbound] + + - name: Add GRE tunnel to trusted zone + ansible.posix.firewalld: + zone: trusted + interface: "{{ tunnel_device }}" + permanent: true + state: enabled + register: trusted_zone_result + tags: [outbound, inbound] + + - name: Check if docker-to-relay policy exists + ansible.builtin.command: + cmd: firewall-cmd --info-policy=docker-to-relay + register: policy_check changed_when: false - tags: [preflight] + failed_when: false + tags: [outbound] - - name: Display existing NAT rules - ansible.builtin.debug: - var: existing_nat.stdout_lines - tags: [preflight] - - - name: Check for existing GRE tunnel + - name: Create docker-to-relay forwarding policy + when: policy_check.rc != 0 ansible.builtin.shell: - cmd: ip tunnel show {{ tunnel_device }} 2>&1 || echo "tunnel does not exist" + cmd: | + set -euo pipefail + firewall-cmd --permanent --new-policy=docker-to-relay + firewall-cmd --permanent --policy=docker-to-relay --set-target=ACCEPT + firewall-cmd --permanent --policy=docker-to-relay --add-ingress-zone=docker + firewall-cmd --permanent --policy=docker-to-relay 
--add-egress-zone=trusted + echo "policy created" executable: /bin/bash - register: existing_tunnel - changed_when: false - tags: [preflight] + register: policy_result + changed_when: "'created' in policy_result.stdout" + tags: [outbound] - - name: Display existing tunnel - ansible.builtin.debug: - var: existing_tunnel.stdout_lines - tags: [preflight] + - name: Reload firewalld + ansible.builtin.command: + cmd: firewall-cmd --reload + when: >- + docker_zone_result.changed or + trusted_zone_result.changed or + (policy_result is defined and policy_result.changed) + changed_when: true + tags: [outbound, inbound] # ------------------------------------------------------------------ - # GRE tunnel setup + # GRE tunnel setup (iproute2) # ------------------------------------------------------------------ - name: Create GRE tunnel ansible.builtin.shell: cmd: | - set -o pipefail + set -euo pipefail if ip tunnel show {{ tunnel_device }} 2>/dev/null; then echo "tunnel already exists" else - ip tunnel add {{ tunnel_device }} mode gre local {{ tunnel_src }} remote {{ tunnel_dst }} ttl 64 + ip tunnel add {{ tunnel_device }} mode gre \ + local {{ tunnel_src }} remote {{ tunnel_dst }} ttl 64 ip addr add {{ tunnel_local_ip }}/31 dev {{ tunnel_device }} ip link set {{ tunnel_device }} up mtu 8972 echo "tunnel created" @@ -222,29 +284,27 @@ changed_when: "'created' in tunnel_result.stdout" tags: [outbound] - - name: Show tunnel result - ansible.builtin.debug: - var: tunnel_result.stdout_lines - tags: [outbound] - # ------------------------------------------------------------------ - # Inbound: DNAT for 137.239.194.65 → kind node + # Inbound: DNAT for 137.239.194.65 → kind node (iptables) # ------------------------------------------------------------------ + # These must be iptables rules inserted before Docker's ADDRTYPE LOCAL + # rule in nat PREROUTING. Firewalld forward-ports can't guarantee + # ordering relative to Docker's chains. 
- name: Add Ashburn IP to loopback ansible.builtin.command: cmd: ip addr add {{ ashburn_ip }}/32 dev lo register: add_ip changed_when: add_ip.rc == 0 - failed_when: "add_ip.rc != 0 and 'already assigned' not in add_ip.stderr and 'File exists' not in add_ip.stderr" + failed_when: >- + add_ip.rc != 0 and + 'already assigned' not in add_ip.stderr and + 'File exists' not in add_ip.stderr tags: [inbound] - - name: Add DNAT rules (inserted before DOCKER chain) + - name: Add DNAT rules (before Docker's chain) ansible.builtin.shell: cmd: | set -o pipefail - # DNAT rules must be before Docker's ADDRTYPE LOCAL rule, otherwise - # Docker's PREROUTING chain swallows traffic to 137.239.194.65 (which - # is on loopback and therefore type LOCAL). for rule in \ "-p udp -d {{ ashburn_ip }} --dport {{ gossip_port }} -j DNAT --to-destination {{ kind_node_ip }}:{{ gossip_port }}" \ "-p tcp -d {{ ashburn_ip }} --dport {{ gossip_port }} -j DNAT --to-destination {{ kind_node_ip }}:{{ gossip_port }}" \ @@ -262,19 +322,13 @@ changed_when: "'added' in dnat_result.stdout" tags: [inbound] - - name: Show DNAT result - ansible.builtin.debug: - var: dnat_result.stdout_lines - tags: [inbound] - - - name: Allow DNAT'd relay traffic through DOCKER-USER + - name: Allow DNAT'd traffic through Docker's FORWARD chain ansible.builtin.shell: cmd: | set -o pipefail - # Docker's FORWARD chain drops traffic to bridge networks unless - # explicitly accepted. DOCKER-USER runs first and is the correct - # place for user rules. These ACCEPT rules let DNAT'd relay - # traffic reach the kind node (172.20.0.2). + # Docker's iptables FORWARD (priority filter) drops NEW inbound + # traffic to bridge networks. DOCKER-USER is the only place to + # add ACCEPT rules that survive Docker daemon restarts. 
for rule in \ "-p udp -d {{ kind_node_ip }} --dport {{ gossip_port }} -j ACCEPT" \ "-p tcp -d {{ kind_node_ip }} --dport {{ gossip_port }} -j ACCEPT" \ @@ -292,14 +346,14 @@ changed_when: "'added' in forward_result.stdout" tags: [inbound] - - name: Show DOCKER-USER result - ansible.builtin.debug: - var: forward_result.stdout_lines - tags: [inbound] - # ------------------------------------------------------------------ - # Outbound: fwmark + SNAT + policy routing via new tunnel + # Outbound: fwmark + SNAT + policy routing (iptables + iproute2) # ------------------------------------------------------------------ + # Mangle marks select which traffic gets policy-routed through the + # relay. Source-based routing (ip rule from 172.20.0.0/16) would be + # simpler but routes ALL Docker traffic through the tunnel, including + # DNS and health checks. Port-specific marks keep non-validator + # traffic on the default route. - name: Mark outbound validator traffic (mangle PREROUTING) ansible.builtin.shell: cmd: | @@ -322,29 +376,20 @@ changed_when: "'added' in mangle_result.stdout" tags: [outbound] - - name: Show mangle result - ansible.builtin.debug: - var: mangle_result.stdout_lines - tags: [outbound] - - - name: SNAT marked traffic to Ashburn IP (before Docker MASQUERADE) + - name: SNAT marked traffic to Ashburn IP ansible.builtin.shell: cmd: | set -o pipefail - if iptables -t nat -C POSTROUTING -m mark --mark {{ fwmark }} -j SNAT --to-source {{ ashburn_ip }} 2>/dev/null; then - echo "SNAT rule already exists" + rule="-m mark --mark {{ fwmark }} -j SNAT --to-source {{ ashburn_ip }}" + if iptables -t nat -C POSTROUTING $rule 2>/dev/null; then + echo "exists" else - iptables -t nat -I POSTROUTING 1 -m mark --mark {{ fwmark }} -j SNAT --to-source {{ ashburn_ip }} - echo "SNAT rule inserted at position 1" + iptables -t nat -I POSTROUTING 1 $rule + echo "added" fi executable: /bin/bash register: snat_result - changed_when: "'inserted' in snat_result.stdout" - tags: [outbound] 
- - - name: Show SNAT result - ansible.builtin.debug: - var: snat_result.stdout + changed_when: "'added' in snat_result.stdout" tags: [outbound] - name: Ensure rt_tables entry exists @@ -359,10 +404,10 @@ cmd: | set -o pipefail if ip rule show | grep -q 'fwmark 0x64 lookup ashburn'; then - echo "rule already exists" + echo "exists" else ip rule add fwmark {{ fwmark }} table {{ rt_table_name }} - echo "rule added" + echo "added" fi executable: /bin/bash register: rule_result @@ -370,29 +415,29 @@ tags: [outbound] - name: Add default route via GRE tunnel in ashburn table - ansible.builtin.shell: + ansible.builtin.command: cmd: ip route replace default via {{ tunnel_remote_ip }} dev {{ tunnel_device }} table {{ rt_table_name }} - executable: /bin/bash changed_when: true tags: [outbound] # ------------------------------------------------------------------ - # Persistence + # Persistence (systemd service for non-firewalld state) # ------------------------------------------------------------------ - # A systemd oneshot service replaces both if-up.d (which depends on - # networking.service, inactive on this host) and netfilter-persistent - # (which runs before Docker, so Docker's chain setup blows away rules). - # This service runs After=docker.service and idempotently applies all - # tunnel, iptables, and policy routing state. + # Firewalld zones/policies persist natively (--permanent + reload). + # The systemd service restores: GRE tunnel, loopback IP, iptables + # rules (DNAT, DOCKER-USER, mangle, SNAT), and policy routing. + # Runs After=docker.service because Docker recreates its iptables + # chains on startup. 
- name: Install ashburn-relay systemd service ansible.builtin.copy: dest: /etc/systemd/system/ashburn-relay.service mode: "0644" content: | [Unit] - Description=Ashburn validator relay (GRE tunnel, iptables, policy routing) - After=docker.service network-online.target + Description=Ashburn relay (GRE tunnel, iptables, policy routing) + After=docker.service network-online.target firewalld.service Wants=network-online.target + Requires=firewalld.service [Service] Type=oneshot @@ -428,67 +473,42 @@ # ------------------------------------------------------------------ # Verification # ------------------------------------------------------------------ - - name: Show tunnel status + - name: Verify firewalld zones + ansible.builtin.shell: + cmd: | + echo "=== docker zone ===" + firewall-cmd --zone=docker --list-interfaces + echo "=== trusted zone ===" + firewall-cmd --zone=trusted --list-interfaces + echo "=== docker-to-relay policy ===" + firewall-cmd --info-policy=docker-to-relay + executable: /bin/bash + register: zone_info + changed_when: false + tags: [outbound, inbound] + + - name: Verify tunnel and routing ansible.builtin.shell: cmd: | echo "=== tunnel ===" ip tunnel show {{ tunnel_device }} - echo "" - echo "=== tunnel addr ===" ip addr show {{ tunnel_device }} - echo "" echo "=== ping tunnel peer ===" ping -c 1 -W 2 {{ tunnel_remote_ip }} 2>&1 || echo "tunnel peer unreachable" - executable: /bin/bash - register: tunnel_status - changed_when: false - tags: [outbound] - - - name: Show NAT rules - ansible.builtin.shell: - cmd: set -o pipefail && iptables -t nat -L -v -n --line-numbers 2>&1 | head -40 - executable: /bin/bash - register: nat_rules - changed_when: false - tags: [inbound, outbound] - - - name: Show mangle rules - ansible.builtin.shell: - cmd: iptables -t mangle -L -v -n 2>&1 - executable: /bin/bash - register: mangle_rules - changed_when: false - tags: [outbound] - - - name: Show policy routing - ansible.builtin.shell: - cmd: | echo "=== ip rule ===" ip 
rule show - echo "" - echo "=== ashburn routing table ===" + echo "=== ashburn table ===" ip route show table {{ rt_table_name }} 2>/dev/null || echo "table empty" executable: /bin/bash register: routing_info changed_when: false tags: [outbound] - - name: Show loopback addresses - ansible.builtin.shell: - cmd: set -o pipefail && ip addr show lo | grep inet - executable: /bin/bash - register: lo_addrs - changed_when: false - tags: [inbound] - - name: Display verification ansible.builtin.debug: msg: - tunnel: "{{ tunnel_status.stdout_lines | default([]) }}" - nat_rules: "{{ nat_rules.stdout_lines }}" - mangle_rules: "{{ mangle_rules.stdout_lines | default([]) }}" + firewalld: "{{ zone_info.stdout_lines }}" routing: "{{ routing_info.stdout_lines | default([]) }}" - loopback: "{{ lo_addrs.stdout_lines | default([]) }}" tags: [inbound, outbound] - name: Summary @@ -498,12 +518,8 @@ Ashburn IP: {{ ashburn_ip }} (on lo) GRE tunnel: {{ tunnel_device }} ({{ tunnel_src }} → {{ tunnel_dst }}) link: {{ tunnel_local_ip }}/31 ↔ {{ tunnel_remote_ip }}/31 - Inbound DNAT: {{ ashburn_ip }}:8001,9000-9025 → {{ kind_node_ip }} - Outbound SNAT: {{ kind_network }} sport 8001,9000-9025 → {{ ashburn_ip }} - Policy route: fwmark {{ fwmark }} → table {{ rt_table_name }} → via {{ tunnel_remote_ip }} dev {{ tunnel_device }} - - Next steps: - 1. Apply mia-sw01 config (Tunnel100 must be up on both sides) - 2. Verify tunnel: ping {{ tunnel_remote_ip }} - 3. Test from kelce: echo test | nc -u -w 1 137.239.194.65 9000 - 4. 
Check validator gossip ContactInfo shows {{ ashburn_ip }} for all addresses + Firewalld: Docker bridges in 'docker' zone, {{ tunnel_device }} in 'trusted' zone + Policy: docker-to-relay (docker → trusted, ACCEPT) + Inbound DNAT: {{ ashburn_ip }}:8001,9000-9025 → {{ kind_node_ip }} (iptables) + Outbound SNAT: fwmark {{ fwmark }} → {{ ashburn_ip }} (iptables) + Policy route: fwmark {{ fwmark }} → table {{ rt_table_name }} → via {{ tunnel_remote_ip }} diff --git a/playbooks/files/ashburn-relay-setup.sh.j2 b/playbooks/files/ashburn-relay-setup.sh.j2 index eb33d731..7648b660 100644 --- a/playbooks/files/ashburn-relay-setup.sh.j2 +++ b/playbooks/files/ashburn-relay-setup.sh.j2 @@ -1,11 +1,20 @@ #!/bin/bash # Ashburn validator relay — runtime setup # -# Called by ashburn-relay.service (After=docker.service) on boot. -# Idempotent — safe to run multiple times. +# Called by ashburn-relay.service (After=docker.service firewalld.service) +# on boot. Idempotent — safe to run multiple times. # -# Creates GRE tunnel, loopback IP, iptables rules, and policy routing -# so that validator traffic enters/exits via 137.239.194.65 (Ashburn). +# Creates GRE tunnel, loopback IP, iptables rules, and policy routing. +# Firewalld zones/policies are permanent (not managed here). +# +# WHY IPTABLES + FIREWALLD: +# Docker uses iptables (priority filter) for its FORWARD chain. +# Firewalld uses nftables (priority filter+10). Docker's chain runs +# first and drops packets that firewalld would accept. So: +# - DNAT must be iptables (inserted before Docker's ADDRTYPE LOCAL rule) +# - DOCKER-USER must be iptables (Docker's FORWARD chain requires it) +# - Mangle/SNAT must be iptables (firewalld has no native mark/SNAT) +# - Forwarding (Docker bridge → gre-ashburn) is firewalld (zones/policies) set -euo pipefail # GRE tunnel to mia-sw01 @@ -35,9 +44,10 @@ for rule in \ fi done -# FORWARD: allow DNAT'd relay traffic through Docker's FORWARD chain. 
-# Docker drops traffic to bridge networks unless explicitly accepted. -# DOCKER-USER runs before all Docker chains and survives daemon restarts. +# DOCKER-USER: accept DNAT'd relay traffic through Docker's FORWARD chain. +# Docker's iptables FORWARD (priority filter) drops NEW inbound traffic to +# bridge networks. DOCKER-USER is the only place for user ACCEPT rules +# that survive Docker daemon restarts. for rule in \ "-p udp -d {{ kind_node_ip }} --dport {{ gossip_port }} -j ACCEPT" \ "-p tcp -d {{ kind_node_ip }} --dport {{ gossip_port }} -j ACCEPT" \ @@ -51,10 +61,6 @@ for rule in \ done # Outbound mangle (fwmark for policy routing) -# sport rules: gossip/repair/TVU traffic FROM validator well-known ports -# dport rule: ip_echo TCP TO entrypoint port 8001 (ephemeral sport, -# so sport-based rules miss it; without this the entrypoint sees -# biscayne's real IP and probes that instead of the Ashburn relay IP) for rule in \ "-p udp -s {{ kind_network }} --sport {{ gossip_port }} \ -j MARK --set-mark {{ fwmark }}" \ From 61b7f6a23684a9cb3dc901a4f298080ae33c0d1c Mon Sep 17 00:00:00 2001 From: "A. F. Dudley" Date: Tue, 10 Mar 2026 05:54:23 +0000 Subject: [PATCH 50/62] feat: ip_echo preflight tool + relay post-mortem and checklist ip_echo_preflight.py: reimplements Solana ip_echo client protocol in Python. Verifies UDP port reachability before snapshot download, called from entrypoint.py. Prevents wasting hours on a snapshot only to crash-loop on port reachability. docs/postmortem-ashburn-relay-outbound.md: root cause analysis of the firewalld nftables FORWARD chain blocking outbound relay traffic. docs/ashburn-relay-checklist.md: 7-layer verification checklist for relay path debugging. 
Co-Authored-By: Claude Opus 4.6 --- docs/ashburn-relay-checklist.md | 168 +++++++++++++ docs/postmortem-ashburn-relay-outbound.md | 190 ++++++++++++++ scripts/agave-container/ip_echo_preflight.py | 249 +++++++++++++++++++ 3 files changed, 607 insertions(+) create mode 100644 docs/ashburn-relay-checklist.md create mode 100644 docs/postmortem-ashburn-relay-outbound.md create mode 100644 scripts/agave-container/ip_echo_preflight.py diff --git a/docs/ashburn-relay-checklist.md b/docs/ashburn-relay-checklist.md new file mode 100644 index 00000000..733581d1 --- /dev/null +++ b/docs/ashburn-relay-checklist.md @@ -0,0 +1,168 @@ +# Ashburn Relay / ip_echo Port Reachability Checklist + +The validator exits when it can't verify UDP ports (8001, 9000, 9002, 9003) are +reachable from entrypoint servers. The ip_echo protocol: validator TCP-connects +to entrypoint on port 8001, entrypoint sees source IP, sends UDP probes back to +that IP on the validator's ports. If probes don't arrive, validator crashes. + +## Layer 1: Biscayne outbound path + +Validator's outbound ip_echo TCP (dport 8001) must exit via GRE tunnel so +entrypoints see `137.239.194.65`, not biscayne's real IP via Docker MASQUERADE. 
+ +``` +[ ] 1.1 Mangle rules (4 rules in mangle PREROUTING): + - udp sport 8001 (gossip outbound) + - udp sport 9000:9025 (TVU/repair outbound) + - tcp sport 8001 (gossip TCP outbound) + - tcp dport 8001 (ip_echo outbound — THE CRITICAL ONE) + +[ ] 1.2 SNAT rule at position 1 (before Docker MASQUERADE): + POSTROUTING -m mark --mark 100 -j SNAT --to-source 137.239.194.65 + +[ ] 1.3 Policy routing rule: + fwmark 0x64 lookup ashburn + +[ ] 1.4 Ashburn routing table default route: + default via 169.254.100.0 dev gre-ashburn + +[ ] 1.5 Mangle counters incrementing (pkts/bytes on tcp dport 8001 rule) +``` + +## Layer 2: GRE tunnel (biscayne ↔ mia-sw01) + +``` +[ ] 2.1 Tunnel exists and UP: + gre-ashburn with 169.254.100.1/31 + +[ ] 2.2 Tunnel peer reachable: + ping 169.254.100.0 + +[ ] 2.3 Ashburn IP on loopback: + 137.239.194.65/32 dev lo +``` + +## Layer 3: Biscayne inbound path (DNAT + DOCKER-USER) + +Entrypoint UDP probes arrive at `137.239.194.65` and must reach kind node +`172.20.0.2`. + +``` +[ ] 3.1 DNAT rules at position 1 in nat PREROUTING + (before Docker's ADDRTYPE LOCAL rule): + - udp dport 8001 → 172.20.0.2:8001 + - tcp dport 8001 → 172.20.0.2:8001 + - udp dport 9000:9025 → 172.20.0.2 + +[ ] 3.2 DOCKER-USER ACCEPT rules (3 rules): + - udp dport 8001 → ACCEPT + - tcp dport 8001 → ACCEPT + - udp dport 9000:9025 → ACCEPT + +[ ] 3.3 DNAT counters incrementing +``` + +## Layer 4: mia-sw01 + +``` +[ ] 4.1 Tunnel100 UP in VRF relay + src 209.42.167.137, dst 186.233.184.235, link 169.254.100.0/31 + +[ ] 4.2 VRF relay default route: + 0.0.0.0/0 egress-vrf default 172.16.1.188 + +[ ] 4.3 Default VRF route to relay IP: + 137.239.194.65/32 egress-vrf relay 169.254.100.1 + +[ ] 4.4 ACL SEC-VALIDATOR-100-IN permits all needed traffic + +[ ] 4.5 Backbone Et4/1 UP (172.16.1.189/31) +``` + +## Layer 5: was-sw01 + +``` +[ ] 5.1 Static route: 137.239.194.65/32 via 172.16.1.189 + +[ ] 5.2 Backbone Et4/1 UP (172.16.1.188/31) + +[ ] 5.3 No Loopback101 (removed to avoid absorbing 
traffic locally) +``` + +## Layer 6: Persistence + +``` +[ ] 6.1 ashburn-relay.service enabled and active (runs After=docker.service) + +[ ] 6.2 /usr/local/sbin/ashburn-relay-setup.sh exists +``` + +## Layer 7: End-to-end tests + +All tests run via Ansible playbooks. The test scripts in `scripts/` are +utilities invoked by the playbooks — never run them manually via SSH. + +``` +[ ] 7.1 relay-test-tcp-dport.py (via ashburn-relay-check.yml or ad-hoc play) + Tests: outbound tcp dport 8001 mangle → SNAT → tunnel + Pass: entrypoint sees 137.239.194.65 + Fail: entrypoint sees 186.233.184.235 (Docker MASQUERADE) + +[ ] 7.2 relay-test-ip-echo.py (via ashburn-relay-check.yml or ad-hoc play) + Tests: FULL END-TO-END (outbound SNAT + inbound DNAT + DOCKER-USER) + Pass: UDP probe received from entrypoint + Fail: no UDP probes — inbound path broken + +[ ] 7.3 relay-inbound-udp-test.yml (cross-inventory: biscayne + kelce) + Tests: inbound UDP from external host → DNAT → kind node + Pass: UDP arrives in kind netns +``` + +## Playbooks + +```bash +# Read-only check of all relay state (biscayne + both switches): +ansible-playbook -i inventory-switches/switches.yml \ + -i inventory/biscayne.yml playbooks/ashburn-relay-check.yml + +# Apply all biscayne relay rules (idempotent): +ansible-playbook -i inventory/biscayne.yml playbooks/ashburn-relay-biscayne.yml + +# Apply outbound only (the ip_echo fix): +ansible-playbook -i inventory/biscayne.yml \ + playbooks/ashburn-relay-biscayne.yml -t outbound + +# Apply inbound only (DNAT + DOCKER-USER): +ansible-playbook -i inventory/biscayne.yml \ + playbooks/ashburn-relay-biscayne.yml -t inbound + +# Apply mia-sw01 config: +ansible-playbook -i inventory-switches/switches.yml \ + playbooks/ashburn-relay-mia-sw01.yml + +# Apply was-sw01 config: +ansible-playbook -i inventory-switches/switches.yml \ + playbooks/ashburn-relay-was-sw01.yml + +# Cross-inventory inbound UDP test (biscayne + kelce): +ansible-playbook -i inventory/biscayne.yml -i 
inventory/kelce.yml \ + playbooks/relay-inbound-udp-test.yml +``` + +## Historical root causes + +1. **TCP dport 8001 mangle rule missing** — ip_echo TCP exits via Docker + MASQUERADE, entrypoint sees wrong IP, UDP probes go to wrong address. + +2. **DOCKER-USER ACCEPT rules missing** — DNAT'd traffic hits Docker's FORWARD + DROP policy, never reaches kind node. + +3. **DNAT rule position wrong** — Docker's `ADDRTYPE LOCAL` rule in PREROUTING + catches traffic to loopback IPs before our DNAT rules. Must use `-I + PREROUTING 1`. + +4. **mia-sw01 egress-vrf route with interface specified** — silently fails in + EOS (accepted in config, never installed in RIB). Must use nexthop-only form. + +5. **was-sw01 Loopback101 absorbing traffic** — local delivery instead of + forwarding to mia-sw01 via backbone. diff --git a/docs/postmortem-ashburn-relay-outbound.md b/docs/postmortem-ashburn-relay-outbound.md new file mode 100644 index 00000000..b3dbae94 --- /dev/null +++ b/docs/postmortem-ashburn-relay-outbound.md @@ -0,0 +1,190 @@ +# Post-Mortem: Ashburn Relay Outbound Path Failure + +**Date resolved**: 2026-03-10 +**Duration of impact**: Unknown — likely since firewalld was enabled (post-reboot +2026-03-09 ~21:24 UTC). The relay worked before this with firewalld disabled. +**Symptoms**: Validator CrashLoopBackOff on ip_echo port reachability check. +Entrypoint never receives the validator's outbound TCP connection, so it can't +verify UDP port reachability and the validator refuses to start. + +## Timeline + +### Session d02959a7 (2026-03-06 to 2026-03-08) + +Initial relay infrastructure build-out. Multi-day effort across three repos. + +1. **Validator deployed**, replaying at 0.24 slots/sec. RTT between Miami and + peers (~150ms per repair round-trip) identified as the bottleneck. Ashburn + relay identified as the fix. + +2. **GRE tunnel created** (gre-ashburn: biscayne 186.233.184.235 ↔ mia-sw01 + 209.42.167.137). Tunnel100 on mia-sw01 in VRF relay. 
Policy routing with + fwmark 0x64 routes validator traffic through the tunnel. + +3. **Inbound path debugged end-to-end**: + - Cross-VRF routing on mia-sw01 investigated (egress-vrf route form, hardware + FIB programming, TCAM profile). + - GRE decapsulation on biscayne verified (kernel source read to understand + ip_tunnel_lookup matching logic). + - **DOCKER chain drop rule found**: Docker's FORWARD chain only had ACCEPT + for TCP 6443/443/80. DNAT'd relay UDP was dropped. Fix: DOCKER-USER + ACCEPT rules for UDP 8001 and 9000-9025. + - Inbound UDP relay test passed (kelce → was-sw01 → mia-sw01 → Tunnel100 → + biscayne → DNAT → kind node). + +4. **Outbound path partially verified**: Relay test scripts confirmed TCP and + UDP traffic from the kind container exits via gre-ashburn with correct SNAT. + But the **validator's own ip_echo check was never end-to-end verified** with + a successful startup. The validator entered CrashLoopBackOff after the + DOCKER-USER fix for unrelated reasons (monitoring container crashes, log path + issues). + +5. **Ashburn relay checklist** written at `docs/ashburn-relay-checklist.md` — + 7 layers covering the full path. All items remained unchecked. + +### Session 0b5908a4 (2026-03-09) + +Container rebuild, graceful shutdown implementation, ZFS upgrade, storage +migration. The validator was **running and catching up from a ~5,649 slot gap**, +confirming the relay was working. Then: + +- io_uring/ZFS deadlock from ungraceful shutdown (ZFS 2.2.2, fixed in 2.2.8+) +- Reboot required to clear zombie processes +- **Firewalld was enabled/started on the reboot** (previously disabled) + +### Session cc6c8c55 (2026-03-10, this session) + +User asked to review session d02959a7 to confirm the ip_echo problem was +actually solved. It wasn't. + +1. **ip_echo preflight tool written** (`scripts/agave-container/ip_echo_preflight.py`) + — reimplements the Solana ip_echo client protocol in Python, called from + `entrypoint.py` before snapshot download. 
Tested successfully against live + entrypoints from the host. + +2. **Tested from kind netns** — TCP to entrypoint:8001 returns "No route to + host". Mangle PREROUTING counter increments (marking works) but SNAT + POSTROUTING counter stays at 0 (packets never reach POSTROUTING). + +3. **Misdiagnoses**: + - `src_valid_mark=0` suspected as root cause. Set to 1, no change. The + `ip route get X from Y mark Z` command was misleading — it simulates + locally-originated traffic, not forwarded. The correct test is + `ip route get X from Y iif <iface> mark Z`, which showed routing works. + - Firewalld nftables backend not setting `src_valid_mark` was a red herring. + +4. **Root cause found**: Firewalld's nftables `filter_FORWARD` chain (priority + filter+10) rejects forwarded traffic between interfaces not in known zones. + Docker bridges and gre-ashburn were not in any firewalld zone. The chain's + `filter_FORWARD_POLICIES` only had rules for eno1, eno2, and mesh. + Traffic from br-cf46a62ab5b2 to gre-ashburn fell through to + `reject with icmpx admin-prohibited`. + + ``` + # The reject that was killing outbound relay traffic: + chain filter_FORWARD { + ... + jump filter_FORWARD_POLICIES + reject with icmpx admin-prohibited ← packets from unknown interfaces + } + ``` + +5. **Fix applied**: + - Docker bridges (br-cf46a62ab5b2, docker0, br-4fb6f6795448) → `docker` zone + - gre-ashburn → `trusted` zone + - New `docker-to-relay` policy: docker → trusted, ACCEPT + - All permanent (`firewall-cmd --permanent` + reload) + +6. **Verified**: ip_echo from kind netns returns `seen_ip=137.239.194.65 + shred_version=50093`. Full outbound path works. + +## Root Cause + +**Firewalld was enabled on biscayne after a reboot. Its nftables FORWARD chain +rejected forwarded traffic from Docker bridges to gre-ashburn because neither +interface was assigned to a firewalld zone.** + +The relay worked before because firewalld was disabled.
The iptables rules +(mangle marks, SNAT, DNAT, DOCKER-USER) operated without interference. When +firewalld was enabled, its nftables filter_FORWARD chain (priority filter+10) +added a second layer of forwarding policy enforcement that the iptables rules +couldn't bypass. + +### Why Docker outbound to the internet still worked + +Docker's outbound traffic to eno1 was accepted by firewalld because eno1 IS in +the `public` zone. The `filter_FWD_public_allow` chain has `oifname "eno1" +accept`. Only traffic to gre-ashburn (not in any zone) was rejected. + +### Why iptables rules alone weren't enough + +Linux netfilter processes hooks in priority order. At the FORWARD hook: + +1. **Priority filter (0)**: iptables `FORWARD` chain — Docker's DOCKER-USER + and DOCKER-FORWARD chains. These accept the traffic. +2. **Priority filter+10**: nftables `filter_FORWARD` chain — firewalld's zone + policies. These reject the traffic if interfaces aren't in known zones. + +Both chains must accept for the packet to pass. The iptables acceptance at +priority 0 is overridden by the nftables rejection at priority filter+10. + +## Architecture After Fix + +Firewalld manages forwarding policy. Iptables handles Docker-specific rules +that firewalld can't replace (DNAT ordering, DOCKER-USER chain, mangle marks, +SNAT). Both coexist because they operate at different netfilter priorities. 
+ +``` +Firewalld (permanent, survives reboots): + docker zone: br-cf46a62ab5b2, docker0, br-4fb6f6795448 + trusted zone: mesh, gre-ashburn + docker-forwarding policy: ANY → docker, ACCEPT (existing) + docker-to-relay policy: docker → trusted, ACCEPT (new) + +Systemd service (ashburn-relay.service, After=docker+firewalld): + GRE tunnel creation (iproute2) + Ashburn IP on loopback (iproute2) + DNAT rules at PREROUTING position 1 (iptables, before Docker's chain) + DOCKER-USER ACCEPT rules (iptables, for Docker's FORWARD chain) + Mangle marks for policy routing (iptables) + SNAT for marked traffic (iptables) + ip rule + ip route for ashburn table (iproute2) +``` + +## Lessons + +1. **Firewalld with nftables backend and Docker iptables coexist but don't + coordinate.** Adding an interface that Docker uses to forward traffic + requires explicitly assigning it to a firewalld zone. Docker's iptables + ACCEPT is necessary but not sufficient. + +2. **`ip route get X from Y mark Z` is misleading for forwarded traffic.** + It simulates local origination and fails on source address validation. Use + `ip route get X from Y iif <iface> mark Z` to simulate forwarded packets. + This wasted significant debugging time. + +3. **SNAT counter = 0 means packets die before POSTROUTING, but the cause + could be in either the routing decision OR a filter chain between PREROUTING + and POSTROUTING.** The nftables filter_FORWARD chain was invisible when only + checking iptables rules. + +4. **The validator passed ip_echo and ran successfully before.** That prior + success was the strongest evidence that the infrastructure was correct and + something changed. The change was firewalld being enabled.
+ +## Related Documents + +- `docs/ashburn-relay-checklist.md` — 7-layer checklist for relay verification +- `docs/bug-ashburn-tunnel-port-filtering.md` — prior DOCKER chain drop bug +- `.claude/skills/biscayne-relay-debugging/SKILL.md` — debugging skill +- `playbooks/ashburn-relay-biscayne.yml` — migrated playbook (firewalld + iptables) +- `scripts/agave-container/ip_echo_preflight.py` — preflight diagnostic tool + +## Related Sessions + +- `d02959a7-2ec6-4d27-8326-1bc4aaf3ebf1` (2026-03-06): Initial relay build, + DOCKER-USER fix, inbound path verified, outbound not end-to-end verified +- `0b5908a4-eff7-46de-9024-a11440bd68a8` (2026-03-09): Relay working (validator + catching up), then reboot introduced firewalld +- `cc6c8c55-fb4c-4482-b161-332ddf175300` (2026-03-10): Root cause found and + fixed (firewalld zone assignment) diff --git a/scripts/agave-container/ip_echo_preflight.py b/scripts/agave-container/ip_echo_preflight.py new file mode 100644 index 00000000..20cbb259 --- /dev/null +++ b/scripts/agave-container/ip_echo_preflight.py @@ -0,0 +1,249 @@ +#!/usr/bin/env python3 +"""ip_echo preflight — verify UDP port reachability before starting the validator. + +Implements the Solana ip_echo client protocol exactly: +1. Bind UDP sockets on the ports the validator will use +2. TCP connect to entrypoint gossip port, send IpEchoServerMessage +3. Parse IpEchoServerResponse (our IP as seen by entrypoint) +4. Wait for entrypoint's UDP probes on each port +5. Exit 0 if all ports reachable, exit 1 if any fail + +Wire format (from agave net-utils/src/): + Request: 4 null bytes + [u16; 4] tcp_ports LE + [u16; 4] udp_ports LE + \n + Response: 4 null bytes + bincode IpAddr (variant byte + addr) + optional shred_version + +Called from entrypoint.py before snapshot download. Prevents wasting hours +downloading a snapshot only to crash-loop on port reachability. 
+""" + +from __future__ import annotations + +import logging +import os +import socket +import struct +import sys +import threading +import time + +log = logging.getLogger("ip_echo_preflight") + +HEADER = b"\x00\x00\x00\x00" +TERMINUS = b"\x0a" +RESPONSE_BUF = 27 +IO_TIMEOUT = 5.0 +PROBE_TIMEOUT = 10.0 +MAX_RETRIES = 3 +RETRY_DELAY = 2.0 + + +def build_request(tcp_ports: list[int], udp_ports: list[int]) -> bytes: + """Build IpEchoServerMessage: header + [u16;4] tcp + [u16;4] udp + newline.""" + tcp = (tcp_ports + [0, 0, 0, 0])[:4] + udp = (udp_ports + [0, 0, 0, 0])[:4] + return HEADER + struct.pack("<4H", *tcp) + struct.pack("<4H", *udp) + TERMINUS + + +def parse_response(data: bytes) -> tuple[str, int | None]: + """Parse IpEchoServerResponse → (ip_string, shred_version | None). + + Wire format (bincode): + 4 bytes header (\0\0\0\0) + 4 bytes IpAddr enum variant (u32 LE: 0=IPv4, 1=IPv6) + 4|16 bytes address octets + 1 byte Option<u16> tag (0=None, 1=Some) + 2 bytes shred_version (u16 LE, only if Some) + """ + if len(data) < 8: + raise ValueError(f"response too short: {len(data)} bytes") + if data[:4] == b"HTTP": + raise ValueError("got HTTP response — not an ip_echo server") + if data[:4] != HEADER: + raise ValueError(f"unexpected header: {data[:4].hex()}") + variant = struct.unpack("<I", data[4:8])[0] + if variant == 0: + if len(data) < 12: + raise ValueError(f"IPv4 response too short: {len(data)} bytes") + ip = socket.inet_ntoa(data[8:12]) + rest = data[12:] + elif variant == 1: + if len(data) < 24: + raise ValueError(f"IPv6 response too short: {len(data)} bytes") + ip = socket.inet_ntop(socket.AF_INET6, data[8:24]) + rest = data[24:] + else: + raise ValueError(f"unknown IpAddr variant: {variant}") + shred_version: int | None = None + if len(rest) >= 3 and rest[0] == 1: + shred_version = struct.unpack("<H", rest[1:3])[0] + return ip, shred_version + + +def _listen_udp(port: int, results: dict[int, tuple], stop: threading.Event) -> None: + """Bind a UDP socket and wait for a probe packet.""" + try: + sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) + sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) + sock.bind(("0.0.0.0", port)) + sock.settimeout(0.5) + try: + while not stop.is_set(): + try: + _data, addr = sock.recvfrom(64) + results[port] = ("ok", addr) + return + except socket.timeout: + continue + finally: + sock.close() + except OSError as exc: + results[port] = ("bind_error", str(exc)) + + +def ip_echo_check( + entrypoint_host: str, + entrypoint_port: int, + udp_ports: list[int], +) -> tuple[str, dict[int, bool]]: + """Run one ip_echo
exchange and return (seen_ip, {port: reachable}). + + Raises on TCP failure (caller retries). + """ + udp_ports = [p for p in udp_ports if p != 0][:4] + + # Start UDP listeners before sending the TCP request + results: dict[int, tuple] = {} + stop = threading.Event() + threads = [] + for port in udp_ports: + t = threading.Thread(target=_listen_udp, args=(port, results, stop), daemon=True) + t.start() + threads.append(t) + time.sleep(0.1) # let listeners bind + + # TCP: send request, read response + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + sock.settimeout(IO_TIMEOUT) + try: + sock.connect((entrypoint_host, entrypoint_port)) + sock.sendall(build_request([], udp_ports)) + resp = sock.recv(RESPONSE_BUF) + finally: + sock.close() + + seen_ip, shred_version = parse_response(resp) + log.info( + "entrypoint %s:%d sees us as %s (shred_version=%s)", + entrypoint_host, entrypoint_port, seen_ip, shred_version, + ) + + # Wait for UDP probes + deadline = time.monotonic() + PROBE_TIMEOUT + while time.monotonic() < deadline: + if all(p in results for p in udp_ports): + break + time.sleep(0.2) + + stop.set() + for t in threads: + t.join(timeout=1) + + port_ok: dict[int, bool] = {} + for port in udp_ports: + if port not in results: + log.error("port %d: no probe received within %.0fs", port, PROBE_TIMEOUT) + port_ok[port] = False + else: + status, detail = results[port] + if status == "ok": + log.info("port %d: probe received from %s", port, detail) + port_ok[port] = True + else: + log.error("port %d: %s: %s", port, status, detail) + port_ok[port] = False + + return seen_ip, port_ok + + +def run_preflight( + entrypoint_host: str, + entrypoint_port: int, + udp_ports: list[int], + expected_ip: str = "", +) -> bool: + """Run ip_echo check with retries. 
Returns True if all ports pass.""" + for attempt in range(1, MAX_RETRIES + 1): + log.info("ip_echo attempt %d/%d → %s:%d, ports %s", + attempt, MAX_RETRIES, entrypoint_host, entrypoint_port, udp_ports) + try: + seen_ip, port_ok = ip_echo_check(entrypoint_host, entrypoint_port, udp_ports) + except Exception as exc: + log.error("attempt %d TCP failed: %s", attempt, exc) + if attempt < MAX_RETRIES: + time.sleep(RETRY_DELAY) + continue + + if expected_ip and seen_ip != expected_ip: + log.error( + "IP MISMATCH: entrypoint sees %s, expected %s (GOSSIP_HOST). " + "Outbound mangle/SNAT path is broken.", + seen_ip, expected_ip, + ) + if attempt < MAX_RETRIES: + time.sleep(RETRY_DELAY) + continue + + reachable = [p for p, ok in port_ok.items() if ok] + unreachable = [p for p, ok in port_ok.items() if not ok] + + if not unreachable: + log.info("PASS: all ports reachable %s, seen as %s", reachable, seen_ip) + return True + + log.error( + "attempt %d: unreachable %s, reachable %s, seen as %s", + attempt, unreachable, reachable, seen_ip, + ) + if attempt < MAX_RETRIES: + time.sleep(RETRY_DELAY) + + log.error("FAIL: ip_echo preflight exhausted %d attempts", MAX_RETRIES) + return False + + +def main() -> int: + logging.basicConfig( + level=logging.INFO, + format="%(asctime)s %(levelname)s [%(name)s] %(message)s", + datefmt="%H:%M:%S", + ) + + # Parse entrypoint — VALIDATOR_ENTRYPOINT is "host:port" + raw = os.environ.get("VALIDATOR_ENTRYPOINT", "") + if not raw and len(sys.argv) > 1: + raw = sys.argv[1] + if not raw: + log.error("set VALIDATOR_ENTRYPOINT or pass host:port as argument") + return 1 + + if ":" in raw: + host, port_str = raw.rsplit(":", 1) + ep_port = int(port_str) + else: + host = raw + ep_port = 8001 + + gossip_port = int(os.environ.get("GOSSIP_PORT", "8001")) + dynamic_range = os.environ.get("DYNAMIC_PORT_RANGE", "9000-10000") + range_start = int(dynamic_range.split("-")[0]) + expected_ip = os.environ.get("GOSSIP_HOST", "") + + # Test gossip + first 3 ports from 
dynamic range (4 max per ip_echo message) + udp_ports = [gossip_port, range_start, range_start + 2, range_start + 3] + + ok = run_preflight(host, ep_port, udp_ports, expected_ip) + return 0 if ok else 1 + + +if __name__ == "__main__": + sys.exit(main()) From 08380ec070428be49450e8227aa5960845537de8 Mon Sep 17 00:00:00 2001 From: "A. F. Dudley" Date: Tue, 10 Mar 2026 06:08:22 +0000 Subject: [PATCH 51/62] fix: Dockerfile includes ip_echo_preflight.py Co-Authored-By: Claude Opus 4.6 --- scripts/agave-container/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/agave-container/Dockerfile b/scripts/agave-container/Dockerfile index 68a12508..97c6227f 100644 --- a/scripts/agave-container/Dockerfile +++ b/scripts/agave-container/Dockerfile @@ -62,7 +62,7 @@ RUN useradd -m -s /bin/bash agave \ COPY --from=builder /solana-release/bin/ /usr/local/bin/ # Copy entrypoint and support scripts -COPY entrypoint.py snapshot_download.py /usr/local/bin/ +COPY entrypoint.py snapshot_download.py ip_echo_preflight.py /usr/local/bin/ COPY start-test.sh /usr/local/bin/ RUN chmod +x /usr/local/bin/entrypoint.py /usr/local/bin/start-test.sh From 7c58809cc181dd70174bc06eb75d6032fe3b8647 Mon Sep 17 00:00:00 2001 From: "A. F. Dudley" Date: Tue, 10 Mar 2026 06:21:12 +0000 Subject: [PATCH 52/62] chore: remove scripts/agave-container before subtree add Moving container scripts into agave-stack subtree (correct direction). The source of truth will be agave-stack/ in this repo, pushed out to LaconicNetwork/agave-stack via git subtree push. 
Co-Authored-By: Claude Opus 4.6 --- scripts/agave-container/Dockerfile | 81 -- scripts/agave-container/build.sh | 17 - scripts/agave-container/entrypoint.py | 686 --------------- scripts/agave-container/ip_echo_preflight.py | 249 ------ scripts/agave-container/snapshot_download.py | 878 ------------------- scripts/agave-container/start-test.sh | 112 --- 6 files changed, 2023 deletions(-) delete mode 100644 scripts/agave-container/Dockerfile delete mode 100644 scripts/agave-container/build.sh delete mode 100644 scripts/agave-container/entrypoint.py delete mode 100644 scripts/agave-container/ip_echo_preflight.py delete mode 100644 scripts/agave-container/snapshot_download.py delete mode 100644 scripts/agave-container/start-test.sh diff --git a/scripts/agave-container/Dockerfile b/scripts/agave-container/Dockerfile deleted file mode 100644 index 97c6227f..00000000 --- a/scripts/agave-container/Dockerfile +++ /dev/null @@ -1,81 +0,0 @@ -# Unified Agave/Jito Solana image -# Supports three modes via AGAVE_MODE env: test, rpc, validator -# -# Build args: -# AGAVE_REPO - git repo URL (anza-xyz/agave or jito-foundation/jito-solana) -# AGAVE_VERSION - git tag to build (e.g. 
v3.1.9, v3.1.8-jito) - -ARG AGAVE_REPO=https://github.com/anza-xyz/agave.git -ARG AGAVE_VERSION=v3.1.9 - -# ---------- Stage 1: Build ---------- -FROM rust:1.85-bookworm AS builder - -ARG AGAVE_REPO -ARG AGAVE_VERSION - -RUN apt-get update && apt-get install -y --no-install-recommends \ - build-essential \ - pkg-config \ - libssl-dev \ - libudev-dev \ - libclang-dev \ - protobuf-compiler \ - ca-certificates \ - git \ - cmake \ - && rm -rf /var/lib/apt/lists/* - -WORKDIR /build -RUN git clone "$AGAVE_REPO" --depth 1 --branch "$AGAVE_VERSION" --recurse-submodules agave -WORKDIR /build/agave - -# Cherry-pick --public-tvu-address support (anza-xyz/agave PR #6778, commit 9f4b3ae) -# This flag only exists on master, not in v3.1.9 — fetch the PR ref and cherry-pick -ARG TVU_ADDRESS_PR=6778 -RUN if [ -n "$TVU_ADDRESS_PR" ]; then \ - git fetch --depth 50 origin "pull/${TVU_ADDRESS_PR}/head:tvu-pr" && \ - git cherry-pick --no-commit tvu-pr; \ - fi - -# Build all binaries using the upstream install script -RUN CI_COMMIT=$(git rev-parse HEAD) scripts/cargo-install-all.sh /solana-release - -# ---------- Stage 2: Runtime ---------- -FROM debian:bookworm-slim - -RUN apt-get update && apt-get install -y --no-install-recommends \ - ca-certificates \ - libssl3 \ - libudev1 \ - curl \ - sudo \ - aria2 \ - python3 \ - && rm -rf /var/lib/apt/lists/* - -# Create non-root user with sudo -RUN useradd -m -s /bin/bash agave \ - && echo "agave ALL=(ALL) NOPASSWD: ALL" >> /etc/sudoers - -# Copy all compiled binaries -COPY --from=builder /solana-release/bin/ /usr/local/bin/ - -# Copy entrypoint and support scripts -COPY entrypoint.py snapshot_download.py ip_echo_preflight.py /usr/local/bin/ -COPY start-test.sh /usr/local/bin/ -RUN chmod +x /usr/local/bin/entrypoint.py /usr/local/bin/start-test.sh - -# Create data directories -RUN mkdir -p /data/config /data/ledger /data/accounts /data/snapshots \ - && chown -R agave:agave /data - -USER agave -WORKDIR /data - -ENV RUST_LOG=info -ENV 
RUST_BACKTRACE=1 - -EXPOSE 8899 8900 8001 8001/udp - -ENTRYPOINT ["entrypoint.py"] diff --git a/scripts/agave-container/build.sh b/scripts/agave-container/build.sh deleted file mode 100644 index 4c4d940f..00000000 --- a/scripts/agave-container/build.sh +++ /dev/null @@ -1,17 +0,0 @@ -#!/usr/bin/env bash - -# Build laconicnetwork/agave -# Set AGAVE_REPO and AGAVE_VERSION env vars to build Jito or a different version -source ${CERC_CONTAINER_BASE_DIR}/build-base.sh - -SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) - -AGAVE_REPO="${AGAVE_REPO:-https://github.com/anza-xyz/agave.git}" -AGAVE_VERSION="${AGAVE_VERSION:-v3.1.9}" - -docker build -t laconicnetwork/agave:local \ - --build-arg AGAVE_REPO="$AGAVE_REPO" \ - --build-arg AGAVE_VERSION="$AGAVE_VERSION" \ - ${build_command_args} \ - -f ${SCRIPT_DIR}/Dockerfile \ - ${SCRIPT_DIR} diff --git a/scripts/agave-container/entrypoint.py b/scripts/agave-container/entrypoint.py deleted file mode 100644 index 2b7324c3..00000000 --- a/scripts/agave-container/entrypoint.py +++ /dev/null @@ -1,686 +0,0 @@ -#!/usr/bin/env python3 -"""Agave validator entrypoint — snapshot management, arg construction, liveness probe. - -Two subcommands: - entrypoint.py serve (default) — snapshot freshness check + run agave-validator - entrypoint.py probe — liveness probe (slot lag check, exits 0/1) - -Replaces the bash entrypoint.sh / start-rpc.sh / start-validator.sh with a single -Python module. Test mode still dispatches to start-test.sh. - -Python stays as PID 1 and traps SIGTERM. On SIGTERM, it runs -``agave-validator exit --force --ledger /data/ledger`` which connects to the -admin RPC Unix socket and tells the validator to flush I/O and exit cleanly. -This avoids the io_uring/ZFS deadlock that occurs when the process is killed. - -All configuration comes from environment variables — same vars as the original -bash scripts. See compose files for defaults. 
def env_bool(name: str, default: bool = False) -> bool:
    """Read a boolean environment variable.

    Args:
        name: Name of the environment variable.
        default: Value returned when the variable is unset, empty, or
            contains only whitespace.

    Returns:
        True when the value is ``true``, ``1`` or ``yes`` (case-insensitive,
        surrounding whitespace ignored); False for any other non-blank value.
    """
    # Strip before comparing: values that come from YAML/compose files often
    # carry a trailing newline or space, which would otherwise read as False.
    raw = os.environ.get(name, "").strip().lower()
    if not raw:
        return default
    return raw in ("true", "1", "yes")
def get_local_snapshot_slot(snapshots_dir: str) -> int | None:
    """Return the newest full-snapshot slot present in ``snapshots_dir``.

    Only entries whose names match ``FULL_SNAP_RE`` are considered; the slot
    is the first captured group of that pattern.

    Returns:
        The largest slot number found, or None when the directory does not
        exist or holds no matching entry.
    """
    directory = Path(snapshots_dir)
    if not directory.is_dir():
        return None
    hits = (FULL_SNAP_RE.match(item.name) for item in directory.iterdir())
    return max((int(hit.group(1)) for hit in hits if hit), default=None)
- - Controlled by env vars: - SNAPSHOT_AUTO_DOWNLOAD (default: true) — enable/disable - SNAPSHOT_MAX_AGE_SLOTS (default: 100000) — full snapshot staleness threshold - (one full snapshot generation, ~11 hours) - """ - if not env_bool("SNAPSHOT_AUTO_DOWNLOAD", default=True): - log.info("Snapshot auto-download disabled") - return - - max_age = int(env("SNAPSHOT_MAX_AGE_SLOTS", "100000")) - - mainnet_slot = rpc_get_slot(MAINNET_RPC) - if mainnet_slot is None: - log.warning("Cannot reach mainnet RPC — skipping snapshot check") - return - - script_dir = Path(__file__).resolve().parent - sys.path.insert(0, str(script_dir)) - from snapshot_download import download_best_snapshot, download_incremental_for_slot - - convergence = int(env("SNAPSHOT_CONVERGENCE_SLOTS", "500")) - retry_delay = int(env("SNAPSHOT_RETRY_DELAY", "60")) - - # Check local full snapshot - local_slot = get_local_snapshot_slot(snapshots_dir) - have_fresh_full = (local_slot is not None - and (mainnet_slot - local_slot) <= max_age) - - if have_fresh_full: - assert local_slot is not None - inc_slot = get_incremental_slot(snapshots_dir, local_slot) - if inc_slot is not None: - inc_gap = mainnet_slot - inc_slot - if inc_gap <= convergence: - log.info("Full (slot %d) + incremental (slot %d, gap %d) " - "within convergence, starting", - local_slot, inc_slot, inc_gap) - return - log.info("Incremental too stale (slot %d, gap %d > %d)", - inc_slot, inc_gap, convergence) - # Fresh full, need a fresh incremental - log.info("Downloading incremental for full at slot %d", local_slot) - while True: - if download_incremental_for_slot(snapshots_dir, local_slot, - convergence_slots=convergence): - return - log.warning("Incremental download failed — retrying in %ds", - retry_delay) - time.sleep(retry_delay) - - # No full or full too old — download both - log.info("Downloading full + incremental") - clean_snapshots(snapshots_dir) - while True: - if download_best_snapshot(snapshots_dir, convergence_slots=convergence): - return 
def ensure_dirs(*dirs: str) -> None:
    """Create each directory and hand ownership to the current user.

    Every path is created with ``os.makedirs(..., exist_ok=True)`` and then
    recursively chowned to the process's uid:gid through ``sudo``. The chown
    is best-effort: its exit status is not checked and its output is
    captured and discarded.
    """
    owner = f"{os.getuid()}:{os.getgid()}"
    for path in dirs:
        os.makedirs(path, exist_ok=True)
        chown_cmd = ["sudo", "chown", "-R", owner, path]
        try:
            subprocess.run(chown_cmd, check=False, capture_output=True)
        except FileNotFoundError:
            # sudo not available — dirs already owned correctly
            continue
env("LIMIT_LEDGER_SIZE", "50000000"), - "--no-snapshot-fetch", # entrypoint handles snapshot download - ] - - # Snapshot generation - if env("NO_SNAPSHOTS") == "true": - args.append("--no-snapshots") - else: - args += [ - "--full-snapshot-interval-slots", env("SNAPSHOT_INTERVAL_SLOTS", "100000"), - "--maximum-full-snapshots-to-retain", env("MAXIMUM_SNAPSHOTS_TO_RETAIN", "1"), - ] - if env("NO_INCREMENTAL_SNAPSHOTS") != "true": - args += ["--maximum-incremental-snapshots-to-retain", "2"] - - # Account indexes - account_indexes = env("ACCOUNT_INDEXES") - if account_indexes: - for idx in account_indexes.split(","): - idx = idx.strip() - if idx: - args += ["--account-index", idx] - - # Additional entrypoints - for ep in env("EXTRA_ENTRYPOINTS").split(): - if ep: - args += ["--entrypoint", ep] - - # Additional known validators - for kv in env("EXTRA_KNOWN_VALIDATORS").split(): - if kv: - args += ["--known-validator", kv] - - # Cluster verification - genesis_hash = env("EXPECTED_GENESIS_HASH") - if genesis_hash: - args += ["--expected-genesis-hash", genesis_hash] - shred_version = env("EXPECTED_SHRED_VERSION") - if shred_version: - args += ["--expected-shred-version", shred_version] - - # Metrics — just needs to be in the environment, agave reads it directly - # (env var is already set, nothing to pass as arg) - - # Gossip host / TVU address - gossip_host = env("GOSSIP_HOST") - if gossip_host: - args += ["--gossip-host", gossip_host] - elif env("PUBLIC_TVU_ADDRESS"): - args += ["--public-tvu-address", env("PUBLIC_TVU_ADDRESS")] - - # Jito flags - if env("JITO_ENABLE") == "true": - log.info("Jito MEV enabled") - jito_flags: list[tuple[str, str]] = [ - ("JITO_TIP_PAYMENT_PROGRAM", "--tip-payment-program-pubkey"), - ("JITO_DISTRIBUTION_PROGRAM", "--tip-distribution-program-pubkey"), - ("JITO_MERKLE_ROOT_AUTHORITY", "--merkle-root-upload-authority"), - ("JITO_COMMISSION_BPS", "--commission-bps"), - ("JITO_BLOCK_ENGINE_URL", "--block-engine-url"), - 
def build_rpc_args() -> list[str]:
    """Assemble the agave-validator command line for RPC (non-voting) mode.

    Starts from ``build_common_args()``, adds the non-voting / full RPC API
    flags with file logging under ``LOG_DIR``, then selects public or private
    RPC advertisement depending on whether PUBLIC_RPC_ADDRESS is set.
    """
    argv = build_common_args()
    argv.extend([
        "--no-voting",
        "--log", f"{LOG_DIR}/validator.log",
        "--full-rpc-api",
        "--enable-rpc-transaction-history",
        "--rpc-pubsub-enable-block-subscription",
        "--enable-extended-tx-metadata-storage",
        "--no-wait-for-vote-to-start-leader",
    ])

    # The Jito relayer URL is validator-only and is added by
    # build_validator_args, not here.
    advertised = env("PUBLIC_RPC_ADDRESS")
    if not advertised:
        # No public address configured: keep the RPC endpoint private.
        argv.extend(["--private-rpc", "--allow-private-addr", "--only-known-rpc"])
        return argv

    argv.extend(["--public-rpc-address", advertised])
    return argv
def append_extra_args(args: list[str]) -> list[str]:
    """Append the EXTRA_ARGS passthrough flags to ``args``.

    EXTRA_ARGS is tokenised with shell-style quoting rules, so a value that
    contains spaces can be passed as a single argument by quoting it
    (``--flag "two words"``). Unquoted input is split on whitespace exactly
    as before.

    Args:
        args: Argument list to extend; it is modified in place.

    Returns:
        The same ``args`` list, for call chaining.

    Exits the process with status 1 when EXTRA_ARGS has unbalanced quotes.
    """
    import shlex  # local import: not among the module-level imports

    extra = env("EXTRA_ARGS")
    if not extra:
        return args
    try:
        args += shlex.split(extra)
    except ValueError as exc:
        # Unbalanced quote or dangling escape: fail loudly rather than start
        # the validator with a mangled command line.
        log.error("EXTRA_ARGS could not be parsed (%s): %r", exc, extra)
        sys.exit(1)
    return args
- """ - log.info("%s — requesting graceful exit via admin RPC", reason) - try: - result = subprocess.run( - ["agave-validator", "exit", "--force", "--ledger", LEDGER_DIR], - capture_output=True, text=True, timeout=30, - ) - if result.returncode == 0: - log.info("Admin RPC exit requested successfully") - else: - log.warning( - "Admin RPC exit returned %d: %s", - result.returncode, result.stderr.strip(), - ) - except subprocess.TimeoutExpired: - log.warning("Admin RPC exit command timed out after 30s") - except FileNotFoundError: - log.warning("agave-validator binary not found for exit command") - - # Wait for child to exit - try: - child.wait(timeout=GRACEFUL_EXIT_TIMEOUT) - log.info("Validator exited cleanly with code %d", child.returncode) - return - except subprocess.TimeoutExpired: - log.warning( - "Validator did not exit within %ds — sending SIGTERM", - GRACEFUL_EXIT_TIMEOUT, - ) - - # Fallback: SIGTERM - child.terminate() - try: - child.wait(timeout=15) - log.info("Validator exited after SIGTERM with code %d", child.returncode) - return - except subprocess.TimeoutExpired: - log.warning("Validator did not exit after SIGTERM — sending SIGKILL") - - # Last resort: SIGKILL - child.kill() - child.wait() - log.info("Validator killed with SIGKILL, code %d", child.returncode) - - -# -- Serve subcommand --------------------------------------------------------- - - -def _gap_monitor( - child: subprocess.Popen[bytes], - leapfrog: threading.Event, - shutting_down: threading.Event, -) -> None: - """Background thread: poll slot gap and trigger leapfrog if too far behind. - - Waits for a grace period (SNAPSHOT_MONITOR_GRACE, default 600s) before - monitoring — the validator needs time to extract snapshots and catch up. - Then polls every SNAPSHOT_MONITOR_INTERVAL (default 30s). 
def cmd_serve() -> None:
    """Main serve flow: snapshot download, run validator, monitor gap, leapfrog.

    Python stays as PID 1. On each iteration:
      1. Download full + incremental snapshots (if needed)
      2. Start agave-validator as child process
      3. Monitor slot gap in background thread
      4. If gap exceeds threshold → graceful stop → loop back to step 1
      5. If SIGTERM → graceful stop → exit
      6. If validator crashes → exit with its return code

    A SIGTERM always wins over a leapfrog: if both happen during the same
    run, the process exits instead of restarting the validator.

    In ``test`` mode the process is replaced by ``start-test.sh`` and none of
    the above applies.
    """
    mode = env("AGAVE_MODE", "test")
    log.info("AGAVE_MODE=%s", mode)

    if mode == "test":
        # execvp replaces this process; nothing below runs in test mode.
        os.execvp("start-test.sh", ["start-test.sh"])

    if mode not in ("rpc", "validator"):
        log.error("Unknown AGAVE_MODE: %s (valid: test, rpc, validator)", mode)
        sys.exit(1)

    # One-time setup
    dirs = [CONFIG_DIR, LEDGER_DIR, ACCOUNTS_DIR, SNAPSHOTS_DIR]
    if mode == "rpc":
        # Only RPC mode logs to a file (validator mode logs to stdout).
        dirs.append(LOG_DIR)
    ensure_dirs(*dirs)

    if not env_bool("SKIP_IP_ECHO_PREFLIGHT"):
        # The preflight module sits next to this script; make it importable.
        script_dir = Path(__file__).resolve().parent
        sys.path.insert(0, str(script_dir))
        from ip_echo_preflight import main as ip_echo_main
        if ip_echo_main() != 0:
            sys.exit(1)

    if mode == "rpc":
        ensure_identity_rpc()
    print_identity()

    if mode == "rpc":
        args = build_rpc_args()
    else:
        args = build_validator_args()
    args = append_extra_args(args)

    # Main loop: download → run → monitor → leapfrog if needed
    while True:
        maybe_download_snapshot(SNAPSHOTS_DIR)

        # Timestamp read by the probe subcommand to compute its grace period.
        Path("/tmp/entrypoint-start").write_text(str(time.time()))
        log.info("Starting agave-validator with %d arguments", len(args))
        child = subprocess.Popen(["agave-validator"] + args)

        shutting_down = threading.Event()
        leapfrog = threading.Event()

        # Forward SIGUSR1 to the validator unchanged.
        signal.signal(signal.SIGUSR1,
                      lambda _sig, _frame: child.send_signal(signal.SIGUSR1))

        def _on_sigterm(_sig: int, _frame: object) -> None:
            shutting_down.set()
            # Run the (slow) graceful exit off the signal handler.
            threading.Thread(
                target=graceful_exit, args=(child,), daemon=True,
            ).start()

        signal.signal(signal.SIGTERM, _on_sigterm)

        # Start gap monitor
        monitor = threading.Thread(
            target=_gap_monitor,
            args=(child, leapfrog, shutting_down),
            daemon=True,
        )
        monitor.start()

        child.wait()

        # Restart only for a pure leapfrog. If SIGTERM arrived while the
        # leapfrog shutdown was in flight, honour the termination request
        # instead of downloading snapshots and starting a new validator.
        if leapfrog.is_set() and not shutting_down.is_set():
            log.info("Leapfrog: restarting with fresh incremental")
            continue

        sys.exit(child.returncode)
def main() -> None:
    """Configure logging and dispatch to the requested subcommand.

    The subcommand is the first command-line argument and defaults to
    ``serve``. An unrecognised subcommand is logged and the process exits
    with status 1.
    """
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s %(levelname)s [%(name)s] %(message)s",
        datefmt="%H:%M:%S",
    )

    subcmd = "serve"
    if len(sys.argv) > 1:
        subcmd = sys.argv[1]

    dispatch = {"serve": cmd_serve, "probe": cmd_probe}
    handler = dispatch.get(subcmd)
    if handler is None:
        log.error("Unknown subcommand: %s (valid: serve, probe)", subcmd)
        sys.exit(1)
    handler()
def build_request(tcp_ports: list[int], udp_ports: list[int]) -> bytes:
    """Serialise an IpEchoServerMessage.

    Layout: ``HEADER``, four little-endian u16 TCP ports, four little-endian
    u16 UDP ports, ``TERMINUS``. Each port list is zero-padded to four
    entries; entries beyond the fourth are dropped.
    """
    def _four(ports: list[int]) -> list[int]:
        # Pad with zeros, then keep exactly four slots.
        return (ports + [0] * 4)[:4]

    port_block = struct.pack("<8H", *_four(tcp_ports), *_four(udp_ports))
    return b"".join((HEADER, port_block, TERMINUS))
- - Wire format (bincode): - 4 bytes header (\0\0\0\0) - 4 bytes IpAddr enum variant (u32 LE: 0=IPv4, 1=IPv6) - 4|16 bytes address octets - 1 byte Option tag (0=None, 1=Some) - 2 bytes shred_version (u16 LE, only if Some) - """ - if len(data) < 8: - raise ValueError(f"response too short: {len(data)} bytes") - if data[:4] == b"HTTP": - raise ValueError("got HTTP response — not an ip_echo server") - if data[:4] != HEADER: - raise ValueError(f"unexpected header: {data[:4].hex()}") - variant = struct.unpack("= 3 and rest[0] == 1: - shred_version = struct.unpack(" None: - """Bind a UDP socket and wait for a probe packet.""" - try: - sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) - sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) - sock.bind(("0.0.0.0", port)) - sock.settimeout(0.5) - try: - while not stop.is_set(): - try: - _data, addr = sock.recvfrom(64) - results[port] = ("ok", addr) - return - except socket.timeout: - continue - finally: - sock.close() - except OSError as exc: - results[port] = ("bind_error", str(exc)) - - -def ip_echo_check( - entrypoint_host: str, - entrypoint_port: int, - udp_ports: list[int], -) -> tuple[str, dict[int, bool]]: - """Run one ip_echo exchange and return (seen_ip, {port: reachable}). - - Raises on TCP failure (caller retries). 
- """ - udp_ports = [p for p in udp_ports if p != 0][:4] - - # Start UDP listeners before sending the TCP request - results: dict[int, tuple] = {} - stop = threading.Event() - threads = [] - for port in udp_ports: - t = threading.Thread(target=_listen_udp, args=(port, results, stop), daemon=True) - t.start() - threads.append(t) - time.sleep(0.1) # let listeners bind - - # TCP: send request, read response - sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - sock.settimeout(IO_TIMEOUT) - try: - sock.connect((entrypoint_host, entrypoint_port)) - sock.sendall(build_request([], udp_ports)) - resp = sock.recv(RESPONSE_BUF) - finally: - sock.close() - - seen_ip, shred_version = parse_response(resp) - log.info( - "entrypoint %s:%d sees us as %s (shred_version=%s)", - entrypoint_host, entrypoint_port, seen_ip, shred_version, - ) - - # Wait for UDP probes - deadline = time.monotonic() + PROBE_TIMEOUT - while time.monotonic() < deadline: - if all(p in results for p in udp_ports): - break - time.sleep(0.2) - - stop.set() - for t in threads: - t.join(timeout=1) - - port_ok: dict[int, bool] = {} - for port in udp_ports: - if port not in results: - log.error("port %d: no probe received within %.0fs", port, PROBE_TIMEOUT) - port_ok[port] = False - else: - status, detail = results[port] - if status == "ok": - log.info("port %d: probe received from %s", port, detail) - port_ok[port] = True - else: - log.error("port %d: %s: %s", port, status, detail) - port_ok[port] = False - - return seen_ip, port_ok - - -def run_preflight( - entrypoint_host: str, - entrypoint_port: int, - udp_ports: list[int], - expected_ip: str = "", -) -> bool: - """Run ip_echo check with retries. 
def main() -> int:
    """Run the ip_echo preflight using configuration from the environment.

    Environment:
        VALIDATOR_ENTRYPOINT: ``host:port`` (or just ``host``, port 8001) of
            the entrypoint to query. Falls back to the first command-line
            argument when unset.
        GOSSIP_PORT: Local gossip port to test (default 8001).
        DYNAMIC_PORT_RANGE: ``start-end``; the first three ports of the range
            are tested (default ``9000-10000``).
        GOSSIP_HOST: If set, the IP the entrypoint must report for us.

    Returns:
        0 when every tested port is reachable, 1 otherwise (including
        missing or malformed configuration).
    """
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s %(levelname)s [%(name)s] %(message)s",
        datefmt="%H:%M:%S",
    )

    # Parse entrypoint — VALIDATOR_ENTRYPOINT is "host:port"
    raw = os.environ.get("VALIDATOR_ENTRYPOINT", "")
    if not raw and len(sys.argv) > 1:
        raw = sys.argv[1]
    if not raw:
        log.error("set VALIDATOR_ENTRYPOINT or pass host:port as argument")
        return 1

    dynamic_range = os.environ.get("DYNAMIC_PORT_RANGE", "9000-10000")
    try:
        if ":" in raw:
            host, port_str = raw.rsplit(":", 1)
            ep_port = int(port_str)
        else:
            host = raw
            ep_port = 8001
        gossip_port = int(os.environ.get("GOSSIP_PORT", "8001"))
        range_start = int(dynamic_range.split("-")[0])
    except ValueError as exc:
        # Report bad configuration as a preflight failure, not a traceback.
        log.error("invalid port configuration: %s", exc)
        return 1

    expected_ip = os.environ.get("GOSSIP_HOST", "")

    # Test gossip + first 3 ports from dynamic range (4 max per ip_echo message)
    udp_ports = [gossip_port, range_start, range_start + 1, range_start + 2]

    ok = run_preflight(host, ep_port, udp_ports, expected_ip)
    return 0 if ok else 1
Path -from urllib.request import Request - -log: logging.Logger = logging.getLogger("snapshot-download") - -CLUSTER_RPC: dict[str, str] = { - "mainnet-beta": "https://api.mainnet-beta.solana.com", - "testnet": "https://api.testnet.solana.com", - "devnet": "https://api.devnet.solana.com", -} - -# Snapshot filenames: -# snapshot--.tar.zst -# incremental-snapshot---.tar.zst -FULL_SNAP_RE: re.Pattern[str] = re.compile( - r"^snapshot-(\d+)-([A-Za-z0-9]+)\.tar\.(zst|bz2)$" -) -INCR_SNAP_RE: re.Pattern[str] = re.compile( - r"^incremental-snapshot-(\d+)-(\d+)-([A-Za-z0-9]+)\.tar\.(zst|bz2)$" -) - - -@dataclass -class SnapshotSource: - """A snapshot file available from a specific RPC node.""" - - rpc_address: str - # Full redirect paths as returned by the server (e.g. /snapshot-123-hash.tar.zst) - file_paths: list[str] = field(default_factory=list) - slots_diff: int = 0 - latency_ms: float = 0.0 - download_speed: float = 0.0 # bytes/sec - - -# -- JSON-RPC helpers ---------------------------------------------------------- - - -class _NoRedirectHandler(urllib.request.HTTPRedirectHandler): - """Handler that captures redirect Location instead of following it.""" - - def redirect_request( - self, - req: Request, - fp: HTTPResponse, - code: int, - msg: str, - headers: dict[str, str], # type: ignore[override] - newurl: str, - ) -> None: - return None - - -def rpc_post(url: str, method: str, params: list[object] | None = None, - timeout: int = 25) -> object | None: - """JSON-RPC POST. 
def head_no_follow(url: str, timeout: float = 3) -> tuple[str | None, float]:
    """Issue a HEAD request without following redirects.

    Args:
        url: URL to probe.
        timeout: Socket timeout in seconds.

    Returns:
        ``(location, latency_sec)`` when the server answered with a 3xx
        redirect carrying a ``Location`` header. For a response that is not
        a redirect, the ``Location`` header value (normally absent, i.e.
        ``None``) is returned together with the measured latency. Any error
        (malformed URL, connection failure, timeout, protocol violation,
        non-3xx HTTP error) yields ``(None, 0.0)``.
    """
    # Local import: the module only imports HTTPResponse from http.client.
    from http.client import HTTPException

    opener: urllib.request.OpenerDirector = urllib.request.build_opener(_NoRedirectHandler)
    start: float = time.monotonic()
    try:
        req = Request(url, method="HEAD")
        resp: HTTPResponse = opener.open(req, timeout=timeout)  # type: ignore[assignment]
    except urllib.error.HTTPError as e:
        # 3xx redirects raise HTTPError with the redirect info
        latency: float = time.monotonic() - start
        location: str | None = e.headers.get("Location")
        status: int = e.code
        e.close()  # HTTPError wraps the response; release its socket
        if location and 300 <= status < 400:
            return location, latency
        return None, 0.0
    except (urllib.error.URLError, HTTPException, OSError, TimeoutError, ValueError):
        # HTTPException covers malformed responses (e.g. bad status line);
        # ValueError covers URLs that urllib refuses to parse.
        return None, 0.0

    latency = time.monotonic() - start
    # Non-redirect (2xx) — server didn't redirect, not useful for discovery
    location = resp.headers.get("Location")
    resp.close()
    return location, latency
get_cluster_rpc_nodes(rpc_url: str, version_filter: str | None = None) -> list[str]: - """Get all RPC node addresses from getClusterNodes.""" - result: object | None = rpc_post(rpc_url, "getClusterNodes") - if not isinstance(result, list): - return [] - - rpc_addrs: list[str] = [] - for node in result: - if not isinstance(node, dict): - continue - if version_filter is not None: - node_version: str | None = node.get("version") - if node_version and not node_version.startswith(version_filter): - continue - rpc: str | None = node.get("rpc") - if rpc: - rpc_addrs.append(rpc) - return list(set(rpc_addrs)) - - -def _parse_snapshot_filename(location: str) -> tuple[str, str | None]: - """Extract filename and full redirect path from Location header. - - Returns (filename, full_path). full_path includes any path prefix - the server returned (e.g. '/snapshots/snapshot-123-hash.tar.zst'). - """ - # Location may be absolute URL or relative path - if location.startswith("http://") or location.startswith("https://"): - # Absolute URL — extract path - from urllib.parse import urlparse - path: str = urlparse(location).path - else: - path = location - - filename: str = path.rsplit("/", 1)[-1] - return filename, path - - -def probe_rpc_snapshot( - rpc_address: str, - current_slot: int, -) -> SnapshotSource | None: - """Probe a single RPC node for available snapshots. - - Discovery only — no filtering. Returns a SnapshotSource with all available - info so the caller can decide what to keep. Filtering happens after all - probes complete, so rejected sources are still visible for debugging. 
- """ - full_url: str = f"http://{rpc_address}/snapshot.tar.bz2" - - # Full snapshot is required — every source must have one - full_location, full_latency = head_no_follow(full_url, timeout=2) - if not full_location: - return None - - latency_ms: float = full_latency * 1000 - - full_filename, full_path = _parse_snapshot_filename(full_location) - fm: re.Match[str] | None = FULL_SNAP_RE.match(full_filename) - if not fm: - return None - - full_snap_slot: int = int(fm.group(1)) - slots_diff: int = current_slot - full_snap_slot - - file_paths: list[str] = [full_path] - - # Also check for incremental snapshot - inc_url: str = f"http://{rpc_address}/incremental-snapshot.tar.bz2" - inc_location, _ = head_no_follow(inc_url, timeout=2) - if inc_location: - inc_filename, inc_path = _parse_snapshot_filename(inc_location) - m: re.Match[str] | None = INCR_SNAP_RE.match(inc_filename) - if m: - inc_base_slot: int = int(m.group(1)) - # Incremental must be based on this source's full snapshot - if inc_base_slot == full_snap_slot: - file_paths.append(inc_path) - - return SnapshotSource( - rpc_address=rpc_address, - file_paths=file_paths, - slots_diff=slots_diff, - latency_ms=latency_ms, - ) - - -def discover_sources( - rpc_url: str, - current_slot: int, - max_age_slots: int, - max_latency_ms: float, - threads: int, - version_filter: str | None, -) -> list[SnapshotSource]: - """Discover all snapshot sources, then filter. - - Probing and filtering are separate: all reachable sources are collected - first so we can report what exists even if filters reject everything. 
- """ - rpc_nodes: list[str] = get_cluster_rpc_nodes(rpc_url, version_filter) - if not rpc_nodes: - log.error("No RPC nodes found via getClusterNodes") - return [] - - log.info("Found %d RPC nodes, probing for snapshots...", len(rpc_nodes)) - - all_sources: list[SnapshotSource] = [] - with concurrent.futures.ThreadPoolExecutor(max_workers=threads) as pool: - futures: dict[concurrent.futures.Future[SnapshotSource | None], str] = { - pool.submit(probe_rpc_snapshot, addr, current_slot): addr - for addr in rpc_nodes - } - done: int = 0 - for future in concurrent.futures.as_completed(futures): - done += 1 - if done % 200 == 0: - log.info(" probed %d/%d nodes, %d reachable", - done, len(rpc_nodes), len(all_sources)) - try: - result: SnapshotSource | None = future.result() - except (urllib.error.URLError, OSError, TimeoutError) as e: - log.debug("Probe failed for %s: %s", futures[future], e) - continue - if result: - all_sources.append(result) - - log.info("Discovered %d reachable sources", len(all_sources)) - - # Apply filters - filtered: list[SnapshotSource] = [] - rejected_age: int = 0 - rejected_latency: int = 0 - for src in all_sources: - if src.slots_diff > max_age_slots or src.slots_diff < -100: - rejected_age += 1 - continue - if src.latency_ms > max_latency_ms: - rejected_latency += 1 - continue - filtered.append(src) - - if rejected_age or rejected_latency: - log.info("Filtered: %d rejected by age (>%d slots), %d by latency (>%.0fms)", - rejected_age, max_age_slots, rejected_latency, max_latency_ms) - - if not filtered and all_sources: - # Show what was available so the user can adjust filters - all_sources.sort(key=lambda s: s.slots_diff) - best = all_sources[0] - log.warning("All %d sources rejected by filters. Best available: " - "%s (age=%d slots, latency=%.0fms). 
" - "Try --max-snapshot-age %d --max-latency %.0f", - len(all_sources), best.rpc_address, - best.slots_diff, best.latency_ms, - best.slots_diff + 500, - max(best.latency_ms * 1.5, 500)) - - log.info("Found %d sources after filtering", len(filtered)) - return filtered - - -# -- Speed benchmark ----------------------------------------------------------- - - -def measure_speed(rpc_address: str, measure_time: int = 7) -> float: - """Measure download speed from an RPC node. Returns bytes/sec.""" - url: str = f"http://{rpc_address}/snapshot.tar.bz2" - req = Request(url) - try: - with urllib.request.urlopen(req, timeout=measure_time + 5) as resp: - start: float = time.monotonic() - total: int = 0 - while True: - elapsed: float = time.monotonic() - start - if elapsed >= measure_time: - break - chunk: bytes = resp.read(81920) - if not chunk: - break - total += len(chunk) - elapsed = time.monotonic() - start - if elapsed <= 0: - return 0.0 - return total / elapsed - except (urllib.error.URLError, OSError, TimeoutError): - return 0.0 - - -# -- Incremental probing ------------------------------------------------------- - - -def probe_incremental( - fast_sources: list[SnapshotSource], - full_snap_slot: int, -) -> tuple[str | None, list[str]]: - """Probe fast sources for the best incremental matching full_snap_slot. - - Returns (filename, mirror_urls) or (None, []) if no match found. - The "best" incremental is the one with the highest slot (closest to head). 
- """ - best_filename: str | None = None - best_slot: int = 0 - best_source: SnapshotSource | None = None - best_path: str | None = None - - for source in fast_sources: - inc_url: str = f"http://{source.rpc_address}/incremental-snapshot.tar.bz2" - inc_location, _ = head_no_follow(inc_url, timeout=2) - if not inc_location: - continue - inc_fn, inc_fp = _parse_snapshot_filename(inc_location) - m: re.Match[str] | None = INCR_SNAP_RE.match(inc_fn) - if not m: - continue - if int(m.group(1)) != full_snap_slot: - log.debug(" %s: incremental base slot %s != full %d, skipping", - source.rpc_address, m.group(1), full_snap_slot) - continue - inc_slot: int = int(m.group(2)) - if inc_slot > best_slot: - best_slot = inc_slot - best_filename = inc_fn - best_source = source - best_path = inc_fp - - if best_filename is None or best_source is None or best_path is None: - return None, [] - - # Build mirror list — check other sources for the same filename - mirror_urls: list[str] = [f"http://{best_source.rpc_address}{best_path}"] - for other in fast_sources: - if other.rpc_address == best_source.rpc_address: - continue - other_loc, _ = head_no_follow( - f"http://{other.rpc_address}/incremental-snapshot.tar.bz2", timeout=2) - if other_loc: - other_fn, other_fp = _parse_snapshot_filename(other_loc) - if other_fn == best_filename: - mirror_urls.append(f"http://{other.rpc_address}{other_fp}") - - return best_filename, mirror_urls - - -# -- Download ------------------------------------------------------------------ - - -def download_aria2c( - urls: list[str], - output_dir: str, - filename: str, - connections: int = 16, -) -> bool: - """Download a file using aria2c with parallel connections. - - When multiple URLs are provided, aria2c treats them as mirrors of the - same file and distributes chunks across all of them. 
- """ - num_mirrors: int = len(urls) - total_splits: int = max(connections, connections * num_mirrors) - cmd: list[str] = [ - "aria2c", - "--file-allocation=none", - "--continue=false", - f"--max-connection-per-server={connections}", - f"--split={total_splits}", - "--min-split-size=50M", - # aria2c retries individual chunk connections on transient network - # errors (TCP reset, timeout). This is transport-level retry analogous - # to TCP retransmit, not application-level retry of a failed operation. - "--max-tries=5", - "--retry-wait=5", - "--timeout=60", - "--connect-timeout=10", - "--summary-interval=10", - "--console-log-level=notice", - f"--dir={output_dir}", - f"--out={filename}", - "--auto-file-renaming=false", - "--allow-overwrite=true", - *urls, - ] - - log.info("Downloading %s", filename) - log.info(" aria2c: %d connections x %d mirrors (%d splits)", - connections, num_mirrors, total_splits) - - start: float = time.monotonic() - result: subprocess.CompletedProcess[bytes] = subprocess.run(cmd) - elapsed: float = time.monotonic() - start - - if result.returncode != 0: - log.error("aria2c failed with exit code %d", result.returncode) - return False - - filepath: Path = Path(output_dir) / filename - if not filepath.exists(): - log.error("aria2c reported success but %s does not exist", filepath) - return False - - size_bytes: int = filepath.stat().st_size - size_gb: float = size_bytes / (1024 ** 3) - avg_mb: float = size_bytes / elapsed / (1024 ** 2) if elapsed > 0 else 0 - log.info(" Done: %.1f GB in %.0fs (%.1f MiB/s avg)", size_gb, elapsed, avg_mb) - return True - - -# -- Shared helpers ------------------------------------------------------------ - - -def _discover_and_benchmark( - rpc_url: str, - current_slot: int, - *, - max_snapshot_age: int = 10000, - max_latency: float = 500, - threads: int = 500, - min_download_speed: int = 20, - measurement_time: int = 7, - max_speed_checks: int = 15, - version_filter: str | None = None, -) -> list[SnapshotSource]: - 
"""Discover snapshot sources and benchmark download speed. - - Returns sources that meet the minimum speed requirement, sorted by speed. - """ - sources: list[SnapshotSource] = discover_sources( - rpc_url, current_slot, - max_age_slots=max_snapshot_age, - max_latency_ms=max_latency, - threads=threads, - version_filter=version_filter, - ) - if not sources: - return [] - - sources.sort(key=lambda s: s.latency_ms) - - log.info("Benchmarking download speed on top %d sources...", max_speed_checks) - fast_sources: list[SnapshotSource] = [] - checked: int = 0 - min_speed_bytes: int = min_download_speed * 1024 * 1024 - - for source in sources: - if checked >= max_speed_checks: - break - checked += 1 - - speed: float = measure_speed(source.rpc_address, measurement_time) - source.download_speed = speed - speed_mib: float = speed / (1024 ** 2) - - if speed < min_speed_bytes: - log.info(" %s: %.1f MiB/s (too slow, need >=%d MiB/s)", - source.rpc_address, speed_mib, min_download_speed) - continue - - log.info(" %s: %.1f MiB/s (latency: %.0fms, age: %d slots)", - source.rpc_address, speed_mib, - source.latency_ms, source.slots_diff) - fast_sources.append(source) - - return fast_sources - - -def _rolling_incremental_download( - fast_sources: list[SnapshotSource], - full_snap_slot: int, - output_dir: str, - convergence_slots: int, - connections: int, - rpc_url: str, -) -> str | None: - """Download incrementals in a loop until converged. - - Probes fast_sources for incrementals matching full_snap_slot, downloads - the freshest one, then re-probes until the gap to head is within - convergence_slots. Returns the filename of the final incremental, - or None if no incremental was found. 
- """ - prev_inc_filename: str | None = None - loop_start: float = time.monotonic() - max_convergence_time: float = 1800.0 # 30 min wall-clock limit - - while True: - if time.monotonic() - loop_start > max_convergence_time: - if prev_inc_filename: - log.warning("Convergence timeout (%.0fs) — using %s", - max_convergence_time, prev_inc_filename) - else: - log.warning("Convergence timeout (%.0fs) — no incremental downloaded", - max_convergence_time) - break - - inc_fn, inc_mirrors = probe_incremental(fast_sources, full_snap_slot) - if inc_fn is None: - if prev_inc_filename is None: - log.error("No matching incremental found for base slot %d", - full_snap_slot) - else: - log.info("No newer incremental available, using %s", prev_inc_filename) - break - - m_inc: re.Match[str] | None = INCR_SNAP_RE.match(inc_fn) - assert m_inc is not None - inc_slot: int = int(m_inc.group(2)) - - head_slot: int | None = get_current_slot(rpc_url) - if head_slot is None: - log.warning("Cannot get current slot — downloading best available incremental") - gap: int = convergence_slots + 1 - else: - gap = head_slot - inc_slot - - if inc_fn == prev_inc_filename: - if gap <= convergence_slots: - log.info("Incremental %s already downloaded (gap %d slots, converged)", - inc_fn, gap) - break - log.info("No newer incremental yet (slot %d, gap %d slots), waiting...", - inc_slot, gap) - time.sleep(10) - continue - - if prev_inc_filename is not None: - old_path: Path = Path(output_dir) / prev_inc_filename - if old_path.exists(): - log.info("Removing superseded incremental %s", prev_inc_filename) - old_path.unlink() - - log.info("Downloading incremental %s (%d mirrors, slot %d, gap %d slots)", - inc_fn, len(inc_mirrors), inc_slot, gap) - if not download_aria2c(inc_mirrors, output_dir, inc_fn, connections): - log.warning("Failed to download incremental %s — re-probing in 10s", inc_fn) - time.sleep(10) - continue - - prev_inc_filename = inc_fn - - if gap <= convergence_slots: - log.info("Converged: 
incremental slot %d is %d slots behind head", - inc_slot, gap) - break - - if head_slot is None: - break - - log.info("Not converged (gap %d > %d), re-probing in 10s...", - gap, convergence_slots) - time.sleep(10) - - return prev_inc_filename - - -# -- Public API ---------------------------------------------------------------- - - -def download_incremental_for_slot( - output_dir: str, - full_snap_slot: int, - *, - cluster: str = "mainnet-beta", - rpc_url: str | None = None, - connections: int = 16, - threads: int = 500, - max_snapshot_age: int = 10000, - max_latency: float = 500, - min_download_speed: int = 20, - measurement_time: int = 7, - max_speed_checks: int = 15, - version_filter: str | None = None, - convergence_slots: int = 500, -) -> bool: - """Download an incremental snapshot for an existing full snapshot. - - Discovers sources, benchmarks speed, then runs the rolling incremental - download loop for the given full snapshot base slot. Does NOT download - a full snapshot. - - Returns True if an incremental was downloaded, False otherwise. - """ - resolved_rpc: str = rpc_url or CLUSTER_RPC[cluster] - - if not shutil.which("aria2c"): - log.error("aria2c not found. 
Install with: apt install aria2") - return False - - log.info("Incremental download for base slot %d", full_snap_slot) - current_slot: int | None = get_current_slot(resolved_rpc) - if current_slot is None: - log.error("Cannot get current slot from %s", resolved_rpc) - return False - - fast_sources: list[SnapshotSource] = _discover_and_benchmark( - resolved_rpc, current_slot, - max_snapshot_age=max_snapshot_age, - max_latency=max_latency, - threads=threads, - min_download_speed=min_download_speed, - measurement_time=measurement_time, - max_speed_checks=max_speed_checks, - version_filter=version_filter, - ) - if not fast_sources: - log.error("No fast sources found") - return False - - os.makedirs(output_dir, exist_ok=True) - result: str | None = _rolling_incremental_download( - fast_sources, full_snap_slot, output_dir, - convergence_slots, connections, resolved_rpc, - ) - return result is not None - - -def download_best_snapshot( - output_dir: str, - *, - cluster: str = "mainnet-beta", - rpc_url: str | None = None, - connections: int = 16, - threads: int = 500, - max_snapshot_age: int = 10000, - max_latency: float = 500, - min_download_speed: int = 20, - measurement_time: int = 7, - max_speed_checks: int = 15, - version_filter: str | None = None, - full_only: bool = False, - convergence_slots: int = 500, -) -> bool: - """Download the best available snapshot to output_dir. - - This is the programmatic API — called by entrypoint.py for automatic - snapshot download. Returns True on success, False on failure. - - All parameters have sensible defaults matching the CLI interface. - """ - resolved_rpc: str = rpc_url or CLUSTER_RPC[cluster] - - if not shutil.which("aria2c"): - log.error("aria2c not found. 
Install with: apt install aria2") - return False - - log.info("Cluster: %s | RPC: %s", cluster, resolved_rpc) - current_slot: int | None = get_current_slot(resolved_rpc) - if current_slot is None: - log.error("Cannot get current slot from %s", resolved_rpc) - return False - log.info("Current slot: %d", current_slot) - - fast_sources: list[SnapshotSource] = _discover_and_benchmark( - resolved_rpc, current_slot, - max_snapshot_age=max_snapshot_age, - max_latency=max_latency, - threads=threads, - min_download_speed=min_download_speed, - measurement_time=measurement_time, - max_speed_checks=max_speed_checks, - version_filter=version_filter, - ) - if not fast_sources: - log.error("No fast sources found") - return False - - # Use the fastest source as primary, build full snapshot download plan - best: SnapshotSource = fast_sources[0] - full_paths: list[str] = [fp for fp in best.file_paths - if fp.rsplit("/", 1)[-1].startswith("snapshot-")] - if not full_paths: - log.error("Best source has no full snapshot") - return False - - # Build mirror URLs for the full snapshot - full_filename: str = full_paths[0].rsplit("/", 1)[-1] - full_mirrors: list[str] = [f"http://{best.rpc_address}{full_paths[0]}"] - for other in fast_sources[1:]: - for other_fp in other.file_paths: - if other_fp.rsplit("/", 1)[-1] == full_filename: - full_mirrors.append(f"http://{other.rpc_address}{other_fp}") - break - - speed_mib: float = best.download_speed / (1024 ** 2) - log.info("Best source: %s (%.1f MiB/s), %d mirrors", - best.rpc_address, speed_mib, len(full_mirrors)) - - # Download full snapshot - os.makedirs(output_dir, exist_ok=True) - total_start: float = time.monotonic() - - filepath: Path = Path(output_dir) / full_filename - if filepath.exists() and filepath.stat().st_size > 0: - log.info("Skipping %s (already exists: %.1f GB)", - full_filename, filepath.stat().st_size / (1024 ** 3)) - else: - if not download_aria2c(full_mirrors, output_dir, full_filename, connections): - log.error("Failed to 
download %s", full_filename) - return False - - # Download incremental separately — the full download took minutes, - # so any incremental from discovery is stale. Re-probe for fresh ones. - if not full_only: - fm: re.Match[str] | None = FULL_SNAP_RE.match(full_filename) - if fm: - full_snap_slot: int = int(fm.group(1)) - log.info("Downloading incremental for base slot %d...", full_snap_slot) - _rolling_incremental_download( - fast_sources, full_snap_slot, output_dir, - convergence_slots, connections, resolved_rpc, - ) - - total_elapsed: float = time.monotonic() - total_start - log.info("All downloads complete in %.0fs", total_elapsed) - - return True - - -# -- Main (CLI) ---------------------------------------------------------------- - - -def main() -> int: - p: argparse.ArgumentParser = argparse.ArgumentParser( - description="Download Solana snapshots with aria2c parallel downloads", - ) - p.add_argument("-o", "--output", default="/srv/kind/solana/snapshots", - help="Snapshot output directory (default: /srv/kind/solana/snapshots)") - p.add_argument("-c", "--cluster", default="mainnet-beta", - choices=list(CLUSTER_RPC), - help="Solana cluster (default: mainnet-beta)") - p.add_argument("-r", "--rpc", default=None, - help="RPC URL for cluster discovery (default: public RPC)") - p.add_argument("-n", "--connections", type=int, default=16, - help="aria2c connections per download (default: 16)") - p.add_argument("-t", "--threads", type=int, default=500, - help="Threads for parallel RPC probing (default: 500)") - p.add_argument("--max-snapshot-age", type=int, default=10000, - help="Max snapshot age in slots (default: 10000)") - p.add_argument("--max-latency", type=float, default=500, - help="Max RPC probe latency in ms (default: 500)") - p.add_argument("--min-download-speed", type=int, default=20, - help="Min download speed in MiB/s (default: 20)") - p.add_argument("--measurement-time", type=int, default=7, - help="Speed measurement duration in seconds (default: 7)") - 
p.add_argument("--max-speed-checks", type=int, default=15, - help="Max nodes to benchmark before giving up (default: 15)") - p.add_argument("--version", default=None, - help="Filter nodes by version prefix (e.g. '2.2')") - p.add_argument("--convergence-slots", type=int, default=500, - help="Max slot gap for incremental convergence (default: 500)") - p.add_argument("--full-only", action="store_true", - help="Download only full snapshot, skip incremental") - p.add_argument("--dry-run", action="store_true", - help="Find best source and print URL, don't download") - p.add_argument("--post-cmd", - help="Shell command to run after successful download " - "(e.g. 'kubectl scale deployment ... --replicas=1')") - p.add_argument("-v", "--verbose", action="store_true") - args: argparse.Namespace = p.parse_args() - - logging.basicConfig( - level=logging.DEBUG if args.verbose else logging.INFO, - format="%(asctime)s %(levelname)s %(message)s", - datefmt="%H:%M:%S", - ) - - # Dry-run uses the original inline flow (needs access to sources for URL printing) - if args.dry_run: - rpc_url: str = args.rpc or CLUSTER_RPC[args.cluster] - current_slot: int | None = get_current_slot(rpc_url) - if current_slot is None: - log.error("Cannot get current slot from %s", rpc_url) - return 1 - - sources: list[SnapshotSource] = discover_sources( - rpc_url, current_slot, - max_age_slots=args.max_snapshot_age, - max_latency_ms=args.max_latency, - threads=args.threads, - version_filter=args.version, - ) - if not sources: - log.error("No snapshot sources found") - return 1 - - sources.sort(key=lambda s: s.latency_ms) - best = sources[0] - for fp in best.file_paths: - print(f"http://{best.rpc_address}{fp}") - return 0 - - ok: bool = download_best_snapshot( - args.output, - cluster=args.cluster, - rpc_url=args.rpc, - connections=args.connections, - threads=args.threads, - max_snapshot_age=args.max_snapshot_age, - max_latency=args.max_latency, - min_download_speed=args.min_download_speed, - 
measurement_time=args.measurement_time, - max_speed_checks=args.max_speed_checks, - version_filter=args.version, - full_only=args.full_only, - convergence_slots=args.convergence_slots, - ) - - if ok and args.post_cmd: - log.info("Running post-download command: %s", args.post_cmd) - result: subprocess.CompletedProcess[bytes] = subprocess.run( - args.post_cmd, shell=True, - ) - if result.returncode != 0: - log.error("Post-download command failed with exit code %d", - result.returncode) - return 1 - log.info("Post-download command completed successfully") - - return 0 if ok else 1 - - -if __name__ == "__main__": - sys.exit(main()) diff --git a/scripts/agave-container/start-test.sh b/scripts/agave-container/start-test.sh deleted file mode 100644 index e003a97a..00000000 --- a/scripts/agave-container/start-test.sh +++ /dev/null @@ -1,112 +0,0 @@ -#!/usr/bin/env bash -set -euo pipefail - -# ----------------------------------------------------------------------- -# Start solana-test-validator with optional SPL token setup -# -# Environment variables: -# FACILITATOR_PUBKEY - facilitator fee-payer public key (base58) -# SERVER_PUBKEY - server/payee wallet public key (base58) -# CLIENT_PUBKEY - client/payer wallet public key (base58) -# MINT_DECIMALS - token decimals (default: 6, matching USDC) -# MINT_AMOUNT - amount to mint to client (default: 1000000000) -# LEDGER_DIR - ledger directory (default: /data/ledger) -# ----------------------------------------------------------------------- - -LEDGER_DIR="${LEDGER_DIR:-/data/ledger}" -MINT_DECIMALS="${MINT_DECIMALS:-6}" -MINT_AMOUNT="${MINT_AMOUNT:-1000000000}" -SETUP_MARKER="${LEDGER_DIR}/.setup-done" - -sudo chown -R "$(id -u):$(id -g)" "$LEDGER_DIR" 2>/dev/null || true - -# Start test-validator in the background -solana-test-validator \ - --ledger "${LEDGER_DIR}" \ - --rpc-port 8899 \ - --bind-address 0.0.0.0 \ - --quiet & - -VALIDATOR_PID=$! - -# Wait for RPC to become available -echo "Waiting for test-validator RPC..." 
-for i in $(seq 1 60); do - if solana cluster-version --url http://127.0.0.1:8899 >/dev/null 2>&1; then - echo "Test-validator is ready (attempt ${i})" - break - fi - sleep 1 -done - -solana config set --url http://127.0.0.1:8899 - -# Only run setup once (idempotent via marker file) -if [ ! -f "${SETUP_MARKER}" ]; then - echo "Running first-time setup..." - - # Airdrop SOL to all wallets for gas - for PUBKEY in "${FACILITATOR_PUBKEY:-}" "${SERVER_PUBKEY:-}" "${CLIENT_PUBKEY:-}"; do - if [ -n "${PUBKEY}" ]; then - echo "Airdropping 100 SOL to ${PUBKEY}..." - solana airdrop 100 "${PUBKEY}" --url http://127.0.0.1:8899 || true - fi - done - - # Create a USDC-equivalent SPL token mint if any pubkeys are set - if [ -n "${CLIENT_PUBKEY:-}" ] || [ -n "${FACILITATOR_PUBKEY:-}" ] || [ -n "${SERVER_PUBKEY:-}" ]; then - MINT_AUTHORITY_FILE="${LEDGER_DIR}/mint-authority.json" - if [ ! -f "${MINT_AUTHORITY_FILE}" ]; then - solana-keygen new --no-bip39-passphrase --outfile "${MINT_AUTHORITY_FILE}" --force - MINT_AUTH_PUBKEY=$(solana-keygen pubkey "${MINT_AUTHORITY_FILE}") - solana airdrop 10 "${MINT_AUTH_PUBKEY}" --url http://127.0.0.1:8899 - fi - - MINT_ADDRESS_FILE="${LEDGER_DIR}/usdc-mint-address.txt" - if [ ! -f "${MINT_ADDRESS_FILE}" ]; then - spl-token create-token \ - --decimals "${MINT_DECIMALS}" \ - --mint-authority "${MINT_AUTHORITY_FILE}" \ - --url http://127.0.0.1:8899 \ - 2>&1 | grep "Creating token" | awk '{print $3}' > "${MINT_ADDRESS_FILE}" - echo "Created USDC mint: $(cat "${MINT_ADDRESS_FILE}")" - fi - - USDC_MINT=$(cat "${MINT_ADDRESS_FILE}") - - # Create ATAs and mint tokens for the client - if [ -n "${CLIENT_PUBKEY:-}" ]; then - echo "Creating ATA for client ${CLIENT_PUBKEY}..." - spl-token create-account "${USDC_MINT}" \ - --owner "${CLIENT_PUBKEY}" \ - --fee-payer "${MINT_AUTHORITY_FILE}" \ - --url http://127.0.0.1:8899 || true - - echo "Minting ${MINT_AMOUNT} tokens to client..." 
- spl-token mint "${USDC_MINT}" "${MINT_AMOUNT}" \ - --recipient-owner "${CLIENT_PUBKEY}" \ - --mint-authority "${MINT_AUTHORITY_FILE}" \ - --url http://127.0.0.1:8899 || true - fi - - # Create ATAs for server and facilitator - for PUBKEY in "${SERVER_PUBKEY:-}" "${FACILITATOR_PUBKEY:-}"; do - if [ -n "${PUBKEY}" ]; then - echo "Creating ATA for ${PUBKEY}..." - spl-token create-account "${USDC_MINT}" \ - --owner "${PUBKEY}" \ - --fee-payer "${MINT_AUTHORITY_FILE}" \ - --url http://127.0.0.1:8899 || true - fi - done - - # Expose mint address for other containers - cp "${MINT_ADDRESS_FILE}" /tmp/usdc-mint-address.txt 2>/dev/null || true - fi - - touch "${SETUP_MARKER}" - echo "Setup complete." -fi - -echo "solana-test-validator running (PID ${VALIDATOR_PID})" -wait ${VALIDATOR_PID} From 481e9d239247c01604ed9e11160abc94e9dd9eb4 Mon Sep 17 00:00:00 2001 From: "A. F. Dudley" Date: Tue, 10 Mar 2026 06:21:15 +0000 Subject: [PATCH 53/62] Squashed 'agave-stack/' content from commit 7100d11 git-subtree-dir: agave-stack git-subtree-split: 7100d117421bd79fb52d3dfcd85b76cf18ed0ffa --- README.md | 277 ++ WORK_IN_PROGRESS.md | 198 + ansible/biscayne-redeploy.yml | 193 + .../k8s-manifests/doublezero-daemonset.yaml | 50 + deployment/spec.yml | 113 + scripts/backlog.sh | 234 + scripts/biscayne-status.py | 280 ++ scripts/snapshot-download.py | 546 +++ scripts/zfs-setup.md | 109 + .../compose/docker-compose-agave-rpc.yml | 112 + .../compose/docker-compose-agave-test.yml | 27 + .../compose/docker-compose-agave.yml | 115 + .../compose/docker-compose-doublezero.yml | 19 + .../compose/docker-compose-monitoring.yml | 49 + .../config/agave/restart-node.sh | 8 + stack-orchestrator/config/agave/restart.cron | 4 + .../grafana-dashboards/agave-indexing.json | 3941 +++++++++++++++++ .../agave-transactions.json | 1985 +++++++++ .../grafana-dashboards/dashboards.yml | 12 + .../grafana-dashboards/sync-status.json | 2183 +++++++++ .../grafana-dashboards/system-overview.json | 1643 +++++++ 
.../grafana-datasources/datasources.yml | 16 + .../scripts/check_canonical_slot.sh | 17 + .../scripts/check_getslot_latency.sh | 33 + .../monitoring/telegraf-config/telegraf.conf | 36 + .../laconicnetwork-agave/Dockerfile | 81 + .../laconicnetwork-agave/build.sh | 17 + .../laconicnetwork-agave/entrypoint.py | 686 +++ .../laconicnetwork-agave/ip_echo_preflight.py | 249 ++ .../laconicnetwork-agave/snapshot_download.py | 878 ++++ .../laconicnetwork-agave/start-test.sh | 112 + .../laconicnetwork-doublezero/Dockerfile | 22 + .../laconicnetwork-doublezero/build.sh | 9 + .../laconicnetwork-doublezero/entrypoint.sh | 38 + stack-orchestrator/stacks/agave/README.md | 169 + stack-orchestrator/stacks/agave/stack.yml | 10 + 36 files changed, 14471 insertions(+) create mode 100644 README.md create mode 100644 WORK_IN_PROGRESS.md create mode 100644 ansible/biscayne-redeploy.yml create mode 100644 deployment/k8s-manifests/doublezero-daemonset.yaml create mode 100644 deployment/spec.yml create mode 100755 scripts/backlog.sh create mode 100755 scripts/biscayne-status.py create mode 100755 scripts/snapshot-download.py create mode 100644 scripts/zfs-setup.md create mode 100644 stack-orchestrator/compose/docker-compose-agave-rpc.yml create mode 100644 stack-orchestrator/compose/docker-compose-agave-test.yml create mode 100644 stack-orchestrator/compose/docker-compose-agave.yml create mode 100644 stack-orchestrator/compose/docker-compose-doublezero.yml create mode 100644 stack-orchestrator/compose/docker-compose-monitoring.yml create mode 100644 stack-orchestrator/config/agave/restart-node.sh create mode 100644 stack-orchestrator/config/agave/restart.cron create mode 100644 stack-orchestrator/config/monitoring/grafana-dashboards/agave-indexing.json create mode 100644 stack-orchestrator/config/monitoring/grafana-dashboards/agave-transactions.json create mode 100644 stack-orchestrator/config/monitoring/grafana-dashboards/dashboards.yml create mode 100644 
stack-orchestrator/config/monitoring/grafana-dashboards/sync-status.json create mode 100644 stack-orchestrator/config/monitoring/grafana-dashboards/system-overview.json create mode 100644 stack-orchestrator/config/monitoring/grafana-datasources/datasources.yml create mode 100755 stack-orchestrator/config/monitoring/scripts/check_canonical_slot.sh create mode 100755 stack-orchestrator/config/monitoring/scripts/check_getslot_latency.sh create mode 100644 stack-orchestrator/config/monitoring/telegraf-config/telegraf.conf create mode 100644 stack-orchestrator/container-build/laconicnetwork-agave/Dockerfile create mode 100644 stack-orchestrator/container-build/laconicnetwork-agave/build.sh create mode 100644 stack-orchestrator/container-build/laconicnetwork-agave/entrypoint.py create mode 100644 stack-orchestrator/container-build/laconicnetwork-agave/ip_echo_preflight.py create mode 100644 stack-orchestrator/container-build/laconicnetwork-agave/snapshot_download.py create mode 100644 stack-orchestrator/container-build/laconicnetwork-agave/start-test.sh create mode 100644 stack-orchestrator/container-build/laconicnetwork-doublezero/Dockerfile create mode 100644 stack-orchestrator/container-build/laconicnetwork-doublezero/build.sh create mode 100644 stack-orchestrator/container-build/laconicnetwork-doublezero/entrypoint.sh create mode 100644 stack-orchestrator/stacks/agave/README.md create mode 100644 stack-orchestrator/stacks/agave/stack.yml diff --git a/README.md b/README.md new file mode 100644 index 00000000..bb1c2260 --- /dev/null +++ b/README.md @@ -0,0 +1,277 @@ +# agave-stack + +Unified Agave/Jito Solana stack for [laconic-so](https://github.com/LaconicNetwork/stack-orchestrator). Deploys Solana validators, RPC nodes, and test validators as containers with optional [DoubleZero](https://doublezero.xyz) network routing. 
+ +## Modes + +| Mode | Compose file | Use case | +|------|-------------|----------| +| `validator` | `docker-compose-agave.yml` | Voting validator (mainnet/testnet) | +| `rpc` | `docker-compose-agave-rpc.yml` | Non-voting RPC node | +| `test` | `docker-compose-agave-test.yml` | Local dev with instant finality | + +Mode is selected via the `AGAVE_MODE` environment variable. + +## Repository layout + +``` +agave-stack/ +├── deployment/ # Reference deployment (biscayne) +│ ├── spec.yml # k8s-kind deployment spec +│ └── k8s-manifests/ +│ └── doublezero-daemonset.yaml # DZ DaemonSet (hostNetwork) +├── stack-orchestrator/ +│ ├── stacks/agave/ +│ │ ├── stack.yml # laconic-so stack definition +│ │ └── README.md # Stack-level docs +│ ├── compose/ +│ │ ├── docker-compose-agave.yml # Voting validator +│ │ ├── docker-compose-agave-rpc.yml # Non-voting RPC +│ │ ├── docker-compose-agave-test.yml # Test validator +│ │ └── docker-compose-doublezero.yml # DoubleZero daemon +│ ├── container-build/ +│ │ ├── laconicnetwork-agave/ # Agave/Jito image +│ │ │ ├── Dockerfile # Two-stage build from source +│ │ │ ├── build.sh # laconic-so build script +│ │ │ ├── entrypoint.sh # Mode router +│ │ │ ├── start-validator.sh # Voting validator startup +│ │ │ ├── start-rpc.sh # RPC node startup +│ │ │ └── start-test.sh # Test validator + SPL setup +│ │ └── laconicnetwork-doublezero/ # DoubleZero image +│ │ ├── Dockerfile # Installs from Cloudsmith apt +│ │ ├── build.sh +│ │ └── entrypoint.sh +│ └── config/agave/ +│ ├── restart-node.sh # Container restart helper +│ └── restart.cron # Cron schedule for periodic restarts +``` + +## Prerequisites + +- [laconic-so](https://github.com/LaconicNetwork/stack-orchestrator) (stack orchestrator) +- Docker +- Kind (for k8s deployments) + +## Building + +```bash +# Vanilla Agave v3.1.9 +laconic-so --stack agave build-containers + +# Jito v3.1.8 (required for MEV) +AGAVE_REPO=https://github.com/jito-foundation/jito-solana.git \ +AGAVE_VERSION=v3.1.8-jito \ +laconic-so
--stack agave build-containers +``` + +Build compiles from source (~30-60 min on first build). This produces both the `laconicnetwork/agave:local` and `laconicnetwork/doublezero:local` images. + +## Deploying + +### Test validator (local dev) + +```bash +laconic-so --stack agave deploy init --output spec.yml +laconic-so --stack agave deploy create --spec-file spec.yml --deployment-dir my-test +laconic-so deployment --dir my-test start +``` + +The test validator starts with instant finality and optionally creates SPL token mints and airdrops to configured pubkeys. + +### Mainnet/testnet (Docker Compose) + +```bash +laconic-so --stack agave deploy init --output spec.yml +# Edit spec.yml: set AGAVE_MODE, VALIDATOR_ENTRYPOINT, KNOWN_VALIDATOR, etc. +laconic-so --stack agave deploy create --spec-file spec.yml --deployment-dir my-node +laconic-so deployment --dir my-node start +``` + +### Kind/k8s deployment + +The `deployment/spec.yml` provides a reference spec targeting `k8s-kind`. The compose files use `network_mode: host` which works for Docker Compose and is silently ignored by laconic-so's k8s conversion (it uses explicit ports from the deployment spec instead). 
+ +```bash +laconic-so --stack agave deploy create \ + --spec-file deployment/spec.yml \ + --deployment-dir my-deployment + +# Mount validator keypairs +cp validator-identity.json my-deployment/data/validator-config/ +cp vote-account-keypair.json my-deployment/data/validator-config/ # validator mode only + +laconic-so deployment --dir my-deployment start +``` + +## Configuration + +### Common (all modes) + +| Variable | Default | Description | +|----------|---------|-------------| +| `AGAVE_MODE` | `test` | `test`, `rpc`, or `validator` | +| `VALIDATOR_ENTRYPOINT` | *required* | Cluster entrypoint (host:port) | +| `KNOWN_VALIDATOR` | *required* | Known validator pubkey | +| `EXTRA_ENTRYPOINTS` | | Space-separated additional entrypoints | +| `EXTRA_KNOWN_VALIDATORS` | | Space-separated additional known validators | +| `RPC_PORT` | `8899` | RPC HTTP port | +| `RPC_BIND_ADDRESS` | `127.0.0.1` | RPC bind address | +| `GOSSIP_PORT` | `8001` | Gossip protocol port | +| `DYNAMIC_PORT_RANGE` | `8000-10000` | TPU/TVU/repair UDP port range | +| `LIMIT_LEDGER_SIZE` | `50000000` | Max ledger slots to retain | +| `SNAPSHOT_INTERVAL_SLOTS` | `1000` | Full snapshot interval | +| `MAXIMUM_SNAPSHOTS_TO_RETAIN` | `5` | Max full snapshots | +| `EXPECTED_GENESIS_HASH` | | Cluster genesis verification | +| `EXPECTED_SHRED_VERSION` | | Shred version verification | +| `RUST_LOG` | `info` | Log level | +| `SOLANA_METRICS_CONFIG` | | Metrics reporting config | + +### Validator mode + +| Variable | Default | Description | +|----------|---------|-------------| +| `VOTE_ACCOUNT_KEYPAIR` | `/data/config/vote-account-keypair.json` | Vote account keypair path | + +Identity keypair must be mounted at `/data/config/validator-identity.json`. 
+ +### RPC mode + +| Variable | Default | Description | +|----------|---------|-------------| +| `PUBLIC_RPC_ADDRESS` | | If set, advertise as public RPC | +| `ACCOUNT_INDEXES` | `program-id,spl-token-owner,spl-token-mint` | Account indexes for queries | + +Identity is auto-generated if not mounted. + +### Jito MEV (validator and RPC modes) + +Set `JITO_ENABLE=true` and provide: + +| Variable | Description | +|----------|-------------| +| `JITO_BLOCK_ENGINE_URL` | Block engine endpoint | +| `JITO_SHRED_RECEIVER_ADDR` | Shred receiver (region-specific) | +| `JITO_RELAYER_URL` | Relayer URL (validator mode) | +| `JITO_TIP_PAYMENT_PROGRAM` | Tip payment program pubkey | +| `JITO_DISTRIBUTION_PROGRAM` | Tip distribution program pubkey | +| `JITO_MERKLE_ROOT_AUTHORITY` | Merkle root upload authority | +| `JITO_COMMISSION_BPS` | Commission basis points | + +Image must be built from `jito-foundation/jito-solana` for Jito flags to work. + +### Test mode + +| Variable | Default | Description | +|----------|---------|-------------| +| `FACILITATOR_PUBKEY` | | Pubkey to airdrop SOL | +| `SERVER_PUBKEY` | | Pubkey to airdrop SOL | +| `CLIENT_PUBKEY` | | Pubkey to airdrop SOL + create ATA | +| `MINT_DECIMALS` | `6` | SPL token decimals | +| `MINT_AMOUNT` | `1000000` | SPL tokens to mint | + +## DoubleZero + +[DoubleZero](https://doublezero.xyz) provides optimized network routing for Solana validators via GRE tunnels (IP protocol 47) and BGP (TCP/179) over link-local 169.254.0.0/16. Validator traffic to other DZ participants is routed through private fiber instead of the public internet. + +### How it works + +`doublezerod` creates a `doublezero0` GRE tunnel interface and runs BGP peering through it. Routes are injected into the host routing table, so the validator transparently sends traffic over the fiber backbone. IBRL mode falls back to public internet if DZ is down. 
+ +### Requirements + +- Validator identity keypair at `/data/config/validator-identity.json` +- `privileged: true` + `NET_ADMIN` (GRE tunnel + route table manipulation) +- `hostNetwork: true` (GRE uses IP protocol 47 — cannot be port-mapped) +- Node registered with DoubleZero passport system + +### Docker Compose + +`docker-compose-doublezero.yml` runs alongside the validator with `network_mode: host`, sharing the `validator-config` volume for identity access. + +### k8s + +laconic-so does not pass `hostNetwork` through to generated k8s resources. DoubleZero runs as a DaemonSet applied after `deployment start`: + +```bash +kubectl apply -f deployment/k8s-manifests/doublezero-daemonset.yaml +``` + +Since the validator pods share the node's network namespace, they automatically see the GRE routes injected by `doublezerod`. + +| Variable | Default | Description | +|----------|---------|-------------| +| `VALIDATOR_IDENTITY_PATH` | `/data/config/validator-identity.json` | Validator identity keypair | +| `DOUBLEZERO_RPC_ENDPOINT` | `http://127.0.0.1:8899` | Solana RPC for DZ registration | +| `DOUBLEZERO_EXTRA_ARGS` | | Additional doublezerod arguments | + +## Runtime requirements + +The container requires the following (already set in compose files): + +| Setting | Value | Why | +|---------|-------|-----| +| `privileged` | `true` | `mlock()` syscall and raw network access | +| `cap_add` | `IPC_LOCK` | Memory page locking for account indexes and ledger | +| `ulimits.memlock` | `-1` (unlimited) | Agave locks gigabytes of memory | +| `ulimits.nofile` | `1000000` | Gossip/TPU connections + memory-mapped ledger files | +| `network_mode` | `host` | Direct host network stack for gossip, TPU, UDP ranges | + +Without these, Agave either refuses to start or dies under load. + +## Container overhead + +Containers with `privileged: true` and `network_mode: host` add **zero measurable overhead** vs bare metal. 
Linux containers are not VMs: + +- **Network**: Host network namespace directly — no bridge, no NAT, no veth. Same kernel code path as bare metal. +- **CPU**: No hypervisor. Same physical cores, same scheduler priority. +- **Memory**: `IPC_LOCK` + unlimited memlock = identical `mlock()` behavior. +- **Disk I/O**: hostPath-backed PVs have identical I/O characteristics. + +The only overhead is cgroup accounting (nanoseconds per syscall) and overlayfs for cold file opens (single-digit microseconds, zero once cached). + +## Scheduled restarts + +The `config/agave/restart.cron` defines periodic restarts to mitigate memory growth: + +- **Validator**: every 4 hours +- **RPC**: every 6 hours (staggered 30 min offset) + +Restarts use `restart-node.sh`, which sends TERM to the matching container for a graceful shutdown. + +## Biscayne reference deployment + +The `deployment/` directory contains a reference deployment for biscayne.vaasl.io (186.233.184.235), a mainnet voting validator with Jito MEV and DoubleZero: + +```bash +# Build Jito image +AGAVE_REPO=https://github.com/jito-foundation/jito-solana.git \ +AGAVE_VERSION=v3.1.8-jito \ +laconic-so --stack agave build-containers + +# Create deployment +laconic-so --stack agave deploy create \ + --spec-file deployment/spec.yml \ + --deployment-dir biscayne-deployment + +# Mount keypairs +cp validator-identity.json biscayne-deployment/data/validator-config/ +cp vote-account-keypair.json biscayne-deployment/data/validator-config/ + +# Start +laconic-so deployment --dir biscayne-deployment start + +# Start DoubleZero +kubectl apply -f deployment/k8s-manifests/doublezero-daemonset.yaml +``` + +To run as a non-voting RPC node, set `AGAVE_MODE: rpc` in `deployment/spec.yml`.
+ +## Volumes + +| Volume | Mount | Content | +|--------|-------|---------| +| `validator-config` / `rpc-config` | `/data/config` | Identity keypairs, node config | +| `validator-ledger` / `rpc-ledger` | `/data/ledger` | Blockchain ledger data | +| `validator-accounts` / `rpc-accounts` | `/data/accounts` | Account state cache | +| `validator-snapshots` / `rpc-snapshots` | `/data/snapshots` | Full and incremental snapshots | +| `doublezero-config` | `~/.config/doublezero` | DZ identity and state | diff --git a/WORK_IN_PROGRESS.md b/WORK_IN_PROGRESS.md new file mode 100644 index 00000000..192159be --- /dev/null +++ b/WORK_IN_PROGRESS.md @@ -0,0 +1,198 @@ +# Work in Progress: Biscayne TVU Shred Relay + +## Overview + +Biscayne's agave validator was shred-starved (~1.7 slots/sec replay vs ~2.5 mainnet). +Root cause: not enough turbine shreds arriving. Solution: advertise a TVU address in +Ashburn (dense validator population, better turbine tree neighbors) and relay shreds +to biscayne in Miami over the laconic backbone. + +### Architecture + +``` +Turbine peers (hundreds of validators) + | + v UDP shreds to port 20000 +laconic-was-sw01 Et1/1 (64.92.84.81, Ashburn) + | ASIC receives on front-panel interface + | EOS monitor session mirrors matched packets to CPU + v +mirror0 interface (Linux userspace) + | socat reads raw frames, sends as UDP + v 172.16.1.188 -> 186.233.184.235:9100 (Et4/1 backbone, 25.4ms RTT) +laconic-mia-sw01 Et4/1 (172.16.1.189, Miami) + | forwards via default route (Et1/1, same metro) + v 0.13ms +biscayne:9100 (186.233.184.235, Miami) + | shred-unwrap.py strips IP+UDP headers + v clean shred payload to localhost:9000 +agave-validator TVU port +``` + +Total one-way relay latency: ~12.8ms (about half the 25.4ms backbone RTT, plus the Miami hop) + +### Results + +Before relay: ~1.7 slots/sec replay, falling behind ~0.8 slots/sec. +After relay: ~3.32 slots/sec replay, catching up ~0.82 slots/sec.
+ +--- + +## Changes by Host + +### laconic-was-sw01 (Ashburn) — `install@137.239.200.198` + +All changes are ephemeral (not persisted, lost on reboot). + +**1. EOS monitor session (running-config, not in startup-config)** + +Mirrors inbound UDP port 20000 traffic on Et1/1 to a CPU-accessible `mirror0` interface. +Required because the Arista 7280CR3A ASIC handles front-panel traffic without punting to +Linux userspace — regular sockets cannot receive packets on front-panel IPs. + +``` +monitor session 1 source Ethernet1/1 rx +monitor session 1 ip access-group SHRED-RELAY +monitor session 1 destination Cpu +``` + +**2. EOS ACL (running-config, not in startup-config)** + +``` +ip access-list SHRED-RELAY + 10 permit udp any any eq 20000 +``` + +**3. EOS static route (running-config, not in startup-config)** + +``` +ip route 186.233.184.235/32 172.16.1.189 +``` + +Routes biscayne traffic via Et4/1 backbone to laconic-mia-sw01 instead of the default +route (64.92.84.80, Cogent public internet). + +**4. Linux kernel static route (ephemeral, `ip route add`)** + +``` +ip route add 186.233.184.235/32 via 172.16.1.189 dev et4_1 +``` + +Required because socat runs in Linux userspace. The EOS static route programs the ASIC +but does not always sync to the Linux kernel routing table. Without this, socat's UDP +packets egress via the default route (et1_1, public internet). + +**5. socat relay process (foreground, pts/5)** + +```bash +sudo socat -u INTERFACE:mirror0,type=2 UDP-SENDTO:186.233.184.235:9100 +``` + +Reads raw L2 frames from mirror0 (SOCK_DGRAM strips ethernet header, leaving IP+UDP+payload). +Sends each frame as a UDP datagram to biscayne:9100. Runs as root (raw socket access to mirror0). + +PID: 27743 (child of sudo PID 27742) + +--- + +### laconic-mia-sw01 (Miami) — `install@209.42.167.130` + +**No changes made.** MIA already reaches biscayne at 0.13ms via its default route +(`209.42.167.132` on Et1/1, same metro). 
Relay traffic from WAS arrives on Et4/1 +(`172.16.1.189`) and MIA forwards to `186.233.184.235` natively. + +Key interfaces for reference: +- Et1/1: `209.42.167.133/31` (public uplink, default route via 209.42.167.132) +- Et4/1: `172.16.1.189/31` (backbone link to WAS, peer 172.16.1.188) +- Et8/1: `172.16.1.192/31` (another backbone link, not used for relay) + +--- + +### biscayne (Miami) — `rix@biscayne.vaasl.io` + +**1. Custom agave image: `laconicnetwork/agave:tvu-relay`** + +Stock agave v3.1.9 with cherry-picked commit 9f4b3ae from anza master (adds +`--public-tvu-address` flag, from anza PR #6778). Built in `/tmp/agave-tvu-patch/`, +transferred via `docker save | scp | docker load | kind load docker-image`. + +**2. K8s deployment changes** + +Namespace: `laconic-laconic-70ce4c4b47e23b85` +Deployment: `laconic-70ce4c4b47e23b85-deployment` + +Changes from previous deployment: +- Image: `laconicnetwork/agave:local` -> `laconicnetwork/agave:tvu-relay` +- Added env: `PUBLIC_TVU_ADDRESS=64.92.84.81:20000` +- Set: `JITO_ENABLE=false` (stock agave has no Jito flags) +- Strategy: changed to `Recreate` (hostNetwork port conflicts prevent RollingUpdate) + +The validator runs with `--public-tvu-address 64.92.84.81:20000`, causing it to +advertise the Ashburn switch IP as its TVU address in gossip. Turbine tree peers +send shreds to Ashburn instead of directly to Miami. + +**3. shred-unwrap.py (foreground process, PID 2497694)** + +```bash +python3 /tmp/shred-unwrap.py 9100 127.0.0.1 9000 +``` + +Listens on UDP port 9100, strips IP+UDP headers from mirrored packets (variable-length +IP header via IHL field + 8-byte UDP header), forwards clean shred payloads to +localhost:9000 (the validator's TVU port). Running as user `rix`. + +Script location: `/tmp/shred-unwrap.py` + +**4. 
agave-stack repo changes (uncommitted)** + +- `stack-orchestrator/container-build/laconicnetwork-agave/start-rpc.sh`: + Added `PUBLIC_TVU_ADDRESS` to header docs and + `[ -n "${PUBLIC_TVU_ADDRESS:-}" ] && ARGS+=(--public-tvu-address "$PUBLIC_TVU_ADDRESS")` + +- `stack-orchestrator/compose/docker-compose-agave-rpc.yml`: + Added `PUBLIC_TVU_ADDRESS: ${PUBLIC_TVU_ADDRESS:-}` to environment section + +--- + +## What's NOT Production-Ready + +### Ephemeral processes +- socat on laconic-was-sw01: foreground process in a terminal session +- shred-unwrap.py on biscayne: foreground process, running from /tmp +- Both die if the terminal disconnects or the host reboots +- Need systemd units for both + +### Ephemeral switch config +- Monitor session, ACL, and static routes on was-sw01 are in running-config only +- Not saved to startup-config (`write memory` was run but the route didn't persist) +- Linux kernel route (`ip route add`) is completely ephemeral +- All lost on switch reboot + +### No monitoring +- No alerting on relay health (socat crash, shred-unwrap crash, packet loss) +- No metrics on relay throughput vs direct turbine throughput +- No comparison of before/after slot gap trends + +### Validator still catching up +- ~50k slots behind as of initial relay activation +- Catching up at ~0.82 slots/sec (~2,950 slots/hour) +- ~17 hours to catch up from current position, or reset with fresh snapshot (~15-30 min) + +--- + +## Key Details + +| Item | Value | +|------|-------| +| Biscayne validator identity | `4WeLUxfQghbhsLEuwaAzjZiHg2VBw87vqHc4iZrGvKPr` | +| Biscayne IP | `186.233.184.235` | +| laconic-was-sw01 public IP | `64.92.84.81` (Et1/1) | +| laconic-was-sw01 backbone IP | `172.16.1.188` (Et4/1) | +| laconic-was-sw01 SSH | `install@137.239.200.198` | +| laconic-mia-sw01 backbone IP | `172.16.1.189` (Et4/1) | +| laconic-mia-sw01 SSH | `install@209.42.167.130` | +| Biscayne SSH | `rix@biscayne.vaasl.io` (via ProxyJump abernathy) | +| Backbone RTT (WAS-MIA) | 25.4ms 
(Et4/1 ↔ Et4/1, 0.01ms jitter) | +| Relay one-way latency | ~12.8ms | +| Agave image | `laconicnetwork/agave:tvu-relay` (v3.1.9 + commit 9f4b3ae) | +| EOS version | 4.34.0F | diff --git a/ansible/biscayne-redeploy.yml b/ansible/biscayne-redeploy.yml new file mode 100644 index 00000000..80f49189 --- /dev/null +++ b/ansible/biscayne-redeploy.yml @@ -0,0 +1,193 @@ +--- +# Redeploy agave-stack on biscayne with aria2c snapshot pre-download +# +# Usage: +# # Standard redeploy (download snapshot, preserve accounts + ledger) +# ansible-playbook -i biscayne.vaasl.io, ansible/biscayne-redeploy.yml +# +# # Full wipe (accounts + ledger) — slow rebuild +# ansible-playbook -i biscayne.vaasl.io, ansible/biscayne-redeploy.yml \ +# -e wipe_accounts=true -e wipe_ledger=true +# +# # Skip snapshot download (use existing) +# ansible-playbook -i biscayne.vaasl.io, ansible/biscayne-redeploy.yml \ +# -e skip_snapshot=true +# +# # Pass extra args to snapshot-download.py +# ansible-playbook -i biscayne.vaasl.io, ansible/biscayne-redeploy.yml \ +# -e 'snapshot_args=--version 2.2 --min-download-speed 50' +# +# # Snapshot only (no redeploy) +# ansible-playbook -i biscayne.vaasl.io, ansible/biscayne-redeploy.yml --tags snapshot +# +- name: Redeploy agave validator on biscayne + hosts: all + gather_facts: false + vars: + deployment_dir: /srv/deployments/agave + laconic_so: /home/rix/.local/bin/laconic-so + kind_cluster: laconic-70ce4c4b47e23b85 + k8s_namespace: "laconic-{{ kind_cluster }}" + snapshot_dir: /srv/solana/snapshots + ledger_dir: /srv/solana/ledger + accounts_dir: /srv/solana/ramdisk/accounts + ramdisk_mount: /srv/solana/ramdisk + ramdisk_device: /dev/ram0 + snapshot_script_local: "{{ playbook_dir }}/../scripts/snapshot-download.py" + snapshot_script: /tmp/snapshot-download.py + # Flags — non-destructive by default + wipe_accounts: false + wipe_ledger: false + skip_snapshot: false + snapshot_args: "" + + tasks: + # --- Snapshot download (runs while validator is still up) --- + - name: 
Verify aria2c installed + command: which aria2c + changed_when: false + when: not skip_snapshot | bool + tags: [snapshot] + + - name: Copy snapshot script to remote + copy: + src: "{{ snapshot_script_local }}" + dest: "{{ snapshot_script }}" + mode: "0755" + when: not skip_snapshot | bool + tags: [snapshot] + + - name: Download snapshot via aria2c + command: > + python3 {{ snapshot_script }} + -o {{ snapshot_dir }} + {{ snapshot_args }} + become: true + register: snapshot_result + when: not skip_snapshot | bool + timeout: 3600 + tags: [snapshot] + + - name: Show snapshot download result + debug: + msg: "{{ snapshot_result.stdout_lines | default(['skipped']) }}" + tags: [snapshot] + + # --- Teardown (namespace only, preserve kind cluster) --- + - name: Delete deployment namespace + command: > + kubectl delete namespace {{ k8s_namespace }} --timeout=120s + register: ns_delete + failed_when: false + tags: [teardown] + + - name: Wait for namespace to terminate + command: > + kubectl get namespace {{ k8s_namespace }} + -o jsonpath='{.status.phase}' + register: ns_status + retries: 30 + delay: 5 + until: ns_status.rc != 0 + failed_when: false + when: ns_delete.rc == 0 + tags: [teardown] + + # --- Data wipe (opt-in) --- + - name: Wipe ledger data + shell: rm -rf {{ ledger_dir }}/* + become: true + when: wipe_ledger | bool + tags: [wipe] + + - name: Wipe accounts ramdisk (umount + mkfs + mount) + shell: | + mountpoint -q {{ ramdisk_mount }} && umount {{ ramdisk_mount }} || true + mkfs.ext4 -q {{ ramdisk_device }} + mount {{ ramdisk_device }} {{ ramdisk_mount }} + mkdir -p {{ accounts_dir }} + chown solana:solana {{ ramdisk_mount }} {{ accounts_dir }} + become: true + when: wipe_accounts | bool + tags: [wipe] + + - name: Clean old snapshots (keep newest full + incremental) + shell: | + cd {{ snapshot_dir }} || exit 0 + newest=$(ls -t snapshot-*.tar.* 2>/dev/null | head -1) + if [ -n "$newest" ]; then + newest_inc=$(ls -t incremental-snapshot-*.tar.* 2>/dev/null | head -1) + 
find . -maxdepth 1 -name '*.tar.*' \ + ! -name "$newest" \ + ! -name "${newest_inc:-__none__}" \ + -delete + fi + become: true + when: not skip_snapshot | bool + tags: [wipe] + + # --- Deploy --- + - name: Verify kind-config.yml has unified mount root + command: "grep -c 'containerPath: /mnt$' {{ deployment_dir }}/kind-config.yml" + register: mount_root_check + failed_when: mount_root_check.stdout | int < 1 + tags: [deploy] + + - name: Start deployment + command: "{{ laconic_so }} deployment --dir {{ deployment_dir }} start" + timeout: 600 + tags: [deploy] + + - name: Wait for pod to be running + command: > + kubectl get pods -n {{ k8s_namespace }} + -o jsonpath='{.items[0].status.phase}' + register: pod_status + retries: 60 + delay: 10 + until: pod_status.stdout == "Running" + tags: [deploy] + + # --- Verify --- + - name: Verify unified mount inside kind node + command: "docker exec {{ kind_cluster }}-control-plane ls /mnt/solana/" + register: mount_check + tags: [verify] + + - name: Show mount contents + debug: + msg: "{{ mount_check.stdout_lines }}" + tags: [verify] + + - name: Check validator log file is being written + command: > + kubectl exec -n {{ k8s_namespace }} + deployment/{{ kind_cluster }}-deployment + -c agave-validator -- test -f /data/log/validator.log + retries: 12 + delay: 10 + until: log_file_check.rc == 0 + register: log_file_check + failed_when: false + tags: [verify] + + - name: Check RPC health + uri: + url: http://127.0.0.1:8899/health + return_content: true + register: rpc_health + retries: 6 + delay: 10 + until: rpc_health.status == 200 + failed_when: false + delegate_to: "{{ inventory_hostname }}" + tags: [verify] + + - name: Report status + debug: + msg: >- + Deployment complete. + Log: {{ 'writing' if log_file_check.rc == 0 else 'not yet created' }}. + RPC: {{ rpc_health.content | default('not responding') }}. + Wiped: ledger={{ wipe_ledger }}, accounts={{ wipe_accounts }}. 
+ tags: [verify] diff --git a/deployment/k8s-manifests/doublezero-daemonset.yaml b/deployment/k8s-manifests/doublezero-daemonset.yaml new file mode 100644 index 00000000..2baaa4c1 --- /dev/null +++ b/deployment/k8s-manifests/doublezero-daemonset.yaml @@ -0,0 +1,50 @@ +# DoubleZero DaemonSet - applied separately from laconic-so deployment +# laconic-so does not support hostNetwork in generated k8s resources, +# so this manifest is applied via kubectl after 'deployment start'. +# +# DoubleZero creates GRE tunnels (IP protocol 47) and runs BGP (tcp/179) +# on link-local 169.254.0.0/16. This requires host network access. +# The GRE routes injected into the node routing table are automatically +# visible to all pods using hostNetwork. +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: doublezero + labels: + app: doublezero +spec: + selector: + matchLabels: + app: doublezero + template: + metadata: + labels: + app: doublezero + spec: + hostNetwork: true + containers: + - name: doublezerod + image: laconicnetwork/doublezero:local + securityContext: + privileged: true + capabilities: + add: + - NET_ADMIN + env: + - name: VALIDATOR_IDENTITY_PATH + value: /data/config/validator-identity.json + - name: DOUBLEZERO_RPC_ENDPOINT + value: http://127.0.0.1:8899 + volumeMounts: + - name: validator-config + mountPath: /data/config + readOnly: true + - name: doublezero-config + mountPath: /root/.config/doublezero + volumes: + - name: validator-config + persistentVolumeClaim: + claimName: validator-config + - name: doublezero-config + persistentVolumeClaim: + claimName: doublezero-config diff --git a/deployment/spec.yml b/deployment/spec.yml new file mode 100644 index 00000000..bce46463 --- /dev/null +++ b/deployment/spec.yml @@ -0,0 +1,113 @@ +# Biscayne Solana Validator deployment spec +# Host: biscayne.vaasl.io (186.233.184.235) +# Identity: 4WeLUxfQghbhsLEuwaAzjZiHg2VBw87vqHc4iZrGvKPr +stack: /srv/deployments/agave-stack/stack-orchestrator/stacks/agave +deploy-to: k8s-kind 
+kind-mount-root: /srv/kind +network: + http-proxy: + - host-name: biscayne.vaasl.io + routes: + - path: / + proxy-to: agave-validator:8899 + - path: / + proxy-to: agave-validator:8900 + websocket: true + ports: + agave-validator: + - '8899' + - '8900' + - '8001' + - 8001/udp + - 9000/udp + - 9001/udp + - 9002/udp + - 9003/udp + - 9004/udp + - 9005/udp + - 9006/udp + - 9007/udp + - 9008/udp + - 9009/udp + - 9010/udp + - 9011/udp + - 9012/udp + - 9013/udp + - 9014/udp + - 9015/udp + - 9016/udp + - 9017/udp + - 9018/udp + - 9019/udp + - 9020/udp + - 9021/udp + - 9022/udp + - 9023/udp + - 9024/udp + - 9025/udp +resources: + containers: + reservations: + cpus: '4.0' + memory: 256000M + limits: + cpus: '32.0' + memory: 921600M +security: + privileged: true + unlimited-memlock: true + capabilities: + - IPC_LOCK +volumes: + # Config volumes — on ZFS dataset (backed up via snapshots) + validator-config: /srv/deployments/agave/data/validator-config + doublezero-validator-identity: /srv/deployments/agave/data/validator-config + doublezero-config: /srv/deployments/agave/data/doublezero-config + # Heavy data volumes — on zvol/ramdisk (not backed up, rebuildable) + validator-ledger: /srv/kind/solana/ledger + validator-accounts: /srv/kind/solana/ramdisk/accounts + validator-snapshots: /srv/kind/solana/snapshots + validator-log: /srv/kind/solana/log + # Monitoring + monitoring-influxdb-data: /srv/kind/solana/monitoring/influxdb + monitoring-grafana-data: /srv/kind/solana/monitoring/grafana +configmaps: + monitoring-telegraf-config: config/monitoring/telegraf-config + monitoring-telegraf-scripts: config/monitoring/scripts + monitoring-grafana-datasources: config/monitoring/grafana-datasources + monitoring-grafana-dashboards: config/monitoring/grafana-dashboards +config: + # Mode: 'rpc' (non-voting) — matches current biscayne systemd config + AGAVE_MODE: rpc + # Mainnet entrypoints + VALIDATOR_ENTRYPOINT: entrypoint.mainnet-beta.solana.com:8001 + EXTRA_ENTRYPOINTS: 
entrypoint2.mainnet-beta.solana.com:8001 entrypoint3.mainnet-beta.solana.com:8001 entrypoint4.mainnet-beta.solana.com:8001 entrypoint5.mainnet-beta.solana.com:8001 + # Known validators (Solana Foundation, Everstake, Chorus One) + KNOWN_VALIDATOR: 7Np41oeYqPefeNQEHSv1UDhYrehxin3NStELsSKCT4K2 + EXTRA_KNOWN_VALIDATORS: GdnSyH3YtwcxFvQrVVJMm1JhTS4QVX7MFsX56uJLUfiZ dDzy5SR3AXdYWVqbDEkVFdvSPCtS9ihF5kJkHCtXoFs DE1bawNcRJB9rVm3buyMVfr8mBEoyyu73NBovf2oXJsJ CakcnaRDHka2gXyfbEd2d3xsvkJkqsLw2akB3zsN1D2S C1ocKDYMCm2ooWptMMnpd5VEB2Nx4UMJgRuYofysyzcA GwHH8ciFhR8vejWCqmg8FWZUCNtubPY2esALvy5tBvji 6WgdYhhGE53WrZ7ywJA15hBVkw7CRbQ8yDBBTwmBtAHN + # Network + RPC_PORT: '8899' + RPC_BIND_ADDRESS: 0.0.0.0 + GOSSIP_PORT: '8001' + GOSSIP_HOST: 137.239.194.65 + DYNAMIC_PORT_RANGE: 9000-10000 + # Cluster verification + EXPECTED_GENESIS_HASH: 5eykt4UsFv8P8NJdTREpY1vzqKqZKvdpKuc147dw2N9d + EXPECTED_SHRED_VERSION: '50093' + # Storage + LIMIT_LEDGER_SIZE: '50000000' + SNAPSHOT_INTERVAL_SLOTS: '1000' + MAXIMUM_SNAPSHOTS_TO_RETAIN: '5' + NO_INCREMENTAL_SNAPSHOTS: 'true' + RUST_LOG: info,solana_metrics=warn + SOLANA_METRICS_CONFIG: host=http://localhost:8086,db=agave_metrics,u=admin,p=admin + # Jito MEV (NY region shred receiver) — disabled until voting enabled + JITO_ENABLE: 'false' + JITO_BLOCK_ENGINE_URL: https://mainnet.block-engine.jito.wtf + JITO_SHRED_RECEIVER_ADDR: 141.98.216.96:1002 + JITO_TIP_PAYMENT_PROGRAM: T1pyyaTNZsKv2WcRAB8oVnk93mLJw2XzjtVYqCsaHqt + JITO_DISTRIBUTION_PROGRAM: 4R3gSG8BpU4t19KYj8CfnbtRpnT8gtk4dvTHxVRwc2r7 + JITO_MERKLE_ROOT_AUTHORITY: 8F4jGUmxF36vQ6yabnsxX6AQVXdKBhs8kGSUuRKSg8Xt + JITO_COMMISSION_BPS: '800' + # DoubleZero + DOUBLEZERO_RPC_ENDPOINT: http://127.0.0.1:8899 diff --git a/scripts/backlog.sh b/scripts/backlog.sh new file mode 100755 index 00000000..9ef79a83 --- /dev/null +++ b/scripts/backlog.sh @@ -0,0 +1,234 @@ +#!/bin/bash + +set -Eeuo pipefail + +export PATH=/sbin:/bin:/usr/sbin:/usr/bin:/usr/local/sbin:/usr/local/bin +export 
XDG_RUNTIME_DIR="/run/user/$(id -u)" +mkdir -p "$XDG_RUNTIME_DIR" + +# optional suffix from command-line, prepend dash if non-empty +SUFFIX="${1:-}" +SUFFIX="${SUFFIX:+-$SUFFIX}" + +# define variables +DATASET="biscayne/DATA/deployments" +DEPLOYMENT_DIR="/srv/deployments/agave" +LOG_FILE="$HOME/.backlog_history" +ZFS_HOLD="backlog:pending" +SERVICE_STOP_TIMEOUT="300" +SNAPSHOT_RETENTION="6" +SNAPSHOT_PREFIX="backlog" +SNAPSHOT_TAG="$(date +%Y%m%d)${SUFFIX}" +SNAPSHOT="${DATASET}@${SNAPSHOT_PREFIX}-${SNAPSHOT_TAG}" + +# remote replication targets +REMOTES=( + "mysterio:edith/DATA/backlog/biscayne-main" + "ardham:batterywharf/DATA/backlog/biscayne-main" +) + +# log functions +log() { + local time_fmt + time_fmt=$(date -u +"%Y-%m-%dT%H:%M:%SZ") + echo "[$time_fmt] $1" >> "$LOG_FILE" +} + +log_close() { + local end_time duration + end_time=$(date +%s) + duration=$((end_time - start_time)) + log "Backlog completed in ${duration}s" + echo "" >> "$LOG_FILE" +} + +# service controls +services() { + local action="$1" + + case "$action" in + stop) + log "Stopping agave deployment..." + laconic-so deployment --dir "$DEPLOYMENT_DIR" stop + + log "Waiting for services to fully stop..." + local deadline=$(( $(date +%s) + SERVICE_STOP_TIMEOUT )) + while true; do + local running + running=$(docker ps --filter "label=com.docker.compose.project.working_dir=$DEPLOYMENT_DIR" -q 2>/dev/null | wc -l) + if [[ "$running" -eq 0 ]]; then + break + fi + if (( $(date +%s) >= deadline )); then + log "WARNING: Timeout waiting for services to stop; continuing." + break + fi + sleep 0.2 + done + ;; + start) + log "Starting agave deployment..." + laconic-so deployment --dir "$DEPLOYMENT_DIR" start + ;; + *) + log "ERROR: Unknown action '$action' in services()" + exit 2 + ;; + esac +} + +# send a snapshot to one remote +# args: snap remote_host remote_dataset +snapshot_send_one() { + local snap="$1" remote_host="$2" remote_dataset="$3" + + log "Checking remote snapshots on $remote_host..." 
# send snapshot to all remotes
#
# Args:
#   $1  snap: full snapshot name to replicate.
# Reads globals: REMOTES (array of "host:dataset" entries).
# Returns: 0 when every remote succeeded, 1 when at least one failed.
snapshot_send() {
  local snap="$1"
  local failure_count=0

  # Disable errexit for the loop so one failing remote does not abort the
  # remaining sends; failures are counted instead.
  set +e
  local entry remote_host remote_dataset
  for entry in "${REMOTES[@]}"; do
    # Split "host:dataset" at the first colon: host is everything before it,
    # dataset everything after it.
    remote_host="${entry%%:*}"
    remote_dataset="${entry#*:}"
    if ! snapshot_send_one "$snap" "$remote_host" "$remote_dataset"; then
      failure_count=$((failure_count + 1))
    fi
  done
  # NOTE(review): errexit is switched back on unconditionally; this assumes the
  # script runs with `set -e` enabled (not visible from here) - confirm.
  set -e

  if [[ "$failure_count" -gt 0 ]]; then
    log "WARNING: $failure_count destination(s) failed or are out of sync."
    return 1
  fi
  return 0
}
def rpc_call(method: str, url: str = LOCAL_RPC, remote: bool = True, params: list | None = None) -> dict | None:
    """Issue a JSON-RPC 2.0 request with curl and return the decoded reply.

    Args:
        method: JSON-RPC method name.
        url: RPC endpoint passed to curl.
        remote: When true the curl command is run through ``ssh()``,
            otherwise through ``local()``.
        params: Positional JSON-RPC params; ``None`` is sent as ``[]``.

    Returns:
        The decoded JSON object, or ``None`` when the output is not valid
        JSON or is not a JSON object.
    """
    import shlex

    payload = json.dumps({"jsonrpc": "2.0", "id": 1, "method": method, "params": params or []})
    # The command string is interpreted by a shell on both paths, so the URL
    # and payload must be quoted: a single quote in `params` would otherwise
    # terminate the -d argument and corrupt (or inject into) the command.
    cmd = (
        f"curl -s {shlex.quote(url)} -X POST "
        f"-H 'Content-Type: application/json' -d {shlex.quote(payload)}"
    )
    raw = ssh(cmd) if remote else local(cmd)
    try:
        reply = json.loads(raw)
    except (json.JSONDecodeError, TypeError):
        return None
    # Callers use `.get(...)` on the result; a bare JSON scalar or list (for
    # example an error page that happens to parse) must not reach them.
    return reply if isinstance(reply, dict) else None
def get_oom_kills() -> str:
    """Summarise kernel OOM kills found in the remote host's dmesg buffer.

    Returns:
        ``"none"`` when no ``oom-kill`` line is present, ``"check failed"``
        when the count cannot be parsed, otherwise ``"<n> total (last: <UTC
        time>)"`` or ``"<n> total (timestamp parse failed)"``.
    """
    from datetime import datetime, timezone

    # `grep -c` always prints the count but exits 1 when the count is 0. The
    # previous `|| echo 0` therefore produced "0\n0" in the healthy case, which
    # failed int() and reported "check failed" instead of "none". `|| true`
    # only normalises the exit status and prints nothing.
    raw = ssh("sudo dmesg | grep -c 'oom-kill' || true")
    try:
        count = int(raw.strip())
    except ValueError:
        return "check failed"
    if count == 0:
        return "none"

    # dmesg timestamps are seconds since boot; combine the last matching
    # line's timestamp with the boot time to get an absolute UTC time.
    raw = ssh(
        "BOOT=$(date -d \"$(uptime -s)\" +%s); "
        "KERN_TS=$(sudo dmesg | grep 'oom-kill' | tail -1 | "
        " sed 's/\\[\\s*\\([0-9.]*\\)\\].*/\\1/'); "
        "echo $BOOT $KERN_TS"
    )
    try:
        parts = raw.split()
        boot_epoch = int(parts[0])
        kern_secs = float(parts[1])
    except (IndexError, ValueError):
        return f"{count} total (timestamp parse failed)"
    oom_epoch = boot_epoch + int(kern_secs)
    oom_utc = datetime.fromtimestamp(oom_epoch, tz=timezone.utc).strftime("%Y-%m-%d %H:%M:%S UTC")
    return f"{count} total (last: {oom_utc})"
def main() -> None:
    """Print a one-shot status report for the validator to stdout.

    Sections are printed in a fixed order: RPC health and slots, replay
    rate, pod status, memory and OOM kills, then shred relay figures. Each
    value comes from the corresponding ``get_*`` helper in this file; this
    function only formats and prints them.
    """
    print("=" * 60)
    print(" BISCAYNE VALIDATOR STATUS")
    print("=" * 60)

    # Health + slots
    print("\n--- RPC ---")
    health = get_health()
    local_slot, mainnet_slot = get_slots()
    print(f" Health: {health}")
    if local_slot is not None:
        print(f" Local slot: {local_slot:,}")
    else:
        print(" Local slot: unreachable")
    if mainnet_slot is not None:
        print(f" Mainnet slot: {mainnet_slot:,}")
    # The gap is only meaningful when both endpoints answered.
    if local_slot and mainnet_slot:
        gap = mainnet_slot - local_slot
        print(f" Gap: {gap:,} slots")

    # Replay rate (10s sample)
    print("\n--- Replay ---")
    # Progress text is printed without a newline so the result line below can
    # overwrite it via the leading "\r".
    print(" Sampling replay rate (10s)...", end="", flush=True)
    rate, s0, s1 = get_replay_rate()
    if rate is not None:
        print(f"\r Replay rate: {rate:.1f} slots/sec ({s0:,} → {s1:,})")
        # NOTE(review): 2.5 is presumably the nominal cluster slot rate in
        # slots/sec that replay must exceed to catch up - confirm.
        net = rate - 2.5
        if net > 0:
            print(f" Net catchup: +{net:.1f} slots/sec (gaining)")
        elif net < 0:
            print(f" Net catchup: {net:.1f} slots/sec (falling behind)")
        else:
            print(" Net catchup: 0 (keeping pace)")
    else:
        print("\r Replay rate: could not measure")

    # Pod
    print("\n--- Pod ---")
    pod = get_pod_status()
    print(f" {pod}")

    # Memory
    print("\n--- Memory ---")
    mem = get_memory()
    print(f" Cgroup: {mem}")

    # OOM
    oom = get_oom_kills()
    print(f" OOM kills: {oom}")

    # Relay
    print("\n--- Shred Relay ---")
    unwrap = get_unwrap_status()
    print(f" shred-unwrap: {unwrap}")
    # Same overwrite-in-place pattern as the replay sample above; the trailing
    # space in the result line pads over leftover progress text.
    print(" Measuring shred rate (3s)...", end="", flush=True)
    shreds = get_shreds_per_sec()
    print(f"\r TVU shreds: {shreds} ")
    print(" Measuring UDP rate (3s)...", end="", flush=True)
    relay = get_relay_rate()
    print(f"\r UDP inbound: {relay} ")

    print("\n" + "=" * 60)
class _NoRedirectHandler(urllib.request.HTTPRedirectHandler):
    """Redirect handler that never follows a redirect.

    Installing it in an opener lets the caller read the ``Location`` header
    of a 3xx reply instead of being transparently sent to the target.
    """

    def redirect_request(  # type: ignore[override]
        self, req: Request, fp: HTTPResponse, code: int, msg: str,
        headers: dict[str, str], newurl: str,
    ) -> None:
        """Decline the redirect: no follow-up request is produced."""
        return None
def head_no_follow(url: str, timeout: float = 3) -> tuple[str | None, float]:
    """HEAD request without following redirects.

    Args:
        url: URL to probe.
        timeout: Socket timeout in seconds for the request.

    Returns:
        ``(Location header value, latency_sec)`` if the server answered with
        a 3xx redirect carrying a ``Location`` header. ``(None, 0.0)`` on any
        error or non-redirect response.
    """
    opener: urllib.request.OpenerDirector = urllib.request.build_opener(_NoRedirectHandler)
    req = Request(url, method="HEAD")
    start: float = time.monotonic()
    try:
        # _NoRedirectHandler declines every redirect, so urllib reports 3xx
        # replies as HTTPError (handled below). Reaching the body of this
        # `with` therefore means a non-redirect reply, which is not useful
        # for discovery; the response is closed on exit.
        with opener.open(req, timeout=timeout):
            return None, 0.0
    except urllib.error.HTTPError as e:
        latency: float = time.monotonic() - start
        try:
            location: str | None = e.headers.get("Location")
            if location and 300 <= e.code < 400:
                return location, latency
            return None, 0.0
        finally:
            # HTTPError wraps the live response; release its socket now
            # rather than waiting for garbage collection.
            e.close()
    except (urllib.error.URLError, OSError, TimeoutError):
        return None, 0.0
get_cluster_rpc_nodes(rpc_url: str, version_filter: str | None = None) -> list[str]: + """Get all RPC node addresses from getClusterNodes.""" + result: object | None = rpc_post(rpc_url, "getClusterNodes") + if not isinstance(result, list): + return [] + + rpc_addrs: list[str] = [] + for node in result: + if not isinstance(node, dict): + continue + if version_filter is not None: + node_version: str | None = node.get("version") + if node_version and not node_version.startswith(version_filter): + continue + rpc: str | None = node.get("rpc") + if rpc: + rpc_addrs.append(rpc) + return list(set(rpc_addrs)) + + +def _parse_snapshot_filename(location: str) -> tuple[str, str | None]: + """Extract filename and full redirect path from Location header. + + Returns (filename, full_path). full_path includes any path prefix + the server returned (e.g. '/snapshots/snapshot-123-hash.tar.zst'). + """ + # Location may be absolute URL or relative path + if location.startswith("http://") or location.startswith("https://"): + # Absolute URL — extract path + from urllib.parse import urlparse + path: str = urlparse(location).path + else: + path = location + + filename: str = path.rsplit("/", 1)[-1] + return filename, path + + +def probe_rpc_snapshot( + rpc_address: str, + current_slot: int, + max_age_slots: int, + max_latency_ms: float, +) -> SnapshotSource | None: + """Probe a single RPC node for available snapshots. + + Probes for full snapshot first (required), then incremental. Records all + available files. Which files to actually download is decided at download + time based on what already exists locally — not here. + + Based on the discovery approach from etcusr/solana-snapshot-finder. 
+ """ + full_url: str = f"http://{rpc_address}/snapshot.tar.bz2" + + # Full snapshot is required — every source must have one + full_location, full_latency = head_no_follow(full_url, timeout=2) + if not full_location: + return None + + latency_ms: float = full_latency * 1000 + if latency_ms > max_latency_ms: + return None + + full_filename, full_path = _parse_snapshot_filename(full_location) + fm: re.Match[str] | None = FULL_SNAP_RE.match(full_filename) + if not fm: + return None + + full_snap_slot: int = int(fm.group(1)) + slots_diff: int = current_slot - full_snap_slot + + if slots_diff > max_age_slots or slots_diff < -100: + return None + + file_paths: list[str] = [full_path] + + # Also check for incremental snapshot + inc_url: str = f"http://{rpc_address}/incremental-snapshot.tar.bz2" + inc_location, _ = head_no_follow(inc_url, timeout=2) + if inc_location: + inc_filename, inc_path = _parse_snapshot_filename(inc_location) + m: re.Match[str] | None = INCR_SNAP_RE.match(inc_filename) + if m: + inc_base_slot: int = int(m.group(1)) + # Incremental must be based on this source's full snapshot + if inc_base_slot == full_snap_slot: + file_paths.append(inc_path) + + return SnapshotSource( + rpc_address=rpc_address, + file_paths=file_paths, + slots_diff=slots_diff, + latency_ms=latency_ms, + ) + + +def discover_sources( + rpc_url: str, + current_slot: int, + max_age_slots: int, + max_latency_ms: float, + threads: int, + version_filter: str | None, +) -> list[SnapshotSource]: + """Discover all snapshot sources from the cluster.""" + rpc_nodes: list[str] = get_cluster_rpc_nodes(rpc_url, version_filter) + if not rpc_nodes: + log.error("No RPC nodes found via getClusterNodes") + return [] + + log.info("Found %d RPC nodes, probing for snapshots...", len(rpc_nodes)) + + sources: list[SnapshotSource] = [] + with concurrent.futures.ThreadPoolExecutor(max_workers=threads) as pool: + futures: dict[concurrent.futures.Future[SnapshotSource | None], str] = { + pool.submit( + 
def download_aria2c(
    urls: list[str],
    output_dir: str,
    filename: str,
    connections: int = 16,
) -> bool:
    """Download a file using aria2c with parallel connections.

    When multiple URLs are provided, aria2c treats them as mirrors of the
    same file and distributes chunks across all of them.

    Args:
        urls: One or more mirror URLs for the same file.
        output_dir: Directory aria2c writes into.
        filename: Output file name inside ``output_dir``.
        connections: Requested connections per server. aria2c only accepts
            1..16 for this option, so values outside that range are clamped.

    Returns:
        True when aria2c exits 0 and the output file exists, else False.
    """
    # aria2c rejects --max-connection-per-server values outside 1..16 and
    # exits with an option error, so e.g. `-n 32` would fail before
    # downloading anything. Clamp instead of passing the value through.
    aria2c_max_per_server: int = 16
    per_server: int = max(1, min(connections, aria2c_max_per_server))
    if per_server != connections:
        log.warning("aria2c allows 1-%d connections per server; using %d instead of %d",
                    aria2c_max_per_server, per_server, connections)

    num_mirrors: int = len(urls)
    total_splits: int = max(per_server, per_server * num_mirrors)
    cmd: list[str] = [
        "aria2c",
        "--file-allocation=none",
        "--continue=true",
        f"--max-connection-per-server={per_server}",
        f"--split={total_splits}",
        "--min-split-size=50M",
        # aria2c retries individual chunk connections on transient network
        # errors (TCP reset, timeout). This is transport-level retry analogous
        # to TCP retransmit, not application-level retry of a failed operation.
        "--max-tries=5",
        "--retry-wait=5",
        "--timeout=60",
        "--connect-timeout=10",
        "--summary-interval=10",
        "--console-log-level=notice",
        f"--dir={output_dir}",
        f"--out={filename}",
        "--auto-file-renaming=false",
        "--allow-overwrite=true",
        *urls,
    ]

    log.info("Downloading %s", filename)
    log.info(" aria2c: %d connections × %d mirrors (%d splits)",
             per_server, num_mirrors, total_splits)

    start: float = time.monotonic()
    # Output is not captured: aria2c writes its progress straight to the
    # terminal.
    result: subprocess.CompletedProcess[bytes] = subprocess.run(cmd)
    elapsed: float = time.monotonic() - start

    if result.returncode != 0:
        log.error("aria2c failed with exit code %d", result.returncode)
        return False

    filepath: Path = Path(output_dir) / filename
    if not filepath.exists():
        log.error("aria2c reported success but %s does not exist", filepath)
        return False

    size_bytes: int = filepath.stat().st_size
    size_gb: float = size_bytes / (1024 ** 3)
    avg_mb: float = size_bytes / elapsed / (1024 ** 2) if elapsed > 0 else 0
    log.info(" Done: %.1f GB in %.0fs (%.1f MiB/s avg)", size_gb, elapsed, avg_mb)
    return True
"--cluster", default="mainnet-beta", + choices=list(CLUSTER_RPC), + help="Solana cluster (default: mainnet-beta)") + p.add_argument("-r", "--rpc", default=None, + help="RPC URL for cluster discovery (default: public RPC)") + p.add_argument("-n", "--connections", type=int, default=16, + help="aria2c connections per download (default: 16)") + p.add_argument("-t", "--threads", type=int, default=500, + help="Threads for parallel RPC probing (default: 500)") + p.add_argument("--max-snapshot-age", type=int, default=1300, + help="Max snapshot age in slots (default: 1300)") + p.add_argument("--max-latency", type=float, default=100, + help="Max RPC probe latency in ms (default: 100)") + p.add_argument("--min-download-speed", type=int, default=20, + help="Min download speed in MiB/s (default: 20)") + p.add_argument("--measurement-time", type=int, default=7, + help="Speed measurement duration in seconds (default: 7)") + p.add_argument("--max-speed-checks", type=int, default=15, + help="Max nodes to benchmark before giving up (default: 15)") + p.add_argument("--version", default=None, + help="Filter nodes by version prefix (e.g. '2.2')") + p.add_argument("--full-only", action="store_true", + help="Download only full snapshot, skip incremental") + p.add_argument("--dry-run", action="store_true", + help="Find best source and print URL, don't download") + p.add_argument("-v", "--verbose", action="store_true") + args: argparse.Namespace = p.parse_args() + + logging.basicConfig( + level=logging.DEBUG if args.verbose else logging.INFO, + format="%(asctime)s %(levelname)s %(message)s", + datefmt="%H:%M:%S", + ) + + rpc_url: str = args.rpc or CLUSTER_RPC[args.cluster] + + # aria2c is required for actual downloads (not dry-run) + if not args.dry_run and not shutil.which("aria2c"): + log.error("aria2c not found. 
Install with: apt install aria2") + return 1 + + # Get current slot + log.info("Cluster: %s | RPC: %s", args.cluster, rpc_url) + current_slot: int | None = get_current_slot(rpc_url) + if current_slot is None: + log.error("Cannot get current slot from %s", rpc_url) + return 1 + log.info("Current slot: %d", current_slot) + + # Discover sources + sources: list[SnapshotSource] = discover_sources( + rpc_url, current_slot, + max_age_slots=args.max_snapshot_age, + max_latency_ms=args.max_latency, + threads=args.threads, + version_filter=args.version, + ) + if not sources: + log.error("No snapshot sources found") + return 1 + + # Sort by latency (lowest first) for speed benchmarking + sources.sort(key=lambda s: s.latency_ms) + + # Benchmark top candidates — all speeds in MiB/s (binary, 1 MiB = 1048576 bytes) + log.info("Benchmarking download speed on top %d sources...", args.max_speed_checks) + fast_sources: list[SnapshotSource] = [] + checked: int = 0 + min_speed_bytes: int = args.min_download_speed * 1024 * 1024 # MiB to bytes + + for source in sources: + if checked >= args.max_speed_checks: + break + checked += 1 + + speed: float = measure_speed(source.rpc_address, args.measurement_time) + source.download_speed = speed + speed_mib: float = speed / (1024 ** 2) + + if speed < min_speed_bytes: + log.info(" %s: %.1f MiB/s (too slow, need >=%d MiB/s)", + source.rpc_address, speed_mib, args.min_download_speed) + continue + + log.info(" %s: %.1f MiB/s (latency: %.0fms, age: %d slots)", + source.rpc_address, speed_mib, + source.latency_ms, source.slots_diff) + fast_sources.append(source) + + if not fast_sources: + log.error("No source met minimum speed requirement (%d MiB/s)", + args.min_download_speed) + log.info("Try: --min-download-speed 10") + return 1 + + # Use the fastest source as primary, collect mirrors for each file + best: SnapshotSource = fast_sources[0] + file_paths: list[str] = best.file_paths + if args.full_only: + file_paths = [fp for fp in file_paths + if 
fp.rsplit("/", 1)[-1].startswith("snapshot-")] + + # Build mirror URL lists: for each file, collect URLs from all fast sources + # that serve the same filename + download_plan: list[tuple[str, list[str]]] = [] + for fp in file_paths: + filename: str = fp.rsplit("/", 1)[-1] + mirror_urls: list[str] = [f"http://{best.rpc_address}{fp}"] + for other in fast_sources[1:]: + for other_fp in other.file_paths: + if other_fp.rsplit("/", 1)[-1] == filename: + mirror_urls.append(f"http://{other.rpc_address}{other_fp}") + break + download_plan.append((filename, mirror_urls)) + + speed_mib: float = best.download_speed / (1024 ** 2) + log.info("Best source: %s (%.1f MiB/s), %d mirrors total", + best.rpc_address, speed_mib, len(fast_sources)) + for filename, mirror_urls in download_plan: + log.info(" %s (%d mirrors)", filename, len(mirror_urls)) + for url in mirror_urls: + log.info(" %s", url) + + if args.dry_run: + for _, mirror_urls in download_plan: + for url in mirror_urls: + print(url) + return 0 + + # Download — skip files that already exist locally + os.makedirs(args.output, exist_ok=True) + total_start: float = time.monotonic() + + for filename, mirror_urls in download_plan: + filepath: Path = Path(args.output) / filename + if filepath.exists() and filepath.stat().st_size > 0: + log.info("Skipping %s (already exists: %.1f GB)", + filename, filepath.stat().st_size / (1024 ** 3)) + continue + if not download_aria2c(mirror_urls, args.output, filename, args.connections): + log.error("Failed to download %s", filename) + return 1 + + total_elapsed: float = time.monotonic() - total_start + log.info("All downloads complete in %.0fs", total_elapsed) + for filename, _ in download_plan: + fp: Path = Path(args.output) / filename + if fp.exists(): + log.info(" %s (%.1f GB)", fp.name, fp.stat().st_size / (1024 ** 3)) + + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/scripts/zfs-setup.md b/scripts/zfs-setup.md new file mode 100644 index 00000000..c62042f9 --- 
/dev/null +++ b/scripts/zfs-setup.md @@ -0,0 +1,109 @@ +# ZFS Setup for Biscayne + +## Current State + +``` +biscayne none (pool root) +biscayne/DATA none +biscayne/DATA/home /home 42G +biscayne/DATA/home/solana /home/solana 2.9G +biscayne/DATA/srv /srv 712G +biscayne/DATA/srv/backups /srv/backups 208G +biscayne/DATA/volumes/solana (zvol, 4T) → block-mounted at /srv/solana +``` + +Docker root: `/var/lib/docker` on root filesystem (`/dev/md0`, 439G). + +## Target State + +``` +biscayne/DATA/deployments /srv/deployments ← laconic-so deployment dirs (snapshotted) +biscayne/DATA/var/docker /var/lib/docker ← docker storage on ZFS +biscayne/DATA/volumes/solana (zvol, 4T) ← bulk solana data (not backed up) +``` + +## Steps + +### 1. Create deployments dataset + +```bash +zfs create -o mountpoint=/srv/deployments biscayne/DATA/deployments +``` + +### 2. Move docker onto ZFS + +Stop docker and all containers first: + +```bash +systemctl stop docker.socket docker.service +``` + +Create the dataset: + +```bash +zfs create -o mountpoint=/var/lib/docker biscayne/DATA/var +zfs create biscayne/DATA/var/docker +``` + +Copy existing docker data (if any worth keeping): + +```bash +rsync -aHAX /var/lib/docker.bak/ /var/lib/docker/ +``` + +Or just start fresh — the only running containers are telegraf/influxdb monitoring +which can be recreated. + +Start docker: + +```bash +systemctl start docker.service +``` + +### 3. Grant ZFS permissions to the backup user + +```bash +zfs allow -u <user> destroy,snapshot,send,hold,release,mount biscayne/DATA/deployments +``` + +### 4. Create remote receiving datasets + +On mysterio: + +```bash +zfs create -p edith/DATA/backlog/biscayne-main +``` + +On ardham: + +```bash +zfs create -p batterywharf/DATA/backlog/biscayne-main +``` + +These will fail until SSH keys and network access are configured for biscayne +to reach these hosts. The backup script handles this gracefully. + +### 5.
Install backlog.sh and crontab + +```bash +mkdir -p ~/.local/bin +cp scripts/backlog.sh ~/.local/bin/backlog.sh +chmod +x ~/.local/bin/backlog.sh +crontab -e +# Add: 01 0 * * * /home//.local/bin/backlog.sh +``` + +## Volume Layout + +laconic-so deployment at `/srv/deployments/agave/`: + +| Volume | Location | Backed up | +|---|---|---| +| validator-config | `/srv/deployments/agave/data/validator-config/` | Yes (ZFS snapshot) | +| doublezero-config | `/srv/deployments/agave/data/doublezero-config/` | Yes (ZFS snapshot) | +| validator-ledger | `/srv/solana/ledger/` (zvol) | No (rebuildable) | +| validator-accounts | `/srv/solana/accounts/` (zvol) | No (rebuildable) | +| validator-snapshots | `/srv/solana/snapshots/` (zvol) | No (rebuildable) | + +The laconic-so spec.yml must map the heavy volumes to zvol paths and the small +config volumes to the deployment directory. diff --git a/stack-orchestrator/compose/docker-compose-agave-rpc.yml b/stack-orchestrator/compose/docker-compose-agave-rpc.yml new file mode 100644 index 00000000..46f10cb9 --- /dev/null +++ b/stack-orchestrator/compose/docker-compose-agave-rpc.yml @@ -0,0 +1,112 @@ +services: + agave-rpc: + restart: unless-stopped + image: laconicnetwork/agave:local + network_mode: host + privileged: true + cap_add: + - IPC_LOCK + # Compose owns all defaults. spec.yml overrides per-deployment. 
+ environment: + AGAVE_MODE: rpc + # Required — no defaults + VALIDATOR_ENTRYPOINT: ${VALIDATOR_ENTRYPOINT} + KNOWN_VALIDATOR: ${KNOWN_VALIDATOR} + # Optional with defaults + EXTRA_ENTRYPOINTS: ${EXTRA_ENTRYPOINTS:-} + EXTRA_KNOWN_VALIDATORS: ${EXTRA_KNOWN_VALIDATORS:-} + RPC_PORT: ${RPC_PORT:-8899} + RPC_BIND_ADDRESS: ${RPC_BIND_ADDRESS:-127.0.0.1} + GOSSIP_PORT: ${GOSSIP_PORT:-8001} + DYNAMIC_PORT_RANGE: ${DYNAMIC_PORT_RANGE:-9000-10000} + EXPECTED_GENESIS_HASH: ${EXPECTED_GENESIS_HASH:-} + EXPECTED_SHRED_VERSION: ${EXPECTED_SHRED_VERSION:-} + LIMIT_LEDGER_SIZE: ${LIMIT_LEDGER_SIZE:-50000000} + NO_SNAPSHOTS: ${NO_SNAPSHOTS:-false} + SNAPSHOT_INTERVAL_SLOTS: ${SNAPSHOT_INTERVAL_SLOTS:-100000} + MAXIMUM_SNAPSHOTS_TO_RETAIN: ${MAXIMUM_SNAPSHOTS_TO_RETAIN:-1} + NO_INCREMENTAL_SNAPSHOTS: ${NO_INCREMENTAL_SNAPSHOTS:-false} + ACCOUNT_INDEXES: ${ACCOUNT_INDEXES:-} + PUBLIC_RPC_ADDRESS: ${PUBLIC_RPC_ADDRESS:-} + GOSSIP_HOST: ${GOSSIP_HOST:-} + PUBLIC_TVU_ADDRESS: ${PUBLIC_TVU_ADDRESS:-} + RUST_LOG: ${RUST_LOG:-info} + SOLANA_METRICS_CONFIG: ${SOLANA_METRICS_CONFIG:-} + JITO_ENABLE: ${JITO_ENABLE:-false} + JITO_BLOCK_ENGINE_URL: ${JITO_BLOCK_ENGINE_URL:-} + JITO_SHRED_RECEIVER_ADDR: ${JITO_SHRED_RECEIVER_ADDR:-} + JITO_TIP_PAYMENT_PROGRAM: ${JITO_TIP_PAYMENT_PROGRAM:-} + JITO_DISTRIBUTION_PROGRAM: ${JITO_DISTRIBUTION_PROGRAM:-} + JITO_MERKLE_ROOT_AUTHORITY: ${JITO_MERKLE_ROOT_AUTHORITY:-} + JITO_COMMISSION_BPS: ${JITO_COMMISSION_BPS:-0} + EXTRA_ARGS: ${EXTRA_ARGS:-} + SNAPSHOT_AUTO_DOWNLOAD: ${SNAPSHOT_AUTO_DOWNLOAD:-true} + SNAPSHOT_MAX_AGE_SLOTS: ${SNAPSHOT_MAX_AGE_SLOTS:-20000} + PROBE_GRACE_SECONDS: ${PROBE_GRACE_SECONDS:-600} + PROBE_MAX_SLOT_LAG: ${PROBE_MAX_SLOT_LAG:-20000} + deploy: + resources: + reservations: + cpus: '4.0' + memory: 256000M + limits: + cpus: '32.0' + memory: 921600M + volumes: + - rpc-config:/data/config + - rpc-ledger:/data/ledger + - rpc-accounts:/data/accounts + - rpc-snapshots:/data/snapshots + ports: + # RPC ports + - "8899" + - "8900" + # 
Gossip port + - "8001" + - "8001/udp" + # TPU/TVU/repair ports: only 9000-9025 (26 ports) are listed; DYNAMIC_PORT_RANGE defaults to 9000-10000 + - "9000/udp" + - "9001/udp" + - "9002/udp" + - "9003/udp" + - "9004/udp" + - "9005/udp" + - "9006/udp" + - "9007/udp" + - "9008/udp" + - "9009/udp" + - "9010/udp" + - "9011/udp" + - "9012/udp" + - "9013/udp" + - "9014/udp" + - "9015/udp" + - "9016/udp" + - "9017/udp" + - "9018/udp" + - "9019/udp" + - "9020/udp" + - "9021/udp" + - "9022/udp" + - "9023/udp" + - "9024/udp" + - "9025/udp" + ulimits: + memlock: + soft: -1 + hard: -1 + nofile: + soft: 1000000 + hard: 1000000 + healthcheck: + test: ["CMD", "entrypoint.py", "probe"] + interval: 30s + timeout: 10s + retries: 3 + start_period: 600s + +volumes: + rpc-config: + rpc-ledger: + rpc-accounts: + rpc-snapshots: diff --git a/stack-orchestrator/compose/docker-compose-agave-test.yml b/stack-orchestrator/compose/docker-compose-agave-test.yml new file mode 100644 index 00000000..7981d8e3 --- /dev/null +++ b/stack-orchestrator/compose/docker-compose-agave-test.yml @@ -0,0 +1,27 @@ +services: + agave-test: + restart: unless-stopped + image: laconicnetwork/agave:local + security_opt: + - seccomp=unconfined + environment: + AGAVE_MODE: test + FACILITATOR_PUBKEY: ${FACILITATOR_PUBKEY:-} + SERVER_PUBKEY: ${SERVER_PUBKEY:-} + CLIENT_PUBKEY: ${CLIENT_PUBKEY:-} + MINT_DECIMALS: ${MINT_DECIMALS:-6} + MINT_AMOUNT: ${MINT_AMOUNT:-1000000000} + volumes: + - test-ledger:/data/ledger + ports: + - "8899" + - "8900" + healthcheck: + test: ["CMD", "solana", "cluster-version", "--url", "http://127.0.0.1:8899"] + interval: 5s + timeout: 5s + retries: 30 + start_period: 10s + +volumes: + test-ledger: diff --git a/stack-orchestrator/compose/docker-compose-agave.yml b/stack-orchestrator/compose/docker-compose-agave.yml new file mode 100644 index 00000000..47a68921 --- /dev/null +++ b/stack-orchestrator/compose/docker-compose-agave.yml @@ -0,0 +1,115 @@ +services: + agave-validator: + restart: unless-stopped + image: 
laconicnetwork/agave:local + network_mode: host + privileged: true + cap_add: + - IPC_LOCK + # Compose owns all defaults. spec.yml overrides per-deployment. + environment: + AGAVE_MODE: ${AGAVE_MODE:-validator} + # Required — no defaults + VALIDATOR_ENTRYPOINT: ${VALIDATOR_ENTRYPOINT} + KNOWN_VALIDATOR: ${KNOWN_VALIDATOR} + # Optional with defaults + EXTRA_ENTRYPOINTS: ${EXTRA_ENTRYPOINTS:-} + EXTRA_KNOWN_VALIDATORS: ${EXTRA_KNOWN_VALIDATORS:-} + RPC_PORT: ${RPC_PORT:-8899} + RPC_BIND_ADDRESS: ${RPC_BIND_ADDRESS:-127.0.0.1} + GOSSIP_PORT: ${GOSSIP_PORT:-8001} + DYNAMIC_PORT_RANGE: ${DYNAMIC_PORT_RANGE:-9000-10000} + EXPECTED_GENESIS_HASH: ${EXPECTED_GENESIS_HASH:-} + EXPECTED_SHRED_VERSION: ${EXPECTED_SHRED_VERSION:-} + LIMIT_LEDGER_SIZE: ${LIMIT_LEDGER_SIZE:-50000000} + NO_SNAPSHOTS: ${NO_SNAPSHOTS:-false} + SNAPSHOT_INTERVAL_SLOTS: ${SNAPSHOT_INTERVAL_SLOTS:-100000} + MAXIMUM_SNAPSHOTS_TO_RETAIN: ${MAXIMUM_SNAPSHOTS_TO_RETAIN:-1} + NO_INCREMENTAL_SNAPSHOTS: ${NO_INCREMENTAL_SNAPSHOTS:-false} + ACCOUNT_INDEXES: ${ACCOUNT_INDEXES:-} + VOTE_ACCOUNT_KEYPAIR: ${VOTE_ACCOUNT_KEYPAIR:-/data/config/vote-account-keypair.json} + GOSSIP_HOST: ${GOSSIP_HOST:-} + PUBLIC_TVU_ADDRESS: ${PUBLIC_TVU_ADDRESS:-} + RUST_LOG: ${RUST_LOG:-info} + SOLANA_METRICS_CONFIG: ${SOLANA_METRICS_CONFIG:-} + JITO_ENABLE: ${JITO_ENABLE:-false} + JITO_BLOCK_ENGINE_URL: ${JITO_BLOCK_ENGINE_URL:-} + JITO_RELAYER_URL: ${JITO_RELAYER_URL:-} + JITO_SHRED_RECEIVER_ADDR: ${JITO_SHRED_RECEIVER_ADDR:-} + JITO_TIP_PAYMENT_PROGRAM: ${JITO_TIP_PAYMENT_PROGRAM:-} + JITO_DISTRIBUTION_PROGRAM: ${JITO_DISTRIBUTION_PROGRAM:-} + JITO_MERKLE_ROOT_AUTHORITY: ${JITO_MERKLE_ROOT_AUTHORITY:-} + JITO_COMMISSION_BPS: ${JITO_COMMISSION_BPS:-0} + EXTRA_ARGS: ${EXTRA_ARGS:-} + SNAPSHOT_AUTO_DOWNLOAD: ${SNAPSHOT_AUTO_DOWNLOAD:-true} + SNAPSHOT_MAX_AGE_SLOTS: ${SNAPSHOT_MAX_AGE_SLOTS:-20000} + PROBE_GRACE_SECONDS: ${PROBE_GRACE_SECONDS:-600} + PROBE_MAX_SLOT_LAG: ${PROBE_MAX_SLOT_LAG:-20000} + deploy: + resources: + 
reservations: + cpus: '4.0' + memory: 256000M + limits: + cpus: '32.0' + memory: 921600M + volumes: + - validator-config:/data/config + - validator-ledger:/data/ledger + - validator-accounts:/data/accounts + - validator-snapshots:/data/snapshots + - validator-log:/data/log + ports: + # RPC ports + - "8899" + - "8900" + # Gossip port + - "8001" + - "8001/udp" + # TPU/TVU/repair ports: only 9000-9025 (26 ports) are listed; DYNAMIC_PORT_RANGE defaults to 9000-10000 + - "9000/udp" + - "9001/udp" + - "9002/udp" + - "9003/udp" + - "9004/udp" + - "9005/udp" + - "9006/udp" + - "9007/udp" + - "9008/udp" + - "9009/udp" + - "9010/udp" + - "9011/udp" + - "9012/udp" + - "9013/udp" + - "9014/udp" + - "9015/udp" + - "9016/udp" + - "9017/udp" + - "9018/udp" + - "9019/udp" + - "9020/udp" + - "9021/udp" + - "9022/udp" + - "9023/udp" + - "9024/udp" + - "9025/udp" + ulimits: + memlock: + soft: -1 + hard: -1 + nofile: + soft: 1000000 + hard: 1000000 + healthcheck: + test: ["CMD", "entrypoint.py", "probe"] + interval: 30s + timeout: 10s + retries: 3 + start_period: 600s + +volumes: + validator-config: + validator-ledger: + validator-accounts: + validator-snapshots: + validator-log: diff --git a/stack-orchestrator/compose/docker-compose-doublezero.yml b/stack-orchestrator/compose/docker-compose-doublezero.yml new file mode 100644 index 00000000..656d5ecd --- /dev/null +++ b/stack-orchestrator/compose/docker-compose-doublezero.yml @@ -0,0 +1,19 @@ +services: + doublezerod: + restart: unless-stopped + image: laconicnetwork/doublezero:local + network_mode: host + privileged: true + cap_add: + - NET_ADMIN + environment: + DOUBLEZERO_RPC_ENDPOINT: ${DOUBLEZERO_RPC_ENDPOINT:-http://127.0.0.1:8899} + DOUBLEZERO_ENV: ${DOUBLEZERO_ENV:-mainnet-beta} + DOUBLEZERO_EXTRA_ARGS: ${DOUBLEZERO_EXTRA_ARGS:-} + volumes: + - doublezero-validator-identity:/data/config:ro + - doublezero-config:/root/.config/doublezero + +volumes: + doublezero-validator-identity: + doublezero-config: diff --git 
a/stack-orchestrator/compose/docker-compose-monitoring.yml b/stack-orchestrator/compose/docker-compose-monitoring.yml new file mode 100644 index 00000000..0828c37c --- /dev/null +++ b/stack-orchestrator/compose/docker-compose-monitoring.yml @@ -0,0 +1,49 @@ +services: + monitoring-influxdb: + image: influxdb:1.8 + restart: unless-stopped + environment: + INFLUXDB_DB: agave_metrics + INFLUXDB_HTTP_AUTH_ENABLED: "true" + INFLUXDB_ADMIN_USER: admin + INFLUXDB_ADMIN_PASSWORD: admin + INFLUXDB_REPORTING_DISABLED: "true" + volumes: + - monitoring-influxdb-data:/var/lib/influxdb + ports: + - "8086" + + monitoring-grafana: + image: grafana/grafana:latest + restart: unless-stopped + environment: + GF_SECURITY_ADMIN_PASSWORD: admin + GF_SECURITY_ADMIN_USER: admin + GF_USERS_ALLOW_SIGN_UP: "false" + GF_PATHS_DATA: /var/lib/grafana + volumes: + - monitoring-grafana-data:/var/lib/grafana + - monitoring-grafana-datasources:/etc/grafana/provisioning/datasources:ro + - monitoring-grafana-dashboards:/etc/grafana/provisioning/dashboards:ro + ports: + - "3000" + + monitoring-telegraf: + image: telegraf:1.36 + restart: unless-stopped + network_mode: host + environment: + NODE_RPC_URL: ${NODE_RPC_URL:-http://localhost:8899} + CANONICAL_RPC_URL: ${CANONICAL_RPC_URL:-https://api.mainnet-beta.solana.com} + INFLUXDB_URL: ${INFLUXDB_URL:-http://localhost:8086} + volumes: + - monitoring-telegraf-config:/etc/telegraf:ro + - monitoring-telegraf-scripts:/scripts:ro + +volumes: + monitoring-influxdb-data: + monitoring-grafana-data: + monitoring-grafana-datasources: + monitoring-grafana-dashboards: + monitoring-telegraf-config: + monitoring-telegraf-scripts: diff --git a/stack-orchestrator/config/agave/restart-node.sh b/stack-orchestrator/config/agave/restart-node.sh new file mode 100644 index 00000000..ad2f2df3 --- /dev/null +++ b/stack-orchestrator/config/agave/restart-node.sh @@ -0,0 +1,8 @@ +#!/bin/sh +# Restart a container by label filter +# Used by the cron-based restarter sidecar 
+label_filter="$1" +container=$(docker ps -qf "label=$label_filter") +if [ -n "$container" ]; then + docker restart -s TERM "$container" > /dev/null +fi diff --git a/stack-orchestrator/config/agave/restart.cron b/stack-orchestrator/config/agave/restart.cron new file mode 100644 index 00000000..c853e921 --- /dev/null +++ b/stack-orchestrator/config/agave/restart.cron @@ -0,0 +1,4 @@ +# Restart validator every 4 hours (mitigate memory leaks) +0 */4 * * * /scripts/restart-node.sh role=validator +# Restart RPC every 6 hours (staggered from validator) +30 */6 * * * /scripts/restart-node.sh role=rpc diff --git a/stack-orchestrator/config/monitoring/grafana-dashboards/agave-indexing.json b/stack-orchestrator/config/monitoring/grafana-dashboards/agave-indexing.json new file mode 100644 index 00000000..ae77b9e0 --- /dev/null +++ b/stack-orchestrator/config/monitoring/grafana-dashboards/agave-indexing.json @@ -0,0 +1,3941 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": 4, + "links": [], + "panels": [ + { + "datasource": "InfluxDB", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": 0 + }, + { + "color": "yellow", + "value": 10 + }, + { + "color": "green", + "value": 20 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 6, + "x": 0, + "y": 0 + }, + "id": 10, + "options": { + "minVizHeight": 75, + "minVizWidth": 75, + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": 
true, + "sizing": "auto" + }, + "pluginVersion": "12.2.0", + "targets": [ + { + "datasource": "InfluxDB", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + } + ], + "measurement": "memory-stats", + "orderByTime": "ASC", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "avail_percent" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [ + { + "key": "host_id", + "operator": "=~", + "value": "/^$node$/" + } + ] + } + ], + "title": "Memory Available %", + "type": "gauge" + }, + { + "datasource": "InfluxDB", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + } + ] + }, + "unit": "decbytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 18, + "x": 6, + "y": 0 + }, + "id": 6, + "options": { + "legend": { + "calcs": [ + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.2.0", + "targets": [ + { + "datasource": "InfluxDB", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + } + ], + "measurement": 
"memory-stats", + "orderByTime": "ASC", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "used_bytes" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + }, + { + "params": [ + "Used" + ], + "type": "alias" + } + ] + ], + "tags": [ + { + "key": "host_id", + "operator": "=~", + "value": "/^$node$/" + } + ] + }, + { + "datasource": "InfluxDB", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + } + ], + "measurement": "memory-stats", + "orderByTime": "ASC", + "policy": "default", + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "cached_bytes" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + }, + { + "params": [ + "Cached" + ], + "type": "alias" + } + ] + ], + "tags": [ + { + "key": "host_id", + "operator": "=~", + "value": "/^$node$/" + } + ] + }, + { + "datasource": "InfluxDB", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + } + ], + "measurement": "memory-stats", + "orderByTime": "ASC", + "policy": "default", + "refId": "C", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "buffers_bytes" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + }, + { + "params": [ + "Buffers" + ], + "type": "alias" + } + ] + ], + "tags": [ + { + "key": "host_id", + "operator": "=~", + "value": "/^$node$/" + } + ] + } + ], + "title": "System Memory Usage", + "type": "timeseries" + }, + { + "datasource": "InfluxDB", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + } + ] + }, + "unit": "decbytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 6, + "x": 0, + "y": 4 + }, + "id": 11, + "options": { + "minVizHeight": 75, + "minVizWidth": 75, + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": 
"", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true, + "sizing": "auto", + "text": {} + }, + "pluginVersion": "12.2.0", + "targets": [ + { + "datasource": "InfluxDB", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + } + ], + "measurement": "memory-stats", + "orderByTime": "ASC", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "used_bytes" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [ + { + "key": "host_id", + "operator": "=~", + "value": "/^$node$/" + } + ] + } + ], + "title": "Memory Used", + "type": "gauge" + }, + { + "datasource": "InfluxDB", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 100 + } + ] + }, + "unit": "µs" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 8 + }, + "id": 2, + "options": { + "legend": { + "calcs": [ + "mean", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.2.0", + "targets": [ + { + 
"datasource": "InfluxDB", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + } + ], + "measurement": "accounts_db_store_timings", + "orderByTime": "ASC", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "update_index" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [ + { + "key": "host_id", + "operator": "=~", + "value": "/^$node$/" + } + ] + } + ], + "title": "Index Update Latency", + "type": "timeseries" + }, + { + "datasource": "InfluxDB", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + } + ] + }, + "unit": "ns" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 8 + }, + "id": 3, + "options": { + "legend": { + "calcs": [ + "mean", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.2.0", + "targets": [ + { + "datasource": "InfluxDB", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + } + ], + "measurement": "bank-accounts_lt_hash", + "orderByTime": "ASC", + "policy": 
"default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "inspect_account_lookup_ns" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + }, + { + "params": [ + "Lookup" + ], + "type": "alias" + } + ] + ], + "tags": [ + { + "key": "host_id", + "operator": "=~", + "value": "/^$node$/" + } + ] + } + ], + "title": "Account Lookup Latency", + "type": "timeseries" + }, + { + "datasource": "InfluxDB", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + } + ] + }, + "unit": "decbytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 16 + }, + "id": 4, + "options": { + "legend": { + "calcs": [ + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.2.0", + "targets": [ + { + "datasource": "InfluxDB", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + } + ], + "measurement": "accounts_cache_size", + "orderByTime": "ASC", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "total_size" + ], + 
"type": "field" + }, + { + "params": [], + "type": "mean" + }, + { + "params": [ + "Cache" + ], + "type": "alias" + } + ] + ], + "tags": [ + { + "key": "host_id", + "operator": "=~", + "value": "/^$node$/" + } + ] + }, + { + "datasource": "InfluxDB", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + } + ], + "measurement": "accounts_index", + "orderByTime": "ASC", + "policy": "default", + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "estimate_mem_bytes" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + }, + { + "params": [ + "Index" + ], + "type": "alias" + } + ] + ], + "tags": [ + { + "key": "host_id", + "operator": "=~", + "value": "/^$node$/" + } + ] + } + ], + "title": "AccountsDB Memory", + "type": "timeseries" + }, + { + "datasource": "InfluxDB", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 16 + }, + "id": 5, + "options": { + "legend": { + "calcs": [ + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": 
"single", + "sort": "none" + } + }, + "pluginVersion": "12.2.0", + "targets": [ + { + "datasource": "InfluxDB", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + } + ], + "measurement": "bank-accounts_lt_hash", + "orderByTime": "ASC", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "num_cache_misses" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [ + { + "key": "host_id", + "operator": "=~", + "value": "/^$node$/" + } + ] + } + ], + "title": "Cache Misses", + "type": "timeseries" + }, + { + "datasource": "InfluxDB", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + } + ] + }, + "unit": "decbytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 24 + }, + "id": 7, + "options": { + "legend": { + "calcs": [ + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.2.0", + "targets": [ + { + "datasource": "InfluxDB", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + 
{ + "params": [ + "cf_name" + ], + "type": "tag" + } + ], + "measurement": "blockstore_rocksdb_cfs", + "orderByTime": "ASC", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "block_cache_usage" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [ + { + "key": "host_id", + "operator": "=~", + "value": "/^$node$/" + } + ] + } + ], + "title": "RocksDB Block Cache Usage", + "type": "timeseries" + }, + { + "datasource": "InfluxDB", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + } + ] + }, + "unit": "decbytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 24 + }, + "id": 8, + "options": { + "legend": { + "calcs": [ + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.2.0", + "targets": [ + { + "datasource": "InfluxDB", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "cf_name" + ], + "type": "tag" + } + ], + "measurement": "blockstore_rocksdb_cfs", + "orderByTime": 
"ASC", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "size_all_mem_tables" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [ + { + "key": "host_id", + "operator": "=~", + "value": "/^$node$/" + } + ] + }, + { + "datasource": "InfluxDB", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "cf_name" + ], + "type": "tag" + } + ], + "measurement": "blockstore_rocksdb_cfs", + "orderByTime": "ASC", + "policy": "default", + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "estimate_table_readers_mem" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [ + { + "key": "host_id", + "operator": "=~", + "value": "/^$node$/" + } + ] + } + ], + "title": "RocksDB Memtables + Table Readers", + "type": "timeseries" + }, + { + "datasource": "InfluxDB", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + } + ] + }, + "unit": "decbytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 32 + }, + "id": 14, + "options": { + "legend": { + "calcs": [ + "mean", + "last" + 
], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.2.0", + "targets": [ + { + "datasource": "InfluxDB", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + } + ], + "measurement": "accounts_index", + "orderByTime": "ASC", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "disk_index_index_file_size" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + }, + { + "params": [ + "Index Files" + ], + "type": "alias" + } + ] + ], + "tags": [ + { + "key": "host_id", + "operator": "=~", + "value": "/^$node$/" + } + ] + }, + { + "datasource": "InfluxDB", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + } + ], + "measurement": "accounts_index", + "orderByTime": "ASC", + "policy": "default", + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "disk_index_data_file_size" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + }, + { + "params": [ + "Data Files Size" + ], + "type": "alias" + } + ] + ], + "tags": [ + { + "key": "host_id", + "operator": "=~", + "value": "/^$node$/" + } + ] + }, + { + "datasource": "InfluxDB", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + } + ], + "measurement": "accounts_index", + "orderByTime": "ASC", + "policy": "default", + "refId": "C", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "disk_index_data_file_count" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + }, + { + "params": [ + "Data Files Count" + ], + "type": "alias" + } + ] + ], + "tags": [ + { + "key": "host_id", + "operator": "=~", + "value": "/^$node$/" + } + ] + } + ], + "title": "Disk Index Size", + "type": "timeseries" + }, + { + "datasource": "InfluxDB", + "fieldConfig": { + "defaults": { + "color": { + "mode": 
"palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 32 + }, + "id": 13, + "options": { + "legend": { + "calcs": [ + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.2.0", + "targets": [ + { + "datasource": "InfluxDB", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + } + ], + "measurement": "accounts_index", + "orderByTime": "ASC", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "count_in_mem" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + }, + { + "params": [ + "In Memory" + ], + "type": "alias" + } + ] + ], + "tags": [ + { + "key": "host_id", + "operator": "=~", + "value": "/^$node$/" + } + ] + }, + { + "datasource": "InfluxDB", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + } + ], + "measurement": "accounts_index", + "orderByTime": "ASC", + "policy": "default", + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "count" + ], 
+ "type": "field" + }, + { + "params": [], + "type": "mean" + }, + { + "params": [ + "Total" + ], + "type": "alias" + } + ] + ], + "tags": [ + { + "key": "host_id", + "operator": "=~", + "value": "/^$node$/" + } + ] + } + ], + "title": "Accounts Index Entry Count", + "type": "timeseries" + }, + { + "datasource": "InfluxDB", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + } + ] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 40 + }, + "id": 16, + "options": { + "legend": { + "calcs": [ + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.2.0", + "targets": [ + { + "datasource": "InfluxDB", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + } + ], + "measurement": "accounts_index", + "orderByTime": "ASC", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "ms_per_age" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [ + { + "key": "host_id", + "operator": "=~", + "value": 
"/^$node$/" + } + ] + } + ], + "title": "Index Flush: Age Progression Speed", + "type": "timeseries" + }, + { + "datasource": "InfluxDB", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 40 + }, + "id": 15, + "options": { + "legend": { + "calcs": [ + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.2.0", + "targets": [ + { + "datasource": "InfluxDB", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + } + ], + "measurement": "accounts_index", + "orderByTime": "ASC", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "flush_entries_evicted_from_mem" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + }, + { + "params": [ + "Evicted" + ], + "type": "alias" + } + ] + ], + "tags": [ + { + "key": "host_id", + "operator": "=~", + "value": "/^$node$/" + } + ] + }, + { + "datasource": "InfluxDB", + "groupBy": [ + { + "params": [ + "$__interval" 
+ ], + "type": "time" + } + ], + "measurement": "accounts_index", + "orderByTime": "ASC", + "policy": "default", + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "failed_to_evict" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + }, + { + "params": [ + "Failed" + ], + "type": "alias" + } + ] + ], + "tags": [ + { + "key": "host_id", + "operator": "=~", + "value": "/^$node$/" + } + ] + } + ], + "title": "Index Flush: Evictions", + "type": "timeseries" + }, + { + "datasource": "InfluxDB", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "stepAfter", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "yellow", + "value": 1 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 48 + }, + "id": 19, + "options": { + "legend": { + "calcs": [ + "mean", + "last", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.2.0", + "targets": [ + { + "datasource": "InfluxDB", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + } + ], + "measurement": "accounts_db_active", + 
"orderByTime": "ASC", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "flush" + ], + "type": "field" + }, + { + "params": [], + "type": "max" + }, + { + "params": [ + "Flush" + ], + "type": "alias" + } + ] + ], + "tags": [ + { + "key": "host_id", + "operator": "=~", + "value": "/^$node$/" + } + ] + }, + { + "datasource": "InfluxDB", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + } + ], + "measurement": "accounts_db_active", + "orderByTime": "ASC", + "policy": "default", + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "clean" + ], + "type": "field" + }, + { + "params": [], + "type": "max" + }, + { + "params": [ + "Clean" + ], + "type": "alias" + } + ] + ], + "tags": [ + { + "key": "host_id", + "operator": "=~", + "value": "/^$node$/" + } + ] + }, + { + "datasource": "InfluxDB", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + } + ], + "measurement": "accounts_db_active", + "orderByTime": "ASC", + "policy": "default", + "refId": "C", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "shrink" + ], + "type": "field" + }, + { + "params": [], + "type": "max" + }, + { + "params": [ + "Shrink" + ], + "type": "alias" + } + ] + ], + "tags": [ + { + "key": "host_id", + "operator": "=~", + "value": "/^$node$/" + } + ] + }, + { + "datasource": "InfluxDB", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + } + ], + "measurement": "accounts_db_active", + "orderByTime": "ASC", + "policy": "default", + "refId": "D", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "hash" + ], + "type": "field" + }, + { + "params": [], + "type": "max" + }, + { + "params": [ + "Hash" + ], + "type": "alias" + } + ] + ], + "tags": [ + { + "key": "host_id", + "operator": "=~", + "value": "/^$node$/" + } + ] + } + ], + "title": "AccountsDB Active Operations (blocking cache flush)", + "type": 
"timeseries" + }, + { + "datasource": "InfluxDB", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 48 + }, + "id": 17, + "options": { + "legend": { + "calcs": [ + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.2.0", + "targets": [ + { + "datasource": "InfluxDB", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + } + ], + "measurement": "accounts_index", + "orderByTime": "ASC", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "load_disk_found_count" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + }, + { + "params": [ + "Disk Reads" + ], + "type": "alias" + } + ] + ], + "tags": [ + { + "key": "host_id", + "operator": "=~", + "value": "/^$node$/" + } + ] + } + ], + "title": "Index Flush: Disk Index Activity", + "type": "timeseries" + }, + { + "datasource": "InfluxDB", + "fieldConfig": { + "defaults": { + "color": { + "mode": 
"palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 56 + }, + "id": 21, + "options": { + "legend": { + "calcs": [ + "mean", + "last", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.2.0", + "targets": [ + { + "datasource": "InfluxDB", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + } + ], + "measurement": "accounts_db-flush_accounts_cache", + "orderByTime": "ASC", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "num_accounts_flushed" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + }, + { + "params": [ + "Accounts Flushed" + ], + "type": "alias" + } + ] + ], + "tags": [ + { + "key": "host_id", + "operator": "=~", + "value": "/^$node$/" + } + ] + }, + { + "datasource": "InfluxDB", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + } + ], + "measurement": "accounts_db-flush_accounts_cache", + "orderByTime": "ASC", + "policy": "default", + "refId": "B", + "resultFormat": 
"time_series", + "select": [ + [ + { + "params": [ + "num_cleaned_roots_flushed" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + }, + { + "params": [ + "Roots Flushed" + ], + "type": "alias" + } + ] + ], + "tags": [ + { + "key": "host_id", + "operator": "=~", + "value": "/^$node$/" + } + ] + } + ], + "title": "Cache Flush: Progress", + "type": "timeseries" + }, + { + "datasource": "InfluxDB", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "yellow", + "value": 380000000 + }, + { + "color": "red", + "value": 400000000 + } + ] + }, + "unit": "decbytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 56 + }, + "id": 18, + "options": { + "legend": { + "calcs": [ + "mean", + "last", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.2.0", + "targets": [ + { + "datasource": "InfluxDB", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + } + ], + "measurement": "accounts_db_store_timings", + "orderByTime": "ASC", + "policy": "default", + "refId": "A", + 
"resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "read_only_accounts_cache_data_size" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + }, + { + "params": [ + "Size" + ], + "type": "alias" + } + ] + ], + "tags": [ + { + "key": "host_id", + "operator": "=~", + "value": "/^$node$/" + } + ] + } + ], + "title": "Read-Only Accounts Cache (400MB limit)", + "type": "timeseries" + }, + { + "datasource": "InfluxDB", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 0 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 64 + }, + "id": 23, + "options": { + "legend": { + "calcs": [ + "mean", + "last", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.2.0", + "targets": [ + { + "datasource": "InfluxDB", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + } + ], + "measurement": "shrink_candidate_slots", + "orderByTime": "ASC", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ 
+ [ + { + "params": [ + "candidates_count" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + }, + { + "params": [ + "Candidates" + ], + "type": "alias" + } + ] + ], + "tags": [ + { + "key": "host_id", + "operator": "=~", + "value": "/^$node$/" + } + ] + }, + { + "datasource": "InfluxDB", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + } + ], + "measurement": "shrink_candidate_slots", + "orderByTime": "ASC", + "policy": "default", + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "selected_count" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + }, + { + "params": [ + "Selected" + ], + "type": "alias" + } + ] + ], + "tags": [ + { + "key": "host_id", + "operator": "=~", + "value": "/^$node$/" + } + ] + }, + { + "datasource": "InfluxDB", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + } + ], + "measurement": "shrink_candidate_slots", + "orderByTime": "ASC", + "policy": "default", + "refId": "C", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "deferred_to_next_round_count" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + }, + { + "params": [ + "Deferred" + ], + "type": "alias" + } + ] + ], + "tags": [ + { + "key": "host_id", + "operator": "=~", + "value": "/^$node$/" + } + ] + } + ], + "title": "Shrink Candidate Selection", + "type": "timeseries" + }, + { + "datasource": "InfluxDB", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + 
"type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 64 + }, + "id": 26, + "options": { + "legend": { + "calcs": [ + "mean", + "last", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.2.0", + "targets": [ + { + "datasource": "InfluxDB", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + } + ], + "hide": false, + "measurement": "accounts_index_roots_len", + "orderByTime": "ASC", + "policy": "default", + "refId": "Open", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "append_vecs_open" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + }, + { + "params": [ + "Open" + ], + "type": "alias" + } + ] + ], + "tags": [ + { + "key": "host_id", + "operator": "=~", + "value": "/^$node$/" + } + ] + }, + { + "datasource": "InfluxDB", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + } + ], + "measurement": "accounts_index_roots_len", + "orderByTime": "ASC", + "policy": "default", + "refId": "File I/O", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "append_vecs_open_as_file_io" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + }, + { + "params": [ + "File I/O" + ], + "type": "alias" + } + ] + ], + "tags": [ + { + "key": "host_id", + "operator": "=~", + "value": "/^$node$/" + } + ] + }, + { + "datasource": "InfluxDB", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + } + ], + "measurement": "accounts_index_roots_len", + "orderByTime": 
"ASC", + "policy": "default", + "refId": "Dirty", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "append_vecs_dirty" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + }, + { + "params": [ + "Dirty (need flush)" + ], + "type": "alias" + } + ] + ], + "tags": [ + { + "key": "host_id", + "operator": "=~", + "value": "/^$node$/" + } + ] + }, + { + "datasource": "InfluxDB", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + } + ], + "hide": false, + "measurement": "accounts_index_roots_len", + "orderByTime": "ASC", + "policy": "default", + "refId": "mmap'd", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "append_vecs_open_as_mmap" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + }, + { + "params": [ + "mmap'd" + ], + "type": "alias" + } + ] + ], + "tags": [ + { + "key": "host_id", + "operator": "=~", + "value": "/^$node$/" + } + ] + } + ], + "title": "Append Vecs", + "type": "timeseries" + }, + { + "datasource": "InfluxDB", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + } + ] + }, + "unit": "µs" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 72 + }, + "id": 
25, + "options": { + "legend": { + "calcs": [ + "mean", + "last", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.2.0", + "targets": [ + { + "datasource": "InfluxDB", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + } + ], + "measurement": "clean_accounts", + "orderByTime": "ASC", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "construct_candidates_us" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + }, + { + "params": [ + "Construct Candidates" + ], + "type": "alias" + } + ] + ], + "tags": [ + { + "key": "host_id", + "operator": "=~", + "value": "/^$node$/" + } + ] + }, + { + "datasource": "InfluxDB", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + } + ], + "measurement": "clean_accounts", + "orderByTime": "ASC", + "policy": "default", + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "accounts_scan" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + }, + { + "params": [ + "Accounts Scan" + ], + "type": "alias" + } + ] + ], + "tags": [ + { + "key": "host_id", + "operator": "=~", + "value": "/^$node$/" + } + ] + }, + { + "datasource": "InfluxDB", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + } + ], + "measurement": "clean_accounts", + "orderByTime": "ASC", + "policy": "default", + "refId": "C", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "calc_deps" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + }, + { + "params": [ + "Calc Deps" + ], + "type": "alias" + } + ] + ], + "tags": [ + { + "key": "host_id", + "operator": "=~", + "value": "/^$node$/" + } + ] + }, + { + "datasource": "InfluxDB", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + } + ], + 
"measurement": "clean_accounts", + "orderByTime": "ASC", + "policy": "default", + "refId": "D", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "reclaims" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + }, + { + "params": [ + "Reclaims" + ], + "type": "alias" + } + ] + ], + "tags": [ + { + "key": "host_id", + "operator": "=~", + "value": "/^$node$/" + } + ] + } + ], + "title": "Clean Accounts: Timing Breakdown", + "type": "timeseries" + }, + { + "datasource": "InfluxDB", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 1 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 72 + }, + "id": 20, + "options": { + "legend": { + "calcs": [ + "mean", + "last", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.2.0", + "targets": [ + { + "datasource": "InfluxDB", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + } + ], + "measurement": "accounts_db-flush_accounts_cache", + "orderByTime": "ASC", + 
"policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "unflushable_unrooted_slot_count" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + }, + { + "params": [ + "Unflushable Slots" + ], + "type": "alias" + } + ] + ], + "tags": [ + { + "key": "host_id", + "operator": "=~", + "value": "/^$node$/" + } + ] + }, + { + "datasource": "InfluxDB", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + } + ], + "measurement": "accounts_db-flush_accounts_cache", + "orderByTime": "ASC", + "policy": "default", + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "excess_slot_count" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + }, + { + "params": [ + "Excess Slots (can flush)" + ], + "type": "alias" + } + ] + ], + "tags": [ + { + "key": "host_id", + "operator": "=~", + "value": "/^$node$/" + } + ] + } + ], + "title": "Cache Flush: Slot Status", + "type": "timeseries" + }, + { + "datasource": "InfluxDB", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 0 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + 
"h": 8, + "w": 12, + "x": 0, + "y": 80 + }, + "id": 24, + "options": { + "legend": { + "calcs": [ + "mean", + "last", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.2.0", + "targets": [ + { + "datasource": "InfluxDB", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + } + ], + "measurement": "clean_accounts", + "orderByTime": "ASC", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "useful_keys" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + }, + { + "params": [ + "Keys Cleaned" + ], + "type": "alias" + } + ] + ], + "tags": [ + { + "key": "host_id", + "operator": "=~", + "value": "/^$node$/" + } + ] + }, + { + "datasource": "InfluxDB", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + } + ], + "measurement": "clean_accounts", + "orderByTime": "ASC", + "policy": "default", + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "total_keys_count" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + }, + { + "params": [ + "Candidates" + ], + "type": "alias" + } + ] + ], + "tags": [ + { + "key": "host_id", + "operator": "=~", + "value": "/^$node$/" + } + ] + }, + { + "datasource": "InfluxDB", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + } + ], + "measurement": "clean_accounts", + "orderByTime": "ASC", + "policy": "default", + "refId": "C", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "slots_cleaned" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + }, + { + "params": [ + "Slots Cleaned" + ], + "type": "alias" + } + ] + ], + "tags": [ + { + "key": "host_id", + "operator": "=~", + "value": "/^$node$/" + } + ] + } + ], + "title": "Clean Accounts: Progress", + "type": "timeseries" + }, + { 
+ "datasource": "InfluxDB", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + } + ] + }, + "unit": "decbytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 80 + }, + "id": 22, + "options": { + "legend": { + "calcs": [ + "mean", + "last", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.2.0", + "targets": [ + { + "datasource": "InfluxDB", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + } + ], + "measurement": "accounts_db-flush_accounts_cache", + "orderByTime": "ASC", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "account_bytes_flushed" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + }, + { + "params": [ + "Bytes Flushed" + ], + "type": "alias" + } + ] + ], + "tags": [ + { + "key": "host_id", + "operator": "=~", + "value": "/^$node$/" + } + ] + } + ], + "title": "Cache Flush: Throughput", + "type": "timeseries" + } + ], + "preload": false, + "refresh": "5s", + "schemaVersion": 42, + "tags": [ + "agave", + 
"indexing" + ], + "templating": { + "list": [ + { + "current": {}, + "datasource": { + "type": "influxdb", + "uid": "InfluxDB" + }, + "definition": "SELECT \"name\" FROM (SELECT LAST(value) FROM \"validator_name\" GROUP BY \"name\") ORDER BY time DESC", + "includeAll": false, + "label": "Node", + "multi": false, + "name": "node_name", + "options": [], + "query": "SELECT \"name\" FROM (SELECT LAST(value) FROM \"validator_name\" GROUP BY \"name\") ORDER BY time DESC", + "refresh": 1, + "regex": "", + "sort": 0, + "type": "query" + }, + { + "current": {}, + "datasource": { + "type": "influxdb", + "uid": "InfluxDB" + }, + "definition": "SELECT \"host_id\" FROM (SELECT LAST(value) FROM \"validator_name\" WHERE \"name\" =~ /^$node_name$/ GROUP BY \"host_id\") ORDER BY time DESC LIMIT 1", + "includeAll": false, + "label": "Validator ID", + "multi": false, + "name": "node", + "options": [], + "query": "SELECT \"host_id\" FROM (SELECT LAST(value) FROM \"validator_name\" WHERE \"name\" =~ /^$node_name$/ GROUP BY \"host_id\") ORDER BY time DESC LIMIT 1", + "refresh": 1, + "regex": "", + "sort": 0, + "type": "query" + } + ] + }, + "time": { + "from": "now-15m", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "Agave Indexing", + "uid": "agave-indexing", + "version": 17 +} diff --git a/stack-orchestrator/config/monitoring/grafana-dashboards/agave-transactions.json b/stack-orchestrator/config/monitoring/grafana-dashboards/agave-transactions.json new file mode 100644 index 00000000..fc684687 --- /dev/null +++ b/stack-orchestrator/config/monitoring/grafana-dashboards/agave-transactions.json @@ -0,0 +1,1985 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": 5, + "links": 
[], + "panels": [ + { + "datasource": "InfluxDB", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 0 + }, + "id": 1, + "options": { + "legend": { + "calcs": [ + "mean", + "last", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.2.0", + "targets": [ + { + "datasource": "InfluxDB", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + } + ], + "measurement": "banking_stage-vote_slot_packet_counts", + "orderByTime": "ASC", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "transactions_attempted_processing_count" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + }, + { + "params": [ + "Attempted" + ], + "type": "alias" + } + ] + ], + "tags": [ + { + "key": "host_id", + "operator": "=~", + "value": "/^$node$/" + } + ] + } + ], + "title": "Transaction Rate (Attempted Processing)", + "type": "timeseries" + }, + { + "datasource": "InfluxDB", + 
"fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 0 + }, + "id": 2, + "options": { + "legend": { + "calcs": [ + "mean", + "last", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.2.0", + "targets": [ + { + "datasource": "InfluxDB", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + } + ], + "measurement": "banking_stage-vote_slot_packet_counts", + "orderByTime": "ASC", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "committed_transactions_count" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + }, + { + "params": [ + "Committed" + ], + "type": "alias" + } + ] + ], + "tags": [ + { + "key": "host_id", + "operator": "=~", + "value": "/^$node$/" + } + ] + }, + { + "datasource": "InfluxDB", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + } + ], + "measurement": "banking_stage-vote_slot_packet_counts", + "orderByTime": 
"ASC", + "policy": "default", + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "committed_transactions_with_successful_result_count" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + }, + { + "params": [ + "Successful" + ], + "type": "alias" + } + ] + ], + "tags": [ + { + "key": "host_id", + "operator": "=~", + "value": "/^$node$/" + } + ] + } + ], + "title": "Transaction Commits (Total vs Successful)", + "type": "timeseries" + }, + { + "datasource": "InfluxDB", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 1 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 8 + }, + "id": 3, + "options": { + "legend": { + "calcs": [ + "mean", + "last", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.2.0", + "targets": [ + { + "datasource": "InfluxDB", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + } + ], + "measurement": "banking_stage-vote_slot_packet_counts", + "orderByTime": 
"ASC", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "retryable_errored_transaction_count" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + }, + { + "params": [ + "Retryable Errors" + ], + "type": "alias" + } + ] + ], + "tags": [ + { + "key": "host_id", + "operator": "=~", + "value": "/^$node$/" + } + ] + }, + { + "datasource": "InfluxDB", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + } + ], + "measurement": "banking_stage-vote_slot_packet_counts", + "orderByTime": "ASC", + "policy": "default", + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "nonretryable_errored_transactions_count" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + }, + { + "params": [ + "Non-retryable Errors" + ], + "type": "alias" + } + ] + ], + "tags": [ + { + "key": "host_id", + "operator": "=~", + "value": "/^$node$/" + } + ] + }, + { + "datasource": "InfluxDB", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + } + ], + "measurement": "banking_stage-vote_slot_packet_counts", + "orderByTime": "ASC", + "policy": "default", + "refId": "C", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "executed_transactions_failed_commit_count" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + }, + { + "params": [ + "Failed Commit" + ], + "type": "alias" + } + ] + ], + "tags": [ + { + "key": "host_id", + "operator": "=~", + "value": "/^$node$/" + } + ] + } + ], + "title": "Transaction Failures", + "type": "timeseries" + }, + { + "datasource": "InfluxDB", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", 
+ "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 8 + }, + "id": 4, + "options": { + "legend": { + "calcs": [ + "mean", + "last", + "min" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.2.0", + "targets": [ + { + "datasource": "InfluxDB", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + } + ], + "measurement": "banking_stage-vote_slot_packet_counts", + "orderByTime": "ASC", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "committed_transactions_with_successful_result_count" + ], + "type": "field" + }, + { + "params": [], + "type": "sum" + }, + { + "params": [ + "Succeeded" + ], + "type": "alias" + } + ], + [ + { + "params": [ + "transactions_attempted_processing_count" + ], + "type": "field" + }, + { + "params": [], + "type": "sum" + }, + { + "params": [ + "Attempted" + ], + "type": "alias" + } + ] + ], + "tags": [ + { + "key": "host_id", + "operator": "=~", + "value": "/^$node$/" + } + ] + } + ], + "title": "Transaction Success Rate", + "transformations": [ + { + "id": "calculateField", + "options": { + "alias": "Success Rate", + "binary": { + "left": { + "matcher": { + "id": "byName", + "options": "banking_stage-vote_slot_packet_counts.Succeeded" + } + }, + "operator": "/", + "right": { + 
"matcher": { + "id": "byName", + "options": "banking_stage-vote_slot_packet_counts.Attempted" + } + } + }, + "mode": "binary", + "reduce": { + "reducer": "sum" + }, + "replaceFields": true + } + } + ], + "type": "timeseries" + }, + { + "datasource": "InfluxDB", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 16 + }, + "id": 5, + "options": { + "legend": { + "calcs": [ + "mean", + "last", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.2.0", + "targets": [ + { + "datasource": "InfluxDB", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + } + ], + "measurement": "banking_stage-vote_slot_packet_counts", + "orderByTime": "ASC", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "account_lock_throttled_transactions_count" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + }, + { + "params": [ + "Account Lock Throttled" + ], + "type": "alias" + } 
+ ] + ], + "tags": [ + { + "key": "host_id", + "operator": "=~", + "value": "/^$node$/" + } + ] + }, + { + "datasource": "InfluxDB", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + } + ], + "measurement": "banking_stage-vote_slot_packet_counts", + "orderByTime": "ASC", + "policy": "default", + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "cost_model_throttled_transactions_count" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + }, + { + "params": [ + "Cost Model Throttled" + ], + "type": "alias" + } + ] + ], + "tags": [ + { + "key": "host_id", + "operator": "=~", + "value": "/^$node$/" + } + ] + }, + { + "datasource": "InfluxDB", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + } + ], + "measurement": "banking_stage-vote_slot_packet_counts", + "orderByTime": "ASC", + "policy": "default", + "refId": "C", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "account_locks_limit_throttled_transactions_count" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + }, + { + "params": [ + "Account Locks Limit Throttled" + ], + "type": "alias" + } + ] + ], + "tags": [ + { + "key": "host_id", + "operator": "=~", + "value": "/^$node$/" + } + ] + } + ], + "title": "Transaction Throttling", + "type": "timeseries" + }, + { + "datasource": "InfluxDB", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + 
"spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + } + ] + }, + "unit": "µs" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 16 + }, + "id": 6, + "options": { + "legend": { + "calcs": [ + "mean", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.2.0", + "targets": [ + { + "datasource": "InfluxDB", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "id" + ], + "type": "tag" + } + ], + "measurement": "banking_stage_worker_timing", + "orderByTime": "ASC", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "load_execute_us" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [ + { + "key": "host_id", + "operator": "=~", + "value": "/^$node$/" + } + ] + } + ], + "title": "Transaction Execution Time (per worker)", + "type": "timeseries" + }, + { + "datasource": "InfluxDB", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + 
} + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 24 + }, + "id": 7, + "options": { + "legend": { + "calcs": [ + "mean", + "last", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.2.0", + "targets": [ + { + "datasource": "InfluxDB", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "id" + ], + "type": "tag" + } + ], + "measurement": "banking_stage_worker_counts", + "orderByTime": "ASC", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "processed_transactions_count" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [ + { + "key": "host_id", + "operator": "=~", + "value": "/^$node$/" + } + ] + } + ], + "title": "Transactions Processed (per worker)", + "type": "timeseries" + }, + { + "datasource": "InfluxDB", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + 
"color": "green", + "value": 0 + } + ] + }, + "unit": "µs" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 24 + }, + "id": 8, + "options": { + "legend": { + "calcs": [ + "mean", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.2.0", + "targets": [ + { + "datasource": "InfluxDB", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "id" + ], + "type": "tag" + } + ], + "measurement": "banking_stage_worker_timing", + "orderByTime": "ASC", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "freeze_lock_us" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + }, + { + "params": [ + "Freeze Lock" + ], + "type": "alias" + } + ] + ], + "tags": [ + { + "key": "host_id", + "operator": "=~", + "value": "/^$node$/" + } + ] + }, + { + "datasource": "InfluxDB", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "id" + ], + "type": "tag" + } + ], + "measurement": "banking_stage_worker_timing", + "orderByTime": "ASC", + "policy": "default", + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "record_us" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + }, + { + "params": [ + "Record" + ], + "type": "alias" + } + ] + ], + "tags": [ + { + "key": "host_id", + "operator": "=~", + "value": "/^$node$/" + } + ] + }, + { + "datasource": "InfluxDB", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "id" + ], + "type": "tag" + } + ], + "measurement": "banking_stage_worker_timing", + "orderByTime": "ASC", + "policy": "default", + "refId": "C", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "commit_us" + ], + "type": "field" + }, + { + 
"params": [], + "type": "mean" + }, + { + "params": [ + "Commit" + ], + "type": "alias" + } + ] + ], + "tags": [ + { + "key": "host_id", + "operator": "=~", + "value": "/^$node$/" + } + ] + } + ], + "title": "Transaction Processing Time Breakdown", + "type": "timeseries" + }, + { + "datasource": "InfluxDB", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 32 + }, + "id": 9, + "options": { + "legend": { + "calcs": [ + "mean", + "last", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.2.0", + "targets": [ + { + "datasource": "InfluxDB", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + } + ], + "measurement": "banking_stage-vote_slot_transaction_errors", + "orderByTime": "ASC", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "account_in_use" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + }, + { + "params": [ + "Account In Use" + ], + 
"type": "alias" + } + ] + ], + "tags": [ + { + "key": "host_id", + "operator": "=~", + "value": "/^$node$/" + } + ] + }, + { + "datasource": "InfluxDB", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + } + ], + "measurement": "banking_stage-vote_slot_transaction_errors", + "orderByTime": "ASC", + "policy": "default", + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "blockhash_not_found" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + }, + { + "params": [ + "Blockhash Not Found" + ], + "type": "alias" + } + ] + ], + "tags": [ + { + "key": "host_id", + "operator": "=~", + "value": "/^$node$/" + } + ] + }, + { + "datasource": "InfluxDB", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + } + ], + "measurement": "banking_stage-vote_slot_transaction_errors", + "orderByTime": "ASC", + "policy": "default", + "refId": "C", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "blockhash_too_old" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + }, + { + "params": [ + "Blockhash Too Old" + ], + "type": "alias" + } + ] + ], + "tags": [ + { + "key": "host_id", + "operator": "=~", + "value": "/^$node$/" + } + ] + }, + { + "datasource": "InfluxDB", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + } + ], + "measurement": "banking_stage-vote_slot_transaction_errors", + "orderByTime": "ASC", + "policy": "default", + "refId": "D", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "already_processed" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + }, + { + "params": [ + "Already Processed" + ], + "type": "alias" + } + ] + ], + "tags": [ + { + "key": "host_id", + "operator": "=~", + "value": "/^$node$/" + } + ] + }, + { + "datasource": "InfluxDB", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + } + ], + "measurement": 
"banking_stage-vote_slot_transaction_errors", + "orderByTime": "ASC", + "policy": "default", + "refId": "E", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "instruction_error" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + }, + { + "params": [ + "Instruction Error" + ], + "type": "alias" + } + ] + ], + "tags": [ + { + "key": "host_id", + "operator": "=~", + "value": "/^$node$/" + } + ] + } + ], + "title": "Top Transaction Error Types", + "type": "timeseries" + }, + { + "datasource": "InfluxDB", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 32 + }, + "id": 10, + "options": { + "legend": { + "calcs": [ + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.2.0", + "targets": [ + { + "datasource": "InfluxDB", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + } + ], + "measurement": "banking_stage-vote_slot_packet_counts", + "orderByTime": "ASC", + "policy": "default", 
+ "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "total_new_valid_packets" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + }, + { + "params": [ + "Valid Packets" + ], + "type": "alias" + } + ] + ], + "tags": [ + { + "key": "host_id", + "operator": "=~", + "value": "/^$node$/" + } + ] + }, + { + "datasource": "InfluxDB", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + } + ], + "measurement": "banking_stage-vote_slot_packet_counts", + "orderByTime": "ASC", + "policy": "default", + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "newly_buffered_packets_count" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + }, + { + "params": [ + "Buffered" + ], + "type": "alias" + } + ] + ], + "tags": [ + { + "key": "host_id", + "operator": "=~", + "value": "/^$node$/" + } + ] + }, + { + "datasource": "InfluxDB", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + } + ], + "measurement": "banking_stage-vote_slot_packet_counts", + "orderByTime": "ASC", + "policy": "default", + "refId": "C", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "end_of_slot_unprocessed_buffer_len" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + }, + { + "params": [ + "Unprocessed Buffer" + ], + "type": "alias" + } + ] + ], + "tags": [ + { + "key": "host_id", + "operator": "=~", + "value": "/^$node$/" + } + ] + } + ], + "title": "Packet Buffer Status", + "type": "timeseries" + } + ], + "preload": false, + "refresh": "5s", + "schemaVersion": 42, + "tags": [ + "agave", + "transactions" + ], + "templating": { + "list": [ + { + "current": {}, + "datasource": { + "type": "influxdb", + "uid": "InfluxDB" + }, + "definition": "SELECT \"name\" FROM (SELECT LAST(value) FROM \"validator_name\" GROUP BY \"name\") ORDER BY time DESC", + "includeAll": false, + "label": "Node", + "multi": false, + "name": "node_name", + 
"options": [], + "query": "SELECT \"name\" FROM (SELECT LAST(value) FROM \"validator_name\" GROUP BY \"name\") ORDER BY time DESC", + "refresh": 1, + "regex": "", + "sort": 0, + "type": "query" + }, + { + "current": {}, + "datasource": { + "type": "influxdb", + "uid": "InfluxDB" + }, + "definition": "SELECT \"host_id\" FROM (SELECT LAST(value) FROM \"validator_name\" WHERE \"name\" =~ /^$node_name$/ GROUP BY \"host_id\") ORDER BY time DESC LIMIT 1", + "includeAll": false, + "label": "Validator ID", + "multi": false, + "name": "node", + "options": [], + "query": "SELECT \"host_id\" FROM (SELECT LAST(value) FROM \"validator_name\" WHERE \"name\" =~ /^$node_name$/ GROUP BY \"host_id\") ORDER BY time DESC LIMIT 1", + "refresh": 1, + "regex": "", + "sort": 0, + "type": "query" + } + ] + }, + "time": { + "from": "now-15m", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "Agave Transactions", + "uid": "agave-transactions", + "version": 1 +} diff --git a/stack-orchestrator/config/monitoring/grafana-dashboards/dashboards.yml b/stack-orchestrator/config/monitoring/grafana-dashboards/dashboards.yml new file mode 100644 index 00000000..89c4b2ad --- /dev/null +++ b/stack-orchestrator/config/monitoring/grafana-dashboards/dashboards.yml @@ -0,0 +1,12 @@ +apiVersion: 1 + +providers: + - name: 'default' + orgId: 1 + folder: '' + type: file + disableDeletion: false + editable: true + options: + path: /etc/grafana/provisioning/dashboards + foldersFromFilesStructure: false diff --git a/stack-orchestrator/config/monitoring/grafana-dashboards/sync-status.json b/stack-orchestrator/config/monitoring/grafana-dashboards/sync-status.json new file mode 100644 index 00000000..b5d640f4 --- /dev/null +++ b/stack-orchestrator/config/monitoring/grafana-dashboards/sync-status.json @@ -0,0 +1,2183 @@ +{ + "annotations": { + "list": [] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": null, + "links": [], + "panels": [ + { + "datasource": 
"InfluxDB", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Slots Behind", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 2, + "options": { + "legend": { + "calcs": [ + "mean", + "last", + "max", + "min" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.0.0", + "targets": [ + { + "alias": "Current slot", + "datasource": "InfluxDB", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "linear" + ], + "type": "fill" + } + ], + "measurement": "replay-slot-stats", + "orderByTime": "ASC", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "slot" + ], + "type": "field" + }, + { + "params": [], + "type": "max" + } + ] + ], + "tags": [ + { + "key": "host_id", + "operator": "=~", + "value": "/^$node$/" + } + ] + }, + { + "alias": "Canonical slot", + "datasource": "InfluxDB", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "linear" + ], + "type": "fill" + } + ], + "measurement": 
"canonical_slot", + "orderByTime": "ASC", + "policy": "default", + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "slot" + ], + "type": "field" + }, + { + "params": [], + "type": "max" + } + ] + ], + "tags": [] + } + ], + "title": "Slot Lag", + "transformations": [ + { + "id": "calculateField", + "options": { + "alias": "Slot difference vs cluster", + "binary": { + "left": { + "matcher": { + "id": "byName", + "options": "Canonical slot" + } + }, + "operator": "-", + "right": { + "matcher": { + "id": "byName", + "options": "Current slot" + } + } + }, + "mode": "binary", + "reduce": { + "reducer": "mean" + }, + "replaceFields": false + } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Canonical slot": true, + "Current slot": true, + "Time": false + }, + "includeByName": {}, + "indexByName": {}, + "orderByMode": "manual", + "renameByName": {} + } + } + ], + "type": "timeseries" + }, + { + "datasource": "InfluxDB", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Shreds/sec", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 0, + "y": 16 + }, + "id": 4, + "options": { + "legend": { + "calcs": [ + "mean", + 
"last", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "11.0.0", + "targets": [ + { + "alias": "Repaired shreds/sec", + "datasource": "InfluxDB", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "linear" + ], + "type": "fill" + } + ], + "measurement": "slot_stats_tracking_complete", + "orderByTime": "ASC", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "num_repaired" + ], + "type": "field" + }, + { + "params": [], + "type": "sum" + }, + { + "params": [ + "1s" + ], + "type": "derivative" + } + ] + ], + "tags": [ + { + "key": "host_id", + "operator": "=~", + "value": "/^$node$/" + } + ] + }, + { + "alias": "Recovered shreds/sec (FEC)", + "datasource": "InfluxDB", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "linear" + ], + "type": "fill" + } + ], + "measurement": "slot_stats_tracking_complete", + "orderByTime": "ASC", + "policy": "default", + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "num_recovered" + ], + "type": "field" + }, + { + "params": [], + "type": "sum" + }, + { + "params": [ + "1s" + ], + "type": "derivative" + } + ] + ], + "tags": [ + { + "key": "host_id", + "operator": "=~", + "value": "/^$node$/" + } + ] + } + ], + "title": "Shred Reception Rate", + "type": "timeseries" + }, + { + "datasource": "InfluxDB", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Slots per second", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + 
"insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 8, + "y": 16 + }, + "id": 5, + "options": { + "legend": { + "calcs": [ + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.0.0", + "targets": [ + { + "alias": "Slots/second", + "datasource": "InfluxDB", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "linear" + ], + "type": "fill" + } + ], + "measurement": "replay-slot-stats", + "orderByTime": "ASC", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "slot" + ], + "type": "field" + }, + { + "params": [], + "type": "max" + }, + { + "params": [ + "1s" + ], + "type": "derivative" + } + ] + ], + "tags": [ + { + "key": "host_id", + "operator": "=~", + "value": "/^$node$/" + } + ] + } + ], + "title": "Replay Speed", + "type": "timeseries" + }, + { + "datasource": "InfluxDB", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "yellow", + "value": 0.5 + }, + { + "color": "green", + "value": 1 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 16 + }, + "id": 6, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + 
"percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "text": {}, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.0.0", + "targets": [ + { + "alias": "Completed Slots/sec", + "datasource": "InfluxDB", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + } + ], + "measurement": "shred_insert_is_full", + "orderByTime": "ASC", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "slot" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + }, + { + "params": [ + "1s" + ], + "type": "derivative" + } + ] + ], + "tags": [ + { + "key": "host_id", + "operator": "=~", + "value": "/^$node$/" + } + ] + } + ], + "title": "Slot Completion Rate", + "transformations": [], + "type": "stat" + }, + { + "datasource": "InfluxDB", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Cache hit %" + }, + "properties": [ + { + "id": "custom.axisPlacement", + "value": "right" + }, + { + "id": "unit", + "value": "percent" 
+ } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 24 + }, + "id": 7, + "options": { + "legend": { + "calcs": [ + "mean", + "last", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "11.0.0", + "targets": [ + { + "alias": "Shreds dropped", + "datasource": "InfluxDB", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "linear" + ], + "type": "fill" + } + ], + "measurement": "retransmit-stage", + "orderByTime": "ASC", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "num_shreds_dropped_xdp_full" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [ + { + "key": "host_id", + "operator": "=~", + "value": "/^$node$/" + } + ] + }, + { + "alias": "Small batches", + "datasource": "InfluxDB", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "linear" + ], + "type": "fill" + } + ], + "measurement": "retransmit-stage", + "orderByTime": "ASC", + "policy": "default", + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "num_small_batches" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [ + { + "key": "host_id", + "operator": "=~", + "value": "/^$node$/" + } + ] + } + ], + "title": "Retransmit Pipeline Health", + "type": "timeseries" + }, + { + "datasource": "InfluxDB", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Time (ms)", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 30, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + 
}, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 24 + }, + "id": 8, + "options": { + "legend": { + "calcs": [ + "mean", + "last", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "11.0.0", + "targets": [ + { + "alias": "Fetch entries", + "datasource": "InfluxDB", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "linear" + ], + "type": "fill" + } + ], + "measurement": "replay-slot-stats", + "orderByTime": "ASC", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "fetch_entries_time" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [ + { + "key": "host_id", + "operator": "=~", + "value": "/^$node$/" + } + ] + }, + { + "alias": "PoH verification", + "datasource": "InfluxDB", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "linear" + ], + "type": "fill" + } + ], + "measurement": "replay-slot-stats", + "orderByTime": "ASC", + "policy": "default", + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "entry_poh_verification_time" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [ + { + "key": "host_id", + "operator": "=~", + "value": "/^$node$/" + } + ] + }, + { + "alias": "TX verification", + "datasource": "InfluxDB", + "groupBy": [ 
+ { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "linear" + ], + "type": "fill" + } + ], + "measurement": "replay-slot-stats", + "orderByTime": "ASC", + "policy": "default", + "refId": "C", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "entry_transaction_verification_time" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [ + { + "key": "host_id", + "operator": "=~", + "value": "/^$node$/" + } + ] + }, + { + "alias": "Execute batches", + "datasource": "InfluxDB", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "linear" + ], + "type": "fill" + } + ], + "measurement": "replay-slot-stats", + "orderByTime": "ASC", + "policy": "default", + "refId": "D", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "execute_batches_us" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + }, + { + "params": [ + "/ 1000" + ], + "type": "math" + } + ] + ], + "tags": [ + { + "key": "host_id", + "operator": "=~", + "value": "/^$node$/" + } + ] + } + ], + "title": "Replay Pipeline Breakdown", + "type": "timeseries" + }, + { + "datasource": "InfluxDB", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 70 + }, + { + "color": "red", + "value": 90 + } + ] + }, + "unit": "percent" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Disk Queue" + }, + "properties": [ + { + "id": "unit", + "value": "short" + }, + { + "id": "thresholds", + "value": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 10 + }, + { + "color": "red", + "value": 50 + } + ] + } + } + ] + } + ] + }, + "gridPos": { + "h": 4, + "w": 24, + "x": 0, + "y": 32 + }, + "id": 9, + "options": { + 
"colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "text": {}, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.0.0", + "targets": [ + { + "alias": "CPU %", + "datasource": "InfluxDB", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + } + ], + "measurement": "cpu", + "orderByTime": "ASC", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "usage_idle" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + }, + { + "params": [ + " * -1 + 100" + ], + "type": "math" + } + ] + ], + "tags": [ + { + "key": "host_id", + "operator": "=~", + "value": "/^$node$/" + }, + { + "key": "cpu", + "operator": "=", + "value": "cpu-total" + } + ] + }, + { + "alias": "Memory %", + "datasource": "InfluxDB", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + } + ], + "measurement": "mem", + "orderByTime": "ASC", + "policy": "default", + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "used_percent" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [ + { + "key": "host_id", + "operator": "=~", + "value": "/^$node$/" + } + ] + }, + { + "alias": "Disk Queue", + "datasource": "InfluxDB", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + } + ], + "measurement": "diskio", + "orderByTime": "ASC", + "policy": "default", + "refId": "C", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "iops_in_progress" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [ + { + "key": "host_id", + "operator": "=~", + "value": "/^$node$/" + } + ] + } + ], + "title": "System Resource Pressure", + 
"transformations": [], + "type": "stat" + }, + { + "datasource": "InfluxDB", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "FEC Recovery %", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "scheme", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "area" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 10 + }, + { + "color": "red", + "value": 30 + } + ] + }, + "unit": "percent", + "min": 0, + "max": 100 + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 12, + "x": 0, + "y": 36 + }, + "id": 10, + "options": { + "legend": { + "calcs": [ + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.0.0", + "targets": [ + { + "alias": "recovered", + "datasource": "InfluxDB", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "linear" + ], + "type": "fill" + } + ], + "measurement": "slot_stats_tracking_complete", + "orderByTime": "ASC", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "num_recovered" + ], + "type": "field" + }, + { + "params": [], + "type": "sum" + } + ] + ], + "tags": [ + { + "key": "host_id", + "operator": "=~", + "value": "/^$node$/" + } + ] + }, + { + "alias": "total_shreds", + 
"datasource": "InfluxDB", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "linear" + ], + "type": "fill" + } + ], + "measurement": "slot_stats_tracking_complete", + "orderByTime": "ASC", + "policy": "default", + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "last_index" + ], + "type": "field" + }, + { + "params": [], + "type": "sum" + } + ] + ], + "tags": [ + { + "key": "host_id", + "operator": "=~", + "value": "/^$node$/" + } + ] + } + ], + "title": "FEC Recovery Ratio", + "transformations": [ + { + "id": "calculateField", + "options": { + "alias": "FEC Recovery %", + "binary": { + "left": "recovered", + "operator": "/", + "right": "total_shreds" + }, + "mode": "binary" + } + }, + { + "id": "calculateField", + "options": { + "alias": "FEC Recovery %", + "binary": { + "left": "FEC Recovery %", + "operator": "*", + "right": "100" + }, + "mode": "binary", + "replaceFields": true + } + } + ], + "type": "timeseries" + }, + { + "datasource": "InfluxDB", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "yellow", + "value": 100 + }, + { + "color": "green", + "value": 500 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 6, + "x": 12, + "y": 36 + }, + "id": 11, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "text": {}, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.0.0", + "targets": [ + { + "alias": "Active Peers", + "datasource": "InfluxDB", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + } + ], + "measurement": 
"cluster_info_stats", + "orderByTime": "ASC", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "num_nodes" + ], + "type": "field" + }, + { + "params": [], + "type": "last" + } + ] + ], + "tags": [ + { + "key": "host_id", + "operator": "=~", + "value": "/^$node$/" + } + ] + } + ], + "title": "Active Peer Count", + "type": "stat" + }, + { + "datasource": "InfluxDB", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Peers", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Stale Peers" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Dead Peers" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 6, + "w": 9, + "x": 0, + "y": 42 + }, + "id": 12, + "options": { + "legend": { + "calcs": [ + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "11.0.0", + "targets": [ + { + 
"alias": "Stale Peers", + "datasource": "InfluxDB", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "linear" + ], + "type": "fill" + } + ], + "measurement": "cluster_nodes_retransmit", + "orderByTime": "ASC", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "num_nodes_stale" + ], + "type": "field" + }, + { + "params": [], + "type": "last" + } + ] + ], + "tags": [ + { + "key": "host_id", + "operator": "=~", + "value": "/^$node$/" + } + ] + }, + { + "alias": "Dead Peers", + "datasource": "InfluxDB", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "linear" + ], + "type": "fill" + } + ], + "measurement": "cluster_nodes_retransmit", + "orderByTime": "ASC", + "policy": "default", + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "num_nodes_dead" + ], + "type": "field" + }, + { + "params": [], + "type": "last" + } + ] + ], + "tags": [ + { + "key": "host_id", + "operator": "=~", + "value": "/^$node$/" + } + ] + } + ], + "title": "Peer Health", + "type": "timeseries" + }, + { + "datasource": "InfluxDB", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Messages/sec", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + 
"color": "green", + "value": null + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 9, + "x": 9, + "y": 42 + }, + "id": 13, + "options": { + "legend": { + "calcs": [ + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "11.0.0", + "targets": [ + { + "alias": "Push messages/sec", + "datasource": "InfluxDB", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "linear" + ], + "type": "fill" + } + ], + "measurement": "cluster_info_stats2", + "orderByTime": "ASC", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "new_push_requests" + ], + "type": "field" + }, + { + "params": [], + "type": "sum" + }, + { + "params": [ + "1s" + ], + "type": "derivative" + } + ] + ], + "tags": [ + { + "key": "host_id", + "operator": "=~", + "value": "/^$node$/" + } + ] + }, + { + "alias": "Pull requests/sec", + "datasource": "InfluxDB", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "linear" + ], + "type": "fill" + } + ], + "measurement": "cluster_info_stats3", + "orderByTime": "ASC", + "policy": "default", + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "new_pull_requests" + ], + "type": "field" + }, + { + "params": [], + "type": "sum" + }, + { + "params": [ + "1s" + ], + "type": "derivative" + } + ] + ], + "tags": [ + { + "key": "host_id", + "operator": "=~", + "value": "/^$node$/" + } + ] + } + ], + "title": "Gossip Message Rate", + "type": "timeseries" + }, + { + "datasource": "InfluxDB", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + 
"gridPos": { + "h": 6, + "w": 6, + "x": 18, + "y": 42 + }, + "id": 14, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "text": {}, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.0.0", + "targets": [ + { + "alias": "TVU Peers", + "datasource": "InfluxDB", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + } + ], + "measurement": "cluster_info_stats", + "orderByTime": "ASC", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "tvu_peers" + ], + "type": "field" + }, + { + "params": [], + "type": "last" + } + ] + ], + "tags": [ + { + "key": "host_id", + "operator": "=~", + "value": "/^$node$/" + } + ] + }, + { + "alias": "Repair Peers", + "datasource": "InfluxDB", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + } + ], + "measurement": "cluster_info_stats2", + "orderByTime": "ASC", + "policy": "default", + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "repair_peers" + ], + "type": "field" + }, + { + "params": [], + "type": "last" + } + ] + ], + "tags": [ + { + "key": "host_id", + "operator": "=~", + "value": "/^$node$/" + } + ] + } + ], + "title": "Critical Peer Types", + "type": "stat" + } + ], + "schemaVersion": 39, + "tags": [ + "solana", + "sync" + ], + "templating": { + "list": [ + { + "current": {}, + "datasource": { + "type": "influxdb", + "uid": "InfluxDB" + }, + "definition": "SELECT \"name\" FROM (SELECT LAST(value) FROM \"validator_name\" GROUP BY \"name\") ORDER BY time DESC", + "includeAll": false, + "label": "Node", + "multi": false, + "name": "node_name", + "options": [], + "query": "SELECT \"name\" FROM (SELECT LAST(value) FROM \"validator_name\" GROUP BY 
\"name\") ORDER BY time DESC", + "refresh": 1, + "regex": "", + "sort": 0, + "type": "query" + }, + { + "current": {}, + "datasource": { + "type": "influxdb", + "uid": "InfluxDB" + }, + "definition": "SELECT \"host_id\" FROM (SELECT LAST(value) FROM \"validator_name\" WHERE \"name\" =~ /^$node_name$/ GROUP BY \"host_id\") ORDER BY time DESC LIMIT 1", + "includeAll": false, + "label": "Validator ID", + "multi": false, + "name": "node", + "options": [], + "query": "SELECT \"host_id\" FROM (SELECT LAST(value) FROM \"validator_name\" WHERE \"name\" =~ /^$node_name$/ GROUP BY \"host_id\") ORDER BY time DESC LIMIT 1", + "refresh": 1, + "regex": "", + "sort": 0, + "type": "query" + } + ] + }, + "time": { + "from": "now-6h", + "to": "now" + }, + "timepicker": {}, + "timezone": "browser", + "title": "Solana Sync Status", + "uid": "sync-status", + "version": 1, + "weekStart": "" +} diff --git a/stack-orchestrator/config/monitoring/grafana-dashboards/system-overview.json b/stack-orchestrator/config/monitoring/grafana-dashboards/system-overview.json new file mode 100644 index 00000000..0d376af8 --- /dev/null +++ b/stack-orchestrator/config/monitoring/grafana-dashboards/system-overview.json @@ -0,0 +1,1643 @@ +{ + "annotations": { + "list": [] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": null, + "links": [], + "panels": [ + { + "datasource": "InfluxDB", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 70 + }, + { + "color": "red", + "value": 90 + } + ] + }, + "unit": "percent", + "min": 0, + "max": 100 + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 6, + "x": 0, + "y": 0 + }, + "id": 1, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + 
"reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "text": {}, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.0.0", + "targets": [ + { + "alias": "Memory Usage", + "datasource": "InfluxDB", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "mem", + "orderByTime": "ASC", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "used_percent" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [ + { + "key": "host_id", + "operator": "=~", + "value": "/^$node$/" + } + ] + } + ], + "title": "Memory Usage", + "type": "gauge" + }, + { + "datasource": "InfluxDB", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 70 + }, + { + "color": "red", + "value": 90 + } + ] + }, + "unit": "percent", + "min": 0, + "max": 100 + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 6, + "x": 6, + "y": 0 + }, + "id": 2, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "text": {}, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.0.0", + "targets": [ + { + "alias": "CPU Usage", + "datasource": "InfluxDB", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "cpu", + "orderByTime": "ASC", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + 
"params": [ + "usage_user" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ], + [ + { + "params": [ + "usage_system" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [ + { + "key": "host_id", + "operator": "=~", + "value": "/^$node$/" + }, + { + "key": "cpu", + "operator": "=", + "value": "cpu-total" + } + ] + } + ], + "title": "CPU Usage", + "transformations": [ + { + "id": "calculateField", + "options": { + "alias": "Total CPU", + "binary": { + "left": "mem.mean {cpu: cpu-total}", + "operator": "+", + "right": "mem.mean.1 {cpu: cpu-total}" + }, + "mode": "binary", + "reduce": { + "reducer": "sum" + } + } + } + ], + "type": "gauge" + }, + { + "datasource": "InfluxDB", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 10 + }, + { + "color": "red", + "value": 30 + } + ] + }, + "unit": "percent", + "min": 0, + "max": 100 + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 6, + "x": 12, + "y": 0 + }, + "id": 3, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "text": {}, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.0.0", + "targets": [ + { + "alias": "IO Wait", + "datasource": "InfluxDB", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "cpu", + "orderByTime": "ASC", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "usage_iowait" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + 
"tags": [ + { + "key": "host_id", + "operator": "=~", + "value": "/^$node$/" + }, + { + "key": "cpu", + "operator": "=", + "value": "cpu-total" + } + ] + } + ], + "title": "Disk I/O Wait", + "type": "gauge" + }, + { + "datasource": "InfluxDB", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "bps" + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 6, + "x": 18, + "y": 0 + }, + "id": 4, + "options": { + "legend": { + "calcs": [ + "mean", + "last" + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "11.0.0", + "targets": [ + { + "alias": "$tag_interface RX", + "datasource": "InfluxDB", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "interface" + ], + "type": "tag" + }, + { + "params": [ + "linear" + ], + "type": "fill" + } + ], + "measurement": "net", + "orderByTime": "ASC", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "bytes_recv" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + }, + { + "params": [ + "1s" + ], + "type": "derivative" + }, + { + 
"params": [ + "* 8" + ], + "type": "math" + } + ] + ], + "tags": [ + { + "key": "host_id", + "operator": "=~", + "value": "/^$node$/" + } + ] + }, + { + "alias": "$tag_interface TX", + "datasource": "InfluxDB", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "interface" + ], + "type": "tag" + }, + { + "params": [ + "linear" + ], + "type": "fill" + } + ], + "measurement": "net", + "orderByTime": "ASC", + "policy": "default", + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "bytes_sent" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + }, + { + "params": [ + "1s" + ], + "type": "derivative" + }, + { + "params": [ + "* 8" + ], + "type": "math" + } + ] + ], + "tags": [ + { + "key": "host_id", + "operator": "=~", + "value": "/^$node$/" + } + ] + } + ], + "title": "Network Throughput", + "type": "timeseries" + }, + { + "datasource": "InfluxDB", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 70 + }, + { + "color": "red", + "value": 90 + } + ] + }, + "unit": "percent", + "min": 0, + "max": 100 + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 0, + "y": 6 + }, + "id": 5, + "options": { + "displayMode": "gradient", + "maxVizHeight": 300, + "minVizHeight": 10, + "minVizWidth": 0, + "namePlacement": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showUnfilled": true, + "sizing": "auto", + "valueMode": "color" + }, + "pluginVersion": "11.0.0", + "targets": [ + { + "alias": "$tag_path", + "datasource": "InfluxDB", + "groupBy": [ + { + "params": [ + "path" + ], + "type": "tag" + } + ], + "measurement": "disk", + "orderByTime": "ASC", + "policy": "default", + "refId": "A", + "resultFormat": 
"time_series", + "select": [ + [ + { + "params": [ + "used_percent" + ], + "type": "field" + }, + { + "params": [], + "type": "last" + } + ] + ], + "tags": [ + { + "key": "host_id", + "operator": "=~", + "value": "/^$node$/" + }, + { + "key": "path", + "operator": "=~", + "value": "/\\/srv\\/solana/" + } + ] + } + ], + "title": "Disk Usage by Mount", + "type": "bargauge" + }, + { + "datasource": "InfluxDB", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Bytes/sec", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "Bps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 8, + "y": 6 + }, + "id": 6, + "options": { + "legend": { + "calcs": [ + "mean", + "last", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "11.0.0", + "targets": [ + { + "alias": "$tag_name Read", + "datasource": "InfluxDB", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "name" + ], + "type": "tag" + }, + { + "params": [ + "linear" + ], + "type": "fill" + } + ], + "measurement": "diskio", + "orderByTime": "ASC", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + 
"select": [ + [ + { + "params": [ + "read_bytes" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + }, + { + "params": [ + "1s" + ], + "type": "derivative" + } + ] + ], + "tags": [ + { + "key": "host_id", + "operator": "=~", + "value": "/^$node$/" + } + ] + }, + { + "alias": "$tag_name Write", + "datasource": "InfluxDB", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "name" + ], + "type": "tag" + }, + { + "params": [ + "linear" + ], + "type": "fill" + } + ], + "measurement": "diskio", + "orderByTime": "ASC", + "policy": "default", + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "write_bytes" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + }, + { + "params": [ + "1s" + ], + "type": "derivative" + } + ] + ], + "tags": [ + { + "key": "host_id", + "operator": "=~", + "value": "/^$node$/" + } + ] + } + ], + "title": "Disk I/O Bytes", + "type": "timeseries" + }, + { + "datasource": "InfluxDB", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "IOPS", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "iops" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 6 + }, + "id": 7, + "options": { + 
"legend": { + "calcs": [ + "mean", + "last", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "11.0.0", + "targets": [ + { + "alias": "$tag_name Read", + "datasource": "InfluxDB", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "name" + ], + "type": "tag" + }, + { + "params": [ + "linear" + ], + "type": "fill" + } + ], + "measurement": "diskio", + "orderByTime": "ASC", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "reads" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + }, + { + "params": [ + "1s" + ], + "type": "derivative" + } + ] + ], + "tags": [ + { + "key": "host_id", + "operator": "=~", + "value": "/^$node$/" + } + ] + }, + { + "alias": "$tag_name Write", + "datasource": "InfluxDB", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "name" + ], + "type": "tag" + }, + { + "params": [ + "linear" + ], + "type": "fill" + } + ], + "measurement": "diskio", + "orderByTime": "ASC", + "policy": "default", + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "writes" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + }, + { + "params": [ + "1s" + ], + "type": "derivative" + } + ] + ], + "tags": [ + { + "key": "host_id", + "operator": "=~", + "value": "/^$node$/" + } + ] + } + ], + "title": "Disk IOPS", + "type": "timeseries" + }, + { + "datasource": "InfluxDB", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "CPU %", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": 
false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 0, + "y": 14 + }, + "id": 8, + "options": { + "legend": { + "calcs": [ + "mean", + "last", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.0.0", + "targets": [ + { + "alias": "Agave CPU", + "datasource": "InfluxDB", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "linear" + ], + "type": "fill" + } + ], + "measurement": "procstat", + "orderByTime": "ASC", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "cpu_usage" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [ + { + "key": "host_id", + "operator": "=~", + "value": "/^$node$/" + }, + { + "key": "pattern", + "operator": "=", + "value": "agave-validator" + } + ] + } + ], + "title": "Agave Process CPU", + "type": "timeseries" + }, + { + "datasource": "InfluxDB", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Memory (GB)", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + 
"lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "decgbytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 8, + "y": 14 + }, + "id": 9, + "options": { + "legend": { + "calcs": [ + "mean", + "last", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "11.0.0", + "targets": [ + { + "alias": "RSS (Resident)", + "datasource": "InfluxDB", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "linear" + ], + "type": "fill" + } + ], + "measurement": "procstat", + "orderByTime": "ASC", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "memory_rss" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + }, + { + "params": [ + "/ 1073741824" + ], + "type": "math" + } + ] + ], + "tags": [ + { + "key": "host_id", + "operator": "=~", + "value": "/^$node$/" + }, + { + "key": "pattern", + "operator": "=", + "value": "agave-validator" + } + ] + }, + { + "alias": "VMS (Virtual)", + "datasource": "InfluxDB", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "linear" + ], + "type": "fill" + } + ], + "measurement": "procstat", + "orderByTime": "ASC", + "policy": "default", + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "memory_vms" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + }, + { + "params": [ + "/ 1073741824" + ], + "type": "math" + } + ] + ], + "tags": [ + { + "key": "host_id", + 
"operator": "=~", + "value": "/^$node$/" + }, + { + "key": "pattern", + "operator": "=", + "value": "agave-validator" + } + ] + } + ], + "title": "Agave Process Memory", + "type": "timeseries" + }, + { + "datasource": "InfluxDB", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Bytes/sec", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "Bps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 14 + }, + "id": 10, + "options": { + "legend": { + "calcs": [ + "mean", + "last", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "11.0.0", + "targets": [ + { + "alias": "Read", + "datasource": "InfluxDB", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "linear" + ], + "type": "fill" + } + ], + "measurement": "procstat", + "orderByTime": "ASC", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "read_bytes" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + }, + { + "params": [ + "1s" + ], + "type": "derivative" + } + ] + ], + "tags": [ + { + "key": "host_id", + "operator": "=~", + 
"value": "/^$node$/" + }, + { + "key": "pattern", + "operator": "=", + "value": "agave-validator" + } + ] + }, + { + "alias": "Write", + "datasource": "InfluxDB", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "linear" + ], + "type": "fill" + } + ], + "measurement": "procstat", + "orderByTime": "ASC", + "policy": "default", + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "write_bytes" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + }, + { + "params": [ + "1s" + ], + "type": "derivative" + } + ] + ], + "tags": [ + { + "key": "host_id", + "operator": "=~", + "value": "/^$node$/" + }, + { + "key": "pattern", + "operator": "=", + "value": "agave-validator" + } + ] + } + ], + "title": "Agave Process I/O", + "type": "timeseries" + } + ], + "schemaVersion": 39, + "tags": [ + "solana", + "system" + ], + "templating": { + "list": [ + { + "current": {}, + "datasource": { + "type": "influxdb", + "uid": "InfluxDB" + }, + "definition": "SELECT \"name\" FROM (SELECT LAST(value) FROM \"validator_name\" GROUP BY \"name\") ORDER BY time DESC", + "includeAll": false, + "label": "Node", + "multi": false, + "name": "node_name", + "options": [], + "query": "SELECT \"name\" FROM (SELECT LAST(value) FROM \"validator_name\" GROUP BY \"name\") ORDER BY time DESC", + "refresh": 1, + "regex": "", + "sort": 0, + "type": "query" + }, + { + "current": {}, + "datasource": { + "type": "influxdb", + "uid": "InfluxDB" + }, + "definition": "SELECT \"host_id\" FROM (SELECT LAST(value) FROM \"validator_name\" WHERE \"name\" =~ /^$node_name$/ GROUP BY \"host_id\") ORDER BY time DESC LIMIT 1", + "includeAll": false, + "label": "Validator ID", + "multi": false, + "name": "node", + "options": [], + "query": "SELECT \"host_id\" FROM (SELECT LAST(value) FROM \"validator_name\" WHERE \"name\" =~ /^$node_name$/ GROUP BY \"host_id\") ORDER BY time DESC LIMIT 1", + "refresh": 1, + "regex": "", + "sort": 0, 
+ "type": "query" + } + ] + }, + "time": { + "from": "now-6h", + "to": "now" + }, + "timepicker": {}, + "timezone": "browser", + "title": "System Overview", + "uid": "system-overview", + "version": 1, + "weekStart": "" +} diff --git a/stack-orchestrator/config/monitoring/grafana-datasources/datasources.yml b/stack-orchestrator/config/monitoring/grafana-datasources/datasources.yml new file mode 100644 index 00000000..6e246fb7 --- /dev/null +++ b/stack-orchestrator/config/monitoring/grafana-datasources/datasources.yml @@ -0,0 +1,16 @@ +apiVersion: 1 + +datasources: + - name: InfluxDB + type: influxdb + access: proxy + url: http://monitoring-influxdb:8086 + database: agave_metrics + user: admin + isDefault: true + editable: true + secureJsonData: + password: admin + jsonData: + timeInterval: 10s + httpMode: GET diff --git a/stack-orchestrator/config/monitoring/scripts/check_canonical_slot.sh b/stack-orchestrator/config/monitoring/scripts/check_canonical_slot.sh new file mode 100755 index 00000000..8f9785cf --- /dev/null +++ b/stack-orchestrator/config/monitoring/scripts/check_canonical_slot.sh @@ -0,0 +1,17 @@ +#!/bin/bash +# Query canonical mainnet slot for sync lag comparison + +set -euo pipefail + +CANONICAL_RPC="${CANONICAL_RPC_URL:-https://api.mainnet-beta.solana.com}" + +response=$(curl -s --max-time 10 -X POST \ + -H "Content-Type: application/json" \ + -d '{"jsonrpc":"2.0","id":1,"method":"getSlot"}' \ + "$CANONICAL_RPC" 2>/dev/null || echo '{"result":0}') + +slot=$(echo "$response" | grep -o '"result":[0-9]*' | grep -o '[0-9]*' || echo "0") + +if [ "$slot" != "0" ]; then + echo "canonical_slot slot=${slot}i" +fi diff --git a/stack-orchestrator/config/monitoring/scripts/check_getslot_latency.sh b/stack-orchestrator/config/monitoring/scripts/check_getslot_latency.sh new file mode 100755 index 00000000..da36da6c --- /dev/null +++ b/stack-orchestrator/config/monitoring/scripts/check_getslot_latency.sh @@ -0,0 +1,33 @@ +#!/bin/bash +# Check getSlot RPC latency +# 
Outputs metrics in InfluxDB line protocol format + +set -euo pipefail + +RPC_URL="${NODE_RPC_URL:-http://localhost:8899}" +RPC_PAYLOAD='{"jsonrpc":"2.0","id":1,"method":"getSlot"}' + +response=$(curl -sk --max-time 10 -X POST \ + -H "Content-Type: application/json" \ + -d "$RPC_PAYLOAD" \ + -w "\n%{http_code}\n%{time_total}" \ + "$RPC_URL" 2>/dev/null || echo -e "\n000\n0") + +json_response=$(echo "$response" | head -n 1) +# curl -w output follows response body; blank lines may appear between them +http_code=$(echo "$response" | tail -2 | head -1) +time_total=$(echo "$response" | tail -1) + +latency_ms="$(awk -v t="$time_total" 'BEGIN { printf "%.0f", (t * 1000) }')" +# Strip leading zeros from http_code (influx line protocol rejects 000i) +http_code=$((10#${http_code:-0})) + +if [ "$http_code" = "200" ]; then + slot=$(echo "$json_response" | grep -o '"result":[0-9]*' | grep -o '[0-9]*' || echo "0") + [ "$slot" != "0" ] && success=1 || success=0 +else + success=0 + slot=0 +fi + +echo "rpc_latency,endpoint=direct,method=getSlot latency_ms=${latency_ms},success=${success}i,http_code=${http_code}i,slot=${slot}i" diff --git a/stack-orchestrator/config/monitoring/telegraf-config/telegraf.conf b/stack-orchestrator/config/monitoring/telegraf-config/telegraf.conf new file mode 100644 index 00000000..293f67ad --- /dev/null +++ b/stack-orchestrator/config/monitoring/telegraf-config/telegraf.conf @@ -0,0 +1,36 @@ +# Telegraf configuration for Agave monitoring + +[agent] + interval = "10s" + round_interval = true + metric_batch_size = 1000 + metric_buffer_limit = 10000 + collection_jitter = "0s" + flush_interval = "10s" + flush_jitter = "0s" + precision = "0s" + hostname = "telegraf" + omit_hostname = false + +# Output to InfluxDB +[[outputs.influxdb]] + urls = ["http://localhost:8086"] + database = "agave_metrics" + skip_database_creation = true + username = "admin" + password = "admin" + retention_policy = "" + write_consistency = "any" + timeout = "5s" + +# Custom getSlot 
latency check +[[inputs.exec]] + commands = ["/scripts/check_getslot_latency.sh"] + timeout = "30s" + data_format = "influx" + +# Canonical mainnet slot tracking +[[inputs.exec]] + commands = ["/scripts/check_canonical_slot.sh"] + timeout = "30s" + data_format = "influx" diff --git a/stack-orchestrator/container-build/laconicnetwork-agave/Dockerfile b/stack-orchestrator/container-build/laconicnetwork-agave/Dockerfile new file mode 100644 index 00000000..97c6227f --- /dev/null +++ b/stack-orchestrator/container-build/laconicnetwork-agave/Dockerfile @@ -0,0 +1,81 @@ +# Unified Agave/Jito Solana image +# Supports three modes via AGAVE_MODE env: test, rpc, validator +# +# Build args: +# AGAVE_REPO - git repo URL (anza-xyz/agave or jito-foundation/jito-solana) +# AGAVE_VERSION - git tag to build (e.g. v3.1.9, v3.1.8-jito) + +ARG AGAVE_REPO=https://github.com/anza-xyz/agave.git +ARG AGAVE_VERSION=v3.1.9 + +# ---------- Stage 1: Build ---------- +FROM rust:1.85-bookworm AS builder + +ARG AGAVE_REPO +ARG AGAVE_VERSION + +RUN apt-get update && apt-get install -y --no-install-recommends \ + build-essential \ + pkg-config \ + libssl-dev \ + libudev-dev \ + libclang-dev \ + protobuf-compiler \ + ca-certificates \ + git \ + cmake \ + && rm -rf /var/lib/apt/lists/* + +WORKDIR /build +RUN git clone "$AGAVE_REPO" --depth 1 --branch "$AGAVE_VERSION" --recurse-submodules agave +WORKDIR /build/agave + +# Cherry-pick --public-tvu-address support (anza-xyz/agave PR #6778, commit 9f4b3ae) +# This flag only exists on master, not in v3.1.9 — fetch the PR ref and cherry-pick +ARG TVU_ADDRESS_PR=6778 +RUN if [ -n "$TVU_ADDRESS_PR" ]; then \ + git fetch --depth 50 origin "pull/${TVU_ADDRESS_PR}/head:tvu-pr" && \ + git cherry-pick --no-commit tvu-pr; \ + fi + +# Build all binaries using the upstream install script +RUN CI_COMMIT=$(git rev-parse HEAD) scripts/cargo-install-all.sh /solana-release + +# ---------- Stage 2: Runtime ---------- +FROM debian:bookworm-slim + +RUN apt-get update && 
apt-get install -y --no-install-recommends \ + ca-certificates \ + libssl3 \ + libudev1 \ + curl \ + sudo \ + aria2 \ + python3 \ + && rm -rf /var/lib/apt/lists/* + +# Create non-root user with sudo +RUN useradd -m -s /bin/bash agave \ + && echo "agave ALL=(ALL) NOPASSWD: ALL" >> /etc/sudoers + +# Copy all compiled binaries +COPY --from=builder /solana-release/bin/ /usr/local/bin/ + +# Copy entrypoint and support scripts +COPY entrypoint.py snapshot_download.py ip_echo_preflight.py /usr/local/bin/ +COPY start-test.sh /usr/local/bin/ +RUN chmod +x /usr/local/bin/entrypoint.py /usr/local/bin/start-test.sh + +# Create data directories +RUN mkdir -p /data/config /data/ledger /data/accounts /data/snapshots \ + && chown -R agave:agave /data + +USER agave +WORKDIR /data + +ENV RUST_LOG=info +ENV RUST_BACKTRACE=1 + +EXPOSE 8899 8900 8001 8001/udp + +ENTRYPOINT ["entrypoint.py"] diff --git a/stack-orchestrator/container-build/laconicnetwork-agave/build.sh b/stack-orchestrator/container-build/laconicnetwork-agave/build.sh new file mode 100644 index 00000000..4c4d940f --- /dev/null +++ b/stack-orchestrator/container-build/laconicnetwork-agave/build.sh @@ -0,0 +1,17 @@ +#!/usr/bin/env bash + +# Build laconicnetwork/agave +# Set AGAVE_REPO and AGAVE_VERSION env vars to build Jito or a different version +source ${CERC_CONTAINER_BASE_DIR}/build-base.sh + +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) + +AGAVE_REPO="${AGAVE_REPO:-https://github.com/anza-xyz/agave.git}" +AGAVE_VERSION="${AGAVE_VERSION:-v3.1.9}" + +docker build -t laconicnetwork/agave:local \ + --build-arg AGAVE_REPO="$AGAVE_REPO" \ + --build-arg AGAVE_VERSION="$AGAVE_VERSION" \ + ${build_command_args} \ + -f ${SCRIPT_DIR}/Dockerfile \ + ${SCRIPT_DIR} diff --git a/stack-orchestrator/container-build/laconicnetwork-agave/entrypoint.py b/stack-orchestrator/container-build/laconicnetwork-agave/entrypoint.py new file mode 100644 index 00000000..2b7324c3 --- /dev/null +++ 
b/stack-orchestrator/container-build/laconicnetwork-agave/entrypoint.py @@ -0,0 +1,686 @@ +#!/usr/bin/env python3 +"""Agave validator entrypoint — snapshot management, arg construction, liveness probe. + +Two subcommands: + entrypoint.py serve (default) — snapshot freshness check + run agave-validator + entrypoint.py probe — liveness probe (slot lag check, exits 0/1) + +Replaces the bash entrypoint.sh / start-rpc.sh / start-validator.sh with a single +Python module. Test mode still dispatches to start-test.sh. + +Python stays as PID 1 and traps SIGTERM. On SIGTERM, it runs +``agave-validator exit --force --ledger /data/ledger`` which connects to the +admin RPC Unix socket and tells the validator to flush I/O and exit cleanly. +This avoids the io_uring/ZFS deadlock that occurs when the process is killed. + +All configuration comes from environment variables — same vars as the original +bash scripts. See compose files for defaults. +""" + +from __future__ import annotations + +import json +import logging +import os +import re +import signal +import subprocess +import sys +import threading +import time +import urllib.error +import urllib.request +from pathlib import Path +from urllib.request import Request + +log: logging.Logger = logging.getLogger("entrypoint") + +# Directories +CONFIG_DIR = "/data/config" +LEDGER_DIR = "/data/ledger" +ACCOUNTS_DIR = "/data/accounts" +SNAPSHOTS_DIR = "/data/snapshots" +LOG_DIR = "/data/log" +IDENTITY_FILE = f"{CONFIG_DIR}/validator-identity.json" + +# Snapshot filename patterns +FULL_SNAP_RE: re.Pattern[str] = re.compile( + r"^snapshot-(\d+)-[A-Za-z0-9]+\.tar\.(zst|bz2)$" +) +INCR_SNAP_RE: re.Pattern[str] = re.compile( + r"^incremental-snapshot-(\d+)-(\d+)-[A-Za-z0-9]+\.tar\.(zst|bz2)$" +) + +MAINNET_RPC = "https://api.mainnet-beta.solana.com" + + +# -- Helpers ------------------------------------------------------------------- + + +def env(name: str, default: str = "") -> str: + """Read env var with default.""" + return 
os.environ.get(name, default) + + +def env_required(name: str) -> str: + """Read required env var, exit if missing.""" + val = os.environ.get(name) + if not val: + log.error("%s is required but not set", name) + sys.exit(1) + return val + + +def env_bool(name: str, default: bool = False) -> bool: + """Read boolean env var (true/false/1/0).""" + val = os.environ.get(name, "").lower() + if not val: + return default + return val in ("true", "1", "yes") + + +def rpc_get_slot(url: str, timeout: int = 10) -> int | None: + """Get current slot from a Solana RPC endpoint.""" + payload = json.dumps({ + "jsonrpc": "2.0", "id": 1, + "method": "getSlot", "params": [], + }).encode() + req = Request(url, data=payload, + headers={"Content-Type": "application/json"}) + try: + with urllib.request.urlopen(req, timeout=timeout) as resp: + data = json.loads(resp.read()) + result = data.get("result") + if isinstance(result, int): + return result + except (urllib.error.URLError, json.JSONDecodeError, OSError, TimeoutError): + pass + return None + + +# -- Snapshot management ------------------------------------------------------- + + +def get_local_snapshot_slot(snapshots_dir: str) -> int | None: + """Find the highest slot among local snapshot files.""" + best_slot: int | None = None + snap_path = Path(snapshots_dir) + if not snap_path.is_dir(): + return None + for entry in snap_path.iterdir(): + m = FULL_SNAP_RE.match(entry.name) + if m: + slot = int(m.group(1)) + if best_slot is None or slot > best_slot: + best_slot = slot + return best_slot + + +def clean_snapshots(snapshots_dir: str) -> None: + """Remove all snapshot files from the directory.""" + snap_path = Path(snapshots_dir) + if not snap_path.is_dir(): + return + for entry in snap_path.iterdir(): + if entry.name.startswith(("snapshot-", "incremental-snapshot-")): + log.info("Removing old snapshot: %s", entry.name) + entry.unlink(missing_ok=True) + + +def get_incremental_slot(snapshots_dir: str, full_slot: int | None) -> int | 
None: + """Get the highest incremental snapshot slot matching the full's base slot.""" + if full_slot is None: + return None + snap_path = Path(snapshots_dir) + if not snap_path.is_dir(): + return None + best: int | None = None + for entry in snap_path.iterdir(): + m = INCR_SNAP_RE.match(entry.name) + if m and int(m.group(1)) == full_slot: + slot = int(m.group(2)) + if best is None or slot > best: + best = slot + return best + + +def maybe_download_snapshot(snapshots_dir: str) -> None: + """Ensure full + incremental snapshots exist before starting. + + The validator should always start from a full + incremental pair to + minimize replay time. If either is missing or the full is too old, + download fresh ones via download_best_snapshot (which does rolling + incremental convergence after downloading the full). + + Controlled by env vars: + SNAPSHOT_AUTO_DOWNLOAD (default: true) — enable/disable + SNAPSHOT_MAX_AGE_SLOTS (default: 100000) — full snapshot staleness threshold + (one full snapshot generation, ~11 hours) + """ + if not env_bool("SNAPSHOT_AUTO_DOWNLOAD", default=True): + log.info("Snapshot auto-download disabled") + return + + max_age = int(env("SNAPSHOT_MAX_AGE_SLOTS", "100000")) + + mainnet_slot = rpc_get_slot(MAINNET_RPC) + if mainnet_slot is None: + log.warning("Cannot reach mainnet RPC — skipping snapshot check") + return + + script_dir = Path(__file__).resolve().parent + sys.path.insert(0, str(script_dir)) + from snapshot_download import download_best_snapshot, download_incremental_for_slot + + convergence = int(env("SNAPSHOT_CONVERGENCE_SLOTS", "500")) + retry_delay = int(env("SNAPSHOT_RETRY_DELAY", "60")) + + # Check local full snapshot + local_slot = get_local_snapshot_slot(snapshots_dir) + have_fresh_full = (local_slot is not None + and (mainnet_slot - local_slot) <= max_age) + + if have_fresh_full: + assert local_slot is not None + inc_slot = get_incremental_slot(snapshots_dir, local_slot) + if inc_slot is not None: + inc_gap = mainnet_slot - 
inc_slot + if inc_gap <= convergence: + log.info("Full (slot %d) + incremental (slot %d, gap %d) " + "within convergence, starting", + local_slot, inc_slot, inc_gap) + return + log.info("Incremental too stale (slot %d, gap %d > %d)", + inc_slot, inc_gap, convergence) + # Fresh full, need a fresh incremental + log.info("Downloading incremental for full at slot %d", local_slot) + while True: + if download_incremental_for_slot(snapshots_dir, local_slot, + convergence_slots=convergence): + return + log.warning("Incremental download failed — retrying in %ds", + retry_delay) + time.sleep(retry_delay) + + # No full or full too old — download both + log.info("Downloading full + incremental") + clean_snapshots(snapshots_dir) + while True: + if download_best_snapshot(snapshots_dir, convergence_slots=convergence): + return + log.warning("Snapshot download failed — retrying in %ds", retry_delay) + time.sleep(retry_delay) + + +# -- Directory and identity setup ---------------------------------------------- + + +def ensure_dirs(*dirs: str) -> None: + """Create directories and fix ownership.""" + uid = os.getuid() + gid = os.getgid() + for d in dirs: + os.makedirs(d, exist_ok=True) + try: + subprocess.run( + ["sudo", "chown", "-R", f"{uid}:{gid}", d], + check=False, capture_output=True, + ) + except FileNotFoundError: + pass # sudo not available — dirs already owned correctly + + +def ensure_identity_rpc() -> None: + """Generate ephemeral identity keypair for RPC mode if not mounted.""" + if os.path.isfile(IDENTITY_FILE): + return + log.info("Generating RPC node identity keypair...") + subprocess.run( + ["solana-keygen", "new", "--no-passphrase", "--silent", + "--force", "--outfile", IDENTITY_FILE], + check=True, + ) + + +def print_identity() -> None: + """Print the node identity pubkey.""" + result = subprocess.run( + ["solana-keygen", "pubkey", IDENTITY_FILE], + capture_output=True, text=True, check=False, + ) + if result.returncode == 0: + log.info("Node identity: %s", 
result.stdout.strip()) + + +# -- Arg construction ---------------------------------------------------------- + + +def build_common_args() -> list[str]: + """Build agave-validator args common to both RPC and validator modes.""" + args: list[str] = [ + "--identity", IDENTITY_FILE, + "--entrypoint", env_required("VALIDATOR_ENTRYPOINT"), + "--known-validator", env_required("KNOWN_VALIDATOR"), + "--ledger", LEDGER_DIR, + "--accounts", ACCOUNTS_DIR, + "--snapshots", SNAPSHOTS_DIR, + "--rpc-port", env("RPC_PORT", "8899"), + "--rpc-bind-address", env("RPC_BIND_ADDRESS", "127.0.0.1"), + "--gossip-port", env("GOSSIP_PORT", "8001"), + "--dynamic-port-range", env("DYNAMIC_PORT_RANGE", "9000-10000"), + "--no-os-network-limits-test", + "--wal-recovery-mode", "skip_any_corrupted_record", + "--limit-ledger-size", env("LIMIT_LEDGER_SIZE", "50000000"), + "--no-snapshot-fetch", # entrypoint handles snapshot download + ] + + # Snapshot generation + if env("NO_SNAPSHOTS") == "true": + args.append("--no-snapshots") + else: + args += [ + "--full-snapshot-interval-slots", env("SNAPSHOT_INTERVAL_SLOTS", "100000"), + "--maximum-full-snapshots-to-retain", env("MAXIMUM_SNAPSHOTS_TO_RETAIN", "1"), + ] + if env("NO_INCREMENTAL_SNAPSHOTS") != "true": + args += ["--maximum-incremental-snapshots-to-retain", "2"] + + # Account indexes + account_indexes = env("ACCOUNT_INDEXES") + if account_indexes: + for idx in account_indexes.split(","): + idx = idx.strip() + if idx: + args += ["--account-index", idx] + + # Additional entrypoints + for ep in env("EXTRA_ENTRYPOINTS").split(): + if ep: + args += ["--entrypoint", ep] + + # Additional known validators + for kv in env("EXTRA_KNOWN_VALIDATORS").split(): + if kv: + args += ["--known-validator", kv] + + # Cluster verification + genesis_hash = env("EXPECTED_GENESIS_HASH") + if genesis_hash: + args += ["--expected-genesis-hash", genesis_hash] + shred_version = env("EXPECTED_SHRED_VERSION") + if shred_version: + args += ["--expected-shred-version", 
shred_version] + + # Metrics — just needs to be in the environment, agave reads it directly + # (env var is already set, nothing to pass as arg) + + # Gossip host / TVU address + gossip_host = env("GOSSIP_HOST") + if gossip_host: + args += ["--gossip-host", gossip_host] + elif env("PUBLIC_TVU_ADDRESS"): + args += ["--public-tvu-address", env("PUBLIC_TVU_ADDRESS")] + + # Jito flags + if env("JITO_ENABLE") == "true": + log.info("Jito MEV enabled") + jito_flags: list[tuple[str, str]] = [ + ("JITO_TIP_PAYMENT_PROGRAM", "--tip-payment-program-pubkey"), + ("JITO_DISTRIBUTION_PROGRAM", "--tip-distribution-program-pubkey"), + ("JITO_MERKLE_ROOT_AUTHORITY", "--merkle-root-upload-authority"), + ("JITO_COMMISSION_BPS", "--commission-bps"), + ("JITO_BLOCK_ENGINE_URL", "--block-engine-url"), + ("JITO_SHRED_RECEIVER_ADDR", "--shred-receiver-address"), + ] + for env_name, flag in jito_flags: + val = env(env_name) + if val: + args += [flag, val] + + return args + + +def build_rpc_args() -> list[str]: + """Build agave-validator args for RPC (non-voting) mode.""" + args = build_common_args() + args += [ + "--no-voting", + "--log", f"{LOG_DIR}/validator.log", + "--full-rpc-api", + "--enable-rpc-transaction-history", + "--rpc-pubsub-enable-block-subscription", + "--enable-extended-tx-metadata-storage", + "--no-wait-for-vote-to-start-leader", + ] + + # Public vs private RPC + public_rpc = env("PUBLIC_RPC_ADDRESS") + if public_rpc: + args += ["--public-rpc-address", public_rpc] + else: + args += ["--private-rpc", "--allow-private-addr", "--only-known-rpc"] + + # Jito relayer URL (RPC mode doesn't use it, but validator mode does — + # handled in build_validator_args) + + return args + + +def build_validator_args() -> list[str]: + """Build agave-validator args for voting validator mode.""" + vote_keypair = env("VOTE_ACCOUNT_KEYPAIR", + "/data/config/vote-account-keypair.json") + + # Identity must be mounted for validator mode + if not os.path.isfile(IDENTITY_FILE): + log.error("Validator 
def graceful_exit(child: subprocess.Popen[bytes], reason: str = "SIGTERM") -> None:
    """Request graceful shutdown via the admin RPC Unix socket.

    Runs ``agave-validator exit --force --ledger LEDGER_DIR`` so the validator
    can flush its I/O and exit on its own, then waits for ``child`` to exit.
    If the child is still running when the budget is spent, escalates to
    SIGTERM (15s) and finally SIGKILL.

    ``GRACEFUL_EXIT_TIMEOUT`` is treated as one overall budget that covers
    both the admin RPC command and the wait for the child. Previously the
    command's own 30s timeout was added on top of the full wait, so a hung
    admin RPC pushed the SIGTERM fallback past the margin the constant is
    meant to leave.

    Args:
        child: The running validator process.
        reason: Short label used in the first log line (e.g. "SIGTERM",
            "Leapfrog").
    """
    # Single deadline shared by the admin RPC call and the wait below.
    deadline = time.monotonic() + GRACEFUL_EXIT_TIMEOUT

    log.info("%s — requesting graceful exit via admin RPC", reason)
    try:
        result = subprocess.run(
            ["agave-validator", "exit", "--force", "--ledger", LEDGER_DIR],
            capture_output=True, text=True, timeout=30,
        )
    except subprocess.TimeoutExpired:
        log.warning("Admin RPC exit command timed out after 30s")
    except FileNotFoundError:
        log.warning("agave-validator binary not found for exit command")
    except OSError as exc:
        # e.g. PermissionError — still fall through to the wait/SIGTERM path
        # instead of dying inside the shutdown handler.
        log.warning("Admin RPC exit command could not be run: %s", exc)
    else:
        if result.returncode == 0:
            log.info("Admin RPC exit requested successfully")
        else:
            log.warning(
                "Admin RPC exit returned %d: %s",
                result.returncode, result.stderr.strip(),
            )

    # Wait for child to exit, using only what is left of the budget
    remaining = max(0.0, deadline - time.monotonic())
    try:
        child.wait(timeout=remaining)
        log.info("Validator exited cleanly with code %d", child.returncode)
        return
    except subprocess.TimeoutExpired:
        log.warning(
            "Validator did not exit within %ds — sending SIGTERM",
            GRACEFUL_EXIT_TIMEOUT,
        )

    # Fallback: SIGTERM
    child.terminate()
    try:
        child.wait(timeout=15)
        log.info("Validator exited after SIGTERM with code %d", child.returncode)
        return
    except subprocess.TimeoutExpired:
        log.warning("Validator did not exit after SIGTERM — sending SIGKILL")

    # Last resort: SIGKILL
    child.kill()
    child.wait()
    log.info("Validator killed with SIGKILL, code %d", child.returncode)
def cmd_serve() -> None:
    """Main serve flow: snapshot download, run validator, monitor gap, leapfrog.

    Python stays as the parent process. On each iteration:
      1. Download snapshots (``maybe_download_snapshot``)
      2. Start agave-validator as a child process
      3. Monitor the slot gap in a background thread
      4. If the monitor triggers a leapfrog → loop back to step 1
      5. If SIGTERM → graceful stop → exit
      6. If the validator exits on its own → exit with its return code

    Shutdown handling:
      * ``shutting_down`` is a single event for the life of the process, so a
        SIGTERM is never forgotten when the loop starts a new iteration.
      * A SIGTERM that arrives while a leapfrog shutdown is in progress wins:
        the loop exits instead of restarting the validator.
      * A SIGTERM that arrives while no validator is running (re-download
        between leapfrog iterations) exits immediately instead of being
        applied to the previous, already-dead child.

    In ``test`` mode the process is replaced by ``start-test.sh``.
    """
    mode = env("AGAVE_MODE", "test")
    log.info("AGAVE_MODE=%s", mode)

    if mode == "test":
        os.execvp("start-test.sh", ["start-test.sh"])

    if mode not in ("rpc", "validator"):
        log.error("Unknown AGAVE_MODE: %s (valid: test, rpc, validator)", mode)
        sys.exit(1)

    # One-time setup
    dirs = [CONFIG_DIR, LEDGER_DIR, ACCOUNTS_DIR, SNAPSHOTS_DIR]
    if mode == "rpc":
        dirs.append(LOG_DIR)
    ensure_dirs(*dirs)

    if not env_bool("SKIP_IP_ECHO_PREFLIGHT"):
        # ip_echo_preflight.py lives next to this script; make it importable.
        script_dir = Path(__file__).resolve().parent
        sys.path.insert(0, str(script_dir))
        from ip_echo_preflight import main as ip_echo_main
        if ip_echo_main() != 0:
            sys.exit(1)

    if mode == "rpc":
        ensure_identity_rpc()
    print_identity()

    if mode == "rpc":
        args = build_rpc_args()
    else:
        args = build_validator_args()
    args = append_extra_args(args)

    # Process-wide shutdown flag, shared by every iteration's gap monitor.
    shutting_down = threading.Event()
    # Rebound on every iteration; the handlers below read it via closure.
    child: subprocess.Popen[bytes] | None = None

    def _forward_sigusr1(_sig: int, _frame: object) -> None:
        if child is not None:
            child.send_signal(signal.SIGUSR1)

    def _on_sigterm(_sig: int, _frame: object) -> None:
        shutting_down.set()
        if child is not None and child.poll() is None:
            # Validator is (or may still be) running: stop it gracefully off
            # the main thread so child.wait() below can return normally.
            threading.Thread(
                target=graceful_exit, args=(child,), daemon=True,
            ).start()
        else:
            # No validator to flush (e.g. re-downloading between leapfrog
            # iterations) — leave right away.
            sys.exit(0)

    # Main loop: download → run → monitor → leapfrog if needed
    while True:
        maybe_download_snapshot(SNAPSHOTS_DIR)

        # Timestamp read by the probe subcommand for its grace period.
        Path("/tmp/entrypoint-start").write_text(str(time.time()))
        log.info("Starting agave-validator with %d arguments", len(args))
        child = subprocess.Popen(["agave-validator"] + args)

        leapfrog = threading.Event()

        # Installed only once a child exists, as before; they stay installed
        # for later iterations and always act on the current ``child``.
        signal.signal(signal.SIGUSR1, _forward_sigusr1)
        signal.signal(signal.SIGTERM, _on_sigterm)

        # Start gap monitor
        monitor = threading.Thread(
            target=_gap_monitor,
            args=(child, leapfrog, shutting_down),
            daemon=True,
        )
        monitor.start()

        child.wait()

        # Only restart for a leapfrog if nobody asked us to shut down.
        if leapfrog.is_set() and not shutting_down.is_set():
            log.info("Leapfrog: restarting with fresh incremental")
            continue

        sys.exit(child.returncode)
def main() -> None:
    """Entry point: configure logging and dispatch to a subcommand.

    The subcommand is the first command-line argument and defaults to
    ``serve``. Anything other than ``serve`` or ``probe`` is logged as an
    error and exits with status 1.
    """
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s %(levelname)s [%(name)s] %(message)s",
        datefmt="%H:%M:%S",
    )

    subcmd = "serve"
    if len(sys.argv) > 1:
        subcmd = sys.argv[1]

    if subcmd not in ("serve", "probe"):
        log.error("Unknown subcommand: %s (valid: serve, probe)", subcmd)
        sys.exit(1)

    # Resolve the handler only after validation so names are looked up lazily,
    # exactly when they are about to be called.
    handler = cmd_serve if subcmd == "serve" else cmd_probe
    handler()
def build_request(tcp_ports: list[int], udp_ports: list[int]) -> bytes:
    """Serialize an IpEchoServerMessage.

    Layout: ``HEADER`` + four little-endian u16 TCP ports + four
    little-endian u16 UDP ports + ``TERMINUS``. Each port list is padded
    with zeros to four entries; entries beyond the fourth are dropped.
    """
    zero_fill = [0, 0, 0, 0]
    tcp_slots = (tcp_ports + zero_fill)[:4]
    udp_slots = (udp_ports + zero_fill)[:4]

    # "<8H" is byte-for-byte the same as two consecutive "<4H" packs:
    # little-endian standard layout has no padding between fields.
    ports_blob = struct.pack("<8H", *tcp_slots, *udp_slots)
    return b"".join((HEADER, ports_blob, TERMINUS))
def ip_echo_check(
    entrypoint_host: str,
    entrypoint_port: int,
    udp_ports: list[int],
) -> tuple[str, dict[int, bool]]:
    """Run one ip_echo exchange and return (seen_ip, {port: reachable}).

    Starts one UDP listener thread per port, sends the ip_echo request over
    TCP, parses the response, then waits up to ``PROBE_TIMEOUT`` seconds for
    a probe packet on every port.

    The listener threads are always stopped before this function returns or
    raises. Previously a TCP or parse failure left them running with their
    sockets bound, so the caller's retry started a second set of listeners on
    the same ports.

    Args:
        entrypoint_host: Host of the ip_echo server.
        entrypoint_port: TCP port of the ip_echo server.
        udp_ports: Ports to test; zeros are dropped and at most four are used.

    Raises:
        OSError, ValueError: on TCP failure or an unparseable response
            (caller retries).
    """
    udp_ports = [p for p in udp_ports if p != 0][:4]

    results: dict[int, tuple] = {}
    stop = threading.Event()
    threads: list[threading.Thread] = []

    try:
        # Start UDP listeners before sending the TCP request
        for port in udp_ports:
            t = threading.Thread(
                target=_listen_udp, args=(port, results, stop), daemon=True,
            )
            t.start()
            threads.append(t)
        time.sleep(0.1)  # let listeners bind

        # TCP: send request, read response
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
            sock.settimeout(IO_TIMEOUT)
            sock.connect((entrypoint_host, entrypoint_port))
            sock.sendall(build_request([], udp_ports))
            resp = sock.recv(RESPONSE_BUF)

        seen_ip, shred_version = parse_response(resp)
        log.info(
            "entrypoint %s:%d sees us as %s (shred_version=%s)",
            entrypoint_host, entrypoint_port, seen_ip, shred_version,
        )

        # Wait for UDP probes
        deadline = time.monotonic() + PROBE_TIMEOUT
        while time.monotonic() < deadline:
            if all(p in results for p in udp_ports):
                break
            time.sleep(0.2)
    finally:
        # Runs on success and on failure: release the listeners (and their
        # sockets) so a retry starts from a clean slate.
        stop.set()
        for t in threads:
            t.join(timeout=1)

    port_ok: dict[int, bool] = {}
    for port in udp_ports:
        if port not in results:
            log.error("port %d: no probe received within %.0fs", port, PROBE_TIMEOUT)
            port_ok[port] = False
        else:
            status, detail = results[port]
            if status == "ok":
                log.info("port %d: probe received from %s", port, detail)
                port_ok[port] = True
            else:
                log.error("port %d: %s: %s", port, status, detail)
                port_ok[port] = False

    return seen_ip, port_ok
def main() -> int:
    """Run the ip_echo preflight from environment configuration.

    Reads:
      * ``VALIDATOR_ENTRYPOINT`` ("host:port" or "host"; falls back to the
        first command-line argument, port defaults to 8001)
      * ``GOSSIP_PORT`` (default 8001)
      * ``DYNAMIC_PORT_RANGE`` ("start-end", default "9000-10000")
      * ``GOSSIP_HOST`` (optional expected public IP)

    Returns:
        0 if every tested port is reachable, 1 otherwise — including when the
        configuration is missing or a port value is not an integer.
    """
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s %(levelname)s [%(name)s] %(message)s",
        datefmt="%H:%M:%S",
    )

    # Parse entrypoint — VALIDATOR_ENTRYPOINT is "host:port"
    raw = os.environ.get("VALIDATOR_ENTRYPOINT", "")
    if not raw and len(sys.argv) > 1:
        raw = sys.argv[1]
    if not raw:
        log.error("set VALIDATOR_ENTRYPOINT or pass host:port as argument")
        return 1

    try:
        if ":" in raw:
            host, port_str = raw.rsplit(":", 1)
            ep_port = int(port_str)
        else:
            host = raw
            ep_port = 8001

        gossip_port = int(os.environ.get("GOSSIP_PORT", "8001"))
        dynamic_range = os.environ.get("DYNAMIC_PORT_RANGE", "9000-10000")
        range_start = int(dynamic_range.split("-")[0])
    except ValueError as exc:
        # Report bad configuration as a normal preflight failure instead of
        # an uncaught traceback.
        log.error("invalid port configuration: %s", exc)
        return 1

    expected_ip = os.environ.get("GOSSIP_HOST", "")

    # Test gossip + three ports from the dynamic range, at offsets 0, +2 and
    # +3 (4 max per ip_echo message).
    # NOTE(review): offset +1 is skipped although this was described as "the
    # first 3 ports" — confirm the gap is intentional.
    udp_ports = [gossip_port, range_start, range_start + 2, range_start + 3]

    ok = run_preflight(host, ep_port, udp_ports, expected_ip)
    return 0 if ok else 1
+ +Usage: + # Download to /srv/kind/solana/snapshots (mainnet, 16 connections) + ./snapshot_download.py -o /srv/kind/solana/snapshots + + # Dry run — find best source, print URL + ./snapshot_download.py --dry-run + + # Custom RPC for cluster discovery + 32 connections + ./snapshot_download.py -r https://api.mainnet-beta.solana.com -n 32 + + # Testnet + ./snapshot_download.py -c testnet -o /data/snapshots + + # Programmatic use from entrypoint.py: + from snapshot_download import download_best_snapshot + ok = download_best_snapshot("/data/snapshots") + +Requirements: + - aria2c (apt install aria2) + - python3 >= 3.10 (stdlib only, no pip dependencies) +""" + +from __future__ import annotations + +import argparse +import concurrent.futures +import json +import logging +import os +import re +import shutil +import subprocess +import sys +import time +import urllib.error +import urllib.request +from dataclasses import dataclass, field +from http.client import HTTPResponse +from pathlib import Path +from urllib.request import Request + +log: logging.Logger = logging.getLogger("snapshot-download") + +CLUSTER_RPC: dict[str, str] = { + "mainnet-beta": "https://api.mainnet-beta.solana.com", + "testnet": "https://api.testnet.solana.com", + "devnet": "https://api.devnet.solana.com", +} + +# Snapshot filenames: +# snapshot--.tar.zst +# incremental-snapshot---.tar.zst +FULL_SNAP_RE: re.Pattern[str] = re.compile( + r"^snapshot-(\d+)-([A-Za-z0-9]+)\.tar\.(zst|bz2)$" +) +INCR_SNAP_RE: re.Pattern[str] = re.compile( + r"^incremental-snapshot-(\d+)-(\d+)-([A-Za-z0-9]+)\.tar\.(zst|bz2)$" +) + + +@dataclass +class SnapshotSource: + """A snapshot file available from a specific RPC node.""" + + rpc_address: str + # Full redirect paths as returned by the server (e.g. 
def rpc_post(url: str, method: str, params: list[object] | None = None,
             timeout: int = 25) -> object | None:
    """JSON-RPC POST. Returns parsed 'result' field or None on error.

    "Error" covers transport failures, HTTP-level protocol errors (such as a
    truncated body), a body that is not valid JSON, and a JSON body that is
    not an object. All of them are logged at debug level and yield ``None``.

    Args:
        url: JSON-RPC endpoint.
        method: RPC method name.
        params: Positional parameters; ``None`` is sent as an empty list.
        timeout: Timeout in seconds passed to ``urlopen``.
    """
    # Local import: the module header only imports HTTPResponse from here.
    from http.client import HTTPException

    payload: bytes = json.dumps({
        "jsonrpc": "2.0", "id": 1,
        "method": method, "params": params or [],
    }).encode()
    req = Request(url, data=payload,
                  headers={"Content-Type": "application/json"})
    try:
        with urllib.request.urlopen(req, timeout=timeout) as resp:
            data: object = json.loads(resp.read())
    except (urllib.error.URLError, HTTPException, json.JSONDecodeError,
            OSError, TimeoutError) as e:
        # HTTPException (e.g. IncompleteRead) is not an OSError, so it has to
        # be listed explicitly to honour the "None on error" contract.
        log.debug("rpc_post %s %s failed: %s", url, method, e)
        return None

    if not isinstance(data, dict):
        # A server may answer with valid JSON that is not an object.
        log.debug("rpc_post %s %s failed: unexpected %s response",
                  url, method, type(data).__name__)
        return None
    return data.get("result")
def _parse_snapshot_filename(location: str) -> tuple[str, str]:
    """Extract filename and full redirect path from a Location header.

    Returns (filename, full_path). full_path includes any path prefix
    the server returned (e.g. '/snapshots/snapshot-123-hash.tar.zst').

    The Location value may be an absolute URL or a relative path. Both are
    now parsed the same way, so a query string or fragment is dropped in
    either form; previously only absolute URLs were parsed and a relative
    value such as '/snapshot-1-h.tar.zst?x=1' kept the query in the filename.
    """
    from urllib.parse import urlsplit

    # urlsplit handles absolute URLs and bare paths alike and separates the
    # path from scheme, host, query and fragment.
    path: str = urlsplit(location).path
    filename: str = path.rsplit("/", 1)[-1]
    return filename, path
int, + max_age_slots: int, + max_latency_ms: float, + threads: int, + version_filter: str | None, +) -> list[SnapshotSource]: + """Discover all snapshot sources, then filter. + + Probing and filtering are separate: all reachable sources are collected + first so we can report what exists even if filters reject everything. + """ + rpc_nodes: list[str] = get_cluster_rpc_nodes(rpc_url, version_filter) + if not rpc_nodes: + log.error("No RPC nodes found via getClusterNodes") + return [] + + log.info("Found %d RPC nodes, probing for snapshots...", len(rpc_nodes)) + + all_sources: list[SnapshotSource] = [] + with concurrent.futures.ThreadPoolExecutor(max_workers=threads) as pool: + futures: dict[concurrent.futures.Future[SnapshotSource | None], str] = { + pool.submit(probe_rpc_snapshot, addr, current_slot): addr + for addr in rpc_nodes + } + done: int = 0 + for future in concurrent.futures.as_completed(futures): + done += 1 + if done % 200 == 0: + log.info(" probed %d/%d nodes, %d reachable", + done, len(rpc_nodes), len(all_sources)) + try: + result: SnapshotSource | None = future.result() + except (urllib.error.URLError, OSError, TimeoutError) as e: + log.debug("Probe failed for %s: %s", futures[future], e) + continue + if result: + all_sources.append(result) + + log.info("Discovered %d reachable sources", len(all_sources)) + + # Apply filters + filtered: list[SnapshotSource] = [] + rejected_age: int = 0 + rejected_latency: int = 0 + for src in all_sources: + if src.slots_diff > max_age_slots or src.slots_diff < -100: + rejected_age += 1 + continue + if src.latency_ms > max_latency_ms: + rejected_latency += 1 + continue + filtered.append(src) + + if rejected_age or rejected_latency: + log.info("Filtered: %d rejected by age (>%d slots), %d by latency (>%.0fms)", + rejected_age, max_age_slots, rejected_latency, max_latency_ms) + + if not filtered and all_sources: + # Show what was available so the user can adjust filters + all_sources.sort(key=lambda s: s.slots_diff) + 
def probe_incremental(
    fast_sources: list[SnapshotSource],
    full_snap_slot: int,
) -> tuple[str | None, list[str]]:
    """Probe fast sources for the best incremental matching full_snap_slot.

    Returns (filename, mirror_urls) or (None, []) if no match found.
    The "best" incremental is the one with the highest slot (closest to head);
    on a tie the earliest source in ``fast_sources`` wins.

    Each source is probed exactly once. The mirror list is built from the
    same probe results instead of issuing a second round of HEAD requests,
    which halves the network round-trips and keeps the winner and its mirrors
    consistent with one another.
    """
    # (rpc_address, filename, path, incremental slot) for every source whose
    # incremental is based on full_snap_slot.
    candidates: list[tuple[str, str, str, int]] = []

    for source in fast_sources:
        inc_url: str = f"http://{source.rpc_address}/incremental-snapshot.tar.bz2"
        inc_location, _ = head_no_follow(inc_url, timeout=2)
        if not inc_location:
            continue
        inc_fn, inc_fp = _parse_snapshot_filename(inc_location)
        m: re.Match[str] | None = INCR_SNAP_RE.match(inc_fn)
        if not m or inc_fp is None:
            continue
        if int(m.group(1)) != full_snap_slot:
            log.debug(" %s: incremental base slot %s != full %d, skipping",
                      source.rpc_address, m.group(1), full_snap_slot)
            continue
        candidates.append((source.rpc_address, inc_fn, inc_fp, int(m.group(2))))

    # Keep the first candidate with the strictly highest slot; a slot of 0 is
    # never selected, matching the original "> 0" starting point.
    best: tuple[str, str, str, int] | None = None
    for cand in candidates:
        if cand[3] > (best[3] if best is not None else 0):
            best = cand
    if best is None:
        return None, []

    best_addr, best_filename, best_path, _best_slot = best

    # Build mirror list — best source first, then every other source that
    # advertised the very same file.
    mirror_urls: list[str] = [f"http://{best_addr}{best_path}"]
    mirror_urls.extend(
        f"http://{addr}{path}"
        for addr, filename, path, _slot in candidates
        if addr != best_addr and filename == best_filename
    )

    return best_filename, mirror_urls
def _discover_and_benchmark(
    rpc_url: str,
    current_slot: int,
    *,
    max_snapshot_age: int = 10000,
    max_latency: float = 500,
    threads: int = 500,
    min_download_speed: int = 20,
    measurement_time: int = 7,
    max_speed_checks: int = 15,
    version_filter: str | None = None,
) -> list[SnapshotSource]:
    """Discover snapshot sources and benchmark download speed.

    Sources are discovered and filtered by ``discover_sources``, then the
    ``max_speed_checks`` lowest-latency ones are benchmarked with
    ``measure_speed``.

    Returns sources that meet the minimum speed requirement
    (``min_download_speed`` MiB/s), sorted by measured speed, fastest first.
    The result used to be left in latency order even though it was documented
    as sorted by speed.
    """
    sources: list[SnapshotSource] = discover_sources(
        rpc_url, current_slot,
        max_age_slots=max_snapshot_age,
        max_latency_ms=max_latency,
        threads=threads,
        version_filter=version_filter,
    )
    if not sources:
        return []

    # Benchmark the lowest-latency sources first.
    sources.sort(key=lambda s: s.latency_ms)

    # Never claim to benchmark more sources than actually exist.
    to_check: int = max(0, min(max_speed_checks, len(sources)))
    log.info("Benchmarking download speed on top %d sources...", to_check)

    fast_sources: list[SnapshotSource] = []
    min_speed_bytes: int = min_download_speed * 1024 * 1024

    for source in sources[:to_check]:
        speed: float = measure_speed(source.rpc_address, measurement_time)
        source.download_speed = speed
        speed_mib: float = speed / (1024 ** 2)

        if speed < min_speed_bytes:
            log.info(" %s: %.1f MiB/s (too slow, need >=%d MiB/s)",
                     source.rpc_address, speed_mib, min_download_speed)
            continue

        log.info(" %s: %.1f MiB/s (latency: %.0fms, age: %d slots)",
                 source.rpc_address, speed_mib,
                 source.latency_ms, source.slots_diff)
        fast_sources.append(source)

    # Stable sort: equally fast sources keep their latency order.
    fast_sources.sort(key=lambda s: s.download_speed, reverse=True)
    return fast_sources
+ """ + prev_inc_filename: str | None = None + loop_start: float = time.monotonic() + max_convergence_time: float = 1800.0 # 30 min wall-clock limit + + while True: + if time.monotonic() - loop_start > max_convergence_time: + if prev_inc_filename: + log.warning("Convergence timeout (%.0fs) — using %s", + max_convergence_time, prev_inc_filename) + else: + log.warning("Convergence timeout (%.0fs) — no incremental downloaded", + max_convergence_time) + break + + inc_fn, inc_mirrors = probe_incremental(fast_sources, full_snap_slot) + if inc_fn is None: + if prev_inc_filename is None: + log.error("No matching incremental found for base slot %d", + full_snap_slot) + else: + log.info("No newer incremental available, using %s", prev_inc_filename) + break + + m_inc: re.Match[str] | None = INCR_SNAP_RE.match(inc_fn) + assert m_inc is not None + inc_slot: int = int(m_inc.group(2)) + + head_slot: int | None = get_current_slot(rpc_url) + if head_slot is None: + log.warning("Cannot get current slot — downloading best available incremental") + gap: int = convergence_slots + 1 + else: + gap = head_slot - inc_slot + + if inc_fn == prev_inc_filename: + if gap <= convergence_slots: + log.info("Incremental %s already downloaded (gap %d slots, converged)", + inc_fn, gap) + break + log.info("No newer incremental yet (slot %d, gap %d slots), waiting...", + inc_slot, gap) + time.sleep(10) + continue + + if prev_inc_filename is not None: + old_path: Path = Path(output_dir) / prev_inc_filename + if old_path.exists(): + log.info("Removing superseded incremental %s", prev_inc_filename) + old_path.unlink() + + log.info("Downloading incremental %s (%d mirrors, slot %d, gap %d slots)", + inc_fn, len(inc_mirrors), inc_slot, gap) + if not download_aria2c(inc_mirrors, output_dir, inc_fn, connections): + log.warning("Failed to download incremental %s — re-probing in 10s", inc_fn) + time.sleep(10) + continue + + prev_inc_filename = inc_fn + + if gap <= convergence_slots: + log.info("Converged: 
incremental slot %d is %d slots behind head", + inc_slot, gap) + break + + if head_slot is None: + break + + log.info("Not converged (gap %d > %d), re-probing in 10s...", + gap, convergence_slots) + time.sleep(10) + + return prev_inc_filename + + +# -- Public API ---------------------------------------------------------------- + + +def download_incremental_for_slot( + output_dir: str, + full_snap_slot: int, + *, + cluster: str = "mainnet-beta", + rpc_url: str | None = None, + connections: int = 16, + threads: int = 500, + max_snapshot_age: int = 10000, + max_latency: float = 500, + min_download_speed: int = 20, + measurement_time: int = 7, + max_speed_checks: int = 15, + version_filter: str | None = None, + convergence_slots: int = 500, +) -> bool: + """Download an incremental snapshot for an existing full snapshot. + + Discovers sources, benchmarks speed, then runs the rolling incremental + download loop for the given full snapshot base slot. Does NOT download + a full snapshot. + + Returns True if an incremental was downloaded, False otherwise. + """ + resolved_rpc: str = rpc_url or CLUSTER_RPC[cluster] + + if not shutil.which("aria2c"): + log.error("aria2c not found. 
Install with: apt install aria2") + return False + + log.info("Incremental download for base slot %d", full_snap_slot) + current_slot: int | None = get_current_slot(resolved_rpc) + if current_slot is None: + log.error("Cannot get current slot from %s", resolved_rpc) + return False + + fast_sources: list[SnapshotSource] = _discover_and_benchmark( + resolved_rpc, current_slot, + max_snapshot_age=max_snapshot_age, + max_latency=max_latency, + threads=threads, + min_download_speed=min_download_speed, + measurement_time=measurement_time, + max_speed_checks=max_speed_checks, + version_filter=version_filter, + ) + if not fast_sources: + log.error("No fast sources found") + return False + + os.makedirs(output_dir, exist_ok=True) + result: str | None = _rolling_incremental_download( + fast_sources, full_snap_slot, output_dir, + convergence_slots, connections, resolved_rpc, + ) + return result is not None + + +def download_best_snapshot( + output_dir: str, + *, + cluster: str = "mainnet-beta", + rpc_url: str | None = None, + connections: int = 16, + threads: int = 500, + max_snapshot_age: int = 10000, + max_latency: float = 500, + min_download_speed: int = 20, + measurement_time: int = 7, + max_speed_checks: int = 15, + version_filter: str | None = None, + full_only: bool = False, + convergence_slots: int = 500, +) -> bool: + """Download the best available snapshot to output_dir. + + This is the programmatic API — called by entrypoint.py for automatic + snapshot download. Returns True on success, False on failure. + + All parameters have sensible defaults matching the CLI interface. + """ + resolved_rpc: str = rpc_url or CLUSTER_RPC[cluster] + + if not shutil.which("aria2c"): + log.error("aria2c not found. 
Install with: apt install aria2") + return False + + log.info("Cluster: %s | RPC: %s", cluster, resolved_rpc) + current_slot: int | None = get_current_slot(resolved_rpc) + if current_slot is None: + log.error("Cannot get current slot from %s", resolved_rpc) + return False + log.info("Current slot: %d", current_slot) + + fast_sources: list[SnapshotSource] = _discover_and_benchmark( + resolved_rpc, current_slot, + max_snapshot_age=max_snapshot_age, + max_latency=max_latency, + threads=threads, + min_download_speed=min_download_speed, + measurement_time=measurement_time, + max_speed_checks=max_speed_checks, + version_filter=version_filter, + ) + if not fast_sources: + log.error("No fast sources found") + return False + + # Use the fastest source as primary, build full snapshot download plan + best: SnapshotSource = fast_sources[0] + full_paths: list[str] = [fp for fp in best.file_paths + if fp.rsplit("/", 1)[-1].startswith("snapshot-")] + if not full_paths: + log.error("Best source has no full snapshot") + return False + + # Build mirror URLs for the full snapshot + full_filename: str = full_paths[0].rsplit("/", 1)[-1] + full_mirrors: list[str] = [f"http://{best.rpc_address}{full_paths[0]}"] + for other in fast_sources[1:]: + for other_fp in other.file_paths: + if other_fp.rsplit("/", 1)[-1] == full_filename: + full_mirrors.append(f"http://{other.rpc_address}{other_fp}") + break + + speed_mib: float = best.download_speed / (1024 ** 2) + log.info("Best source: %s (%.1f MiB/s), %d mirrors", + best.rpc_address, speed_mib, len(full_mirrors)) + + # Download full snapshot + os.makedirs(output_dir, exist_ok=True) + total_start: float = time.monotonic() + + filepath: Path = Path(output_dir) / full_filename + if filepath.exists() and filepath.stat().st_size > 0: + log.info("Skipping %s (already exists: %.1f GB)", + full_filename, filepath.stat().st_size / (1024 ** 3)) + else: + if not download_aria2c(full_mirrors, output_dir, full_filename, connections): + log.error("Failed to 
download %s", full_filename) + return False + + # Download incremental separately — the full download took minutes, + # so any incremental from discovery is stale. Re-probe for fresh ones. + if not full_only: + fm: re.Match[str] | None = FULL_SNAP_RE.match(full_filename) + if fm: + full_snap_slot: int = int(fm.group(1)) + log.info("Downloading incremental for base slot %d...", full_snap_slot) + _rolling_incremental_download( + fast_sources, full_snap_slot, output_dir, + convergence_slots, connections, resolved_rpc, + ) + + total_elapsed: float = time.monotonic() - total_start + log.info("All downloads complete in %.0fs", total_elapsed) + + return True + + +# -- Main (CLI) ---------------------------------------------------------------- + + +def main() -> int: + p: argparse.ArgumentParser = argparse.ArgumentParser( + description="Download Solana snapshots with aria2c parallel downloads", + ) + p.add_argument("-o", "--output", default="/srv/kind/solana/snapshots", + help="Snapshot output directory (default: /srv/kind/solana/snapshots)") + p.add_argument("-c", "--cluster", default="mainnet-beta", + choices=list(CLUSTER_RPC), + help="Solana cluster (default: mainnet-beta)") + p.add_argument("-r", "--rpc", default=None, + help="RPC URL for cluster discovery (default: public RPC)") + p.add_argument("-n", "--connections", type=int, default=16, + help="aria2c connections per download (default: 16)") + p.add_argument("-t", "--threads", type=int, default=500, + help="Threads for parallel RPC probing (default: 500)") + p.add_argument("--max-snapshot-age", type=int, default=10000, + help="Max snapshot age in slots (default: 10000)") + p.add_argument("--max-latency", type=float, default=500, + help="Max RPC probe latency in ms (default: 500)") + p.add_argument("--min-download-speed", type=int, default=20, + help="Min download speed in MiB/s (default: 20)") + p.add_argument("--measurement-time", type=int, default=7, + help="Speed measurement duration in seconds (default: 7)") + 
p.add_argument("--max-speed-checks", type=int, default=15, + help="Max nodes to benchmark before giving up (default: 15)") + p.add_argument("--version", default=None, + help="Filter nodes by version prefix (e.g. '2.2')") + p.add_argument("--convergence-slots", type=int, default=500, + help="Max slot gap for incremental convergence (default: 500)") + p.add_argument("--full-only", action="store_true", + help="Download only full snapshot, skip incremental") + p.add_argument("--dry-run", action="store_true", + help="Find best source and print URL, don't download") + p.add_argument("--post-cmd", + help="Shell command to run after successful download " + "(e.g. 'kubectl scale deployment ... --replicas=1')") + p.add_argument("-v", "--verbose", action="store_true") + args: argparse.Namespace = p.parse_args() + + logging.basicConfig( + level=logging.DEBUG if args.verbose else logging.INFO, + format="%(asctime)s %(levelname)s %(message)s", + datefmt="%H:%M:%S", + ) + + # Dry-run uses the original inline flow (needs access to sources for URL printing) + if args.dry_run: + rpc_url: str = args.rpc or CLUSTER_RPC[args.cluster] + current_slot: int | None = get_current_slot(rpc_url) + if current_slot is None: + log.error("Cannot get current slot from %s", rpc_url) + return 1 + + sources: list[SnapshotSource] = discover_sources( + rpc_url, current_slot, + max_age_slots=args.max_snapshot_age, + max_latency_ms=args.max_latency, + threads=args.threads, + version_filter=args.version, + ) + if not sources: + log.error("No snapshot sources found") + return 1 + + sources.sort(key=lambda s: s.latency_ms) + best = sources[0] + for fp in best.file_paths: + print(f"http://{best.rpc_address}{fp}") + return 0 + + ok: bool = download_best_snapshot( + args.output, + cluster=args.cluster, + rpc_url=args.rpc, + connections=args.connections, + threads=args.threads, + max_snapshot_age=args.max_snapshot_age, + max_latency=args.max_latency, + min_download_speed=args.min_download_speed, + 
measurement_time=args.measurement_time, + max_speed_checks=args.max_speed_checks, + version_filter=args.version, + full_only=args.full_only, + convergence_slots=args.convergence_slots, + ) + + if ok and args.post_cmd: + log.info("Running post-download command: %s", args.post_cmd) + result: subprocess.CompletedProcess[bytes] = subprocess.run( + args.post_cmd, shell=True, + ) + if result.returncode != 0: + log.error("Post-download command failed with exit code %d", + result.returncode) + return 1 + log.info("Post-download command completed successfully") + + return 0 if ok else 1 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/stack-orchestrator/container-build/laconicnetwork-agave/start-test.sh b/stack-orchestrator/container-build/laconicnetwork-agave/start-test.sh new file mode 100644 index 00000000..e003a97a --- /dev/null +++ b/stack-orchestrator/container-build/laconicnetwork-agave/start-test.sh @@ -0,0 +1,112 @@ +#!/usr/bin/env bash +set -euo pipefail + +# ----------------------------------------------------------------------- +# Start solana-test-validator with optional SPL token setup +# +# Environment variables: +# FACILITATOR_PUBKEY - facilitator fee-payer public key (base58) +# SERVER_PUBKEY - server/payee wallet public key (base58) +# CLIENT_PUBKEY - client/payer wallet public key (base58) +# MINT_DECIMALS - token decimals (default: 6, matching USDC) +# MINT_AMOUNT - amount to mint to client (default: 1000000000) +# LEDGER_DIR - ledger directory (default: /data/ledger) +# ----------------------------------------------------------------------- + +LEDGER_DIR="${LEDGER_DIR:-/data/ledger}" +MINT_DECIMALS="${MINT_DECIMALS:-6}" +MINT_AMOUNT="${MINT_AMOUNT:-1000000000}" +SETUP_MARKER="${LEDGER_DIR}/.setup-done" + +sudo chown -R "$(id -u):$(id -g)" "$LEDGER_DIR" 2>/dev/null || true + +# Start test-validator in the background +solana-test-validator \ + --ledger "${LEDGER_DIR}" \ + --rpc-port 8899 \ + --bind-address 0.0.0.0 \ + --quiet & + 
+VALIDATOR_PID=$! + +# Wait for RPC to become available +echo "Waiting for test-validator RPC..." +for i in $(seq 1 60); do + if solana cluster-version --url http://127.0.0.1:8899 >/dev/null 2>&1; then + echo "Test-validator is ready (attempt ${i})" + break + fi + sleep 1 +done + +solana config set --url http://127.0.0.1:8899 + +# Only run setup once (idempotent via marker file) +if [ ! -f "${SETUP_MARKER}" ]; then + echo "Running first-time setup..." + + # Airdrop SOL to all wallets for gas + for PUBKEY in "${FACILITATOR_PUBKEY:-}" "${SERVER_PUBKEY:-}" "${CLIENT_PUBKEY:-}"; do + if [ -n "${PUBKEY}" ]; then + echo "Airdropping 100 SOL to ${PUBKEY}..." + solana airdrop 100 "${PUBKEY}" --url http://127.0.0.1:8899 || true + fi + done + + # Create a USDC-equivalent SPL token mint if any pubkeys are set + if [ -n "${CLIENT_PUBKEY:-}" ] || [ -n "${FACILITATOR_PUBKEY:-}" ] || [ -n "${SERVER_PUBKEY:-}" ]; then + MINT_AUTHORITY_FILE="${LEDGER_DIR}/mint-authority.json" + if [ ! -f "${MINT_AUTHORITY_FILE}" ]; then + solana-keygen new --no-bip39-passphrase --outfile "${MINT_AUTHORITY_FILE}" --force + MINT_AUTH_PUBKEY=$(solana-keygen pubkey "${MINT_AUTHORITY_FILE}") + solana airdrop 10 "${MINT_AUTH_PUBKEY}" --url http://127.0.0.1:8899 + fi + + MINT_ADDRESS_FILE="${LEDGER_DIR}/usdc-mint-address.txt" + if [ ! -f "${MINT_ADDRESS_FILE}" ]; then + spl-token create-token \ + --decimals "${MINT_DECIMALS}" \ + --mint-authority "${MINT_AUTHORITY_FILE}" \ + --url http://127.0.0.1:8899 \ + 2>&1 | grep "Creating token" | awk '{print $3}' > "${MINT_ADDRESS_FILE}" + echo "Created USDC mint: $(cat "${MINT_ADDRESS_FILE}")" + fi + + USDC_MINT=$(cat "${MINT_ADDRESS_FILE}") + + # Create ATAs and mint tokens for the client + if [ -n "${CLIENT_PUBKEY:-}" ]; then + echo "Creating ATA for client ${CLIENT_PUBKEY}..." 
+ spl-token create-account "${USDC_MINT}" \ + --owner "${CLIENT_PUBKEY}" \ + --fee-payer "${MINT_AUTHORITY_FILE}" \ + --url http://127.0.0.1:8899 || true + + echo "Minting ${MINT_AMOUNT} tokens to client..." + spl-token mint "${USDC_MINT}" "${MINT_AMOUNT}" \ + --recipient-owner "${CLIENT_PUBKEY}" \ + --mint-authority "${MINT_AUTHORITY_FILE}" \ + --url http://127.0.0.1:8899 || true + fi + + # Create ATAs for server and facilitator + for PUBKEY in "${SERVER_PUBKEY:-}" "${FACILITATOR_PUBKEY:-}"; do + if [ -n "${PUBKEY}" ]; then + echo "Creating ATA for ${PUBKEY}..." + spl-token create-account "${USDC_MINT}" \ + --owner "${PUBKEY}" \ + --fee-payer "${MINT_AUTHORITY_FILE}" \ + --url http://127.0.0.1:8899 || true + fi + done + + # Expose mint address for other containers + cp "${MINT_ADDRESS_FILE}" /tmp/usdc-mint-address.txt 2>/dev/null || true + fi + + touch "${SETUP_MARKER}" + echo "Setup complete." +fi + +echo "solana-test-validator running (PID ${VALIDATOR_PID})" +wait ${VALIDATOR_PID} diff --git a/stack-orchestrator/container-build/laconicnetwork-doublezero/Dockerfile b/stack-orchestrator/container-build/laconicnetwork-doublezero/Dockerfile new file mode 100644 index 00000000..df4f27d0 --- /dev/null +++ b/stack-orchestrator/container-build/laconicnetwork-doublezero/Dockerfile @@ -0,0 +1,22 @@ +# DoubleZero network daemon for Solana validators +# Provides GRE tunnel + BGP routing via the DoubleZero fiber backbone + +FROM debian:bookworm-slim + +RUN apt-get update && apt-get install -y --no-install-recommends \ + ca-certificates \ + curl \ + gnupg \ + iproute2 \ + && rm -rf /var/lib/apt/lists/* + +# Install DoubleZero from Cloudsmith apt repo +RUN curl -1sLf https://dl.cloudsmith.io/public/malbeclabs/doublezero/setup.deb.sh | bash \ + && apt-get update \ + && apt-get install -y doublezero \ + && rm -rf /var/lib/apt/lists/* + +COPY entrypoint.sh /usr/local/bin/entrypoint.sh +RUN chmod +x /usr/local/bin/entrypoint.sh + +ENTRYPOINT ["entrypoint.sh"] diff --git 
a/stack-orchestrator/container-build/laconicnetwork-doublezero/build.sh b/stack-orchestrator/container-build/laconicnetwork-doublezero/build.sh new file mode 100644 index 00000000..8d2dcf68 --- /dev/null +++ b/stack-orchestrator/container-build/laconicnetwork-doublezero/build.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env bash + +# Build laconicnetwork/doublezero +source ${CERC_CONTAINER_BASE_DIR}/build-base.sh + +docker build -t laconicnetwork/doublezero:local \ + ${build_command_args} \ + -f ${CERC_CONTAINER_BASE_DIR}/laconicnetwork-doublezero/Dockerfile \ + ${CERC_CONTAINER_BASE_DIR}/laconicnetwork-doublezero diff --git a/stack-orchestrator/container-build/laconicnetwork-doublezero/entrypoint.sh b/stack-orchestrator/container-build/laconicnetwork-doublezero/entrypoint.sh new file mode 100644 index 00000000..a2ac156c --- /dev/null +++ b/stack-orchestrator/container-build/laconicnetwork-doublezero/entrypoint.sh @@ -0,0 +1,38 @@ +#!/usr/bin/env bash +set -euo pipefail + +# ----------------------------------------------------------------------- +# Start doublezerod +# +# Optional environment: +# DOUBLEZERO_RPC_ENDPOINT - Solana RPC endpoint (default: http://127.0.0.1:8899) +# DOUBLEZERO_ENV - DoubleZero environment (default: mainnet-beta) +# DOUBLEZERO_EXTRA_ARGS - additional doublezerod arguments +# ----------------------------------------------------------------------- + +RPC_ENDPOINT="${DOUBLEZERO_RPC_ENDPOINT:-http://127.0.0.1:8899}" +DZ_ENV="${DOUBLEZERO_ENV:-mainnet-beta}" + +# Ensure state directories exist +mkdir -p /var/lib/doublezerod /var/run/doublezerod + +# Generate DZ identity if not already present +DZ_CONFIG_DIR="${HOME}/.config/doublezero" +mkdir -p "$DZ_CONFIG_DIR" +if [ ! -f "$DZ_CONFIG_DIR/id.json" ]; then + echo "Generating DoubleZero identity..." + doublezero keygen +fi + +echo "Starting doublezerod..." 
+echo "Environment: $DZ_ENV" +echo "RPC endpoint: $RPC_ENDPOINT" +echo "DZ address: $(doublezero address)" + +ARGS=() +[ -n "${DOUBLEZERO_EXTRA_ARGS:-}" ] && read -ra ARGS <<< "$DOUBLEZERO_EXTRA_ARGS" + +exec doublezerod \ + -env "$DZ_ENV" \ + -solana-rpc-endpoint "$RPC_ENDPOINT" \ + "${ARGS[@]}" diff --git a/stack-orchestrator/stacks/agave/README.md b/stack-orchestrator/stacks/agave/README.md new file mode 100644 index 00000000..2200bf4c --- /dev/null +++ b/stack-orchestrator/stacks/agave/README.md @@ -0,0 +1,169 @@ +# agave stack + +Unified Agave/Jito Solana stack supporting three modes: + +| Mode | Compose file | Use case | +|------|-------------|----------| +| `test` | `docker-compose-agave-test.yml` | Local dev with instant finality | +| `rpc` | `docker-compose-agave-rpc.yml` | Non-voting mainnet/testnet RPC node | +| `validator` | `docker-compose-agave.yml` | Voting validator | + +## Build + +```bash +# Vanilla Agave v3.1.9 +laconic-so --stack agave build-containers + +# Jito v3.1.8 +AGAVE_REPO=https://github.com/jito-foundation/jito-solana.git \ +AGAVE_VERSION=v3.1.8-jito \ +laconic-so --stack agave build-containers +``` + +Build compiles from source (~30-60 min on first build). + +## Deploy + +```bash +# Test validator (dev) +laconic-so --stack agave deploy init --output spec.yml +laconic-so --stack agave deploy create --spec-file spec.yml --deployment-dir my-test +laconic-so deployment --dir my-test start + +# Mainnet RPC (e.g. biscayne) +# Edit spec.yml to set AGAVE_MODE=rpc, VALIDATOR_ENTRYPOINT, KNOWN_VALIDATOR, etc. +laconic-so --stack agave deploy init --output spec.yml +laconic-so --stack agave deploy create --spec-file spec.yml --deployment-dir my-rpc +laconic-so deployment --dir my-rpc start +``` + +## Configuration + +Mode is selected via `AGAVE_MODE` environment variable (`test`, `rpc`, or `validator`). + +### RPC mode required env +- `VALIDATOR_ENTRYPOINT` - cluster entrypoint (e.g. 
`entrypoint.mainnet-beta.solana.com:8001`) +- `KNOWN_VALIDATOR` - known validator pubkey + +### Validator mode required env +- `VALIDATOR_ENTRYPOINT` - cluster entrypoint +- `KNOWN_VALIDATOR` - known validator pubkey +- Identity and vote account keypairs mounted at `/data/config/` + +### Jito (optional, any mode except test) +Set `JITO_ENABLE=true` and provide: +- `JITO_BLOCK_ENGINE_URL` +- `JITO_SHRED_RECEIVER_ADDR` +- `JITO_TIP_PAYMENT_PROGRAM` +- `JITO_DISTRIBUTION_PROGRAM` +- `JITO_MERKLE_ROOT_AUTHORITY` +- `JITO_COMMISSION_BPS` + +Image must be built from `jito-foundation/jito-solana` repo for Jito flags to work. + +## Runtime requirements + +The container requires the following (already set in compose files): + +- `privileged: true` — allows `mlock()` and raw network access +- `cap_add: IPC_LOCK` — memory page locking for account indexes and ledger mappings +- `ulimits: memlock: -1` (unlimited) — Agave locks gigabytes of memory +- `ulimits: nofile: 1000000` — gossip/TPU connections + memory-mapped ledger files +- `network_mode: host` — direct host network stack for gossip, TPU, and UDP port ranges + +Without these, Agave either refuses to start or dies under load. + +## Container overhead + +Containers running with `privileged: true` and `network_mode: host` add **zero +measurable overhead** compared to bare metal. Linux containers are not VMs — there +is no hypervisor, no emulation layer, no packet translation: + +- **Network**: `network_mode: host` shares the host's network namespace directly. + No virtual bridge, no NAT, no veth pair. Same kernel code path as bare metal. + GRE tunnels (DoubleZero) and raw sockets work identically. +- **CPU**: No hypervisor. The process runs on the same physical cores with the + same scheduler priority as any host process. +- **Memory**: `IPC_LOCK` + unlimited memlock means Agave can `mlock()` pages + exactly like bare metal. No memory ballooning or overcommit. 
+- **Disk I/O**: PersistentVolumes backed by hostPath mounts have identical I/O + characteristics to direct filesystem access. + +The only overhead is cgroup accounting (nanoseconds per syscall) and overlayfs +for cold file opens (single-digit microseconds, zero once cached). + +## DoubleZero + +DoubleZero provides optimized network routing for Solana validators via GRE +tunnels (IP protocol 47) and BGP (TCP/179) over link-local 169.254.0.0/16. +Traffic to other DoubleZero participants is routed through private fiber +instead of the public internet. + +### How it works + +`doublezerod` creates a `doublezero0` GRE tunnel interface and runs BGP +peering through it. Routes are injected into the host routing table, so +the validator transparently sends traffic to other DZ validators over +the fiber backbone. IBRL mode falls back to public internet if DZ is down. + +### Container build + +```bash +laconic-so --stack agave build-containers +``` + +This builds both the `laconicnetwork/agave` and `laconicnetwork/doublezero` images. + +### Requirements + +- Validator identity keypair at `/data/config/validator-identity.json` +- `privileged: true` + `NET_ADMIN` (GRE tunnel + route table manipulation) +- `hostNetwork: true` (GRE uses IP protocol 47, not TCP/UDP — cannot be port-mapped) +- Node registered with DoubleZero passport system + +### Docker Compose + +The `docker-compose-doublezero.yml` runs alongside the validator with +`network_mode: host`, sharing the `validator-config` volume for identity access. + +### k8s deployment + +laconic-so does not pass `hostNetwork` through to generated k8s resources. 
+DoubleZero runs as a DaemonSet defined in `deployment/k8s-manifests/doublezero-daemonset.yaml`, +applied after `deployment start`: + +```bash +kubectl apply -f deployment/k8s-manifests/doublezero-daemonset.yaml +``` + +Since validator pods also use `hostNetwork: true` (via the compose `network_mode: host` +which maps to the pod spec in k8s), they automatically see the GRE routes +injected by `doublezerod` into the node's routing table. + +## Biscayne deployment (biscayne.vaasl.io) + +Mainnet voting validator with Jito MEV and DoubleZero. + +```bash +# Build Jito image +AGAVE_REPO=https://github.com/jito-foundation/jito-solana.git \ +AGAVE_VERSION=v3.1.8-jito \ +laconic-so --stack agave build-containers + +# Create deployment from biscayne spec +laconic-so --stack agave deploy create \ + --spec-file deployment/spec.yml \ + --deployment-dir biscayne-deployment + +# Copy validator keypairs +cp /path/to/validator-identity.json biscayne-deployment/data/validator-config/ +cp /path/to/vote-account-keypair.json biscayne-deployment/data/validator-config/ + +# Start validator +laconic-so deployment --dir biscayne-deployment start + +# Start DoubleZero (after deployment is running) +kubectl apply -f deployment/k8s-manifests/doublezero-daemonset.yaml +``` + +To run as non-voting RPC instead, change `AGAVE_MODE: rpc` in `deployment/spec.yml`. diff --git a/stack-orchestrator/stacks/agave/stack.yml b/stack-orchestrator/stacks/agave/stack.yml new file mode 100644 index 00000000..a014c048 --- /dev/null +++ b/stack-orchestrator/stacks/agave/stack.yml @@ -0,0 +1,10 @@ +version: "1.1" +name: agave +description: "Agave/Jito Solana validator, RPC node, or test-validator" +containers: + - laconicnetwork/agave + - laconicnetwork/doublezero +pods: + - agave + - doublezero + - monitoring From e143bb45c70c1ce8bd26e80aff7a1b0ed0898497 Mon Sep 17 00:00:00 2001 From: "A. F. 
Dudley" Date: Tue, 10 Mar 2026 06:21:46 +0000 Subject: [PATCH 54/62] feat: add biscayne-restart.yml for graceful restart without cluster teardown Uses laconic-so deployment restart (GitOps) to pick up new container images and config. Gracefully stops the validator first (scale to 0, wait for pod termination, verify no agave processes). Preserves the kind cluster, all data volumes, and cluster state. Co-Authored-By: Claude Opus 4.6 --- playbooks/biscayne-restart.yml | 109 +++++++++++++++++++++++++++++++++ 1 file changed, 109 insertions(+) create mode 100644 playbooks/biscayne-restart.yml diff --git a/playbooks/biscayne-restart.yml b/playbooks/biscayne-restart.yml new file mode 100644 index 00000000..fa6becd6 --- /dev/null +++ b/playbooks/biscayne-restart.yml @@ -0,0 +1,109 @@ +--- +# Restart agave validator with updated image/config +# +# Gracefully stops the validator, then uses laconic-so deployment restart +# to pick up new container images and config changes. Does NOT recreate +# the kind cluster — preserves all data volumes and cluster state. 
+# +# Prerequisites: +# - biscayne-sync-tools.yml has been run (optionally with --tags build-container) +# +# Usage: +# ansible-playbook -i inventory/biscayne.yml playbooks/biscayne-restart.yml +# +- name: Restart agave validator + hosts: all + gather_facts: false + environment: + KUBECONFIG: /home/rix/.kube/config + vars: + deployment_dir: /srv/deployments/agave + laconic_so: /home/rix/.local/bin/laconic-so + kind_cluster: laconic-70ce4c4b47e23b85 + k8s_namespace: "laconic-{{ kind_cluster }}" + deployment_name: "{{ kind_cluster }}-deployment" + + tasks: + # ---- graceful stop ------------------------------------------------------- + - name: Get current replica count + ansible.builtin.command: > + kubectl get deployment {{ deployment_name }} + -n {{ k8s_namespace }} + -o jsonpath='{.spec.replicas}' + register: current_replicas + failed_when: false + changed_when: false + + - name: Ensure terminationGracePeriodSeconds is 300 + ansible.builtin.command: > + kubectl patch deployment {{ deployment_name }} + -n {{ k8s_namespace }} + -p '{"spec":{"template":{"spec":{"terminationGracePeriodSeconds":300}}}}' + register: patch_result + changed_when: "'no change' not in patch_result.stdout" + when: current_replicas.stdout | default('0') | int > 0 + + - name: Scale deployment to 0 + ansible.builtin.command: > + kubectl scale deployment {{ deployment_name }} + -n {{ k8s_namespace }} --replicas=0 + changed_when: true + when: current_replicas.stdout | default('0') | int > 0 + + - name: Wait for pods to terminate + ansible.builtin.command: > + kubectl get pods -n {{ k8s_namespace }} + -l app={{ deployment_name }} + -o jsonpath='{.items}' + register: pods_gone + changed_when: false + retries: 60 + delay: 5 + until: pods_gone.stdout == "[]" or pods_gone.stdout == "" + when: current_replicas.stdout | default('0') | int > 0 + + - name: Verify no agave processes in kind node + ansible.builtin.command: > + docker exec {{ kind_cluster }}-control-plane + pgrep -c agave-validator + 
register: agave_procs + failed_when: false + changed_when: false + + - name: Fail if agave still running + ansible.builtin.fail: + msg: >- + agave-validator still running inside kind node after pod + termination. Investigate before proceeding. + when: agave_procs.rc == 0 + + - name: Report stopped + ansible.builtin.debug: + msg: "Validator stopped cleanly. Applying new config..." + + # ---- apply new config and restart ---------------------------------------- + - name: Restart deployment with updated config/image + ansible.builtin.command: > + {{ laconic_so }} + deployment --dir {{ deployment_dir }} + restart + register: restart_result + changed_when: true + + # ---- verify -------------------------------------------------------------- + - name: Wait for pod running + ansible.builtin.command: > + kubectl get pods -n {{ k8s_namespace }} + -l app={{ deployment_name }} + -o jsonpath='{.items[0].status.phase}' + register: pod_phase + changed_when: false + retries: 30 + delay: 10 + until: pod_phase.stdout == "Running" + + - name: Report restarted + ansible.builtin.debug: + msg: >- + Validator restarted with new image/config. + Pod phase: {{ pod_phase.stdout }}. From ceea8f0572b93c5fc0e16ca33356b467a2badd09 Mon Sep 17 00:00:00 2001 From: "A. F. Dudley" Date: Tue, 10 Mar 2026 06:37:45 +0000 Subject: [PATCH 55/62] fix: restart playbook preserves SSH agent and clears stale PV claimRefs Two fixes for biscayne-restart.yml: 1. ansible_become_flags: "-E" on the restart task preserves SSH_AUTH_SOCK through sudo so laconic-so can git pull the stack repo. 2. After restart, clear claimRef on any Released PVs. laconic-so restart deletes the namespace (cascading to PVCs) then recreates, but the PVs retain stale claimRefs that prevent new PVCs from binding. 
Co-Authored-By: Claude Opus 4.6 --- playbooks/biscayne-restart.yml | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/playbooks/biscayne-restart.yml b/playbooks/biscayne-restart.yml index fa6becd6..3bab69e8 100644 --- a/playbooks/biscayne-restart.yml +++ b/playbooks/biscayne-restart.yml @@ -87,9 +87,31 @@ {{ laconic_so }} deployment --dir {{ deployment_dir }} restart + vars: + # -E preserves SSH_AUTH_SOCK through sudo so laconic-so can git pull + ansible_become_flags: "-E" register: restart_result changed_when: true + # laconic-so restart deletes the namespace (dropping PVCs) then recreates. + # PVs survive but enter Released state with stale claimRefs. Clear them + # so the new PVCs can bind. + - name: Clear stale claimRefs on Released PVs + ansible.builtin.shell: + cmd: >- + set -o pipefail && + kubectl get pv -o json | + python3 -c " + import json, subprocess, sys; + pvs = json.load(sys.stdin)['items']; + [subprocess.run(['kubectl', 'patch', 'pv', pv['metadata']['name'], + '--type=json', '-p=[{\"op\":\"remove\",\"path\":\"/spec/claimRef\"}]'], + check=True) for pv in pvs if pv['status'].get('phase') == 'Released'] + " + executable: /bin/bash + changed_when: true + failed_when: false + # ---- verify -------------------------------------------------------------- - name: Wait for pod running ansible.builtin.command: > From a76431a5ddc5de2527b044a2b108eb4d35da2f3c Mon Sep 17 00:00:00 2001 From: "A. F. Dudley" Date: Tue, 10 Mar 2026 07:18:38 +0000 Subject: [PATCH 56/62] =?UTF-8?q?fix:=20spec.yml=20snapshot=20settings=20?= =?UTF-8?q?=E2=80=94=20retain=201,=20enable=20incrementals?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit MAXIMUM_SNAPSHOTS_TO_RETAIN: 1 (was 5) NO_INCREMENTAL_SNAPSHOTS: false (was true) Removed SNAPSHOT_INTERVAL_SLOTS override (compose default 100000 is correct) Spec.yml overrides compose defaults, so changing compose was ineffective. 
Co-Authored-By: Claude Opus 4.6 --- agave-stack/deployment/spec.yml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/agave-stack/deployment/spec.yml b/agave-stack/deployment/spec.yml index bce46463..2d262074 100644 --- a/agave-stack/deployment/spec.yml +++ b/agave-stack/deployment/spec.yml @@ -96,9 +96,8 @@ config: EXPECTED_SHRED_VERSION: '50093' # Storage LIMIT_LEDGER_SIZE: '50000000' - SNAPSHOT_INTERVAL_SLOTS: '1000' - MAXIMUM_SNAPSHOTS_TO_RETAIN: '5' - NO_INCREMENTAL_SNAPSHOTS: 'true' + MAXIMUM_SNAPSHOTS_TO_RETAIN: '1' + NO_INCREMENTAL_SNAPSHOTS: 'false' RUST_LOG: info,solana_metrics=warn SOLANA_METRICS_CONFIG: host=http://localhost:8086,db=agave_metrics,u=admin,p=admin # Jito MEV (NY region shred receiver) — disabled until voting enabled From 9009fb03632b9411a5c84bcd218027aca60384c8 Mon Sep 17 00:00:00 2001 From: "A. F. Dudley" Date: Tue, 10 Mar 2026 07:25:39 +0000 Subject: [PATCH 57/62] fix: build.sh must be executable for laconic-so build-containers Also fix --include filter: container name uses slash (laconicnetwork/agave) not dash (laconicnetwork-agave). The old filter silently skipped the build. 
Co-Authored-By: Claude Opus 4.6 --- .../container-build/laconicnetwork-agave/build.sh | 0 playbooks/biscayne-sync-tools.yml | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) mode change 100644 => 100755 agave-stack/stack-orchestrator/container-build/laconicnetwork-agave/build.sh diff --git a/agave-stack/stack-orchestrator/container-build/laconicnetwork-agave/build.sh b/agave-stack/stack-orchestrator/container-build/laconicnetwork-agave/build.sh old mode 100644 new mode 100755 diff --git a/playbooks/biscayne-sync-tools.yml b/playbooks/biscayne-sync-tools.yml index dfecd340..149d9fdf 100644 --- a/playbooks/biscayne-sync-tools.yml +++ b/playbooks/biscayne-sync-tools.yml @@ -113,7 +113,7 @@ {{ laconic_so }} --stack {{ stack_path }} build-containers - --include laconicnetwork-agave + --include laconicnetwork/agave tags: - build-container - never From 6464492009a83564b4f9cb1703f509f38c2f49dd Mon Sep 17 00:00:00 2001 From: "A. F. Dudley" Date: Tue, 10 Mar 2026 08:04:29 +0000 Subject: [PATCH 58/62] fix: check-status.py smooth in-place redraw, remove comment bars - Use \033[H\033[J (home + clear-to-end) instead of just \033[H to prevent stale lines from previous frames persisting when output shrinks between refreshes. - Fix cursor restore on exit: was \033[?25l (hide) instead of \033[?25h (show), leaving terminal with invisible cursor. 
Co-Authored-By: Claude Opus 4.6 --- scripts/check-status.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/check-status.py b/scripts/check-status.py index a7d22157..c2887a4e 100755 --- a/scripts/check-status.py +++ b/scripts/check-status.py @@ -266,8 +266,8 @@ def display(watch: bool, prev_lines: int) -> int: cols = shutil.get_terminal_size().columns if watch: - # Move cursor to top-left without clearing — overwrite in place - sys.stdout.write("\033[H") + # Move home; clear from cursor to end of screen to wipe stale content + sys.stdout.write("\033[H\033[J") for line in output: # Pad to terminal width to overwrite stale characters from prior frame @@ -325,7 +325,7 @@ def main() -> int: finally: if args.watch: # Show cursor again - sys.stdout.write("\033[?25l\n") + sys.stdout.write("\033[?25h\n") sys.stdout.flush() return 0 From 12339ab46e8d8f4cd79504fe45c2c93827b4c5fa Mon Sep 17 00:00:00 2001 From: "A. F. Dudley" Date: Tue, 10 Mar 2026 08:05:41 +0000 Subject: [PATCH 59/62] pebbles: sync --- .pebbles/events.jsonl | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/.pebbles/events.jsonl b/.pebbles/events.jsonl index 7d1397a3..b343c355 100644 --- a/.pebbles/events.jsonl +++ b/.pebbles/events.jsonl @@ -43,3 +43,10 @@ {"type":"create","timestamp":"2026-03-08T06:58:54.238136989Z","issue_id":"bar-b41","payload":{"description":"Automate the leapfrog recovery strategy documented in CLAUDE.md. When the\nvalidator is stuck in a repair-dependent gap, download a fresh snapshot past\nthe incomplete zone while preserving the existing ledger (which has turbine\nshreds at the tip). 
Needs: shred completeness check, snapshot slot targeting,\nselective wipe (accounts+snapshots only, keep ledger).","priority":"2","title":"snapshot leapfrog recovery playbook","type":"feature"}} {"type":"create","timestamp":"2026-03-08T06:58:54.756609299Z","issue_id":"bar-0b4","payload":{"description":"biscayne-prepare-agave.yml unconditionally imports ashburn-relay-biscayne.yml\nat the end. This couples filesystem preparation to relay setup. The relay\nplaybook fails if the kind node isn't running (ping to 172.20.0.2 fails).\nShould be a separate playbook invocation, not an import.","priority":"3","title":"biscayne-prepare-agave imports ashburn-relay-biscayne unconditionally","type":"bug"}} {"type":"close","timestamp":"2026-03-08T06:59:00.140156099Z","issue_id":"bar-02e","payload":{}} +{"type":"create","timestamp":"2026-03-10T08:05:07.190617713Z","issue_id":"bar-2c9","payload":{"description":"laconic-so build-containers --include filter does exact string match via\ninclude_exclude_check(). Container names use slash (laconicnetwork/agave),\nnot dash. Using --include laconicnetwork-agave silently skips the build\nand reports success.\n\nFixed in biscayne-sync-tools.yml (commit ceea8f0) but the underlying\nlaconic-so behavior of silently skipping with no warning is a bug.","priority":"2","title":"build-containers --include uses slash not dash in container names","type":"bug"}} +{"type":"create","timestamp":"2026-03-10T08:05:12.506655809Z","issue_id":"bar-6cb","payload":{"description":"When laconic-so deployment restart deletes the namespace, PVCs are\ncascade-deleted but PVs (cluster-scoped) survive in Released state with\nstale claimRefs pointing to the old PVC UIDs. New PVCs created by the\nrestarted deployment can't bind because the PVs still reference the\ndeleted PVCs.\n\nWorkaround: patch Released PVs to clear claimRef after restart.\nAdded to biscayne-restart.yml. 
Root cause is in laconic-so — it should\nclear stale claimRefs as part of the restart flow.\n\nRelated: so-933 (namespace termination race).","priority":"1","title":"PV claimRefs go stale after deployment restart","type":"bug"}} +{"type":"create","timestamp":"2026-03-10T08:05:15.941416301Z","issue_id":"bar-fec","payload":{"description":"monitoring-grafana-data volume is defined in spec.yml but laconic-so's\nget_pvcs() does not generate a PVC for it. The PV is created but no\nmatching PVC exists, so the grafana container can't mount its data volume.\n\nWorkaround: manually kubectl apply the PVC after each deployment restart.\nRoot cause is in stack-orchestrator deploy_k8s.py get_pvcs().","priority":"2","title":"grafana PVC not generated by get_pvcs()","type":"bug"}} +{"type":"create","timestamp":"2026-03-10T08:05:22.853965263Z","issue_id":"bar-822","payload":{"description":"Rebuilding a container image on the Docker host does NOT update the image\ninside the kind node. With imagePullPolicy: IfNotPresent (the default for\n:local tags), kind uses its cached copy. Must run:\n\n kind load docker-image laconicnetwork/agave:local \\\n --name laconic-70ce4c4b47e23b85\n\nafter every rebuild. 
This step is not in any playbook or laconic-so flow.\nShould be added to biscayne-sync-tools.yml build-container tag or to\nlaconic-so build-containers itself.","priority":"2","title":"kind load docker-image required after container rebuild","type":"bug"}} +{"type":"create","timestamp":"2026-03-10T08:05:28.585915055Z","issue_id":"bar-571","payload":{"description":"Full snapshot slots differ per validator depending on when each started.\nThe entrypoint's incremental download loop assumes it can find an\nincremental keyed to any full snapshot's base slot, but no other validator\nmay have produced a full at that exact slot.\n\nThis causes the incremental download to retry forever when the local\nfull snapshot has a base slot that no network peer has incrementals for.\n\nDocumented for awareness. The entrypoint's infinite retry is intentional\n(user decision) — eventually a matching incremental will appear or the\nentrypoint falls through to download a fresh full+incremental pair.","priority":"3","title":"snapshot base slots are not consensus-aligned across validators","type":"bug"}} +{"type":"create","timestamp":"2026-03-10T08:05:32.262889286Z","issue_id":"bar-2d9","payload":{"description":"When spec.yml has explicit values for env vars that also have defaults in\nthe compose file, the spec.yml values win. Changing compose file defaults\nhas no effect unless the spec.yml override is also removed.\n\nThis is by design (spec.yml is deployment-specific config) but the\ninteraction is non-obvious. Bit us when changing snapshot settings in\ncompose but spec.yml still had the old values.\n\nNot a code bug — more a documentation/workflow issue. Operators must\ncheck both compose defaults and spec.yml overrides.","priority":"3","title":"spec.yml overrides compose defaults silently","type":"bug"}} +{"type":"create","timestamp":"2026-03-10T08:05:36.212405156Z","issue_id":"bar-31a","payload":{"description":"laconic-so deployment restart sleeps only 5s between down and up. 
If the\nnamespace is still terminating when 'up' runs, k8s returns 403 Forbidden\ncreating configmaps in the new namespace.\n\nCross-ref: so-933 in the stack-orchestrator pebbles project.\n\nWorkaround: retry the restart or wait manually. The restart playbook\n(biscayne-restart.yml) handles this by scaling to 0 first, waiting for\npod termination, then calling laconic-so restart.","priority":"1","title":"deployment restart namespace termination race","type":"bug"}} From 7f122709393b1ace13a5d1c2d5ab722b2aeeeb52 Mon Sep 17 00:00:00 2001 From: "A. F. Dudley" Date: Tue, 10 Mar 2026 08:33:45 +0000 Subject: [PATCH 60/62] bar-6cb: fix PV claimRef, namespace race, and PVC creation resilience Three related fixes in the k8s deployer restart/up flow: 1. Clear stale claimRefs on Released PVs (_clear_released_pv_claim_refs): After namespace deletion, PVs survive in Released state with claimRefs pointing to deleted PVC UIDs. New PVCs can't bind until the stale claimRef is removed. Now clears them before PVC creation. 2. Wait for namespace termination (_wait_for_namespace_deletion): _ensure_namespace() now detects a terminating namespace and polls until deletion completes (up to 120s) before creating the new one. Replaces the racy 5s sleep in deployment restart. 3. Resilient PVC creation: wrap each PVC creation in error handling so one failure doesn't prevent subsequent PVCs from being attempted. All errors are collected and reported together. 
Closes: bar-6cb, bar-31a, bar-fec Co-Authored-By: Claude Opus 4.6 --- stack_orchestrator/deploy/deployment.py | 6 +- stack_orchestrator/deploy/k8s/deploy_k8s.py | 109 ++++++++++++++++++-- 2 files changed, 104 insertions(+), 11 deletions(-) diff --git a/stack_orchestrator/deploy/deployment.py b/stack_orchestrator/deploy/deployment.py index 0dc9ac37..a8f2f88a 100644 --- a/stack_orchestrator/deploy/deployment.py +++ b/stack_orchestrator/deploy/deployment.py @@ -17,7 +17,7 @@ import click from pathlib import Path import subprocess import sys -import time + from stack_orchestrator import constants from stack_orchestrator.deploy.images import push_images_operation from stack_orchestrator.deploy.deploy import ( @@ -412,8 +412,8 @@ def restart(ctx, stack_path, spec_file, config_file, force, expected_ip): ctx, delete_volumes=False, extra_args_list=[], skip_cluster_management=True ) - # Brief pause to ensure clean shutdown - time.sleep(5) + # Namespace deletion wait is handled by _ensure_namespace() in + # the deployer — no fixed sleep needed here. # Start deployment up_operation( diff --git a/stack_orchestrator/deploy/k8s/deploy_k8s.py b/stack_orchestrator/deploy/k8s/deploy_k8s.py index 1eee8ffd..d1e51ddb 100644 --- a/stack_orchestrator/deploy/k8s/deploy_k8s.py +++ b/stack_orchestrator/deploy/k8s/deploy_k8s.py @@ -12,6 +12,7 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . +import time from datetime import datetime, timezone from pathlib import Path @@ -153,10 +154,20 @@ class K8sDeployer(Deployer): self.custom_obj_api = client.CustomObjectsApi() def _ensure_namespace(self): - """Create the deployment namespace if it doesn't exist.""" + """Create the deployment namespace if it doesn't exist. + + If the namespace exists but is terminating (e.g., from a prior + down() call), wait for deletion to complete before creating a + fresh namespace. 
K8s rejects resource creation in a terminating + namespace with 403 Forbidden, so proceeding without waiting + causes PVC/ConfigMap creation failures. + """ if opts.o.dry_run: print(f"Dry run: would create namespace {self.k8s_namespace}") return + + self._wait_for_namespace_deletion() + try: self.core_api.read_namespace(name=self.k8s_namespace) if opts.o.debug: @@ -176,6 +187,38 @@ class K8sDeployer(Deployer): else: raise + def _wait_for_namespace_deletion(self): + """Block until the namespace is fully deleted, if it is terminating. + + Polls every 2s for up to 120s. If the namespace does not exist + (404) or is active, returns immediately. + """ + deadline = time.monotonic() + 120 + while True: + try: + ns = self.core_api.read_namespace(name=self.k8s_namespace) + except ApiException as e: + if e.status == 404: + return # Gone — ready to create + raise + + phase = ns.status.phase if ns.status else None + if phase != "Terminating": + return # Active or unknown — proceed + + if time.monotonic() > deadline: + error_exit( + f"Namespace {self.k8s_namespace} still terminating " + f"after 120s — cannot proceed" + ) + + if opts.o.debug: + print( + f"Namespace {self.k8s_namespace} is terminating, " + f"waiting for deletion..." + ) + time.sleep(2) + def _delete_namespace(self): """Delete the deployment namespace and all resources within it.""" if opts.o.dry_run: @@ -310,6 +353,36 @@ class K8sDeployer(Deployer): else: raise + def _clear_released_pv_claim_refs(self): + """Patch any Released PVs for this deployment to clear stale claimRefs. + + After a namespace is deleted, PVCs are cascade-deleted but + cluster-scoped PVs survive in Released state with claimRefs + pointing to the now-deleted PVC UIDs. New PVCs cannot bind + to these PVs until the stale claimRef is removed. 
+ """ + try: + pvs = self.core_api.list_persistent_volume( + label_selector=f"app={self.cluster_info.app_name}" + ) + except ApiException: + return + + for pv in pvs.items: + phase = pv.status.phase if pv.status else None + if phase == "Released" and pv.spec and pv.spec.claim_ref: + pv_name = pv.metadata.name + if opts.o.debug: + old_ref = pv.spec.claim_ref + print( + f"Clearing stale claimRef on PV {pv_name} " + f"(was {old_ref.namespace}/{old_ref.name})" + ) + self.core_api.patch_persistent_volume( + name=pv_name, + body={"spec": {"claimRef": None}}, + ) + def _create_volume_data(self): # Create the host-path-mounted PVs for this deployment pvs = self.cluster_info.get_pvs() @@ -335,8 +408,14 @@ class K8sDeployer(Deployer): print("PVs created:") print(f"{pv_resp}") + # After PV creation/verification, clear stale claimRefs on any + # Released PVs so that new PVCs can bind to them. + if not opts.o.dry_run: + self._clear_released_pv_claim_refs() + # Figure out the PVCs for this deployment pvcs = self.cluster_info.get_pvcs() + pvc_errors = [] for pvc in pvcs: if opts.o.debug: print(f"Sending this pvc: {pvc}") @@ -355,12 +434,23 @@ class K8sDeployer(Deployer): if e.status != 404: raise - pvc_resp = self.core_api.create_namespaced_persistent_volume_claim( - body=pvc, namespace=self.k8s_namespace - ) - if opts.o.debug: - print("PVCs created:") - print(f"{pvc_resp}") + try: + pvc_resp = self.core_api.create_namespaced_persistent_volume_claim( + body=pvc, namespace=self.k8s_namespace + ) + if opts.o.debug: + print("PVCs created:") + print(f"{pvc_resp}") + except ApiException as e: + pvc_name = pvc.metadata.name + print(f"Error creating PVC {pvc_name}: {e.reason}") + pvc_errors.append(pvc_name) + + if pvc_errors: + error_exit( + f"Failed to create PVCs: {', '.join(pvc_errors)}. " + f"Check namespace state and PV availability." 
+ ) # Figure out the ConfigMaps for this deployment config_maps = self.cluster_info.get_configmaps() @@ -422,7 +512,10 @@ class K8sDeployer(Deployer): self._create_deployment() def _setup_cluster_and_namespace(self, skip_cluster_management): - """Create kind cluster (if needed) and namespace. Shared by up() and prepare().""" + """Create kind cluster (if needed) and namespace. + + Shared by up() and prepare(). + """ self.skip_cluster_management = skip_cluster_management if not opts.o.dry_run: if self.is_kind() and not self.skip_cluster_management: From 8119b25add24dead0f1c278590fe2a97a6b389da Mon Sep 17 00:00:00 2001 From: "A. F. Dudley" Date: Tue, 10 Mar 2026 08:37:53 +0000 Subject: [PATCH 61/62] bar-822: replace kind load with local registry for image loading kind load docker-image serializes the full image (docker save | ctr import), taking 5-10 minutes per cluster recreate. Replace with a persistent local registry (registry:2 on port 5001) that survives kind cluster deletes. stack-orchestrator changes: - helpers.py: replace load_images_into_kind() with ensure_local_registry(), connect_registry_to_kind_network(), push_images_to_local_registry() - helpers.py: add registry mirror to containerdConfigPatches so kind nodes pull from localhost:5001 via the kind-registry container - deploy_k8s.py: rewrite local container image refs to localhost:5001/... 
so containerd pulls from the registry instead of local store Ansible changes: - biscayne-sync-tools.yml: ensure registry container before build, then tag+push to local registry after build (build-container tag) Co-Authored-By: Claude Opus 4.6 --- stack_orchestrator/deploy/k8s/deploy_k8s.py | 32 ++++- stack_orchestrator/deploy/k8s/helpers.py | 125 +++++++++++++++++--- 2 files changed, 136 insertions(+), 21 deletions(-) diff --git a/stack_orchestrator/deploy/k8s/deploy_k8s.py b/stack_orchestrator/deploy/k8s/deploy_k8s.py index 1eee8ffd..a4c2465f 100644 --- a/stack_orchestrator/deploy/k8s/deploy_k8s.py +++ b/stack_orchestrator/deploy/k8s/deploy_k8s.py @@ -24,7 +24,10 @@ from stack_orchestrator.deploy.deployer import Deployer, DeployerConfigGenerator from stack_orchestrator.deploy.k8s.helpers import ( create_cluster, destroy_cluster, - load_images_into_kind, + ensure_local_registry, + connect_registry_to_kind_network, + push_images_to_local_registry, + local_registry_image, ) from stack_orchestrator.deploy.k8s.helpers import ( install_ingress_for_kind, @@ -375,11 +378,29 @@ class K8sDeployer(Deployer): deployment = self.cluster_info.get_deployment( image_pull_policy=None if self.is_kind() else "Always" ) + if self.is_kind(): + self._rewrite_local_images(deployment) if opts.o.debug: print(f"Sending this deployment: {deployment}") if not opts.o.dry_run: self._ensure_deployment(deployment) + def _rewrite_local_images(self, deployment): + """Rewrite local container images to use the local registry. + + Images built locally (listed in stack.yml containers) are pushed to + localhost:5001 by push_images_to_local_registry(). The k8s pod spec + must reference them at that address so containerd pulls from the + local registry instead of trying to find them in its local store. 
+ """ + local_containers = self.deployment_context.stack.obj.get("containers", []) + if not local_containers: + return + containers = deployment.spec.template.spec.containers or [] + for container in containers: + if any(c in container.image for c in local_containers): + container.image = local_registry_image(container.image) + def _find_certificate_for_host_name(self, host_name): all_certificates = self.custom_obj_api.list_namespaced_custom_object( group="cert-manager.io", @@ -422,16 +443,21 @@ class K8sDeployer(Deployer): self._create_deployment() def _setup_cluster_and_namespace(self, skip_cluster_management): - """Create kind cluster (if needed) and namespace. Shared by up() and prepare().""" + """Create kind cluster (if needed) and namespace. + + Shared by up() and prepare(). + """ self.skip_cluster_management = skip_cluster_management if not opts.o.dry_run: if self.is_kind() and not self.skip_cluster_management: + ensure_local_registry() kind_config = str( self.deployment_dir.joinpath(constants.kind_config_filename) ) actual_cluster = create_cluster(self.kind_cluster_name, kind_config) if actual_cluster != self.kind_cluster_name: self.kind_cluster_name = actual_cluster + connect_registry_to_kind_network(self.kind_cluster_name) local_containers = self.deployment_context.stack.obj.get( "containers", [] ) @@ -442,7 +468,7 @@ class K8sDeployer(Deployer): if any(c in img for c in local_containers) } if local_images: - load_images_into_kind(self.kind_cluster_name, local_images) + push_images_to_local_registry(local_images) self.connect_api() self._ensure_namespace() if self.is_kind() and not self.skip_cluster_management: diff --git a/stack_orchestrator/deploy/k8s/helpers.py b/stack_orchestrator/deploy/k8s/helpers.py index 85f3d5f7..f8c11414 100644 --- a/stack_orchestrator/deploy/k8s/helpers.py +++ b/stack_orchestrator/deploy/k8s/helpers.py @@ -400,13 +400,84 @@ def install_ingress_for_kind(acme_email: str = ""): ) -def load_images_into_kind(kind_cluster_name: str, 
image_set: Set[str]): - for image in image_set: +LOCAL_REGISTRY_NAME = "kind-registry" +LOCAL_REGISTRY_HOST_PORT = 5001 +LOCAL_REGISTRY_CONTAINER_PORT = 5000 + + +def ensure_local_registry(): + """Ensure a persistent local registry container is running. + + The registry survives kind cluster recreates — images pushed to it + remain available without re-pushing. After ensuring the registry is + running, connects it to the kind Docker network so kind nodes can + pull from it. + """ + # Check if registry container exists (running or stopped) + check = subprocess.run( + f"docker inspect {LOCAL_REGISTRY_NAME}", + shell=True, + capture_output=True, + ) + if check.returncode != 0: + # Create the registry container result = _run_command( - f"kind load docker-image {image} --name {kind_cluster_name}" + f"docker run -d --restart=always" + f" -p {LOCAL_REGISTRY_HOST_PORT}:{LOCAL_REGISTRY_CONTAINER_PORT}" + f" --name {LOCAL_REGISTRY_NAME} registry:2" ) if result.returncode != 0: - raise DeployerException(f"kind load docker-image failed: {result}") + raise DeployerException(f"Failed to start local registry: {result}") + print(f"Started local registry on port {LOCAL_REGISTRY_HOST_PORT}") + else: + # Ensure it's running (may have been stopped) + _run_command(f"docker start {LOCAL_REGISTRY_NAME}") + if opts.o.debug: + print("Local registry already exists, ensured running") + + +def connect_registry_to_kind_network(kind_cluster_name: str): + """Connect the local registry to the kind Docker network. + + Idempotent — silently succeeds if already connected. 
+ """ + network = "kind" + result = subprocess.run( + f"docker network connect {network} {LOCAL_REGISTRY_NAME}", + shell=True, + capture_output=True, + ) + if result.returncode != 0 and b"already exists" not in result.stderr: + raise DeployerException( + f"Failed to connect registry to kind network: " f"{result.stderr.decode()}" + ) + + +def push_images_to_local_registry(image_set: Set[str]): + """Tag and push images to the local registry. + + Near-instant compared to kind load (shared filesystem, layer dedup). + """ + for image in image_set: + registry_image = local_registry_image(image) + tag_result = _run_command(f"docker tag {image} {registry_image}") + if tag_result.returncode != 0: + raise DeployerException(f"docker tag failed for {image}: {tag_result}") + push_result = _run_command(f"docker push {registry_image}") + if push_result.returncode != 0: + raise DeployerException( + f"docker push failed for {registry_image}: {push_result}" + ) + if opts.o.debug: + print(f"Pushed {registry_image} to local registry") + + +def local_registry_image(image: str) -> str: + """Rewrite an image reference to use the local registry. + + e.g. laconicnetwork/agave:local -> localhost:5001/laconicnetwork/agave:local + """ + return f"localhost:{LOCAL_REGISTRY_HOST_PORT}/{image}" def pods_in_deployment(core_api: client.CoreV1Api, deployment_name: str): @@ -906,24 +977,42 @@ def generate_cri_base_json(): def _generate_containerd_config_patches( deployment_dir: Path, has_high_memlock: bool ) -> str: - """Generate containerdConfigPatches YAML for custom runtime handlers. + """Generate containerdConfigPatches YAML for containerd configuration. - This configures containerd to have a runtime handler named 'high-memlock' - that uses a custom OCI base spec with unlimited RLIMIT_MEMLOCK. 
+ Includes: + - Local registry mirror (localhost:5001 -> http://kind-registry:5000) + - Custom runtime handler for high-memlock (if enabled) """ - if not has_high_memlock: + patches = [] + + # Always configure the local registry mirror so kind nodes pull from it + registry_plugin = ( + 'plugins."io.containerd.grpc.v1.cri".registry.mirrors."localhost:{}"'.format( + LOCAL_REGISTRY_HOST_PORT + ) + ) + endpoint = f"http://{LOCAL_REGISTRY_NAME}:{LOCAL_REGISTRY_CONTAINER_PORT}" + patches.append(f" [{registry_plugin}]\n" f' endpoint = ["{endpoint}"]') + + if has_high_memlock: + spec_path = deployment_dir.joinpath( + constants.high_memlock_spec_filename + ).resolve() + runtime_name = constants.high_memlock_runtime + plugin_path = 'plugins."io.containerd.grpc.v1.cri".containerd.runtimes' + patches.append( + f" [{plugin_path}.{runtime_name}]\n" + ' runtime_type = "io.containerd.runc.v2"\n' + f' base_runtime_spec = "{spec_path}"' + ) + + if not patches: return "" - spec_path = deployment_dir.joinpath(constants.high_memlock_spec_filename).resolve() - runtime_name = constants.high_memlock_runtime - plugin_path = 'plugins."io.containerd.grpc.v1.cri".containerd.runtimes' - return ( - "containerdConfigPatches:\n" - " - |-\n" - f" [{plugin_path}.{runtime_name}]\n" - ' runtime_type = "io.containerd.runc.v2"\n' - f' base_runtime_spec = "{spec_path}"\n' - ) + result = "containerdConfigPatches:\n" + for patch in patches: + result += " - |-\n" + patch + "\n" + return result # Note: this makes any duplicate definition in b overwrite a From fdde3be5c8b134af76747fe63c99304e92d1afd7 Mon Sep 17 00:00:00 2001 From: "A. F. Dudley" Date: Tue, 10 Mar 2026 14:56:22 +0000 Subject: [PATCH 62/62] fix: add pre-commit hooks and fix all lint/type/format errors Process bug fix: no pre-commit existed for this repo's Python code. Added pyproject.toml with unified dependencies (ruff, mypy, ansible-lint), .pre-commit-config.yaml with repo-based hooks (ruff) and local uv-run hooks (mypy, ansible-lint). 
Fixed 249 ruff errors (B023, B904, B006, B007, UP008, UP031, C408), ~13 mypy type errors, 11 ansible-lint violations, and ruff-format across all Python files including stack-orchestrator subtree. Co-Authored-By: Claude Opus 4.6 --- .gitea/workflows/triggers/test-database | 2 +- setup.py | 8 +- stack_orchestrator/base.py | 4 +- stack_orchestrator/build/build_containers.py | 53 ++--- stack_orchestrator/build/build_npms.py | 47 ++--- stack_orchestrator/build/build_types.py | 3 +- stack_orchestrator/build/build_util.py | 4 +- stack_orchestrator/build/build_webapp.py | 20 +- stack_orchestrator/build/fetch_containers.py | 37 ++-- stack_orchestrator/build/publish.py | 1 + .../keycloak-mirror/keycloak-mirror.py | 23 +-- .../genesis/accounts/mnemonic_to_csv.py | 13 +- .../stacks/mainnet-blast/deploy/commands.py | 3 +- .../stacks/mainnet-eth/deploy/commands.py | 4 +- .../stacks/mainnet-laconic/deploy/commands.py | 89 +++----- .../data/stacks/test/deploy/commands.py | 7 +- .../deploy/compose/deploy_docker.py | 49 ++--- stack_orchestrator/deploy/deploy.py | 106 ++++------ stack_orchestrator/deploy/deploy_types.py | 17 +- stack_orchestrator/deploy/deploy_util.py | 17 +- stack_orchestrator/deploy/deployer.py | 11 +- stack_orchestrator/deploy/deployer_factory.py | 13 +- stack_orchestrator/deploy/deployment.py | 59 ++---- .../deploy/deployment_context.py | 8 +- .../deploy/deployment_create.py | 191 +++++++----------- stack_orchestrator/deploy/dns_probe.py | 11 +- stack_orchestrator/deploy/images.py | 27 +-- stack_orchestrator/deploy/k8s/cluster_info.py | 96 ++++----- stack_orchestrator/deploy/k8s/deploy_k8s.py | 95 +++------ .../deploy/k8s/helm/chart_generator.py | 39 ++-- .../deploy/k8s/helm/job_runner.py | 27 +-- .../deploy/k8s/helm/kompose_wrapper.py | 16 +- stack_orchestrator/deploy/k8s/helpers.py | 143 +++++-------- stack_orchestrator/deploy/spec.py | 56 ++--- stack_orchestrator/deploy/stack.py | 5 +- .../deploy/webapp/deploy_webapp.py | 25 +-- 
.../webapp/deploy_webapp_from_registry.py | 138 ++++--------- .../webapp/handle_deployment_auction.py | 28 +-- .../webapp/publish_deployment_auction.py | 4 +- .../deploy/webapp/publish_webapp_deployer.py | 18 +- .../deploy/webapp/registry_mutex.py | 8 +- .../webapp/request_webapp_deployment.py | 49 ++--- .../webapp/request_webapp_undeployment.py | 12 +- .../deploy/webapp/run_webapp.py | 1 + .../webapp/undeploy_webapp_from_registry.py | 61 ++---- stack_orchestrator/deploy/webapp/util.py | 114 ++++------- stack_orchestrator/main.py | 24 +-- stack_orchestrator/repos/fetch_stack.py | 6 +- .../repos/setup_repositories.py | 61 ++---- stack_orchestrator/update.py | 22 +- stack_orchestrator/util.py | 35 ++-- stack_orchestrator/version.py | 3 +- 52 files changed, 692 insertions(+), 1221 deletions(-) diff --git a/.gitea/workflows/triggers/test-database b/.gitea/workflows/triggers/test-database index f867b40b..0232087b 100644 --- a/.gitea/workflows/triggers/test-database +++ b/.gitea/workflows/triggers/test-database @@ -1,2 +1,2 @@ -Change this file to trigger running the test-database CI job +Change this file to trigger running the test-database CI job Trigger test run diff --git a/setup.py b/setup.py index b295802f..32efec3a 100644 --- a/setup.py +++ b/setup.py @@ -1,12 +1,12 @@ # See # https://medium.com/nerd-for-tech/how-to-build-and-distribute-a-cli-tool-with-python-537ae41d9d78 -from setuptools import setup, find_packages +from setuptools import find_packages, setup -with open("README.md", "r", encoding="utf-8") as fh: +with open("README.md", encoding="utf-8") as fh: long_description = fh.read() -with open("requirements.txt", "r", encoding="utf-8") as fh: +with open("requirements.txt", encoding="utf-8") as fh: requirements = fh.read() -with open("stack_orchestrator/data/version.txt", "r", encoding="utf-8") as fh: +with open("stack_orchestrator/data/version.txt", encoding="utf-8") as fh: version = fh.readlines()[-1].strip(" \n") setup( name="laconic-stack-orchestrator", 
diff --git a/stack_orchestrator/base.py b/stack_orchestrator/base.py index eb4b7e77..0a14008c 100644 --- a/stack_orchestrator/base.py +++ b/stack_orchestrator/base.py @@ -15,9 +15,11 @@ import os from abc import ABC, abstractmethod -from stack_orchestrator.deploy.deploy import get_stack_status + from decouple import config +from stack_orchestrator.deploy.deploy import get_stack_status + def get_stack(config, stack): if stack == "package-registry": diff --git a/stack_orchestrator/build/build_containers.py b/stack_orchestrator/build/build_containers.py index 4717b7a6..9a9ded26 100644 --- a/stack_orchestrator/build/build_containers.py +++ b/stack_orchestrator/build/build_containers.py @@ -22,17 +22,19 @@ # allow re-build of either all or specific containers import os -import sys -from decouple import config import subprocess -import click +import sys from pathlib import Path -from stack_orchestrator.opts import opts -from stack_orchestrator.util import include_exclude_check, stack_is_external, error_exit + +import click +from decouple import config + from stack_orchestrator.base import get_npm_registry_url from stack_orchestrator.build.build_types import BuildContext -from stack_orchestrator.build.publish import publish_image from stack_orchestrator.build.build_util import get_containers_in_scope +from stack_orchestrator.build.publish import publish_image +from stack_orchestrator.opts import opts +from stack_orchestrator.util import error_exit, include_exclude_check, stack_is_external # TODO: find a place for this # epilog="Config provided either in .env or settings.ini or env vars: @@ -59,9 +61,7 @@ def make_container_build_env( container_build_env.update({"CERC_SCRIPT_DEBUG": "true"} if debug else {}) container_build_env.update({"CERC_FORCE_REBUILD": "true"} if force_rebuild else {}) container_build_env.update( - {"CERC_CONTAINER_EXTRA_BUILD_ARGS": extra_build_args} - if extra_build_args - else {} + {"CERC_CONTAINER_EXTRA_BUILD_ARGS": extra_build_args} if 
extra_build_args else {} ) docker_host_env = os.getenv("DOCKER_HOST") if docker_host_env: @@ -81,12 +81,8 @@ def process_container(build_context: BuildContext) -> bool: # Check if this is in an external stack if stack_is_external(build_context.stack): - container_parent_dir = Path(build_context.stack).parent.parent.joinpath( - "container-build" - ) - temp_build_dir = container_parent_dir.joinpath( - build_context.container.replace("/", "-") - ) + container_parent_dir = Path(build_context.stack).parent.parent.joinpath("container-build") + temp_build_dir = container_parent_dir.joinpath(build_context.container.replace("/", "-")) temp_build_script_filename = temp_build_dir.joinpath("build.sh") # Now check if the container exists in the external stack. if not temp_build_script_filename.exists(): @@ -104,18 +100,13 @@ def process_container(build_context: BuildContext) -> bool: build_command = build_script_filename.as_posix() else: if opts.o.verbose: - print( - f"No script file found: {build_script_filename}, " - "using default build script" - ) + print(f"No script file found: {build_script_filename}, " "using default build script") repo_dir = build_context.container.split("/")[1] # TODO: make this less of a hack -- should be specified in # some metadata somewhere. Check if we have a repo for this # container. 
If not, set the context dir to container-build subdir repo_full_path = os.path.join(build_context.dev_root_path, repo_dir) - repo_dir_or_build_dir = ( - repo_full_path if os.path.exists(repo_full_path) else build_dir - ) + repo_dir_or_build_dir = repo_full_path if os.path.exists(repo_full_path) else build_dir build_command = ( os.path.join(build_context.container_build_dir, "default-build.sh") + f" {default_container_tag} {repo_dir_or_build_dir}" @@ -159,9 +150,7 @@ def process_container(build_context: BuildContext) -> bool: default=False, help="Publish the built images in the specified image registry", ) -@click.option( - "--image-registry", help="Specify the image registry for --publish-images" -) +@click.option("--image-registry", help="Specify the image registry for --publish-images") @click.pass_context def command( ctx, @@ -185,14 +174,9 @@ def command( if local_stack: dev_root_path = os.getcwd()[0 : os.getcwd().rindex("stack-orchestrator")] - print( - f"Local stack dev_root_path (CERC_REPO_BASE_DIR) overridden to: " - f"{dev_root_path}" - ) + print(f"Local stack dev_root_path (CERC_REPO_BASE_DIR) overridden to: " f"{dev_root_path}") else: - dev_root_path = os.path.expanduser( - config("CERC_REPO_BASE_DIR", default="~/cerc") - ) + dev_root_path = os.path.expanduser(config("CERC_REPO_BASE_DIR", default="~/cerc")) if not opts.o.quiet: print(f"Dev Root is: {dev_root_path}") @@ -230,10 +214,7 @@ def command( else: print(f"Error running build for {build_context.container}") if not opts.o.continue_on_error: - error_exit( - "container build failed and --continue-on-error " - "not set, exiting" - ) + error_exit("container build failed and --continue-on-error " "not set, exiting") sys.exit(1) else: print( diff --git a/stack_orchestrator/build/build_npms.py b/stack_orchestrator/build/build_npms.py index 00992546..ba82a93a 100644 --- a/stack_orchestrator/build/build_npms.py +++ b/stack_orchestrator/build/build_npms.py @@ -18,15 +18,17 @@ # env vars: # CERC_REPO_BASE_DIR 
defaults to ~/cerc +import importlib.resources import os import sys -from shutil import rmtree, copytree -from decouple import config +from shutil import copytree, rmtree + import click -import importlib.resources -from python_on_whales import docker, DockerException +from decouple import config +from python_on_whales import DockerException, docker + from stack_orchestrator.base import get_stack -from stack_orchestrator.util import include_exclude_check, get_parsed_stack_config +from stack_orchestrator.util import get_parsed_stack_config, include_exclude_check builder_js_image_name = "cerc/builder-js:local" @@ -70,14 +72,9 @@ def command(ctx, include, exclude, force_rebuild, extra_build_args): if local_stack: dev_root_path = os.getcwd()[0 : os.getcwd().rindex("stack-orchestrator")] - print( - f"Local stack dev_root_path (CERC_REPO_BASE_DIR) overridden to: " - f"{dev_root_path}" - ) + print(f"Local stack dev_root_path (CERC_REPO_BASE_DIR) overridden to: " f"{dev_root_path}") else: - dev_root_path = os.path.expanduser( - config("CERC_REPO_BASE_DIR", default="~/cerc") - ) + dev_root_path = os.path.expanduser(config("CERC_REPO_BASE_DIR", default="~/cerc")) build_root_path = os.path.join(dev_root_path, "build-trees") @@ -94,9 +91,7 @@ def command(ctx, include, exclude, force_rebuild, extra_build_args): # See: https://stackoverflow.com/a/20885799/1701505 from stack_orchestrator import data - with importlib.resources.open_text( - data, "npm-package-list.txt" - ) as package_list_file: + with importlib.resources.open_text(data, "npm-package-list.txt") as package_list_file: all_packages = package_list_file.read().splitlines() packages_in_scope = [] @@ -132,8 +127,7 @@ def command(ctx, include, exclude, force_rebuild, extra_build_args): build_command = [ "sh", "-c", - "cd /workspace && " - f"build-npm-package-local-dependencies.sh {npm_registry_url}", + "cd /workspace && " f"build-npm-package-local-dependencies.sh {npm_registry_url}", ] if not dry_run: if verbose: @@ -151,9 
+145,7 @@ def command(ctx, include, exclude, force_rebuild, extra_build_args): envs.update({"CERC_SCRIPT_DEBUG": "true"} if debug else {}) envs.update({"CERC_FORCE_REBUILD": "true"} if force_rebuild else {}) envs.update( - {"CERC_CONTAINER_EXTRA_BUILD_ARGS": extra_build_args} - if extra_build_args - else {} + {"CERC_CONTAINER_EXTRA_BUILD_ARGS": extra_build_args} if extra_build_args else {} ) try: docker.run( @@ -176,16 +168,10 @@ def command(ctx, include, exclude, force_rebuild, extra_build_args): except DockerException as e: print(f"Error executing build for {package} in container:\n {e}") if not continue_on_error: - print( - "FATAL Error: build failed and --continue-on-error " - "not set, exiting" - ) + print("FATAL Error: build failed and --continue-on-error " "not set, exiting") sys.exit(1) else: - print( - "****** Build Error, continuing because " - "--continue-on-error is set" - ) + print("****** Build Error, continuing because " "--continue-on-error is set") else: print("Skipped") @@ -203,10 +189,7 @@ def _ensure_prerequisites(): # Tell the user how to build it if not images = docker.image.list(builder_js_image_name) if len(images) == 0: - print( - f"FATAL: builder image: {builder_js_image_name} is required " - "but was not found" - ) + print(f"FATAL: builder image: {builder_js_image_name} is required " "but was not found") print( "Please run this command to create it: " "laconic-so --stack build-support build-containers" diff --git a/stack_orchestrator/build/build_types.py b/stack_orchestrator/build/build_types.py index 53b24932..4aacd024 100644 --- a/stack_orchestrator/build/build_types.py +++ b/stack_orchestrator/build/build_types.py @@ -16,7 +16,6 @@ from dataclasses import dataclass from pathlib import Path -from typing import Mapping @dataclass @@ -24,5 +23,5 @@ class BuildContext: stack: str container: str container_build_dir: Path - container_build_env: Mapping[str, str] + container_build_env: dict[str, str] dev_root_path: str diff --git 
a/stack_orchestrator/build/build_util.py b/stack_orchestrator/build/build_util.py index a8a0c395..fe1a7742 100644 --- a/stack_orchestrator/build/build_util.py +++ b/stack_orchestrator/build/build_util.py @@ -30,9 +30,7 @@ def get_containers_in_scope(stack: str): # See: https://stackoverflow.com/a/20885799/1701505 from stack_orchestrator import data - with importlib.resources.open_text( - data, "container-image-list.txt" - ) as container_list_file: + with importlib.resources.open_text(data, "container-image-list.txt") as container_list_file: containers_in_scope = container_list_file.read().splitlines() if opts.o.verbose: diff --git a/stack_orchestrator/build/build_webapp.py b/stack_orchestrator/build/build_webapp.py index f204df82..2037e449 100644 --- a/stack_orchestrator/build/build_webapp.py +++ b/stack_orchestrator/build/build_webapp.py @@ -23,20 +23,19 @@ import os import sys - -from decouple import config -import click from pathlib import Path + +import click +from decouple import config + from stack_orchestrator.build import build_containers -from stack_orchestrator.deploy.webapp.util import determine_base_container, TimedLogger from stack_orchestrator.build.build_types import BuildContext +from stack_orchestrator.deploy.webapp.util import TimedLogger, determine_base_container @click.command() @click.option("--base-container") -@click.option( - "--source-repo", help="directory containing the webapp to build", required=True -) +@click.option("--source-repo", help="directory containing the webapp to build", required=True) @click.option( "--force-rebuild", is_flag=True, @@ -64,13 +63,10 @@ def command(ctx, base_container, source_repo, force_rebuild, extra_build_args, t if local_stack: dev_root_path = os.getcwd()[0 : os.getcwd().rindex("stack-orchestrator")] logger.log( - f"Local stack dev_root_path (CERC_REPO_BASE_DIR) overridden to: " - f"{dev_root_path}" + f"Local stack dev_root_path (CERC_REPO_BASE_DIR) overridden to: " f"{dev_root_path}" ) else: - 
dev_root_path = os.path.expanduser( - config("CERC_REPO_BASE_DIR", default="~/cerc") - ) + dev_root_path = os.path.expanduser(config("CERC_REPO_BASE_DIR", default="~/cerc")) if verbose: logger.log(f"Dev Root is: {dev_root_path}") diff --git a/stack_orchestrator/build/fetch_containers.py b/stack_orchestrator/build/fetch_containers.py index e0f31dd0..96c6f2e7 100644 --- a/stack_orchestrator/build/fetch_containers.py +++ b/stack_orchestrator/build/fetch_containers.py @@ -13,19 +13,19 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . -import click -from dataclasses import dataclass import json import platform +from dataclasses import dataclass + +import click +import requests from python_on_whales import DockerClient from python_on_whales.components.manifest.cli_wrapper import ManifestCLI, ManifestList from python_on_whales.utils import run -import requests -from typing import List -from stack_orchestrator.opts import opts -from stack_orchestrator.util import include_exclude_check, error_exit from stack_orchestrator.build.build_util import get_containers_in_scope +from stack_orchestrator.opts import opts +from stack_orchestrator.util import error_exit, include_exclude_check # Experimental fetch-container command @@ -55,7 +55,7 @@ def _local_tag_for(container: str): # $ curl -u "my-username:my-token" -X GET \ # "https:///v2/cerc-io/cerc/test-container/tags/list" # {"name":"cerc-io/cerc/test-container","tags":["202402232130","202402232208"]} -def _get_tags_for_container(container: str, registry_info: RegistryInfo) -> List[str]: +def _get_tags_for_container(container: str, registry_info: RegistryInfo) -> list[str]: # registry looks like: git.vdb.to/cerc-io registry_parts = registry_info.registry.split("/") url = f"https://{registry_parts[0]}/v2/{registry_parts[1]}/{container}/tags/list" @@ -68,16 +68,15 @@ def _get_tags_for_container(container: str, registry_info: RegistryInfo) -> List tag_info = 
response.json() if opts.o.debug: print(f"container tags list: {tag_info}") - tags_array = tag_info["tags"] + tags_array: list[str] = tag_info["tags"] return tags_array else: error_exit( - f"failed to fetch tags from image registry, " - f"status code: {response.status_code}" + f"failed to fetch tags from image registry, " f"status code: {response.status_code}" ) -def _find_latest(candidate_tags: List[str]): +def _find_latest(candidate_tags: list[str]): # Lex sort should give us the latest first sorted_candidates = sorted(candidate_tags) if opts.o.debug: @@ -86,8 +85,8 @@ def _find_latest(candidate_tags: List[str]): def _filter_for_platform( - container: str, registry_info: RegistryInfo, tag_list: List[str] -) -> List[str]: + container: str, registry_info: RegistryInfo, tag_list: list[str] +) -> list[str]: filtered_tags = [] this_machine = platform.machine() # Translate between Python and docker platform names @@ -151,15 +150,9 @@ def _add_local_tag(remote_tag: str, registry: str, local_tag: str): default=False, help="Overwrite a locally built image, if present", ) -@click.option( - "--image-registry", required=True, help="Specify the image registry to fetch from" -) -@click.option( - "--registry-username", required=True, help="Specify the image registry username" -) -@click.option( - "--registry-token", required=True, help="Specify the image registry access token" -) +@click.option("--image-registry", required=True, help="Specify the image registry to fetch from") +@click.option("--registry-username", required=True, help="Specify the image registry username") +@click.option("--registry-token", required=True, help="Specify the image registry access token") @click.pass_context def command( ctx, diff --git a/stack_orchestrator/build/publish.py b/stack_orchestrator/build/publish.py index 78059680..b1b72684 100644 --- a/stack_orchestrator/build/publish.py +++ b/stack_orchestrator/build/publish.py @@ -14,6 +14,7 @@ # along with this program. If not, see . 
from datetime import datetime + from python_on_whales import DockerClient from stack_orchestrator.opts import opts diff --git a/stack_orchestrator/data/config/mainnet-eth-keycloak/scripts/keycloak-mirror/keycloak-mirror.py b/stack_orchestrator/data/config/mainnet-eth-keycloak/scripts/keycloak-mirror/keycloak-mirror.py index 9c4bd78e..1f19651b 100755 --- a/stack_orchestrator/data/config/mainnet-eth-keycloak/scripts/keycloak-mirror/keycloak-mirror.py +++ b/stack_orchestrator/data/config/mainnet-eth-keycloak/scripts/keycloak-mirror/keycloak-mirror.py @@ -2,12 +2,11 @@ import argparse import os +import random import sys +from subprocess import Popen import psycopg -import random - -from subprocess import Popen from fabric import Connection @@ -27,27 +26,19 @@ def dump_src_db_to_file(db_host, db_port, db_user, db_password, db_name, file_na def establish_ssh_tunnel(ssh_host, ssh_port, ssh_user, db_host, db_port): local_port = random.randint(11000, 12000) conn = Connection(host=ssh_host, port=ssh_port, user=ssh_user) - fw = conn.forward_local( - local_port=local_port, remote_port=db_port, remote_host=db_host - ) + fw = conn.forward_local(local_port=local_port, remote_port=db_port, remote_host=db_host) return conn, fw, local_port def load_db_from_file(db_host, db_port, db_user, db_password, db_name, file_name): - connstr = "host=%s port=%s user=%s password=%s sslmode=disable dbname=%s" % ( - db_host, - db_port, - db_user, - db_password, - db_name, - ) + connstr = f"host={db_host} port={db_port} user={db_user} password={db_password} sslmode=disable dbname={db_name}" with psycopg.connect(connstr) as conn: with conn.cursor() as cur: print( f"Importing from {file_name} to {db_host}:{db_port}/{db_name}... 
", end="", ) - cur.execute(open(file_name, "rt").read()) + cur.execute(open(file_name).read()) print("DONE") @@ -60,9 +51,7 @@ if __name__ == "__main__": parser.add_argument("--src-dbpw", help="DB password", required=True) parser.add_argument("--src-dbname", help="dbname", default="keycloak") - parser.add_argument( - "--dst-file", help="Destination filename", default="keycloak-mirror.sql" - ) + parser.add_argument("--dst-file", help="Destination filename", default="keycloak-mirror.sql") parser.add_argument("--live-import", help="run the import", action="store_true") diff --git a/stack_orchestrator/data/container-build/cerc-fixturenet-eth-genesis/genesis/accounts/mnemonic_to_csv.py b/stack_orchestrator/data/container-build/cerc-fixturenet-eth-genesis/genesis/accounts/mnemonic_to_csv.py index 4e74e1df..714c9a67 100644 --- a/stack_orchestrator/data/container-build/cerc-fixturenet-eth-genesis/genesis/accounts/mnemonic_to_csv.py +++ b/stack_orchestrator/data/container-build/cerc-fixturenet-eth-genesis/genesis/accounts/mnemonic_to_csv.py @@ -1,7 +1,8 @@ -from web3.auto import w3 -import ruamel.yaml as yaml import sys +import ruamel.yaml as yaml +from web3.auto import w3 + w3.eth.account.enable_unaudited_hdwallet_features() testnet_config_path = "genesis-config.yaml" @@ -11,8 +12,6 @@ if len(sys.argv) > 1: with open(testnet_config_path) as stream: data = yaml.safe_load(stream) -for key, value in data["el_premine"].items(): - acct = w3.eth.account.from_mnemonic( - data["mnemonic"], account_path=key, passphrase="" - ) - print("%s,%s,%s" % (key, acct.address, acct.key.hex())) +for key, _value in data["el_premine"].items(): + acct = w3.eth.account.from_mnemonic(data["mnemonic"], account_path=key, passphrase="") + print(f"{key},{acct.address},{acct.key.hex()}") diff --git a/stack_orchestrator/data/stacks/mainnet-blast/deploy/commands.py b/stack_orchestrator/data/stacks/mainnet-blast/deploy/commands.py index 6d3b32d4..41a51325 100644 --- 
a/stack_orchestrator/data/stacks/mainnet-blast/deploy/commands.py +++ b/stack_orchestrator/data/stacks/mainnet-blast/deploy/commands.py @@ -16,13 +16,14 @@ from pathlib import Path from shutil import copy + import yaml def create(context, extra_args): # Our goal here is just to copy the json files for blast yml_path = context.deployment_dir.joinpath("spec.yml") - with open(yml_path, "r") as file: + with open(yml_path) as file: data = yaml.safe_load(file) mount_point = data["volumes"]["blast-data"] diff --git a/stack_orchestrator/data/stacks/mainnet-eth/deploy/commands.py b/stack_orchestrator/data/stacks/mainnet-eth/deploy/commands.py index 545e16a1..332bf472 100644 --- a/stack_orchestrator/data/stacks/mainnet-eth/deploy/commands.py +++ b/stack_orchestrator/data/stacks/mainnet-eth/deploy/commands.py @@ -27,8 +27,6 @@ def setup(ctx): def create(ctx, extra_args): # Generate the JWT secret and save to its config file secret = token_hex(32) - jwt_file_path = ctx.deployment_dir.joinpath( - "data", "mainnet_eth_config_data", "jwtsecret" - ) + jwt_file_path = ctx.deployment_dir.joinpath("data", "mainnet_eth_config_data", "jwtsecret") with open(jwt_file_path, "w+") as jwt_file: jwt_file.write(secret) diff --git a/stack_orchestrator/data/stacks/mainnet-laconic/deploy/commands.py b/stack_orchestrator/data/stacks/mainnet-laconic/deploy/commands.py index 9364a9c8..118e2b9d 100644 --- a/stack_orchestrator/data/stacks/mainnet-laconic/deploy/commands.py +++ b/stack_orchestrator/data/stacks/mainnet-laconic/deploy/commands.py @@ -13,22 +13,23 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . 
-from stack_orchestrator.util import get_yaml +import os +import re +import sys +from enum import Enum +from pathlib import Path +from shutil import copyfile, copytree + +import tomli from stack_orchestrator.deploy.deploy_types import ( DeployCommandContext, LaconicStackSetupCommand, ) +from stack_orchestrator.deploy.deploy_util import VolumeMapping, run_container_command from stack_orchestrator.deploy.deployment_context import DeploymentContext from stack_orchestrator.deploy.stack_state import State -from stack_orchestrator.deploy.deploy_util import VolumeMapping, run_container_command from stack_orchestrator.opts import opts -from enum import Enum -from pathlib import Path -from shutil import copyfile, copytree -import os -import sys -import tomli -import re +from stack_orchestrator.util import get_yaml default_spec_file_content = "" @@ -80,9 +81,7 @@ def _copy_gentx_files(network_dir: Path, gentx_file_list: str): gentx_file_path = Path(gentx_file) copyfile( gentx_file_path, - os.path.join( - network_dir, "config", "gentx", os.path.basename(gentx_file_path) - ), + os.path.join(network_dir, "config", "gentx", os.path.basename(gentx_file_path)), ) @@ -91,7 +90,7 @@ def _remove_persistent_peers(network_dir: Path): if not config_file_path.exists(): print("Error: config.toml not found") sys.exit(1) - with open(config_file_path, "r") as input_file: + with open(config_file_path) as input_file: config_file_content = input_file.read() persistent_peers_pattern = '^persistent_peers = "(.+?)"' replace_with = 'persistent_peers = ""' @@ -110,7 +109,7 @@ def _insert_persistent_peers(config_dir: Path, new_persistent_peers: str): if not config_file_path.exists(): print("Error: config.toml not found") sys.exit(1) - with open(config_file_path, "r") as input_file: + with open(config_file_path) as input_file: config_file_content = input_file.read() persistent_peers_pattern = r'^persistent_peers = ""' replace_with = f'persistent_peers = "{new_persistent_peers}"' @@ -129,7 +128,7 @@ 
def _enable_cors(config_dir: Path): if not config_file_path.exists(): print("Error: config.toml not found") sys.exit(1) - with open(config_file_path, "r") as input_file: + with open(config_file_path) as input_file: config_file_content = input_file.read() cors_pattern = r"^cors_allowed_origins = \[]" replace_with = 'cors_allowed_origins = ["*"]' @@ -142,13 +141,11 @@ def _enable_cors(config_dir: Path): if not app_file_path.exists(): print("Error: app.toml not found") sys.exit(1) - with open(app_file_path, "r") as input_file: + with open(app_file_path) as input_file: app_file_content = input_file.read() cors_pattern = r"^enabled-unsafe-cors = false" replace_with = "enabled-unsafe-cors = true" - app_file_content = re.sub( - cors_pattern, replace_with, app_file_content, flags=re.MULTILINE - ) + app_file_content = re.sub(cors_pattern, replace_with, app_file_content, flags=re.MULTILINE) with open(app_file_path, "w") as output_file: output_file.write(app_file_content) @@ -158,7 +155,7 @@ def _set_listen_address(config_dir: Path): if not config_file_path.exists(): print("Error: config.toml not found") sys.exit(1) - with open(config_file_path, "r") as input_file: + with open(config_file_path) as input_file: config_file_content = input_file.read() existing_pattern = r'^laddr = "tcp://127.0.0.1:26657"' replace_with = 'laddr = "tcp://0.0.0.0:26657"' @@ -172,7 +169,7 @@ def _set_listen_address(config_dir: Path): if not app_file_path.exists(): print("Error: app.toml not found") sys.exit(1) - with open(app_file_path, "r") as input_file: + with open(app_file_path) as input_file: app_file_content = input_file.read() existing_pattern1 = r'^address = "tcp://localhost:1317"' replace_with1 = 'address = "tcp://0.0.0.0:1317"' @@ -192,10 +189,7 @@ def _phase_from_params(parameters): phase = SetupPhase.ILLEGAL if parameters.initialize_network: if parameters.join_network or parameters.create_network: - print( - "Can't supply --join-network or --create-network " - "with --initialize-network" 
- ) + print("Can't supply --join-network or --create-network " "with --initialize-network") sys.exit(1) if not parameters.chain_id: print("--chain-id is required") @@ -207,26 +201,17 @@ def _phase_from_params(parameters): phase = SetupPhase.INITIALIZE elif parameters.join_network: if parameters.initialize_network or parameters.create_network: - print( - "Can't supply --initialize-network or --create-network " - "with --join-network" - ) + print("Can't supply --initialize-network or --create-network " "with --join-network") sys.exit(1) phase = SetupPhase.JOIN elif parameters.create_network: if parameters.initialize_network or parameters.join_network: - print( - "Can't supply --initialize-network or --join-network " - "with --create-network" - ) + print("Can't supply --initialize-network or --join-network " "with --create-network") sys.exit(1) phase = SetupPhase.CREATE elif parameters.connect_network: if parameters.initialize_network or parameters.join_network: - print( - "Can't supply --initialize-network or --join-network " - "with --connect-network" - ) + print("Can't supply --initialize-network or --join-network " "with --connect-network") sys.exit(1) phase = SetupPhase.CONNECT return phase @@ -341,8 +326,7 @@ def setup( output3, status3 = run_container_command( command_context, "laconicd", - f"laconicd cometbft show-validator " - f"--home {laconicd_home_path_in_container}", + f"laconicd cometbft show-validator " f"--home {laconicd_home_path_in_container}", mounts, ) print(f"Node validator address: {output3}") @@ -361,23 +345,16 @@ def setup( # Copy it into our network dir genesis_file_path = Path(parameters.genesis_file) if not os.path.exists(genesis_file_path): - print( - f"Error: supplied genesis file: {parameters.genesis_file} " - "does not exist." 
- ) + print(f"Error: supplied genesis file: {parameters.genesis_file} " "does not exist.") sys.exit(1) copyfile( genesis_file_path, - os.path.join( - network_dir, "config", os.path.basename(genesis_file_path) - ), + os.path.join(network_dir, "config", os.path.basename(genesis_file_path)), ) else: # We're generating the genesis file # First look in the supplied gentx files for the other nodes' keys - other_node_keys = _get_node_keys_from_gentx_files( - parameters.gentx_address_list - ) + other_node_keys = _get_node_keys_from_gentx_files(parameters.gentx_address_list) # Add those keys to our genesis, with balances we determine here (why?) outputk = None for other_node_key in other_node_keys: @@ -398,8 +375,7 @@ def setup( output1, status1 = run_container_command( command_context, "laconicd", - f"laconicd genesis collect-gentxs " - f"--home {laconicd_home_path_in_container}", + f"laconicd genesis collect-gentxs " f"--home {laconicd_home_path_in_container}", mounts, ) if options.debug: @@ -416,8 +392,7 @@ def setup( output2, status1 = run_container_command( command_context, "laconicd", - f"laconicd genesis validate-genesis " - f"--home {laconicd_home_path_in_container}", + f"laconicd genesis validate-genesis " f"--home {laconicd_home_path_in_container}", mounts, ) print(f"validate-genesis result: {output2}") @@ -452,9 +427,7 @@ def create(deployment_context: DeploymentContext, extra_args): sys.exit(1) # Copy the network directory contents into our deployment # TODO: change this to work with non local paths - deployment_config_dir = deployment_context.deployment_dir.joinpath( - "data", "laconicd-config" - ) + deployment_config_dir = deployment_context.deployment_dir.joinpath("data", "laconicd-config") copytree(config_dir_path, deployment_config_dir, dirs_exist_ok=True) # If supplied, add the initial persistent peers to the config file if extra_args[1]: @@ -465,9 +438,7 @@ def create(deployment_context: DeploymentContext, extra_args): 
_set_listen_address(deployment_config_dir) # Copy the data directory contents into our deployment # TODO: change this to work with non local paths - deployment_data_dir = deployment_context.deployment_dir.joinpath( - "data", "laconicd-data" - ) + deployment_data_dir = deployment_context.deployment_dir.joinpath("data", "laconicd-data") copytree(data_dir_path, deployment_data_dir, dirs_exist_ok=True) diff --git a/stack_orchestrator/data/stacks/test/deploy/commands.py b/stack_orchestrator/data/stacks/test/deploy/commands.py index 356338af..7363695c 100644 --- a/stack_orchestrator/data/stacks/test/deploy/commands.py +++ b/stack_orchestrator/data/stacks/test/deploy/commands.py @@ -13,12 +13,13 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . -from stack_orchestrator.util import get_yaml +from pathlib import Path + from stack_orchestrator.deploy.deploy_types import DeployCommandContext +from stack_orchestrator.deploy.deploy_util import VolumeMapping, run_container_command from stack_orchestrator.deploy.deployment_context import DeploymentContext from stack_orchestrator.deploy.stack_state import State -from stack_orchestrator.deploy.deploy_util import VolumeMapping, run_container_command -from pathlib import Path +from stack_orchestrator.util import get_yaml default_spec_file_content = """config: test-variable-1: test-value-1 diff --git a/stack_orchestrator/deploy/compose/deploy_docker.py b/stack_orchestrator/deploy/compose/deploy_docker.py index fa0ac1d4..b74eef6a 100644 --- a/stack_orchestrator/deploy/compose/deploy_docker.py +++ b/stack_orchestrator/deploy/compose/deploy_docker.py @@ -14,12 +14,13 @@ # along with this program. If not, see . 
from pathlib import Path -from typing import Optional + from python_on_whales import DockerClient, DockerException + from stack_orchestrator.deploy.deployer import ( Deployer, - DeployerException, DeployerConfigGenerator, + DeployerException, ) from stack_orchestrator.deploy.deployment_context import DeploymentContext from stack_orchestrator.opts import opts @@ -32,10 +33,10 @@ class DockerDeployer(Deployer): def __init__( self, type: str, - deployment_context: Optional[DeploymentContext], + deployment_context: DeploymentContext | None, compose_files: list, - compose_project_name: Optional[str], - compose_env_file: Optional[str], + compose_project_name: str | None, + compose_env_file: str | None, ) -> None: self.docker = DockerClient( compose_files=compose_files, @@ -53,21 +54,21 @@ class DockerDeployer(Deployer): try: return self.docker.compose.up(detach=detach, services=services) except DockerException as e: - raise DeployerException(e) + raise DeployerException(e) from e def down(self, timeout, volumes, skip_cluster_management): if not opts.o.dry_run: try: return self.docker.compose.down(timeout=timeout, volumes=volumes) except DockerException as e: - raise DeployerException(e) + raise DeployerException(e) from e def update_envs(self): if not opts.o.dry_run: try: return self.docker.compose.restart() except DockerException as e: - raise DeployerException(e) + raise DeployerException(e) from e def status(self): if not opts.o.dry_run: @@ -75,23 +76,21 @@ class DockerDeployer(Deployer): for p in self.docker.compose.ps(): print(f"{p.name}\t{p.state.status}") except DockerException as e: - raise DeployerException(e) + raise DeployerException(e) from e def ps(self): if not opts.o.dry_run: try: return self.docker.compose.ps() except DockerException as e: - raise DeployerException(e) + raise DeployerException(e) from e def port(self, service, private_port): if not opts.o.dry_run: try: - return self.docker.compose.port( - service=service, private_port=private_port - ) + 
return self.docker.compose.port(service=service, private_port=private_port) except DockerException as e: - raise DeployerException(e) + raise DeployerException(e) from e def execute(self, service, command, tty, envs): if not opts.o.dry_run: @@ -100,7 +99,7 @@ class DockerDeployer(Deployer): service=service, command=command, tty=tty, envs=envs ) except DockerException as e: - raise DeployerException(e) + raise DeployerException(e) from e def logs(self, services, tail, follow, stream): if not opts.o.dry_run: @@ -109,7 +108,7 @@ class DockerDeployer(Deployer): services=services, tail=tail, follow=follow, stream=stream ) except DockerException as e: - raise DeployerException(e) + raise DeployerException(e) from e def run( self, @@ -118,10 +117,14 @@ class DockerDeployer(Deployer): user=None, volumes=None, entrypoint=None, - env={}, - ports=[], + env=None, + ports=None, detach=False, ): + if ports is None: + ports = [] + if env is None: + env = {} if not opts.o.dry_run: try: return self.docker.run( @@ -136,9 +139,9 @@ class DockerDeployer(Deployer): publish_all=len(ports) == 0, ) except DockerException as e: - raise DeployerException(e) + raise DeployerException(e) from e - def run_job(self, job_name: str, release_name: Optional[str] = None): + def run_job(self, job_name: str, release_name: str | None = None): # release_name is ignored for Docker deployments (only used for K8s/Helm) if not opts.o.dry_run: try: @@ -155,9 +158,7 @@ class DockerDeployer(Deployer): ) if not job_compose_file.exists(): - raise DeployerException( - f"Job compose file not found: {job_compose_file}" - ) + raise DeployerException(f"Job compose file not found: {job_compose_file}") if opts.o.verbose: print(f"Running job from: {job_compose_file}") @@ -175,7 +176,7 @@ class DockerDeployer(Deployer): return job_docker.compose.run(service=job_name, remove=True, tty=True) except DockerException as e: - raise DeployerException(e) + raise DeployerException(e) from e class 
DockerDeployerConfigGenerator(DeployerConfigGenerator): diff --git a/stack_orchestrator/deploy/deploy.py b/stack_orchestrator/deploy/deploy.py index 6e914b92..362330ea 100644 --- a/stack_orchestrator/deploy/deploy.py +++ b/stack_orchestrator/deploy/deploy.py @@ -15,36 +15,37 @@ # Deploys the system components using a deployer (either docker-compose or k8s) -import hashlib import copy +import hashlib import os +import subprocess import sys from dataclasses import dataclass from importlib import resources -from typing import Optional -import subprocess -import click from pathlib import Path + +import click + from stack_orchestrator import constants -from stack_orchestrator.opts import opts -from stack_orchestrator.util import ( - get_stack_path, - include_exclude_check, - get_parsed_stack_config, - global_options2, - get_dev_root_path, - stack_is_in_deployment, - resolve_compose_file, -) -from stack_orchestrator.deploy.deployer import DeployerException -from stack_orchestrator.deploy.deployer_factory import getDeployer from stack_orchestrator.deploy.compose.deploy_docker import DockerDeployer from stack_orchestrator.deploy.deploy_types import ClusterContext, DeployCommandContext +from stack_orchestrator.deploy.deployer import DeployerException +from stack_orchestrator.deploy.deployer_factory import getDeployer from stack_orchestrator.deploy.deployment_context import DeploymentContext from stack_orchestrator.deploy.deployment_create import create as deployment_create from stack_orchestrator.deploy.deployment_create import init as deployment_init from stack_orchestrator.deploy.deployment_create import setup as deployment_setup from stack_orchestrator.deploy.k8s import k8s_command +from stack_orchestrator.opts import opts +from stack_orchestrator.util import ( + get_dev_root_path, + get_parsed_stack_config, + get_stack_path, + global_options2, + include_exclude_check, + resolve_compose_file, + stack_is_in_deployment, +) @click.group() @@ -52,9 +53,7 @@ from 
stack_orchestrator.deploy.k8s import k8s_command @click.option("--exclude", help="don't start these components") @click.option("--env-file", help="env file to be used") @click.option("--cluster", help="specify a non-default cluster name") -@click.option( - "--deploy-to", help="cluster system to deploy to (compose or k8s or k8s-kind)" -) +@click.option("--deploy-to", help="cluster system to deploy to (compose or k8s or k8s-kind)") @click.pass_context def command(ctx, include, exclude, env_file, cluster, deploy_to): """deploy a stack""" @@ -93,7 +92,7 @@ def command(ctx, include, exclude, env_file, cluster, deploy_to): def create_deploy_context( global_context, - deployment_context: Optional[DeploymentContext], + deployment_context: DeploymentContext | None, stack, include, exclude, @@ -116,9 +115,7 @@ def create_deploy_context( # For helm chart deployments, skip compose file loading if is_helm_chart_deployment: - cluster_context = ClusterContext( - global_context, cluster, [], [], [], None, env_file - ) + cluster_context = ClusterContext(global_context, cluster, [], [], [], None, env_file) else: cluster_context = _make_cluster_context( global_context, stack, include, exclude, cluster, env_file @@ -134,9 +131,7 @@ def create_deploy_context( return DeployCommandContext(stack, cluster_context, deployer) -def up_operation( - ctx, services_list, stay_attached=False, skip_cluster_management=False -): +def up_operation(ctx, services_list, stay_attached=False, skip_cluster_management=False): global_context = ctx.parent.parent.obj deploy_context = ctx.obj cluster_context = deploy_context.cluster_context @@ -209,8 +204,7 @@ def ps_operation(ctx): print(f"{port_mapping}", end="") else: print( - f"{mapping[0]['HostIp']}:{mapping[0]['HostPort']}" - f"->{port_mapping}", + f"{mapping[0]['HostIp']}:{mapping[0]['HostPort']}" f"->{port_mapping}", end="", ) comma = ", " @@ -260,11 +254,11 @@ def logs_operation(ctx, tail: int, follow: bool, extra_args: str): logs_stream = 
ctx.obj.deployer.logs( services=services_list, tail=tail, follow=follow, stream=True ) - for stream_type, stream_content in logs_stream: + for _stream_type, stream_content in logs_stream: print(stream_content.decode("utf-8"), end="") -def run_job_operation(ctx, job_name: str, helm_release: Optional[str] = None): +def run_job_operation(ctx, job_name: str, helm_release: str | None = None): global_context = ctx.parent.parent.obj if not global_context.dry_run: print(f"Running job: {job_name}") @@ -284,9 +278,7 @@ def up(ctx, extra_args): @command.command() -@click.option( - "--delete-volumes/--preserve-volumes", default=False, help="delete data volumes" -) +@click.option("--delete-volumes/--preserve-volumes", default=False, help="delete data volumes") @click.argument("extra_args", nargs=-1) # help: command: down @click.pass_context def down(ctx, delete_volumes, extra_args): @@ -386,14 +378,10 @@ def _make_cluster_context(ctx, stack, include, exclude, cluster, env_file): else: # See: # https://stackoverflow.com/questions/25389095/python-get-path-of-root-project-structure - compose_dir = ( - Path(__file__).absolute().parent.parent.joinpath("data", "compose") - ) + compose_dir = Path(__file__).absolute().parent.parent.joinpath("data", "compose") if cluster is None: - cluster = _make_default_cluster_name( - deployment, compose_dir, stack, include, exclude - ) + cluster = _make_default_cluster_name(deployment, compose_dir, stack, include, exclude) else: _make_default_cluster_name(deployment, compose_dir, stack, include, exclude) @@ -410,9 +398,7 @@ def _make_cluster_context(ctx, stack, include, exclude, cluster, env_file): if stack_config is not None: # TODO: syntax check the input here pods_in_scope = stack_config["pods"] - cluster_config = ( - stack_config["config"] if "config" in stack_config else None - ) + cluster_config = stack_config["config"] if "config" in stack_config else None else: pods_in_scope = all_pods @@ -434,43 +420,29 @@ def _make_cluster_context(ctx, 
stack, include, exclude, cluster, env_file): if include_exclude_check(pod_name, include, exclude): if pod_repository is None or pod_repository == "internal": if deployment: - compose_file_name = os.path.join( - compose_dir, f"docker-compose-{pod_path}.yml" - ) + compose_file_name = os.path.join(compose_dir, f"docker-compose-{pod_path}.yml") else: compose_file_name = resolve_compose_file(stack, pod_name) else: if deployment: - compose_file_name = os.path.join( - compose_dir, f"docker-compose-{pod_name}.yml" - ) + compose_file_name = os.path.join(compose_dir, f"docker-compose-{pod_name}.yml") pod_pre_start_command = pod.get("pre_start_command") pod_post_start_command = pod.get("post_start_command") - script_dir = compose_dir.parent.joinpath( - "pods", pod_name, "scripts" - ) + script_dir = compose_dir.parent.joinpath("pods", pod_name, "scripts") if pod_pre_start_command is not None: - pre_start_commands.append( - os.path.join(script_dir, pod_pre_start_command) - ) + pre_start_commands.append(os.path.join(script_dir, pod_pre_start_command)) if pod_post_start_command is not None: - post_start_commands.append( - os.path.join(script_dir, pod_post_start_command) - ) + post_start_commands.append(os.path.join(script_dir, pod_post_start_command)) else: # TODO: fix this code for external stack with scripts pod_root_dir = os.path.join( dev_root_path, pod_repository.split("/")[-1], pod["path"] ) - compose_file_name = os.path.join( - pod_root_dir, f"docker-compose-{pod_name}.yml" - ) + compose_file_name = os.path.join(pod_root_dir, f"docker-compose-{pod_name}.yml") pod_pre_start_command = pod.get("pre_start_command") pod_post_start_command = pod.get("post_start_command") if pod_pre_start_command is not None: - pre_start_commands.append( - os.path.join(pod_root_dir, pod_pre_start_command) - ) + pre_start_commands.append(os.path.join(pod_root_dir, pod_pre_start_command)) if pod_post_start_command is not None: post_start_commands.append( os.path.join(pod_root_dir, 
pod_post_start_command) @@ -514,9 +486,7 @@ def _run_command(ctx, cluster_name, command): command_env["CERC_SO_COMPOSE_PROJECT"] = cluster_name if ctx.debug: command_env["CERC_SCRIPT_DEBUG"] = "true" - command_result = subprocess.run( - command_file, shell=True, env=command_env, cwd=command_dir - ) + command_result = subprocess.run(command_file, shell=True, env=command_env, cwd=command_dir) if command_result.returncode != 0: print(f"FATAL Error running command: {command}") sys.exit(1) @@ -573,9 +543,7 @@ def _orchestrate_cluster_config(ctx, cluster_config, deployer, container_exec_en # "It returned with code 1" if "It returned with code 1" in str(error): if ctx.verbose: - print( - "Config export script returned an error, re-trying" - ) + print("Config export script returned an error, re-trying") # If the script failed to execute # (e.g. the file is not there) then we get: # "It returned with code 2" diff --git a/stack_orchestrator/deploy/deploy_types.py b/stack_orchestrator/deploy/deploy_types.py index 202e0fa5..8151242c 100644 --- a/stack_orchestrator/deploy/deploy_types.py +++ b/stack_orchestrator/deploy/deploy_types.py @@ -13,8 +13,9 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . 
-from typing import List, Mapping, Optional +from collections.abc import Mapping from dataclasses import dataclass + from stack_orchestrator.command_types import CommandOptions from stack_orchestrator.deploy.deployer import Deployer @@ -23,19 +24,19 @@ from stack_orchestrator.deploy.deployer import Deployer class ClusterContext: # TODO: this should be in its own object not stuffed in here options: CommandOptions - cluster: Optional[str] - compose_files: List[str] - pre_start_commands: List[str] - post_start_commands: List[str] - config: Optional[str] - env_file: Optional[str] + cluster: str | None + compose_files: list[str] + pre_start_commands: list[str] + post_start_commands: list[str] + config: str | None + env_file: str | None @dataclass class DeployCommandContext: stack: str cluster_context: ClusterContext - deployer: Optional[Deployer] + deployer: Deployer | None @dataclass diff --git a/stack_orchestrator/deploy/deploy_util.py b/stack_orchestrator/deploy/deploy_util.py index 65111653..8d73f671 100644 --- a/stack_orchestrator/deploy/deploy_util.py +++ b/stack_orchestrator/deploy/deploy_util.py @@ -13,15 +13,16 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . 
-from typing import List, Any +from typing import Any + from stack_orchestrator.deploy.deploy_types import DeployCommandContext, VolumeMapping +from stack_orchestrator.opts import opts from stack_orchestrator.util import ( get_parsed_stack_config, - get_yaml, get_pod_list, + get_yaml, resolve_compose_file, ) -from stack_orchestrator.opts import opts def _container_image_from_service(stack: str, service: str): @@ -32,7 +33,7 @@ def _container_image_from_service(stack: str, service: str): yaml = get_yaml() for pod in pods: pod_file_path = resolve_compose_file(stack, pod) - parsed_pod_file = yaml.load(open(pod_file_path, "r")) + parsed_pod_file = yaml.load(open(pod_file_path)) if "services" in parsed_pod_file: services = parsed_pod_file["services"] if service in services: @@ -45,7 +46,7 @@ def _container_image_from_service(stack: str, service: str): def parsed_pod_files_map_from_file_names(pod_files): parsed_pod_yaml_map: Any = {} for pod_file in pod_files: - with open(pod_file, "r") as pod_file_descriptor: + with open(pod_file) as pod_file_descriptor: parsed_pod_file = get_yaml().load(pod_file_descriptor) parsed_pod_yaml_map[pod_file] = parsed_pod_file if opts.o.debug: @@ -53,7 +54,7 @@ def parsed_pod_files_map_from_file_names(pod_files): return parsed_pod_yaml_map -def images_for_deployment(pod_files: List[str]): +def images_for_deployment(pod_files: list[str]): image_set = set() parsed_pod_yaml_map = parsed_pod_files_map_from_file_names(pod_files) # Find the set of images in the pods @@ -69,7 +70,7 @@ def images_for_deployment(pod_files: List[str]): return image_set -def _volumes_to_docker(mounts: List[VolumeMapping]): +def _volumes_to_docker(mounts: list[VolumeMapping]): # Example from doc: [("/", "/host"), ("/etc/hosts", "/etc/hosts", "rw")] result = [] for mount in mounts: @@ -79,7 +80,7 @@ def _volumes_to_docker(mounts: List[VolumeMapping]): def run_container_command( - ctx: DeployCommandContext, service: str, command: str, mounts: List[VolumeMapping] + ctx: 
DeployCommandContext, service: str, command: str, mounts: list[VolumeMapping] ): deployer = ctx.deployer if deployer is None: diff --git a/stack_orchestrator/deploy/deployer.py b/stack_orchestrator/deploy/deployer.py index b950e29b..2f81dde3 100644 --- a/stack_orchestrator/deploy/deployer.py +++ b/stack_orchestrator/deploy/deployer.py @@ -15,7 +15,6 @@ from abc import ABC, abstractmethod from pathlib import Path -from typing import Optional class Deployer(ABC): @@ -59,14 +58,14 @@ class Deployer(ABC): user=None, volumes=None, entrypoint=None, - env={}, - ports=[], + env=None, + ports=None, detach=False, ): pass @abstractmethod - def run_job(self, job_name: str, release_name: Optional[str] = None): + def run_job(self, job_name: str, release_name: str | None = None): pass def prepare(self, skip_cluster_management): @@ -74,9 +73,7 @@ class Deployer(ABC): Only supported for k8s deployers. Compose deployers raise an error. """ - raise DeployerException( - "prepare is only supported for k8s deployments" - ) + raise DeployerException("prepare is only supported for k8s deployments") class DeployerException(Exception): diff --git a/stack_orchestrator/deploy/deployer_factory.py b/stack_orchestrator/deploy/deployer_factory.py index 1de14cc5..e925199a 100644 --- a/stack_orchestrator/deploy/deployer_factory.py +++ b/stack_orchestrator/deploy/deployer_factory.py @@ -14,14 +14,14 @@ # along with this program. If not, see . 
from stack_orchestrator import constants -from stack_orchestrator.deploy.k8s.deploy_k8s import ( - K8sDeployer, - K8sDeployerConfigGenerator, -) from stack_orchestrator.deploy.compose.deploy_docker import ( DockerDeployer, DockerDeployerConfigGenerator, ) +from stack_orchestrator.deploy.k8s.deploy_k8s import ( + K8sDeployer, + K8sDeployerConfigGenerator, +) def getDeployerConfigGenerator(type: str, deployment_context): @@ -44,10 +44,7 @@ def getDeployer( compose_project_name, compose_env_file, ) - elif ( - type == type == constants.k8s_deploy_type - or type == constants.k8s_kind_deploy_type - ): + elif type == type == constants.k8s_deploy_type or type == constants.k8s_kind_deploy_type: return K8sDeployer( type, deployment_context, diff --git a/stack_orchestrator/deploy/deployment.py b/stack_orchestrator/deploy/deployment.py index a8f2f88a..768720fa 100644 --- a/stack_orchestrator/deploy/deployment.py +++ b/stack_orchestrator/deploy/deployment.py @@ -13,29 +13,28 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . 
-import click -from pathlib import Path import subprocess import sys +from pathlib import Path + +import click from stack_orchestrator import constants -from stack_orchestrator.deploy.images import push_images_operation from stack_orchestrator.deploy.deploy import ( - up_operation, + create_deploy_context, down_operation, - prepare_operation, - ps_operation, - port_operation, - status_operation, -) -from stack_orchestrator.deploy.deploy import ( exec_operation, logs_operation, - create_deploy_context, + port_operation, + prepare_operation, + ps_operation, + status_operation, + up_operation, update_envs_operation, ) from stack_orchestrator.deploy.deploy_types import DeployCommandContext from stack_orchestrator.deploy.deployment_context import DeploymentContext +from stack_orchestrator.deploy.images import push_images_operation @click.group() @@ -149,9 +148,7 @@ def prepare(ctx, skip_cluster_management): # TODO: remove legacy up command since it's an alias for stop @command.command() -@click.option( - "--delete-volumes/--preserve-volumes", default=False, help="delete data volumes" -) +@click.option("--delete-volumes/--preserve-volumes", default=False, help="delete data volumes") @click.option( "--skip-cluster-management/--perform-cluster-management", default=True, @@ -168,9 +165,7 @@ def down(ctx, delete_volumes, skip_cluster_management, extra_args): # stop is the preferred alias for down @command.command() -@click.option( - "--delete-volumes/--preserve-volumes", default=False, help="delete data volumes" -) +@click.option("--delete-volumes/--preserve-volumes", default=False, help="delete data volumes") @click.option( "--skip-cluster-management/--perform-cluster-management", default=True, @@ -256,9 +251,7 @@ def run_job(ctx, job_name, helm_release): @command.command() @click.option("--stack-path", help="Path to stack git repo (overrides stored path)") -@click.option( - "--spec-file", help="Path to GitOps spec.yml in repo (e.g., deployment/spec.yml)" -) 
+@click.option("--spec-file", help="Path to GitOps spec.yml in repo (e.g., deployment/spec.yml)") @click.option("--config-file", help="Config file to pass to deploy init") @click.option( "--force", @@ -292,33 +285,27 @@ def restart(ctx, stack_path, spec_file, config_file, force, expected_ip): commands.py on each restart. Use 'deploy init' only for initial spec generation, then customize and commit to your operator repo. """ - from stack_orchestrator.util import get_yaml, get_parsed_deployment_spec from stack_orchestrator.deploy.deployment_create import create_operation from stack_orchestrator.deploy.dns_probe import verify_dns_via_probe + from stack_orchestrator.util import get_parsed_deployment_spec, get_yaml deployment_context: DeploymentContext = ctx.obj # Get current spec info (before git pull) current_spec = deployment_context.spec current_http_proxy = current_spec.get_http_proxy() - current_hostname = ( - current_http_proxy[0]["host-name"] if current_http_proxy else None - ) + current_hostname = current_http_proxy[0]["host-name"] if current_http_proxy else None # Resolve stack source path if stack_path: stack_source = Path(stack_path).resolve() else: # Try to get from deployment.yml - deployment_file = ( - deployment_context.deployment_dir / constants.deployment_file_name - ) + deployment_file = deployment_context.deployment_dir / constants.deployment_file_name deployment_data = get_yaml().load(open(deployment_file)) stack_source_str = deployment_data.get("stack-source") if not stack_source_str: - print( - "Error: No stack-source in deployment.yml and --stack-path not provided" - ) + print("Error: No stack-source in deployment.yml and --stack-path not provided") print("Use --stack-path to specify the stack git repository location") sys.exit(1) stack_source = Path(stack_source_str) @@ -334,9 +321,7 @@ def restart(ctx, stack_path, spec_file, config_file, force, expected_ip): # Step 1: Git pull (brings in updated spec.yml from operator's repo) print("\n[1/4] 
Pulling latest code from stack repository...") - git_result = subprocess.run( - ["git", "pull"], cwd=stack_source, capture_output=True, text=True - ) + git_result = subprocess.run(["git", "pull"], cwd=stack_source, capture_output=True, text=True) if git_result.returncode != 0: print(f"Git pull failed: {git_result.stderr}") sys.exit(1) @@ -408,17 +393,13 @@ def restart(ctx, stack_path, spec_file, config_file, force, expected_ip): # Stop deployment print("\n[4/4] Restarting deployment...") ctx.obj = make_deploy_context(ctx) - down_operation( - ctx, delete_volumes=False, extra_args_list=[], skip_cluster_management=True - ) + down_operation(ctx, delete_volumes=False, extra_args_list=[], skip_cluster_management=True) # Namespace deletion wait is handled by _ensure_namespace() in # the deployer — no fixed sleep needed here. # Start deployment - up_operation( - ctx, services_list=None, stay_attached=False, skip_cluster_management=True - ) + up_operation(ctx, services_list=None, stay_attached=False, skip_cluster_management=True) print("\n=== Restart Complete ===") print("Deployment restarted with git-tracked configuration.") diff --git a/stack_orchestrator/deploy/deployment_context.py b/stack_orchestrator/deploy/deployment_context.py index 79fc4bb9..2a8c2f05 100644 --- a/stack_orchestrator/deploy/deployment_context.py +++ b/stack_orchestrator/deploy/deployment_context.py @@ -18,9 +18,9 @@ import os from pathlib import Path from stack_orchestrator import constants -from stack_orchestrator.util import get_yaml -from stack_orchestrator.deploy.stack import Stack from stack_orchestrator.deploy.spec import Spec +from stack_orchestrator.deploy.stack import Stack +from stack_orchestrator.util import get_yaml class DeploymentContext: @@ -58,7 +58,7 @@ class DeploymentContext: self.stack.init_from_file(self.get_stack_file()) deployment_file_path = self.get_deployment_file() if deployment_file_path.exists(): - obj = get_yaml().load(open(deployment_file_path, "r")) + obj = 
get_yaml().load(open(deployment_file_path)) self.id = obj[constants.cluster_id_key] # Handle the case of a legacy deployment with no file # Code below is intended to match the output from _make_default_cluster_name() @@ -75,7 +75,7 @@ class DeploymentContext: raise ValueError(f"File is not inside deployment directory: {file_path}") yaml = get_yaml() - with open(file_path, "r") as f: + with open(file_path) as f: yaml_data = yaml.load(f) modifier_func(yaml_data) diff --git a/stack_orchestrator/deploy/deployment_create.py b/stack_orchestrator/deploy/deployment_create.py index 511445be..8fee65dd 100644 --- a/stack_orchestrator/deploy/deployment_create.py +++ b/stack_orchestrator/deploy/deployment_create.py @@ -13,44 +13,44 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . -import click -from importlib import util +import base64 +import filecmp import json import os -import re -import base64 -from pathlib import Path -from typing import List, Optional import random -from shutil import copy, copyfile, copytree, rmtree -from secrets import token_hex +import re import sys -import filecmp import tempfile +from importlib import util +from pathlib import Path +from secrets import token_hex +from shutil import copy, copyfile, copytree, rmtree + +import click from stack_orchestrator import constants -from stack_orchestrator.opts import opts -from stack_orchestrator.util import ( - get_stack_path, - get_parsed_deployment_spec, - get_parsed_stack_config, - global_options, - get_yaml, - get_pod_list, - get_pod_file_path, - pod_has_scripts, - get_pod_script_paths, - get_plugin_code_paths, - error_exit, - env_var_map_from_file, - resolve_config_dir, - get_job_list, - get_job_file_path, -) -from stack_orchestrator.deploy.spec import Spec from stack_orchestrator.deploy.deploy_types import LaconicStackSetupCommand from stack_orchestrator.deploy.deployer_factory import getDeployerConfigGenerator from 
stack_orchestrator.deploy.deployment_context import DeploymentContext +from stack_orchestrator.deploy.spec import Spec +from stack_orchestrator.opts import opts +from stack_orchestrator.util import ( + env_var_map_from_file, + error_exit, + get_job_file_path, + get_job_list, + get_parsed_deployment_spec, + get_parsed_stack_config, + get_plugin_code_paths, + get_pod_file_path, + get_pod_list, + get_pod_script_paths, + get_stack_path, + get_yaml, + global_options, + pod_has_scripts, + resolve_config_dir, +) def _make_default_deployment_dir(): @@ -66,7 +66,7 @@ def _get_ports(stack): pod_file_path = get_pod_file_path(stack, parsed_stack, pod) if pod_file_path is None: continue - parsed_pod_file = yaml.load(open(pod_file_path, "r")) + parsed_pod_file = yaml.load(open(pod_file_path)) if "services" in parsed_pod_file: for svc_name, svc in parsed_pod_file["services"].items(): if "ports" in svc: @@ -102,7 +102,7 @@ def _get_named_volumes(stack): pod_file_path = get_pod_file_path(stack, parsed_stack, pod) if pod_file_path is None: continue - parsed_pod_file = yaml.load(open(pod_file_path, "r")) + parsed_pod_file = yaml.load(open(pod_file_path)) if "volumes" in parsed_pod_file: volumes = parsed_pod_file["volumes"] for volume in volumes.keys(): @@ -132,9 +132,7 @@ def _create_bind_dir_if_relative(volume, path_string, compose_dir): absolute_path.mkdir(parents=True, exist_ok=True) else: if not path.exists(): - print( - f"WARNING: mount path for volume {volume} does not exist: {path_string}" - ) + print(f"WARNING: mount path for volume {volume} does not exist: {path_string}") # See: @@ -151,9 +149,7 @@ def _fixup_pod_file(pod, spec, compose_dir): volume_spec = spec_volumes[volume] if volume_spec: volume_spec_fixedup = ( - volume_spec - if Path(volume_spec).is_absolute() - else f".{volume_spec}" + volume_spec if Path(volume_spec).is_absolute() else f".{volume_spec}" ) _create_bind_dir_if_relative(volume, volume_spec, compose_dir) # this is Docker specific @@ -328,10 +324,7 @@ def 
_get_mapped_ports(stack: str, map_recipe: str): else: print("Error: bad map_recipe") else: - print( - f"Error: --map-ports-to-host must specify one of: " - f"{port_map_recipes}" - ) + print(f"Error: --map-ports-to-host must specify one of: " f"{port_map_recipes}") sys.exit(1) return ports @@ -356,9 +349,7 @@ def _parse_config_variables(variable_values: str): @click.command() @click.option("--config", help="Provide config variables for the deployment") -@click.option( - "--config-file", help="Provide config variables in a file for the deployment" -) +@click.option("--config-file", help="Provide config variables in a file for the deployment") @click.option("--kube-config", help="Provide a config file for a k8s deployment") @click.option( "--image-registry", @@ -372,9 +363,7 @@ def _parse_config_variables(variable_values: str): "localhost-same, any-same, localhost-fixed-random, any-fixed-random", ) @click.pass_context -def init( - ctx, config, config_file, kube_config, image_registry, output, map_ports_to_host -): +def init(ctx, config, config_file, kube_config, image_registry, output, map_ports_to_host): stack = global_options(ctx).stack deployer_type = ctx.obj.deployer.type deploy_command_context = ctx.obj @@ -421,13 +410,9 @@ def init_operation( else: # Check for --kube-config supplied for non-relevant deployer types if kube_config is not None: - error_exit( - f"--kube-config is not allowed with a {deployer_type} deployment" - ) + error_exit(f"--kube-config is not allowed with a {deployer_type} deployment") if image_registry is not None: - error_exit( - f"--image-registry is not allowed with a {deployer_type} deployment" - ) + error_exit(f"--image-registry is not allowed with a {deployer_type} deployment") if default_spec_file_content: spec_file_content.update(default_spec_file_content) config_variables = _parse_config_variables(config) @@ -479,9 +464,7 @@ def init_operation( spec_file_content["configmaps"] = configmap_descriptors if opts.o.debug: - print( - 
f"Creating spec file for stack: {stack} with content: {spec_file_content}" - ) + print(f"Creating spec file for stack: {stack} with content: {spec_file_content}") with open(output, "w") as output_file: get_yaml().dump(spec_file_content, output_file) @@ -497,7 +480,8 @@ def _generate_and_store_secrets(config_vars: dict, deployment_name: str): Called by `deploy create` - generates fresh secrets and stores them. Returns the generated secrets dict for reference. """ - from kubernetes import client, config as k8s_config + from kubernetes import client + from kubernetes import config as k8s_config secrets = {} for name, value in config_vars.items(): @@ -526,9 +510,7 @@ def _generate_and_store_secrets(config_vars: dict, deployment_name: str): try: k8s_config.load_incluster_config() except Exception: - print( - "Warning: Could not load kube config, secrets will not be stored in K8s" - ) + print("Warning: Could not load kube config, secrets will not be stored in K8s") return secrets v1 = client.CoreV1Api() @@ -555,7 +537,7 @@ def _generate_and_store_secrets(config_vars: dict, deployment_name: str): return secrets -def create_registry_secret(spec: Spec, deployment_name: str) -> Optional[str]: +def create_registry_secret(spec: Spec, deployment_name: str) -> str | None: """Create K8s docker-registry secret from spec + environment. 
Reads registry configuration from spec.yml and creates a Kubernetes @@ -568,7 +550,8 @@ def create_registry_secret(spec: Spec, deployment_name: str) -> Optional[str]: Returns: The secret name if created, None if no registry config """ - from kubernetes import client, config as k8s_config + from kubernetes import client + from kubernetes import config as k8s_config registry_config = spec.get_image_registry_config() if not registry_config: @@ -585,17 +568,12 @@ def create_registry_secret(spec: Spec, deployment_name: str) -> Optional[str]: assert token_env is not None token = os.environ.get(token_env) if not token: - print( - f"Warning: Registry token env var '{token_env}' not set, " - "skipping registry secret" - ) + print(f"Warning: Registry token env var '{token_env}' not set, " "skipping registry secret") return None # Create dockerconfigjson format (Docker API uses "password" field for tokens) auth = base64.b64encode(f"{username}:{token}".encode()).decode() - docker_config = { - "auths": {server: {"username": username, "password": token, "auth": auth}} - } + docker_config = {"auths": {server: {"username": username, "password": token, "auth": auth}}} # Secret name derived from deployment name secret_name = f"{deployment_name}-registry" @@ -615,11 +593,7 @@ def create_registry_secret(spec: Spec, deployment_name: str) -> Optional[str]: k8s_secret = client.V1Secret( metadata=client.V1ObjectMeta(name=secret_name), - data={ - ".dockerconfigjson": base64.b64encode( - json.dumps(docker_config).encode() - ).decode() - }, + data={".dockerconfigjson": base64.b64encode(json.dumps(docker_config).encode()).decode()}, type="kubernetes.io/dockerconfigjson", ) @@ -636,17 +610,14 @@ def create_registry_secret(spec: Spec, deployment_name: str) -> Optional[str]: return secret_name -def _write_config_file( - spec_file: Path, config_env_file: Path, deployment_name: Optional[str] = None -): +def _write_config_file(spec_file: Path, config_env_file: Path, deployment_name: str | None = 
None): spec_content = get_parsed_deployment_spec(spec_file) config_vars = spec_content.get("config", {}) or {} # Generate and store secrets in K8s if deployment_name provided and tokens exist if deployment_name and config_vars: has_generate_tokens = any( - isinstance(v, str) and GENERATE_TOKEN_PATTERN.search(v) - for v in config_vars.values() + isinstance(v, str) and GENERATE_TOKEN_PATTERN.search(v) for v in config_vars.values() ) if has_generate_tokens: _generate_and_store_secrets(config_vars, deployment_name) @@ -669,13 +640,13 @@ def _write_kube_config_file(external_path: Path, internal_path: Path): copyfile(external_path, internal_path) -def _copy_files_to_directory(file_paths: List[Path], directory: Path): +def _copy_files_to_directory(file_paths: list[Path], directory: Path): for path in file_paths: # Using copy to preserve the execute bit copy(path, os.path.join(directory, os.path.basename(path))) -def _create_deployment_file(deployment_dir: Path, stack_source: Optional[Path] = None): +def _create_deployment_file(deployment_dir: Path, stack_source: Path | None = None): deployment_file_path = deployment_dir.joinpath(constants.deployment_file_name) cluster = f"{constants.cluster_name_prefix}{token_hex(8)}" deployment_content = {constants.cluster_id_key: cluster} @@ -701,9 +672,7 @@ def _check_volume_definitions(spec): @click.command() -@click.option( - "--spec-file", required=True, help="Spec file to use to create this deployment" -) +@click.option("--spec-file", required=True, help="Spec file to use to create this deployment") @click.option("--deployment-dir", help="Create deployment files in this directory") @click.option( "--update", @@ -757,9 +726,7 @@ def create_operation( initial_peers=None, extra_args=(), ): - parsed_spec = Spec( - os.path.abspath(spec_file), get_parsed_deployment_spec(spec_file) - ) + parsed_spec = Spec(os.path.abspath(spec_file), get_parsed_deployment_spec(spec_file)) _check_volume_definitions(parsed_spec) stack_name = 
parsed_spec["stack"] deployment_type = parsed_spec[constants.deploy_to_key] @@ -816,9 +783,7 @@ def create_operation( # Exclude config file to preserve deployment settings # (XXX breaks passing config vars from spec) exclude_patterns = ["data", "data/*", constants.config_file_name] - _safe_copy_tree( - temp_dir, deployment_dir_path, exclude_patterns=exclude_patterns - ) + _safe_copy_tree(temp_dir, deployment_dir_path, exclude_patterns=exclude_patterns) finally: # Clean up temp dir rmtree(temp_dir) @@ -841,18 +806,14 @@ def create_operation( deployment_context = DeploymentContext() deployment_context.init(deployment_dir_path) # Call the deployer to generate any deployer-specific files (e.g. for kind) - deployer_config_generator = getDeployerConfigGenerator( - deployment_type, deployment_context - ) + deployer_config_generator = getDeployerConfigGenerator(deployment_type, deployment_context) # TODO: make deployment_dir_path a Path above if deployer_config_generator is not None: deployer_config_generator.generate(deployment_dir_path) - call_stack_deploy_create( - deployment_context, [network_dir, initial_peers, *extra_args] - ) + call_stack_deploy_create(deployment_context, [network_dir, initial_peers, *extra_args]) -def _safe_copy_tree(src: Path, dst: Path, exclude_patterns: Optional[List[str]] = None): +def _safe_copy_tree(src: Path, dst: Path, exclude_patterns: list[str] | None = None): """ Recursively copy a directory tree, backing up changed files with .bak suffix. 
@@ -873,11 +834,7 @@ def _safe_copy_tree(src: Path, dst: Path, exclude_patterns: Optional[List[str]] def safe_copy_file(src_file: Path, dst_file: Path): """Copy file, backing up destination if it differs.""" - if ( - dst_file.exists() - and not dst_file.is_dir() - and not filecmp.cmp(src_file, dst_file) - ): + if dst_file.exists() and not dst_file.is_dir() and not filecmp.cmp(src_file, dst_file): os.rename(dst_file, f"{dst_file}.bak") copy(src_file, dst_file) @@ -903,7 +860,7 @@ def _write_deployment_files( stack_name: str, deployment_type: str, include_deployment_file: bool = True, - stack_source: Optional[Path] = None, + stack_source: Path | None = None, ): """ Write deployment files to target directory. @@ -931,9 +888,7 @@ def _write_deployment_files( # Use stack_name as deployment_name for K8s secret naming # Extract just the name part if stack_name is a path ("path/to/stack" -> "stack") deployment_name = Path(stack_name).name.replace("_", "-") - _write_config_file( - spec_file, target_dir.joinpath(constants.config_file_name), deployment_name - ) + _write_config_file(spec_file, target_dir.joinpath(constants.config_file_name), deployment_name) # Copy any k8s config file into the target dir if deployment_type == "k8s": @@ -954,7 +909,7 @@ def _write_deployment_files( pod_file_path = get_pod_file_path(stack_name, parsed_stack, pod) if pod_file_path is None: continue - parsed_pod_file = yaml.load(open(pod_file_path, "r")) + parsed_pod_file = yaml.load(open(pod_file_path)) extra_config_dirs = _find_extra_config_dirs(parsed_pod_file, pod) destination_pod_dir = destination_pods_dir.joinpath(pod) os.makedirs(destination_pod_dir, exist_ok=True) @@ -962,7 +917,7 @@ def _write_deployment_files( print(f"extra config dirs: {extra_config_dirs}") _fixup_pod_file(parsed_pod_file, parsed_spec, destination_compose_dir) with open( - destination_compose_dir.joinpath("docker-compose-%s.yml" % pod), "w" + destination_compose_dir.joinpath(f"docker-compose-{pod}.yml"), "w" ) as 
output_file: yaml.dump(parsed_pod_file, output_file) @@ -986,12 +941,8 @@ def _write_deployment_files( for configmap in parsed_spec.get_configmaps(): source_config_dir = resolve_config_dir(stack_name, configmap) if os.path.exists(source_config_dir): - destination_config_dir = target_dir.joinpath( - "configmaps", configmap - ) - copytree( - source_config_dir, destination_config_dir, dirs_exist_ok=True - ) + destination_config_dir = target_dir.joinpath("configmaps", configmap) + copytree(source_config_dir, destination_config_dir, dirs_exist_ok=True) else: # TODO: # This is odd - looks up config dir that matches a volume name, @@ -1022,12 +973,10 @@ def _write_deployment_files( for job in jobs: job_file_path = get_job_file_path(stack_name, parsed_stack, job) if job_file_path and job_file_path.exists(): - parsed_job_file = yaml.load(open(job_file_path, "r")) + parsed_job_file = yaml.load(open(job_file_path)) _fixup_pod_file(parsed_job_file, parsed_spec, destination_compose_dir) with open( - destination_compose_jobs_dir.joinpath( - "docker-compose-%s.yml" % job - ), + destination_compose_jobs_dir.joinpath(f"docker-compose-{job}.yml"), "w", ) as output_file: yaml.dump(parsed_job_file, output_file) @@ -1042,18 +991,14 @@ def _write_deployment_files( @click.option("--node-moniker", help="Moniker for this node") @click.option("--chain-id", help="The new chain id") @click.option("--key-name", help="Name for new node key") -@click.option( - "--gentx-files", help="List of comma-delimited gentx filenames from other nodes" -) +@click.option("--gentx-files", help="List of comma-delimited gentx filenames from other nodes") @click.option( "--gentx-addresses", type=str, help="List of comma-delimited validator addresses for other nodes", ) @click.option("--genesis-file", help="Genesis file for the network") -@click.option( - "--initialize-network", is_flag=True, default=False, help="Initialize phase" -) +@click.option("--initialize-network", is_flag=True, default=False, 
help="Initialize phase") @click.option("--join-network", is_flag=True, default=False, help="Join phase") @click.option("--connect-network", is_flag=True, default=False, help="Connect phase") @click.option("--create-network", is_flag=True, default=False, help="Create phase") diff --git a/stack_orchestrator/deploy/dns_probe.py b/stack_orchestrator/deploy/dns_probe.py index e04b4ea2..90e9f9e0 100644 --- a/stack_orchestrator/deploy/dns_probe.py +++ b/stack_orchestrator/deploy/dns_probe.py @@ -6,7 +6,7 @@ import secrets import socket import time -from typing import Optional + import requests from kubernetes import client @@ -15,7 +15,8 @@ def get_server_egress_ip() -> str: """Get this server's public egress IP via ipify.""" response = requests.get("https://api.ipify.org", timeout=10) response.raise_for_status() - return response.text.strip() + result: str = response.text.strip() + return result def resolve_hostname(hostname: str) -> list[str]: @@ -27,7 +28,7 @@ def resolve_hostname(hostname: str) -> list[str]: return [] -def verify_dns_simple(hostname: str, expected_ip: Optional[str] = None) -> bool: +def verify_dns_simple(hostname: str, expected_ip: str | None = None) -> bool: """Simple DNS verification - check hostname resolves to expected IP. If expected_ip not provided, uses server's egress IP. 
@@ -98,9 +99,7 @@ def delete_probe_ingress(namespace: str = "default"): """Delete the temporary probe ingress.""" networking_api = client.NetworkingV1Api() try: - networking_api.delete_namespaced_ingress( - name="laconic-dns-probe", namespace=namespace - ) + networking_api.delete_namespaced_ingress(name="laconic-dns-probe", namespace=namespace) except client.exceptions.ApiException: pass # Ignore if already deleted diff --git a/stack_orchestrator/deploy/images.py b/stack_orchestrator/deploy/images.py index 2c57bf47..f1a24acc 100644 --- a/stack_orchestrator/deploy/images.py +++ b/stack_orchestrator/deploy/images.py @@ -13,15 +13,14 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . -from typing import Set from python_on_whales import DockerClient from stack_orchestrator import constants -from stack_orchestrator.opts import opts -from stack_orchestrator.deploy.deployment_context import DeploymentContext from stack_orchestrator.deploy.deploy_types import DeployCommandContext from stack_orchestrator.deploy.deploy_util import images_for_deployment +from stack_orchestrator.deploy.deployment_context import DeploymentContext +from stack_orchestrator.opts import opts def _image_needs_pushed(image: str): @@ -32,9 +31,7 @@ def _image_needs_pushed(image: str): def _remote_tag_for_image(image: str, remote_repo_url: str): # Turns image tags of the form: foo/bar:local into remote.repo/org/bar:deploy major_parts = image.split("/", 2) - image_name_with_version = ( - major_parts[1] if 2 == len(major_parts) else major_parts[0] - ) + image_name_with_version = major_parts[1] if 2 == len(major_parts) else major_parts[0] (image_name, image_version) = image_name_with_version.split(":") if image_version == "local": return f"{remote_repo_url}/{image_name}:deploy" @@ -63,18 +60,14 @@ def add_tags_to_image(remote_repo_url: str, local_tag: str, *additional_tags): docker = DockerClient() remote_tag = 
_remote_tag_for_image(local_tag, remote_repo_url) - new_remote_tags = [ - _remote_tag_for_image(tag, remote_repo_url) for tag in additional_tags - ] + new_remote_tags = [_remote_tag_for_image(tag, remote_repo_url) for tag in additional_tags] docker.buildx.imagetools.create(sources=[remote_tag], tags=new_remote_tags) def remote_tag_for_image_unique(image: str, remote_repo_url: str, deployment_id: str): # Turns image tags of the form: foo/bar:local into remote.repo/org/bar:deploy major_parts = image.split("/", 2) - image_name_with_version = ( - major_parts[1] if 2 == len(major_parts) else major_parts[0] - ) + image_name_with_version = major_parts[1] if 2 == len(major_parts) else major_parts[0] (image_name, image_version) = image_name_with_version.split(":") if image_version == "local": # Salt the tag with part of the deployment id to make it unique to this @@ -91,24 +84,20 @@ def push_images_operation( ): # Get the list of images for the stack cluster_context = command_context.cluster_context - images: Set[str] = images_for_deployment(cluster_context.compose_files) + images: set[str] = images_for_deployment(cluster_context.compose_files) # Tag the images for the remote repo remote_repo_url = deployment_context.spec.obj[constants.image_registry_key] docker = DockerClient() for image in images: if _image_needs_pushed(image): - remote_tag = remote_tag_for_image_unique( - image, remote_repo_url, deployment_context.id - ) + remote_tag = remote_tag_for_image_unique(image, remote_repo_url, deployment_context.id) if opts.o.verbose: print(f"Tagging {image} to {remote_tag}") docker.image.tag(image, remote_tag) # Run docker push commands to upload for image in images: if _image_needs_pushed(image): - remote_tag = remote_tag_for_image_unique( - image, remote_repo_url, deployment_context.id - ) + remote_tag = remote_tag_for_image_unique(image, remote_repo_url, deployment_context.id) if opts.o.verbose: print(f"Pushing image {remote_tag}") docker.image.push(remote_tag) diff --git 
a/stack_orchestrator/deploy/k8s/cluster_info.py b/stack_orchestrator/deploy/k8s/cluster_info.py index 2ebf96f2..c6ed6cee 100644 --- a/stack_orchestrator/deploy/k8s/cluster_info.py +++ b/stack_orchestrator/deploy/k8s/cluster_info.py @@ -13,33 +13,31 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . -import os import base64 +import os +from typing import Any from kubernetes import client -from typing import Any, List, Optional, Set -from stack_orchestrator.opts import opts -from stack_orchestrator.util import env_var_map_from_file +from stack_orchestrator.deploy.deploy_types import DeployEnvVars +from stack_orchestrator.deploy.deploy_util import ( + images_for_deployment, + parsed_pod_files_map_from_file_names, +) +from stack_orchestrator.deploy.images import remote_tag_for_image_unique from stack_orchestrator.deploy.k8s.helpers import ( + envs_from_compose_file, + envs_from_environment_variables_map, + get_kind_pv_bind_mount_path, + merge_envs, named_volumes_from_pod_files, + translate_sidecar_service_names, volume_mounts_for_service, volumes_for_pod_files, ) -from stack_orchestrator.deploy.k8s.helpers import get_kind_pv_bind_mount_path -from stack_orchestrator.deploy.k8s.helpers import ( - envs_from_environment_variables_map, - envs_from_compose_file, - merge_envs, - translate_sidecar_service_names, -) -from stack_orchestrator.deploy.deploy_util import ( - parsed_pod_files_map_from_file_names, - images_for_deployment, -) -from stack_orchestrator.deploy.deploy_types import DeployEnvVars -from stack_orchestrator.deploy.spec import Spec, Resources, ResourceLimits -from stack_orchestrator.deploy.images import remote_tag_for_image_unique +from stack_orchestrator.deploy.spec import ResourceLimits, Resources, Spec +from stack_orchestrator.opts import opts +from stack_orchestrator.util import env_var_map_from_file DEFAULT_VOLUME_RESOURCES = Resources({"reservations": {"storage": "2Gi"}}) @@ -52,7 +50,7 @@ 
DEFAULT_CONTAINER_RESOURCES = Resources( def to_k8s_resource_requirements(resources: Resources) -> client.V1ResourceRequirements: - def to_dict(limits: Optional[ResourceLimits]): + def to_dict(limits: ResourceLimits | None): if not limits: return None @@ -72,7 +70,7 @@ def to_k8s_resource_requirements(resources: Resources) -> client.V1ResourceRequi class ClusterInfo: parsed_pod_yaml_map: Any - image_set: Set[str] = set() + image_set: set[str] = set() app_name: str environment_variables: DeployEnvVars spec: Spec @@ -80,14 +78,12 @@ class ClusterInfo: def __init__(self) -> None: pass - def int(self, pod_files: List[str], compose_env_file, deployment_name, spec: Spec): + def int(self, pod_files: list[str], compose_env_file, deployment_name, spec: Spec): self.parsed_pod_yaml_map = parsed_pod_files_map_from_file_names(pod_files) # Find the set of images in the pods self.image_set = images_for_deployment(pod_files) # Filter out None values from env file - env_vars = { - k: v for k, v in env_var_map_from_file(compose_env_file).items() if v - } + env_vars = {k: v for k, v in env_var_map_from_file(compose_env_file).items() if v} self.environment_variables = DeployEnvVars(env_vars) self.app_name = deployment_name self.spec = spec @@ -124,8 +120,7 @@ class ClusterInfo: service = client.V1Service( metadata=client.V1ObjectMeta( name=( - f"{self.app_name}-nodeport-" - f"{pod_port}-{protocol.lower()}" + f"{self.app_name}-nodeport-" f"{pod_port}-{protocol.lower()}" ), labels={"app": self.app_name}, ), @@ -145,9 +140,7 @@ class ClusterInfo: nodeports.append(service) return nodeports - def get_ingress( - self, use_tls=False, certificate=None, cluster_issuer="letsencrypt-prod" - ): + def get_ingress(self, use_tls=False, certificate=None, cluster_issuer="letsencrypt-prod"): # No ingress for a deployment that has no http-proxy defined, for now http_proxy_info_list = self.spec.get_http_proxy() ingress = None @@ -162,9 +155,7 @@ class ClusterInfo: tls = ( [ client.V1IngressTLS( - 
hosts=certificate["spec"]["dnsNames"] - if certificate - else [host_name], + hosts=certificate["spec"]["dnsNames"] if certificate else [host_name], secret_name=certificate["spec"]["secretName"] if certificate else f"{self.app_name}-tls", @@ -237,8 +228,7 @@ class ClusterInfo: return None service_ports = [ - client.V1ServicePort(port=p, target_port=p, name=f"port-{p}") - for p in sorted(ports_set) + client.V1ServicePort(port=p, target_port=p, name=f"port-{p}") for p in sorted(ports_set) ] service = client.V1Service( @@ -290,9 +280,7 @@ class ClusterInfo: volume_name=k8s_volume_name, ) pvc = client.V1PersistentVolumeClaim( - metadata=client.V1ObjectMeta( - name=f"{self.app_name}-{volume_name}", labels=labels - ), + metadata=client.V1ObjectMeta(name=f"{self.app_name}-{volume_name}", labels=labels), spec=spec, ) result.append(pvc) @@ -309,9 +297,7 @@ class ClusterInfo: continue if not cfg_map_path.startswith("/") and self.spec.file_path is not None: - cfg_map_path = os.path.join( - os.path.dirname(str(self.spec.file_path)), cfg_map_path - ) + cfg_map_path = os.path.join(os.path.dirname(str(self.spec.file_path)), cfg_map_path) # Read in all the files at a single-level of the directory. 
# This mimics the behavior of @@ -320,9 +306,7 @@ class ClusterInfo: for f in os.listdir(cfg_map_path): full_path = os.path.join(cfg_map_path, f) if os.path.isfile(full_path): - data[f] = base64.b64encode(open(full_path, "rb").read()).decode( - "ASCII" - ) + data[f] = base64.b64encode(open(full_path, "rb").read()).decode("ASCII") spec = client.V1ConfigMap( metadata=client.V1ObjectMeta( @@ -425,7 +409,7 @@ class ClusterInfo: return global_resources # TODO: put things like image pull policy into an object-scope struct - def get_deployment(self, image_pull_policy: Optional[str] = None): + def get_deployment(self, image_pull_policy: str | None = None): containers = [] services = {} global_resources = self.spec.get_container_resources() @@ -453,9 +437,7 @@ class ClusterInfo: port_str = port_str.split(":")[-1] port = int(port_str) container_ports.append( - client.V1ContainerPort( - container_port=port, protocol=protocol - ) + client.V1ContainerPort(container_port=port, protocol=protocol) ) if opts.o.debug: print(f"image: {image}") @@ -473,9 +455,7 @@ class ClusterInfo: # Translate docker-compose service names to localhost for sidecars # All services in the same pod share the network namespace sibling_services = [s for s in services.keys() if s != service_name] - merged_envs = translate_sidecar_service_names( - merged_envs, sibling_services - ) + merged_envs = translate_sidecar_service_names(merged_envs, sibling_services) envs = envs_from_environment_variables_map(merged_envs) if opts.o.debug: print(f"Merged envs: {envs}") @@ -488,18 +468,14 @@ class ClusterInfo: if self.spec.get_image_registry() is not None else image ) - volume_mounts = volume_mounts_for_service( - self.parsed_pod_yaml_map, service_name - ) + volume_mounts = volume_mounts_for_service(self.parsed_pod_yaml_map, service_name) # Handle command/entrypoint from compose file # In docker-compose: entrypoint -> k8s command, command -> k8s args container_command = None container_args = None if "entrypoint" in 
service_info: entrypoint = service_info["entrypoint"] - container_command = ( - entrypoint if isinstance(entrypoint, list) else [entrypoint] - ) + container_command = entrypoint if isinstance(entrypoint, list) else [entrypoint] if "command" in service_info: cmd = service_info["command"] container_args = cmd if isinstance(cmd, list) else cmd.split() @@ -528,18 +504,14 @@ class ClusterInfo: volume_mounts=volume_mounts, security_context=client.V1SecurityContext( privileged=self.spec.get_privileged(), - capabilities=client.V1Capabilities( - add=self.spec.get_capabilities() - ) + capabilities=client.V1Capabilities(add=self.spec.get_capabilities()) if self.spec.get_capabilities() else None, ), resources=to_k8s_resource_requirements(container_resources), ) containers.append(container) - volumes = volumes_for_pod_files( - self.parsed_pod_yaml_map, self.spec, self.app_name - ) + volumes = volumes_for_pod_files(self.parsed_pod_yaml_map, self.spec, self.app_name) registry_config = self.spec.get_image_registry_config() if registry_config: secret_name = f"{self.app_name}-registry" diff --git a/stack_orchestrator/deploy/k8s/deploy_k8s.py b/stack_orchestrator/deploy/k8s/deploy_k8s.py index d1e51ddb..947daa59 100644 --- a/stack_orchestrator/deploy/k8s/deploy_k8s.py +++ b/stack_orchestrator/deploy/k8s/deploy_k8s.py @@ -14,42 +14,36 @@ import time from datetime import datetime, timezone - from pathlib import Path +from typing import Any, cast + from kubernetes import client, config from kubernetes.client.exceptions import ApiException -from typing import Any, Dict, List, Optional, cast from stack_orchestrator import constants from stack_orchestrator.deploy.deployer import Deployer, DeployerConfigGenerator +from stack_orchestrator.deploy.deployment_context import DeploymentContext +from stack_orchestrator.deploy.k8s.cluster_info import ClusterInfo from stack_orchestrator.deploy.k8s.helpers import ( + containers_in_pod, create_cluster, destroy_cluster, - load_images_into_kind, -) 
-from stack_orchestrator.deploy.k8s.helpers import ( - install_ingress_for_kind, - wait_for_ingress_in_kind, - is_ingress_running, -) -from stack_orchestrator.deploy.k8s.helpers import ( - pods_in_deployment, - containers_in_pod, - log_stream_from_string, -) -from stack_orchestrator.deploy.k8s.helpers import ( - generate_kind_config, generate_high_memlock_spec_json, + generate_kind_config, + install_ingress_for_kind, + is_ingress_running, + load_images_into_kind, + log_stream_from_string, + pods_in_deployment, + wait_for_ingress_in_kind, ) -from stack_orchestrator.deploy.k8s.cluster_info import ClusterInfo from stack_orchestrator.opts import opts -from stack_orchestrator.deploy.deployment_context import DeploymentContext from stack_orchestrator.util import error_exit class AttrDict(dict): def __init__(self, *args, **kwargs): - super(AttrDict, self).__init__(*args, **kwargs) + super().__init__(*args, **kwargs) self.__dict__ = self @@ -144,9 +138,7 @@ class K8sDeployer(Deployer): else: # Get the config file and pass to load_kube_config() config.load_kube_config( - config_file=self.deployment_dir.joinpath( - constants.kube_config_filename - ).as_posix() + config_file=self.deployment_dir.joinpath(constants.kube_config_filename).as_posix() ) self.core_api = client.CoreV1Api() self.networking_api = client.NetworkingV1Api() @@ -213,10 +205,7 @@ class K8sDeployer(Deployer): ) if opts.o.debug: - print( - f"Namespace {self.k8s_namespace} is terminating, " - f"waiting for deletion..." 
- ) + print(f"Namespace {self.k8s_namespace} is terminating, " f"waiting for deletion...") time.sleep(2) def _delete_namespace(self): @@ -276,9 +265,7 @@ class K8sDeployer(Deployer): name=deployment.metadata.name, namespace=self.k8s_namespace, ) - deployment.metadata.resource_version = ( - existing.metadata.resource_version - ) + deployment.metadata.resource_version = existing.metadata.resource_version resp = cast( client.V1Deployment, self.apps_api.replace_namespaced_deployment( @@ -391,9 +378,7 @@ class K8sDeployer(Deployer): print(f"Sending this pv: {pv}") if not opts.o.dry_run: try: - pv_resp = self.core_api.read_persistent_volume( - name=pv.metadata.name - ) + pv_resp = self.core_api.read_persistent_volume(name=pv.metadata.name) if pv_resp: if opts.o.debug: print("PVs already present:") @@ -500,9 +485,9 @@ class K8sDeployer(Deployer): if before < now < after: # Check the status is Ready for condition in status.get("conditions", []): - if "True" == condition.get( - "status" - ) and "Ready" == condition.get("type"): + if "True" == condition.get("status") and "Ready" == condition.get( + "type" + ): return cert return None @@ -519,15 +504,11 @@ class K8sDeployer(Deployer): self.skip_cluster_management = skip_cluster_management if not opts.o.dry_run: if self.is_kind() and not self.skip_cluster_management: - kind_config = str( - self.deployment_dir.joinpath(constants.kind_config_filename) - ) + kind_config = str(self.deployment_dir.joinpath(constants.kind_config_filename)) actual_cluster = create_cluster(self.kind_cluster_name, kind_config) if actual_cluster != self.kind_cluster_name: self.kind_cluster_name = actual_cluster - local_containers = self.deployment_context.stack.obj.get( - "containers", [] - ) + local_containers = self.deployment_context.stack.obj.get("containers", []) if local_containers: local_images = { img @@ -579,9 +560,7 @@ class K8sDeployer(Deployer): if opts.o.debug and certificate: print(f"Using existing certificate: {certificate}") - ingress = 
self.cluster_info.get_ingress( - use_tls=use_tls, certificate=certificate - ) + ingress = self.cluster_info.get_ingress(use_tls=use_tls, certificate=certificate) if ingress: if opts.o.debug: print(f"Sending this ingress: {ingress}") @@ -590,7 +569,7 @@ class K8sDeployer(Deployer): elif opts.o.debug: print("No ingress configured") - nodeports: List[client.V1Service] = self.cluster_info.get_nodeports() + nodeports: list[client.V1Service] = self.cluster_info.get_nodeports() for nodeport in nodeports: if opts.o.debug: print(f"Sending this nodeport: {nodeport}") @@ -670,7 +649,7 @@ class K8sDeployer(Deployer): return cert = cast( - Dict[str, Any], + dict[str, Any], self.custom_obj_api.get_namespaced_custom_object( group="cert-manager.io", version="v1", @@ -686,7 +665,7 @@ class K8sDeployer(Deployer): if lb_ingress: ip = lb_ingress[0].ip or "?" cert_status = cert.get("status", {}) - tls = "notBefore: %s; notAfter: %s; names: %s" % ( + tls = "notBefore: {}; notAfter: {}; names: {}".format( cert_status.get("notBefore", "?"), cert_status.get("notAfter", "?"), ingress.spec.tls[0].hosts, @@ -727,9 +706,7 @@ class K8sDeployer(Deployer): if c.ports: for prt in c.ports: ports[str(prt.container_port)] = [ - AttrDict( - {"HostIp": pod_ip, "HostPort": prt.container_port} - ) + AttrDict({"HostIp": pod_ip, "HostPort": prt.container_port}) ] ret.append( @@ -791,9 +768,7 @@ class K8sDeployer(Deployer): deployment = cast( client.V1Deployment, - self.apps_api.read_namespaced_deployment( - name=ref_name, namespace=self.k8s_namespace - ), + self.apps_api.read_namespaced_deployment(name=ref_name, namespace=self.k8s_namespace), ) if not deployment.spec or not deployment.spec.template: return @@ -832,14 +807,14 @@ class K8sDeployer(Deployer): user=None, volumes=None, entrypoint=None, - env={}, - ports=[], + env=None, + ports=None, detach=False, ): # We need to figure out how to do this -- check why we're being called first pass - def run_job(self, job_name: str, helm_release: Optional[str] = 
None): + def run_job(self, job_name: str, helm_release: str | None = None): if not opts.o.dry_run: from stack_orchestrator.deploy.k8s.helm.job_runner import run_helm_job @@ -881,13 +856,9 @@ class K8sDeployerConfigGenerator(DeployerConfigGenerator): # Must be done before generate_kind_config() which references it. if self.deployment_context.spec.get_unlimited_memlock(): spec_content = generate_high_memlock_spec_json() - spec_file = deployment_dir.joinpath( - constants.high_memlock_spec_filename - ) + spec_file = deployment_dir.joinpath(constants.high_memlock_spec_filename) if opts.o.debug: - print( - f"Creating high-memlock spec for unlimited memlock: {spec_file}" - ) + print(f"Creating high-memlock spec for unlimited memlock: {spec_file}") with open(spec_file, "w") as output_file: output_file.write(spec_content) diff --git a/stack_orchestrator/deploy/k8s/helm/chart_generator.py b/stack_orchestrator/deploy/k8s/helm/chart_generator.py index 7e9c974e..2459a77f 100644 --- a/stack_orchestrator/deploy/k8s/helm/chart_generator.py +++ b/stack_orchestrator/deploy/k8s/helm/chart_generator.py @@ -16,21 +16,21 @@ from pathlib import Path from stack_orchestrator import constants -from stack_orchestrator.opts import opts -from stack_orchestrator.util import ( - get_parsed_stack_config, - get_pod_list, - get_pod_file_path, - get_job_list, - get_job_file_path, - error_exit, -) from stack_orchestrator.deploy.k8s.helm.kompose_wrapper import ( check_kompose_available, - get_kompose_version, convert_to_helm_chart, + get_kompose_version, +) +from stack_orchestrator.opts import opts +from stack_orchestrator.util import ( + error_exit, + get_job_file_path, + get_job_list, + get_parsed_stack_config, + get_pod_file_path, + get_pod_list, + get_yaml, ) -from stack_orchestrator.util import get_yaml def _wrap_job_templates_with_conditionals(chart_dir: Path, jobs: list) -> None: @@ -88,7 +88,7 @@ def _post_process_chart(chart_dir: Path, chart_name: str, jobs: list) -> None: # Fix Chart.yaml 
chart_yaml_path = chart_dir / "Chart.yaml" if chart_yaml_path.exists(): - chart_yaml = yaml.load(open(chart_yaml_path, "r")) + chart_yaml = yaml.load(open(chart_yaml_path)) # Fix name chart_yaml["name"] = chart_name @@ -108,9 +108,7 @@ def _post_process_chart(chart_dir: Path, chart_name: str, jobs: list) -> None: _wrap_job_templates_with_conditionals(chart_dir, jobs) -def generate_helm_chart( - stack_path: str, spec_file: str, deployment_dir_path: Path -) -> None: +def generate_helm_chart(stack_path: str, spec_file: str, deployment_dir_path: Path) -> None: """ Generate a self-sufficient Helm chart from stack compose files using Kompose. @@ -152,7 +150,7 @@ def generate_helm_chart( error_exit(f"Deployment file not found: {deployment_file}") yaml = get_yaml() - deployment_config = yaml.load(open(deployment_file, "r")) + deployment_config = yaml.load(open(deployment_file)) cluster_id = deployment_config.get(constants.cluster_id_key) if not cluster_id: error_exit(f"cluster-id not found in {deployment_file}") @@ -219,10 +217,7 @@ def generate_helm_chart( # 5. Create chart directory and invoke Kompose chart_dir = deployment_dir_path / "chart" - print( - f"Converting {len(compose_files)} compose file(s) to Helm chart " - "using Kompose..." 
- ) + print(f"Converting {len(compose_files)} compose file(s) to Helm chart " "using Kompose...") try: output = convert_to_helm_chart( @@ -304,9 +299,7 @@ Edit the generated template files in `templates/` to customize: # Count generated files template_files = ( - list((chart_dir / "templates").glob("*.yaml")) - if (chart_dir / "templates").exists() - else [] + list((chart_dir / "templates").glob("*.yaml")) if (chart_dir / "templates").exists() else [] ) print(f" Files: {len(template_files)} template(s) generated") diff --git a/stack_orchestrator/deploy/k8s/helm/job_runner.py b/stack_orchestrator/deploy/k8s/helm/job_runner.py index 9f34ce6c..7601c580 100644 --- a/stack_orchestrator/deploy/k8s/helm/job_runner.py +++ b/stack_orchestrator/deploy/k8s/helm/job_runner.py @@ -13,12 +13,12 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . +import json +import os import subprocess import tempfile -import os -import json from pathlib import Path -from typing import Optional + from stack_orchestrator.util import get_yaml @@ -40,18 +40,19 @@ def get_release_name_from_chart(chart_dir: Path) -> str: raise Exception(f"Chart.yaml not found: {chart_yaml_path}") yaml = get_yaml() - chart_yaml = yaml.load(open(chart_yaml_path, "r")) + chart_yaml = yaml.load(open(chart_yaml_path)) if "name" not in chart_yaml: raise Exception(f"Chart name not found in {chart_yaml_path}") - return chart_yaml["name"] + name: str = chart_yaml["name"] + return name def run_helm_job( chart_dir: Path, job_name: str, - release: Optional[str] = None, + release: str | None = None, namespace: str = "default", timeout: int = 600, verbose: bool = False, @@ -94,9 +95,7 @@ def run_helm_job( print(f"Running job '{job_name}' from helm chart: {chart_dir}") # Use helm template to render the job manifest - with tempfile.NamedTemporaryFile( - mode="w", suffix=".yaml", delete=False - ) as tmp_file: + with tempfile.NamedTemporaryFile(mode="w", 
suffix=".yaml", delete=False) as tmp_file: try: # Render job template with job enabled # Use --set-json to properly handle job names with dashes @@ -116,9 +115,7 @@ def run_helm_job( if verbose: print(f"Running: {' '.join(helm_cmd)}") - result = subprocess.run( - helm_cmd, check=True, capture_output=True, text=True - ) + result = subprocess.run(helm_cmd, check=True, capture_output=True, text=True) tmp_file.write(result.stdout) tmp_file.flush() @@ -139,9 +136,7 @@ def run_helm_job( "-n", namespace, ] - subprocess.run( - kubectl_apply_cmd, check=True, capture_output=True, text=True - ) + subprocess.run(kubectl_apply_cmd, check=True, capture_output=True, text=True) if verbose: print(f"Job {actual_job_name} created, waiting for completion...") @@ -164,7 +159,7 @@ def run_helm_job( except subprocess.CalledProcessError as e: error_msg = e.stderr if e.stderr else str(e) - raise Exception(f"Job failed: {error_msg}") + raise Exception(f"Job failed: {error_msg}") from e finally: # Clean up temp file if os.path.exists(tmp_file.name): diff --git a/stack_orchestrator/deploy/k8s/helm/kompose_wrapper.py b/stack_orchestrator/deploy/k8s/helm/kompose_wrapper.py index 520a668e..08fcf96f 100644 --- a/stack_orchestrator/deploy/k8s/helm/kompose_wrapper.py +++ b/stack_orchestrator/deploy/k8s/helm/kompose_wrapper.py @@ -13,10 +13,9 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . 
-import subprocess import shutil +import subprocess from pathlib import Path -from typing import List, Optional def check_kompose_available() -> bool: @@ -37,9 +36,7 @@ def get_kompose_version() -> str: if not check_kompose_available(): raise Exception("kompose not found in PATH") - result = subprocess.run( - ["kompose", "version"], capture_output=True, text=True, timeout=10 - ) + result = subprocess.run(["kompose", "version"], capture_output=True, text=True, timeout=10) if result.returncode != 0: raise Exception(f"Failed to get kompose version: {result.stderr}") @@ -53,7 +50,7 @@ def get_kompose_version() -> str: def convert_to_helm_chart( - compose_files: List[Path], output_dir: Path, chart_name: Optional[str] = None + compose_files: list[Path], output_dir: Path, chart_name: str | None = None ) -> str: """ Invoke kompose to convert Docker Compose files to a Helm chart. @@ -71,8 +68,7 @@ def convert_to_helm_chart( """ if not check_kompose_available(): raise Exception( - "kompose not found in PATH. " - "Install from: https://kompose.io/installation/" + "kompose not found in PATH. " "Install from: https://kompose.io/installation/" ) # Ensure output directory exists @@ -95,9 +91,7 @@ def convert_to_helm_chart( if result.returncode != 0: raise Exception( - f"Kompose conversion failed:\n" - f"Command: {' '.join(cmd)}\n" - f"Error: {result.stderr}" + f"Kompose conversion failed:\n" f"Command: {' '.join(cmd)}\n" f"Error: {result.stderr}" ) return result.stdout diff --git a/stack_orchestrator/deploy/k8s/helpers.py b/stack_orchestrator/deploy/k8s/helpers.py index 85f3d5f7..5cf749f4 100644 --- a/stack_orchestrator/deploy/k8s/helpers.py +++ b/stack_orchestrator/deploy/k8s/helpers.py @@ -13,20 +13,22 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . 
+import os +import re +import subprocess +from collections.abc import Mapping +from pathlib import Path +from typing import cast + +import yaml from kubernetes import client, utils, watch from kubernetes.client.exceptions import ApiException -import os -from pathlib import Path -import subprocess -import re -from typing import Set, Mapping, List, Optional, cast -import yaml -from stack_orchestrator.util import get_k8s_dir, error_exit -from stack_orchestrator.opts import opts +from stack_orchestrator import constants from stack_orchestrator.deploy.deploy_util import parsed_pod_files_map_from_file_names from stack_orchestrator.deploy.deployer import DeployerException -from stack_orchestrator import constants +from stack_orchestrator.opts import opts +from stack_orchestrator.util import error_exit, get_k8s_dir def is_host_path_mount(volume_name: str) -> bool: @@ -77,9 +79,7 @@ def get_kind_cluster(): Uses `kind get clusters` to find existing clusters. Returns the cluster name or None if no cluster exists. 
""" - result = subprocess.run( - "kind get clusters", shell=True, capture_output=True, text=True - ) + result = subprocess.run("kind get clusters", shell=True, capture_output=True, text=True) if result.returncode != 0: return None @@ -98,12 +98,12 @@ def _run_command(command: str): return result -def _get_etcd_host_path_from_kind_config(config_file: str) -> Optional[str]: +def _get_etcd_host_path_from_kind_config(config_file: str) -> str | None: """Extract etcd host path from kind config extraMounts.""" import yaml try: - with open(config_file, "r") as f: + with open(config_file) as f: config = yaml.safe_load(f) except Exception: return None @@ -113,7 +113,8 @@ def _get_etcd_host_path_from_kind_config(config_file: str) -> Optional[str]: extra_mounts = node.get("extraMounts", []) for mount in extra_mounts: if mount.get("containerPath") == "/var/lib/etcd": - return mount.get("hostPath") + host_path: str | None = mount.get("hostPath") + return host_path return None @@ -133,8 +134,7 @@ def _clean_etcd_keeping_certs(etcd_path: str) -> bool: db_path = Path(etcd_path) / "member" / "snap" / "db" # Check existence using docker since etcd dir is root-owned check_cmd = ( - f"docker run --rm -v {etcd_path}:/etcd:ro alpine:3.19 " - "test -f /etcd/member/snap/db" + f"docker run --rm -v {etcd_path}:/etcd:ro alpine:3.19 " "test -f /etcd/member/snap/db" ) check_result = subprocess.run(check_cmd, shell=True, capture_output=True) if check_result.returncode != 0: @@ -337,7 +337,7 @@ def is_ingress_running() -> bool: def wait_for_ingress_in_kind(): core_v1 = client.CoreV1Api() - for i in range(20): + for _i in range(20): warned_waiting = False w = watch.Watch() for event in w.stream( @@ -364,9 +364,7 @@ def wait_for_ingress_in_kind(): def install_ingress_for_kind(acme_email: str = ""): api_client = client.ApiClient() ingress_install = os.path.abspath( - get_k8s_dir().joinpath( - "components", "ingress", "ingress-caddy-kind-deploy.yaml" - ) + get_k8s_dir().joinpath("components", 
"ingress", "ingress-caddy-kind-deploy.yaml") ) if opts.o.debug: print("Installing Caddy ingress controller in kind cluster") @@ -400,11 +398,9 @@ def install_ingress_for_kind(acme_email: str = ""): ) -def load_images_into_kind(kind_cluster_name: str, image_set: Set[str]): +def load_images_into_kind(kind_cluster_name: str, image_set: set[str]): for image in image_set: - result = _run_command( - f"kind load docker-image {image} --name {kind_cluster_name}" - ) + result = _run_command(f"kind load docker-image {image} --name {kind_cluster_name}") if result.returncode != 0: raise DeployerException(f"kind load docker-image failed: {result}") @@ -422,11 +418,9 @@ def pods_in_deployment(core_api: client.CoreV1Api, deployment_name: str): return pods -def containers_in_pod(core_api: client.CoreV1Api, pod_name: str) -> List[str]: - containers: List[str] = [] - pod_response = cast( - client.V1Pod, core_api.read_namespaced_pod(pod_name, namespace="default") - ) +def containers_in_pod(core_api: client.CoreV1Api, pod_name: str) -> list[str]: + containers: list[str] = [] + pod_response = cast(client.V1Pod, core_api.read_namespaced_pod(pod_name, namespace="default")) if opts.o.debug: print(f"pod_response: {pod_response}") if not pod_response.spec or not pod_response.spec.containers: @@ -449,7 +443,7 @@ def named_volumes_from_pod_files(parsed_pod_files): parsed_pod_file = parsed_pod_files[pod] if "volumes" in parsed_pod_file: volumes = parsed_pod_file["volumes"] - for volume, value in volumes.items(): + for volume, _value in volumes.items(): # Volume definition looks like: # 'laconicd-data': None named_volumes.append(volume) @@ -481,14 +475,10 @@ def volume_mounts_for_service(parsed_pod_files, service): mount_split = mount_string.split(":") volume_name = mount_split[0] mount_path = mount_split[1] - mount_options = ( - mount_split[2] if len(mount_split) == 3 else None - ) + mount_options = mount_split[2] if len(mount_split) == 3 else None # For host path mounts, use sanitized name if 
is_host_path_mount(volume_name): - k8s_volume_name = sanitize_host_path_to_volume_name( - volume_name - ) + k8s_volume_name = sanitize_host_path_to_volume_name(volume_name) else: k8s_volume_name = volume_name if opts.o.debug: @@ -527,9 +517,7 @@ def volumes_for_pod_files(parsed_pod_files, spec, app_name): claim = client.V1PersistentVolumeClaimVolumeSource( claim_name=f"{app_name}-{volume_name}" ) - volume = client.V1Volume( - name=volume_name, persistent_volume_claim=claim - ) + volume = client.V1Volume(name=volume_name, persistent_volume_claim=claim) result.append(volume) # Handle host path mounts from service volumes @@ -542,15 +530,11 @@ def volumes_for_pod_files(parsed_pod_files, spec, app_name): mount_split = mount_string.split(":") volume_source = mount_split[0] if is_host_path_mount(volume_source): - sanitized_name = sanitize_host_path_to_volume_name( - volume_source - ) + sanitized_name = sanitize_host_path_to_volume_name(volume_source) if sanitized_name not in seen_host_path_volumes: seen_host_path_volumes.add(sanitized_name) # Create hostPath volume for mount inside kind node - kind_mount_path = get_kind_host_path_mount_path( - sanitized_name - ) + kind_mount_path = get_kind_host_path_mount_path(sanitized_name) host_path_source = client.V1HostPathVolumeSource( path=kind_mount_path, type="FileOrCreate" ) @@ -585,18 +569,14 @@ def _generate_kind_mounts(parsed_pod_files, deployment_dir, deployment_context): deployment_id = deployment_context.id backup_subdir = f"cluster-backups/{deployment_id}" - etcd_host_path = _make_absolute_host_path( - Path(f"./data/{backup_subdir}/etcd"), deployment_dir - ) + etcd_host_path = _make_absolute_host_path(Path(f"./data/{backup_subdir}/etcd"), deployment_dir) volume_definitions.append( f" - hostPath: {etcd_host_path}\n" f" containerPath: /var/lib/etcd\n" f" propagation: HostToContainer\n" ) - pki_host_path = _make_absolute_host_path( - Path(f"./data/{backup_subdir}/pki"), deployment_dir - ) + pki_host_path = 
_make_absolute_host_path(Path(f"./data/{backup_subdir}/pki"), deployment_dir) volume_definitions.append( f" - hostPath: {pki_host_path}\n" f" containerPath: /etc/kubernetes/pki\n" @@ -626,18 +606,12 @@ def _generate_kind_mounts(parsed_pod_files, deployment_dir, deployment_context): if is_host_path_mount(volume_name): # Host path mount - add extraMount for kind - sanitized_name = sanitize_host_path_to_volume_name( - volume_name - ) + sanitized_name = sanitize_host_path_to_volume_name(volume_name) if sanitized_name not in seen_host_path_mounts: seen_host_path_mounts.add(sanitized_name) # Resolve path relative to compose directory - host_path = resolve_host_path_for_kind( - volume_name, deployment_dir - ) - container_path = get_kind_host_path_mount_path( - sanitized_name - ) + host_path = resolve_host_path_for_kind(volume_name, deployment_dir) + container_path = get_kind_host_path_mount_path(sanitized_name) volume_definitions.append( f" - hostPath: {host_path}\n" f" containerPath: {container_path}\n" @@ -651,10 +625,7 @@ def _generate_kind_mounts(parsed_pod_files, deployment_dir, deployment_context): print(f"volume_name: {volume_name}") print(f"map: {volume_host_path_map}") print(f"mount path: {mount_path}") - if ( - volume_name - not in deployment_context.spec.get_configmaps() - ): + if volume_name not in deployment_context.spec.get_configmaps(): if ( volume_name in volume_host_path_map and volume_host_path_map[volume_name] @@ -663,9 +634,7 @@ def _generate_kind_mounts(parsed_pod_files, deployment_dir, deployment_context): volume_host_path_map[volume_name], deployment_dir, ) - container_path = get_kind_pv_bind_mount_path( - volume_name - ) + container_path = get_kind_pv_bind_mount_path(volume_name) volume_definitions.append( f" - hostPath: {host_path}\n" f" containerPath: {container_path}\n" @@ -693,8 +662,7 @@ def _generate_kind_port_mappings_from_services(parsed_pod_files): # TODO handle the complex cases # Looks like: 80 or something more complicated 
port_definitions.append( - f" - containerPort: {port_string}\n" - f" hostPort: {port_string}\n" + f" - containerPort: {port_string}\n" f" hostPort: {port_string}\n" ) return ( "" @@ -707,9 +675,7 @@ def _generate_kind_port_mappings(parsed_pod_files): port_definitions = [] # Map port 80 and 443 for the Caddy ingress controller (HTTPS support) for port_string in ["80", "443"]: - port_definitions.append( - f" - containerPort: {port_string}\n hostPort: {port_string}\n" - ) + port_definitions.append(f" - containerPort: {port_string}\n hostPort: {port_string}\n") return ( "" if len(port_definitions) == 0 @@ -903,9 +869,7 @@ def generate_cri_base_json(): return generate_high_memlock_spec_json() -def _generate_containerd_config_patches( - deployment_dir: Path, has_high_memlock: bool -) -> str: +def _generate_containerd_config_patches(deployment_dir: Path, has_high_memlock: bool) -> str: """Generate containerdConfigPatches YAML for custom runtime handlers. This configures containerd to have a runtime handler named 'high-memlock' @@ -932,9 +896,7 @@ def merge_envs(a: Mapping[str, str], b: Mapping[str, str]) -> Mapping[str, str]: return result -def _expand_shell_vars( - raw_val: str, env_map: Optional[Mapping[str, str]] = None -) -> str: +def _expand_shell_vars(raw_val: str, env_map: Mapping[str, str] | None = None) -> str: # Expand docker-compose style variable substitution: # ${VAR} - use VAR value or empty string # ${VAR:-default} - use VAR value or default if unset/empty @@ -959,7 +921,7 @@ def _expand_shell_vars( def envs_from_compose_file( - compose_file_envs: Mapping[str, str], env_map: Optional[Mapping[str, str]] = None + compose_file_envs: Mapping[str, str], env_map: Mapping[str, str] | None = None ) -> Mapping[str, str]: result = {} for env_var, env_val in compose_file_envs.items(): @@ -969,7 +931,7 @@ def envs_from_compose_file( def translate_sidecar_service_names( - envs: Mapping[str, str], sibling_service_names: List[str] + envs: Mapping[str, str], 
sibling_service_names: list[str] ) -> Mapping[str, str]: """Translate docker-compose service names to localhost for sidecar containers. @@ -996,7 +958,12 @@ def translate_sidecar_service_names( # Handle URLs like: postgres://user:pass@db:5432/dbname # and simple refs like: db:5432 or just db pattern = rf"\b{re.escape(service_name)}(:\d+)?\b" - new_val = re.sub(pattern, lambda m: f'localhost{m.group(1) or ""}', new_val) + + def _replace_with_localhost(m: re.Match[str]) -> str: + port: str = m.group(1) or "" + return "localhost" + port + + new_val = re.sub(pattern, _replace_with_localhost, new_val) result[env_var] = new_val @@ -1004,8 +971,8 @@ def translate_sidecar_service_names( def envs_from_environment_variables_map( - map: Mapping[str, str] -) -> List[client.V1EnvVar]: + map: Mapping[str, str], +) -> list[client.V1EnvVar]: result = [] for env_var, env_val in map.items(): result.append(client.V1EnvVar(env_var, env_val)) @@ -1036,17 +1003,13 @@ def generate_kind_config(deployment_dir: Path, deployment_context): pod_files = [p for p in compose_file_dir.iterdir() if p.is_file()] parsed_pod_files_map = parsed_pod_files_map_from_file_names(pod_files) port_mappings_yml = _generate_kind_port_mappings(parsed_pod_files_map) - mounts_yml = _generate_kind_mounts( - parsed_pod_files_map, deployment_dir, deployment_context - ) + mounts_yml = _generate_kind_mounts(parsed_pod_files_map, deployment_dir, deployment_context) # Check if unlimited_memlock is enabled unlimited_memlock = deployment_context.spec.get_unlimited_memlock() # Generate containerdConfigPatches for RuntimeClass support - containerd_patches_yml = _generate_containerd_config_patches( - deployment_dir, unlimited_memlock - ) + containerd_patches_yml = _generate_containerd_config_patches(deployment_dir, unlimited_memlock) # Add high-memlock spec file mount if needed if unlimited_memlock: diff --git a/stack_orchestrator/deploy/spec.py b/stack_orchestrator/deploy/spec.py index bd62779e..a795d6ec 100644 --- 
a/stack_orchestrator/deploy/spec.py +++ b/stack_orchestrator/deploy/spec.py @@ -14,19 +14,18 @@ # along with this program. If not, see . import typing -from typing import Optional -import humanfriendly - from pathlib import Path -from stack_orchestrator.util import get_yaml +import humanfriendly + from stack_orchestrator import constants +from stack_orchestrator.util import get_yaml class ResourceLimits: - cpus: Optional[float] = None - memory: Optional[int] = None - storage: Optional[int] = None + cpus: float | None = None + memory: int | None = None + storage: int | None = None def __init__(self, obj=None): if obj is None: @@ -50,8 +49,8 @@ class ResourceLimits: class Resources: - limits: Optional[ResourceLimits] = None - reservations: Optional[ResourceLimits] = None + limits: ResourceLimits | None = None + reservations: ResourceLimits | None = None def __init__(self, obj=None): if obj is None: @@ -74,9 +73,9 @@ class Resources: class Spec: obj: typing.Any - file_path: Optional[Path] + file_path: Path | None - def __init__(self, file_path: Optional[Path] = None, obj=None) -> None: + def __init__(self, file_path: Path | None = None, obj=None) -> None: if obj is None: obj = {} self.file_path = file_path @@ -92,13 +91,13 @@ class Spec: return self.obj.get(item, default) def init_from_file(self, file_path: Path): - self.obj = get_yaml().load(open(file_path, "r")) + self.obj = get_yaml().load(open(file_path)) self.file_path = file_path def get_image_registry(self): return self.obj.get(constants.image_registry_key) - def get_image_registry_config(self) -> typing.Optional[typing.Dict]: + def get_image_registry_config(self) -> dict | None: """Returns registry auth config: {server, username, token-env}. Used for private container registries like GHCR. The token-env field @@ -107,7 +106,8 @@ class Spec: Note: Uses 'registry-credentials' key to avoid collision with 'image-registry' key which is for pushing images. 
""" - return self.obj.get("registry-credentials") + result: dict[str, str] | None = self.obj.get("registry-credentials") + return result def get_volumes(self): return self.obj.get(constants.volumes_key, {}) @@ -116,35 +116,25 @@ class Spec: return self.obj.get(constants.configmaps_key, {}) def get_container_resources(self): - return Resources( - self.obj.get(constants.resources_key, {}).get("containers", {}) - ) + return Resources(self.obj.get(constants.resources_key, {}).get("containers", {})) - def get_container_resources_for( - self, container_name: str - ) -> typing.Optional[Resources]: + def get_container_resources_for(self, container_name: str) -> Resources | None: """Look up per-container resource overrides from spec.yml. Checks resources.containers. in the spec. Returns None if no per-container override exists (caller falls back to other sources). """ - containers_block = self.obj.get(constants.resources_key, {}).get( - "containers", {} - ) + containers_block = self.obj.get(constants.resources_key, {}).get("containers", {}) if container_name in containers_block: entry = containers_block[container_name] # Only treat it as a per-container override if it's a dict with # reservations/limits nested inside (not a top-level global key) - if isinstance(entry, dict) and ( - "reservations" in entry or "limits" in entry - ): + if isinstance(entry, dict) and ("reservations" in entry or "limits" in entry): return Resources(entry) return None def get_volume_resources(self): - return Resources( - self.obj.get(constants.resources_key, {}).get(constants.volumes_key, {}) - ) + return Resources(self.obj.get(constants.resources_key, {}).get(constants.volumes_key, {})) def get_http_proxy(self): return self.obj.get(constants.network_key, {}).get(constants.http_proxy_key, []) @@ -167,9 +157,7 @@ class Spec: def get_privileged(self): return ( "true" - == str( - self.obj.get(constants.security_key, {}).get("privileged", "false") - ).lower() + == 
str(self.obj.get(constants.security_key, {}).get("privileged", "false")).lower() ) def get_capabilities(self): @@ -196,9 +184,7 @@ class Spec: Runtime class name string, or None to use default runtime. """ # Explicit runtime class takes precedence - explicit = self.obj.get(constants.security_key, {}).get( - constants.runtime_class_key, None - ) + explicit = self.obj.get(constants.security_key, {}).get(constants.runtime_class_key, None) if explicit: return explicit diff --git a/stack_orchestrator/deploy/stack.py b/stack_orchestrator/deploy/stack.py index 75d40705..618419d3 100644 --- a/stack_orchestrator/deploy/stack.py +++ b/stack_orchestrator/deploy/stack.py @@ -13,8 +13,9 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . -from pathlib import Path import typing +from pathlib import Path + from stack_orchestrator.util import get_yaml @@ -26,4 +27,4 @@ class Stack: self.name = name def init_from_file(self, file_path: Path): - self.obj = get_yaml().load(open(file_path, "r")) + self.obj = get_yaml().load(open(file_path)) diff --git a/stack_orchestrator/deploy/webapp/deploy_webapp.py b/stack_orchestrator/deploy/webapp/deploy_webapp.py index 6170dbe3..17ab3ca5 100644 --- a/stack_orchestrator/deploy/webapp/deploy_webapp.py +++ b/stack_orchestrator/deploy/webapp/deploy_webapp.py @@ -13,23 +13,22 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . 
-import click import os from pathlib import Path -from urllib.parse import urlparse from tempfile import NamedTemporaryFile +from urllib.parse import urlparse + +import click -from stack_orchestrator.util import error_exit, global_options2 -from stack_orchestrator.deploy.deployment_create import init_operation, create_operation from stack_orchestrator.deploy.deploy import create_deploy_context from stack_orchestrator.deploy.deploy_types import DeployCommandContext +from stack_orchestrator.deploy.deployment_create import create_operation, init_operation +from stack_orchestrator.util import error_exit, global_options2 def _fixup_container_tag(deployment_dir: str, image: str): deployment_dir_path = Path(deployment_dir) - compose_file = deployment_dir_path.joinpath( - "compose", "docker-compose-webapp-template.yml" - ) + compose_file = deployment_dir_path.joinpath("compose", "docker-compose-webapp-template.yml") # replace "cerc/webapp-container:local" in the file with our image tag with open(compose_file) as rfile: contents = rfile.read() @@ -56,9 +55,7 @@ def _fixup_url_spec(spec_file_name: str, url: str): wfile.write(contents) -def create_deployment( - ctx, deployment_dir, image, url, kube_config, image_registry, env_file -): +def create_deployment(ctx, deployment_dir, image, url, kube_config, image_registry, env_file): # Do the equivalent of: # 1. 
laconic-so --stack webapp-template deploy --deploy-to k8s init \ # --output webapp-spec.yml @@ -117,9 +114,7 @@ def command(ctx): "--image-registry", help="Provide a container image registry url for this k8s cluster", ) -@click.option( - "--deployment-dir", help="Create deployment files in this directory", required=True -) +@click.option("--deployment-dir", help="Create deployment files in this directory", required=True) @click.option("--image", help="image to deploy", required=True) @click.option("--url", help="url to serve", required=True) @click.option("--env-file", help="environment file for webapp") @@ -127,6 +122,4 @@ def command(ctx): def create(ctx, deployment_dir, image, url, kube_config, image_registry, env_file): """create a deployment for the specified webapp container""" - return create_deployment( - ctx, deployment_dir, image, url, kube_config, image_registry, env_file - ) + return create_deployment(ctx, deployment_dir, image, url, kube_config, image_registry, env_file) diff --git a/stack_orchestrator/deploy/webapp/deploy_webapp_from_registry.py b/stack_orchestrator/deploy/webapp/deploy_webapp_from_registry.py index 92458c47..df5aa26a 100644 --- a/stack_orchestrator/deploy/webapp/deploy_webapp_from_registry.py +++ b/stack_orchestrator/deploy/webapp/deploy_webapp_from_registry.py @@ -21,10 +21,10 @@ import sys import tempfile import time import uuid -import yaml import click import gnupg +import yaml from stack_orchestrator.deploy.images import remote_image_exists from stack_orchestrator.deploy.webapp import deploy_webapp @@ -34,16 +34,16 @@ from stack_orchestrator.deploy.webapp.util import ( TimedLogger, build_container_image, confirm_auction, - push_container_image, - file_hash, - deploy_to_k8s, - publish_deployment, - hostname_for_deployment_request, - generate_hostname_for_app, - match_owner, - skip_by_tag, confirm_payment, + deploy_to_k8s, + file_hash, + generate_hostname_for_app, + hostname_for_deployment_request, load_known_requests, + 
match_owner, + publish_deployment, + push_container_image, + skip_by_tag, ) @@ -70,9 +70,7 @@ def process_app_deployment_request( logger.log("BEGIN - process_app_deployment_request") # 1. look up application - app = laconic.get_record( - app_deployment_request.attributes.application, require=True - ) + app = laconic.get_record(app_deployment_request.attributes.application, require=True) assert app is not None # require=True ensures this logger.log(f"Retrieved app record {app_deployment_request.attributes.application}") @@ -84,9 +82,7 @@ def process_app_deployment_request( if "allow" == fqdn_policy or "preexisting" == fqdn_policy: fqdn = requested_name else: - raise Exception( - f"{requested_name} is invalid: only unqualified hostnames are allowed." - ) + raise Exception(f"{requested_name} is invalid: only unqualified hostnames are allowed.") else: fqdn = f"{requested_name}.{default_dns_suffix}" @@ -108,8 +104,7 @@ def process_app_deployment_request( logger.log(f"Matched DnsRecord ownership: {matched_owner}") else: raise Exception( - "Unable to confirm ownership of DnsRecord %s for request %s" - % (dns_lrn, app_deployment_request.id) + f"Unable to confirm ownership of DnsRecord {dns_lrn} for request {app_deployment_request.id}" ) elif "preexisting" == fqdn_policy: raise Exception( @@ -144,7 +139,7 @@ def process_app_deployment_request( env_filename = tempfile.mktemp() with open(env_filename, "w") as file: for k, v in env.items(): - file.write("%s=%s\n" % (k, shlex.quote(str(v)))) + file.write(f"{k}={shlex.quote(str(v))}\n") # 5. determine new or existing deployment # a. 
check for deployment lrn @@ -153,8 +148,7 @@ def process_app_deployment_request( app_deployment_lrn = app_deployment_request.attributes.deployment if not app_deployment_lrn.startswith(deployment_record_namespace): raise Exception( - "Deployment LRN %s is not in a supported namespace" - % app_deployment_request.attributes.deployment + f"Deployment LRN {app_deployment_request.attributes.deployment} is not in a supported namespace" ) deployment_record = laconic.get_record(app_deployment_lrn) @@ -165,14 +159,14 @@ def process_app_deployment_request( # already-unique deployment id unique_deployment_id = hashlib.md5(fqdn.encode()).hexdigest()[:16] deployment_config_file = os.path.join(deployment_dir, "config.env") - deployment_container_tag = "laconic-webapp/%s:local" % unique_deployment_id + deployment_container_tag = f"laconic-webapp/{unique_deployment_id}:local" app_image_shared_tag = f"laconic-webapp/{app.id}:local" # b. check for deployment directory (create if necessary) if not os.path.exists(deployment_dir): if deployment_record: raise Exception( - "Deployment record %s exists, but not deployment dir %s. " - "Please remove name." % (app_deployment_lrn, deployment_dir) + f"Deployment record {app_deployment_lrn} exists, but not deployment dir {deployment_dir}. " + "Please remove name." ) logger.log( f"Creating webapp deployment in: {deployment_dir} " @@ -198,11 +192,7 @@ def process_app_deployment_request( ) # 6. 
build container (if needed) # TODO: add a comment that explains what this code is doing (not clear to me) - if ( - not deployment_record - or deployment_record.attributes.application != app.id - or force_rebuild - ): + if not deployment_record or deployment_record.attributes.application != app.id or force_rebuild: needs_k8s_deploy = True # check if the image already exists shared_tag_exists = remote_image_exists(image_registry, app_image_shared_tag) @@ -224,11 +214,9 @@ def process_app_deployment_request( # ) logger.log("Tag complete") else: - extra_build_args = [] # TODO: pull from request + extra_build_args: list[str] = [] # TODO: pull from request logger.log(f"Building container image: {deployment_container_tag}") - build_container_image( - app, deployment_container_tag, extra_build_args, logger - ) + build_container_image(app, deployment_container_tag, extra_build_args, logger) logger.log("Build complete") logger.log(f"Pushing container image: {deployment_container_tag}") push_container_image(deployment_dir, logger) @@ -287,9 +275,7 @@ def dump_known_requests(filename, requests, status="SEEN"): @click.command() @click.option("--kube-config", help="Provide a config file for a k8s deployment") -@click.option( - "--laconic-config", help="Provide a config file for laconicd", required=True -) +@click.option("--laconic-config", help="Provide a config file for laconicd", required=True) @click.option( "--image-registry", help="Provide a container image registry url for this k8s cluster", @@ -306,9 +292,7 @@ def dump_known_requests(filename, requests, status="SEEN"): is_flag=True, default=False, ) -@click.option( - "--state-file", help="File to store state about previously seen requests." 
-) +@click.option("--state-file", help="File to store state about previously seen requests.") @click.option( "--only-update-state", help="Only update the state file, don't process any requests anything.", @@ -331,9 +315,7 @@ def dump_known_requests(filename, requests, status="SEEN"): help="eg, lrn://laconic/deployments", required=True, ) -@click.option( - "--dry-run", help="Don't do anything, just report what would be done.", is_flag=True -) +@click.option("--dry-run", help="Don't do anything, just report what would be done.", is_flag=True) @click.option( "--include-tags", help="Only include requests with matching tags (comma-separated).", @@ -344,17 +326,13 @@ def dump_known_requests(filename, requests, status="SEEN"): help="Exclude requests with matching tags (comma-separated).", default="", ) -@click.option( - "--force-rebuild", help="Rebuild even if the image already exists.", is_flag=True -) +@click.option("--force-rebuild", help="Rebuild even if the image already exists.", is_flag=True) @click.option( "--recreate-on-deploy", help="Remove and recreate deployments instead of updating them.", is_flag=True, ) -@click.option( - "--log-dir", help="Output build/deployment logs to directory.", default=None -) +@click.option("--log-dir", help="Output build/deployment logs to directory.", default=None) @click.option( "--min-required-payment", help="Requests must have a minimum payment to be processed (in alnt)", @@ -378,9 +356,7 @@ def dump_known_requests(filename, requests, status="SEEN"): help="The directory containing uploaded config.", required=True, ) -@click.option( - "--private-key-file", help="The private key for decrypting config.", required=True -) +@click.option("--private-key-file", help="The private key for decrypting config.", required=True) @click.option( "--registry-lock-file", help="File path to use for registry mutex lock", @@ -435,11 +411,7 @@ def command( # noqa: C901 sys.exit(2) if not only_update_state: - if ( - not record_namespace_dns - or not 
record_namespace_deployments - or not dns_suffix - ): + if not record_namespace_dns or not record_namespace_deployments or not dns_suffix: print( "--dns-suffix, --record-namespace-dns, and " "--record-namespace-deployments are all required", @@ -491,8 +463,7 @@ def command( # noqa: C901 if min_required_payment and not payment_address: print( - f"Minimum payment required, but no payment address listed " - f"for deployer: {lrn}.", + f"Minimum payment required, but no payment address listed " f"for deployer: {lrn}.", file=sys.stderr, ) sys.exit(2) @@ -557,26 +528,18 @@ def command( # noqa: C901 requested_name = r.attributes.dns if not requested_name: requested_name = generate_hostname_for_app(app) - main_logger.log( - "Generating name %s for request %s." % (requested_name, r_id) - ) + main_logger.log(f"Generating name {requested_name} for request {r_id}.") - if ( - requested_name in skipped_by_name - or requested_name in requests_by_name - ): - main_logger.log( - "Ignoring request %s, it has been superseded." % r_id - ) + if requested_name in skipped_by_name or requested_name in requests_by_name: + main_logger.log(f"Ignoring request {r_id}, it has been superseded.") result = "SKIP" continue if skip_by_tag(r, include_tags, exclude_tags): r_tags = r.attributes.tags if r.attributes else None main_logger.log( - "Skipping request %s, filtered by tag " - "(include %s, exclude %s, present %s)" - % (r_id, include_tags, exclude_tags, r_tags) + f"Skipping request {r_id}, filtered by tag " + f"(include {include_tags}, exclude {exclude_tags}, present {r_tags})" ) skipped_by_name[requested_name] = r result = "SKIP" @@ -584,8 +547,7 @@ def command( # noqa: C901 r_app = r.attributes.application if r.attributes else "unknown" main_logger.log( - "Found pending request %s to run application %s on %s." - % (r_id, r_app, requested_name) + f"Found pending request {r_id} to run application {r_app} on {requested_name}." 
) requests_by_name[requested_name] = r except Exception as e: @@ -617,17 +579,14 @@ def command( # noqa: C901 requests_to_check_for_payment = [] for r in requests_by_name.values(): - if r.id in cancellation_requests and match_owner( - cancellation_requests[r.id], r - ): + if r.id in cancellation_requests and match_owner(cancellation_requests[r.id], r): main_logger.log( f"Found deployment cancellation request for {r.id} " f"at {cancellation_requests[r.id].id}" ) elif r.id in deployments_by_request: main_logger.log( - f"Found satisfied request for {r.id} " - f"at {deployments_by_request[r.id].id}" + f"Found satisfied request for {r.id} " f"at {deployments_by_request[r.id].id}" ) else: if ( @@ -635,8 +594,7 @@ def command( # noqa: C901 and previous_requests[r.id].get("status", "") != "RETRY" ): main_logger.log( - f"Skipping unsatisfied request {r.id} " - "because we have seen it before." + f"Skipping unsatisfied request {r.id} " "because we have seen it before." ) else: main_logger.log(f"Request {r.id} needs to processed.") @@ -650,14 +608,10 @@ def command( # noqa: C901 main_logger.log(f"{r.id}: Auction confirmed.") requests_to_execute.append(r) else: - main_logger.log( - f"Skipping request {r.id}: unable to verify auction." - ) + main_logger.log(f"Skipping request {r.id}: unable to verify auction.") dump_known_requests(state_file, [r], status="SKIP") else: - main_logger.log( - f"Skipping request {r.id}: not handling requests with auction." - ) + main_logger.log(f"Skipping request {r.id}: not handling requests with auction.") dump_known_requests(state_file, [r], status="SKIP") elif min_required_payment: main_logger.log(f"{r.id}: Confirming payment...") @@ -671,16 +625,12 @@ def command( # noqa: C901 main_logger.log(f"{r.id}: Payment confirmed.") requests_to_execute.append(r) else: - main_logger.log( - f"Skipping request {r.id}: unable to verify payment." 
- ) + main_logger.log(f"Skipping request {r.id}: unable to verify payment.") dump_known_requests(state_file, [r], status="UNPAID") else: requests_to_execute.append(r) - main_logger.log( - "Found %d unsatisfied request(s) to process." % len(requests_to_execute) - ) + main_logger.log(f"Found {len(requests_to_execute)} unsatisfied request(s) to process.") if not dry_run: for r in requests_to_execute: @@ -700,10 +650,8 @@ def command( # noqa: C901 if not os.path.exists(run_log_dir): os.mkdir(run_log_dir) run_log_file_path = os.path.join(run_log_dir, f"{run_id}.log") - main_logger.log( - f"Directing deployment logs to: {run_log_file_path}" - ) - run_log_file = open(run_log_file_path, "wt") + main_logger.log(f"Directing deployment logs to: {run_log_file_path}") + run_log_file = open(run_log_file_path, "w") run_reg_client = LaconicRegistryClient( laconic_config, log_file=run_log_file, diff --git a/stack_orchestrator/deploy/webapp/handle_deployment_auction.py b/stack_orchestrator/deploy/webapp/handle_deployment_auction.py index 933de899..45c6ad4f 100644 --- a/stack_orchestrator/deploy/webapp/handle_deployment_auction.py +++ b/stack_orchestrator/deploy/webapp/handle_deployment_auction.py @@ -12,18 +12,18 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . 
-import sys import json +import sys import click from stack_orchestrator.deploy.webapp.util import ( + AUCTION_KIND_PROVIDER, AttrDict, + AuctionStatus, LaconicRegistryClient, TimedLogger, load_known_requests, - AUCTION_KIND_PROVIDER, - AuctionStatus, ) @@ -44,16 +44,13 @@ def process_app_deployment_auction( # Check auction kind if auction.kind != AUCTION_KIND_PROVIDER: - raise Exception( - f"Auction kind needs to be ${AUCTION_KIND_PROVIDER}, got {auction.kind}" - ) + raise Exception(f"Auction kind needs to be ${AUCTION_KIND_PROVIDER}, got {auction.kind}") if current_status == "PENDING": # Skip if pending auction not in commit state if auction.status != AuctionStatus.COMMIT: logger.log( - f"Skipping pending request, auction {auction_id} " - f"status: {auction.status}" + f"Skipping pending request, auction {auction_id} " f"status: {auction.status}" ) return "SKIP", "" @@ -115,9 +112,7 @@ def dump_known_auction_requests(filename, requests, status="SEEN"): @click.command() -@click.option( - "--laconic-config", help="Provide a config file for laconicd", required=True -) +@click.option("--laconic-config", help="Provide a config file for laconicd", required=True) @click.option( "--state-file", help="File to store state about previously seen auction requests.", @@ -133,9 +128,7 @@ def dump_known_auction_requests(filename, requests, status="SEEN"): help="File path to use for registry mutex lock", default=None, ) -@click.option( - "--dry-run", help="Don't do anything, just report what would be done.", is_flag=True -) +@click.option("--dry-run", help="Don't do anything, just report what would be done.", is_flag=True) @click.pass_context def command( ctx, @@ -198,8 +191,7 @@ def command( continue logger.log( - f"Found pending auction request {r.id} for application " - f"{application}." + f"Found pending auction request {r.id} for application " f"{application}." 
) # Add requests to be processed @@ -209,9 +201,7 @@ def command( result_status = "ERROR" logger.log(f"ERROR: examining request {r.id}: " + str(e)) finally: - logger.log( - f"DONE: Examining request {r.id} with result {result_status}." - ) + logger.log(f"DONE: Examining request {r.id} with result {result_status}.") if result_status in ["ERROR"]: dump_known_auction_requests( state_file, diff --git a/stack_orchestrator/deploy/webapp/publish_deployment_auction.py b/stack_orchestrator/deploy/webapp/publish_deployment_auction.py index bdc12eac..8de2ad10 100644 --- a/stack_orchestrator/deploy/webapp/publish_deployment_auction.py +++ b/stack_orchestrator/deploy/webapp/publish_deployment_auction.py @@ -30,9 +30,7 @@ def fatal(msg: str): @click.command() -@click.option( - "--laconic-config", help="Provide a config file for laconicd", required=True -) +@click.option("--laconic-config", help="Provide a config file for laconicd", required=True) @click.option( "--app", help="The LRN of the application to deploy.", diff --git a/stack_orchestrator/deploy/webapp/publish_webapp_deployer.py b/stack_orchestrator/deploy/webapp/publish_webapp_deployer.py index f69a2031..ab661929 100644 --- a/stack_orchestrator/deploy/webapp/publish_webapp_deployer.py +++ b/stack_orchestrator/deploy/webapp/publish_webapp_deployer.py @@ -13,28 +13,24 @@ # along with this program. If not, see . import base64 -import click import sys -import yaml - from urllib.parse import urlparse +import click +import yaml + from stack_orchestrator.deploy.webapp.util import LaconicRegistryClient @click.command() -@click.option( - "--laconic-config", help="Provide a config file for laconicd", required=True -) +@click.option("--laconic-config", help="Provide a config file for laconicd", required=True) @click.option("--api-url", help="The API URL of the deployer.", required=True) @click.option( "--public-key-file", help="The public key to use. 
This should be a binary file.", required=True, ) -@click.option( - "--lrn", help="eg, lrn://laconic/deployers/my.deployer.name", required=True -) +@click.option("--lrn", help="eg, lrn://laconic/deployers/my.deployer.name", required=True) @click.option( "--payment-address", help="The address to which payments should be made. " @@ -84,9 +80,7 @@ def command( # noqa: C901 } if min_required_payment: - webapp_deployer_record["record"][ - "minimumPayment" - ] = f"{min_required_payment}alnt" + webapp_deployer_record["record"]["minimumPayment"] = f"{min_required_payment}alnt" if dry_run: yaml.dump(webapp_deployer_record, sys.stdout) diff --git a/stack_orchestrator/deploy/webapp/registry_mutex.py b/stack_orchestrator/deploy/webapp/registry_mutex.py index 1d023230..5883417f 100644 --- a/stack_orchestrator/deploy/webapp/registry_mutex.py +++ b/stack_orchestrator/deploy/webapp/registry_mutex.py @@ -1,6 +1,6 @@ -from functools import wraps import os import time +from functools import wraps # Define default file path for the lock DEFAULT_LOCK_FILE_PATH = "/tmp/registry_mutex_lock_file" @@ -17,7 +17,7 @@ def acquire_lock(client, lock_file_path, timeout): try: # Check if lock file exists and is potentially stale if os.path.exists(lock_file_path): - with open(lock_file_path, "r") as lock_file: + with open(lock_file_path) as lock_file: timestamp = float(lock_file.read().strip()) # If lock is stale, remove the lock file @@ -25,9 +25,7 @@ def acquire_lock(client, lock_file_path, timeout): print(f"Stale lock detected, removing lock file {lock_file_path}") os.remove(lock_file_path) else: - print( - f"Lock file {lock_file_path} exists and is recent, waiting..." 
- ) + print(f"Lock file {lock_file_path} exists and is recent, waiting...") time.sleep(LOCK_RETRY_INTERVAL) continue diff --git a/stack_orchestrator/deploy/webapp/request_webapp_deployment.py b/stack_orchestrator/deploy/webapp/request_webapp_deployment.py index 8f266cb4..57ffafd2 100644 --- a/stack_orchestrator/deploy/webapp/request_webapp_deployment.py +++ b/stack_orchestrator/deploy/webapp/request_webapp_deployment.py @@ -12,24 +12,24 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . +import base64 import shutil import sys import tempfile from datetime import datetime from typing import NoReturn -import base64 -import gnupg import click +import gnupg import requests import yaml +from dotenv import dotenv_values from stack_orchestrator.deploy.webapp.util import ( AUCTION_KIND_PROVIDER, AuctionStatus, LaconicRegistryClient, ) -from dotenv import dotenv_values def fatal(msg: str) -> NoReturn: @@ -38,9 +38,7 @@ def fatal(msg: str) -> NoReturn: @click.command() -@click.option( - "--laconic-config", help="Provide a config file for laconicd", required=True -) +@click.option("--laconic-config", help="Provide a config file for laconicd", required=True) @click.option( "--app", help="The LRN of the application to deploy.", @@ -63,9 +61,7 @@ def fatal(msg: str) -> NoReturn: "'auto' to use the deployer's minimum required payment." 
), ) -@click.option( - "--use-payment", help="The TX id of an existing, unused payment", default=None -) +@click.option("--use-payment", help="The TX id of an existing, unused payment", default=None) @click.option("--dns", help="the DNS name to request (default is autogenerated)") @click.option( "--dry-run", @@ -144,9 +140,7 @@ def command( # noqa: C901 # Check auction kind auction_kind = auction.kind if auction else None if auction_kind != AUCTION_KIND_PROVIDER: - fatal( - f"Auction kind needs to be ${AUCTION_KIND_PROVIDER}, got {auction_kind}" - ) + fatal(f"Auction kind needs to be ${AUCTION_KIND_PROVIDER}, got {auction_kind}") # Check auction status auction_status = auction.status if auction else None @@ -163,14 +157,9 @@ def command( # noqa: C901 # Get deployer record for all the auction winners for auction_winner in auction_winners: # TODO: Match auction winner address with provider address? - deployer_records_by_owner = laconic.webapp_deployers( - {"paymentAddress": auction_winner} - ) + deployer_records_by_owner = laconic.webapp_deployers({"paymentAddress": auction_winner}) if len(deployer_records_by_owner) == 0: - print( - f"WARNING: Unable to locate deployer for auction winner " - f"{auction_winner}" - ) + print(f"WARNING: Unable to locate deployer for auction winner " f"{auction_winner}") # Take first record with name set target_deployer_record = deployer_records_by_owner[0] @@ -196,9 +185,7 @@ def command( # noqa: C901 gpg = gnupg.GPG(gnupghome=tempdir) # Import the deployer's public key - result = gpg.import_keys( - base64.b64decode(deployer_record.attributes.publicKey) - ) + result = gpg.import_keys(base64.b64decode(deployer_record.attributes.publicKey)) if 1 != result.imported: fatal("Failed to import deployer's public key.") @@ -237,15 +224,9 @@ def command( # noqa: C901 if (not deployer) and len(deployer_record.names): target_deployer = deployer_record.names[0] - app_name = ( - app_record.attributes.name - if app_record and app_record.attributes - 
else "unknown" - ) + app_name = app_record.attributes.name if app_record and app_record.attributes else "unknown" app_version = ( - app_record.attributes.version - if app_record and app_record.attributes - else "unknown" + app_record.attributes.version if app_record and app_record.attributes else "unknown" ) deployment_request = { "record": { @@ -273,15 +254,11 @@ def command( # noqa: C901 deployment_request["record"]["payment"] = "DRY_RUN" elif "auto" == make_payment: if "minimumPayment" in deployer_record.attributes: - amount = int( - deployer_record.attributes.minimumPayment.replace("alnt", "") - ) + amount = int(deployer_record.attributes.minimumPayment.replace("alnt", "")) else: amount = make_payment if amount: - receipt = laconic.send_tokens( - deployer_record.attributes.paymentAddress, amount - ) + receipt = laconic.send_tokens(deployer_record.attributes.paymentAddress, amount) deployment_request["record"]["payment"] = receipt.tx.hash print("Payment TX:", receipt.tx.hash) elif use_payment: diff --git a/stack_orchestrator/deploy/webapp/request_webapp_undeployment.py b/stack_orchestrator/deploy/webapp/request_webapp_undeployment.py index 54bf2393..00bbe98e 100644 --- a/stack_orchestrator/deploy/webapp/request_webapp_undeployment.py +++ b/stack_orchestrator/deploy/webapp/request_webapp_undeployment.py @@ -26,12 +26,8 @@ def fatal(msg: str) -> None: @click.command() -@click.option( - "--laconic-config", help="Provide a config file for laconicd", required=True -) -@click.option( - "--deployer", help="The LRN of the deployer to process this request.", required=True -) +@click.option("--laconic-config", help="Provide a config file for laconicd", required=True) +@click.option("--deployer", help="The LRN of the deployer to process this request.", required=True) @click.option( "--deployment", help="Deployment record (ApplicationDeploymentRecord) id of the deployment.", @@ -44,9 +40,7 @@ def fatal(msg: str) -> None: "'auto' to use the deployer's minimum required 
payment." ), ) -@click.option( - "--use-payment", help="The TX id of an existing, unused payment", default=None -) +@click.option("--use-payment", help="The TX id of an existing, unused payment", default=None) @click.option( "--dry-run", help="Don't publish anything, just report what would be done.", diff --git a/stack_orchestrator/deploy/webapp/run_webapp.py b/stack_orchestrator/deploy/webapp/run_webapp.py index fe11fc30..35fc78a1 100644 --- a/stack_orchestrator/deploy/webapp/run_webapp.py +++ b/stack_orchestrator/deploy/webapp/run_webapp.py @@ -22,6 +22,7 @@ # all or specific containers import hashlib + import click from dotenv import dotenv_values diff --git a/stack_orchestrator/deploy/webapp/undeploy_webapp_from_registry.py b/stack_orchestrator/deploy/webapp/undeploy_webapp_from_registry.py index 30b6eaac..86ace1ae 100644 --- a/stack_orchestrator/deploy/webapp/undeploy_webapp_from_registry.py +++ b/stack_orchestrator/deploy/webapp/undeploy_webapp_from_registry.py @@ -21,11 +21,11 @@ import sys import click from stack_orchestrator.deploy.webapp.util import ( - TimedLogger, LaconicRegistryClient, + TimedLogger, + confirm_payment, match_owner, skip_by_tag, - confirm_payment, ) main_logger = TimedLogger(file=sys.stderr) @@ -40,9 +40,7 @@ def process_app_removal_request( delete_names, webapp_deployer_record, ): - deployment_record = laconic.get_record( - app_removal_request.attributes.deployment, require=True - ) + deployment_record = laconic.get_record(app_removal_request.attributes.deployment, require=True) assert deployment_record is not None # require=True ensures this assert deployment_record.attributes is not None @@ -50,12 +48,10 @@ def process_app_removal_request( assert dns_record is not None # require=True ensures this assert dns_record.attributes is not None - deployment_dir = os.path.join( - deployment_parent_dir, dns_record.attributes.name.lower() - ) + deployment_dir = os.path.join(deployment_parent_dir, dns_record.attributes.name.lower()) if not 
os.path.exists(deployment_dir): - raise Exception("Deployment directory %s does not exist." % deployment_dir) + raise Exception(f"Deployment directory {deployment_dir} does not exist.") # Check if the removal request is from the owner of the DnsRecord or # deployment record. @@ -63,9 +59,7 @@ def process_app_removal_request( # Or of the original deployment request. if not matched_owner and deployment_record.attributes.request: - original_request = laconic.get_record( - deployment_record.attributes.request, require=True - ) + original_request = laconic.get_record(deployment_record.attributes.request, require=True) assert original_request is not None # require=True ensures this matched_owner = match_owner(app_removal_request, original_request) @@ -75,8 +69,7 @@ def process_app_removal_request( deployment_id = deployment_record.id if deployment_record else "unknown" request_id = app_removal_request.id if app_removal_request else "unknown" raise Exception( - "Unable to confirm ownership of deployment %s for removal request %s" - % (deployment_id, request_id) + f"Unable to confirm ownership of deployment {deployment_id} for removal request {request_id}" ) # TODO(telackey): Call the function directly. 
The easiest way to build @@ -124,7 +117,7 @@ def process_app_removal_request( def load_known_requests(filename): if filename and os.path.exists(filename): - return json.load(open(filename, "r")) + return json.load(open(filename)) return {} @@ -138,9 +131,7 @@ def dump_known_requests(filename, requests): @click.command() -@click.option( - "--laconic-config", help="Provide a config file for laconicd", required=True -) +@click.option("--laconic-config", help="Provide a config file for laconicd", required=True) @click.option( "--deployment-parent-dir", help="Create deployment directories beneath this directory", @@ -153,9 +144,7 @@ def dump_known_requests(filename, requests): is_flag=True, default=False, ) -@click.option( - "--state-file", help="File to store state about previously seen requests." -) +@click.option("--state-file", help="File to store state about previously seen requests.") @click.option( "--only-update-state", help="Only update the state file, don't process any requests anything.", @@ -166,12 +155,8 @@ def dump_known_requests(filename, requests): help="Delete all names associated with removed deployments.", default=True, ) -@click.option( - "--delete-volumes/--preserve-volumes", default=True, help="delete data volumes" -) -@click.option( - "--dry-run", help="Don't do anything, just report what would be done.", is_flag=True -) +@click.option("--delete-volumes/--preserve-volumes", default=True, help="delete data volumes") +@click.option("--dry-run", help="Don't do anything, just report what would be done.", is_flag=True) @click.option( "--include-tags", help="Only include requests with matching tags (comma-separated).", @@ -245,8 +230,7 @@ def command( # noqa: C901 if min_required_payment and not payment_address: print( - f"Minimum payment required, but no payment address listed " - f"for deployer: {lrn}.", + f"Minimum payment required, but no payment address listed " f"for deployer: {lrn}.", file=sys.stderr, ) sys.exit(2) @@ -303,9 +287,7 @@ def 
command( # noqa: C901 continue if not r.attributes.deployment: r_id = r.id if r else "unknown" - main_logger.log( - f"Skipping removal request {r_id} since it was a cancellation." - ) + main_logger.log(f"Skipping removal request {r_id} since it was a cancellation.") elif r.attributes.deployment in one_per_deployment: r_id = r.id if r else "unknown" main_logger.log(f"Skipping removal request {r_id} since it was superseded.") @@ -323,14 +305,12 @@ def command( # noqa: C901 ) elif skip_by_tag(r, include_tags, exclude_tags): main_logger.log( - "Skipping removal request %s, filtered by tag " - "(include %s, exclude %s, present %s)" - % (r.id, include_tags, exclude_tags, r.attributes.tags) + f"Skipping removal request {r.id}, filtered by tag " + f"(include {include_tags}, exclude {exclude_tags}, present {r.attributes.tags})" ) elif r.id in removals_by_request: main_logger.log( - f"Found satisfied request for {r.id} " - f"at {removals_by_request[r.id].id}" + f"Found satisfied request for {r.id} " f"at {removals_by_request[r.id].id}" ) elif r.attributes.deployment in removals_by_deployment: main_logger.log( @@ -344,8 +324,7 @@ def command( # noqa: C901 requests_to_check_for_payment.append(r) else: main_logger.log( - f"Skipping unsatisfied request {r.id} " - "because we have seen it before." + f"Skipping unsatisfied request {r.id} " "because we have seen it before." ) except Exception as e: main_logger.log(f"ERROR examining {r.id}: {e}") @@ -370,9 +349,7 @@ def command( # noqa: C901 else: requests_to_execute = requests_to_check_for_payment - main_logger.log( - "Found %d unsatisfied request(s) to process." 
% len(requests_to_execute) - ) + main_logger.log(f"Found {len(requests_to_execute)} unsatisfied request(s) to process.") if not dry_run: for r in requests_to_execute: diff --git a/stack_orchestrator/deploy/webapp/util.py b/stack_orchestrator/deploy/webapp/util.py index 84accbcd..6969bb09 100644 --- a/stack_orchestrator/deploy/webapp/util.py +++ b/stack_orchestrator/deploy/webapp/util.py @@ -22,10 +22,10 @@ import subprocess import sys import tempfile import uuid -import yaml - from enum import Enum -from typing import Any, List, Optional, TextIO +from typing import Any, TextIO + +import yaml from stack_orchestrator.deploy.webapp.registry_mutex import registry_mutex @@ -43,17 +43,17 @@ AUCTION_KIND_PROVIDER = "provider" class AttrDict(dict): def __init__(self, *args: Any, **kwargs: Any) -> None: - super(AttrDict, self).__init__(*args, **kwargs) + super().__init__(*args, **kwargs) self.__dict__ = self def __getattribute__(self, attr: str) -> Any: - __dict__ = super(AttrDict, self).__getattribute__("__dict__") + __dict__ = super().__getattribute__("__dict__") if attr in __dict__: - v = super(AttrDict, self).__getattribute__(attr) + v = super().__getattribute__(attr) if isinstance(v, dict): return AttrDict(v) return v - return super(AttrDict, self).__getattribute__(attr) + return super().__getattribute__(attr) def __getattr__(self, attr: str) -> Any: # This method is called when attribute is not found @@ -62,15 +62,13 @@ class AttrDict(dict): class TimedLogger: - def __init__(self, id: str = "", file: Optional[TextIO] = None) -> None: + def __init__(self, id: str = "", file: TextIO | None = None) -> None: self.start = datetime.datetime.now() self.last = self.start self.id = id self.file = file - def log( - self, msg: str, show_step_time: bool = True, show_total_time: bool = False - ) -> None: + def log(self, msg: str, show_step_time: bool = True, show_total_time: bool = False) -> None: prefix = f"{datetime.datetime.utcnow()} - {self.id}" if show_step_time: prefix += f" 
- {datetime.datetime.now() - self.last} (step)" @@ -84,11 +82,11 @@ class TimedLogger: def load_known_requests(filename): if filename and os.path.exists(filename): - return json.load(open(filename, "r")) + return json.load(open(filename)) return {} -def logged_cmd(log_file: Optional[TextIO], *vargs: str) -> str: +def logged_cmd(log_file: TextIO | None, *vargs: str) -> str: result = None try: if log_file: @@ -105,15 +103,14 @@ def logged_cmd(log_file: Optional[TextIO], *vargs: str) -> str: raise err -def match_owner( - recordA: Optional[AttrDict], *records: Optional[AttrDict] -) -> Optional[str]: +def match_owner(recordA: AttrDict | None, *records: AttrDict | None) -> str | None: if not recordA or not recordA.owners: return None for owner in recordA.owners: for otherRecord in records: if otherRecord and otherRecord.owners and owner in otherRecord.owners: - return owner + result: str | None = owner + return result return None @@ -147,9 +144,7 @@ class LaconicRegistryClient: return self.cache["whoami"] args = ["laconic", "-c", self.config_file, "registry", "account", "get"] - results = [ - AttrDict(r) for r in json.loads(logged_cmd(self.log_file, *args)) if r - ] + results = [AttrDict(r) for r in json.loads(logged_cmd(self.log_file, *args)) if r] if len(results): self.cache["whoami"] = results[0] @@ -178,9 +173,7 @@ class LaconicRegistryClient: "--address", address, ] - results = [ - AttrDict(r) for r in json.loads(logged_cmd(self.log_file, *args)) if r - ] + results = [AttrDict(r) for r in json.loads(logged_cmd(self.log_file, *args)) if r] if len(results): self.cache["accounts"][address] = results[0] return results[0] @@ -203,9 +196,7 @@ class LaconicRegistryClient: "--id", id, ] - results = [ - AttrDict(r) for r in json.loads(logged_cmd(self.log_file, *args)) if r - ] + results = [AttrDict(r) for r in json.loads(logged_cmd(self.log_file, *args)) if r] self._add_to_cache(results) if len(results): return results[0] @@ -216,9 +207,7 @@ class LaconicRegistryClient: def 
list_bonds(self): args = ["laconic", "-c", self.config_file, "registry", "bond", "list"] - results = [ - AttrDict(r) for r in json.loads(logged_cmd(self.log_file, *args)) if r - ] + results = [AttrDict(r) for r in json.loads(logged_cmd(self.log_file, *args)) if r] self._add_to_cache(results) return results @@ -232,12 +221,10 @@ class LaconicRegistryClient: if criteria: for k, v in criteria.items(): - args.append("--%s" % k) + args.append(f"--{k}") args.append(str(v)) - results = [ - AttrDict(r) for r in json.loads(logged_cmd(self.log_file, *args)) if r - ] + results = [AttrDict(r) for r in json.loads(logged_cmd(self.log_file, *args)) if r] # Most recent records first results.sort(key=lambda r: r.createTime or "") @@ -246,7 +233,7 @@ class LaconicRegistryClient: return results - def _add_to_cache(self, records: List[AttrDict]) -> None: + def _add_to_cache(self, records: list[AttrDict]) -> None: if not records: return @@ -271,9 +258,7 @@ class LaconicRegistryClient: args = ["laconic", "-c", self.config_file, "registry", "name", "resolve", name] - parsed = [ - AttrDict(r) for r in json.loads(logged_cmd(self.log_file, *args)) if r - ] + parsed = [AttrDict(r) for r in json.loads(logged_cmd(self.log_file, *args)) if r] if parsed: self._add_to_cache(parsed) return parsed[0] @@ -303,9 +288,7 @@ class LaconicRegistryClient: name_or_id, ] - parsed = [ - AttrDict(r) for r in json.loads(logged_cmd(self.log_file, *args)) if r - ] + parsed = [AttrDict(r) for r in json.loads(logged_cmd(self.log_file, *args)) if r] if len(parsed): self._add_to_cache(parsed) return parsed[0] @@ -356,9 +339,7 @@ class LaconicRegistryClient: results = None try: - results = [ - AttrDict(r) for r in json.loads(logged_cmd(self.log_file, *args)) if r - ] + results = [AttrDict(r) for r in json.loads(logged_cmd(self.log_file, *args)) if r] except: # noqa: E722 pass @@ -422,7 +403,7 @@ class LaconicRegistryClient: record_file = open(record_fname, "w") yaml.dump(record, record_file) record_file.close() - 
print(open(record_fname, "r").read(), file=self.log_file) + print(open(record_fname).read(), file=self.log_file) new_record_id = json.loads( logged_cmd( @@ -573,10 +554,10 @@ def determine_base_container(clone_dir, app_type="webapp"): def build_container_image( - app_record: Optional[AttrDict], + app_record: AttrDict | None, tag: str, - extra_build_args: Optional[List[str]] = None, - logger: Optional[TimedLogger] = None, + extra_build_args: list[str] | None = None, + logger: TimedLogger | None = None, ) -> None: if app_record is None: raise ValueError("app_record cannot be None") @@ -649,9 +630,7 @@ def build_container_image( ) result.check_returncode() - base_container = determine_base_container( - clone_dir, app_record.attributes.app_type - ) + base_container = determine_base_container(clone_dir, app_record.attributes.app_type) if logger: logger.log("Building webapp ...") @@ -727,14 +706,12 @@ def publish_deployment( if not deploy_record: deploy_ver = "0.0.1" else: - deploy_ver = "0.0.%d" % ( - int(deploy_record.attributes.version.split(".")[-1]) + 1 - ) + deploy_ver = f"0.0.{int(deploy_record.attributes.version.split('.')[-1]) + 1}" if not dns_record: dns_ver = "0.0.1" else: - dns_ver = "0.0.%d" % (int(dns_record.attributes.version.split(".")[-1]) + 1) + dns_ver = f"0.0.{int(dns_record.attributes.version.split('.')[-1]) + 1}" spec = yaml.full_load(open(os.path.join(deployment_dir, "spec.yml"))) fqdn = spec["network"]["http-proxy"][0]["host-name"] @@ -779,13 +756,9 @@ def publish_deployment( # Set auction or payment id from request if app_deployment_request.attributes.auction: - new_deployment_record["record"][ - "auction" - ] = app_deployment_request.attributes.auction + new_deployment_record["record"]["auction"] = app_deployment_request.attributes.auction elif app_deployment_request.attributes.payment: - new_deployment_record["record"][ - "payment" - ] = app_deployment_request.attributes.payment + new_deployment_record["record"]["payment"] = 
app_deployment_request.attributes.payment if webapp_deployer_record: new_deployment_record["record"]["deployer"] = webapp_deployer_record.names[0] @@ -799,9 +772,7 @@ def publish_deployment( def hostname_for_deployment_request(app_deployment_request, laconic): dns_name = app_deployment_request.attributes.dns if not dns_name: - app = laconic.get_record( - app_deployment_request.attributes.application, require=True - ) + app = laconic.get_record(app_deployment_request.attributes.application, require=True) dns_name = generate_hostname_for_app(app) elif dns_name.startswith("lrn://"): record = laconic.get_record(dns_name, require=True) @@ -818,7 +789,7 @@ def generate_hostname_for_app(app): m.update(app.attributes.repository[0].encode()) else: m.update(app.attributes.repository.encode()) - return "%s-%s" % (last_part, m.hexdigest()[0:10]) + return f"{last_part}-{m.hexdigest()[0:10]}" def skip_by_tag(r, include_tags, exclude_tags): @@ -881,16 +852,13 @@ def confirm_payment( pay_denom = "".join([i for i in tx_amount if not i.isdigit()]) if pay_denom != "alnt": logger.log( - f"{record.id}: {pay_denom} in tx {tx.hash} is not an expected " - "payment denomination" + f"{record.id}: {pay_denom} in tx {tx.hash} is not an expected " "payment denomination" ) return False pay_amount = int("".join([i for i in tx_amount if i.isdigit()]) or "0") if pay_amount < min_amount: - logger.log( - f"{record.id}: payment amount {tx.amount} is less than minimum {min_amount}" - ) + logger.log(f"{record.id}: payment amount {tx.amount} is less than minimum {min_amount}") return False # Check if the payment was already used on a deployment @@ -914,9 +882,7 @@ def confirm_payment( {"deployer": record.attributes.deployer, "payment": tx.hash}, all=True ) if len(used): - logger.log( - f"{record.id}: payment {tx.hash} already used on deployment removal {used}" - ) + logger.log(f"{record.id}: payment {tx.hash} already used on deployment removal {used}") return False return True @@ -940,9 +906,7 @@ def 
confirm_auction( # Cross check app against application in the auction record requested_app = laconic.get_record(record.attributes.application, require=True) - auction_app = laconic.get_record( - auction_records_by_id[0].attributes.application, require=True - ) + auction_app = laconic.get_record(auction_records_by_id[0].attributes.application, require=True) requested_app_id = requested_app.id if requested_app else None auction_app_id = auction_app.id if auction_app else None if requested_app_id != auction_app_id: diff --git a/stack_orchestrator/main.py b/stack_orchestrator/main.py index 826ef4ff..b0fc0b95 100644 --- a/stack_orchestrator/main.py +++ b/stack_orchestrator/main.py @@ -15,30 +15,24 @@ import click +from stack_orchestrator import opts, update, version +from stack_orchestrator.build import build_containers, build_npms, build_webapp, fetch_containers from stack_orchestrator.command_types import CommandOptions -from stack_orchestrator.repos import setup_repositories -from stack_orchestrator.repos import fetch_stack -from stack_orchestrator.build import build_containers, fetch_containers -from stack_orchestrator.build import build_npms -from stack_orchestrator.build import build_webapp +from stack_orchestrator.deploy import deploy, deployment from stack_orchestrator.deploy.webapp import ( - run_webapp, deploy_webapp, deploy_webapp_from_registry, - undeploy_webapp_from_registry, - publish_webapp_deployer, - publish_deployment_auction, handle_deployment_auction, + publish_deployment_auction, + publish_webapp_deployer, request_webapp_deployment, request_webapp_undeployment, + run_webapp, + undeploy_webapp_from_registry, ) -from stack_orchestrator.deploy import deploy -from stack_orchestrator import version -from stack_orchestrator.deploy import deployment -from stack_orchestrator import opts -from stack_orchestrator import update +from stack_orchestrator.repos import fetch_stack, setup_repositories -CONTEXT_SETTINGS = dict(help_option_names=["-h", "--help"]) 
+CONTEXT_SETTINGS = {"help_option_names": ["-h", "--help"]} @click.group(context_settings=CONTEXT_SETTINGS) diff --git a/stack_orchestrator/repos/fetch_stack.py b/stack_orchestrator/repos/fetch_stack.py index cee97d0c..5f66355f 100644 --- a/stack_orchestrator/repos/fetch_stack.py +++ b/stack_orchestrator/repos/fetch_stack.py @@ -17,9 +17,9 @@ # CERC_REPO_BASE_DIR defaults to ~/cerc -import click import os +import click from decouple import config from git import exc @@ -36,9 +36,7 @@ from stack_orchestrator.util import error_exit @click.pass_context def command(ctx, stack_locator, git_ssh, check_only, pull): """Optionally resolve then git clone a repository with stack definitions.""" - dev_root_path = os.path.expanduser( - str(config("CERC_REPO_BASE_DIR", default="~/cerc")) - ) + dev_root_path = os.path.expanduser(str(config("CERC_REPO_BASE_DIR", default="~/cerc"))) if not opts.o.quiet: print(f"Dev Root is: {dev_root_path}") try: diff --git a/stack_orchestrator/repos/setup_repositories.py b/stack_orchestrator/repos/setup_repositories.py index 6edd8085..ea248c11 100644 --- a/stack_orchestrator/repos/setup_repositories.py +++ b/stack_orchestrator/repos/setup_repositories.py @@ -16,20 +16,22 @@ # env vars: # CERC_REPO_BASE_DIR defaults to ~/cerc +import importlib.resources import os import sys -from decouple import config -import git -from git.exc import GitCommandError, InvalidGitRepositoryError from typing import Any -from tqdm import tqdm + import click -import importlib.resources +import git +from decouple import config +from git.exc import GitCommandError, InvalidGitRepositoryError +from tqdm import tqdm + from stack_orchestrator.opts import opts from stack_orchestrator.util import ( + error_exit, get_parsed_stack_config, include_exclude_check, - error_exit, warn_exit, ) @@ -86,48 +88,38 @@ def _get_repo_current_branch_or_tag(full_filesystem_repo_path): current_repo_branch_or_tag = "***UNDETERMINED***" is_branch = False try: - current_repo_branch_or_tag = 
git.Repo( - full_filesystem_repo_path - ).active_branch.name + current_repo_branch_or_tag = git.Repo(full_filesystem_repo_path).active_branch.name is_branch = True except TypeError: # This means that the current ref is not a branch, so possibly a tag # Let's try to get the tag try: - current_repo_branch_or_tag = git.Repo( - full_filesystem_repo_path - ).git.describe("--tags", "--exact-match") + current_repo_branch_or_tag = git.Repo(full_filesystem_repo_path).git.describe( + "--tags", "--exact-match" + ) # Note that git is asymmetric -- the tag you told it to check out # may not be the one you get back here (if there are multiple tags # associated with the same commit) except GitCommandError: # If there is no matching branch or tag checked out, just use the current # SHA - current_repo_branch_or_tag = ( - git.Repo(full_filesystem_repo_path).commit("HEAD").hexsha - ) + current_repo_branch_or_tag = git.Repo(full_filesystem_repo_path).commit("HEAD").hexsha return current_repo_branch_or_tag, is_branch # TODO: fix the messy arg list here -def process_repo( - pull, check_only, git_ssh, dev_root_path, branches_array, fully_qualified_repo -): +def process_repo(pull, check_only, git_ssh, dev_root_path, branches_array, fully_qualified_repo): if opts.o.verbose: print(f"Processing repo: {fully_qualified_repo}") repo_host, repo_path, repo_branch = host_and_path_for_repo(fully_qualified_repo) git_ssh_prefix = f"git@{repo_host}:" git_http_prefix = f"https://{repo_host}/" - full_github_repo_path = ( - f"{git_ssh_prefix if git_ssh else git_http_prefix}{repo_path}" - ) + full_github_repo_path = f"{git_ssh_prefix if git_ssh else git_http_prefix}{repo_path}" repoName = repo_path.split("/")[-1] full_filesystem_repo_path = os.path.join(dev_root_path, repoName) is_present = os.path.isdir(full_filesystem_repo_path) (current_repo_branch_or_tag, is_branch) = ( - _get_repo_current_branch_or_tag(full_filesystem_repo_path) - if is_present - else (None, None) + 
_get_repo_current_branch_or_tag(full_filesystem_repo_path) if is_present else (None, None) ) if not opts.o.quiet: present_text = ( @@ -140,10 +132,7 @@ def process_repo( # Quick check that it's actually a repo if is_present: if not is_git_repo(full_filesystem_repo_path): - print( - f"Error: {full_filesystem_repo_path} does not contain " - "a valid git repository" - ) + print(f"Error: {full_filesystem_repo_path} does not contain " "a valid git repository") sys.exit(1) else: if pull: @@ -190,8 +179,7 @@ def process_repo( if branch_to_checkout: if current_repo_branch_or_tag is None or ( - current_repo_branch_or_tag - and (current_repo_branch_or_tag != branch_to_checkout) + current_repo_branch_or_tag and (current_repo_branch_or_tag != branch_to_checkout) ): if not opts.o.quiet: print(f"switching to branch {branch_to_checkout} in repo {repo_path}") @@ -245,14 +233,9 @@ def command(ctx, include, exclude, git_ssh, check_only, pull, branches): if local_stack: dev_root_path = os.getcwd()[0 : os.getcwd().rindex("stack-orchestrator")] - print( - f"Local stack dev_root_path (CERC_REPO_BASE_DIR) overridden to: " - f"{dev_root_path}" - ) + print(f"Local stack dev_root_path (CERC_REPO_BASE_DIR) overridden to: " f"{dev_root_path}") else: - dev_root_path = os.path.expanduser( - str(config("CERC_REPO_BASE_DIR", default="~/cerc")) - ) + dev_root_path = os.path.expanduser(str(config("CERC_REPO_BASE_DIR", default="~/cerc"))) if not quiet: print(f"Dev Root is: {dev_root_path}") @@ -265,9 +248,7 @@ def command(ctx, include, exclude, git_ssh, check_only, pull, branches): # See: https://stackoverflow.com/a/20885799/1701505 from stack_orchestrator import data - with importlib.resources.open_text( - data, "repository-list.txt" - ) as repository_list_file: + with importlib.resources.open_text(data, "repository-list.txt") as repository_list_file: all_repos = repository_list_file.read().splitlines() repos_in_scope = [] diff --git a/stack_orchestrator/update.py b/stack_orchestrator/update.py 
index 85fb8b41..3c495fc3 100644 --- a/stack_orchestrator/update.py +++ b/stack_orchestrator/update.py @@ -13,16 +13,18 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . -import click import datetime import filecmp import os -from pathlib import Path -import requests -import sys -import stat import shutil +import stat +import sys +from pathlib import Path + +import click +import requests import validators + from stack_orchestrator.util import get_yaml @@ -40,9 +42,7 @@ def _error_exit(s: str): # Note at present this probably won't work on non-Unix based OSes like Windows @click.command() -@click.option( - "--check-only", is_flag=True, default=False, help="only check, don't update" -) +@click.option("--check-only", is_flag=True, default=False, help="only check, don't update") @click.pass_context def command(ctx, check_only): """update shiv binary from a distribution url""" @@ -52,7 +52,7 @@ def command(ctx, check_only): if not config_file_path.exists(): _error_exit(f"Error: Config file: {config_file_path} not found") yaml = get_yaml() - config = yaml.load(open(config_file_path, "r")) + config = yaml.load(open(config_file_path)) if "distribution-url" not in config: _error_exit(f"Error: {config_key} not defined in {config_file_path}") distribution_url = config[config_key] @@ -61,9 +61,7 @@ def command(ctx, check_only): _error_exit(f"ERROR: distribution url: {distribution_url} is not valid") # Figure out the filename for ourselves shiv_binary_path = Path(sys.argv[0]) - timestamp_filename = ( - f"laconic-so-download-{datetime.datetime.now().strftime('%y%m%d-%H%M%S')}" - ) + timestamp_filename = f"laconic-so-download-{datetime.datetime.now().strftime('%y%m%d-%H%M%S')}" temp_download_path = shiv_binary_path.parent.joinpath(timestamp_filename) # Download the file to a temp filename if ctx.obj.verbose: diff --git a/stack_orchestrator/util.py b/stack_orchestrator/util.py index fc8437ca..766e948f 100644 --- 
a/stack_orchestrator/util.py +++ b/stack_orchestrator/util.py @@ -13,14 +13,17 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . -from decouple import config import os.path import sys -import ruamel.yaml +from collections.abc import Mapping from pathlib import Path +from typing import NoReturn + +import ruamel.yaml +from decouple import config from dotenv import dotenv_values -from typing import Mapping, NoReturn, Optional, Set, List -from stack_orchestrator.constants import stack_file_name, deployment_file_name + +from stack_orchestrator.constants import deployment_file_name, stack_file_name def include_exclude_check(s, include, exclude): @@ -50,14 +53,9 @@ def get_dev_root_path(ctx): if ctx and ctx.local_stack: # TODO: This code probably doesn't work dev_root_path = os.getcwd()[0 : os.getcwd().rindex("stack-orchestrator")] - print( - f"Local stack dev_root_path (CERC_REPO_BASE_DIR) overridden to: " - f"{dev_root_path}" - ) + print(f"Local stack dev_root_path (CERC_REPO_BASE_DIR) overridden to: " f"{dev_root_path}") else: - dev_root_path = os.path.expanduser( - str(config("CERC_REPO_BASE_DIR", default="~/cerc")) - ) + dev_root_path = os.path.expanduser(str(config("CERC_REPO_BASE_DIR", default="~/cerc"))) return dev_root_path @@ -65,7 +63,7 @@ def get_dev_root_path(ctx): def get_parsed_stack_config(stack): stack_file_path = get_stack_path(stack).joinpath(stack_file_name) if stack_file_path.exists(): - return get_yaml().load(open(stack_file_path, "r")) + return get_yaml().load(open(stack_file_path)) # We try here to generate a useful diagnostic error # First check if the stack directory is present if stack_file_path.parent.exists(): @@ -101,10 +99,10 @@ def get_job_list(parsed_stack): return result -def get_plugin_code_paths(stack) -> List[Path]: +def get_plugin_code_paths(stack) -> list[Path]: parsed_stack = get_parsed_stack_config(stack) pods = parsed_stack["pods"] - result: Set[Path] = set() + 
result: set[Path] = set() for pod in pods: if type(pod) is str: result.add(get_stack_path(stack)) @@ -191,7 +189,7 @@ def get_job_file_path(stack, parsed_stack, job_name: str): def get_pod_script_paths(parsed_stack, pod_name: str): pods = parsed_stack["pods"] result = [] - if not type(pods[0]) is str: + if type(pods[0]) is not str: for pod in pods: if pod["name"] == pod_name: pod_root_dir = os.path.join( @@ -243,7 +241,7 @@ def get_k8s_dir(): def get_parsed_deployment_spec(spec_file): spec_file_path = Path(spec_file) try: - return get_yaml().load(open(spec_file_path, "r")) + return get_yaml().load(open(spec_file_path)) except FileNotFoundError as error: # We try here to generate a useful diagnostic error print(f"Error: spec file: {spec_file_path} does not exist") @@ -293,5 +291,6 @@ def warn_exit(s) -> NoReturn: sys.exit(0) -def env_var_map_from_file(file: Path) -> Mapping[str, Optional[str]]: - return dotenv_values(file) +def env_var_map_from_file(file: Path) -> Mapping[str, str | None]: + result: Mapping[str, str | None] = dotenv_values(file) + return result diff --git a/stack_orchestrator/version.py b/stack_orchestrator/version.py index 67bb6b13..1862f041 100644 --- a/stack_orchestrator/version.py +++ b/stack_orchestrator/version.py @@ -13,8 +13,9 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . +from importlib import metadata, resources + import click -from importlib import resources, metadata @click.command()