From 591d158e1f1e7f800cdda1526f45f4d5ea844456 Mon Sep 17 00:00:00 2001
From: "A. F. Dudley"
Date: Sun, 8 Mar 2026 06:59:07 +0000
Subject: [PATCH] chore: populate pebbles with known bugs and feature requests
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Issues:
- bar-a3b [P0] agave-validator crash after ~57 seconds
- bar-41a [P1] telegraf volume mounts missing from pod spec
- bar-02e [P1] zvol mount bug (closed — fixed 2026-03-08)
- bar-b04 [P2] update redeploy to use deployment prepare
- bar-b41 [P2] snapshot leapfrog recovery playbook
- bar-0b4 [P3] prepare-agave unconditionally imports relay playbook

Co-Authored-By: Claude Opus 4.6
---
 .pebbles/.gitignore   |  1 +
 .pebbles/config.json  |  3 +++
 .pebbles/events.jsonl | 45 +++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 49 insertions(+)
 create mode 100644 .pebbles/.gitignore
 create mode 100644 .pebbles/config.json
 create mode 100644 .pebbles/events.jsonl

diff --git a/.pebbles/.gitignore b/.pebbles/.gitignore
new file mode 100644
index 00000000..0a168c65
--- /dev/null
+++ b/.pebbles/.gitignore
@@ -0,0 +1 @@
+pebbles.db
diff --git a/.pebbles/config.json b/.pebbles/config.json
new file mode 100644
index 00000000..88b6e374
--- /dev/null
+++ b/.pebbles/config.json
@@ -0,0 +1,3 @@
+{
+  "prefix": "bar"
+}
\ No newline at end of file
diff --git a/.pebbles/events.jsonl b/.pebbles/events.jsonl
new file mode 100644
index 00000000..7d1397a3
--- /dev/null
+++ b/.pebbles/events.jsonl
@@ -0,0 +1,45 @@
+{"type":"create","timestamp":"2026-03-06T07:57:55.427398426Z","issue_id":"bar-48f","payload":{"description":"Route all validator traffic (gossip, repair, TVU, TPU) through 137.239.194.65 on laconic-was-sw01 in Ashburn. Supersedes old TVU-only shred relay. See docs/ashburn-validator-relay.md for full design.","priority":"1","title":"Ashburn Full Validator Traffic Relay","type":"epic"}}
+{"type":"create","timestamp":"2026-03-06T07:58:01.589463071Z","issue_id":"bar-a47","payload":{"description":"Create Loopback101 (137.239.194.65/32), VALIDATOR-RELAY ACL + traffic-policy on Et1/1, replacing old SHRED-RELAY. Uses 5-min auto-revert config session. Playbook: playbooks/ashburn-relay-was-sw01.yml","priority":"1","title":"was-sw01: Inbound validator relay config","type":"task"}}
+{"type":"create","timestamp":"2026-03-06T07:58:07.292140983Z","issue_id":"bar-0e5","payload":{"description":"Add 137.239.194.65/32 to lo, DNAT rules for ports 8001,9000-9025 to kind node 172.20.0.2. Playbook: playbooks/ashburn-relay-biscayne.yml -t inbound","priority":"1","title":"biscayne: Inbound DNAT rules","type":"task"}}
+{"type":"create","timestamp":"2026-03-06T07:58:10.838534858Z","issue_id":"bar-f9b","payload":{"description":"Ping 137.239.194.65 from external host, check DNAT counters on biscayne, verify traffic-policy counters on was-sw01.","priority":"1","title":"Verify inbound relay","type":"task"}}
+{"type":"create","timestamp":"2026-03-06T07:58:15.228970622Z","issue_id":"bar-bf4","payload":{"description":"Pre-flight to discover GRE tunnel interface, then apply VALIDATOR-OUTBOUND traffic-policy redirecting src 137.239.194.65 to was-sw01 via backbone. Playbook: playbooks/ashburn-relay-mia-sw01.yml","priority":"1","title":"mia-sw01: Outbound validator redirect","type":"task"}}
+{"type":"create","timestamp":"2026-03-06T07:58:19.571640837Z","issue_id":"bar-78d","payload":{"description":"fwmark 100 on validator source ports, SNAT to 137.239.194.65, policy route via doublezero0 table ashburn. Playbook: playbooks/ashburn-relay-biscayne.yml -t outbound","priority":"1","title":"biscayne: Outbound SNAT + policy routing","type":"task"}}
+{"type":"create","timestamp":"2026-03-06T07:58:23.377441628Z","issue_id":"bar-f3b","payload":{"description":"Verify traffic-policy counters on both switches, iptables counters on biscayne, validator gossip ContactInfo shows 137.239.194.65, repair peer count increases, slot catchup rate improves. Write memory on both switches.","priority":"1","title":"End-to-end verification","type":"task"}}
+{"type":"create","timestamp":"2026-03-06T07:58:27.341320984Z","issue_id":"bar-8a9","payload":{"description":"After stable: remove old SHRED-RELAY policy and ACL from was-sw01, remove old 64.92.84.81:20000 DNAT from biscayne.","priority":"2","title":"Cleanup old SHRED-RELAY","type":"task"}}
+{"type":"rename","timestamp":"2026-03-06T07:58:32.091645662Z","issue_id":"bar-a47","payload":{"new_id":"bar-48f.1"}}
+{"type":"dep_add","timestamp":"2026-03-06T07:58:32.091647902Z","issue_id":"bar-48f.1","payload":{"dep_type":"parent-child","depends_on":"bar-48f"}}
+{"type":"rename","timestamp":"2026-03-06T07:58:32.274391159Z","issue_id":"bar-0e5","payload":{"new_id":"bar-48f.2"}}
+{"type":"dep_add","timestamp":"2026-03-06T07:58:32.274392749Z","issue_id":"bar-48f.2","payload":{"dep_type":"parent-child","depends_on":"bar-48f"}}
+{"type":"rename","timestamp":"2026-03-06T07:58:32.468426932Z","issue_id":"bar-f9b","payload":{"new_id":"bar-48f.3"}}
+{"type":"dep_add","timestamp":"2026-03-06T07:58:32.468428522Z","issue_id":"bar-48f.3","payload":{"dep_type":"parent-child","depends_on":"bar-48f"}}
+{"type":"rename","timestamp":"2026-03-06T07:58:32.657295386Z","issue_id":"bar-bf4","payload":{"new_id":"bar-48f.4"}}
+{"type":"dep_add","timestamp":"2026-03-06T07:58:32.657297846Z","issue_id":"bar-48f.4","payload":{"dep_type":"parent-child","depends_on":"bar-48f"}}
+{"type":"rename","timestamp":"2026-03-06T07:58:32.864939519Z","issue_id":"bar-78d","payload":{"new_id":"bar-48f.5"}}
+{"type":"dep_add","timestamp":"2026-03-06T07:58:32.864941739Z","issue_id":"bar-48f.5","payload":{"dep_type":"parent-child","depends_on":"bar-48f"}}
+{"type":"rename","timestamp":"2026-03-06T07:58:33.364299485Z","issue_id":"bar-f3b","payload":{"new_id":"bar-48f.6"}}
+{"type":"dep_add","timestamp":"2026-03-06T07:58:33.364301305Z","issue_id":"bar-48f.6","payload":{"dep_type":"parent-child","depends_on":"bar-48f"}}
+{"type":"rename","timestamp":"2026-03-06T07:58:33.639638369Z","issue_id":"bar-8a9","payload":{"new_id":"bar-48f.7"}}
+{"type":"dep_add","timestamp":"2026-03-06T07:58:33.639640069Z","issue_id":"bar-48f.7","payload":{"dep_type":"parent-child","depends_on":"bar-48f"}}
+{"type":"dep_add","timestamp":"2026-03-06T07:58:39.486721446Z","issue_id":"bar-48f.2","payload":{"dep_type":"blocks","depends_on":"bar-48f.1"}}
+{"type":"dep_add","timestamp":"2026-03-06T07:58:39.911749641Z","issue_id":"bar-48f.3","payload":{"dep_type":"blocks","depends_on":"bar-48f.2"}}
+{"type":"dep_add","timestamp":"2026-03-06T07:58:40.398532353Z","issue_id":"bar-48f.4","payload":{"dep_type":"blocks","depends_on":"bar-48f.3"}}
+{"type":"dep_add","timestamp":"2026-03-06T07:58:40.762666046Z","issue_id":"bar-48f.5","payload":{"dep_type":"blocks","depends_on":"bar-48f.4"}}
+{"type":"dep_add","timestamp":"2026-03-06T07:58:41.173027726Z","issue_id":"bar-48f.6","payload":{"dep_type":"blocks","depends_on":"bar-48f.5"}}
+{"type":"dep_add","timestamp":"2026-03-06T07:58:41.467313496Z","issue_id":"bar-48f.7","payload":{"dep_type":"blocks","depends_on":"bar-48f.6"}}
+{"type":"update","timestamp":"2026-03-06T18:32:00.041874266Z","issue_id":"bar-48f.1","payload":{"description":"Run ansible playbook (pane A) to apply config session with 5-min auto-revert. Review output. In pane B, SSH to install@137.239.200.198 and manually verify (show session-config diffs, show traffic-policy counters). Type 'configure session validator-relay commit' and 'write memory' when satisfied. Playbook: playbooks/ashburn-relay-was-sw01.yml (do NOT use -e commit=true; commit is manual via SSH)."}}
+{"type":"update","timestamp":"2026-03-06T18:32:05.861153312Z","issue_id":"bar-48f.4","payload":{"description":"Run ansible playbook pre-flight (pane A) to discover GRE tunnel interface. Then run with -e apply=true -e tunnel_interface=TunnelX for 5-min auto-revert. In pane B, SSH to install@209.42.167.133 and manually verify. Type 'configure session validator-outbound commit' and 'write memory' when satisfied. Playbook: playbooks/ashburn-relay-mia-sw01.yml (do NOT use -e commit=true; commit is manual via SSH)."}}
+{"type":"status_update","timestamp":"2026-03-06T18:35:35.320628231Z","issue_id":"bar-48f","payload":{"status":"in_progress"}}
+{"type":"status_update","timestamp":"2026-03-06T18:35:35.717040604Z","issue_id":"bar-48f.1","payload":{"status":"in_progress"}}
+{"type":"close","timestamp":"2026-03-06T20:12:45.087966093Z","issue_id":"bar-48f.1","payload":{}}
+{"type":"status_update","timestamp":"2026-03-06T20:16:34.00466057Z","issue_id":"bar-48f.2","payload":{"status":"in_progress"}}
+{"type":"close","timestamp":"2026-03-06T20:17:18.681131396Z","issue_id":"bar-48f.2","payload":{}}
+{"type":"status_update","timestamp":"2026-03-06T20:17:19.159927405Z","issue_id":"bar-48f.3","payload":{"status":"in_progress"}}
+{"type":"close","timestamp":"2026-03-06T20:18:42.42112937Z","issue_id":"bar-48f.3","payload":{}}
+{"type":"status_update","timestamp":"2026-03-06T20:18:42.930237032Z","issue_id":"bar-48f.4","payload":{"status":"in_progress"}}
+{"type":"create","timestamp":"2026-03-08T06:58:52.122307149Z","issue_id":"bar-02e","payload":{"description":"/srv/solana is a directory on the ZFS dataset biscayne/DATA/srv (mounted at /srv\nwith overlay=on). The fstab zvol mount at /srv/solana was shadowed by ZFS.\n\nFixed 2026-03-08: removed /srv/solana fstab entries, canonical data path is now\n/srv/kind/solana. All playbooks updated. fstab clean. Mounts verified.","priority":"1","title":"zvol mount: /srv/solana resolves to ZFS dataset, not zvol","type":"bug"}}
+{"type":"create","timestamp":"2026-03-08T06:58:52.557582445Z","issue_id":"bar-41a","payload":{"description":"laconic-so creates configmap resources for telegraf but does not generate\nvolumeMounts in the pod spec. The telegraf container crashes because\n/etc/telegraf and /scripts are empty. Manual configmap creation works but\nthe volume mounts are still missing. Root cause is in laconic-so's stack\nmigration — configmap volume mount generation is incomplete.","priority":"1","title":"telegraf volume mounts missing from pod spec","type":"bug"}}
+{"type":"create","timestamp":"2026-03-08T06:58:53.065888933Z","issue_id":"bar-a3b","payload":{"description":"Validator exits shortly after starting. Log shows UDP port reachability checks\nand TCP port checks failing. Needs full log analysis from kind node path\n(/mnt/validator-log/validator.log). May be related to networking/firewall\nconfiguration or the shred relay setup.","priority":"0","title":"agave-validator crash after ~57 seconds","type":"bug"}}
+{"type":"create","timestamp":"2026-03-08T06:58:53.589221516Z","issue_id":"bar-b04","payload":{"description":"Once laconic-so deployment prepare lands, update biscayne-redeploy.yml to use\nprepare instead of start+scale-to-0 workaround. The deploy tag section should\ncall deployment prepare, and scale-up should call deployment start\n--skip-cluster-management.","priority":"2","title":"update biscayne-redeploy to use deployment prepare","type":"task"}}
+{"type":"create","timestamp":"2026-03-08T06:58:54.238136989Z","issue_id":"bar-b41","payload":{"description":"Automate the leapfrog recovery strategy documented in CLAUDE.md. When the\nvalidator is stuck in a repair-dependent gap, download a fresh snapshot past\nthe incomplete zone while preserving the existing ledger (which has turbine\nshreds at the tip). Needs: shred completeness check, snapshot slot targeting,\nselective wipe (accounts+snapshots only, keep ledger).","priority":"2","title":"snapshot leapfrog recovery playbook","type":"feature"}}
+{"type":"create","timestamp":"2026-03-08T06:58:54.756609299Z","issue_id":"bar-0b4","payload":{"description":"biscayne-prepare-agave.yml unconditionally imports ashburn-relay-biscayne.yml\nat the end. This couples filesystem preparation to relay setup. The relay\nplaybook fails if the kind node isn't running (ping to 172.20.0.2 fails).\nShould be a separate playbook invocation, not an import.","priority":"3","title":"biscayne-prepare-agave imports ashburn-relay-biscayne unconditionally","type":"bug"}}
+{"type":"close","timestamp":"2026-03-08T06:59:00.140156099Z","issue_id":"bar-02e","payload":{}}