fix(test): wait for kind cluster cleanup before recreating

Replace the fixed `sleep 20` with a polling loop that waits for `kind get clusters` to report no clusters. The previous approach was flaky on CI runners where Docker takes longer to tear down cgroup hierarchies after `kind delete cluster`.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-10 05:26:48 +00:00 · 2026-03-10 05:26:48 +00:00 · 108f13a09b
parent d64046df55
commit 108f13a09b
1 changed files with 16 additions and 3 deletions
--- a/tests/k8s-deploy/run-deploy-test.sh
+++ b/tests/k8s-deploy/run-deploy-test.sh
@ -46,6 +46,18 @@ wait_for_log_output () {
 }
 # Poll until 'kind get clusters' lists no clusters, so that a new cluster is
 # only created after the previous one has disappeared from kind's listing.
 # Probes up to 60 times with a 2 second pause between attempts (roughly two
 # minutes in total, plus the time taken by the probes themselves).
 # Globals:   none
 # Arguments: none
 # Outputs:   a failure message on stdout if the wait times out
 # Returns:   0 as soon as a probe succeeds and lists nothing; exits the
 #            whole script with status 1 on timeout
 wait_for_cluster_destroyed () {
    # Keep the loop counter and the probe output out of the caller's scope.
    local i clusters
    for i in {1..60}
    do
        # stderr is discarded; only the cluster names on stdout matter.
        # A probe that fails is not evidence that the cluster is gone, so
        # only a successful, empty listing ends the wait.
        if clusters=$(kind get clusters 2>/dev/null) && [[ -z "$clusters" ]]; then
            return 0
        fi
        sleep 2
    done
    # Timed out: the cluster is still listed (or kind never answered).
    echo "waiting for kind cluster cleanup: FAILED"
    exit 1
 }
 delete_cluster_exit () {
    $TEST_TARGET_SO deployment --dir $test_deployment_dir stop --delete-volumes
    exit 1
@ -227,9 +239,10 @@ fi
 # Stop then start again and check the volume was preserved
 $TEST_TARGET_SO deployment --dir $test_deployment_dir stop
-# Sleep a bit just in case
+# Wait for the kind cluster to be fully destroyed before recreating it.
-# sleep for longer to check if that's why the subsequent create cluster fails
+# Without this, the second 'kind create cluster' can fail with cgroup
-sleep 20
+# detection errors because Docker hasn't finished cleaning up.
 wait_for_cluster_destroyed
 $TEST_TARGET_SO deployment --dir $test_deployment_dir start
 wait_for_pods_started
 wait_for_log_output