fix(test): wait for kind cluster cleanup before recreating

Replace the fixed `sleep 20` with a polling loop that waits for
`kind get clusters` to report no clusters. The previous approach
was flaky on CI runners where Docker takes longer to tear down
cgroup hierarchies after `kind delete cluster`.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
feature/k8s-jobs
Prathamesh Musale 2026-03-10 05:26:48 +00:00
parent d64046df55
commit 108f13a09b
1 changed file with 16 additions and 3 deletions

View File

@ -46,6 +46,18 @@ wait_for_log_output () {
} }
# Poll until `kind get clusters` prints nothing on stdout, i.e. no kind
# cluster is listed any more.
# Arguments:
#   $1 - maximum number of polls (optional, default 60)
#   $2 - seconds to sleep after each unsuccessful poll (optional, default 2)
# Outputs: a failure message on stdout if clusters are still listed after
#          the last poll.
# Returns: 0 as soon as no cluster is listed; otherwise terminates the
#          script with exit status 1.
wait_for_cluster_destroyed () {
    local max_attempts=${1:-60}
    local poll_interval=${2:-2}
    # Keep the counter local so the caller's variables are never clobbered.
    local attempt
    for (( attempt = 1; attempt <= max_attempts; attempt++ ))
    do
        # stderr from kind is discarded; only its stdout is inspected.
        if ! kind get clusters 2>/dev/null | grep -q .; then
            return
        fi
        sleep "$poll_interval"
    done
    echo "waiting for kind cluster cleanup: FAILED"
    exit 1
}
delete_cluster_exit () { delete_cluster_exit () {
$TEST_TARGET_SO deployment --dir $test_deployment_dir stop --delete-volumes $TEST_TARGET_SO deployment --dir $test_deployment_dir stop --delete-volumes
exit 1 exit 1
@ -227,9 +239,10 @@ fi
# Stop then start again and check the volume was preserved # Stop then start again and check the volume was preserved
$TEST_TARGET_SO deployment --dir $test_deployment_dir stop $TEST_TARGET_SO deployment --dir $test_deployment_dir stop
# Sleep a bit just in case # Wait for the kind cluster to be fully destroyed before recreating it.
# sleep for longer to check if that's why the subsequent create cluster fails # Without this, the second 'kind create cluster' can fail with cgroup
sleep 20 # detection errors because Docker hasn't finished cleaning up.
wait_for_cluster_destroyed
$TEST_TARGET_SO deployment --dir $test_deployment_dir start $TEST_TARGET_SO deployment --dir $test_deployment_dir start
wait_for_pods_started wait_for_pods_started
wait_for_log_output wait_for_log_output