fix(test): use --skip-cluster-management for stop/start volume test

Recreating a kind cluster in the same CI run fails due to stale
etcd/certs and cgroup detection issues. Use --skip-cluster-management
to reuse the existing cluster, and --delete-volumes to clear PVs so
fresh PVCs can bind on restart.

The volume retention semantics are preserved: bind-mount host path
data survives (filesystem is old), provisioner volumes are fresh
(PVs were deleted).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
feature/k8s-jobs
Prathamesh Musale 2026-03-10 06:49:42 +00:00
parent a1c6c35834
commit b85c12e4da
1 changed files with 12 additions and 21 deletions

View File

@@ -46,18 +46,6 @@ wait_for_log_output () {
}
# Block until no kind clusters remain, polling every 2 seconds.
# Gives up (and aborts the whole script) after 60 attempts (~2 minutes).
wait_for_cluster_destroyed () {
  local attempt
  for attempt in {1..60}; do
    # 'kind get clusters' prints one cluster name per line; empty output
    # (grep -q . fails) means cleanup has finished.
    kind get clusters 2>/dev/null | grep -q . || return 0
    sleep 2
  done
  echo "waiting for kind cluster cleanup: FAILED"
  exit 1
}
delete_cluster_exit () {
$TEST_TARGET_SO deployment --dir $test_deployment_dir stop --delete-volumes
exit 1
@@ -235,13 +223,15 @@ else
delete_cluster_exit
fi
# Stop then start again and check the volume was preserved
$TEST_TARGET_SO deployment --dir $test_deployment_dir stop
# Wait for the kind cluster to be fully destroyed before recreating it.
# Without this, the second 'kind create cluster' can fail with cgroup
# detection errors because Docker hasn't finished cleaning up.
wait_for_cluster_destroyed
$TEST_TARGET_SO deployment --dir $test_deployment_dir start
# Stop then start again and check the volume was preserved.
# Use --skip-cluster-management to reuse the existing kind cluster instead of
# destroying and recreating it (which fails on CI runners due to stale etcd/certs
# and cgroup detection issues).
# Use --delete-volumes to clear PVs so fresh PVCs can bind on restart.
# Bind-mount data survives on the host filesystem; provisioner volumes are recreated fresh.
$TEST_TARGET_SO deployment --dir $test_deployment_dir stop --delete-volumes --skip-cluster-management
sleep 5
$TEST_TARGET_SO deployment --dir $test_deployment_dir start --skip-cluster-management
wait_for_pods_started
wait_for_log_output
sleep 1
@@ -254,8 +244,9 @@ else
delete_cluster_exit
fi
# These volumes will be completely destroyed by the kind delete/create, because they lived inside
# the kind container. So, unlike the bind-mount case, they will appear fresh after the restart.
# Provisioner volumes are destroyed when PVs are deleted (--delete-volumes on stop).
# Unlike bind-mount volumes whose data persists on the host, provisioner storage
# is gone, so the volume appears fresh after restart.
log_output_11=$( $TEST_TARGET_SO deployment --dir $test_deployment_dir logs )
if [[ "$log_output_11" == *"/data2 filesystem is fresh"* ]]; then
echo "Fresh provisioner volumes test: passed"