fix(test): use --skip-cluster-management for stop/start volume test

Recreating a kind cluster in the same CI run fails due to stale
etcd/certs and cgroup detection issues. Use --skip-cluster-management
to reuse the existing cluster, and --delete-volumes to clear PVs so
fresh PVCs can bind on restart.

The volume retention semantics are preserved: bind-mount host path
data survives (filesystem is old), provisioner volumes are fresh
(PVs were deleted).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
feature/k8s-jobs
Prathamesh Musale 2026-03-10 06:49:42 +00:00
parent a1c6c35834
commit b85c12e4da
1 changed files with 12 additions and 21 deletions

View File

@@ -46,18 +46,6 @@ wait_for_log_output () {
 }
-wait_for_cluster_destroyed () {
-for i in {1..60}
-do
-if ! kind get clusters 2>/dev/null | grep -q .; then
-return
-fi
-sleep 2
-done
-echo "waiting for kind cluster cleanup: FAILED"
-exit 1
-}
 delete_cluster_exit () {
 $TEST_TARGET_SO deployment --dir $test_deployment_dir stop --delete-volumes
 exit 1
@@ -235,13 +223,15 @@ else
 delete_cluster_exit
 fi
-# Stop then start again and check the volume was preserved
-$TEST_TARGET_SO deployment --dir $test_deployment_dir stop
-# Wait for the kind cluster to be fully destroyed before recreating it.
-# Without this, the second 'kind create cluster' can fail with cgroup
-# detection errors because Docker hasn't finished cleaning up.
-wait_for_cluster_destroyed
-$TEST_TARGET_SO deployment --dir $test_deployment_dir start
+# Stop then start again and check the volume was preserved.
+# Use --skip-cluster-management to reuse the existing kind cluster instead of
+# destroying and recreating it (which fails on CI runners due to stale etcd/certs
+# and cgroup detection issues).
+# Use --delete-volumes to clear PVs so fresh PVCs can bind on restart.
+# Bind-mount data survives on the host filesystem; provisioner volumes are recreated fresh.
+$TEST_TARGET_SO deployment --dir $test_deployment_dir stop --delete-volumes --skip-cluster-management
+sleep 5
+$TEST_TARGET_SO deployment --dir $test_deployment_dir start --skip-cluster-management
 wait_for_pods_started
 wait_for_log_output
 sleep 1
@@ -254,8 +244,9 @@ else
 delete_cluster_exit
 fi
-# These volumes will be completely destroyed by the kind delete/create, because they lived inside
-# the kind container. So, unlike the bind-mount case, they will appear fresh after the restart.
+# Provisioner volumes are destroyed when PVs are deleted (--delete-volumes on stop).
+# Unlike bind-mount volumes whose data persists on the host, provisioner storage
+# is gone, so the volume appears fresh after restart.
 log_output_11=$( $TEST_TARGET_SO deployment --dir $test_deployment_dir logs )
 if [[ "$log_output_11" == *"/data2 filesystem is fresh"* ]]; then
 echo "Fresh provisioner volumes test: passed"