so-l2l: make down() synchronous via _wait_for_labeled_deletions
delete_collection returns before the apiserver actually removes objects — finalizers on PVs, PVCs, and pod graceful shutdown all propagate asynchronously.

Add _wait_for_labeled_deletions, which polls the same label selector across every kind we triggered a delete for, with a 120s timeout. down() now returns only once the cluster has actually settled, so callers (tests, ansible, cryovial) don't need their own wait loops.

Update the k8s-deploy test's assert_no_labeled_resources to rely on that synchronous contract — no polling in the test.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

pull/743/head
parent
98ad60ca03
commit
3d83c6ad27
|
|
@ -1063,6 +1063,124 @@ class K8sDeployer(Deployer):
|
||||||
if opts.o.debug:
|
if opts.o.debug:
|
||||||
print(f"Error listing PVs: {e}")
|
print(f"Error listing PVs: {e}")
|
||||||
|
|
||||||
|
self._wait_for_labeled_deletions(
|
||||||
|
namespace, label_selector, delete_volumes=delete_volumes
|
||||||
|
)
|
||||||
|
|
||||||
|
def _wait_for_labeled_deletions(
    self,
    namespace: str,
    label_selector: str,
    delete_volumes: bool,
    timeout_seconds: int = 120,
    poll_interval_seconds: float = 2.0,
):
    """Block until stack-labeled resources finish terminating.

    delete_collection returns before the apiserver has actually removed
    the objects — finalizers (PVs waiting for PVCs, PVCs waiting for
    VolumeAttachment, pods waiting for graceful shutdown) propagate
    async. Poll until everything we triggered a delete for is gone,
    so callers can assume a synchronous tear-down.

    Args:
        namespace: Namespace passed to every namespaced list call.
        label_selector: Label selector passed to every list call.
        delete_volumes: When true, PersistentVolumeClaims (namespaced)
            and PersistentVolumes (cluster-scoped) are waited on too.
        timeout_seconds: Upper bound on the total wait. Polling never
            sleeps past this deadline.
        poll_interval_seconds: Pause between polls; the final pause is
            shortened so the wait ends at the deadline.

    Returns:
        None. On timeout a warning listing the remaining
        ``(kind, count)`` pairs is printed instead of raising.

    Raises:
        ApiException: If a list call fails with a non-404 status while
            polling. Errors during the single post-timeout check are
            swallowed (and reported only in debug mode).
    """
    import time
    from functools import partial

    # (kind name, bound list method) for the namespaced kinds; every one
    # of them is called with the same namespace + label selector.
    namespaced = [
        ("deployment", self.apps_api.list_namespaced_deployment),
        ("ingress", self.networking_api.list_namespaced_ingress),
        ("job", self.batch_api.list_namespaced_job),
        ("service", self.core_api.list_namespaced_service),
        ("configmap", self.core_api.list_namespaced_config_map),
        ("secret", self.core_api.list_namespaced_secret),
        ("pod", self.core_api.list_namespaced_pod),
    ]
    if delete_volumes:
        namespaced.append(
            (
                "persistentvolumeclaim",
                self.core_api.list_namespaced_persistent_volume_claim,
            )
        )

    # (kind name, zero-arg callable) — the callable returns an object
    # with .items
    listers = [
        (kind, partial(fn, namespace=namespace, label_selector=label_selector))
        for kind, fn in namespaced
    ]
    if delete_volumes:
        # PVs are cluster-scoped, so there is no namespace argument.
        listers.append(
            (
                "persistentvolume",
                partial(
                    self.core_api.list_persistent_volume,
                    label_selector=label_selector,
                ),
            )
        )

    def count_remaining(best_effort):
        """Return [(kind, count)] for every kind that still has items.

        A 404 is always skipped. Any other ApiException is re-raised
        unless best_effort is set, in which case the kind is skipped.
        """
        found = []
        for kind, lister in listers:
            try:
                items = lister().items
            except ApiException as e:
                if e.status == 404:
                    continue
                if not best_effort:
                    raise
                if opts.o.debug:
                    print(f"Error listing {kind}: {e}")
                continue
            if items:
                found.append((kind, len(items)))
        return found

    deadline = time.monotonic() + timeout_seconds
    while time.monotonic() < deadline:
        remaining = count_remaining(best_effort=False)
        if not remaining:
            return
        if opts.o.debug:
            print(f"Waiting for deletions: {remaining}")
        # Clamp the pause so the overall wait honours timeout_seconds
        # (time.sleep rejects negative values, hence the max()).
        time_left = deadline - time.monotonic()
        time.sleep(max(0.0, min(poll_interval_seconds, time_left)))

    # Timed out — warn but don't raise. Caller may still have the
    # cluster in a sensible state.
    still_present = count_remaining(best_effort=True)
    if still_present:
        print(
            f"Warning: resources still present after {timeout_seconds}s: "
            f"{still_present}"
        )
|
||||||
|
|
||||||
def status(self):
|
def status(self):
|
||||||
self.connect_api()
|
self.connect_api()
|
||||||
# Call whatever API we need to get the running container list
|
# Call whatever API we need to get the running container list
|
||||||
|
|
|
||||||
|
|
@ -63,7 +63,9 @@ assert_ns_phase () {
|
||||||
fi
|
fi
|
||||||
}
|
}
|
||||||
|
|
||||||
# Count labeled resources in the deployment namespace. Exit 1 on mismatch.
|
# Count labeled resources in the deployment namespace. down() is
|
||||||
|
# synchronous on its own cleanup (waits for PVCs/pods to terminate
|
||||||
|
# before returning) so callers can assert immediately.
|
||||||
# Usage: assert_no_labeled_resources <kind>
|
# Usage: assert_no_labeled_resources <kind>
|
||||||
assert_no_labeled_resources () {
|
assert_no_labeled_resources () {
|
||||||
local kind=$1
|
local kind=$1
|
||||||
|
|
@ -260,7 +262,7 @@ $TEST_TARGET_SO deployment --dir $test_deployment_dir stop --delete-volumes --sk
|
||||||
assert_ns_phase "Active"
|
assert_ns_phase "Active"
|
||||||
echo "stop preserves namespace test: passed"
|
echo "stop preserves namespace test: passed"
|
||||||
|
|
||||||
for kind in deployment service configmap secret pvc; do
|
for kind in deployment job ingress service configmap secret pvc pod; do
|
||||||
assert_no_labeled_resources "$kind"
|
assert_no_labeled_resources "$kind"
|
||||||
done
|
done
|
||||||
echo "stop cleans labeled resources test: passed"
|
echo "stop cleans labeled resources test: passed"
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue