so-l2l: refactor down() for clarity

down() is now a five-phase recipe with each phase a single line:
namespaced cleanup, cluster-scoped PV cleanup, synchronous wait,
optional namespace delete, optional cluster destroy. Each helper
does one thing.
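
For illustration, a hypothetical call showing how the flags map onto those
phases (the deployer instance and the timeout value are assumed, not part
of this change):

    # keyword names come from the new down() signature in the diff below
    deployer.down(
        timeout=120,                   # assumed value
        volumes=True,                  # delete labeled PVCs (phase 1) and PVs (phase 2)
        skip_cluster_management=True,  # skip phase 5: leave the kind cluster running
        delete_namespace=False,        # skip phase 4: keep the namespace for a later up()
    )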

- Extract _stack_label_selector() and _namespace_exists() so down()
  reads declaratively.
- Rename _delete_labeled_resources -> _delete_namespaced_labeled_resources
  to match what it actually does (namespaced phase only).
- Extract _list_delete_namespaced() helper for the Services and
  Endpoints list+delete pattern (k8s client lacks delete_collection
  for those kinds).
- _wait_for_labeled_gone (renamed from _wait_for_labeled_deletions)
  builds listers in clean append-style; DRY the poll/timeout
  iterations via a local remaining() closure (pattern sketched below).
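
For readers skimming without the diff, a standalone sketch of that
poll/timeout pattern (generic names, not this repo's code; the real
helper also tolerates 404s from the listers):

    import time

    def wait_until_gone(listers, timeout_seconds=120, poll_interval=2):
        """Poll (kind, lister) pairs until every lister returns empty."""
        def remaining():
            out = []
            for kind, lister in listers:
                items = lister()
                if items:
                    out.append((kind, len(items)))
            return out

        deadline = time.monotonic() + timeout_seconds
        while time.monotonic() < deadline:
            if not remaining():
                return True          # everything gone before the deadline
            time.sleep(poll_interval)
        return not remaining()       # final check after timing out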

No behavior changes — same semantics, ~50 fewer lines.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
pull/743/head
Prathamesh Musale 2026-04-16 06:10:56 +00:00
parent 2f99e6f7c9
commit 774b39836e
1 changed file with 171 additions and 223 deletions


@@ -912,181 +912,170 @@ class K8sDeployer(Deployer):
         call_stack_deploy_start(self.deployment_context)
 
-    def down(self, timeout, volumes, skip_cluster_management, delete_namespace=False):
+    def down(
+        self, timeout, volumes, skip_cluster_management, delete_namespace=False
+    ):
+        """Tear down stack-labeled resources. Phases:
+          1. Delete namespaced resources (if namespace still exists).
+          2. Delete cluster-scoped PVs (if --delete-volumes, regardless of (1)).
+          3. Wait for everything we triggered to actually be gone.
+          4. Optionally delete the namespace itself (--delete-namespace).
+          5. Optionally destroy the kind cluster (--perform-cluster-management).
+        Steps 1-3 scope cleanup to a single stack via app.kubernetes.io/stack,
+        so multiple stacks sharing a namespace tear down independently.
+        """
         self.skip_cluster_management = skip_cluster_management
         self.connect_api()
-        # Delete by stack label so multiple stacks sharing a namespace are
-        # cleaned up independently. Fall back to the app label for stacks
-        # that predate the stack label.
-        stack_name = self.cluster_info.stack_name
-        if stack_name:
-            label_selector = f"app.kubernetes.io/stack={stack_name}"
-        else:
-            label_selector = f"app={self.cluster_info.app_name}"
+        selector = self._stack_label_selector()
         ns = self.k8s_namespace
-        # Check whether the namespace exists. If it's already gone, skip the
-        # namespaced cleanup (nothing to do, list/delete calls would 404),
-        # but cluster-scoped PVs may still be labeled with this stack — so
-        # fall through to the cluster-scoped half of _delete_labeled_resources.
-        try:
-            self.core_api.read_namespace(name=ns)
-            namespace_present = True
-        except ApiException as e:
-            if e.status == 404:
-                namespace_present = False
-                if opts.o.debug:
-                    print(f"Namespace {ns} not found; cleaning cluster-scoped only")
-            else:
-                raise
-        self._delete_labeled_resources(
-            ns,
-            label_selector,
-            delete_volumes=volumes,
-            namespace_present=namespace_present,
-        )
-        # Full teardown: nuke the namespace and wait for termination so that a
-        # subsequent up() can recreate it cleanly. No-op if already missing.
-        if delete_namespace and namespace_present:
+        ns_exists = self._namespace_exists(ns)
+        if ns_exists:
+            self._delete_namespaced_labeled_resources(ns, selector, volumes)
+        if volumes:
+            self._delete_labeled_pvs(selector)
+        self._wait_for_labeled_gone(
+            ns, selector, delete_volumes=volumes, namespace_present=ns_exists
+        )
+        if delete_namespace and ns_exists:
             self._delete_namespace()
             self._wait_for_namespace_gone()
         if self.is_kind() and not self.skip_cluster_management:
             destroy_cluster(self.kind_cluster_name)
 
-    def _delete_labeled_resources(
-        self,
-        namespace: str,
-        label_selector: str,
-        delete_volumes: bool,
-        namespace_present: bool = True,
+    def _stack_label_selector(self) -> str:
+        """Selector used for stack-scoped cleanup.
+
+        Prefer app.kubernetes.io/stack (per-stack) and fall back to the
+        legacy app= label (cluster-id scoped) for deployments that predate
+        the stack label.
+        """
+        stack_name = self.cluster_info.stack_name
+        if stack_name:
+            return f"app.kubernetes.io/stack={stack_name}"
+        return f"app={self.cluster_info.app_name}"
+
+    def _namespace_exists(self, namespace: str) -> bool:
+        try:
+            self.core_api.read_namespace(name=namespace)
+            return True
+        except ApiException as e:
+            if e.status == 404:
+                if opts.o.debug:
+                    print(f"Namespace {namespace} not found")
+                return False
+            raise
+
+    def _delete_namespaced_labeled_resources(
+        self, namespace: str, selector: str, delete_volumes: bool
     ):
-        """Delete all stack-labeled resources.
-
-        Namespaced resources (Deployments, Services, ConfigMaps, Secrets,
-        PVCs, Pods, Endpoints, Ingresses, Jobs) are only touched when the
-        namespace still exists. Cluster-scoped PVs are always candidates
-        for deletion if delete_volumes is set — they can outlive a deleted
-        namespace (e.g. after an earlier stop --delete-namespace).
-        Keeps the namespace Active so that a subsequent up() can recreate
-        resources without racing against k8s namespace termination.
+        """Delete Ingresses, Deployments, Jobs, Services, ConfigMaps,
+        Secrets, Endpoints, Pods, and (if delete_volumes) PVCs in the
+        namespace. Order matters: Ingresses first so external traffic
+        stops, then workloads, then support objects, then Pods, then PVCs.
         """
         if opts.o.dry_run:
             print(
-                f"Dry run: would delete resources in {namespace} "
-                f"matching {label_selector}"
+                f"Dry run: would delete namespaced resources in {namespace} "
+                f"matching {selector}"
             )
             return
 
-        def _swallow_404(fn):
+        def swallow_404(fn):
             try:
                 fn()
             except ApiException as e:
                 if e.status not in (404, 405):
                     raise
 
-        if not namespace_present:
-            # Jump straight to cluster-scoped cleanup.
-            if delete_volumes:
-                self._delete_labeled_pvs(label_selector)
-            self._wait_for_labeled_deletions(
-                namespace, label_selector, delete_volumes=delete_volumes
-            )
-            return
         # Ingresses first so external traffic stops before pods disappear.
-        _swallow_404(
+        swallow_404(
             lambda: self.networking_api.delete_collection_namespaced_ingress(
-                namespace=namespace, label_selector=label_selector
+                namespace=namespace, label_selector=selector
             )
         )
-        # Deployments (owns ReplicaSets + Pods via garbage collection).
-        _swallow_404(
+        # Deployments (owns ReplicaSets + Pods via GC).
+        swallow_404(
             lambda: self.apps_api.delete_collection_namespaced_deployment(
-                namespace=namespace, label_selector=label_selector
+                namespace=namespace, label_selector=selector
             )
         )
-        # Jobs (propagation_policy=Background deletes child pods).
-        _swallow_404(
+        # Jobs — propagation=Background cascades to child pods.
+        swallow_404(
             lambda: self.batch_api.delete_collection_namespaced_job(
                 namespace=namespace,
-                label_selector=label_selector,
+                label_selector=selector,
                 propagation_policy="Background",
             )
         )
-        # Services — no delete_collection on core_api for services;
-        # list + delete individually.
-        try:
-            svcs = self.core_api.list_namespaced_service(
-                namespace=namespace, label_selector=label_selector
-            )
-            for svc in svcs.items:
-                _swallow_404(
-                    lambda n=svc.metadata.name: self.core_api.delete_namespaced_service(
-                        name=n, namespace=namespace
-                    )
-                )
-        except ApiException as e:
-            if e.status != 404:
-                raise
-        # ConfigMaps, Secrets, Endpoints.
-        _swallow_404(
-            lambda: self.core_api.delete_collection_namespaced_config_map(
-                namespace=namespace, label_selector=label_selector
-            )
-        )
-        _swallow_404(
-            lambda: self.core_api.delete_collection_namespaced_secret(
-                namespace=namespace, label_selector=label_selector
-            )
-        )
-        # Endpoints usually GC with Services, but delete explicitly for
-        # external-services Endpoints we create directly.
-        try:
-            eps = self.core_api.list_namespaced_endpoints(
-                namespace=namespace, label_selector=label_selector
-            )
-            for ep in eps.items:
-                _swallow_404(
-                    lambda n=ep.metadata.name: self.core_api.delete_namespaced_endpoints(
-                        name=n, namespace=namespace
-                    )
-                )
-        except ApiException as e:
-            if e.status != 404:
-                raise
-        # Lingering Pods (shouldn't exist after Deployment/Job deletion,
-        # but handles standalone pods if any were created).
-        _swallow_404(
-            lambda: self.core_api.delete_collection_namespaced_pod(
-                namespace=namespace, label_selector=label_selector
-            )
-        )
-        if delete_volumes:
-            # Namespaced PVCs.
-            _swallow_404(
-                lambda: self.core_api.delete_collection_namespaced_persistent_volume_claim(
-                    namespace=namespace, label_selector=label_selector
-                )
-            )
-            self._delete_labeled_pvs(label_selector)
-        self._wait_for_labeled_deletions(
+        # Services have no delete_collection on core_api; list + delete.
+        self._list_delete_namespaced(
             namespace,
-            label_selector,
-            delete_volumes=delete_volumes,
-            namespace_present=True,
+            selector,
+            list_fn=self.core_api.list_namespaced_service,
+            delete_fn=self.core_api.delete_namespaced_service,
         )
+        # ConfigMaps, Secrets.
+        swallow_404(
+            lambda: self.core_api.delete_collection_namespaced_config_map(
+                namespace=namespace, label_selector=selector
+            )
+        )
+        swallow_404(
+            lambda: self.core_api.delete_collection_namespaced_secret(
+                namespace=namespace, label_selector=selector
+            )
+        )
+        # Endpoints usually GC with Services, but we create a few directly
+        # (external-services) that aren't owned by a Service — clean those.
+        self._list_delete_namespaced(
+            namespace,
+            selector,
+            list_fn=self.core_api.list_namespaced_endpoints,
+            delete_fn=self.core_api.delete_namespaced_endpoints,
+        )
+        # Stray pods (owned pods are GC'd with their Deployment/Job).
+        swallow_404(
+            lambda: self.core_api.delete_collection_namespaced_pod(
+                namespace=namespace, label_selector=selector
+            )
+        )
+        if delete_volumes:
+            swallow_404(
+                lambda: self.core_api.delete_collection_namespaced_persistent_volume_claim(  # noqa: E501
+                    namespace=namespace, label_selector=selector
+                )
+            )
 
-    def _delete_labeled_pvs(self, label_selector: str):
-        """Delete cluster-scoped PVs matching the stack label."""
+    def _list_delete_namespaced(self, namespace, selector, list_fn, delete_fn):
+        """List by selector and delete each item. Use for resources where
+        the k8s python client lacks delete_collection (Services, Endpoints).
+        """
         try:
-            pvs = self.core_api.list_persistent_volume(label_selector=label_selector)
+            items = list_fn(namespace=namespace, label_selector=selector).items
+        except ApiException as e:
+            if e.status == 404:
+                return
+            raise
+        for item in items:
+            try:
+                delete_fn(name=item.metadata.name, namespace=namespace)
+            except ApiException as e:
+                if e.status not in (404, 405):
+                    raise
+
+    def _delete_labeled_pvs(self, selector: str):
+        """Delete cluster-scoped PVs matching the stack label."""
+        if opts.o.dry_run:
+            print(f"Dry run: would delete PVs matching {selector}")
+            return
+        try:
+            pvs = self.core_api.list_persistent_volume(label_selector=selector)
         except ApiException as e:
             if opts.o.debug:
                 print(f"Error listing PVs: {e}")
@@ -1099,95 +1088,58 @@ class K8sDeployer(Deployer):
         except ApiException as e:
             _check_delete_exception(e)
 
-    def _wait_for_labeled_deletions(
+    def _wait_for_labeled_gone(
         self,
         namespace: str,
-        label_selector: str,
+        selector: str,
         delete_volumes: bool,
-        namespace_present: bool = True,
+        namespace_present: bool,
         timeout_seconds: int = 120,
     ):
-        """Block until stack-labeled resources finish terminating.
-
-        delete_collection returns before the apiserver has actually removed
-        the objects — finalizers (PVs waiting for PVCs, PVCs waiting for
-        VolumeAttachment, pods waiting for graceful shutdown) propagate
-        async. Poll until everything we triggered a delete for is gone,
-        so callers can assume a synchronous tear-down.
+        """Poll until every kind we triggered a delete for is gone.
+
+        delete_collection/delete are async — finalizers (PV bound-by-PVC,
+        PVC bound-by-VolumeAttachment, pod graceful shutdown) propagate
+        after the API call returns. Blocking here makes down() a
+        synchronous contract for callers (tests, ansible, cryovial).
         """
         import time
 
-        # (kind name, lister callable) — lister returns an object with .items.
-        # Namespaced kinds are skipped when the namespace is already gone
-        # (there's nothing to list).
-        listers = [] if not namespace_present else [
-            (
-                "deployment",
-                lambda: self.apps_api.list_namespaced_deployment(
-                    namespace=namespace, label_selector=label_selector
-                ),
-            ),
-            (
-                "ingress",
-                lambda: self.networking_api.list_namespaced_ingress(
-                    namespace=namespace, label_selector=label_selector
-                ),
-            ),
-            (
-                "job",
-                lambda: self.batch_api.list_namespaced_job(
-                    namespace=namespace, label_selector=label_selector
-                ),
-            ),
-            (
-                "service",
-                lambda: self.core_api.list_namespaced_service(
-                    namespace=namespace, label_selector=label_selector
-                ),
-            ),
-            (
-                "configmap",
-                lambda: self.core_api.list_namespaced_config_map(
-                    namespace=namespace, label_selector=label_selector
-                ),
-            ),
-            (
-                "secret",
-                lambda: self.core_api.list_namespaced_secret(
-                    namespace=namespace, label_selector=label_selector
-                ),
-            ),
-            (
-                "pod",
-                lambda: self.core_api.list_namespaced_pod(
-                    namespace=namespace, label_selector=label_selector
-                ),
-            ),
-        ]
-        if delete_volumes:
-            if namespace_present:
-                listers.append(
-                    (
-                        "persistentvolumeclaim",
-                        lambda: self.core_api.list_namespaced_persistent_volume_claim(
-                            namespace=namespace, label_selector=label_selector
-                        ),
-                    )
-                )
-            # PVs are cluster-scoped — wait for them even when the namespace
-            # is already gone (orphaned from a prior --delete-namespace).
-            listers.append(
-                (
-                    "persistentvolume",
-                    lambda: self.core_api.list_persistent_volume(
-                        label_selector=label_selector
-                    ),
-                )
-            )
-        deadline = time.monotonic() + timeout_seconds
-        while time.monotonic() < deadline:
-            remaining = []
+        listers = []
+        if namespace_present:
+            listers += [
+                ("deployment", lambda: self.apps_api.list_namespaced_deployment(
+                    namespace=namespace, label_selector=selector)),
+                ("ingress", lambda: self.networking_api.list_namespaced_ingress(
+                    namespace=namespace, label_selector=selector)),
+                ("job", lambda: self.batch_api.list_namespaced_job(
+                    namespace=namespace, label_selector=selector)),
+                ("service", lambda: self.core_api.list_namespaced_service(
+                    namespace=namespace, label_selector=selector)),
+                ("configmap", lambda: self.core_api.list_namespaced_config_map(
+                    namespace=namespace, label_selector=selector)),
+                ("secret", lambda: self.core_api.list_namespaced_secret(
+                    namespace=namespace, label_selector=selector)),
+                ("pod", lambda: self.core_api.list_namespaced_pod(
+                    namespace=namespace, label_selector=selector)),
+            ]
+            if delete_volumes:
+                listers.append(
+                    ("persistentvolumeclaim",
+                     lambda: self.core_api.list_namespaced_persistent_volume_claim(
+                         namespace=namespace, label_selector=selector))
+                )
+        # PVs are cluster-scoped — wait for them even when the namespace
+        # is already gone (orphaned from a prior --delete-namespace).
+        if delete_volumes:
+            listers.append(
+                ("persistentvolume",
+                 lambda: self.core_api.list_persistent_volume(
+                     label_selector=selector))
+            )
+
+        def remaining():
+            out = []
             for kind, lister in listers:
                 try:
                     items = lister().items
@@ -1196,27 +1148,23 @@ class K8sDeployer(Deployer):
                         continue
                     raise
                 if items:
-                    remaining.append((kind, len(items)))
-            if not remaining:
+                    out.append((kind, len(items)))
+            return out
+
+        deadline = time.monotonic() + timeout_seconds
+        while time.monotonic() < deadline:
+            left = remaining()
+            if not left:
                 return
             if opts.o.debug:
-                print(f"Waiting for deletions: {remaining}")
+                print(f"Waiting for deletions: {left}")
             time.sleep(2)
-        # Timed out — warn but don't raise. Caller may still have the
-        # cluster in a sensible state.
-        still_present = []
-        for kind, lister in listers:
-            try:
-                items = lister().items
-            except ApiException:
-                continue
-            if items:
-                still_present.append((kind, len(items)))
-        if still_present:
+        left = remaining()
+        if left:
             print(
                 f"Warning: resources still present after {timeout_seconds}s: "
-                f"{still_present}"
+                f"{left}"
             )
 
     def status(self):