so-l2l Part B: down() deletes by stack label, keeps namespace

Stop no longer calls _delete_namespace() on every down(). Instead,
deletion is scoped by the app.kubernetes.io/stack=<stack-name> label,
so multiple stacks sharing a namespace are torn down independently
and no namespace-termination race blocks a subsequent up().
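
For illustration only (the namespace and stack names below are made
up; the real logic is in _delete_labeled_resources() in deploy_k8s.py),
label-scoped teardown with the kubernetes Python client looks like:

    from kubernetes import client, config

    config.load_kube_config()
    apps_api = client.AppsV1Api()

    # Removes only Deployments labeled for stack-a; a second stack in
    # the same namespace (app.kubernetes.io/stack=stack-b) is untouched.
    apps_api.delete_collection_namespaced_deployment(
        namespace="default",
        label_selector="app.kubernetes.io/stack=stack-a",
    )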

Prerequisite: every V1ObjectMeta created by cluster_info.py and
deploy_k8s.py now carries the stack label via a new
ClusterInfo._stack_labels() helper (Namespace, Ingress, Service,
Deployment pod template, ConfigMap, Secret, PVC, PV, Endpoints,
Job, CA certs secret, external-service Services).
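
Rough usage sketch (illustrative app/stack names): the helper merges
the stack label into each resource's label dict before it goes on the
V1ObjectMeta:

    from kubernetes import client

    app_name = "laconic-abc123"   # illustrative
    stack_name = "test-stack"     # illustrative

    labels = {"app": app_name, "app.kubernetes.io/stack": stack_name}
    metadata = client.V1ObjectMeta(
        name=f"{app_name}-service",
        labels=labels,
    )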

down() order: Ingresses -> Deployments -> Jobs -> Services ->
ConfigMaps/Secrets/Endpoints -> lingering Pods, then PVCs/PVs
only when --delete-volumes is passed. Kind cluster destruction
still gated by --perform-cluster-management.
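
Quick sanity check (sketch only, made-up namespace name): after down()
the namespace should still exist and report Active, so an immediate
up() can reuse it instead of racing namespace termination.

    from kubernetes import client, config

    config.load_kube_config()
    core_api = client.CoreV1Api()

    # With the old behavior the namespace was deleted on down(), so
    # this read would 404 once termination finished.
    ns = core_api.read_namespace(name="laconic-abc123")
    print(ns.status.phase)  # expected: "Active"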

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
pull/743/head
Prathamesh Musale 2026-04-15 13:17:13 +00:00
parent 8a586b7dfc
commit c7d2aaa0d0
2 changed files with 167 additions and 71 deletions

cluster_info.py

@@ -118,6 +118,17 @@ class ClusterInfo:
volumes.extend(named_volumes_from_pod_files(self.parsed_job_yaml_map))
return volumes
def _stack_labels(self, extra: Optional[dict] = None) -> dict:
"""Standard resource labels. Use on every k8s resource SO creates so
label-based cleanup (down by stack) can find them all.
"""
labels = {"app": self.app_name}
if self.stack_name:
labels["app.kubernetes.io/stack"] = self.stack_name
if extra:
labels.update(extra)
return labels
def get_nodeports(self):
nodeports = []
for pod_name in self.parsed_pod_yaml_map:
@@ -151,7 +162,7 @@ class ClusterInfo:
f"{self.app_name}-nodeport-"
f"{pod_port}-{protocol.lower()}"
),
labels={"app": self.app_name},
labels=self._stack_labels(),
),
spec=client.V1ServiceSpec(
type="NodePort",
@@ -268,7 +279,7 @@ class ClusterInfo:
ingress = client.V1Ingress(
metadata=client.V1ObjectMeta(
name=f"{self.app_name}-ingress",
labels={"app": self.app_name},
labels=self._stack_labels(),
annotations=ingress_annotations,
),
spec=spec,
@@ -323,7 +334,7 @@ class ClusterInfo:
service = client.V1Service(
metadata=client.V1ObjectMeta(
name=f"{self.app_name}-service",
labels={"app": self.app_name},
labels=self._stack_labels(),
),
spec=client.V1ServiceSpec(
type="ClusterIP",
@@ -355,10 +366,9 @@ class ClusterInfo:
self.spec.get_volume_resources_for(volume_name) or global_resources
)
labels = {
"app": self.app_name,
"volume-label": f"{self.app_name}-{volume_name}",
}
labels = self._stack_labels(
{"volume-label": f"{self.app_name}-{volume_name}"}
)
if volume_path:
storage_class_name = "manual"
k8s_volume_name = f"{self.app_name}-{volume_name}"
@@ -418,7 +428,7 @@ class ClusterInfo:
spec = client.V1ConfigMap(
metadata=client.V1ObjectMeta(
name=f"{self.app_name}-{cfg_map_name}",
labels={"app": self.app_name, "configmap-label": cfg_map_name},
labels=self._stack_labels({"configmap-label": cfg_map_name}),
),
binary_data=data,
)
@@ -482,10 +492,9 @@ class ClusterInfo:
pv = client.V1PersistentVolume(
metadata=client.V1ObjectMeta(
name=f"{self.app_name}-{volume_name}",
labels={
"app": self.app_name,
"volume-label": f"{self.app_name}-{volume_name}",
},
labels=self._stack_labels(
{"volume-label": f"{self.app_name}-{volume_name}"}
),
),
spec=spec,
)
@@ -737,9 +746,7 @@ class ClusterInfo:
Returns (annotations, labels, affinity, tolerations).
"""
annotations = None
labels = {"app": self.app_name}
if self.stack_name:
labels["app.kubernetes.io/stack"] = self.stack_name
labels = self._stack_labels()
affinity = None
tolerations = None
@@ -920,21 +927,11 @@ class ClusterInfo:
kind="Deployment",
metadata=client.V1ObjectMeta(
name=deployment_name,
labels={
"app": self.app_name,
**(
{
"app.kubernetes.io/stack": self.stack_name,
}
if self.stack_name
else {}
),
**(
{"app.kubernetes.io/component": pod_name}
if multi_pod
else {}
),
},
labels=self._stack_labels(
{"app.kubernetes.io/component": pod_name}
if multi_pod
else None
),
),
spec=spec,
)
@@ -1001,7 +998,7 @@ class ClusterInfo:
service = client.V1Service(
metadata=client.V1ObjectMeta(
name=f"{self.app_name}-{pod_name}-service",
labels={"app": self.app_name},
labels=self._stack_labels(),
),
spec=client.V1ServiceSpec(
type="ClusterIP",
@@ -1054,14 +1051,9 @@ class ClusterInfo:
# Use a distinct app label for job pods so they don't get
# picked up by pods_in_deployment() which queries app={app_name}.
pod_labels = {
"app": f"{self.app_name}-job",
**(
{"app.kubernetes.io/stack": self.stack_name}
if self.stack_name
else {}
),
}
# Job pods keep the distinct app label (see comment above), so build
# the labels via _stack_labels() and then override "app".
pod_labels = self._stack_labels({"app": f"{self.app_name}-job"})
template = client.V1PodTemplateSpec(
metadata=client.V1ObjectMeta(labels=pod_labels),
spec=client.V1PodSpec(
@@ -1076,14 +1068,7 @@ class ClusterInfo:
template=template,
backoff_limit=0,
)
job_labels = {
"app": self.app_name,
**(
{"app.kubernetes.io/stack": self.stack_name}
if self.stack_name
else {}
),
}
job_labels = self._stack_labels()
job = client.V1Job(
api_version="batch/v1",
kind="Job",
@@ -1121,7 +1106,7 @@ class ClusterInfo:
svc = client.V1Service(
metadata=client.V1ObjectMeta(
name=name,
labels={"app": self.app_name},
labels=self._stack_labels(),
),
spec=client.V1ServiceSpec(
type="ExternalName",
@@ -1138,7 +1123,7 @@ class ClusterInfo:
svc = client.V1Service(
metadata=client.V1ObjectMeta(
name=name,
labels={"app": self.app_name},
labels=self._stack_labels(),
),
spec=client.V1ServiceSpec(
cluster_ip="None",
@@ -1156,7 +1141,7 @@ class ClusterInfo:
svc = client.V1Service(
metadata=client.V1ObjectMeta(
name=name,
labels={"app": self.app_name},
labels=self._stack_labels(),
),
spec=client.V1ServiceSpec(
cluster_ip="None",
@@ -1199,7 +1184,7 @@ class ClusterInfo:
secret = client.V1Secret(
metadata=client.V1ObjectMeta(
name=secret_name,
labels={"app": self.app_name},
labels=self._stack_labels(),
),
data=secret_data,
)

deploy_k8s.py

@@ -189,7 +189,7 @@ class K8sDeployer(Deployer):
ns = client.V1Namespace(
metadata=client.V1ObjectMeta(
name=self.k8s_namespace,
labels={"app": self.cluster_info.app_name},
labels=self.cluster_info._stack_labels(),
)
)
self.core_api.create_namespace(body=ns)
@@ -475,7 +475,7 @@ class K8sDeployer(Deployer):
endpoints = client.V1Endpoints(
metadata=client.V1ObjectMeta(
name=name,
labels={"app": self.cluster_info.app_name},
labels=self.cluster_info._stack_labels(),
),
subsets=[
client.V1EndpointSubset(
@@ -535,7 +535,7 @@ class K8sDeployer(Deployer):
endpoints = client.V1Endpoints(
metadata=client.V1ObjectMeta(
name=name,
labels={"app": self.cluster_info.app_name},
labels=self.cluster_info._stack_labels(),
),
subsets=[
client.V1EndpointSubset(
@@ -905,12 +905,136 @@ class K8sDeployer(Deployer):
self.skip_cluster_management = skip_cluster_management
self.connect_api()
app_label = f"app={self.cluster_info.app_name}"
# Delete by stack label so multiple stacks sharing a namespace are
# cleaned up independently. Fall back to the app label for stacks
# that predate the stack label.
stack_name = self.cluster_info.stack_name
if stack_name:
label_selector = f"app.kubernetes.io/stack={stack_name}"
else:
label_selector = f"app={self.cluster_info.app_name}"
# PersistentVolumes are cluster-scoped (not namespaced), so delete by label
if volumes:
ns = self.k8s_namespace
# Namespace may not exist yet on first-time deployments.
try:
self.core_api.read_namespace(name=ns)
except ApiException as e:
if e.status == 404:
if opts.o.debug:
print(f"Namespace {ns} not found; nothing to delete")
if self.is_kind() and not self.skip_cluster_management:
destroy_cluster(self.kind_cluster_name)
return
raise
self._delete_labeled_resources(ns, label_selector, delete_volumes=volumes)
if self.is_kind() and not self.skip_cluster_management:
destroy_cluster(self.kind_cluster_name)
def _delete_labeled_resources(
self, namespace: str, label_selector: str, delete_volumes: bool
):
"""Delete all stack-labeled resources in the namespace.
Keeps the namespace Active so that a subsequent up() can recreate
resources without racing against k8s namespace termination.
"""
if opts.o.dry_run:
print(
f"Dry run: would delete resources in {namespace} "
f"matching {label_selector}"
)
return
def _swallow_404(fn):
try:
pvs = self.core_api.list_persistent_volume(label_selector=app_label)
fn()
except ApiException as e:
if e.status not in (404, 405):
raise
# Ingresses first so external traffic stops before pods disappear.
_swallow_404(
lambda: self.networking_api.delete_collection_namespaced_ingress(
namespace=namespace, label_selector=label_selector
)
)
# Deployments (owns ReplicaSets + Pods via garbage collection).
_swallow_404(
lambda: self.apps_api.delete_collection_namespaced_deployment(
namespace=namespace, label_selector=label_selector
)
)
# Jobs (propagation_policy=Background deletes child pods).
_swallow_404(
lambda: self.batch_api.delete_collection_namespaced_job(
namespace=namespace,
label_selector=label_selector,
propagation_policy="Background",
)
)
# Services — no delete_collection on core_api for services;
# list + delete individually.
try:
svcs = self.core_api.list_namespaced_service(
namespace=namespace, label_selector=label_selector
)
for svc in svcs.items:
_swallow_404(
lambda n=svc.metadata.name: self.core_api.delete_namespaced_service(
name=n, namespace=namespace
)
)
except ApiException as e:
if e.status != 404:
raise
# ConfigMaps, Secrets, Endpoints.
_swallow_404(
lambda: self.core_api.delete_collection_namespaced_config_map(
namespace=namespace, label_selector=label_selector
)
)
_swallow_404(
lambda: self.core_api.delete_collection_namespaced_secret(
namespace=namespace, label_selector=label_selector
)
)
# Endpoints usually GC with Services, but delete explicitly for
# external-services Endpoints we create directly.
try:
eps = self.core_api.list_namespaced_endpoints(
namespace=namespace, label_selector=label_selector
)
for ep in eps.items:
_swallow_404(
lambda n=ep.metadata.name: self.core_api.delete_namespaced_endpoints(
name=n, namespace=namespace
)
)
except ApiException as e:
if e.status != 404:
raise
# Lingering Pods (shouldn't exist after Deployment/Job deletion,
# but handles standalone pods if any were created).
_swallow_404(
lambda: self.core_api.delete_collection_namespaced_pod(
namespace=namespace, label_selector=label_selector
)
)
if delete_volumes:
# Namespaced PVCs.
_swallow_404(
lambda: self.core_api.delete_collection_namespaced_persistent_volume_claim(
namespace=namespace, label_selector=label_selector
)
)
# Cluster-scoped PVs.
try:
pvs = self.core_api.list_persistent_volume(
label_selector=label_selector
)
for pv in pvs.items:
if opts.o.debug:
print(f"Deleting PV: {pv.metadata.name}")
@@ -922,19 +1046,6 @@ class K8sDeployer(Deployer):
if opts.o.debug:
print(f"Error listing PVs: {e}")
# Delete the namespace to ensure clean slate.
# Resources created by older laconic-so versions lack labels, so
# label-based deletion can't find them. Namespace deletion is the
# only reliable cleanup.
self._delete_namespace()
# Wait for namespace to finish terminating before returning,
# so that up() can recreate it immediately.
self._wait_for_namespace_gone()
if self.is_kind() and not self.skip_cluster_management:
# Destroy the kind cluster
destroy_cluster(self.kind_cluster_name)
def status(self):
self.connect_api()
# Call whatever API we need to get the running container list