diff --git a/stack_orchestrator/deploy/k8s/cluster_info.py b/stack_orchestrator/deploy/k8s/cluster_info.py
index 88cb2a21..2c24e02f 100644
--- a/stack_orchestrator/deploy/k8s/cluster_info.py
+++ b/stack_orchestrator/deploy/k8s/cluster_info.py
@@ -118,6 +118,17 @@ class ClusterInfo:
             volumes.extend(named_volumes_from_pod_files(self.parsed_job_yaml_map))
         return volumes
 
+    def _stack_labels(self, extra: Optional[dict] = None) -> dict:
+        """Standard resource labels. Use on every k8s resource SO creates so
+        label-based cleanup (down by stack) can find them all.
+        """
+        labels = {"app": self.app_name}
+        if self.stack_name:
+            labels["app.kubernetes.io/stack"] = self.stack_name
+        if extra:
+            labels.update(extra)
+        return labels
+
     def get_nodeports(self):
         nodeports = []
         for pod_name in self.parsed_pod_yaml_map:
@@ -151,7 +162,7 @@ class ClusterInfo:
                         f"{self.app_name}-nodeport-"
                         f"{pod_port}-{protocol.lower()}"
                     ),
-                    labels={"app": self.app_name},
+                    labels=self._stack_labels(),
                 ),
                 spec=client.V1ServiceSpec(
                     type="NodePort",
@@ -268,7 +279,7 @@ class ClusterInfo:
         ingress = client.V1Ingress(
             metadata=client.V1ObjectMeta(
                 name=f"{self.app_name}-ingress",
-                labels={"app": self.app_name},
+                labels=self._stack_labels(),
                 annotations=ingress_annotations,
             ),
             spec=spec,
@@ -323,7 +334,7 @@ class ClusterInfo:
         service = client.V1Service(
             metadata=client.V1ObjectMeta(
                 name=f"{self.app_name}-service",
-                labels={"app": self.app_name},
+                labels=self._stack_labels(),
             ),
             spec=client.V1ServiceSpec(
                 type="ClusterIP",
@@ -355,10 +366,9 @@ class ClusterInfo:
                 self.spec.get_volume_resources_for(volume_name)
                 or global_resources
             )
-            labels = {
-                "app": self.app_name,
-                "volume-label": f"{self.app_name}-{volume_name}",
-            }
+            labels = self._stack_labels(
+                {"volume-label": f"{self.app_name}-{volume_name}"}
+            )
             if volume_path:
                 storage_class_name = "manual"
                 k8s_volume_name = f"{self.app_name}-{volume_name}"
@@ -418,7 +428,7 @@ class ClusterInfo:
             spec = client.V1ConfigMap(
                 metadata=client.V1ObjectMeta(
                     name=f"{self.app_name}-{cfg_map_name}",
-                    labels={"app": self.app_name, "configmap-label": cfg_map_name},
+                    labels=self._stack_labels({"configmap-label": cfg_map_name}),
                 ),
                 binary_data=data,
             )
@@ -482,10 +492,9 @@ class ClusterInfo:
             pv = client.V1PersistentVolume(
                 metadata=client.V1ObjectMeta(
                     name=f"{self.app_name}-{volume_name}",
-                    labels={
-                        "app": self.app_name,
-                        "volume-label": f"{self.app_name}-{volume_name}",
-                    },
+                    labels=self._stack_labels(
+                        {"volume-label": f"{self.app_name}-{volume_name}"}
+                    ),
                 ),
                 spec=spec,
             )
@@ -737,9 +746,7 @@ class ClusterInfo:
         Returns (annotations, labels, affinity, tolerations).
""" annotations = None - labels = {"app": self.app_name} - if self.stack_name: - labels["app.kubernetes.io/stack"] = self.stack_name + labels = self._stack_labels() affinity = None tolerations = None @@ -920,21 +927,11 @@ class ClusterInfo: kind="Deployment", metadata=client.V1ObjectMeta( name=deployment_name, - labels={ - "app": self.app_name, - **( - { - "app.kubernetes.io/stack": self.stack_name, - } - if self.stack_name - else {} - ), - **( - {"app.kubernetes.io/component": pod_name} - if multi_pod - else {} - ), - }, + labels=self._stack_labels( + {"app.kubernetes.io/component": pod_name} + if multi_pod + else None + ), ), spec=spec, ) @@ -1001,7 +998,7 @@ class ClusterInfo: service = client.V1Service( metadata=client.V1ObjectMeta( name=f"{self.app_name}-{pod_name}-service", - labels={"app": self.app_name}, + labels=self._stack_labels(), ), spec=client.V1ServiceSpec( type="ClusterIP", @@ -1054,14 +1051,9 @@ class ClusterInfo: # Use a distinct app label for job pods so they don't get # picked up by pods_in_deployment() which queries app={app_name}. - pod_labels = { - "app": f"{self.app_name}-job", - **( - {"app.kubernetes.io/stack": self.stack_name} - if self.stack_name - else {} - ), - } + # Use a distinct app label for job pods (see comment above) so we + # still build via _stack_labels then override. + pod_labels = self._stack_labels({"app": f"{self.app_name}-job"}) template = client.V1PodTemplateSpec( metadata=client.V1ObjectMeta(labels=pod_labels), spec=client.V1PodSpec( @@ -1076,14 +1068,7 @@ class ClusterInfo: template=template, backoff_limit=0, ) - job_labels = { - "app": self.app_name, - **( - {"app.kubernetes.io/stack": self.stack_name} - if self.stack_name - else {} - ), - } + job_labels = self._stack_labels() job = client.V1Job( api_version="batch/v1", kind="Job", @@ -1121,7 +1106,7 @@ class ClusterInfo: svc = client.V1Service( metadata=client.V1ObjectMeta( name=name, - labels={"app": self.app_name}, + labels=self._stack_labels(), ), spec=client.V1ServiceSpec( type="ExternalName", @@ -1138,7 +1123,7 @@ class ClusterInfo: svc = client.V1Service( metadata=client.V1ObjectMeta( name=name, - labels={"app": self.app_name}, + labels=self._stack_labels(), ), spec=client.V1ServiceSpec( cluster_ip="None", @@ -1156,7 +1141,7 @@ class ClusterInfo: svc = client.V1Service( metadata=client.V1ObjectMeta( name=name, - labels={"app": self.app_name}, + labels=self._stack_labels(), ), spec=client.V1ServiceSpec( cluster_ip="None", @@ -1199,7 +1184,7 @@ class ClusterInfo: secret = client.V1Secret( metadata=client.V1ObjectMeta( name=secret_name, - labels={"app": self.app_name}, + labels=self._stack_labels(), ), data=secret_data, ) diff --git a/stack_orchestrator/deploy/k8s/deploy_k8s.py b/stack_orchestrator/deploy/k8s/deploy_k8s.py index a9a227fc..c51f93b9 100644 --- a/stack_orchestrator/deploy/k8s/deploy_k8s.py +++ b/stack_orchestrator/deploy/k8s/deploy_k8s.py @@ -189,7 +189,7 @@ class K8sDeployer(Deployer): ns = client.V1Namespace( metadata=client.V1ObjectMeta( name=self.k8s_namespace, - labels={"app": self.cluster_info.app_name}, + labels=self.cluster_info._stack_labels(), ) ) self.core_api.create_namespace(body=ns) @@ -475,7 +475,7 @@ class K8sDeployer(Deployer): endpoints = client.V1Endpoints( metadata=client.V1ObjectMeta( name=name, - labels={"app": self.cluster_info.app_name}, + labels=self.cluster_info._stack_labels(), ), subsets=[ client.V1EndpointSubset( @@ -535,7 +535,7 @@ class K8sDeployer(Deployer): endpoints = client.V1Endpoints( metadata=client.V1ObjectMeta( name=name, - 
labels={"app": self.cluster_info.app_name}, + labels=self.cluster_info._stack_labels(), ), subsets=[ client.V1EndpointSubset( @@ -905,12 +905,136 @@ class K8sDeployer(Deployer): self.skip_cluster_management = skip_cluster_management self.connect_api() - app_label = f"app={self.cluster_info.app_name}" + # Delete by stack label so multiple stacks sharing a namespace are + # cleaned up independently. Fall back to the app label for stacks + # that predate the stack label. + stack_name = self.cluster_info.stack_name + if stack_name: + label_selector = f"app.kubernetes.io/stack={stack_name}" + else: + label_selector = f"app={self.cluster_info.app_name}" - # PersistentVolumes are cluster-scoped (not namespaced), so delete by label - if volumes: + ns = self.k8s_namespace + # Namespace may not exist yet on first-time deployments. + try: + self.core_api.read_namespace(name=ns) + except ApiException as e: + if e.status == 404: + if opts.o.debug: + print(f"Namespace {ns} not found; nothing to delete") + if self.is_kind() and not self.skip_cluster_management: + destroy_cluster(self.kind_cluster_name) + return + raise + + self._delete_labeled_resources(ns, label_selector, delete_volumes=volumes) + + if self.is_kind() and not self.skip_cluster_management: + destroy_cluster(self.kind_cluster_name) + + def _delete_labeled_resources( + self, namespace: str, label_selector: str, delete_volumes: bool + ): + """Delete all stack-labeled resources in the namespace. + + Keeps the namespace Active so that a subsequent up() can recreate + resources without racing against k8s namespace termination. + """ + if opts.o.dry_run: + print( + f"Dry run: would delete resources in {namespace} " + f"matching {label_selector}" + ) + return + + def _swallow_404(fn): try: - pvs = self.core_api.list_persistent_volume(label_selector=app_label) + fn() + except ApiException as e: + if e.status not in (404, 405): + raise + + # Ingresses first so external traffic stops before pods disappear. + _swallow_404( + lambda: self.networking_api.delete_collection_namespaced_ingress( + namespace=namespace, label_selector=label_selector + ) + ) + # Deployments (owns ReplicaSets + Pods via garbage collection). + _swallow_404( + lambda: self.apps_api.delete_collection_namespaced_deployment( + namespace=namespace, label_selector=label_selector + ) + ) + # Jobs (propagation_policy=Background deletes child pods). + _swallow_404( + lambda: self.batch_api.delete_collection_namespaced_job( + namespace=namespace, + label_selector=label_selector, + propagation_policy="Background", + ) + ) + # Services — no delete_collection on core_api for services; + # list + delete individually. + try: + svcs = self.core_api.list_namespaced_service( + namespace=namespace, label_selector=label_selector + ) + for svc in svcs.items: + _swallow_404( + lambda n=svc.metadata.name: self.core_api.delete_namespaced_service( + name=n, namespace=namespace + ) + ) + except ApiException as e: + if e.status != 404: + raise + # ConfigMaps, Secrets, Endpoints. + _swallow_404( + lambda: self.core_api.delete_collection_namespaced_config_map( + namespace=namespace, label_selector=label_selector + ) + ) + _swallow_404( + lambda: self.core_api.delete_collection_namespaced_secret( + namespace=namespace, label_selector=label_selector + ) + ) + # Endpoints usually GC with Services, but delete explicitly for + # external-services Endpoints we create directly. 
+        try:
+            eps = self.core_api.list_namespaced_endpoints(
+                namespace=namespace, label_selector=label_selector
+            )
+            for ep in eps.items:
+                _swallow_404(
+                    lambda n=ep.metadata.name: self.core_api.delete_namespaced_endpoints(
+                        name=n, namespace=namespace
+                    )
+                )
+        except ApiException as e:
+            if e.status != 404:
+                raise
+        # Lingering Pods: none should survive Deployment/Job deletion, but
+        # this also covers any standalone pods that were created.
+        _swallow_404(
+            lambda: self.core_api.delete_collection_namespaced_pod(
+                namespace=namespace, label_selector=label_selector
+            )
+        )
+
+        if delete_volumes:
+            # Namespaced PVCs.
+            _swallow_404(
+                lambda: self.core_api.delete_collection_namespaced_persistent_volume_claim(
+                    namespace=namespace, label_selector=label_selector
+                )
+            )
+            # Cluster-scoped PVs.
+            try:
+                pvs = self.core_api.list_persistent_volume(
+                    label_selector=label_selector
+                )
                 for pv in pvs.items:
                     if opts.o.debug:
                         print(f"Deleting PV: {pv.metadata.name}")
@@ -922,19 +1046,6 @@ class K8sDeployer(Deployer):
                 if opts.o.debug:
                     print(f"Error listing PVs: {e}")
 
-        # Delete the namespace to ensure clean slate.
-        # Resources created by older laconic-so versions lack labels, so
-        # label-based deletion can't find them. Namespace deletion is the
-        # only reliable cleanup.
-        self._delete_namespace()
-        # Wait for namespace to finish terminating before returning,
-        # so that up() can recreate it immediately.
-        self._wait_for_namespace_gone()
-
-        if self.is_kind() and not self.skip_cluster_management:
-            # Destroy the kind cluster
-            destroy_cluster(self.kind_cluster_name)
-
     def status(self):
         self.connect_api()
         # Call whatever API we need to get the running container list
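
Reviewer notes (not part of the patch):

_stack_labels() centralizes the label set, and the job-pod override works
because "extra" is applied last, so it can replace the "app" key. A
standalone mirror of the logic (hypothetical helper name and example values,
not imported from the patch):

    from typing import Optional

    def stack_labels(app_name: str, stack_name: Optional[str],
                     extra: Optional[dict] = None) -> dict:
        labels = {"app": app_name}
        if stack_name:
            labels["app.kubernetes.io/stack"] = stack_name
        if extra:
            labels.update(extra)  # extras win, e.g. the job-pod app override
        return labels

    print(stack_labels("myapp", "mystack"))
    # {'app': 'myapp', 'app.kubernetes.io/stack': 'mystack'}
    print(stack_labels("myapp", "mystack", {"app": "myapp-job"}))
    # {'app': 'myapp-job', 'app.kubernetes.io/stack': 'mystack'}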
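
The per-item Service and Endpoints deletes bind each resource name as a
lambda default argument (lambda n=svc.metadata.name: ...) because Python
closures bind loop variables late; a minimal demo of the gotcha this avoids:

    names = ["svc-a", "svc-b", "svc-c"]
    late = [lambda: n for n in names]
    captured = [lambda n=n: n for n in names]
    print([f() for f in late])      # ['svc-c', 'svc-c', 'svc-c']
    print([f() for f in captured])  # ['svc-a', 'svc-b', 'svc-c']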
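
PersistentVolumes are cluster-scoped, so the cleanup lists them by label and
deletes them one at a time instead of using a namespaced delete_collection.
A self-contained sketch of that pattern (example selector, assumes a local
kubeconfig; not the patch's exact code):

    from kubernetes import client, config
    from kubernetes.client.rest import ApiException

    config.load_kube_config()
    core = client.CoreV1Api()

    selector = "app.kubernetes.io/stack=mystack"  # example stack name
    for pv in core.list_persistent_volume(label_selector=selector).items:
        try:
            core.delete_persistent_volume(name=pv.metadata.name)
        except ApiException as e:
            if e.status != 404:  # already gone is fine
                raise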