From fb69cc58ffd015e187d208910b233c140128046c Mon Sep 17 00:00:00 2001 From: "A. F. Dudley" Date: Tue, 3 Mar 2026 05:28:52 +0000 Subject: [PATCH 01/19] feat(k8s): map compose service ports to Kind extraPortMappings and support hostNetwork Kind's extraPortMappings only included ports 80/443 for Caddy. Compose service ports (RPC, gossip, UDP) were never forwarded, making them unreachable from the host. Also adds hostNetwork/dnsPolicy to the k8s pod spec when any compose service uses network_mode: host. Co-Authored-By: Claude Opus 4.6 --- stack_orchestrator/deploy/k8s/cluster_info.py | 13 ++++++++++ stack_orchestrator/deploy/k8s/helpers.py | 24 +++++++++++++++++++ 2 files changed, 37 insertions(+) diff --git a/stack_orchestrator/deploy/k8s/cluster_info.py b/stack_orchestrator/deploy/k8s/cluster_info.py index da24bdc2..161fbd03 100644 --- a/stack_orchestrator/deploy/k8s/cluster_info.py +++ b/stack_orchestrator/deploy/k8s/cluster_info.py @@ -394,6 +394,14 @@ class ClusterInfo: result.append(pv) return result + def _any_service_has_host_network(self): + for pod_name in self.parsed_pod_yaml_map: + pod = self.parsed_pod_yaml_map[pod_name] + for svc in pod.get("services", {}).values(): + if svc.get("network_mode") == "host": + return True + return False + # TODO: put things like image pull policy into an object-scope struct def get_deployment(self, image_pull_policy: Optional[str] = None): containers = [] @@ -568,6 +576,7 @@ class ClusterInfo: ) ) + use_host_network = self._any_service_has_host_network() template = client.V1PodTemplateSpec( metadata=client.V1ObjectMeta(annotations=annotations, labels=labels), spec=client.V1PodSpec( @@ -577,6 +586,10 @@ class ClusterInfo: affinity=affinity, tolerations=tolerations, runtime_class_name=self.spec.get_runtime_class(), + host_network=use_host_network or None, + dns_policy=( + "ClusterFirstWithHostNet" if use_host_network else None + ), ), ) spec = client.V1DeploymentSpec( diff --git a/stack_orchestrator/deploy/k8s/helpers.py b/stack_orchestrator/deploy/k8s/helpers.py index 8b367f86..4d9cbe3b 100644 --- a/stack_orchestrator/deploy/k8s/helpers.py +++ b/stack_orchestrator/deploy/k8s/helpers.py @@ -683,11 +683,35 @@ def _generate_kind_port_mappings_from_services(parsed_pod_files): def _generate_kind_port_mappings(parsed_pod_files): port_definitions = [] + seen = set() # Map port 80 and 443 for the Caddy ingress controller (HTTPS support) for port_string in ["80", "443"]: port_definitions.append( f" - containerPort: {port_string}\n hostPort: {port_string}\n" ) + seen.add((port_string, "TCP")) + # Map ports declared in compose services + for pod in parsed_pod_files: + parsed_pod_file = parsed_pod_files[pod] + if "services" in parsed_pod_file: + for service_name in parsed_pod_file["services"]: + service_obj = parsed_pod_file["services"][service_name] + for port_entry in service_obj.get("ports", []): + port_str = str(port_entry) + protocol = "TCP" + if "/" in port_str: + port_str, proto = port_str.split("/", 1) + protocol = proto.upper() + if ":" in port_str: + port_str = port_str.split(":")[-1] + port_num = port_str.strip("'\"") + if (port_num, protocol) not in seen: + seen.add((port_num, protocol)) + port_definitions.append( + f" - containerPort: {port_num}\n" + f" hostPort: {port_num}\n" + f" protocol: {protocol}\n" + ) return ( "" if len(port_definitions) == 0 From f305214ce1b6a545c2d5c1533e7a3b5e6b391f01 Mon Sep 17 00:00:00 2001 From: "A. F. 
Dudley" Date: Tue, 3 Mar 2026 05:28:55 +0000 Subject: [PATCH 02/19] add local test runner script Co-Authored-By: Claude Opus 4.6 --- tests/scripts/run-test-local.sh | 53 +++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) create mode 100755 tests/scripts/run-test-local.sh diff --git a/tests/scripts/run-test-local.sh b/tests/scripts/run-test-local.sh new file mode 100755 index 00000000..f6f32346 --- /dev/null +++ b/tests/scripts/run-test-local.sh @@ -0,0 +1,53 @@ +#!/bin/bash +# Run a test suite locally in an isolated venv. +# +# Usage: +# ./tests/scripts/run-test-local.sh +# +# Examples: +# ./tests/scripts/run-test-local.sh tests/webapp-test/run-webapp-test.sh +# ./tests/scripts/run-test-local.sh tests/smoke-test/run-smoke-test.sh +# ./tests/scripts/run-test-local.sh tests/k8s-deploy/run-deploy-test.sh +# +# The script creates a temporary venv, installs shiv, builds the laconic-so +# package, runs the requested test, then cleans up. + +set -euo pipefail + +if [ $# -lt 1 ]; then + echo "Usage: $0 [args...]" + exit 1 +fi + +TEST_SCRIPT="$1" +shift + +if [ ! -f "$TEST_SCRIPT" ]; then + echo "Error: $TEST_SCRIPT not found" + exit 1 +fi + +REPO_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)" +VENV_DIR=$(mktemp -d /tmp/so-test-XXXXXX) + +cleanup() { + echo "Cleaning up venv: $VENV_DIR" + rm -rf "$VENV_DIR" +} +trap cleanup EXIT + +cd "$REPO_DIR" + +echo "==> Creating venv in $VENV_DIR" +python3 -m venv "$VENV_DIR" +source "$VENV_DIR/bin/activate" + +echo "==> Installing shiv" +pip install -q shiv + +echo "==> Building laconic-so package" +./scripts/create_build_tag_file.sh +./scripts/build_shiv_package.sh + +echo "==> Running: $TEST_SCRIPT $*" +exec "./$TEST_SCRIPT" "$@" From 7cd5043a835ff8866b58bf7fd8e3173aaddb89af Mon Sep 17 00:00:00 2001 From: "A. F. Dudley" Date: Wed, 4 Mar 2026 16:41:16 +0000 Subject: [PATCH 03/19] feat(k8s): add kind-mount-root for unified kind extraMount When kind-mount-root is set in spec.yml, emit a single extraMount mapping the root to /mnt instead of per-volume mounts. This allows adding new volumes without recreating the kind cluster. Volumes whose host path is under the root are skipped for individual extraMounts and their PV paths resolve to /mnt/{relative_path}. Volumes outside the root keep individual extraMounts as before. 
Co-Authored-By: Claude Opus 4.6 --- stack_orchestrator/constants.py | 1 + stack_orchestrator/deploy/k8s/cluster_info.py | 6 +++++- stack_orchestrator/deploy/k8s/helpers.py | 21 ++++++++++++++++++- stack_orchestrator/deploy/spec.py | 3 +++ 4 files changed, 29 insertions(+), 2 deletions(-) diff --git a/stack_orchestrator/constants.py b/stack_orchestrator/constants.py index 75bd0ebc..2c0c8de0 100644 --- a/stack_orchestrator/constants.py +++ b/stack_orchestrator/constants.py @@ -45,3 +45,4 @@ runtime_class_key = "runtime-class" high_memlock_runtime = "high-memlock" high_memlock_spec_filename = "high-memlock-spec.json" acme_email_key = "acme-email" +kind_mount_root_key = "kind-mount-root" diff --git a/stack_orchestrator/deploy/k8s/cluster_info.py b/stack_orchestrator/deploy/k8s/cluster_info.py index 161fbd03..dc967469 100644 --- a/stack_orchestrator/deploy/k8s/cluster_info.py +++ b/stack_orchestrator/deploy/k8s/cluster_info.py @@ -371,7 +371,11 @@ class ClusterInfo: if self.spec.is_kind_deployment(): host_path = client.V1HostPathVolumeSource( - path=get_kind_pv_bind_mount_path(volume_name) + path=get_kind_pv_bind_mount_path( + volume_name, + kind_mount_root=self.spec.get_kind_mount_root(), + host_path=volume_path, + ) ) else: host_path = client.V1HostPathVolumeSource(path=volume_path) diff --git a/stack_orchestrator/deploy/k8s/helpers.py b/stack_orchestrator/deploy/k8s/helpers.py index 4d9cbe3b..a316f0c7 100644 --- a/stack_orchestrator/deploy/k8s/helpers.py +++ b/stack_orchestrator/deploy/k8s/helpers.py @@ -440,7 +440,11 @@ def named_volumes_from_pod_files(parsed_pod_files): return named_volumes -def get_kind_pv_bind_mount_path(volume_name: str): +def get_kind_pv_bind_mount_path(volume_name: str, kind_mount_root: Optional[str] = None, + host_path: Optional[str] = None): + if kind_mount_root and host_path and host_path.startswith(kind_mount_root): + rel = os.path.relpath(host_path, kind_mount_root) + return f"/mnt/{rel}" return f"/mnt/{volume_name}" @@ -563,6 +567,7 @@ def _generate_kind_mounts(parsed_pod_files, deployment_dir, deployment_context): volume_definitions = [] volume_host_path_map = _get_host_paths_for_volumes(deployment_context) seen_host_path_mounts = set() # Track to avoid duplicate mounts + kind_mount_root = deployment_context.spec.get_kind_mount_root() # Cluster state backup for offline data recovery (unique per deployment) # etcd contains all k8s state; PKI certs needed to decrypt etcd offline @@ -583,6 +588,17 @@ def _generate_kind_mounts(parsed_pod_files, deployment_dir, deployment_context): f" - hostPath: {pki_host_path}\n" f" containerPath: /etc/kubernetes/pki\n" ) + # When kind-mount-root is set, emit a single extraMount for the root. + # Individual volumes whose host path starts with the root are covered + # by this single mount and don't need their own extraMount entries. + mount_root_emitted = False + if kind_mount_root: + volume_definitions.append( + f" - hostPath: {kind_mount_root}\n" + f" containerPath: /mnt\n" + ) + mount_root_emitted = True + # Note these paths are relative to the location of the pod files (at present) # So we need to fix up to make them correct and absolute because kind assumes # relative to the cwd. 
@@ -642,6 +658,9 @@ def _generate_kind_mounts(parsed_pod_files, deployment_dir, deployment_context): volume_host_path_map[volume_name], deployment_dir, ) + # Skip individual extraMount if covered by mount root + if mount_root_emitted and str(host_path).startswith(kind_mount_root): + continue container_path = get_kind_pv_bind_mount_path( volume_name ) diff --git a/stack_orchestrator/deploy/spec.py b/stack_orchestrator/deploy/spec.py index e5647b04..c62d0aea 100644 --- a/stack_orchestrator/deploy/spec.py +++ b/stack_orchestrator/deploy/spec.py @@ -202,5 +202,8 @@ class Spec: def is_kind_deployment(self): return self.get_deployment_type() in [constants.k8s_kind_deploy_type] + def get_kind_mount_root(self): + return self.obj.get(constants.kind_mount_root_key) + def is_docker_deployment(self): return self.get_deployment_type() in [constants.compose_deploy_type] From 26dea540e98cf145ab9b83a5cfb648d56334f8e8 Mon Sep 17 00:00:00 2001 From: "A. F. Dudley" Date: Wed, 4 Mar 2026 17:13:08 +0000 Subject: [PATCH 04/19] fix(k8s): use deployment namespace for pod and container lookups pods_in_deployment() and containers_in_pod() were hardcoded to search the "default" namespace, but deployments are created in a per-deployment namespace (laconic-{name}). This caused logs() to report "Pods not running" even when pods were healthy. Co-Authored-By: Claude Opus 4.6 --- stack_orchestrator/deploy/k8s/deploy_k8s.py | 4 ++-- stack_orchestrator/deploy/k8s/helpers.py | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/stack_orchestrator/deploy/k8s/deploy_k8s.py b/stack_orchestrator/deploy/k8s/deploy_k8s.py index f7f8ad43..58801d33 100644 --- a/stack_orchestrator/deploy/k8s/deploy_k8s.py +++ b/stack_orchestrator/deploy/k8s/deploy_k8s.py @@ -574,14 +574,14 @@ class K8sDeployer(Deployer): def logs(self, services, tail, follow, stream): self.connect_api() - pods = pods_in_deployment(self.core_api, self.cluster_info.app_name) + pods = pods_in_deployment(self.core_api, self.cluster_info.app_name, namespace=self.k8s_namespace) if len(pods) > 1: print("Warning: more than one pod in the deployment") if len(pods) == 0: log_data = "******* Pods not running ********\n" else: k8s_pod_name = pods[0] - containers = containers_in_pod(self.core_api, k8s_pod_name) + containers = containers_in_pod(self.core_api, k8s_pod_name, namespace=self.k8s_namespace) # If pod not started, logs request below will throw an exception try: log_data = "" diff --git a/stack_orchestrator/deploy/k8s/helpers.py b/stack_orchestrator/deploy/k8s/helpers.py index a316f0c7..c7b9703d 100644 --- a/stack_orchestrator/deploy/k8s/helpers.py +++ b/stack_orchestrator/deploy/k8s/helpers.py @@ -393,10 +393,10 @@ def load_images_into_kind(kind_cluster_name: str, image_set: Set[str]): raise DeployerException(f"kind load docker-image failed: {result}") -def pods_in_deployment(core_api: client.CoreV1Api, deployment_name: str): +def pods_in_deployment(core_api: client.CoreV1Api, deployment_name: str, namespace: str = "default"): pods = [] pod_response = core_api.list_namespaced_pod( - namespace="default", label_selector=f"app={deployment_name}" + namespace=namespace, label_selector=f"app={deployment_name}" ) if opts.o.debug: print(f"pod_response: {pod_response}") @@ -406,10 +406,10 @@ def pods_in_deployment(core_api: client.CoreV1Api, deployment_name: str): return pods -def containers_in_pod(core_api: client.CoreV1Api, pod_name: str) -> List[str]: +def containers_in_pod(core_api: client.CoreV1Api, pod_name: str, namespace: str = "default") -> 
List[str]: containers: List[str] = [] pod_response = cast( - client.V1Pod, core_api.read_namespaced_pod(pod_name, namespace="default") + client.V1Pod, core_api.read_namespaced_pod(pod_name, namespace=namespace) ) if opts.o.debug: print(f"pod_response: {pod_response}") From d090f2064e60ac0e1ffa10069b7cdcb437b0602b Mon Sep 17 00:00:00 2001 From: "A. F. Dudley" Date: Sat, 7 Mar 2026 08:47:12 +0000 Subject: [PATCH 05/19] docs: annotate spec.yml config layering conventions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Compose file owns application defaults. spec.yml config: section is for deployment-specific overrides only (hostnames, IPs, secrets). Start scripts should not have their own defaults — they read what the compose file provides. Annotations added: - CLAUDE.md: config layering table and anti-pattern callout - spec.py: Spec class docstring with good/bad config examples - deployment_create.py: _write_config_file docstring Co-Authored-By: Claude Opus 4.6 --- CLAUDE.md | 27 +++++++++++++++++ .../deploy/deployment_create.py | 12 ++++++++ stack_orchestrator/deploy/spec.py | 29 +++++++++++++++++++ 3 files changed, 68 insertions(+) diff --git a/CLAUDE.md b/CLAUDE.md index 845cbd22..33054f8d 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -114,6 +114,33 @@ One Kind cluster per host by design. Never request or expect separate clusters. - `helpers.py`: `create_cluster()`, etcd cleanup, kind operations - `cluster_info.py`: K8s resource generation (Deployment, Service, Ingress) +## spec.yml: Config Layering + +**The compose file is the single source of truth for application defaults.** + +The configuration chain is: compose defaults → spec.yml overrides → container env. + +| Layer | Owns | Example | +|-------|------|---------| +| **compose file** | All env vars and their defaults | `RPC_PORT: ${RPC_PORT:-8899}` | +| **spec.yml config:** | Deployment-specific overrides only | `GOSSIP_HOST: 10.0.0.1` | +| **start script** | Reads env vars, no defaults of its own | `${RPC_PORT}` | + +**What goes in spec.yml config:** +- Values unique to this deployment (hostnames, IPs, endpoints) +- Secrets (`$generate:hex:32$`) +- Overrides that differ from the compose default for this specific deployment + +**What does NOT go in spec.yml config:** +- Application defaults (ports, log levels, intervals, feature flags) +- Values that would be the same across all deployments of this stack +- Every env var the service accepts — that's the compose file's job + +**Anti-pattern:** Dumping all env vars from the compose file into spec.yml. +This creates three sources of truth (compose, spec, start script) that +inevitably diverge. If someone changes the default in the compose file, +spec.yml still has the old value and silently overrides it. + ## Insights and Observations ### Design Principles diff --git a/stack_orchestrator/deploy/deployment_create.py b/stack_orchestrator/deploy/deployment_create.py index 511445be..0e2effc2 100644 --- a/stack_orchestrator/deploy/deployment_create.py +++ b/stack_orchestrator/deploy/deployment_create.py @@ -639,6 +639,18 @@ def create_registry_secret(spec: Spec, deployment_name: str) -> Optional[str]: def _write_config_file( spec_file: Path, config_env_file: Path, deployment_name: Optional[str] = None ): + """Write spec.yml config: entries to config.env. + + The config: section in spec.yml should contain only deployment-specific + overrides — values that differ between deployments (hostnames, endpoints, + credentials, secrets via $generate:...$). 
+ + Application defaults (ports, log levels, feature flags, tuning params) + belong in the compose file's environment section. The compose file is + the single source of truth for what env vars a service accepts and + their default values. spec.yml overrides those defaults for a specific + deployment. + """ spec_content = get_parsed_deployment_spec(spec_file) config_vars = spec_content.get("config", {}) or {} diff --git a/stack_orchestrator/deploy/spec.py b/stack_orchestrator/deploy/spec.py index c62d0aea..072b035c 100644 --- a/stack_orchestrator/deploy/spec.py +++ b/stack_orchestrator/deploy/spec.py @@ -73,6 +73,35 @@ class Resources: class Spec: + """Deployment spec (spec.yml) — describes WHERE and HOW to deploy a stack. + + A spec.yml contains deployment-specific infrastructure configuration: + - stack: path to the stack definition + - deploy-to: target platform (k8s-kind, k8s, compose) + - network: ports, http-proxy, acme-email + - resources: CPU/memory limits and reservations + - security: privileged, capabilities, memlock + - volumes: host path mappings for persistent data + - configmaps: directories mounted as k8s ConfigMaps + - config: deployment-specific env var OVERRIDES (see below) + + The config: section is for deployment-specific values only — things + that differ between deployments (hostnames, endpoints, secrets). + Application defaults belong in the compose file's environment section, + not here. If a value would be the same across all deployments of this + stack, it belongs in the compose file, not in spec.yml. + + Good config: entries (deployment-specific): + VALIDATOR_ENTRYPOINT: my-cluster.example.com:8001 + PUBLIC_RPC_ADDRESS: my-node.example.com:8899 + GOSSIP_HOST: 10.0.0.1 + + Bad config: entries (these are application defaults): + RPC_PORT: '8899' # same everywhere, belongs in compose + LIMIT_LEDGER_SIZE: '50000000' # same everywhere, belongs in compose + RUST_LOG: info # same everywhere, belongs in compose + """ + obj: typing.Any file_path: Optional[Path] From d4dcbedd48bd3fe984fbc3fe17c6a51258aed6f5 Mon Sep 17 00:00:00 2001 From: "A. F. Dudley" Date: Sat, 7 Mar 2026 09:55:24 +0000 Subject: [PATCH 06/19] bug: deploy create doesn't auto-generate volume mappings for new pods Co-Authored-By: Claude Opus 4.6 --- TODO.md | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/TODO.md b/TODO.md index 349530c8..65439ab5 100644 --- a/TODO.md +++ b/TODO.md @@ -7,6 +7,25 @@ We need an "update stack" command in stack orchestrator and cleaner documentatio **Context**: Currently, `deploy init` generates a spec file and `deploy create` creates a deployment directory. The `deployment update` command (added by Thomas Lackey) only syncs env vars and restarts - it doesn't regenerate configurations. There's a gap in the workflow for updating stack configurations after initial deployment. +## Bugs + +### `deploy create` doesn't auto-generate volume mappings for new pods + +When a new pod is added to `stack.yml` (e.g. `monitoring`), `deploy create` +does not generate default host path mappings in spec.yml for the new pod's +volumes. The deployment then fails at scheduling because the PVCs don't exist. + +**Expected**: `deploy create` enumerates all volumes from all compose files +in the stack and generates default host paths for any that aren't already +mapped in the spec.yml `volumes:` section. + +**Actual**: Only volumes already in spec.yml get PVs. New volumes are silently +missing, causing `FailedScheduling: persistentvolumeclaim not found`. 
+
+**Workaround**: Manually add volume entries to spec.yml and create host dirs.
+
+**Files**: `deployment_create.py` (`_write_config_file`, volume handling)
+
 ## Architecture Refactoring
 
 ### Separate Deployer from Stack Orchestrator CLI

From eae4c3cdffac48d2b9c8ea685dc634db1de17060 Mon Sep 17 00:00:00 2001
From: "A. F. Dudley"
Date: Sat, 7 Mar 2026 10:26:10 +0000
Subject: [PATCH 08/19] feat(k8s): per-service resource layering in deployer

Resolve container resources using layered priority:

1. spec.yml per-container override (resources.containers.<container_name>)
2. Compose file deploy.resources block
3. spec.yml global resources
4. DEFAULT_CONTAINER_RESOURCES fallback

This prevents monitoring sidecars from inheriting the validator's
resource requests (e.g., 256G memory). Each service gets appropriate
resources from its compose definition unless explicitly overridden.

Note: existing deployments with a global resources block in spec.yml
can remove it once compose files declare per-service defaults.
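
For example (service names and sizes are illustrative), a sidecar
declares its own defaults in the compose file:

    services:
      node-exporter:
        deploy:
          resources:
            limits:
              memory: 512M

and spec.yml overrides only the container that needs it:

    resources:
      containers:
        validator:
          limits:
            memory: 256G

node-exporter then resolves to its compose-declared 512M instead of
inheriting the validator's global request.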
Co-Authored-By: Claude Opus 4.6 --- stack_orchestrator/deploy/k8s/cluster_info.py | 44 +++++++++++++++++-- stack_orchestrator/deploy/spec.py | 21 +++++++++ 2 files changed, 61 insertions(+), 4 deletions(-) diff --git a/stack_orchestrator/deploy/k8s/cluster_info.py b/stack_orchestrator/deploy/k8s/cluster_info.py index da24bdc2..2ebf96f2 100644 --- a/stack_orchestrator/deploy/k8s/cluster_info.py +++ b/stack_orchestrator/deploy/k8s/cluster_info.py @@ -394,13 +394,43 @@ class ClusterInfo: result.append(pv) return result + def _any_service_has_host_network(self): + for pod_name in self.parsed_pod_yaml_map: + pod = self.parsed_pod_yaml_map[pod_name] + for svc in pod.get("services", {}).values(): + if svc.get("network_mode") == "host": + return True + return False + + def _resolve_container_resources( + self, container_name: str, service_info: dict, global_resources: Resources + ) -> Resources: + """Resolve resources for a container using layered priority. + + Priority: spec per-container > compose deploy.resources + > spec global > DEFAULT + """ + # 1. Check spec.yml for per-container override + per_container = self.spec.get_container_resources_for(container_name) + if per_container: + return per_container + + # 2. Check compose service_info for deploy.resources + deploy_block = service_info.get("deploy", {}) + compose_resources = deploy_block.get("resources", {}) if deploy_block else {} + if compose_resources: + return Resources(compose_resources) + + # 3. Fall back to spec.yml global (already resolved with DEFAULT fallback) + return global_resources + # TODO: put things like image pull policy into an object-scope struct def get_deployment(self, image_pull_policy: Optional[str] = None): containers = [] services = {} - resources = self.spec.get_container_resources() - if not resources: - resources = DEFAULT_CONTAINER_RESOURCES + global_resources = self.spec.get_container_resources() + if not global_resources: + global_resources = DEFAULT_CONTAINER_RESOURCES for pod_name in self.parsed_pod_yaml_map: pod = self.parsed_pod_yaml_map[pod_name] services = pod["services"] @@ -483,6 +513,9 @@ class ClusterInfo: ) ) ] + container_resources = self._resolve_container_resources( + container_name, service_info, global_resources + ) container = client.V1Container( name=container_name, image=image_to_use, @@ -501,7 +534,7 @@ class ClusterInfo: if self.spec.get_capabilities() else None, ), - resources=to_k8s_resource_requirements(resources), + resources=to_k8s_resource_requirements(container_resources), ) containers.append(container) volumes = volumes_for_pod_files( @@ -568,6 +601,7 @@ class ClusterInfo: ) ) + use_host_network = self._any_service_has_host_network() template = client.V1PodTemplateSpec( metadata=client.V1ObjectMeta(annotations=annotations, labels=labels), spec=client.V1PodSpec( @@ -577,6 +611,8 @@ class ClusterInfo: affinity=affinity, tolerations=tolerations, runtime_class_name=self.spec.get_runtime_class(), + host_network=use_host_network or None, + dns_policy=("ClusterFirstWithHostNet" if use_host_network else None), ), ) spec = client.V1DeploymentSpec( diff --git a/stack_orchestrator/deploy/spec.py b/stack_orchestrator/deploy/spec.py index e5647b04..bd62779e 100644 --- a/stack_orchestrator/deploy/spec.py +++ b/stack_orchestrator/deploy/spec.py @@ -120,6 +120,27 @@ class Spec: self.obj.get(constants.resources_key, {}).get("containers", {}) ) + def get_container_resources_for( + self, container_name: str + ) -> typing.Optional[Resources]: + """Look up per-container resource overrides from 
spec.yml.
+
+        Checks resources.containers.<container_name> in the spec. Returns None
+        if no per-container override exists (caller falls back to other sources).
+        """
+        containers_block = self.obj.get(constants.resources_key, {}).get(
+            "containers", {}
+        )
+        if container_name in containers_block:
+            entry = containers_block[container_name]
+            # Only treat it as a per-container override if it's a dict with
+            # reservations/limits nested inside (not a top-level global key)
+            if isinstance(entry, dict) and (
+                "reservations" in entry or "limits" in entry
+            ):
+                return Resources(entry)
+        return None
+
     def get_volume_resources(self):
         return Resources(
             self.obj.get(constants.resources_key, {}).get(constants.volumes_key, {})
         )

From b6d6ad81455d91479884f2ffd05af41c8dc1d339 Mon Sep 17 00:00:00 2001
From: "A. F. Dudley"
Date: Wed, 4 Mar 2026 16:41:16 +0000
Subject: [PATCH 09/19] feat(k8s): add kind-mount-root for unified kind
 extraMount

When kind-mount-root is set in spec.yml, emit a single extraMount
mapping the root to /mnt instead of per-volume mounts. This allows
adding new volumes without recreating the kind cluster.

Volumes whose host path is under the root are skipped for individual
extraMounts and their PV paths resolve to /mnt/{relative_path}. Volumes
outside the root keep individual extraMounts as before.

Co-Authored-By: Claude Opus 4.6
---
 stack_orchestrator/constants.py               |  1 +
 stack_orchestrator/deploy/k8s/cluster_info.py |  6 +++++-
 stack_orchestrator/deploy/k8s/helpers.py      | 21 ++++++++++++++++++-
 stack_orchestrator/deploy/spec.py             |  3 +++
 4 files changed, 29 insertions(+), 2 deletions(-)

diff --git a/stack_orchestrator/constants.py b/stack_orchestrator/constants.py
index 75bd0ebc..2c0c8de0 100644
--- a/stack_orchestrator/constants.py
+++ b/stack_orchestrator/constants.py
@@ -45,3 +45,4 @@ runtime_class_key = "runtime-class"
 high_memlock_runtime = "high-memlock"
 high_memlock_spec_filename = "high-memlock-spec.json"
 acme_email_key = "acme-email"
+kind_mount_root_key = "kind-mount-root"
diff --git a/stack_orchestrator/deploy/k8s/cluster_info.py b/stack_orchestrator/deploy/k8s/cluster_info.py
index 2ebf96f2..818ffa25 100644
--- a/stack_orchestrator/deploy/k8s/cluster_info.py
+++ b/stack_orchestrator/deploy/k8s/cluster_info.py
@@ -371,7 +371,11 @@ class ClusterInfo:
 
         if self.spec.is_kind_deployment():
             host_path = client.V1HostPathVolumeSource(
-                path=get_kind_pv_bind_mount_path(volume_name)
+                path=get_kind_pv_bind_mount_path(
+                    volume_name,
+                    kind_mount_root=self.spec.get_kind_mount_root(),
+                    host_path=volume_path,
+                )
             )
         else:
             host_path = client.V1HostPathVolumeSource(path=volume_path)
diff --git a/stack_orchestrator/deploy/k8s/helpers.py b/stack_orchestrator/deploy/k8s/helpers.py
index 8b367f86..95e53d73 100644
--- a/stack_orchestrator/deploy/k8s/helpers.py
+++ b/stack_orchestrator/deploy/k8s/helpers.py
@@ -440,7 +440,11 @@ def named_volumes_from_pod_files(parsed_pod_files):
     return named_volumes
 
 
-def get_kind_pv_bind_mount_path(volume_name: str):
+def get_kind_pv_bind_mount_path(volume_name: str, kind_mount_root: Optional[str] = None,
+                                host_path: Optional[str] = None):
+    if kind_mount_root and host_path and host_path.startswith(kind_mount_root):
+        rel = os.path.relpath(host_path, kind_mount_root)
+        return f"/mnt/{rel}"
     return f"/mnt/{volume_name}"
 
 
@@ -563,6 +567,7 @@ def _generate_kind_mounts(parsed_pod_files, deployment_dir, deployment_context):
     volume_definitions = []
     volume_host_path_map = _get_host_paths_for_volumes(deployment_context)
     seen_host_path_mounts = set()  # Track to avoid duplicate mounts
+
kind_mount_root = deployment_context.spec.get_kind_mount_root() # Cluster state backup for offline data recovery (unique per deployment) # etcd contains all k8s state; PKI certs needed to decrypt etcd offline @@ -583,6 +588,17 @@ def _generate_kind_mounts(parsed_pod_files, deployment_dir, deployment_context): f" - hostPath: {pki_host_path}\n" f" containerPath: /etc/kubernetes/pki\n" ) + # When kind-mount-root is set, emit a single extraMount for the root. + # Individual volumes whose host path starts with the root are covered + # by this single mount and don't need their own extraMount entries. + mount_root_emitted = False + if kind_mount_root: + volume_definitions.append( + f" - hostPath: {kind_mount_root}\n" + f" containerPath: /mnt\n" + ) + mount_root_emitted = True + # Note these paths are relative to the location of the pod files (at present) # So we need to fix up to make them correct and absolute because kind assumes # relative to the cwd. @@ -642,6 +658,9 @@ def _generate_kind_mounts(parsed_pod_files, deployment_dir, deployment_context): volume_host_path_map[volume_name], deployment_dir, ) + # Skip individual extraMount if covered by mount root + if mount_root_emitted and str(host_path).startswith(kind_mount_root): + continue container_path = get_kind_pv_bind_mount_path( volume_name ) diff --git a/stack_orchestrator/deploy/spec.py b/stack_orchestrator/deploy/spec.py index bd62779e..c4cde6f8 100644 --- a/stack_orchestrator/deploy/spec.py +++ b/stack_orchestrator/deploy/spec.py @@ -223,5 +223,8 @@ class Spec: def is_kind_deployment(self): return self.get_deployment_type() in [constants.k8s_kind_deploy_type] + def get_kind_mount_root(self): + return self.obj.get(constants.kind_mount_root_key) + def is_docker_deployment(self): return self.get_deployment_type() in [constants.compose_deploy_type] From 929bdab8a421a96f0ef3bb4db6ad0b8a5cfea383 Mon Sep 17 00:00:00 2001 From: "A. F. Dudley" Date: Sat, 7 Mar 2026 12:58:04 +0000 Subject: [PATCH 10/19] fix(k8s): add HostToContainer mount propagation to kind-mount-root The kind-mount-root extraMount entry used kind's default propagation (None), so new bind mounts under the root on the host (e.g. zvols mounted under /srv/kind) were not visible inside the kind node until restart. Setting propagation to HostToContainer makes host-side mount changes propagate into the kind node automatically. 
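
The emitted entry in the kind node config (hostPath shown with an
example root of /srv/kind) now reads:

    extraMounts:
      - hostPath: /srv/kind
        containerPath: /mnt
        propagation: HostToContainer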
Co-Authored-By: Claude Opus 4.6 --- stack_orchestrator/deploy/k8s/helpers.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/stack_orchestrator/deploy/k8s/helpers.py b/stack_orchestrator/deploy/k8s/helpers.py index 95e53d73..ec136233 100644 --- a/stack_orchestrator/deploy/k8s/helpers.py +++ b/stack_orchestrator/deploy/k8s/helpers.py @@ -440,8 +440,11 @@ def named_volumes_from_pod_files(parsed_pod_files): return named_volumes -def get_kind_pv_bind_mount_path(volume_name: str, kind_mount_root: Optional[str] = None, - host_path: Optional[str] = None): +def get_kind_pv_bind_mount_path( + volume_name: str, + kind_mount_root: Optional[str] = None, + host_path: Optional[str] = None, +): if kind_mount_root and host_path and host_path.startswith(kind_mount_root): rel = os.path.relpath(host_path, kind_mount_root) return f"/mnt/{rel}" @@ -596,6 +599,7 @@ def _generate_kind_mounts(parsed_pod_files, deployment_dir, deployment_context): volume_definitions.append( f" - hostPath: {kind_mount_root}\n" f" containerPath: /mnt\n" + f" propagation: HostToContainer\n" ) mount_root_emitted = True @@ -658,8 +662,10 @@ def _generate_kind_mounts(parsed_pod_files, deployment_dir, deployment_context): volume_host_path_map[volume_name], deployment_dir, ) - # Skip individual extraMount if covered by mount root - if mount_root_emitted and str(host_path).startswith(kind_mount_root): + # Skip if covered by mount root + if mount_root_emitted and str(host_path).startswith( + kind_mount_root + ): continue container_path = get_kind_pv_bind_mount_path( volume_name From a11d40f2f340011f06d918c6552e1ac4ba8ad360 Mon Sep 17 00:00:00 2001 From: "A. F. Dudley" Date: Sat, 7 Mar 2026 13:07:12 +0000 Subject: [PATCH 11/19] fix(k8s): add HostToContainer mount propagation to kind extraMounts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Without propagation, rbind submounts on the host (e.g., XFS zvol at /srv/kind/solana) are invisible inside the kind node — it sees the underlying filesystem (ZFS) instead. This causes agave's io_uring to deadlock on ZFS transaction commits (D-state in dsl_dir_tempreserve_space). HostToContainer propagation ensures host submounts propagate into the kind node, so /mnt/solana correctly resolves to the XFS zvol. 
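
A sketch of how to verify (node container name assumes kind's default
cluster name; device and paths are examples):

    # On the host, mount the XFS zvol under a path covered by an extraMount
    mount /dev/zvol/tank/solana /srv/kind/solana
    # Inside the kind node the submount is now visible without a restart
    docker exec kind-control-plane findmnt -n -o FSTYPE /mnt/solana
    # -> xfs; without propagation this reports the parent filesystem (zfs)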
Co-Authored-By: Claude Opus 4.6 --- stack_orchestrator/deploy/k8s/helpers.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/stack_orchestrator/deploy/k8s/helpers.py b/stack_orchestrator/deploy/k8s/helpers.py index 8b367f86..ac4e8603 100644 --- a/stack_orchestrator/deploy/k8s/helpers.py +++ b/stack_orchestrator/deploy/k8s/helpers.py @@ -573,14 +573,18 @@ def _generate_kind_mounts(parsed_pod_files, deployment_dir, deployment_context): Path(f"./data/{backup_subdir}/etcd"), deployment_dir ) volume_definitions.append( - f" - hostPath: {etcd_host_path}\n" f" containerPath: /var/lib/etcd\n" + f" - hostPath: {etcd_host_path}\n" + f" containerPath: /var/lib/etcd\n" + f" propagation: HostToContainer\n" ) pki_host_path = _make_absolute_host_path( Path(f"./data/{backup_subdir}/pki"), deployment_dir ) volume_definitions.append( - f" - hostPath: {pki_host_path}\n" f" containerPath: /etc/kubernetes/pki\n" + f" - hostPath: {pki_host_path}\n" + f" containerPath: /etc/kubernetes/pki\n" + f" propagation: HostToContainer\n" ) # Note these paths are relative to the location of the pod files (at present) @@ -621,6 +625,7 @@ def _generate_kind_mounts(parsed_pod_files, deployment_dir, deployment_context): volume_definitions.append( f" - hostPath: {host_path}\n" f" containerPath: {container_path}\n" + f" propagation: HostToContainer\n" ) if opts.o.debug: print(f"Added host path mount: {host_path}") @@ -648,6 +653,7 @@ def _generate_kind_mounts(parsed_pod_files, deployment_dir, deployment_context): volume_definitions.append( f" - hostPath: {host_path}\n" f" containerPath: {container_path}\n" + f" propagation: HostToContainer\n" ) return ( "" @@ -703,7 +709,11 @@ def _generate_high_memlock_spec_mount(deployment_dir: Path): references an absolute path. """ spec_path = deployment_dir.joinpath(constants.high_memlock_spec_filename).resolve() - return f" - hostPath: {spec_path}\n" f" containerPath: {spec_path}\n" + return ( + f" - hostPath: {spec_path}\n" + f" containerPath: {spec_path}\n" + f" propagation: HostToContainer\n" + ) def generate_high_memlock_spec_json(): From 7f205732f2eadf01548ba2839a530d3cbce58ddb Mon Sep 17 00:00:00 2001 From: "A. F. Dudley" Date: Sat, 7 Mar 2026 17:56:13 +0000 Subject: [PATCH 12/19] fix(k8s): expand etcd cleanup whitelist to preserve core cluster services _clean_etcd_keeping_certs() only preserved /registry/secrets/caddy-system, deleting everything else including the kubernetes ClusterIP service in the default namespace. When kind recreated the cluster with the cleaned etcd, kube-apiserver saw existing data and skipped bootstrapping the service. kindnet panicked on KUBERNETES_SERVICE_HOST missing, blocking all pod networking. Expand the whitelist to also preserve: - /registry/services/specs/default/kubernetes - /registry/services/endpoints/default/kubernetes Loop over multiple prefixes instead of a single etcdctl get --prefix call. See docs/bug-laconic-so-etcd-cleanup.md in biscayne-agave-runbook. 
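
A quick post-restart sanity check (illustrative):

    # Must exist, or kindnet panics on a missing KUBERNETES_SERVICE_HOST
    kubectl get svc kubernetes -n default
    # Pod networking should come up (kindnet Running, pods scheduling)
    kubectl -n kube-system get pods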
Co-Authored-By: Claude Opus 4.6 --- stack_orchestrator/deploy/k8s/helpers.py | 28 +++++++++++++++++++----- 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/stack_orchestrator/deploy/k8s/helpers.py b/stack_orchestrator/deploy/k8s/helpers.py index ac4e8603..85f3d5f7 100644 --- a/stack_orchestrator/deploy/k8s/helpers.py +++ b/stack_orchestrator/deploy/k8s/helpers.py @@ -148,8 +148,16 @@ def _clean_etcd_keeping_certs(etcd_path: str) -> bool: etcd_image = "gcr.io/etcd-development/etcd:v3.5.9" temp_dir = "/tmp/laconic-etcd-cleanup" - # Whitelist: prefixes to KEEP - everything else gets deleted - keep_prefixes = "/registry/secrets/caddy-system" + # Whitelist: prefixes to KEEP - everything else gets deleted. + # Must include core cluster resources (kubernetes service, kube-system + # secrets) or kindnet panics on restart — KUBERNETES_SERVICE_HOST is + # injected from the kubernetes ClusterIP service in default namespace. + keep_prefixes = [ + "/registry/secrets/caddy-system", + "/registry/services/specs/default/kubernetes", + "/registry/services/endpoints/default/kubernetes", + ] + keep_prefixes_str = " ".join(keep_prefixes) # The etcd image is distroless (no shell). We extract the statically-linked # etcdctl binary and run it from alpine which has shell + jq support. @@ -195,13 +203,21 @@ def _clean_etcd_keeping_certs(etcd_path: str) -> bool: sleep 3 # Use alpine with extracted etcdctl to run commands (alpine has shell + jq) - # Export caddy secrets + # Export whitelisted keys (caddy TLS certs + core cluster services) docker run --rm \ -v {temp_dir}:/backup \ --network container:laconic-etcd-cleanup \ - $ALPINE_IMAGE sh -c \ - '/backup/etcdctl get --prefix "{keep_prefixes}" -w json \ - > /backup/kept.json 2>/dev/null || echo "{{}}" > /backup/kept.json' + $ALPINE_IMAGE sh -c ' + apk add --no-cache jq >/dev/null 2>&1 + echo "[]" > /backup/all-kvs.json + for prefix in {keep_prefixes_str}; do + /backup/etcdctl get --prefix "$prefix" -w json 2>/dev/null \ + | jq ".kvs // []" >> /backup/all-kvs.json || true + done + jq -s "add" /backup/all-kvs.json \ + | jq "{{kvs: .}}" > /backup/kept.json 2>/dev/null \ + || echo "{{}}" > /backup/kept.json + ' # Delete ALL registry keys docker run --rm \ From 806c1bb723f3a627a6efcee4414a5459ce8ba860 Mon Sep 17 00:00:00 2001 From: "A. F. Dudley" Date: Sun, 8 Mar 2026 02:33:20 +0000 Subject: [PATCH 13/19] refactor: rename `deployment update` to `deployment update-envs` MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The update command only patches environment variables and adds a restart annotation. It does not update ports, volumes, configmaps, or any other deployment spec. The old name was misleading — it implied a full spec update, causing operators to expect changes that never took effect. 
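
The renamed invocation (deployment directory path is illustrative):

    laconic-so deployment --dir ./my-deployment update-envs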
Co-Authored-By: Claude Opus 4.6 --- stack_orchestrator/deploy/compose/deploy_docker.py | 2 +- stack_orchestrator/deploy/deploy.py | 4 ++-- stack_orchestrator/deploy/deployer.py | 2 +- stack_orchestrator/deploy/deployment.py | 8 ++++---- stack_orchestrator/deploy/k8s/deploy_k8s.py | 2 +- stack_orchestrator/deploy/webapp/util.py | 2 +- 6 files changed, 10 insertions(+), 10 deletions(-) diff --git a/stack_orchestrator/deploy/compose/deploy_docker.py b/stack_orchestrator/deploy/compose/deploy_docker.py index c6397aad..fa0ac1d4 100644 --- a/stack_orchestrator/deploy/compose/deploy_docker.py +++ b/stack_orchestrator/deploy/compose/deploy_docker.py @@ -62,7 +62,7 @@ class DockerDeployer(Deployer): except DockerException as e: raise DeployerException(e) - def update(self): + def update_envs(self): if not opts.o.dry_run: try: return self.docker.compose.restart() diff --git a/stack_orchestrator/deploy/deploy.py b/stack_orchestrator/deploy/deploy.py index 86c1856c..f2bf977c 100644 --- a/stack_orchestrator/deploy/deploy.py +++ b/stack_orchestrator/deploy/deploy.py @@ -182,8 +182,8 @@ def status_operation(ctx): ctx.obj.deployer.status() -def update_operation(ctx): - ctx.obj.deployer.update() +def update_envs_operation(ctx): + ctx.obj.deployer.update_envs() def ps_operation(ctx): diff --git a/stack_orchestrator/deploy/deployer.py b/stack_orchestrator/deploy/deployer.py index d8fb656b..11fb6592 100644 --- a/stack_orchestrator/deploy/deployer.py +++ b/stack_orchestrator/deploy/deployer.py @@ -28,7 +28,7 @@ class Deployer(ABC): pass @abstractmethod - def update(self): + def update_envs(self): pass @abstractmethod diff --git a/stack_orchestrator/deploy/deployment.py b/stack_orchestrator/deploy/deployment.py index b76e6486..902780fb 100644 --- a/stack_orchestrator/deploy/deployment.py +++ b/stack_orchestrator/deploy/deployment.py @@ -31,7 +31,7 @@ from stack_orchestrator.deploy.deploy import ( exec_operation, logs_operation, create_deploy_context, - update_operation, + update_envs_operation, ) from stack_orchestrator.deploy.deploy_types import DeployCommandContext from stack_orchestrator.deploy.deployment_context import DeploymentContext @@ -210,11 +210,11 @@ def status(ctx): status_operation(ctx) -@command.command() +@command.command(name="update-envs") @click.pass_context -def update(ctx): +def update_envs(ctx): ctx.obj = make_deploy_context(ctx) - update_operation(ctx) + update_envs_operation(ctx) @command.command() diff --git a/stack_orchestrator/deploy/k8s/deploy_k8s.py b/stack_orchestrator/deploy/k8s/deploy_k8s.py index f7f8ad43..3b235538 100644 --- a/stack_orchestrator/deploy/k8s/deploy_k8s.py +++ b/stack_orchestrator/deploy/k8s/deploy_k8s.py @@ -598,7 +598,7 @@ class K8sDeployer(Deployer): log_data = "******* No logs available ********\n" return log_stream_from_string(log_data) - def update(self): + def update_envs(self): self.connect_api() ref_deployment = self.cluster_info.get_deployment() if not ref_deployment or not ref_deployment.metadata: diff --git a/stack_orchestrator/deploy/webapp/util.py b/stack_orchestrator/deploy/webapp/util.py index 3c536477..84accbcd 100644 --- a/stack_orchestrator/deploy/webapp/util.py +++ b/stack_orchestrator/deploy/webapp/util.py @@ -696,7 +696,7 @@ def deploy_to_k8s(deploy_record, deployment_dir, recreate, logger): if not deploy_record: commands_to_run = ["start"] else: - commands_to_run = ["update"] + commands_to_run = ["update-envs"] for command in commands_to_run: logger.log(f"Running {command} command on deployment dir: {deployment_dir}") From 
cc6acd5f0940c0f77e7c9faa5ec4f3f3c05a7415 Mon Sep 17 00:00:00 2001 From: "A. F. Dudley" Date: Sun, 8 Mar 2026 02:41:25 +0000 Subject: [PATCH 14/19] fix: default skip-cluster-management to true Destroying the kind cluster on stop/start is almost never the intent. The cluster holds PVs, ConfigMaps, and networking state that are expensive to recreate. Default to preserving the cluster; pass --perform-cluster-management explicitly when a full teardown is needed. Co-Authored-By: Claude Opus 4.6 --- stack_orchestrator/deploy/deployment.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/stack_orchestrator/deploy/deployment.py b/stack_orchestrator/deploy/deployment.py index 902780fb..1182d23f 100644 --- a/stack_orchestrator/deploy/deployment.py +++ b/stack_orchestrator/deploy/deployment.py @@ -114,7 +114,7 @@ def up(ctx, stay_attached, skip_cluster_management, extra_args): ) @click.option( "--skip-cluster-management/--perform-cluster-management", - default=False, + default=True, help="Skip cluster initialization/tear-down (only for kind-k8s deployments)", ) @click.argument("extra_args", nargs=-1) # help: command: up @@ -132,7 +132,7 @@ def start(ctx, stay_attached, skip_cluster_management, extra_args): ) @click.option( "--skip-cluster-management/--perform-cluster-management", - default=False, + default=True, help="Skip cluster initialization/tear-down (only for kind-k8s deployments)", ) @click.argument("extra_args", nargs=-1) # help: command: down @@ -151,7 +151,7 @@ def down(ctx, delete_volumes, skip_cluster_management, extra_args): ) @click.option( "--skip-cluster-management/--perform-cluster-management", - default=False, + default=True, help="Skip cluster initialization/tear-down (only for kind-k8s deployments)", ) @click.argument("extra_args", nargs=-1) # help: command: down From 1da69cf739352ffc8632c7612dd5a2b3541b8afc Mon Sep 17 00:00:00 2001 From: "A. F. Dudley" Date: Sun, 8 Mar 2026 04:15:03 +0000 Subject: [PATCH 15/19] fix(k8s): make deploy_k8s.py idempotent with create-or-replace semantics MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit All K8s resource creation in deploy_k8s.py now uses try-create, catch ApiException(409), then replace — matching the pattern already used for secrets in deployment_create.py. This allows `deployment start` to be safely re-run without 409 Conflict errors. Resources made idempotent: - Deployment (create_namespaced_deployment → replace on 409) - Service (create_namespaced_service → replace on 409) - Ingress (create_namespaced_ingress → replace on 409) - NodePort services (same as Service) - ConfigMap (create_namespaced_config_map → replace on 409) - PV/PVC: bare `except: pass` replaced with explicit ApiException catch for 404 Extracted _ensure_deployment(), _ensure_service(), _ensure_ingress(), and _ensure_config_map() helpers to keep cyclomatic complexity in check. 
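
The shared shape of all four helpers, condensed (Service shown; the
diff below is authoritative):

    try:
        core_api.create_namespaced_service(namespace=ns, body=service)
    except ApiException as e:
        if e.status == 409:  # already exists: fall back to replace
            core_api.replace_namespaced_service(
                name=service.metadata.name, namespace=ns, body=service
            )
        else:
            raise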
Co-Authored-By: Claude Opus 4.6 --- stack_orchestrator/deploy/k8s/deploy_k8s.py | 149 ++++++++++++++------ 1 file changed, 104 insertions(+), 45 deletions(-) diff --git a/stack_orchestrator/deploy/k8s/deploy_k8s.py b/stack_orchestrator/deploy/k8s/deploy_k8s.py index 3b235538..c0272be7 100644 --- a/stack_orchestrator/deploy/k8s/deploy_k8s.py +++ b/stack_orchestrator/deploy/k8s/deploy_k8s.py @@ -192,6 +192,99 @@ class K8sDeployer(Deployer): else: raise + def _ensure_config_map(self, cfg_map): + """Create or replace a ConfigMap (idempotent).""" + try: + resp = self.core_api.create_namespaced_config_map( + body=cfg_map, namespace=self.k8s_namespace + ) + if opts.o.debug: + print(f"ConfigMap created: {resp}") + except ApiException as e: + if e.status == 409: + resp = self.core_api.replace_namespaced_config_map( + name=cfg_map.metadata.name, + namespace=self.k8s_namespace, + body=cfg_map, + ) + if opts.o.debug: + print(f"ConfigMap updated: {resp}") + else: + raise + + def _ensure_deployment(self, deployment): + """Create or replace a Deployment (idempotent).""" + try: + resp = cast( + client.V1Deployment, + self.apps_api.create_namespaced_deployment( + body=deployment, namespace=self.k8s_namespace + ), + ) + if opts.o.debug: + print("Deployment created:") + except ApiException as e: + if e.status == 409: + resp = cast( + client.V1Deployment, + self.apps_api.replace_namespaced_deployment( + name=deployment.metadata.name, + namespace=self.k8s_namespace, + body=deployment, + ), + ) + if opts.o.debug: + print("Deployment updated:") + else: + raise + if opts.o.debug: + meta = resp.metadata + spec = resp.spec + if meta and spec and spec.template.spec: + containers = spec.template.spec.containers + img = containers[0].image if containers else None + print(f"{meta.namespace} {meta.name} {meta.generation} {img}") + + def _ensure_service(self, service, kind: str = "Service"): + """Create or replace a Service (idempotent).""" + try: + resp = self.core_api.create_namespaced_service( + namespace=self.k8s_namespace, body=service + ) + if opts.o.debug: + print(f"{kind} created: {resp}") + except ApiException as e: + if e.status == 409: + resp = self.core_api.replace_namespaced_service( + name=service.metadata.name, + namespace=self.k8s_namespace, + body=service, + ) + if opts.o.debug: + print(f"{kind} updated: {resp}") + else: + raise + + def _ensure_ingress(self, ingress): + """Create or replace an Ingress (idempotent).""" + try: + resp = self.networking_api.create_namespaced_ingress( + namespace=self.k8s_namespace, body=ingress + ) + if opts.o.debug: + print(f"Ingress created: {resp}") + except ApiException as e: + if e.status == 409: + resp = self.networking_api.replace_namespaced_ingress( + name=ingress.metadata.name, + namespace=self.k8s_namespace, + body=ingress, + ) + if opts.o.debug: + print(f"Ingress updated: {resp}") + else: + raise + def _create_volume_data(self): # Create the host-path-mounted PVs for this deployment pvs = self.cluster_info.get_pvs() @@ -208,8 +301,9 @@ class K8sDeployer(Deployer): print("PVs already present:") print(f"{pv_resp}") continue - except: # noqa: E722 - pass + except ApiException as e: + if e.status != 404: + raise pv_resp = self.core_api.create_persistent_volume(body=pv) if opts.o.debug: @@ -232,8 +326,9 @@ class K8sDeployer(Deployer): print("PVCs already present:") print(f"{pvc_resp}") continue - except: # noqa: E722 - pass + except ApiException as e: + if e.status != 404: + raise pvc_resp = self.core_api.create_namespaced_persistent_volume_claim( body=pvc, 
namespace=self.k8s_namespace @@ -248,12 +343,7 @@ class K8sDeployer(Deployer): if opts.o.debug: print(f"Sending this ConfigMap: {cfg_map}") if not opts.o.dry_run: - cfg_rsp = self.core_api.create_namespaced_config_map( - body=cfg_map, namespace=self.k8s_namespace - ) - if opts.o.debug: - print("ConfigMap created:") - print(f"{cfg_rsp}") + self._ensure_config_map(cfg_map) def _create_deployment(self): # Process compose files into a Deployment @@ -264,34 +354,13 @@ class K8sDeployer(Deployer): if opts.o.debug: print(f"Sending this deployment: {deployment}") if not opts.o.dry_run: - deployment_resp = cast( - client.V1Deployment, - self.apps_api.create_namespaced_deployment( - body=deployment, namespace=self.k8s_namespace - ), - ) - if opts.o.debug: - print("Deployment created:") - meta = deployment_resp.metadata - spec = deployment_resp.spec - if meta and spec and spec.template.spec: - ns = meta.namespace - name = meta.name - gen = meta.generation - containers = spec.template.spec.containers - img = containers[0].image if containers else None - print(f"{ns} {name} {gen} {img}") + self._ensure_deployment(deployment) service = self.cluster_info.get_service() if opts.o.debug: print(f"Sending this service: {service}") if service and not opts.o.dry_run: - service_resp = self.core_api.create_namespaced_service( - namespace=self.k8s_namespace, body=service - ) - if opts.o.debug: - print("Service created:") - print(f"{service_resp}") + self._ensure_service(service) def _find_certificate_for_host_name(self, host_name): all_certificates = self.custom_obj_api.list_namespaced_custom_object( @@ -404,12 +473,7 @@ class K8sDeployer(Deployer): if opts.o.debug: print(f"Sending this ingress: {ingress}") if not opts.o.dry_run: - ingress_resp = self.networking_api.create_namespaced_ingress( - namespace=self.k8s_namespace, body=ingress - ) - if opts.o.debug: - print("Ingress created:") - print(f"{ingress_resp}") + self._ensure_ingress(ingress) else: if opts.o.debug: print("No ingress configured") @@ -419,12 +483,7 @@ class K8sDeployer(Deployer): if opts.o.debug: print(f"Sending this nodeport: {nodeport}") if not opts.o.dry_run: - nodeport_resp = self.core_api.create_namespaced_service( - namespace=self.k8s_namespace, body=nodeport - ) - if opts.o.debug: - print("NodePort created:") - print(f"{nodeport_resp}") + self._ensure_service(nodeport, kind="NodePort") def down(self, timeout, volumes, skip_cluster_management): self.skip_cluster_management = skip_cluster_management From 14f423ea0c04c624ebaca9be5d6a223bc7402ef1 Mon Sep 17 00:00:00 2001 From: "A. F. Dudley" Date: Sun, 8 Mar 2026 04:32:20 +0000 Subject: [PATCH 16/19] fix(k8s): read existing resourceVersion/clusterIP before replace K8s PUT (replace) operations require metadata.resourceVersion for optimistic concurrency control. Services additionally have immutable spec.clusterIP that must be preserved from the existing object. On 409 conflict, all _ensure_* methods now read the existing resource first and copy resourceVersion (and clusterIP for Services) into the body before calling replace. 
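
Condensed sketch of the Service path (the diff below is authoritative):

    existing = core_api.read_namespaced_service(name=name, namespace=ns)
    service.metadata.resource_version = existing.metadata.resource_version
    if existing.spec.cluster_ip:  # immutable once allocated
        service.spec.cluster_ip = existing.spec.cluster_ip
    core_api.replace_namespaced_service(name=name, namespace=ns, body=service)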
Co-Authored-By: Claude Opus 4.6 --- stack_orchestrator/deploy/k8s/deploy_k8s.py | 27 ++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/stack_orchestrator/deploy/k8s/deploy_k8s.py b/stack_orchestrator/deploy/k8s/deploy_k8s.py index c0272be7..b34e3291 100644 --- a/stack_orchestrator/deploy/k8s/deploy_k8s.py +++ b/stack_orchestrator/deploy/k8s/deploy_k8s.py @@ -202,6 +202,10 @@ class K8sDeployer(Deployer): print(f"ConfigMap created: {resp}") except ApiException as e: if e.status == 409: + existing = self.core_api.read_namespaced_config_map( + name=cfg_map.metadata.name, namespace=self.k8s_namespace + ) + cfg_map.metadata.resource_version = existing.metadata.resource_version resp = self.core_api.replace_namespaced_config_map( name=cfg_map.metadata.name, namespace=self.k8s_namespace, @@ -225,6 +229,13 @@ class K8sDeployer(Deployer): print("Deployment created:") except ApiException as e: if e.status == 409: + existing = self.apps_api.read_namespaced_deployment( + name=deployment.metadata.name, + namespace=self.k8s_namespace, + ) + deployment.metadata.resource_version = ( + existing.metadata.resource_version + ) resp = cast( client.V1Deployment, self.apps_api.replace_namespaced_deployment( @@ -246,7 +257,11 @@ class K8sDeployer(Deployer): print(f"{meta.namespace} {meta.name} {meta.generation} {img}") def _ensure_service(self, service, kind: str = "Service"): - """Create or replace a Service (idempotent).""" + """Create or replace a Service (idempotent). + + Services have immutable fields (spec.clusterIP) that must be + preserved from the existing object on replace. + """ try: resp = self.core_api.create_namespaced_service( namespace=self.k8s_namespace, body=service @@ -255,6 +270,12 @@ class K8sDeployer(Deployer): print(f"{kind} created: {resp}") except ApiException as e: if e.status == 409: + existing = self.core_api.read_namespaced_service( + name=service.metadata.name, namespace=self.k8s_namespace + ) + service.metadata.resource_version = existing.metadata.resource_version + if existing.spec.cluster_ip: + service.spec.cluster_ip = existing.spec.cluster_ip resp = self.core_api.replace_namespaced_service( name=service.metadata.name, namespace=self.k8s_namespace, @@ -275,6 +296,10 @@ class K8sDeployer(Deployer): print(f"Ingress created: {resp}") except ApiException as e: if e.status == 409: + existing = self.networking_api.read_namespaced_ingress( + name=ingress.metadata.name, namespace=self.k8s_namespace + ) + ingress.metadata.resource_version = existing.metadata.resource_version resp = self.networking_api.replace_namespaced_ingress( name=ingress.metadata.name, namespace=self.k8s_namespace, From 9c5b8e3f4e429f6e51b4373088a2cbdfaf89ba38 Mon Sep 17 00:00:00 2001 From: "A. F. Dudley" Date: Sun, 8 Mar 2026 06:56:25 +0000 Subject: [PATCH 17/19] chore: initialize pebbles issue tracker MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Track stack-orchestrator work items with pebbles (append-only event log). Epic so-076: Stack composition — deploy multiple stacks into one kind cluster with independent lifecycle management per sub-stack. 
Co-Authored-By: Claude Opus 4.6 --- .pebbles/config.json | 3 +++ .pebbles/events.jsonl | 15 +++++++++++++++ 2 files changed, 18 insertions(+) create mode 100644 .pebbles/config.json create mode 100644 .pebbles/events.jsonl diff --git a/.pebbles/config.json b/.pebbles/config.json new file mode 100644 index 00000000..806dcad9 --- /dev/null +++ b/.pebbles/config.json @@ -0,0 +1,3 @@ +{ + "prefix": "so" +} \ No newline at end of file diff --git a/.pebbles/events.jsonl b/.pebbles/events.jsonl new file mode 100644 index 00000000..944b62d0 --- /dev/null +++ b/.pebbles/events.jsonl @@ -0,0 +1,15 @@ +{"type":"create","timestamp":"2026-03-08T06:56:07.080584539Z","issue_id":"so-076","payload":{"description":"Currently laconic-so maps one stack to one deployment to one pod. All containers\nin a stack's compose files become containers in a single k8s pod. This means:\n\n- Can't upgrade doublezero without restarting agave-validator\n- Can't restart monitoring without disrupting the validator\n- Can't independently scale or lifecycle-manage components\n\nThe fix is stack composition. A meta-stack (e.g. biscayne-stack) composes\nsub-stacks (agave, doublezero, agave-monitoring), each becoming its own\nk8s Deployment with independent lifecycle.","priority":"2","title":"Stack composition: deploy multiple stacks into one kind cluster","type":"epic"}} +{"type":"create","timestamp":"2026-03-08T06:56:07.551986919Z","issue_id":"so-ab0","payload":{"description":"Add laconic-so deployment prepare that creates cluster infrastructure without pods. Already implemented, needs review.","priority":"2","title":"deployment prepare command","type":"task"}} +{"type":"create","timestamp":"2026-03-08T06:56:07.884418759Z","issue_id":"so-04f","payload":{"description":"deployment stop on ANY deployment deletes the shared kind cluster. Should only delete its own namespace.","priority":"2","title":"deployment stop should not destroy shared cluster","type":"bug"}} +{"type":"create","timestamp":"2026-03-08T06:56:08.253520249Z","issue_id":"so-370","payload":{"description":"Allow stack.yml to reference sub-stacks. Each sub-stack becomes its own k8s Deployment sharing namespace and PVs.","priority":"2","title":"Add stacks: field to stack.yml for composition","type":"task"}} +{"type":"create","timestamp":"2026-03-08T06:56:08.646764337Z","issue_id":"so-f7c","payload":{"description":"Create three independent stacks from the monolithic agave-stack. 
Each gets its own compose file and independent lifecycle.","priority":"2","title":"Split agave-stack into agave + doublezero + monitoring","type":"task"}} +{"type":"rename","timestamp":"2026-03-08T06:56:14.499990161Z","issue_id":"so-ab0","payload":{"new_id":"so-076.1"}} +{"type":"dep_add","timestamp":"2026-03-08T06:56:14.499992031Z","issue_id":"so-076.1","payload":{"dep_type":"parent-child","depends_on":"so-076"}} +{"type":"rename","timestamp":"2026-03-08T06:56:14.786407752Z","issue_id":"so-04f","payload":{"new_id":"so-076.2"}} +{"type":"dep_add","timestamp":"2026-03-08T06:56:14.786409842Z","issue_id":"so-076.2","payload":{"dep_type":"parent-child","depends_on":"so-076"}} +{"type":"rename","timestamp":"2026-03-08T06:56:15.058959714Z","issue_id":"so-370","payload":{"new_id":"so-076.3"}} +{"type":"dep_add","timestamp":"2026-03-08T06:56:15.058961364Z","issue_id":"so-076.3","payload":{"dep_type":"parent-child","depends_on":"so-076"}} +{"type":"rename","timestamp":"2026-03-08T06:56:15.410080785Z","issue_id":"so-f7c","payload":{"new_id":"so-076.4"}} +{"type":"dep_add","timestamp":"2026-03-08T06:56:15.410082305Z","issue_id":"so-076.4","payload":{"dep_type":"parent-child","depends_on":"so-076"}} +{"type":"dep_add","timestamp":"2026-03-08T06:56:16.313585082Z","issue_id":"so-076.3","payload":{"dep_type":"blocks","depends_on":"so-076.2"}} +{"type":"dep_add","timestamp":"2026-03-08T06:56:16.567629422Z","issue_id":"so-076.4","payload":{"dep_type":"blocks","depends_on":"so-076.3"}} From 974eed0c733324da2b3d844821a7923297843b6b Mon Sep 17 00:00:00 2001 From: "A. F. Dudley" Date: Sun, 8 Mar 2026 06:56:34 +0000 Subject: [PATCH 18/19] feat: add `deployment prepare` command (so-076.1) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Refactors K8sDeployer.up() into three composable methods: - _setup_cluster_and_namespace(): kind cluster, API, namespace, ingress - _create_infrastructure(): PVs, PVCs, ConfigMaps, Services, NodePorts - _create_deployment(): Deployment resource (pods) `prepare` calls the first two only — creates all cluster infrastructure without starting pods. This eliminates the scale-to-0 workaround where operators had to run `deployment start` then immediately scale down. 
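For example, an operator can now prepare infrastructure first and start pods later (deployment directory name illustrative):

    laconic-so deployment --dir my-deployment prepare
    # seed volumes / adjust ConfigMaps while no pods are running
    laconic-so deployment --dir my-deployment start --skip-cluster-management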
Usage: laconic-so deployment --dir prepare Co-Authored-By: Claude Opus 4.6 --- .gitignore | 1 + stack_orchestrator/deploy/deploy.py | 6 ++ stack_orchestrator/deploy/deployer.py | 9 +++ stack_orchestrator/deploy/deployment.py | 22 +++++++ stack_orchestrator/deploy/k8s/deploy_k8s.py | 65 +++++++++++---------- 5 files changed, 73 insertions(+), 30 deletions(-) diff --git a/.gitignore b/.gitignore index 3aaa220b..6abbf941 100644 --- a/.gitignore +++ b/.gitignore @@ -8,3 +8,4 @@ __pycache__ package stack_orchestrator/data/build_tag.txt /build +.worktrees diff --git a/stack_orchestrator/deploy/deploy.py b/stack_orchestrator/deploy/deploy.py index f2bf977c..6e914b92 100644 --- a/stack_orchestrator/deploy/deploy.py +++ b/stack_orchestrator/deploy/deploy.py @@ -182,6 +182,12 @@ def status_operation(ctx): ctx.obj.deployer.status() +def prepare_operation(ctx, skip_cluster_management=False): + ctx.obj.deployer.prepare( + skip_cluster_management=skip_cluster_management, + ) + + def update_envs_operation(ctx): ctx.obj.deployer.update_envs() diff --git a/stack_orchestrator/deploy/deployer.py b/stack_orchestrator/deploy/deployer.py index 11fb6592..b950e29b 100644 --- a/stack_orchestrator/deploy/deployer.py +++ b/stack_orchestrator/deploy/deployer.py @@ -69,6 +69,15 @@ class Deployer(ABC): def run_job(self, job_name: str, release_name: Optional[str] = None): pass + def prepare(self, skip_cluster_management): + """Create cluster infrastructure (namespace, PVs, services) without starting pods. + + Only supported for k8s deployers. Compose deployers raise an error. + """ + raise DeployerException( + "prepare is only supported for k8s deployments" + ) + class DeployerException(Exception): def __init__(self, *args: object) -> None: diff --git a/stack_orchestrator/deploy/deployment.py b/stack_orchestrator/deploy/deployment.py index 1182d23f..0dc9ac37 100644 --- a/stack_orchestrator/deploy/deployment.py +++ b/stack_orchestrator/deploy/deployment.py @@ -23,6 +23,7 @@ from stack_orchestrator.deploy.images import push_images_operation from stack_orchestrator.deploy.deploy import ( up_operation, down_operation, + prepare_operation, ps_operation, port_operation, status_operation, @@ -125,6 +126,27 @@ def start(ctx, stay_attached, skip_cluster_management, extra_args): up_operation(ctx, services_list, stay_attached, skip_cluster_management) +@command.command() +@click.option( + "--skip-cluster-management/--perform-cluster-management", + default=False, + help="Skip cluster initialization (only for kind-k8s deployments)", +) +@click.pass_context +def prepare(ctx, skip_cluster_management): + """Create cluster infrastructure without starting pods. + + Sets up the kind cluster, namespace, PVs, PVCs, ConfigMaps, Services, + and Ingresses — everything that 'start' does EXCEPT creating the + Deployment resource. No pods will be scheduled. + + Use 'start --skip-cluster-management' afterward to create the Deployment + and start pods when ready. 
+ """ + ctx.obj = make_deploy_context(ctx) + prepare_operation(ctx, skip_cluster_management) + + # TODO: remove legacy up command since it's an alias for stop @command.command() @click.option( diff --git a/stack_orchestrator/deploy/k8s/deploy_k8s.py b/stack_orchestrator/deploy/k8s/deploy_k8s.py index b34e3291..1eee8ffd 100644 --- a/stack_orchestrator/deploy/k8s/deploy_k8s.py +++ b/stack_orchestrator/deploy/k8s/deploy_k8s.py @@ -371,22 +371,15 @@ class K8sDeployer(Deployer): self._ensure_config_map(cfg_map) def _create_deployment(self): - # Process compose files into a Deployment + """Create the k8s Deployment resource (which starts pods).""" deployment = self.cluster_info.get_deployment( image_pull_policy=None if self.is_kind() else "Always" ) - # Create the k8s objects if opts.o.debug: print(f"Sending this deployment: {deployment}") if not opts.o.dry_run: self._ensure_deployment(deployment) - service = self.cluster_info.get_service() - if opts.o.debug: - print(f"Sending this service: {service}") - if service and not opts.o.dry_run: - self._ensure_service(service) - def _find_certificate_for_host_name(self, host_name): all_certificates = self.custom_obj_api.list_namespaced_custom_object( group="cert-manager.io", @@ -424,24 +417,25 @@ class K8sDeployer(Deployer): return None def up(self, detach, skip_cluster_management, services): + self._setup_cluster_and_namespace(skip_cluster_management) + self._create_infrastructure() + self._create_deployment() + + def _setup_cluster_and_namespace(self, skip_cluster_management): + """Create kind cluster (if needed) and namespace. Shared by up() and prepare().""" self.skip_cluster_management = skip_cluster_management if not opts.o.dry_run: if self.is_kind() and not self.skip_cluster_management: - # Create the kind cluster (or reuse existing one) kind_config = str( self.deployment_dir.joinpath(constants.kind_config_filename) ) actual_cluster = create_cluster(self.kind_cluster_name, kind_config) if actual_cluster != self.kind_cluster_name: - # An existing cluster was found, use it instead self.kind_cluster_name = actual_cluster - # Only load locally-built images into kind - # Registry images (docker.io, ghcr.io, etc.) will be pulled by k8s local_containers = self.deployment_context.stack.obj.get( "containers", [] ) if local_containers: - # Filter image_set to only images matching local containers local_images = { img for img in self.cluster_info.image_set @@ -449,47 +443,48 @@ class K8sDeployer(Deployer): } if local_images: load_images_into_kind(self.kind_cluster_name, local_images) - # Note: if no local containers defined, all images come from registries self.connect_api() - # Create deployment-specific namespace for resource isolation self._ensure_namespace() if self.is_kind() and not self.skip_cluster_management: - # Configure ingress controller (not installed by default in kind) - # Skip if already running (idempotent for shared cluster) if not is_ingress_running(): install_ingress_for_kind(self.cluster_info.spec.get_acme_email()) - # Wait for ingress to start - # (deployment provisioning will fail unless this is done) wait_for_ingress_in_kind() - # Create RuntimeClass if unlimited_memlock is enabled if self.cluster_info.spec.get_unlimited_memlock(): _create_runtime_class( constants.high_memlock_runtime, constants.high_memlock_runtime, ) - else: print("Dry run mode enabled, skipping k8s API connect") - # Create registry secret if configured + def _create_infrastructure(self): + """Create PVs, PVCs, ConfigMaps, Services, Ingresses, NodePorts. 
+ + Everything except the Deployment resource (which starts pods). + Shared by up() and prepare(). + """ from stack_orchestrator.deploy.deployment_create import create_registry_secret create_registry_secret(self.cluster_info.spec, self.cluster_info.app_name) self._create_volume_data() - self._create_deployment() + + # Create the ClusterIP service (paired with the deployment) + service = self.cluster_info.get_service() + if service and not opts.o.dry_run: + if opts.o.debug: + print(f"Sending this service: {service}") + self._ensure_service(service) http_proxy_info = self.cluster_info.spec.get_http_proxy() - # Note: we don't support tls for kind (enabling tls causes errors) use_tls = http_proxy_info and not self.is_kind() certificate = ( self._find_certificate_for_host_name(http_proxy_info[0]["host-name"]) if use_tls else None ) - if opts.o.debug: - if certificate: - print(f"Using existing certificate: {certificate}") + if opts.o.debug and certificate: + print(f"Using existing certificate: {certificate}") ingress = self.cluster_info.get_ingress( use_tls=use_tls, certificate=certificate @@ -499,9 +494,8 @@ class K8sDeployer(Deployer): print(f"Sending this ingress: {ingress}") if not opts.o.dry_run: self._ensure_ingress(ingress) - else: - if opts.o.debug: - print("No ingress configured") + elif opts.o.debug: + print("No ingress configured") nodeports: List[client.V1Service] = self.cluster_info.get_nodeports() for nodeport in nodeports: @@ -510,6 +504,17 @@ class K8sDeployer(Deployer): if not opts.o.dry_run: self._ensure_service(nodeport, kind="NodePort") + def prepare(self, skip_cluster_management): + """Create cluster infrastructure without starting pods. + + Sets up kind cluster, namespace, PVs, PVCs, ConfigMaps, Services, + Ingresses, and NodePorts — everything that up() does EXCEPT creating + the Deployment resource. + """ + self._setup_cluster_and_namespace(skip_cluster_management) + self._create_infrastructure() + print("Cluster infrastructure prepared (no pods started).") + def down(self, timeout, volumes, skip_cluster_management): self.skip_cluster_management = skip_cluster_management self.connect_api() From 36c37d2bdec49e4e32a7fb0a50c880d2a86d5b89 Mon Sep 17 00:00:00 2001 From: "A. F. Dudley" Date: Mon, 16 Mar 2026 08:01:11 +0000 Subject: [PATCH 19/19] wd-a7b: Fix cluster-id and namespace naming - Replace token_hex cluster IDs with sortable timestamp-based IDs (laconic-{base62_timestamp}{random_suffix}) via new ids.py module - Check for existing Kind cluster before generating a new cluster-id - Derive k8s namespace from stack name instead of compose_project_name (e.g. 
laconic-dumpster instead of laconic-) - Plumb namespace through to secret generation instead of hardcoding 'default' Co-Authored-By: Claude Opus 4.6 (1M context) --- .../deploy/deployment_create.py | 53 +++++++++++--- stack_orchestrator/deploy/k8s/cluster_info.py | 73 ++++++++++++------- stack_orchestrator/deploy/k8s/deploy_k8s.py | 32 +++++--- stack_orchestrator/deploy/k8s/helpers.py | 8 +- stack_orchestrator/deploy/spec.py | 8 +- stack_orchestrator/ids.py | 47 ++++++++++++ 6 files changed, 164 insertions(+), 57 deletions(-) create mode 100644 stack_orchestrator/ids.py diff --git a/stack_orchestrator/deploy/deployment_create.py b/stack_orchestrator/deploy/deployment_create.py index 792d8e3d..3ff3e169 100644 --- a/stack_orchestrator/deploy/deployment_create.py +++ b/stack_orchestrator/deploy/deployment_create.py @@ -24,11 +24,13 @@ from typing import List, Optional import random from shutil import copy, copyfile, copytree, rmtree from secrets import token_hex +import subprocess import sys import filecmp import tempfile from stack_orchestrator import constants +from stack_orchestrator.ids import generate_id from stack_orchestrator.opts import opts from stack_orchestrator.util import ( get_stack_path, @@ -513,7 +515,9 @@ def init_operation( GENERATE_TOKEN_PATTERN = re.compile(r"\$generate:(\w+):(\d+)\$") -def _generate_and_store_secrets(config_vars: dict, deployment_name: str): +def _generate_and_store_secrets( + config_vars: dict, deployment_name: str, namespace: str = "default" +): """Generate secrets for $generate:...$ tokens and store in K8s Secret. Called by `deploy create` - generates fresh secrets and stores them. @@ -555,7 +559,6 @@ def _generate_and_store_secrets(config_vars: dict, deployment_name: str): v1 = client.CoreV1Api() secret_name = f"{deployment_name}-generated-secrets" - namespace = "default" secret_data = {k: base64.b64encode(v.encode()).decode() for k, v in secrets.items()} k8s_secret = client.V1Secret( @@ -659,7 +662,10 @@ def create_registry_secret(spec: Spec, deployment_name: str) -> Optional[str]: def _write_config_file( - spec_file: Path, config_env_file: Path, deployment_name: Optional[str] = None + spec_file: Path, + config_env_file: Path, + deployment_name: Optional[str] = None, + namespace: str = "default", ): spec_content = get_parsed_deployment_spec(spec_file) config_vars = spec_content.get("config", {}) or {} @@ -671,7 +677,7 @@ def _write_config_file( for v in config_vars.values() ) if has_generate_tokens: - _generate_and_store_secrets(config_vars, deployment_name) + _generate_and_store_secrets(config_vars, deployment_name, namespace) # Write non-secret config to config.env (exclude $generate:...$ tokens) with open(config_env_file, "w") as output_file: @@ -697,9 +703,31 @@ def _copy_files_to_directory(file_paths: List[Path], directory: Path): copy(path, os.path.join(directory, os.path.basename(path))) +def _get_existing_kind_cluster() -> Optional[str]: + """Return the name of an existing Kind cluster, or None.""" + try: + result = subprocess.run( + ["kind", "get", "clusters"], + capture_output=True, + text=True, + timeout=10, + ) + if result.returncode == 0: + clusters = [ + c.strip() for c in result.stdout.strip().splitlines() if c.strip() + ] + if clusters: + return clusters[0] + except (FileNotFoundError, subprocess.TimeoutExpired): + pass + return None + + def _create_deployment_file(deployment_dir: Path, stack_source: Optional[Path] = None): deployment_file_path = deployment_dir.joinpath(constants.deployment_file_name) - cluster = 
f"{constants.cluster_name_prefix}{token_hex(8)}" + # Reuse existing Kind cluster if one exists, otherwise generate a timestamp-based ID + existing = _get_existing_kind_cluster() + cluster = existing if existing else generate_id("laconic") deployment_content = {constants.cluster_id_key: cluster} if stack_source: deployment_content["stack-source"] = str(stack_source) @@ -953,8 +981,13 @@ def _write_deployment_files( # Use stack_name as deployment_name for K8s secret naming # Extract just the name part if stack_name is a path ("path/to/stack" -> "stack") deployment_name = Path(stack_name).name.replace("_", "-") + # Derive namespace from spec or stack name, matching deploy_k8s logic + namespace = parsed_spec.get_namespace() or f"laconic-{deployment_name}" _write_config_file( - spec_file, target_dir.joinpath(constants.config_file_name), deployment_name + spec_file, + target_dir.joinpath(constants.config_file_name), + deployment_name, + namespace=namespace, ) # Copy any k8s config file into the target dir @@ -1032,12 +1065,8 @@ def _write_deployment_files( for configmap in parsed_spec.get_configmaps(): source_config_dir = resolve_config_dir(stack_name, configmap) if os.path.exists(source_config_dir): - destination_config_dir = target_dir.joinpath( - "configmaps", configmap - ) - copytree( - source_config_dir, destination_config_dir, dirs_exist_ok=True - ) + destination_config_dir = target_dir.joinpath("configmaps", configmap) + copytree(source_config_dir, destination_config_dir, dirs_exist_ok=True) # Copy the job files into the target dir jobs = get_job_list(parsed_stack) diff --git a/stack_orchestrator/deploy/k8s/cluster_info.py b/stack_orchestrator/deploy/k8s/cluster_info.py index d84474ab..34f41c99 100644 --- a/stack_orchestrator/deploy/k8s/cluster_info.py +++ b/stack_orchestrator/deploy/k8s/cluster_info.py @@ -82,7 +82,14 @@ class ClusterInfo: def __init__(self) -> None: self.parsed_job_yaml_map = {} - def int(self, pod_files: List[str], compose_env_file, deployment_name, spec: Spec, stack_name=""): + def int( + self, + pod_files: List[str], + compose_env_file, + deployment_name, + spec: Spec, + stack_name="", + ): self.parsed_pod_yaml_map = parsed_pod_files_map_from_file_names(pod_files) # Find the set of images in the pods self.image_set = images_for_deployment(pod_files) @@ -292,8 +299,7 @@ class ClusterInfo: # Per-volume resources override global, which overrides default. 
vol_resources = ( - self.spec.get_volume_resources_for(volume_name) - or global_resources + self.spec.get_volume_resources_for(volume_name) or global_resources ) labels = { @@ -395,8 +401,7 @@ class ClusterInfo: continue vol_resources = ( - self.spec.get_volume_resources_for(volume_name) - or global_resources + self.spec.get_volume_resources_for(volume_name) or global_resources ) if self.spec.is_kind_deployment(): host_path = client.V1HostPathVolumeSource( @@ -531,9 +536,7 @@ class ClusterInfo: if self.spec.get_image_registry() is not None else image ) - volume_mounts = volume_mounts_for_service( - parsed_yaml_map, service_name - ) + volume_mounts = volume_mounts_for_service(parsed_yaml_map, service_name) # Handle command/entrypoint from compose file # In docker-compose: entrypoint -> k8s command, command -> k8s args container_command = None @@ -581,7 +584,9 @@ class ClusterInfo: volume_mounts=volume_mounts, security_context=client.V1SecurityContext( privileged=self.spec.get_privileged(), - run_as_user=int(service_info["user"]) if "user" in service_info else None, + run_as_user=int(service_info["user"]) + if "user" in service_info + else None, capabilities=client.V1Capabilities( add=self.spec.get_capabilities() ) @@ -595,19 +600,17 @@ class ClusterInfo: svc_labels = service_info.get("labels", {}) if isinstance(svc_labels, list): # docker-compose labels can be a list of "key=value" - svc_labels = dict( - item.split("=", 1) for item in svc_labels - ) - is_init = str( - svc_labels.get("laconic.init-container", "") - ).lower() in ("true", "1", "yes") + svc_labels = dict(item.split("=", 1) for item in svc_labels) + is_init = str(svc_labels.get("laconic.init-container", "")).lower() in ( + "true", + "1", + "yes", + ) if is_init: init_containers.append(container) else: containers.append(container) - volumes = volumes_for_pod_files( - parsed_yaml_map, self.spec, self.app_name - ) + volumes = volumes_for_pod_files(parsed_yaml_map, self.spec, self.app_name) return containers, init_containers, services, volumes # TODO: put things like image pull policy into an object-scope struct @@ -704,7 +707,14 @@ class ClusterInfo: kind="Deployment", metadata=client.V1ObjectMeta( name=f"{self.app_name}-deployment", - labels={"app": self.app_name, **({"app.kubernetes.io/stack": self.stack_name} if self.stack_name else {})}, + labels={ + "app": self.app_name, + **( + {"app.kubernetes.io/stack": self.stack_name} + if self.stack_name + else {} + ), + }, ), spec=spec, ) @@ -732,8 +742,8 @@ class ClusterInfo: for job_file in self.parsed_job_yaml_map: # Build containers for this single job file single_job_map = {job_file: self.parsed_job_yaml_map[job_file]} - containers, init_containers, _services, volumes = ( - self._build_containers(single_job_map, image_pull_policy) + containers, init_containers, _services, volumes = self._build_containers( + single_job_map, image_pull_policy ) # Derive job name from file path: docker-compose-.yml -> @@ -741,7 +751,7 @@ class ClusterInfo: # Strip docker-compose- prefix and .yml suffix job_name = base if job_name.startswith("docker-compose-"): - job_name = job_name[len("docker-compose-"):] + job_name = job_name[len("docker-compose-") :] if job_name.endswith(".yml"): job_name = job_name[: -len(".yml")] elif job_name.endswith(".yaml"): @@ -751,12 +761,14 @@ class ClusterInfo: # picked up by pods_in_deployment() which queries app={app_name}. 
pod_labels = { "app": f"{self.app_name}-job", - **({"app.kubernetes.io/stack": self.stack_name} if self.stack_name else {}), + **( + {"app.kubernetes.io/stack": self.stack_name} + if self.stack_name + else {} + ), } template = client.V1PodTemplateSpec( - metadata=client.V1ObjectMeta( - labels=pod_labels - ), + metadata=client.V1ObjectMeta(labels=pod_labels), spec=client.V1PodSpec( containers=containers, init_containers=init_containers or None, @@ -769,7 +781,14 @@ class ClusterInfo: template=template, backoff_limit=0, ) - job_labels = {"app": self.app_name, **({"app.kubernetes.io/stack": self.stack_name} if self.stack_name else {})} + job_labels = { + "app": self.app_name, + **( + {"app.kubernetes.io/stack": self.stack_name} + if self.stack_name + else {} + ), + } job = client.V1Job( api_version="batch/v1", kind="Job", diff --git a/stack_orchestrator/deploy/k8s/deploy_k8s.py b/stack_orchestrator/deploy/k8s/deploy_k8s.py index 8a41acea..dfb148f9 100644 --- a/stack_orchestrator/deploy/k8s/deploy_k8s.py +++ b/stack_orchestrator/deploy/k8s/deploy_k8s.py @@ -122,14 +122,18 @@ class K8sDeployer(Deployer): return self.deployment_dir = deployment_context.deployment_dir self.deployment_context = deployment_context - self.kind_cluster_name = deployment_context.spec.get_kind_cluster_name() or compose_project_name - # Use spec namespace if provided, otherwise derive from cluster-id - self.k8s_namespace = deployment_context.spec.get_namespace() or f"laconic-{compose_project_name}" - self.cluster_info = ClusterInfo() + self.kind_cluster_name = ( + deployment_context.spec.get_kind_cluster_name() or compose_project_name + ) # stack.name may be an absolute path (from spec "stack:" key after # path resolution). Extract just the directory basename for labels. raw_name = deployment_context.stack.name if deployment_context else "" stack_name = Path(raw_name).name if raw_name else "" + # Use spec namespace if provided, otherwise derive from stack name + self.k8s_namespace = deployment_context.spec.get_namespace() or ( + f"laconic-{stack_name}" if stack_name else f"laconic-{compose_project_name}" + ) + self.cluster_info = ClusterInfo() self.cluster_info.int( compose_files, compose_env_file, @@ -232,7 +236,8 @@ class K8sDeployer(Deployer): for job in jobs.items: print(f"Deleting Job {job.metadata.name}") self.batch_api.delete_namespaced_job( - name=job.metadata.name, namespace=ns, + name=job.metadata.name, + namespace=ns, body=client.V1DeleteOptions(propagation_policy="Background"), ) except ApiException as e: @@ -555,7 +560,10 @@ class K8sDeployer(Deployer): # Call start() hooks — stacks can create additional k8s resources if self.deployment_context: - from stack_orchestrator.deploy.deployment_create import call_stack_deploy_start + from stack_orchestrator.deploy.deployment_create import ( + call_stack_deploy_start, + ) + call_stack_deploy_start(self.deployment_context) def down(self, timeout, volumes, skip_cluster_management): @@ -567,9 +575,7 @@ class K8sDeployer(Deployer): # PersistentVolumes are cluster-scoped (not namespaced), so delete by label if volumes: try: - pvs = self.core_api.list_persistent_volume( - label_selector=app_label - ) + pvs = self.core_api.list_persistent_volume(label_selector=app_label) for pv in pvs.items: if opts.o.debug: print(f"Deleting PV: {pv.metadata.name}") @@ -713,14 +719,18 @@ class K8sDeployer(Deployer): def logs(self, services, tail, follow, stream): self.connect_api() - pods = pods_in_deployment(self.core_api, self.cluster_info.app_name, namespace=self.k8s_namespace) + pods 
= pods_in_deployment( + self.core_api, self.cluster_info.app_name, namespace=self.k8s_namespace + ) if len(pods) > 1: print("Warning: more than one pod in the deployment") if len(pods) == 0: log_data = "******* Pods not running ********\n" else: k8s_pod_name = pods[0] - containers = containers_in_pod(self.core_api, k8s_pod_name, namespace=self.k8s_namespace) + containers = containers_in_pod( + self.core_api, k8s_pod_name, namespace=self.k8s_namespace + ) # If pod not started, logs request below will throw an exception try: log_data = "" diff --git a/stack_orchestrator/deploy/k8s/helpers.py b/stack_orchestrator/deploy/k8s/helpers.py index 1eedfd5f..426e3125 100644 --- a/stack_orchestrator/deploy/k8s/helpers.py +++ b/stack_orchestrator/deploy/k8s/helpers.py @@ -393,7 +393,9 @@ def load_images_into_kind(kind_cluster_name: str, image_set: Set[str]): raise DeployerException(f"kind load docker-image failed: {result}") -def pods_in_deployment(core_api: client.CoreV1Api, deployment_name: str, namespace: str = "default"): +def pods_in_deployment( + core_api: client.CoreV1Api, deployment_name: str, namespace: str = "default" +): pods = [] pod_response = core_api.list_namespaced_pod( namespace=namespace, label_selector=f"app={deployment_name}" @@ -406,7 +408,9 @@ def pods_in_deployment(core_api: client.CoreV1Api, deployment_name: str, namespa return pods -def containers_in_pod(core_api: client.CoreV1Api, pod_name: str, namespace: str = "default") -> List[str]: +def containers_in_pod( + core_api: client.CoreV1Api, pod_name: str, namespace: str = "default" +) -> List[str]: containers: List[str] = [] pod_response = cast( client.V1Pod, core_api.read_namespaced_pod(pod_name, namespace=namespace) diff --git a/stack_orchestrator/deploy/spec.py b/stack_orchestrator/deploy/spec.py index 83762b35..8eb9456f 100644 --- a/stack_orchestrator/deploy/spec.py +++ b/stack_orchestrator/deploy/spec.py @@ -170,15 +170,13 @@ class Spec: Returns the per-volume Resources if found, otherwise None. The caller should fall back to get_volume_resources() then the default. """ - vol_section = ( - self.obj.get(constants.resources_key, {}).get(constants.volumes_key, {}) + vol_section = self.obj.get(constants.resources_key, {}).get( + constants.volumes_key, {} ) if volume_name not in vol_section: return None entry = vol_section[volume_name] - if isinstance(entry, dict) and ( - "reservations" in entry or "limits" in entry - ): + if isinstance(entry, dict) and ("reservations" in entry or "limits" in entry): return Resources(entry) return None diff --git a/stack_orchestrator/ids.py b/stack_orchestrator/ids.py new file mode 100644 index 00000000..e6a67782 --- /dev/null +++ b/stack_orchestrator/ids.py @@ -0,0 +1,47 @@ +"""Sortable timestamp-based ID generation for cluster naming. + +Uses base62 encoding with 100ms resolution and a 2024-01-01 epoch +to produce compact, sortable IDs like 'laconic-iqE6Za'. 
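+
+Example (illustrative; the timestamp varies with the clock and the
+two-char suffix is random, so this exact value is not reproducible):
+
+    >>> generate_id("laconic")  # doctest: +SKIP
+    'laconic-iqE6Zab'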
+
+Format: {prefix}-{timestamp}{random}
+- timestamp: 5 chars until late 2026, then 6 (100ms resolution; 6 chars cover ~180 years from the 2024 epoch)
+- random: 2 chars (3,844 unique per 100ms slot)
+"""
+# Adapted from exophial/src/exophial/ids.py
+
+import random
+import time
+
+# 2024-01-01 00:00:00 UTC in milliseconds
+EPOCH_2024 = 1704067200000
+
+# Sortable base62 alphabet (0-9, A-Z, a-z)
+ALPHABET = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
+
+
+def _base62(n: int) -> str:
+    """Encode integer as base62 string."""
+    if n == 0:
+        return ALPHABET[0]
+    s = ""
+    while n:
+        n, r = divmod(n, 62)
+        s = ALPHABET[r] + s
+    return s
+
+
+def _random_suffix(length: int = 2) -> str:
+    """Generate random base62 suffix."""
+    return "".join(random.choice(ALPHABET) for _ in range(length))
+
+
+def _timestamp_id() -> str:
+    """Generate a sortable timestamp ID (100ms resolution, 2024 epoch) with random suffix."""
+    now_ms = int(time.time() * 1000)
+    offset = (now_ms - EPOCH_2024) // 100  # 100ms resolution
+    return f"{_base62(offset)}{_random_suffix()}"
+
+
+def generate_id(prefix: str) -> str:
+    """Generate a sortable ID with an arbitrary prefix like 'laconic-iqE6Za'."""
+    return f"{prefix}-{_timestamp_id()}"