From fb69cc58ffd015e187d208910b233c140128046c Mon Sep 17 00:00:00 2001 From: "A. F. Dudley" Date: Tue, 3 Mar 2026 05:28:52 +0000 Subject: [PATCH 1/6] feat(k8s): map compose service ports to Kind extraPortMappings and support hostNetwork Kind's extraPortMappings only included ports 80/443 for Caddy. Compose service ports (RPC, gossip, UDP) were never forwarded, making them unreachable from the host. Also adds hostNetwork/dnsPolicy to the k8s pod spec when any compose service uses network_mode: host. Co-Authored-By: Claude Opus 4.6 --- stack_orchestrator/deploy/k8s/cluster_info.py | 13 ++++++++++ stack_orchestrator/deploy/k8s/helpers.py | 24 +++++++++++++++++++ 2 files changed, 37 insertions(+) diff --git a/stack_orchestrator/deploy/k8s/cluster_info.py b/stack_orchestrator/deploy/k8s/cluster_info.py index da24bdc2..161fbd03 100644 --- a/stack_orchestrator/deploy/k8s/cluster_info.py +++ b/stack_orchestrator/deploy/k8s/cluster_info.py @@ -394,6 +394,14 @@ class ClusterInfo: result.append(pv) return result + def _any_service_has_host_network(self): + for pod_name in self.parsed_pod_yaml_map: + pod = self.parsed_pod_yaml_map[pod_name] + for svc in pod.get("services", {}).values(): + if svc.get("network_mode") == "host": + return True + return False + # TODO: put things like image pull policy into an object-scope struct def get_deployment(self, image_pull_policy: Optional[str] = None): containers = [] @@ -568,6 +576,7 @@ class ClusterInfo: ) ) + use_host_network = self._any_service_has_host_network() template = client.V1PodTemplateSpec( metadata=client.V1ObjectMeta(annotations=annotations, labels=labels), spec=client.V1PodSpec( @@ -577,6 +586,10 @@ class ClusterInfo: affinity=affinity, tolerations=tolerations, runtime_class_name=self.spec.get_runtime_class(), + host_network=use_host_network or None, + dns_policy=( + "ClusterFirstWithHostNet" if use_host_network else None + ), ), ) spec = client.V1DeploymentSpec( diff --git a/stack_orchestrator/deploy/k8s/helpers.py 
b/stack_orchestrator/deploy/k8s/helpers.py index 8b367f86..4d9cbe3b 100644 --- a/stack_orchestrator/deploy/k8s/helpers.py +++ b/stack_orchestrator/deploy/k8s/helpers.py @@ -683,11 +683,35 @@ def _generate_kind_port_mappings_from_services(parsed_pod_files): def _generate_kind_port_mappings(parsed_pod_files): port_definitions = [] + seen = set() # Map port 80 and 443 for the Caddy ingress controller (HTTPS support) for port_string in ["80", "443"]: port_definitions.append( f" - containerPort: {port_string}\n hostPort: {port_string}\n" ) + seen.add((port_string, "TCP")) + # Map ports declared in compose services + for pod in parsed_pod_files: + parsed_pod_file = parsed_pod_files[pod] + if "services" in parsed_pod_file: + for service_name in parsed_pod_file["services"]: + service_obj = parsed_pod_file["services"][service_name] + for port_entry in service_obj.get("ports", []): + port_str = str(port_entry) + protocol = "TCP" + if "/" in port_str: + port_str, proto = port_str.split("/", 1) + protocol = proto.upper() + if ":" in port_str: + port_str = port_str.split(":")[-1] + port_num = port_str.strip("'\"") + if (port_num, protocol) not in seen: + seen.add((port_num, protocol)) + port_definitions.append( + f" - containerPort: {port_num}\n" + f" hostPort: {port_num}\n" + f" protocol: {protocol}\n" + ) return ( "" if len(port_definitions) == 0 From f305214ce1b6a545c2d5c1533e7a3b5e6b391f01 Mon Sep 17 00:00:00 2001 From: "A. F. Dudley" Date: Tue, 3 Mar 2026 05:28:55 +0000 Subject: [PATCH 2/6] add local test runner script Co-Authored-By: Claude Opus 4.6 --- tests/scripts/run-test-local.sh | 53 +++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) create mode 100755 tests/scripts/run-test-local.sh diff --git a/tests/scripts/run-test-local.sh b/tests/scripts/run-test-local.sh new file mode 100755 index 00000000..f6f32346 --- /dev/null +++ b/tests/scripts/run-test-local.sh @@ -0,0 +1,53 @@ +#!/bin/bash +# Run a test suite locally in an isolated venv. 
+# +# Usage: +# ./tests/scripts/run-test-local.sh <test-script> [args...] +# +# Examples: +# ./tests/scripts/run-test-local.sh tests/webapp-test/run-webapp-test.sh +# ./tests/scripts/run-test-local.sh tests/smoke-test/run-smoke-test.sh +# ./tests/scripts/run-test-local.sh tests/k8s-deploy/run-deploy-test.sh +# +# The script creates a temporary venv, installs shiv, builds the laconic-so +# package, runs the requested test, then cleans up. + +set -euo pipefail + +if [ $# -lt 1 ]; then + echo "Usage: $0 <test-script> [args...]" + exit 1 +fi + +TEST_SCRIPT="$1" +shift + +if [ ! -f "$TEST_SCRIPT" ]; then + echo "Error: $TEST_SCRIPT not found" + exit 1 +fi + +REPO_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)" +VENV_DIR=$(mktemp -d /tmp/so-test-XXXXXX) + +cleanup() { + echo "Cleaning up venv: $VENV_DIR" + rm -rf "$VENV_DIR" +} +trap cleanup EXIT + +cd "$REPO_DIR" + +echo "==> Creating venv in $VENV_DIR" +python3 -m venv "$VENV_DIR" +source "$VENV_DIR/bin/activate" + +echo "==> Installing shiv" +pip install -q shiv + +echo "==> Building laconic-so package" +./scripts/create_build_tag_file.sh +./scripts/build_shiv_package.sh + +echo "==> Running: $TEST_SCRIPT $*" +exec "./$TEST_SCRIPT" "$@" From 7cd5043a835ff8866b58bf7fd8e3173aaddb89af Mon Sep 17 00:00:00 2001 From: "A. F. Dudley" Date: Wed, 4 Mar 2026 16:41:16 +0000 Subject: [PATCH 3/6] feat(k8s): add kind-mount-root for unified kind extraMount When kind-mount-root is set in spec.yml, emit a single extraMount mapping the root to /mnt instead of per-volume mounts. This allows adding new volumes without recreating the kind cluster. Volumes whose host path is under the root are skipped for individual extraMounts and their PV paths resolve to /mnt/{relative_path}. Volumes outside the root keep individual extraMounts as before.
Co-Authored-By: Claude Opus 4.6 --- stack_orchestrator/constants.py | 1 + stack_orchestrator/deploy/k8s/cluster_info.py | 6 +++++- stack_orchestrator/deploy/k8s/helpers.py | 21 ++++++++++++++++++- stack_orchestrator/deploy/spec.py | 3 +++ 4 files changed, 29 insertions(+), 2 deletions(-) diff --git a/stack_orchestrator/constants.py b/stack_orchestrator/constants.py index 75bd0ebc..2c0c8de0 100644 --- a/stack_orchestrator/constants.py +++ b/stack_orchestrator/constants.py @@ -45,3 +45,4 @@ runtime_class_key = "runtime-class" high_memlock_runtime = "high-memlock" high_memlock_spec_filename = "high-memlock-spec.json" acme_email_key = "acme-email" +kind_mount_root_key = "kind-mount-root" diff --git a/stack_orchestrator/deploy/k8s/cluster_info.py b/stack_orchestrator/deploy/k8s/cluster_info.py index 161fbd03..dc967469 100644 --- a/stack_orchestrator/deploy/k8s/cluster_info.py +++ b/stack_orchestrator/deploy/k8s/cluster_info.py @@ -371,7 +371,11 @@ class ClusterInfo: if self.spec.is_kind_deployment(): host_path = client.V1HostPathVolumeSource( - path=get_kind_pv_bind_mount_path(volume_name) + path=get_kind_pv_bind_mount_path( + volume_name, + kind_mount_root=self.spec.get_kind_mount_root(), + host_path=volume_path, + ) ) else: host_path = client.V1HostPathVolumeSource(path=volume_path) diff --git a/stack_orchestrator/deploy/k8s/helpers.py b/stack_orchestrator/deploy/k8s/helpers.py index 4d9cbe3b..a316f0c7 100644 --- a/stack_orchestrator/deploy/k8s/helpers.py +++ b/stack_orchestrator/deploy/k8s/helpers.py @@ -440,7 +440,11 @@ def named_volumes_from_pod_files(parsed_pod_files): return named_volumes -def get_kind_pv_bind_mount_path(volume_name: str): +def get_kind_pv_bind_mount_path(volume_name: str, kind_mount_root: Optional[str] = None, + host_path: Optional[str] = None): + if kind_mount_root and host_path and host_path.startswith(kind_mount_root): + rel = os.path.relpath(host_path, kind_mount_root) + return f"/mnt/{rel}" return f"/mnt/{volume_name}" @@ -563,6 +567,7 @@ 
def _generate_kind_mounts(parsed_pod_files, deployment_dir, deployment_context): volume_definitions = [] volume_host_path_map = _get_host_paths_for_volumes(deployment_context) seen_host_path_mounts = set() # Track to avoid duplicate mounts + kind_mount_root = deployment_context.spec.get_kind_mount_root() # Cluster state backup for offline data recovery (unique per deployment) # etcd contains all k8s state; PKI certs needed to decrypt etcd offline @@ -583,6 +588,17 @@ def _generate_kind_mounts(parsed_pod_files, deployment_dir, deployment_context): f" - hostPath: {pki_host_path}\n" f" containerPath: /etc/kubernetes/pki\n" ) + # When kind-mount-root is set, emit a single extraMount for the root. + # Individual volumes whose host path starts with the root are covered + # by this single mount and don't need their own extraMount entries. + mount_root_emitted = False + if kind_mount_root: + volume_definitions.append( + f" - hostPath: {kind_mount_root}\n" + f" containerPath: /mnt\n" + ) + mount_root_emitted = True + # Note these paths are relative to the location of the pod files (at present) # So we need to fix up to make them correct and absolute because kind assumes # relative to the cwd. 
@@ -642,6 +658,9 @@ def _generate_kind_mounts(parsed_pod_files, deployment_dir, deployment_context): volume_host_path_map[volume_name], deployment_dir, ) + # Skip individual extraMount if covered by mount root + if mount_root_emitted and str(host_path).startswith(kind_mount_root): + continue container_path = get_kind_pv_bind_mount_path( volume_name ) diff --git a/stack_orchestrator/deploy/spec.py b/stack_orchestrator/deploy/spec.py index e5647b04..c62d0aea 100644 --- a/stack_orchestrator/deploy/spec.py +++ b/stack_orchestrator/deploy/spec.py @@ -202,5 +202,8 @@ class Spec: def is_kind_deployment(self): return self.get_deployment_type() in [constants.k8s_kind_deploy_type] + def get_kind_mount_root(self): + return self.obj.get(constants.kind_mount_root_key) + def is_docker_deployment(self): return self.get_deployment_type() in [constants.compose_deploy_type] From 26dea540e98cf145ab9b83a5cfb648d56334f8e8 Mon Sep 17 00:00:00 2001 From: "A. F. Dudley" Date: Wed, 4 Mar 2026 17:13:08 +0000 Subject: [PATCH 4/6] fix(k8s): use deployment namespace for pod and container lookups pods_in_deployment() and containers_in_pod() were hardcoded to search the "default" namespace, but deployments are created in a per-deployment namespace (laconic-{name}). This caused logs() to report "Pods not running" even when pods were healthy. 
Co-Authored-By: Claude Opus 4.6 --- stack_orchestrator/deploy/k8s/deploy_k8s.py | 4 ++-- stack_orchestrator/deploy/k8s/helpers.py | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/stack_orchestrator/deploy/k8s/deploy_k8s.py b/stack_orchestrator/deploy/k8s/deploy_k8s.py index f7f8ad43..58801d33 100644 --- a/stack_orchestrator/deploy/k8s/deploy_k8s.py +++ b/stack_orchestrator/deploy/k8s/deploy_k8s.py @@ -574,14 +574,14 @@ class K8sDeployer(Deployer): def logs(self, services, tail, follow, stream): self.connect_api() - pods = pods_in_deployment(self.core_api, self.cluster_info.app_name) + pods = pods_in_deployment(self.core_api, self.cluster_info.app_name, namespace=self.k8s_namespace) if len(pods) > 1: print("Warning: more than one pod in the deployment") if len(pods) == 0: log_data = "******* Pods not running ********\n" else: k8s_pod_name = pods[0] - containers = containers_in_pod(self.core_api, k8s_pod_name) + containers = containers_in_pod(self.core_api, k8s_pod_name, namespace=self.k8s_namespace) # If pod not started, logs request below will throw an exception try: log_data = "" diff --git a/stack_orchestrator/deploy/k8s/helpers.py b/stack_orchestrator/deploy/k8s/helpers.py index a316f0c7..c7b9703d 100644 --- a/stack_orchestrator/deploy/k8s/helpers.py +++ b/stack_orchestrator/deploy/k8s/helpers.py @@ -393,10 +393,10 @@ def load_images_into_kind(kind_cluster_name: str, image_set: Set[str]): raise DeployerException(f"kind load docker-image failed: {result}") -def pods_in_deployment(core_api: client.CoreV1Api, deployment_name: str): +def pods_in_deployment(core_api: client.CoreV1Api, deployment_name: str, namespace: str = "default"): pods = [] pod_response = core_api.list_namespaced_pod( - namespace="default", label_selector=f"app={deployment_name}" + namespace=namespace, label_selector=f"app={deployment_name}" ) if opts.o.debug: print(f"pod_response: {pod_response}") @@ -406,10 +406,10 @@ def pods_in_deployment(core_api: client.CoreV1Api, 
deployment_name: str): return pods -def containers_in_pod(core_api: client.CoreV1Api, pod_name: str) -> List[str]: +def containers_in_pod(core_api: client.CoreV1Api, pod_name: str, namespace: str = "default") -> List[str]: containers: List[str] = [] pod_response = cast( - client.V1Pod, core_api.read_namespaced_pod(pod_name, namespace="default") + client.V1Pod, core_api.read_namespaced_pod(pod_name, namespace=namespace) ) if opts.o.debug: print(f"pod_response: {pod_response}") From d090f2064e60ac0e1ffa10069b7cdcb437b0602b Mon Sep 17 00:00:00 2001 From: "A. F. Dudley" Date: Sat, 7 Mar 2026 08:47:12 +0000 Subject: [PATCH 5/6] docs: annotate spec.yml config layering conventions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Compose file owns application defaults. spec.yml config: section is for deployment-specific overrides only (hostnames, IPs, secrets). Start scripts should not have their own defaults — they read what the compose file provides. Annotations added: - CLAUDE.md: config layering table and anti-pattern callout - spec.py: Spec class docstring with good/bad config examples - deployment_create.py: _write_config_file docstring Co-Authored-By: Claude Opus 4.6 --- CLAUDE.md | 27 +++++++++++++++++ .../deploy/deployment_create.py | 12 ++++++++ stack_orchestrator/deploy/spec.py | 29 +++++++++++++++++++ 3 files changed, 68 insertions(+) diff --git a/CLAUDE.md b/CLAUDE.md index 845cbd22..33054f8d 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -114,6 +114,33 @@ One Kind cluster per host by design. Never request or expect separate clusters. - `helpers.py`: `create_cluster()`, etcd cleanup, kind operations - `cluster_info.py`: K8s resource generation (Deployment, Service, Ingress) +## spec.yml: Config Layering + +**The compose file is the single source of truth for application defaults.** + +The configuration chain is: compose defaults → spec.yml overrides → container env. 
+ +| Layer | Owns | Example | +|-------|------|---------| +| **compose file** | All env vars and their defaults | `RPC_PORT: ${RPC_PORT:-8899}` | +| **spec.yml config:** | Deployment-specific overrides only | `GOSSIP_HOST: 10.0.0.1` | +| **start script** | Reads env vars, no defaults of its own | `${RPC_PORT}` | + +**What goes in spec.yml config:** +- Values unique to this deployment (hostnames, IPs, endpoints) +- Secrets (`$generate:hex:32$`) +- Overrides that differ from the compose default for this specific deployment + +**What does NOT go in spec.yml config:** +- Application defaults (ports, log levels, intervals, feature flags) +- Values that would be the same across all deployments of this stack +- Every env var the service accepts — that's the compose file's job + +**Anti-pattern:** Dumping all env vars from the compose file into spec.yml. +This creates three sources of truth (compose, spec, start script) that +inevitably diverge. If someone changes the default in the compose file, +spec.yml still has the old value and silently overrides it. + ## Insights and Observations ### Design Principles diff --git a/stack_orchestrator/deploy/deployment_create.py b/stack_orchestrator/deploy/deployment_create.py index 511445be..0e2effc2 100644 --- a/stack_orchestrator/deploy/deployment_create.py +++ b/stack_orchestrator/deploy/deployment_create.py @@ -639,6 +639,18 @@ def create_registry_secret(spec: Spec, deployment_name: str) -> Optional[str]: def _write_config_file( spec_file: Path, config_env_file: Path, deployment_name: Optional[str] = None ): + """Write spec.yml config: entries to config.env. + + The config: section in spec.yml should contain only deployment-specific + overrides — values that differ between deployments (hostnames, endpoints, + credentials, secrets via $generate:...$). + + Application defaults (ports, log levels, feature flags, tuning params) + belong in the compose file's environment section. 
The compose file is + the single source of truth for what env vars a service accepts and + their default values. spec.yml overrides those defaults for a specific + deployment. + """ spec_content = get_parsed_deployment_spec(spec_file) config_vars = spec_content.get("config", {}) or {} diff --git a/stack_orchestrator/deploy/spec.py b/stack_orchestrator/deploy/spec.py index c62d0aea..072b035c 100644 --- a/stack_orchestrator/deploy/spec.py +++ b/stack_orchestrator/deploy/spec.py @@ -73,6 +73,35 @@ class Resources: class Spec: + """Deployment spec (spec.yml) — describes WHERE and HOW to deploy a stack. + + A spec.yml contains deployment-specific infrastructure configuration: + - stack: path to the stack definition + - deploy-to: target platform (k8s-kind, k8s, compose) + - network: ports, http-proxy, acme-email + - resources: CPU/memory limits and reservations + - security: privileged, capabilities, memlock + - volumes: host path mappings for persistent data + - configmaps: directories mounted as k8s ConfigMaps + - config: deployment-specific env var OVERRIDES (see below) + + The config: section is for deployment-specific values only — things + that differ between deployments (hostnames, endpoints, secrets). + Application defaults belong in the compose file's environment section, + not here. If a value would be the same across all deployments of this + stack, it belongs in the compose file, not in spec.yml. + + Good config: entries (deployment-specific): + VALIDATOR_ENTRYPOINT: my-cluster.example.com:8001 + PUBLIC_RPC_ADDRESS: my-node.example.com:8899 + GOSSIP_HOST: 10.0.0.1 + + Bad config: entries (these are application defaults): + RPC_PORT: '8899' # same everywhere, belongs in compose + LIMIT_LEDGER_SIZE: '50000000' # same everywhere, belongs in compose + RUST_LOG: info # same everywhere, belongs in compose + """ + obj: typing.Any file_path: Optional[Path] From d4dcbedd48bd3fe984fbc3fe17c6a51258aed6f5 Mon Sep 17 00:00:00 2001 From: "A. F. 
Dudley" Date: Sat, 7 Mar 2026 09:55:24 +0000 Subject: [PATCH 6/6] bug: deploy create doesn't auto-generate volume mappings for new pods Co-Authored-By: Claude Opus 4.6 --- TODO.md | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/TODO.md b/TODO.md index 349530c8..65439ab5 100644 --- a/TODO.md +++ b/TODO.md @@ -7,6 +7,25 @@ We need an "update stack" command in stack orchestrator and cleaner documentatio **Context**: Currently, `deploy init` generates a spec file and `deploy create` creates a deployment directory. The `deployment update` command (added by Thomas Lackey) only syncs env vars and restarts - it doesn't regenerate configurations. There's a gap in the workflow for updating stack configurations after initial deployment. +## Bugs + +### `deploy create` doesn't auto-generate volume mappings for new pods + +When a new pod is added to `stack.yml` (e.g. `monitoring`), `deploy create` +does not generate default host path mappings in spec.yml for the new pod's +volumes. The deployment then fails at scheduling because the PVCs don't exist. + +**Expected**: `deploy create` enumerates all volumes from all compose files +in the stack and generates default host paths for any that aren't already +mapped in the spec.yml `volumes:` section. + +**Actual**: Only volumes already in spec.yml get PVs. New volumes are silently +missing, causing `FailedScheduling: persistentvolumeclaim not found`. + +**Workaround**: Manually add volume entries to spec.yml and create host dirs. + +**Files**: `deployment_create.py` (`_write_config_file`, volume handling) + ## Architecture Refactoring ### Separate Deployer from Stack Orchestrator CLI