diff --git a/CLAUDE.md b/CLAUDE.md index 845cbd22..33054f8d 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -114,6 +114,33 @@ One Kind cluster per host by design. Never request or expect separate clusters. - `helpers.py`: `create_cluster()`, etcd cleanup, kind operations - `cluster_info.py`: K8s resource generation (Deployment, Service, Ingress) +## spec.yml: Config Layering + +**The compose file is the single source of truth for application defaults.** + +The configuration chain is: compose defaults → spec.yml overrides → container env. + +| Layer | Owns | Example | +|-------|------|---------| +| **compose file** | All env vars and their defaults | `RPC_PORT: ${RPC_PORT:-8899}` | +| **spec.yml config:** | Deployment-specific overrides only | `GOSSIP_HOST: 10.0.0.1` | +| **start script** | Reads env vars, no defaults of its own | `${RPC_PORT}` | + +**What goes in spec.yml config:** +- Values unique to this deployment (hostnames, IPs, endpoints) +- Secrets (`$generate:hex:32$`) +- Overrides that differ from the compose default for this specific deployment + +**What does NOT go in spec.yml config:** +- Application defaults (ports, log levels, intervals, feature flags) +- Values that would be the same across all deployments of this stack +- Every env var the service accepts — that's the compose file's job + +**Anti-pattern:** Dumping all env vars from the compose file into spec.yml. +This creates three sources of truth (compose, spec, start script) that +inevitably diverge. If someone changes the default in the compose file, +spec.yml still has the old value and silently overrides it. 
+ ## Insights and Observations ### Design Principles diff --git a/stack_orchestrator/deploy/deployment_create.py b/stack_orchestrator/deploy/deployment_create.py index 511445be..0e2effc2 100644 --- a/stack_orchestrator/deploy/deployment_create.py +++ b/stack_orchestrator/deploy/deployment_create.py @@ -639,6 +639,18 @@ def create_registry_secret(spec: Spec, deployment_name: str) -> Optional[str]: def _write_config_file( spec_file: Path, config_env_file: Path, deployment_name: Optional[str] = None ): + """Write spec.yml config: entries to config.env. + + The config: section in spec.yml should contain only deployment-specific + overrides — values that differ between deployments (hostnames, endpoints, + credentials, secrets via $generate:...$). + + Application defaults (ports, log levels, feature flags, tuning params) + belong in the compose file's environment section. The compose file is + the single source of truth for what env vars a service accepts and + their default values. spec.yml overrides those defaults for a specific + deployment. 
+ """ spec_content = get_parsed_deployment_spec(spec_file) config_vars = spec_content.get("config", {}) or {} diff --git a/stack_orchestrator/deploy/k8s/deploy_k8s.py b/stack_orchestrator/deploy/k8s/deploy_k8s.py index 1eee8ffd..1703b758 100644 --- a/stack_orchestrator/deploy/k8s/deploy_k8s.py +++ b/stack_orchestrator/deploy/k8s/deploy_k8s.py @@ -663,14 +663,14 @@ class K8sDeployer(Deployer): def logs(self, services, tail, follow, stream): self.connect_api() - pods = pods_in_deployment(self.core_api, self.cluster_info.app_name) + pods = pods_in_deployment(self.core_api, self.cluster_info.app_name, namespace=self.k8s_namespace) if len(pods) > 1: print("Warning: more than one pod in the deployment") if len(pods) == 0: log_data = "******* Pods not running ********\n" else: k8s_pod_name = pods[0] - containers = containers_in_pod(self.core_api, k8s_pod_name) + containers = containers_in_pod(self.core_api, k8s_pod_name, namespace=self.k8s_namespace) # If pod not started, logs request below will throw an exception try: log_data = "" diff --git a/stack_orchestrator/deploy/k8s/helpers.py b/stack_orchestrator/deploy/k8s/helpers.py index 2f392fb2..f2fa0feb 100644 --- a/stack_orchestrator/deploy/k8s/helpers.py +++ b/stack_orchestrator/deploy/k8s/helpers.py @@ -409,10 +409,10 @@ def load_images_into_kind(kind_cluster_name: str, image_set: Set[str]): raise DeployerException(f"kind load docker-image failed: {result}") -def pods_in_deployment(core_api: client.CoreV1Api, deployment_name: str): +def pods_in_deployment(core_api: client.CoreV1Api, deployment_name: str, namespace: str = "default"): pods = [] pod_response = core_api.list_namespaced_pod( - namespace="default", label_selector=f"app={deployment_name}" + namespace=namespace, label_selector=f"app={deployment_name}" ) if opts.o.debug: print(f"pod_response: {pod_response}") @@ -422,10 +422,10 @@ def pods_in_deployment(core_api: client.CoreV1Api, deployment_name: str): return pods -def containers_in_pod(core_api: 
client.CoreV1Api, pod_name: str) -> List[str]: +def containers_in_pod(core_api: client.CoreV1Api, pod_name: str, namespace: str = "default") -> List[str]: containers: List[str] = [] pod_response = cast( - client.V1Pod, core_api.read_namespaced_pod(pod_name, namespace="default") + client.V1Pod, core_api.read_namespaced_pod(pod_name, namespace=namespace) ) if opts.o.debug: print(f"pod_response: {pod_response}") @@ -730,11 +730,35 @@ def _generate_kind_port_mappings_from_services(parsed_pod_files): def _generate_kind_port_mappings(parsed_pod_files): port_definitions = [] + seen = set() # Map port 80 and 443 for the Caddy ingress controller (HTTPS support) for port_string in ["80", "443"]: port_definitions.append( f" - containerPort: {port_string}\n hostPort: {port_string}\n" ) + seen.add((port_string, "TCP")) + # Map ports declared in compose services + for pod in parsed_pod_files: + parsed_pod_file = parsed_pod_files[pod] + if "services" in parsed_pod_file: + for service_name in parsed_pod_file["services"]: + service_obj = parsed_pod_file["services"][service_name] + for port_entry in service_obj.get("ports", []): + port_str = str(port_entry) + protocol = "TCP" + if "/" in port_str: + port_str, proto = port_str.split("/", 1) + protocol = proto.upper() + if ":" in port_str: + port_str = port_str.split(":")[-1] + port_num = port_str.strip("'\"") + if (port_num, protocol) not in seen: + seen.add((port_num, protocol)) + port_definitions.append( + f" - containerPort: {port_num}\n" + f" hostPort: {port_num}\n" + f" protocol: {protocol}\n" + ) return ( "" if len(port_definitions) == 0 diff --git a/stack_orchestrator/deploy/spec.py b/stack_orchestrator/deploy/spec.py index c4cde6f8..617b9095 100644 --- a/stack_orchestrator/deploy/spec.py +++ b/stack_orchestrator/deploy/spec.py @@ -73,6 +73,35 @@ class Resources: class Spec: + """Deployment spec (spec.yml) — describes WHERE and HOW to deploy a stack. 
+ + A spec.yml contains deployment-specific infrastructure configuration: + - stack: path to the stack definition + - deploy-to: target platform (k8s-kind, k8s, compose) + - network: ports, http-proxy, acme-email + - resources: CPU/memory limits and reservations + - security: privileged, capabilities, memlock + - volumes: host path mappings for persistent data + - configmaps: directories mounted as k8s ConfigMaps + - config: deployment-specific env var OVERRIDES (see below) + + The config: section is for deployment-specific values only — things + that differ between deployments (hostnames, endpoints, secrets). + Application defaults belong in the compose file's environment section, + not here. If a value would be the same across all deployments of this + stack, it belongs in the compose file, not in spec.yml. + + Good config: entries (deployment-specific): + VALIDATOR_ENTRYPOINT: my-cluster.example.com:8001 + PUBLIC_RPC_ADDRESS: my-node.example.com:8899 + GOSSIP_HOST: 10.0.0.1 + + Bad config: entries (these are application defaults): + RPC_PORT: '8899' # same everywhere, belongs in compose + LIMIT_LEDGER_SIZE: '50000000' # same everywhere, belongs in compose + RUST_LOG: info # same everywhere, belongs in compose + """ + obj: typing.Any file_path: Optional[Path] diff --git a/tests/scripts/run-test-local.sh b/tests/scripts/run-test-local.sh new file mode 100755 index 00000000..f6f32346 --- /dev/null +++ b/tests/scripts/run-test-local.sh @@ -0,0 +1,53 @@ +#!/bin/bash +# Run a test suite locally in an isolated venv. +# +# Usage: +# ./tests/scripts/run-test-local.sh TEST_SCRIPT [args...] +# +# Examples: +# ./tests/scripts/run-test-local.sh tests/webapp-test/run-webapp-test.sh +# ./tests/scripts/run-test-local.sh tests/smoke-test/run-smoke-test.sh +# ./tests/scripts/run-test-local.sh tests/k8s-deploy/run-deploy-test.sh +# +# The script creates a temporary venv, installs shiv, builds the laconic-so +# package, runs the requested test, then cleans up. 
+
+set -euo pipefail
+
+if [ $# -lt 1 ]; then
+  echo "Usage: $0 TEST_SCRIPT [args...]" >&2
+  exit 1
+fi
+
+TEST_SCRIPT="$1"
+shift
+# Test script paths are repo-relative, so enter the repo before checking.
+REPO_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
+cd "$REPO_DIR"
+
+if [ ! -f "$TEST_SCRIPT" ]; then
+  echo "Error: $TEST_SCRIPT not found (relative to $REPO_DIR)" >&2
+  exit 1
+fi
+
+VENV_DIR=$(mktemp -d /tmp/so-test-XXXXXX)
+
+cleanup() {
+  echo "Cleaning up venv: $VENV_DIR"
+  rm -rf "$VENV_DIR"
+}
+trap cleanup EXIT
+
+echo "==> Creating venv in $VENV_DIR"
+python3 -m venv "$VENV_DIR"
+source "$VENV_DIR/bin/activate"
+
+echo "==> Installing shiv"
+pip install -q shiv
+
+echo "==> Building laconic-so package"
+./scripts/create_build_tag_file.sh
+./scripts/build_shiv_package.sh
+# Run as a child (not exec) so the EXIT trap still removes the venv.
+echo "==> Running: $TEST_SCRIPT $*"
+"./$TEST_SCRIPT" "$@"