diff --git a/.pebbles/events.jsonl b/.pebbles/events.jsonl index d478cc6f..d9ac495f 100644 --- a/.pebbles/events.jsonl +++ b/.pebbles/events.jsonl @@ -1,3 +1,18 @@ +{"type":"create","timestamp":"2026-03-08T06:56:07.080584539Z","issue_id":"so-076","payload":{"description":"Currently laconic-so maps one stack to one deployment to one pod. All containers\nin a stack's compose files become containers in a single k8s pod. This means:\n\n- Can't upgrade doublezero without restarting agave-validator\n- Can't restart monitoring without disrupting the validator\n- Can't independently scale or lifecycle-manage components\n\nThe fix is stack composition. A meta-stack (e.g. biscayne-stack) composes\nsub-stacks (agave, doublezero, agave-monitoring), each becoming its own\nk8s Deployment with independent lifecycle.","priority":"2","title":"Stack composition: deploy multiple stacks into one kind cluster","type":"epic"}} +{"type":"create","timestamp":"2026-03-08T06:56:07.551986919Z","issue_id":"so-ab0","payload":{"description":"Add laconic-so deployment prepare that creates cluster infrastructure without pods. Already implemented, needs review.","priority":"2","title":"deployment prepare command","type":"task"}} +{"type":"create","timestamp":"2026-03-08T06:56:07.884418759Z","issue_id":"so-04f","payload":{"description":"deployment stop on ANY deployment deletes the shared kind cluster. Should only delete its own namespace.","priority":"2","title":"deployment stop should not destroy shared cluster","type":"bug"}} +{"type":"create","timestamp":"2026-03-08T06:56:08.253520249Z","issue_id":"so-370","payload":{"description":"Allow stack.yml to reference sub-stacks. 
Each sub-stack becomes its own k8s Deployment sharing namespace and PVs.","priority":"2","title":"Add stacks: field to stack.yml for composition","type":"task"}} +{"type":"create","timestamp":"2026-03-08T06:56:08.646764337Z","issue_id":"so-f7c","payload":{"description":"Create three independent stacks from the monolithic agave-stack. Each gets its own compose file and independent lifecycle.","priority":"2","title":"Split agave-stack into agave + doublezero + monitoring","type":"task"}} +{"type":"rename","timestamp":"2026-03-08T06:56:14.499990161Z","issue_id":"so-ab0","payload":{"new_id":"so-076.1"}} +{"type":"dep_add","timestamp":"2026-03-08T06:56:14.499992031Z","issue_id":"so-076.1","payload":{"dep_type":"parent-child","depends_on":"so-076"}} +{"type":"rename","timestamp":"2026-03-08T06:56:14.786407752Z","issue_id":"so-04f","payload":{"new_id":"so-076.2"}} +{"type":"dep_add","timestamp":"2026-03-08T06:56:14.786409842Z","issue_id":"so-076.2","payload":{"dep_type":"parent-child","depends_on":"so-076"}} +{"type":"rename","timestamp":"2026-03-08T06:56:15.058959714Z","issue_id":"so-370","payload":{"new_id":"so-076.3"}} +{"type":"dep_add","timestamp":"2026-03-08T06:56:15.058961364Z","issue_id":"so-076.3","payload":{"dep_type":"parent-child","depends_on":"so-076"}} +{"type":"rename","timestamp":"2026-03-08T06:56:15.410080785Z","issue_id":"so-f7c","payload":{"new_id":"so-076.4"}} +{"type":"dep_add","timestamp":"2026-03-08T06:56:15.410082305Z","issue_id":"so-076.4","payload":{"dep_type":"parent-child","depends_on":"so-076"}} +{"type":"dep_add","timestamp":"2026-03-08T06:56:16.313585082Z","issue_id":"so-076.3","payload":{"dep_type":"blocks","depends_on":"so-076.2"}} +{"type":"dep_add","timestamp":"2026-03-08T06:56:16.567629422Z","issue_id":"so-076.4","payload":{"dep_type":"blocks","depends_on":"so-076.3"}} {"type": "create", "timestamp": "2026-03-18T14:45:07.038870Z", "issue_id": "so-a1a", "payload": {"title": "deploy create should support external credential injection", 
"type": "feature", "priority": "2", "description": "deploy create generates config.env but provides no mechanism to inject external credentials (API keys, tokens, etc.) at creation time. Operators must append to config.env after the fact, which mutates a build artifact. deploy create should accept --credentials-file or similar to include secrets in the generated config.env."}} {"type": "create", "timestamp": "2026-03-18T14:45:07.038942Z", "issue_id": "so-b2b", "payload": {"title": "REGISTRY_TOKEN / imagePullSecret flow undocumented", "type": "bug", "priority": "2", "description": "create_registry_secret() exists in deployment_create.py and is called during up(), but REGISTRY_TOKEN is not documented in spec.yml or any user-facing docs. The restart command warns \"Registry token env var REGISTRY_TOKEN not set, skipping registry secret\" but doesn't explain how to set it. For GHCR private images, this is required and the flow from spec.yml -> config.env -> imagePullSecret needs documentation."}} {"type": "create", "timestamp": "2026-03-18T19:10:00.000000Z", "issue_id": "so-k1k", "payload": {"title": "Stack path resolution differs between deploy create and deployment restart", "type": "bug", "priority": "2", "description": "deploy create resolves --stack as a relative path from cwd. deployment restart resolves --stack-path as absolute, then computes repo_root as 4 parents up (assuming stack_orchestrator/data/stacks/name structure). External stacks with different nesting depths (e.g. stack-orchestrator/stacks/name = 3 levels) get wrong repo_root, causing --spec-file resolution to fail. The two commands should use the same path resolution logic."}} diff --git a/.pebbles/pebbles.db b/.pebbles/pebbles.db new file mode 100644 index 00000000..f1587825 Binary files /dev/null and b/.pebbles/pebbles.db differ diff --git a/CLAUDE.md b/CLAUDE.md index 845cbd22..33054f8d 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -114,6 +114,33 @@ One Kind cluster per host by design. 
Never request or expect separate clusters. - `helpers.py`: `create_cluster()`, etcd cleanup, kind operations - `cluster_info.py`: K8s resource generation (Deployment, Service, Ingress) +## spec.yml: Config Layering + +**The compose file is the single source of truth for application defaults.** + +The configuration chain is: compose defaults → spec.yml overrides → container env. + +| Layer | Owns | Example | +|-------|------|---------| +| **compose file** | All env vars and their defaults | `RPC_PORT: ${RPC_PORT:-8899}` | +| **spec.yml config:** | Deployment-specific overrides only | `GOSSIP_HOST: 10.0.0.1` | +| **start script** | Reads env vars, no defaults of its own | `${RPC_PORT}` | + +**What goes in spec.yml config:** +- Values unique to this deployment (hostnames, IPs, endpoints) +- Secrets (`$generate:hex:32$`) +- Overrides that differ from the compose default for this specific deployment + +**What does NOT go in spec.yml config:** +- Application defaults (ports, log levels, intervals, feature flags) +- Values that would be the same across all deployments of this stack +- Every env var the service accepts — that's the compose file's job + +**Anti-pattern:** Dumping all env vars from the compose file into spec.yml. +This creates three sources of truth (compose, spec, start script) that +inevitably diverge. If someone changes the default in the compose file, +spec.yml still has the old value and silently overrides it. 
+ ## Insights and Observations ### Design Principles diff --git a/stack_orchestrator/deploy/compose/deploy_docker.py b/stack_orchestrator/deploy/compose/deploy_docker.py index dca0ddfb..2804c4ea 100644 --- a/stack_orchestrator/deploy/compose/deploy_docker.py +++ b/stack_orchestrator/deploy/compose/deploy_docker.py @@ -62,7 +62,7 @@ class DockerDeployer(Deployer): except DockerException as e: raise DeployerException(e) - def update(self): + def update_envs(self): if not opts.o.dry_run: try: return self.docker.compose.restart() diff --git a/stack_orchestrator/deploy/deploy.py b/stack_orchestrator/deploy/deploy.py index c8cf2535..8ede3e72 100644 --- a/stack_orchestrator/deploy/deploy.py +++ b/stack_orchestrator/deploy/deploy.py @@ -189,8 +189,14 @@ def status_operation(ctx): ctx.obj.deployer.status() -def update_operation(ctx): - ctx.obj.deployer.update() +def prepare_operation(ctx, skip_cluster_management=False): + ctx.obj.deployer.prepare( + skip_cluster_management=skip_cluster_management, + ) + + +def update_envs_operation(ctx): + ctx.obj.deployer.update_envs() def ps_operation(ctx): diff --git a/stack_orchestrator/deploy/deployer.py b/stack_orchestrator/deploy/deployer.py index 6362cc48..1da7bd38 100644 --- a/stack_orchestrator/deploy/deployer.py +++ b/stack_orchestrator/deploy/deployer.py @@ -28,7 +28,7 @@ class Deployer(ABC): pass @abstractmethod - def update(self): + def update_envs(self): pass @abstractmethod @@ -69,6 +69,15 @@ class Deployer(ABC): def run_job(self, job_name: str, release_name: Optional[str] = None): pass + def prepare(self, skip_cluster_management): + """Create cluster infrastructure (namespace, PVs, services) without starting pods. + + Only supported for k8s deployers. Compose deployers raise an error. 
+ """ + raise DeployerException( + "prepare is only supported for k8s deployments" + ) + class DeployerException(Exception): def __init__(self, *args: object) -> None: diff --git a/stack_orchestrator/deploy/deployment.py b/stack_orchestrator/deploy/deployment.py index 55a7598d..4edf8c52 100644 --- a/stack_orchestrator/deploy/deployment.py +++ b/stack_orchestrator/deploy/deployment.py @@ -23,6 +23,7 @@ from stack_orchestrator.deploy.images import push_images_operation from stack_orchestrator.deploy.deploy import ( up_operation, down_operation, + prepare_operation, ps_operation, port_operation, status_operation, @@ -31,7 +32,7 @@ from stack_orchestrator.deploy.deploy import ( exec_operation, logs_operation, create_deploy_context, - update_operation, + update_envs_operation, ) from stack_orchestrator.deploy.deploy_types import DeployCommandContext from stack_orchestrator.deploy.deployment_context import DeploymentContext @@ -114,7 +115,7 @@ def up(ctx, stay_attached, skip_cluster_management, extra_args): ) @click.option( "--skip-cluster-management/--perform-cluster-management", - default=False, + default=True, help="Skip cluster initialization/tear-down (only for kind-k8s deployments)", ) @click.argument("extra_args", nargs=-1) # help: command: up @@ -125,6 +126,27 @@ def start(ctx, stay_attached, skip_cluster_management, extra_args): up_operation(ctx, services_list, stay_attached, skip_cluster_management) +@command.command() +@click.option( + "--skip-cluster-management/--perform-cluster-management", + default=False, + help="Skip cluster initialization (only for kind-k8s deployments)", +) +@click.pass_context +def prepare(ctx, skip_cluster_management): + """Create cluster infrastructure without starting pods. + + Sets up the kind cluster, namespace, PVs, PVCs, ConfigMaps, Services, + and Ingresses — everything that 'start' does EXCEPT creating the + Deployment resource. No pods will be scheduled. 
+ + Use 'start --skip-cluster-management' afterward to create the Deployment + and start pods when ready. + """ + ctx.obj = make_deploy_context(ctx) + prepare_operation(ctx, skip_cluster_management) + + # TODO: remove legacy up command since it's an alias for stop @command.command() @click.option( @@ -132,7 +154,7 @@ def start(ctx, stay_attached, skip_cluster_management, extra_args): ) @click.option( "--skip-cluster-management/--perform-cluster-management", - default=False, + default=True, help="Skip cluster initialization/tear-down (only for kind-k8s deployments)", ) @click.argument("extra_args", nargs=-1) # help: command: down @@ -151,7 +173,7 @@ def down(ctx, delete_volumes, skip_cluster_management, extra_args): ) @click.option( "--skip-cluster-management/--perform-cluster-management", - default=False, + default=True, help="Skip cluster initialization/tear-down (only for kind-k8s deployments)", ) @click.argument("extra_args", nargs=-1) # help: command: down @@ -210,11 +232,11 @@ def status(ctx): status_operation(ctx) -@command.command() +@command.command(name="update-envs") @click.pass_context -def update(ctx): +def update_envs(ctx): ctx.obj = make_deploy_context(ctx) - update_operation(ctx) + update_envs_operation(ctx) @command.command() diff --git a/stack_orchestrator/deploy/deployment_create.py b/stack_orchestrator/deploy/deployment_create.py index 851df8c3..2cc43ea2 100644 --- a/stack_orchestrator/deploy/deployment_create.py +++ b/stack_orchestrator/deploy/deployment_create.py @@ -24,11 +24,13 @@ from typing import List, Optional import random from shutil import copy, copyfile, copytree, rmtree from secrets import token_hex +import subprocess import sys import filecmp import tempfile from stack_orchestrator import constants +from stack_orchestrator.ids import generate_id from stack_orchestrator.opts import opts from stack_orchestrator.util import ( get_stack_path, @@ -513,7 +515,9 @@ def init_operation( GENERATE_TOKEN_PATTERN = 
re.compile(r"\$generate:(\w+):(\d+)\$") -def _generate_and_store_secrets(config_vars: dict, deployment_name: str): +def _generate_and_store_secrets( + config_vars: dict, deployment_name: str, namespace: str = "default" +): """Generate secrets for $generate:...$ tokens and store in K8s Secret. Called by `deploy create` - generates fresh secrets and stores them. @@ -555,7 +559,6 @@ def _generate_and_store_secrets(config_vars: dict, deployment_name: str): v1 = client.CoreV1Api() secret_name = f"{deployment_name}-generated-secrets" - namespace = "default" secret_data = {k: base64.b64encode(v.encode()).decode() for k, v in secrets.items()} k8s_secret = client.V1Secret( @@ -676,8 +679,23 @@ def create_registry_secret(spec: Spec, deployment_name: str, namespace: str = "d def _write_config_file( - spec_file: Path, config_env_file: Path, deployment_name: Optional[str] = None + spec_file: Path, + config_env_file: Path, + deployment_name: Optional[str] = None, + namespace: str = "default", ): + """Write spec.yml config: entries to config.env. + + The config: section in spec.yml should contain only deployment-specific + overrides — values that differ between deployments (hostnames, endpoints, + credentials, secrets via $generate:...$). + + Application defaults (ports, log levels, feature flags, tuning params) + belong in the compose file's environment section. The compose file is + the single source of truth for what env vars a service accepts and + their default values. spec.yml overrides those defaults for a specific + deployment. 
+ """ spec_content = get_parsed_deployment_spec(spec_file) config_vars = spec_content.get("config", {}) or {} @@ -688,7 +706,7 @@ def _write_config_file( for v in config_vars.values() ) if has_generate_tokens: - _generate_and_store_secrets(config_vars, deployment_name) + _generate_and_store_secrets(config_vars, deployment_name, namespace) # Write non-secret config to config.env (exclude $generate:...$ tokens) with open(config_env_file, "w") as output_file: @@ -736,9 +754,31 @@ def _copy_files_to_directory(file_paths: List[Path], directory: Path): copy(path, os.path.join(directory, os.path.basename(path))) +def _get_existing_kind_cluster() -> Optional[str]: + """Return the name of an existing Kind cluster, or None.""" + try: + result = subprocess.run( + ["kind", "get", "clusters"], + capture_output=True, + text=True, + timeout=10, + ) + if result.returncode == 0: + clusters = [ + c.strip() for c in result.stdout.strip().splitlines() if c.strip() + ] + if clusters: + return clusters[0] + except (FileNotFoundError, subprocess.TimeoutExpired): + pass + return None + + def _create_deployment_file(deployment_dir: Path, stack_source: Optional[Path] = None): deployment_file_path = deployment_dir.joinpath(constants.deployment_file_name) - cluster = f"{constants.cluster_name_prefix}{token_hex(8)}" + # Reuse existing Kind cluster if one exists, otherwise generate a timestamp-based ID + existing = _get_existing_kind_cluster() + cluster = existing if existing else generate_id("laconic") deployment_content = {constants.cluster_id_key: cluster} if stack_source: deployment_content["stack-source"] = str(stack_source) @@ -990,8 +1030,13 @@ def _write_deployment_files( # Use stack_name as deployment_name for K8s secret naming # Extract just the name part if stack_name is a path ("path/to/stack" -> "stack") deployment_name = Path(stack_name).name.replace("_", "-") + # Derive namespace from spec or stack name, matching deploy_k8s logic + namespace = parsed_spec.get_namespace() or 
f"laconic-{deployment_name}" _write_config_file( - spec_file, target_dir.joinpath(constants.config_file_name), deployment_name + spec_file, + target_dir.joinpath(constants.config_file_name), + deployment_name, + namespace=namespace, ) # Copy any k8s config file into the target dir diff --git a/stack_orchestrator/deploy/k8s/deploy_k8s.py b/stack_orchestrator/deploy/k8s/deploy_k8s.py index cf5404d2..72c14d1c 100644 --- a/stack_orchestrator/deploy/k8s/deploy_k8s.py +++ b/stack_orchestrator/deploy/k8s/deploy_k8s.py @@ -135,6 +135,11 @@ class K8sDeployer(Deployer): # path resolution). Extract just the directory basename for labels. raw_name = deployment_context.stack.name if deployment_context else "" stack_name = Path(raw_name).name if raw_name else "" + # Use spec namespace if provided, otherwise derive from stack name + self.k8s_namespace = deployment_context.spec.get_namespace() or ( + f"laconic-{stack_name}" if stack_name else f"laconic-{compose_project_name}" + ) + self.cluster_info = ClusterInfo() self.cluster_info.int( compose_files, compose_env_file, @@ -365,8 +370,9 @@ class K8sDeployer(Deployer): print("PVs already present:") print(f"{pv_resp}") continue - except: # noqa: E722 - pass + except ApiException as e: + if e.status != 404: + raise pv_resp = self.core_api.create_persistent_volume(body=pv) if opts.o.debug: @@ -389,8 +395,9 @@ class K8sDeployer(Deployer): print("PVCs already present:") print(f"{pvc_resp}") continue - except: # noqa: E722 - pass + except ApiException as e: + if e.status != 404: + raise pvc_resp = self.core_api.create_namespaced_persistent_volume_claim( body=pvc, namespace=self.k8s_namespace @@ -549,6 +556,7 @@ class K8sDeployer(Deployer): raise def _create_deployment(self): + """Create the k8s Deployment resource (which starts pods).""" # Skip if there are no pods to deploy (e.g. 
jobs-only stacks) if not self.cluster_info.parsed_pod_yaml_map: if opts.o.debug: @@ -1035,7 +1043,7 @@ class K8sDeployer(Deployer): log_data = "******* No logs available ********\n" return log_stream_from_string(log_data) - def update(self): + def update_envs(self): if not self.cluster_info.parsed_pod_yaml_map: if opts.o.debug: print("No pods defined, skipping update") diff --git a/stack_orchestrator/deploy/k8s/helpers.py b/stack_orchestrator/deploy/k8s/helpers.py index 56e49adf..396c286b 100644 --- a/stack_orchestrator/deploy/k8s/helpers.py +++ b/stack_orchestrator/deploy/k8s/helpers.py @@ -148,8 +148,16 @@ def _clean_etcd_keeping_certs(etcd_path: str) -> bool: etcd_image = "gcr.io/etcd-development/etcd:v3.5.9" temp_dir = "/tmp/laconic-etcd-cleanup" - # Whitelist: prefixes to KEEP - everything else gets deleted - keep_prefixes = "/registry/secrets/caddy-system" + # Whitelist: prefixes to KEEP - everything else gets deleted. + # Must include core cluster resources (kubernetes service, kube-system + # secrets) or kindnet panics on restart — KUBERNETES_SERVICE_HOST is + # injected from the kubernetes ClusterIP service in default namespace. + keep_prefixes = [ + "/registry/secrets/caddy-system", + "/registry/services/specs/default/kubernetes", + "/registry/services/endpoints/default/kubernetes", + ] + keep_prefixes_str = " ".join(keep_prefixes) # The etcd image is distroless (no shell). We extract the statically-linked # etcdctl binary and run it from alpine which has shell + jq support. 
@@ -195,13 +203,21 @@ def _clean_etcd_keeping_certs(etcd_path: str) -> bool: sleep 3 # Use alpine with extracted etcdctl to run commands (alpine has shell + jq) - # Export caddy secrets + # Export whitelisted keys (caddy TLS certs + core cluster services) docker run --rm \ -v {temp_dir}:/backup \ --network container:laconic-etcd-cleanup \ - $ALPINE_IMAGE sh -c \ - '/backup/etcdctl get --prefix "{keep_prefixes}" -w json \ - > /backup/kept.json 2>/dev/null || echo "{{}}" > /backup/kept.json' + $ALPINE_IMAGE sh -c ' + apk add --no-cache jq >/dev/null 2>&1 + echo "[]" > /backup/all-kvs.json + for prefix in {keep_prefixes_str}; do + /backup/etcdctl get --prefix "$prefix" -w json 2>/dev/null \ + | jq ".kvs // []" >> /backup/all-kvs.json || true + done + jq -s "add" /backup/all-kvs.json \ + | jq "{{kvs: .}}" > /backup/kept.json 2>/dev/null \ + || echo "{{}}" > /backup/kept.json + ' # Delete ALL registry keys docker run --rm \ @@ -591,14 +607,18 @@ def _generate_kind_mounts(parsed_pod_files, deployment_dir, deployment_context): Path(f"./data/{backup_subdir}/etcd"), deployment_dir ) volume_definitions.append( - f" - hostPath: {etcd_host_path}\n" f" containerPath: /var/lib/etcd\n" + f" - hostPath: {etcd_host_path}\n" + f" containerPath: /var/lib/etcd\n" + f" propagation: HostToContainer\n" ) pki_host_path = _make_absolute_host_path( Path(f"./data/{backup_subdir}/pki"), deployment_dir ) volume_definitions.append( - f" - hostPath: {pki_host_path}\n" f" containerPath: /etc/kubernetes/pki\n" + f" - hostPath: {pki_host_path}\n" + f" containerPath: /etc/kubernetes/pki\n" + f" propagation: HostToContainer\n" ) # When kind-mount-root is set, emit a single extraMount for the root. 
@@ -607,7 +627,9 @@ def _generate_kind_mounts(parsed_pod_files, deployment_dir, deployment_context): mount_root_emitted = False if kind_mount_root: volume_definitions.append( - f" - hostPath: {kind_mount_root}\n" f" containerPath: /mnt\n" + f" - hostPath: {kind_mount_root}\n" + f" containerPath: /mnt\n" + f" propagation: HostToContainer\n" ) mount_root_emitted = True @@ -649,6 +671,7 @@ def _generate_kind_mounts(parsed_pod_files, deployment_dir, deployment_context): volume_definitions.append( f" - hostPath: {host_path}\n" f" containerPath: {container_path}\n" + f" propagation: HostToContainer\n" ) if opts.o.debug: print(f"Added host path mount: {host_path}") @@ -682,6 +705,7 @@ def _generate_kind_mounts(parsed_pod_files, deployment_dir, deployment_context): volume_definitions.append( f" - hostPath: {host_path}\n" f" containerPath: {container_path}\n" + f" propagation: HostToContainer\n" ) return ( "" @@ -717,11 +741,35 @@ def _generate_kind_port_mappings_from_services(parsed_pod_files): def _generate_kind_port_mappings(parsed_pod_files): port_definitions = [] + seen = set() # Map port 80 and 443 for the Caddy ingress controller (HTTPS support) for port_string in ["80", "443"]: port_definitions.append( f" - containerPort: {port_string}\n hostPort: {port_string}\n" ) + seen.add((port_string, "TCP")) + # Map ports declared in compose services + for pod in parsed_pod_files: + parsed_pod_file = parsed_pod_files[pod] + if "services" in parsed_pod_file: + for service_name in parsed_pod_file["services"]: + service_obj = parsed_pod_file["services"][service_name] + for port_entry in service_obj.get("ports", []): + port_str = str(port_entry) + protocol = "TCP" + if "/" in port_str: + port_str, proto = port_str.split("/", 1) + protocol = proto.upper() + if ":" in port_str: + port_str = port_str.split(":")[-1] + port_num = port_str.strip("'\"") + if (port_num, protocol) not in seen: + seen.add((port_num, protocol)) + port_definitions.append( + f" - containerPort: {port_num}\n" 
+ f" hostPort: {port_num}\n" + f" protocol: {protocol}\n" + ) return ( "" if len(port_definitions) == 0 @@ -737,7 +785,11 @@ def _generate_high_memlock_spec_mount(deployment_dir: Path): references an absolute path. """ spec_path = deployment_dir.joinpath(constants.high_memlock_spec_filename).resolve() - return f" - hostPath: {spec_path}\n" f" containerPath: {spec_path}\n" + return ( + f" - hostPath: {spec_path}\n" + f" containerPath: {spec_path}\n" + f" propagation: HostToContainer\n" + ) def generate_high_memlock_spec_json(): diff --git a/stack_orchestrator/deploy/spec.py b/stack_orchestrator/deploy/spec.py index 420ce07f..a798fdab 100644 --- a/stack_orchestrator/deploy/spec.py +++ b/stack_orchestrator/deploy/spec.py @@ -73,6 +73,35 @@ class Resources: class Spec: + """Deployment spec (spec.yml) — describes WHERE and HOW to deploy a stack. + + A spec.yml contains deployment-specific infrastructure configuration: + - stack: path to the stack definition + - deploy-to: target platform (k8s-kind, k8s, compose) + - network: ports, http-proxy, acme-email + - resources: CPU/memory limits and reservations + - security: privileged, capabilities, memlock + - volumes: host path mappings for persistent data + - configmaps: directories mounted as k8s ConfigMaps + - config: deployment-specific env var OVERRIDES (see below) + + The config: section is for deployment-specific values only — things + that differ between deployments (hostnames, endpoints, secrets). + Application defaults belong in the compose file's environment section, + not here. If a value would be the same across all deployments of this + stack, it belongs in the compose file, not in spec.yml. 
+ + Good config: entries (deployment-specific): + VALIDATOR_ENTRYPOINT: my-cluster.example.com:8001 + PUBLIC_RPC_ADDRESS: my-node.example.com:8899 + GOSSIP_HOST: 10.0.0.1 + + Bad config: entries (these are application defaults): + RPC_PORT: '8899' # same everywhere, belongs in compose + LIMIT_LEDGER_SIZE: '50000000' # same everywhere, belongs in compose + RUST_LOG: info # same everywhere, belongs in compose + """ + obj: typing.Any file_path: Optional[Path] diff --git a/stack_orchestrator/deploy/webapp/util.py b/stack_orchestrator/deploy/webapp/util.py index 3c536477..84accbcd 100644 --- a/stack_orchestrator/deploy/webapp/util.py +++ b/stack_orchestrator/deploy/webapp/util.py @@ -696,7 +696,7 @@ def deploy_to_k8s(deploy_record, deployment_dir, recreate, logger): if not deploy_record: commands_to_run = ["start"] else: - commands_to_run = ["update"] + commands_to_run = ["update-envs"] for command in commands_to_run: logger.log(f"Running {command} command on deployment dir: {deployment_dir}") diff --git a/stack_orchestrator/ids.py b/stack_orchestrator/ids.py new file mode 100644 index 00000000..e6a67782 --- /dev/null +++ b/stack_orchestrator/ids.py @@ -0,0 +1,47 @@ +"""Sortable timestamp-based ID generation for cluster naming. + +Uses base62 encoding with 100ms resolution and a 2024-01-01 epoch +to produce compact, sortable IDs like 'laconic-iqE6Za'. 
+ +Format: {prefix}-{timestamp}{random} +- timestamp: 5-6 chars (100ms resolution; 6 chars cover ~180 years from 2024) +- random: 2 chars (3,844 unique per 100ms slot) +""" +# Adapted from exophial/src/exophial/ids.py + +import random +import time + +# 2024-01-01 00:00:00 UTC in milliseconds +EPOCH_2024 = 1704067200000 + +# Sortable base62 alphabet (0-9, A-Z, a-z) +ALPHABET = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" + + +def _base62(n: int) -> str: + """Encode integer as base62 string.""" + if n == 0: + return ALPHABET[0] + s = "" + while n: + n, r = divmod(n, 62) + s = ALPHABET[r] + s + return s + + +def _random_suffix(length: int = 2) -> str: + """Generate random base62 suffix.""" + return "".join(random.choice(ALPHABET) for _ in range(length)) + + +def _timestamp_id() -> str: + """Generate a sortable timestamp ID (100ms resolution, 2024 epoch) with random suffix.""" + now_ms = int(time.time() * 1000) + offset = (now_ms - EPOCH_2024) // 100 # 100ms resolution + return f"{_base62(offset)}{_random_suffix()}" + + +def generate_id(prefix: str) -> str: + """Generate a sortable ID with an arbitrary prefix like 'laconic-iqE6Za'.""" + return f"{prefix}-{_timestamp_id()}"