Compare commits

..

No commits in common. "main" and "v1.1.0-21d4790-202602032231" have entirely different histories.

6 changed files with 109 additions and 254 deletions

19
TODO.md
View File

@ -7,25 +7,6 @@ We need an "update stack" command in stack orchestrator and cleaner documentatio
**Context**: Currently, `deploy init` generates a spec file and `deploy create` creates a deployment directory. The `deployment update` command (added by Thomas Lackey) only syncs env vars and restarts - it doesn't regenerate configurations. There's a gap in the workflow for updating stack configurations after initial deployment.
## Bugs
### `deploy create` doesn't auto-generate volume mappings for new pods
When a new pod is added to `stack.yml` (e.g. `monitoring`), `deploy create`
does not generate default host path mappings in spec.yml for the new pod's
volumes. The deployment then fails at scheduling because the PVCs don't exist.
**Expected**: `deploy create` enumerates all volumes from all compose files
in the stack and generates default host paths for any that aren't already
mapped in the spec.yml `volumes:` section.
**Actual**: Only volumes already in spec.yml get PVs. New volumes are silently
missing, causing `FailedScheduling: persistentvolumeclaim not found`.
**Workaround**: Manually add volume entries to spec.yml and create host dirs.
**Files**: `deployment_create.py` (`_write_config_file`, volume handling)
## Architecture Refactoring
### Separate Deployer from Stack Orchestrator CLI

View File

@ -31,7 +31,6 @@ from stack_orchestrator.deploy.k8s.helpers import (
envs_from_environment_variables_map, envs_from_environment_variables_map,
envs_from_compose_file, envs_from_compose_file,
merge_envs, merge_envs,
translate_sidecar_service_names,
) )
from stack_orchestrator.deploy.deploy_util import ( from stack_orchestrator.deploy.deploy_util import (
parsed_pod_files_map_from_file_names, parsed_pod_files_map_from_file_names,
@ -126,8 +125,7 @@ class ClusterInfo:
name=( name=(
f"{self.app_name}-nodeport-" f"{self.app_name}-nodeport-"
f"{pod_port}-{protocol.lower()}" f"{pod_port}-{protocol.lower()}"
), )
labels={"app": self.app_name},
), ),
spec=client.V1ServiceSpec( spec=client.V1ServiceSpec(
type="NodePort", type="NodePort",
@ -210,9 +208,7 @@ class ClusterInfo:
ingress = client.V1Ingress( ingress = client.V1Ingress(
metadata=client.V1ObjectMeta( metadata=client.V1ObjectMeta(
name=f"{self.app_name}-ingress", name=f"{self.app_name}-ingress", annotations=ingress_annotations
labels={"app": self.app_name},
annotations=ingress_annotations,
), ),
spec=spec, spec=spec,
) )
@ -242,10 +238,7 @@ class ClusterInfo:
] ]
service = client.V1Service( service = client.V1Service(
metadata=client.V1ObjectMeta( metadata=client.V1ObjectMeta(name=f"{self.app_name}-service"),
name=f"{self.app_name}-service",
labels={"app": self.app_name},
),
spec=client.V1ServiceSpec( spec=client.V1ServiceSpec(
type="ClusterIP", type="ClusterIP",
ports=service_ports, ports=service_ports,
@ -327,7 +320,7 @@ class ClusterInfo:
spec = client.V1ConfigMap( spec = client.V1ConfigMap(
metadata=client.V1ObjectMeta( metadata=client.V1ObjectMeta(
name=f"{self.app_name}-{cfg_map_name}", name=f"{self.app_name}-{cfg_map_name}",
labels={"app": self.app_name, "configmap-label": cfg_map_name}, labels={"configmap-label": cfg_map_name},
), ),
binary_data=data, binary_data=data,
) )
@ -384,53 +377,20 @@ class ClusterInfo:
pv = client.V1PersistentVolume( pv = client.V1PersistentVolume(
metadata=client.V1ObjectMeta( metadata=client.V1ObjectMeta(
name=f"{self.app_name}-{volume_name}", name=f"{self.app_name}-{volume_name}",
labels={ labels={"volume-label": f"{self.app_name}-{volume_name}"},
"app": self.app_name,
"volume-label": f"{self.app_name}-{volume_name}",
},
), ),
spec=spec, spec=spec,
) )
result.append(pv) result.append(pv)
return result return result
def _any_service_has_host_network(self):
for pod_name in self.parsed_pod_yaml_map:
pod = self.parsed_pod_yaml_map[pod_name]
for svc in pod.get("services", {}).values():
if svc.get("network_mode") == "host":
return True
return False
def _resolve_container_resources(
    self, container_name: str, service_info: dict, global_resources: Resources
) -> Resources:
    """Pick the resource settings for one container, most specific first.

    Precedence: per-container override in spec.yml, then the compose file's
    ``deploy.resources`` block, then the caller-supplied global default
    (which the caller has already resolved against DEFAULT).
    """
    # Highest priority: an explicit per-container entry in spec.yml.
    spec_override = self.spec.get_container_resources_for(container_name)
    if spec_override:
        return spec_override
    # Next: the compose service's own deploy.resources block, if non-empty.
    deploy_section = service_info.get("deploy") or {}
    compose_block = deploy_section.get("resources") or {}
    if compose_block:
        return Resources(compose_block)
    # Nothing more specific found: fall back to the global resources.
    return global_resources
# TODO: put things like image pull policy into an object-scope struct # TODO: put things like image pull policy into an object-scope struct
def get_deployment(self, image_pull_policy: Optional[str] = None): def get_deployment(self, image_pull_policy: Optional[str] = None):
containers = [] containers = []
services = {} services = {}
global_resources = self.spec.get_container_resources() resources = self.spec.get_container_resources()
if not global_resources: if not resources:
global_resources = DEFAULT_CONTAINER_RESOURCES resources = DEFAULT_CONTAINER_RESOURCES
for pod_name in self.parsed_pod_yaml_map: for pod_name in self.parsed_pod_yaml_map:
pod = self.parsed_pod_yaml_map[pod_name] pod = self.parsed_pod_yaml_map[pod_name]
services = pod["services"] services = pod["services"]
@ -470,12 +430,6 @@ class ClusterInfo:
if "environment" in service_info if "environment" in service_info
else self.environment_variables.map else self.environment_variables.map
) )
# Translate docker-compose service names to localhost for sidecars
# All services in the same pod share the network namespace
sibling_services = [s for s in services.keys() if s != service_name]
merged_envs = translate_sidecar_service_names(
merged_envs, sibling_services
)
envs = envs_from_environment_variables_map(merged_envs) envs = envs_from_environment_variables_map(merged_envs)
if opts.o.debug: if opts.o.debug:
print(f"Merged envs: {envs}") print(f"Merged envs: {envs}")
@ -513,9 +467,6 @@ class ClusterInfo:
) )
) )
] ]
container_resources = self._resolve_container_resources(
container_name, service_info, global_resources
)
container = client.V1Container( container = client.V1Container(
name=container_name, name=container_name,
image=image_to_use, image=image_to_use,
@ -534,7 +485,7 @@ class ClusterInfo:
if self.spec.get_capabilities() if self.spec.get_capabilities()
else None, else None,
), ),
resources=to_k8s_resource_requirements(container_resources), resources=to_k8s_resource_requirements(resources),
) )
containers.append(container) containers.append(container)
volumes = volumes_for_pod_files( volumes = volumes_for_pod_files(
@ -601,7 +552,6 @@ class ClusterInfo:
) )
) )
use_host_network = self._any_service_has_host_network()
template = client.V1PodTemplateSpec( template = client.V1PodTemplateSpec(
metadata=client.V1ObjectMeta(annotations=annotations, labels=labels), metadata=client.V1ObjectMeta(annotations=annotations, labels=labels),
spec=client.V1PodSpec( spec=client.V1PodSpec(
@ -611,8 +561,6 @@ class ClusterInfo:
affinity=affinity, affinity=affinity,
tolerations=tolerations, tolerations=tolerations,
runtime_class_name=self.spec.get_runtime_class(), runtime_class_name=self.spec.get_runtime_class(),
host_network=use_host_network or None,
dns_policy=("ClusterFirstWithHostNet" if use_host_network else None),
), ),
) )
spec = client.V1DeploymentSpec( spec = client.V1DeploymentSpec(

View File

@ -96,7 +96,7 @@ class K8sDeployer(Deployer):
core_api: client.CoreV1Api core_api: client.CoreV1Api
apps_api: client.AppsV1Api apps_api: client.AppsV1Api
networking_api: client.NetworkingV1Api networking_api: client.NetworkingV1Api
k8s_namespace: str k8s_namespace: str = "default"
kind_cluster_name: str kind_cluster_name: str
skip_cluster_management: bool skip_cluster_management: bool
cluster_info: ClusterInfo cluster_info: ClusterInfo
@ -113,7 +113,6 @@ class K8sDeployer(Deployer):
) -> None: ) -> None:
self.type = type self.type = type
self.skip_cluster_management = False self.skip_cluster_management = False
self.k8s_namespace = "default" # Will be overridden below if context exists
# TODO: workaround pending refactoring above to cope with being # TODO: workaround pending refactoring above to cope with being
# created with a null deployment_context # created with a null deployment_context
if deployment_context is None: if deployment_context is None:
@ -121,8 +120,6 @@ class K8sDeployer(Deployer):
self.deployment_dir = deployment_context.deployment_dir self.deployment_dir = deployment_context.deployment_dir
self.deployment_context = deployment_context self.deployment_context = deployment_context
self.kind_cluster_name = compose_project_name self.kind_cluster_name = compose_project_name
# Use deployment-specific namespace for resource isolation and easy cleanup
self.k8s_namespace = f"laconic-{compose_project_name}"
self.cluster_info = ClusterInfo() self.cluster_info = ClusterInfo()
self.cluster_info.int( self.cluster_info.int(
compose_files, compose_files,
@ -152,46 +149,6 @@ class K8sDeployer(Deployer):
self.apps_api = client.AppsV1Api() self.apps_api = client.AppsV1Api()
self.custom_obj_api = client.CustomObjectsApi() self.custom_obj_api = client.CustomObjectsApi()
def _ensure_namespace(self):
    """Create the deployment namespace if it doesn't exist."""
    if opts.o.dry_run:
        print(f"Dry run: would create namespace {self.k8s_namespace}")
        return
    try:
        # Probe for the namespace; a 404 from the API means we must create it.
        self.core_api.read_namespace(name=self.k8s_namespace)
    except ApiException as e:
        if e.status != 404:
            raise
        namespace_body = client.V1Namespace(
            metadata=client.V1ObjectMeta(
                name=self.k8s_namespace,
                labels={"app": self.cluster_info.app_name},
            )
        )
        self.core_api.create_namespace(body=namespace_body)
        if opts.o.debug:
            print(f"Created namespace {self.k8s_namespace}")
    else:
        if opts.o.debug:
            print(f"Namespace {self.k8s_namespace} already exists")
def _delete_namespace(self):
    """Delete the deployment namespace and all resources within it."""
    if opts.o.dry_run:
        print(f"Dry run: would delete namespace {self.k8s_namespace}")
        return
    try:
        self.core_api.delete_namespace(name=self.k8s_namespace)
    except ApiException as e:
        # A missing namespace is fine (already gone); anything else is real.
        if e.status != 404:
            raise
        if opts.o.debug:
            print(f"Namespace {self.k8s_namespace} not found")
    else:
        if opts.o.debug:
            print(f"Deleted namespace {self.k8s_namespace}")
def _create_volume_data(self): def _create_volume_data(self):
# Create the host-path-mounted PVs for this deployment # Create the host-path-mounted PVs for this deployment
pvs = self.cluster_info.get_pvs() pvs = self.cluster_info.get_pvs()
@ -357,8 +314,6 @@ class K8sDeployer(Deployer):
load_images_into_kind(self.kind_cluster_name, local_images) load_images_into_kind(self.kind_cluster_name, local_images)
# Note: if no local containers defined, all images come from registries # Note: if no local containers defined, all images come from registries
self.connect_api() self.connect_api()
# Create deployment-specific namespace for resource isolation
self._ensure_namespace()
if self.is_kind() and not self.skip_cluster_management: if self.is_kind() and not self.skip_cluster_management:
# Configure ingress controller (not installed by default in kind) # Configure ingress controller (not installed by default in kind)
# Skip if already running (idempotent for shared cluster) # Skip if already running (idempotent for shared cluster)
@ -426,30 +381,107 @@ class K8sDeployer(Deployer):
print("NodePort created:") print("NodePort created:")
print(f"{nodeport_resp}") print(f"{nodeport_resp}")
def down(self, timeout, volumes, skip_cluster_management): def down(self, timeout, volumes, skip_cluster_management): # noqa: C901
self.skip_cluster_management = skip_cluster_management self.skip_cluster_management = skip_cluster_management
self.connect_api() self.connect_api()
# Delete the k8s objects
# PersistentVolumes are cluster-scoped (not namespaced), so delete by label
if volumes: if volumes:
try: # Create the host-path-mounted PVs for this deployment
pvs = self.core_api.list_persistent_volume( pvs = self.cluster_info.get_pvs()
label_selector=f"app={self.cluster_info.app_name}" for pv in pvs:
)
for pv in pvs.items:
if opts.o.debug:
print(f"Deleting PV: {pv.metadata.name}")
try:
self.core_api.delete_persistent_volume(name=pv.metadata.name)
except ApiException as e:
_check_delete_exception(e)
except ApiException as e:
if opts.o.debug: if opts.o.debug:
print(f"Error listing PVs: {e}") print(f"Deleting this pv: {pv}")
try:
pv_resp = self.core_api.delete_persistent_volume(
name=pv.metadata.name
)
if opts.o.debug:
print("PV deleted:")
print(f"{pv_resp}")
except ApiException as e:
_check_delete_exception(e)
# Delete the deployment namespace - this cascades to all namespaced resources # Figure out the PVCs for this deployment
# (PVCs, ConfigMaps, Deployments, Services, Ingresses, etc.) pvcs = self.cluster_info.get_pvcs()
self._delete_namespace() for pvc in pvcs:
if opts.o.debug:
print(f"Deleting this pvc: {pvc}")
try:
pvc_resp = self.core_api.delete_namespaced_persistent_volume_claim(
name=pvc.metadata.name, namespace=self.k8s_namespace
)
if opts.o.debug:
print("PVCs deleted:")
print(f"{pvc_resp}")
except ApiException as e:
_check_delete_exception(e)
# Figure out the ConfigMaps for this deployment
cfg_maps = self.cluster_info.get_configmaps()
for cfg_map in cfg_maps:
if opts.o.debug:
print(f"Deleting this ConfigMap: {cfg_map}")
try:
cfg_map_resp = self.core_api.delete_namespaced_config_map(
name=cfg_map.metadata.name, namespace=self.k8s_namespace
)
if opts.o.debug:
print("ConfigMap deleted:")
print(f"{cfg_map_resp}")
except ApiException as e:
_check_delete_exception(e)
deployment = self.cluster_info.get_deployment()
if opts.o.debug:
print(f"Deleting this deployment: {deployment}")
if deployment and deployment.metadata and deployment.metadata.name:
try:
self.apps_api.delete_namespaced_deployment(
name=deployment.metadata.name, namespace=self.k8s_namespace
)
except ApiException as e:
_check_delete_exception(e)
service = self.cluster_info.get_service()
if opts.o.debug:
print(f"Deleting service: {service}")
if service and service.metadata and service.metadata.name:
try:
self.core_api.delete_namespaced_service(
namespace=self.k8s_namespace, name=service.metadata.name
)
except ApiException as e:
_check_delete_exception(e)
ingress = self.cluster_info.get_ingress(use_tls=not self.is_kind())
if ingress and ingress.metadata and ingress.metadata.name:
if opts.o.debug:
print(f"Deleting this ingress: {ingress}")
try:
self.networking_api.delete_namespaced_ingress(
name=ingress.metadata.name, namespace=self.k8s_namespace
)
except ApiException as e:
_check_delete_exception(e)
else:
if opts.o.debug:
print("No ingress to delete")
nodeports: List[client.V1Service] = self.cluster_info.get_nodeports()
for nodeport in nodeports:
if opts.o.debug:
print(f"Deleting this nodeport: {nodeport}")
if nodeport.metadata and nodeport.metadata.name:
try:
self.core_api.delete_namespaced_service(
namespace=self.k8s_namespace, name=nodeport.metadata.name
)
except ApiException as e:
_check_delete_exception(e)
else:
if opts.o.debug:
print("No nodeport to delete")
if self.is_kind() and not self.skip_cluster_management: if self.is_kind() and not self.skip_cluster_management:
# Destroy the kind cluster # Destroy the kind cluster
@ -587,7 +619,7 @@ class K8sDeployer(Deployer):
log_data = "" log_data = ""
for container in containers: for container in containers:
container_log = self.core_api.read_namespaced_pod_log( container_log = self.core_api.read_namespaced_pod_log(
k8s_pod_name, namespace=self.k8s_namespace, container=container k8s_pod_name, namespace="default", container=container
) )
container_log_lines = container_log.splitlines() container_log_lines = container_log.splitlines()
for line in container_log_lines: for line in container_log_lines:

View File

@ -942,41 +942,6 @@ def envs_from_compose_file(
return result return result
def translate_sidecar_service_names(
    envs: Mapping[str, str], sibling_service_names: List[str]
) -> Mapping[str, str]:
    """Translate docker-compose service names to localhost for sidecar containers.

    In docker-compose, services can reference each other by name (e.g., 'db:5432').
    In Kubernetes, when multiple containers are in the same pod (sidecars), they
    share the same network namespace and must use 'localhost' instead.

    This function replaces service name references with 'localhost' in env values.

    Args:
        envs: environment variable name -> value map (values may be None).
        sibling_service_names: names of the other services in the same pod.

    Returns:
        A new map with service-name references rewritten. None values pass
        through unchanged; with no siblings, *envs* is returned as-is.
    """
    import re

    if not sibling_service_names:
        return envs

    result = {}
    for env_var, env_val in envs.items():
        if env_val is None:
            # Preserve unset/null values untouched.
            result[env_var] = env_val
            continue
        new_val = str(env_val)
        for service_name in sibling_service_names:
            # Match the service name with an optional port (e.g. 'db:5432' or
            # 'db'), including inside URLs like postgres://user:pass@db:5432/x.
            # Use lookarounds instead of \b: for \b, '-' and '.' count as
            # boundaries, so 'db' would wrongly match inside hyphenated or
            # dotted hostnames such as 'other-db-host' or 'db.example.com'.
            pattern = rf"(?<![\w.-]){re.escape(service_name)}(:\d+)?(?![\w.-])"
            new_val = re.sub(
                pattern, lambda m: f'localhost{m.group(1) or ""}', new_val
            )
        result[env_var] = new_val
    return result
def envs_from_environment_variables_map( def envs_from_environment_variables_map(
map: Mapping[str, str] map: Mapping[str, str]
) -> List[client.V1EnvVar]: ) -> List[client.V1EnvVar]:

View File

@ -120,27 +120,6 @@ class Spec:
self.obj.get(constants.resources_key, {}).get("containers", {}) self.obj.get(constants.resources_key, {}).get("containers", {})
) )
def get_container_resources_for(
    self, container_name: str
) -> typing.Optional[Resources]:
    """Look up per-container resource overrides from spec.yml.

    Checks resources.containers.<container_name> in the spec. Returns None
    if no per-container override exists (caller falls back to other sources).
    """
    resources_section = self.obj.get(constants.resources_key, {})
    entry = resources_section.get("containers", {}).get(container_name)
    # Only a dict carrying reservations/limits nested inside counts as a
    # per-container override (not a top-level global key); anything else
    # means "no override here".
    if not isinstance(entry, dict):
        return None
    if "reservations" not in entry and "limits" not in entry:
        return None
    return Resources(entry)
def get_volume_resources(self): def get_volume_resources(self):
return Resources( return Resources(
self.obj.get(constants.resources_key, {}).get(constants.volumes_key, {}) self.obj.get(constants.resources_key, {}).get(constants.volumes_key, {})
@ -149,6 +128,9 @@ class Spec:
def get_http_proxy(self): def get_http_proxy(self):
return self.obj.get(constants.network_key, {}).get(constants.http_proxy_key, []) return self.obj.get(constants.network_key, {}).get(constants.http_proxy_key, [])
def get_acme_email(self):
    """Return the ACME contact email from the spec's network section ("" if unset)."""
    network_section = self.obj.get(constants.network_key, {})
    return network_section.get("acme-email", "")
def get_annotations(self): def get_annotations(self):
return self.obj.get(constants.annotations_key, {}) return self.obj.get(constants.annotations_key, {})

View File

@ -1,53 +0,0 @@
#!/bin/bash
# Run a test suite locally in an isolated venv.
#
# Usage:
#   ./tests/scripts/run-test-local.sh <test-script> [args...]
#
# Examples:
#   ./tests/scripts/run-test-local.sh tests/webapp-test/run-webapp-test.sh
#   ./tests/scripts/run-test-local.sh tests/smoke-test/run-smoke-test.sh
#   ./tests/scripts/run-test-local.sh tests/k8s-deploy/run-deploy-test.sh
#
# The script creates a temporary venv, installs shiv, builds the laconic-so
# package, runs the requested test, then cleans up.

set -euo pipefail

if [ $# -lt 1 ]; then
    echo "Usage: $0 <test-script> [args...]"
    exit 1
fi

TEST_SCRIPT="$1"
shift

if [ ! -f "$TEST_SCRIPT" ]; then
    echo "Error: $TEST_SCRIPT not found"
    exit 1
fi

# Resolve the test script to an absolute path now: we cd into the repo
# below, which would break a relative path supplied from another directory
# (and "./$TEST_SCRIPT" would also mangle an absolute path).
TEST_SCRIPT="$(cd "$(dirname "$TEST_SCRIPT")" && pwd)/$(basename "$TEST_SCRIPT")"

REPO_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
VENV_DIR=$(mktemp -d /tmp/so-test-XXXXXX)

cleanup() {
    echo "Cleaning up venv: $VENV_DIR"
    rm -rf "$VENV_DIR"
}
trap cleanup EXIT

cd "$REPO_DIR"

echo "==> Creating venv in $VENV_DIR"
python3 -m venv "$VENV_DIR"
source "$VENV_DIR/bin/activate"

echo "==> Installing shiv"
pip install -q shiv

echo "==> Building laconic-so package"
./scripts/create_build_tag_file.sh
./scripts/build_shiv_package.sh

echo "==> Running: $TEST_SCRIPT $*"
# Run (not exec): `exec` replaces this shell, so the EXIT trap would never
# fire and the temporary venv would be left behind in /tmp. With set -e a
# failing test still propagates its exit status after cleanup runs.
"$TEST_SCRIPT" "$@"