Merge branch 'bar-822-kind-load-after-rebuild'

# Conflicts:
#	stack-orchestrator/stack_orchestrator/deploy/k8s/deploy_k8s.py
#	stack-orchestrator/stack_orchestrator/deploy/k8s/helpers.py
fix/kind-mount-propagation
A. F. Dudley 2026-03-10 16:53:55 +00:00
commit b129aaa9a5
2 changed files with 126 additions and 20 deletions

View File

@ -25,16 +25,19 @@ from stack_orchestrator.deploy.deployer import Deployer, DeployerConfigGenerator
from stack_orchestrator.deploy.deployment_context import DeploymentContext
from stack_orchestrator.deploy.k8s.cluster_info import ClusterInfo
from stack_orchestrator.deploy.k8s.helpers import (
connect_registry_to_kind_network,
containers_in_pod,
create_cluster,
destroy_cluster,
ensure_local_registry,
generate_high_memlock_spec_json,
generate_kind_config,
install_ingress_for_kind,
is_ingress_running,
load_images_into_kind,
local_registry_image,
log_stream_from_string,
pods_in_deployment,
push_images_to_local_registry,
wait_for_ingress_in_kind,
)
from stack_orchestrator.opts import opts
@ -450,11 +453,29 @@ class K8sDeployer(Deployer):
deployment = self.cluster_info.get_deployment(
image_pull_policy=None if self.is_kind() else "Always"
)
if self.is_kind():
self._rewrite_local_images(deployment)
if opts.o.debug:
print(f"Sending this deployment: {deployment}")
if not opts.o.dry_run:
self._ensure_deployment(deployment)
def _rewrite_local_images(self, deployment):
    """Point locally built container images at the local registry.

    The stack's ``containers`` list names the images that are built locally.
    Those images are pushed to the local registry (localhost:5001) by
    push_images_to_local_registry(), so the pod spec has to reference them
    at that address for containerd to pull them from the registry rather
    than look for them in its own image store.

    A pod container is considered local when any entry of the stack's
    ``containers`` list occurs as a substring of its image reference.
    The deployment object is modified in place; nothing is returned.
    """
    stack_containers = self.deployment_context.stack.obj.get("containers", [])
    if stack_containers:
        for pod_container in deployment.spec.template.spec.containers or []:
            for name in stack_containers:
                if name in pod_container.image:
                    # First match is enough; rewrite once and move on.
                    pod_container.image = local_registry_image(pod_container.image)
                    break
def _find_certificate_for_host_name(self, host_name):
all_certificates = self.custom_obj_api.list_namespaced_custom_object(
group="cert-manager.io",
@ -504,10 +525,12 @@ class K8sDeployer(Deployer):
self.skip_cluster_management = skip_cluster_management
if not opts.o.dry_run:
if self.is_kind() and not self.skip_cluster_management:
ensure_local_registry()
kind_config = str(self.deployment_dir.joinpath(constants.kind_config_filename))
actual_cluster = create_cluster(self.kind_cluster_name, kind_config)
if actual_cluster != self.kind_cluster_name:
self.kind_cluster_name = actual_cluster
connect_registry_to_kind_network(self.kind_cluster_name)
local_containers = self.deployment_context.stack.obj.get("containers", [])
if local_containers:
local_images = {
@ -516,7 +539,7 @@ class K8sDeployer(Deployer):
if any(c in img for c in local_containers)
}
if local_images:
load_images_into_kind(self.kind_cluster_name, local_images)
push_images_to_local_registry(local_images)
self.connect_api()
self._ensure_namespace()
if self.is_kind() and not self.skip_cluster_management:

View File

@ -398,11 +398,82 @@ def install_ingress_for_kind(acme_email: str = ""):
)
def load_images_into_kind(kind_cluster_name: str, image_set: set[str]):
    """Load Docker images into a kind cluster, one image at a time.

    Runs ``kind load docker-image <image> --name <cluster>`` for every
    image in ``image_set``.

    Args:
        kind_cluster_name: Name of the kind cluster, passed as ``--name``.
        image_set: Image references to load.

    Raises:
        DeployerException: If the command exits non-zero for any image.
    """
    for image in image_set:
        result = _run_command(f"kind load docker-image {image} --name {kind_cluster_name}")
        # Do not ignore the exit status: an image that failed to load would
        # otherwise go unnoticed here.
        if result.returncode != 0:
            raise DeployerException(f"kind load docker-image failed: {result}")
# Name of the Docker container that runs the local image registry.
LOCAL_REGISTRY_NAME = "kind-registry"
# Host port the registry is published on; images are addressed as
# localhost:<this port>/<image>.
LOCAL_REGISTRY_HOST_PORT = 5001
# Container-side port of the registry's -p mapping; also the port used in
# the containerd mirror endpoint (http://<registry name>:<this port>).
LOCAL_REGISTRY_CONTAINER_PORT = 5000
def ensure_local_registry():
    """Ensure the persistent local registry container exists and is started.

    The registry survives kind cluster recreates, so images pushed to it
    remain available without re-pushing.

    If no container named LOCAL_REGISTRY_NAME exists, one is created from
    the ``registry:2`` image with ``--restart=always`` and
    LOCAL_REGISTRY_HOST_PORT published to LOCAL_REGISTRY_CONTAINER_PORT.
    Otherwise the existing container is started, in case it was stopped.

    This function does not attach the registry to the kind Docker network;
    connect_registry_to_kind_network() does that.

    Raises:
        DeployerException: If the registry container cannot be created.
    """
    # Check if registry container exists (running or stopped)
    check = subprocess.run(
        f"docker inspect {LOCAL_REGISTRY_NAME}",
        shell=True,
        capture_output=True,
    )
    if check.returncode != 0:
        # Create the registry container
        result = _run_command(
            f"docker run -d --restart=always"
            f" -p {LOCAL_REGISTRY_HOST_PORT}:{LOCAL_REGISTRY_CONTAINER_PORT}"
            f" --name {LOCAL_REGISTRY_NAME} registry:2"
        )
        if result.returncode != 0:
            raise DeployerException(f"Failed to start local registry: {result}")
        print(f"Started local registry on port {LOCAL_REGISTRY_HOST_PORT}")
    else:
        # Ensure it's running (may have been stopped)
        _run_command(f"docker start {LOCAL_REGISTRY_NAME}")
        if opts.o.debug:
            print("Local registry already exists, ensured running")
def connect_registry_to_kind_network(kind_cluster_name: str):
    """Attach the local registry container to the "kind" Docker network.

    Safe to call repeatedly: a failure whose stderr contains
    "already exists" is treated as success.

    Args:
        kind_cluster_name: Accepted but not used by this function; the
            network name is always "kind".

    Raises:
        DeployerException: If ``docker network connect`` fails for any
            other reason.
    """
    kind_network = "kind"
    connect_cmd = f"docker network connect {kind_network} {LOCAL_REGISTRY_NAME}"
    completed = subprocess.run(connect_cmd, shell=True, capture_output=True)

    if completed.returncode == 0:
        return
    if b"already exists" in completed.stderr:
        # Registry is already attached to the network; nothing to do.
        return

    details = completed.stderr.decode()
    raise DeployerException(f"Failed to connect registry to kind network: {details}")
def push_images_to_local_registry(image_set: set[str]):
    """Tag each image for the local registry and push it there.

    For every image, runs ``docker tag`` to the name produced by
    local_registry_image() and then ``docker push`` of that name. Intended
    as the faster replacement for loading images with ``kind load``.

    Args:
        image_set: Image references present in the local Docker daemon.

    Raises:
        DeployerException: If tagging or pushing any image fails.
    """
    for source_image in image_set:
        target_image = local_registry_image(source_image)
        # Each step is (shell command, error prefix used if it fails).
        steps = (
            (
                f"docker tag {source_image} {target_image}",
                f"docker tag failed for {source_image}",
            ),
            (
                f"docker push {target_image}",
                f"docker push failed for {target_image}",
            ),
        )
        for command, failure in steps:
            outcome = _run_command(command)
            if outcome.returncode != 0:
                raise DeployerException(f"{failure}: {outcome}")
        if opts.o.debug:
            print(f"Pushed {target_image} to local registry")
def local_registry_image(image: str) -> str:
    """Return ``image`` prefixed with the local registry address.

    The prefix is ``localhost:<LOCAL_REGISTRY_HOST_PORT>/``, so for example
    ``laconicnetwork/agave:local`` becomes
    ``localhost:5001/laconicnetwork/agave:local``.
    """
    registry_host = f"localhost:{LOCAL_REGISTRY_HOST_PORT}"
    return f"{registry_host}/{image}"
def pods_in_deployment(core_api: client.CoreV1Api, deployment_name: str):
@ -870,25 +941,37 @@ def generate_cri_base_json():
def _generate_containerd_config_patches(deployment_dir: Path, has_high_memlock: bool) -> str:
    """Generate containerdConfigPatches YAML for containerd configuration.

    Includes:
    - Local registry mirror, always:
      localhost:<LOCAL_REGISTRY_HOST_PORT> ->
      http://<LOCAL_REGISTRY_NAME>:<LOCAL_REGISTRY_CONTAINER_PORT>
    - Custom runtime handler named ``constants.high_memlock_runtime`` that
      uses a custom OCI base spec, only when ``has_high_memlock`` is true.

    Args:
        deployment_dir: Directory containing the high-memlock spec file
            (``constants.high_memlock_spec_filename``).
        has_high_memlock: Whether to add the high-memlock runtime handler.

    Returns:
        A ``containerdConfigPatches:`` YAML section with one ``- |-`` block
        scalar entry per patch. Never empty, because the registry mirror
        patch is always present.
    """
    # NOTE(review): patch lines are indented deeper than the "- |-" list
    # marker so YAML reads them as block scalar content. The exact widths
    # used here (4 and 6 spaces) are this edit's choice - confirm against
    # the project's kind config conventions.
    patches = []

    # Always configure the local registry mirror so kind nodes pull from it
    registry_plugin = (
        'plugins."io.containerd.grpc.v1.cri".registry.mirrors.'
        f'"localhost:{LOCAL_REGISTRY_HOST_PORT}"'
    )
    endpoint = f"http://{LOCAL_REGISTRY_NAME}:{LOCAL_REGISTRY_CONTAINER_PORT}"
    patches.append(f"    [{registry_plugin}]\n" f'      endpoint = ["{endpoint}"]')

    if has_high_memlock:
        # containerd needs an absolute path to the base runtime spec.
        spec_path = deployment_dir.joinpath(constants.high_memlock_spec_filename).resolve()
        runtime_name = constants.high_memlock_runtime
        plugin_path = 'plugins."io.containerd.grpc.v1.cri".containerd.runtimes'
        patches.append(
            f"    [{plugin_path}.{runtime_name}]\n"
            '      runtime_type = "io.containerd.runc.v2"\n'
            f'      base_runtime_spec = "{spec_path}"'
        )

    entries = "".join(f"  - |-\n{patch}\n" for patch in patches)
    return "containerdConfigPatches:\n" + entries
# Note: this makes any duplicate definition in b overwrite a
def merge_envs(a: Mapping[str, str], b: Mapping[str, str]) -> Mapping[str, str]: