diff --git a/docs/deployment_patterns.md b/docs/deployment_patterns.md index e716f2e0..0eb2548a 100644 --- a/docs/deployment_patterns.md +++ b/docs/deployment_patterns.md @@ -213,14 +213,23 @@ Its image is configurable per deployment: caddy-ingress-image: ghcr.io/laconicnetwork/caddy-ingress:v1.2.3 ``` -Defaults to `ghcr.io/laconicnetwork/caddy-ingress:latest` when not set. +Two cases, intentionally different: -On subsequent `deployment start`, if the running Caddy image differs -from the spec value, laconic-so patches the Caddy Deployment to the -new image. The Deployment uses `strategy: Recreate` (the hostPort -80/443 binding blocks a rolling update from ever completing), so -expect ~10–30s of ingress downtime while the old pod terminates and -the new one starts. +- **Spec key set**: on first install the manifest is templated with + this image. On subsequent `deployment start`, if the running Caddy + Deployment's image differs, laconic-so patches it and waits for the + rollout. The Deployment uses `strategy: Recreate` (hostPort 80/443 + blocks rolling updates from ever completing), so expect ~10–30s of + ingress downtime while the old pod terminates and the new one + starts. +- **Spec key absent**: on first install the manifest's hardcoded + default (`ghcr.io/laconicnetwork/caddy-ingress:latest`) is used. + On subsequent `deployment start`, laconic-so does **not** touch the + running Caddy Deployment. This matters when the image was set + out-of-band (via an ansible playbook, or by another deployment's + spec that's since been removed) — a silent revert to the default + would be worse than doing nothing. If you want to go back to the + default image, set `caddy-ingress-image` to it explicitly. **Cluster-scoped caveat**: `caddy-system` is shared by every deployment on the cluster. Setting `caddy-ingress-image` in any one diff --git a/stack_orchestrator/deploy/k8s/deploy_k8s.py b/stack_orchestrator/deploy/k8s/deploy_k8s.py index 7912ff1c..a706b1c1 100644 --- a/stack_orchestrator/deploy/k8s/deploy_k8s.py +++ b/stack_orchestrator/deploy/k8s/deploy_k8s.py @@ -884,19 +884,23 @@ class K8sDeployer(Deployer): if self.is_kind() and not self.skip_cluster_management: caddy_image = self.cluster_info.spec.get_caddy_ingress_image() if not is_ingress_running(): + # Fresh install — always needs an image; use the spec + # value if set, else the hardcoded default. install_ingress_for_kind( self.cluster_info.spec.get_acme_email(), self.cluster_info.spec.get_kind_mount_root(), caddy_image=caddy_image, ) wait_for_ingress_in_kind() - else: - # Ingress is already up from a prior start — reconcile - # the running image against this deployment's spec. - # Patches only if they differ. Note: caddy-system is - # cluster-scoped, so every deployment sharing the - # cluster effectively votes on the image; last start - # wins. Documented in deployment_patterns.md. + elif caddy_image is not None: + # Ingress already up AND the operator explicitly set a + # caddy-ingress-image in spec — reconcile the running + # image. Spec absent => don't touch: the operator may + # have set the image out-of-band (ansible playbook, + # prior explicit spec on a different deployment) and a + # silent revert would be worse than doing nothing. + # Note: caddy-system is cluster-scoped, so whichever + # deployment's spec sets the image last, wins. if update_caddy_ingress_image(caddy_image): wait_for_ingress_in_kind() if self.cluster_info.spec.get_unlimited_memlock(): diff --git a/stack_orchestrator/deploy/k8s/helpers.py b/stack_orchestrator/deploy/k8s/helpers.py index 7194d3ca..fc12cbef 100644 --- a/stack_orchestrator/deploy/k8s/helpers.py +++ b/stack_orchestrator/deploy/k8s/helpers.py @@ -488,6 +488,8 @@ def install_ingress_for_kind( if opts.o.debug: print(f"Configured Caddy with ACME email: {acme_email}") + # Substitute image only when an override is requested; otherwise + # leave the hardcoded default in the manifest. if caddy_image and caddy_image != constants.default_caddy_ingress_image: yaml_content = yaml_content.replace( constants.default_caddy_ingress_image, caddy_image diff --git a/stack_orchestrator/deploy/spec.py b/stack_orchestrator/deploy/spec.py index a18c14bc..fcdc9de8 100644 --- a/stack_orchestrator/deploy/spec.py +++ b/stack_orchestrator/deploy/spec.py @@ -304,18 +304,23 @@ class Spec: """ return self.obj.get(constants.kind_mount_root_key) - def get_caddy_ingress_image(self) -> str: - """Return the Caddy ingress controller image to deploy/patch. + def get_caddy_ingress_image(self) -> typing.Optional[str]: + """Return the Caddy ingress controller image override, or None. - Defaults to the upstream tag when not set in spec. Cluster- - scoped: the Caddy ingress lives in the shared `caddy-system` - namespace, so setting this key in any deployment's spec will - roll the controller for every deployment using the cluster. + Returns None (not the default image) when the spec key is + absent. That distinction matters: the install path falls back + to the hardcoded default so there's always *some* image to + deploy, while the update-on-reuse path treats None as "operator + didn't ask to touch Caddy" and skips the patch — avoiding + silent reverts of an image set out-of-band (e.g. via an + ansible playbook or a prior deployment's spec). + + Cluster-scoped: the Caddy ingress lives in the shared + `caddy-system` namespace, so setting this key in any + deployment's spec rolls the controller for every deployment + using the cluster. """ - return self.obj.get( - constants.caddy_ingress_image_key, - constants.default_caddy_ingress_image, - ) + return self.obj.get(constants.caddy_ingress_image_key) def get_maintenance_service(self) -> typing.Optional[str]: """Return maintenance-service value (e.g. 'dumpster-maintenance:8000') or None.