fix(k8s): reconcile Caddy image regardless of --skip-cluster-management

The Caddy image reconcile was gated on `not self.skip_cluster_management`
alongside the install path. With --skip-cluster-management being the
default, that meant spec image changes never propagated on normal
restarts — the operator had to pass --perform-cluster-management for
the reconcile to run, which is surprising and conflates cluster
lifecycle ownership with routine workload reconciliation.

Split the two: the install path still requires cluster-management
ownership (it seeds the caddy-system namespace, secrets, and the
cert-backup CronJob, which are cluster-creation concerns). The
update-on-reuse patch is a plain k8s-API operation against a running
Deployment, so it now runs in both modes.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
pull/749/head
Prathamesh Musale 2026-04-21 09:02:22 +00:00
parent 8712a5ea62
commit d65802f8ce
1 changed file with 16 additions and 14 deletions

View File

@@ -881,33 +881,35 @@ class K8sDeployer(Deployer):
check_mounts_compatible(existing, kind_config) check_mounts_compatible(existing, kind_config)
self.connect_api() self.connect_api()
self._ensure_namespace() self._ensure_namespace()
caddy_image = self.cluster_info.spec.get_caddy_ingress_image()
# Fresh-install path: gated on cluster lifecycle ownership
# because install_ingress_for_kind also seeds caddy-system
# (namespace, secrets restore, cert-backup CronJob).
if self.is_kind() and not self.skip_cluster_management: if self.is_kind() and not self.skip_cluster_management:
caddy_image = self.cluster_info.spec.get_caddy_ingress_image()
if not is_ingress_running(): if not is_ingress_running():
# Fresh install — always needs an image; use the spec
# value if set, else the hardcoded default.
install_ingress_for_kind( install_ingress_for_kind(
self.cluster_info.spec.get_acme_email(), self.cluster_info.spec.get_acme_email(),
self.cluster_info.spec.get_kind_mount_root(), self.cluster_info.spec.get_kind_mount_root(),
caddy_image=caddy_image, caddy_image=caddy_image,
) )
wait_for_ingress_in_kind() wait_for_ingress_in_kind()
elif caddy_image is not None:
# Ingress already up AND the operator explicitly set a
# caddy-ingress-image in spec — reconcile the running
# image. Spec absent => don't touch: the operator may
# have set the image out-of-band (ansible playbook,
# prior explicit spec on a different deployment) and a
# silent revert would be worse than doing nothing.
# Note: caddy-system is cluster-scoped, so whichever
# deployment's spec sets the image last, wins.
if update_caddy_ingress_image(caddy_image):
wait_for_ingress_in_kind()
if self.cluster_info.spec.get_unlimited_memlock(): if self.cluster_info.spec.get_unlimited_memlock():
_create_runtime_class( _create_runtime_class(
constants.high_memlock_runtime, constants.high_memlock_runtime,
constants.high_memlock_runtime, constants.high_memlock_runtime,
) )
# Reconcile Caddy image whenever the operator explicitly set
# it in spec, regardless of cluster lifecycle ownership —
# --skip-cluster-management (the default) shouldn't prevent
# a routine k8s-API-level patch of a running Deployment.
# Spec absent => don't touch: the operator may have set the
# image out-of-band (ansible playbook, prior explicit spec on
# a different deployment) and a silent revert would be worse
# than doing nothing. caddy-system is cluster-scoped, so
# whichever deployment's spec sets the image last wins.
if self.is_kind() and caddy_image is not None and is_ingress_running():
if update_caddy_ingress_image(caddy_image):
wait_for_ingress_in_kind()
def _create_ingress(self): def _create_ingress(self):
"""Create or update Ingress with TLS certificate lookup.""" """Create or update Ingress with TLS certificate lookup."""