fix(k8s): drop stale ACME accounts during etcd cleanup
_clean_etcd_keeping_certs() preserved ALL caddy-system secrets across cluster recreations, including ACME account secrets registered with a wrong or empty email. Caddy reuses these stale accounts instead of registering fresh ones, causing recurring "unable to parse email address" errors.

Filter the etcd restore loop to keep only certificate secrets (keys matching *certificates*). ACME accounts, OCSP staples, and locks are transient and are recreated automatically by Caddy on startup.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

fix-etcd-drop-stale-acme
parent
4a1b5d86fd
commit
6a2f2a5dde
|
|
@ -118,12 +118,17 @@ def _get_etcd_host_path_from_kind_config(config_file: str) -> Optional[str]:
|
||||||
|
|
||||||
|
|
||||||
def _clean_etcd_keeping_certs(etcd_path: str) -> bool:
|
def _clean_etcd_keeping_certs(etcd_path: str) -> bool:
|
||||||
"""Clean persisted etcd, keeping only TLS certificates.
|
"""Clean persisted etcd, keeping only TLS certificate secrets.
|
||||||
|
|
||||||
When etcd is persisted and a cluster is recreated, kind tries to install
|
When etcd is persisted and a cluster is recreated, kind tries to install
|
||||||
resources fresh but they already exist. Instead of trying to delete
|
resources fresh but they already exist. Instead of trying to delete
|
||||||
specific stale resources (blacklist), we keep only the valuable data
|
specific stale resources (blacklist), we keep only the valuable data
|
||||||
(caddy TLS certs) and delete everything else (whitelist approach).
|
(caddy TLS certificate secrets) and delete everything else.
|
||||||
|
|
||||||
|
ACME account secrets are intentionally NOT preserved — they may contain
|
||||||
|
stale registrations (e.g. with wrong/empty email) that cause "unable to
|
||||||
|
parse email address" errors. Caddy re-registers accounts automatically
|
||||||
|
on startup using the email from the ConfigMap.
|
||||||
|
|
||||||
The etcd image is distroless (no shell), so we extract the statically-linked
|
The etcd image is distroless (no shell), so we extract the statically-linked
|
||||||
etcdctl binary and run it from alpine which has shell support.
|
etcdctl binary and run it from alpine which has shell support.
|
||||||
|
|
@ -218,6 +223,11 @@ def _clean_etcd_keeping_certs(etcd_path: str) -> bool:
|
||||||
jq -r ".kvs[] | @base64" /backup/kept.json 2>/dev/null | \
|
jq -r ".kvs[] | @base64" /backup/kept.json 2>/dev/null | \
|
||||||
while read encoded; do
|
while read encoded; do
|
||||||
key=$(echo $encoded | base64 -d | jq -r ".key" | base64 -d)
|
key=$(echo $encoded | base64 -d | jq -r ".key" | base64 -d)
|
||||||
|
# Keep only certificate secrets, drop ACME accounts/locks/OCSP
|
||||||
|
case "$key" in
|
||||||
|
*certificates*) ;;
|
||||||
|
*) continue ;;
|
||||||
|
esac
|
||||||
val=$(echo $encoded | base64 -d | jq -r ".value" | base64 -d)
|
val=$(echo $encoded | base64 -d | jq -r ".value" | base64 -d)
|
||||||
echo "$val" | /backup/etcdctl put "$key"
|
echo "$val" | /backup/etcdctl put "$key"
|
||||||
done
|
done
|
||||||
|
|
@ -259,7 +269,7 @@ def _clean_etcd_keeping_certs(etcd_path: str) -> bool:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
if opts.o.debug:
|
if opts.o.debug:
|
||||||
print("Cleaned etcd, kept only TLS certificates")
|
print("Cleaned etcd, kept only TLS certificate secrets (dropped ACME accounts)")
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue