so-o2o: use alpine/kubectl; surface diagnostics on job timeout
CI test hit 'timed out waiting for the condition on jobs/caddy-cert-backup-manual'. Root cause: bitnami/kubectl runs as uid 1001 by default, but the hostPath mount target (/mnt/caddy-cert-backup on the kind node → /srv/kind/... on the host) is root-owned because kind creates bind-mounted dirs via the docker daemon. The pod couldn't write its output. Switch to alpine/kubectl:1.35.3 which runs as root by default and is smaller (faster pull in CI). Bump wait timeout to 120s as a cushion. Dump describe/pod-list/logs on timeout so future failures are debuggable from log output alone.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

pull/746/head
parent
c835ad7fdf
commit
52fab97e9b
|
|
@ -76,7 +76,7 @@ spec:
|
||||||
operator: Equal
|
operator: Equal
|
||||||
containers:
|
containers:
|
||||||
- name: backup
|
- name: backup
|
||||||
image: bitnami/kubectl:latest
|
image: alpine/kubectl:1.35.3
|
||||||
command:
|
command:
|
||||||
- sh
|
- sh
|
||||||
- -c
|
- -c
|
||||||
|
|
|
||||||
|
|
@ -322,17 +322,30 @@ kubectl label secret "$fake_cert_name" -n caddy-system manager=caddy
|
||||||
# Trigger the CronJob immediately (it fires every 5min on its own).
|
# Trigger the CronJob immediately (it fires every 5min on its own).
|
||||||
kubectl create job --from=cronjob/caddy-cert-backup \
|
kubectl create job --from=cronjob/caddy-cert-backup \
|
||||||
caddy-cert-backup-manual -n caddy-system
|
caddy-cert-backup-manual -n caddy-system
|
||||||
kubectl wait --for=condition=complete \
|
if ! kubectl wait --for=condition=complete \
|
||||||
job/caddy-cert-backup-manual -n caddy-system --timeout=60s
|
job/caddy-cert-backup-manual -n caddy-system --timeout=120s; then
|
||||||
|
echo "caddy cert backup job test: FAILED (job did not complete)"
|
||||||
|
echo "--- job description ---"
|
||||||
|
kubectl describe job/caddy-cert-backup-manual -n caddy-system || true
|
||||||
|
echo "--- pod list ---"
|
||||||
|
kubectl get pod -n caddy-system -l job-name=caddy-cert-backup-manual -o wide || true
|
||||||
|
echo "--- pod logs ---"
|
||||||
|
kubectl logs -n caddy-system -l job-name=caddy-cert-backup-manual --tail=200 || true
|
||||||
|
cleanup_and_exit
|
||||||
|
fi
|
||||||
|
|
||||||
# Backup file is root-owned (CronJob writes as root via kind bind mount).
|
# Backup file is root-owned (CronJob writes as root via kind bind mount).
|
||||||
|
# The secret's data.value is base64-encoded in YAML output, so assert on
|
||||||
|
# the secret name (which is plaintext in metadata). Value correctness is
|
||||||
|
# verified in the restore phase after a round-trip decode.
|
||||||
backup_file=$KIND_MOUNT_ROOT/caddy-cert-backup/caddy-secrets.yaml
|
backup_file=$KIND_MOUNT_ROOT/caddy-cert-backup/caddy-secrets.yaml
|
||||||
if ! sudo test -f "$backup_file"; then
|
if ! sudo test -f "$backup_file"; then
|
||||||
echo "caddy cert backup file test: FAILED (missing $backup_file)"
|
echo "caddy cert backup file test: FAILED (missing $backup_file)"
|
||||||
cleanup_and_exit
|
cleanup_and_exit
|
||||||
fi
|
fi
|
||||||
if ! sudo grep -q "$fake_cert_value" "$backup_file"; then
|
if ! sudo grep -q "$fake_cert_name" "$backup_file"; then
|
||||||
echo "caddy cert backup content test: FAILED (value not found in backup)"
|
echo "caddy cert backup content test: FAILED (seeded secret not in backup)"
|
||||||
|
sudo head -50 "$backup_file" || true
|
||||||
cleanup_and_exit
|
cleanup_and_exit
|
||||||
fi
|
fi
|
||||||
echo "caddy cert backup write test: passed"
|
echo "caddy cert backup write test: passed"
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue