#!/usr/bin/env bash
set -e
if [ -n "$CERC_SCRIPT_DEBUG" ]; then
  set -x
  # Dump environment variables for debugging
  echo "Environment variables:"
  env
fi
# Helper functions: TODO move into a separate file
wait_for_pods_started () {
  for i in {1..50}
  do
    local ps_output=$( $TEST_TARGET_SO deployment --dir $test_deployment_dir ps )
    if [[ "$ps_output" == *"Running containers:"* ]]; then
      # if ready, return
      return
    else
      # if not ready, wait
      sleep 5
    fi
  done
  # Timed out, error exit
  echo "waiting for pods to start: FAILED"
  cleanup_and_exit
}
wait_for_log_output () {
  for i in {1..50}
  do
    local log_output=$( $TEST_TARGET_SO deployment --dir $test_deployment_dir logs )
    if [[ -n "$log_output" ]] && [[ "$log_output" != *"No logs available"* ]] && [[ "$log_output" != *"Pods not running"* ]]; then
      # if ready, return
      return
    else
      # if not ready, wait
      sleep 5
    fi
  done
  # Timed out, error exit
  echo "waiting for pod log content: FAILED"
  cleanup_and_exit
}
cleanup_and_exit () {
  # Full teardown so CI runners don't leak namespaces/PVs between runs.
  $TEST_TARGET_SO deployment --dir $test_deployment_dir \
    stop --delete-volumes --delete-namespace --skip-cluster-management || true
  exit 1
}
# Usage: assert_ns_phase <expected-phase>
assert_ns_phase () {
  local expected=$1
  local phase
  phase=$(kubectl get namespace ${deployment_ns} -o jsonpath='{.status.phase}' 2>/dev/null || echo "Missing")
  if [ "$phase" != "$expected" ]; then
    echo "namespace phase test: FAILED (expected ${expected}, got ${phase})"
    cleanup_and_exit
  fi
}
# Count labeled resources in the deployment namespace. down() is
# synchronous on its own cleanup (waits for PVCs/pods to terminate
# before returning) so callers can assert immediately.
# Usage: assert_no_labeled_resources <kind>
assert_no_labeled_resources () {
  local kind=$1
  local count
  count=$(kubectl get ${kind} -n ${deployment_ns} \
    -l app.kubernetes.io/stack=test --no-headers 2>/dev/null | wc -l)
  if [ "$count" -ne 0 ]; then
    echo "labeled cleanup test: FAILED (${kind} still present: ${count})"
    cleanup_and_exit
  fi
}
# Note: eventually this test should be folded into ../deploy/
# but keeping it separate for now for convenience
TEST_TARGET_SO=$( ls -t1 ./package/laconic-so* | head -1 )
# Set a non-default repo dir
export CERC_REPO_BASE_DIR=~/stack-orchestrator-test/repo-base-dir
# kind-mount-root is cluster-level infra, independent of any deployment dir.
# Previous runs' CronJob wrote files here as root via the Kind bind mount.
export KIND_MOUNT_ROOT=~/stack-orchestrator-test/kind-mount
echo "Testing this package: $TEST_TARGET_SO"
echo "Test version command"
reported_version_string=$( $TEST_TARGET_SO version )
echo "Version reported is: ${reported_version_string}"
echo "Cloning repositories into: $CERC_REPO_BASE_DIR"
rm -rf $CERC_REPO_BASE_DIR
mkdir -p $CERC_REPO_BASE_DIR
sudo rm -rf $KIND_MOUNT_ROOT
mkdir -p $KIND_MOUNT_ROOT
$TEST_TARGET_SO --stack test setup-repositories
$TEST_TARGET_SO --stack test build-containers
# Test basic stack-orchestrator deploy to k8s
test_deployment_dir=$CERC_REPO_BASE_DIR/test-deployment-dir
test_deployment_spec=$CERC_REPO_BASE_DIR/test-deployment-spec.yml
$TEST_TARGET_SO --stack test deploy --deploy-to k8s-kind init --output $test_deployment_spec --config CERC_TEST_PARAM_1=PASSED
# Check the file now exists
if [ ! -f "$test_deployment_spec" ]; then
  echo "deploy init test: spec file not present"
  echo "deploy init test: FAILED"
  exit 1
fi
echo "deploy init test: passed"
# Switch to a full path for bind mount.
sed -i "s|^\(\s*test-data-bind:$\)$|\1 ${test_deployment_dir}/data/test-data-bind|" $test_deployment_spec
# Enable caddy cert backup by setting kind-mount-root.
echo "kind-mount-root: $KIND_MOUNT_ROOT" >> $test_deployment_spec
$TEST_TARGET_SO --stack test deploy create --spec-file $test_deployment_spec --deployment-dir $test_deployment_dir
# Check the deployment dir exists
if [ ! -d "$test_deployment_dir" ]; then
  echo "deploy create test: deployment directory not present"
  echo "deploy create test: FAILED"
  exit 1
fi
echo "deploy create test: passed"
# Check the file written by the create command in the stack now exists
if [ ! -f "$test_deployment_dir/create-file" ]; then
  echo "deploy create test: create output file not present"
  echo "deploy create test: FAILED"
  exit 1
fi
# And has the right content
create_file_content=$(<$test_deployment_dir/create-file)
if [ "$create_file_content" != "create-command-output-data" ]; then
  echo "deploy create test: create output file contents not correct"
  echo "deploy create test: FAILED"
  exit 1
fi
# Add a config file to be picked up by the ConfigMap before starting.
echo "dbfc7a4d-44a7-416d-b5f3-29842cc47650" > $test_deployment_dir/configmaps/test-config/test_config
# Add secrets to the deployment spec (references a pre-existing k8s Secret by name).
# deploy init already writes an empty 'secrets: {}' key, so we replace it
# rather than appending (ruamel.yaml rejects duplicate keys).
deployment_spec_file=${test_deployment_dir}/spec.yml
sed -i 's/^secrets: {}$/secrets:\n  test-secret:\n    - TEST_SECRET_KEY/' ${deployment_spec_file}
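# After the rewrite the spec should contain a block shaped like this
# (shown for reference; the Secret itself must already exist in the cluster):
#
#   secrets:
#     test-secret:
#       - TEST_SECRET_KEY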
# Get the deployment ID and namespace for kubectl queries
# deployment-id is what flows into app_name → resource name prefix.
# Fall back to cluster-id for deployment.yml files written before the
# deployment-id field existed (pre-decouple compatibility).
deployment_id=$(awk '/^deployment-id:/ {print $2; exit}' ${test_deployment_dir}/deployment.yml)
if [ -z "$deployment_id" ]; then
  deployment_id=$(awk '/^cluster-id:/ {print $2; exit}' ${test_deployment_dir}/deployment.yml)
fi
# Namespace is derived from stack name: laconic-{stack_name}
deployment_ns="laconic-test"
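# Purely informational: surface the derived identifiers in CI logs so the
# kubectl assertions below are easier to follow.
echo "Using deployment id: ${deployment_id}, namespace: ${deployment_ns}"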
echo "deploy create output file test: passed"
# Try to start the deployment (--perform-cluster-management needed on first start
# because 'start' defaults to --skip-cluster-management)
$TEST_TARGET_SO deployment --dir $test_deployment_dir start --perform-cluster-management
wait_for_pods_started
# Caddy cert backup install: CronJob + RBAC should exist in caddy-system
for kind in serviceaccount role rolebinding cronjob; do
  if ! kubectl get $kind caddy-cert-backup -n caddy-system >/dev/null 2>&1; then
    echo "caddy-cert-backup $kind install test: FAILED"
    cleanup_and_exit
  fi
done
echo "caddy-cert-backup install test: passed"
# Host-path compose volumes (../config/test/script.sh, ../config/test/settings.env)
# should flow through auto-generated per-namespace ConfigMaps — no kind
# extraMount, no compose/spec rewriting. The pod mount lands via
# ConfigMap + subPath.
for cm_name in \
  "${deployment_id}-host-path-config-test-script-sh" \
  "${deployment_id}-host-path-config-test-settings-env"; do
  if ! kubectl get configmap "$cm_name" -n "$deployment_ns" >/dev/null 2>&1; then
    echo "host-path configmap test: ConfigMap $cm_name not found"
    cleanup_and_exit
  fi
done
echo "host-path configmap test: passed"
# Deployment dir should be untouched — compose file still has the
# original host-path volume entries and no synthetic configmap dirs.
if ! grep -q '\.\./config/test/script\.sh:/opt/run\.sh' \
    "$test_deployment_dir/compose/docker-compose-test.yml"; then
  echo "compose unchanged test: host-path volume entry missing"
  cleanup_and_exit
fi
if [ -d "$test_deployment_dir/configmaps/host-path-config-test-script-sh" ]; then
  echo "compose unchanged test: unexpected configmaps/host-path-* dir present"
  cleanup_and_exit
fi
echo "compose unchanged test: passed"
# kind-config.yml should NOT contain /mnt/host-path-* extraMounts —
# they are replaced by the ConfigMap mechanism.
if grep -q 'containerPath: /mnt/host-path-' "$test_deployment_dir/kind-config.yml"; then
  echo "no-host-path-extramount test: FAILED"
  cleanup_and_exit
fi
echo "no-host-path-extramount test: passed"
# Check logs command works
wait_for_log_output
sleep 1
log_output_3=$( $TEST_TARGET_SO deployment --dir $test_deployment_dir logs )
if [[ "$log_output_3" == *"filesystem is fresh"* ]]; then
echo "deployment logs test: passed"
else
echo "deployment logs test: FAILED"
echo "$log_output_3"
cleanup_and_exit
fi
# Check the config variable CERC_TEST_PARAM_1 was passed correctly
if [[ "$log_output_3" == *"Test-param-1: PASSED"* ]]; then
echo "deployment config test: passed"
else
echo "deployment config test: FAILED"
cleanup_and_exit
fi
# Check the config variable CERC_TEST_PARAM_2 was passed correctly from the compose file
if [[ "$log_output_3" == *"Test-param-2: CERC_TEST_PARAM_2_VALUE"* ]]; then
echo "deployment compose config test: passed"
else
echo "deployment compose config test: FAILED"
exit 1
fi
# Check that the ConfigMap is mounted and contains the expected content.
log_output_4=$( $TEST_TARGET_SO deployment --dir $test_deployment_dir logs )
if [[ "$log_output_4" == *"/config/test_config:"* ]] && [[ "$log_output_4" == *"dbfc7a4d-44a7-416d-b5f3-29842cc47650"* ]]; then
echo "deployment ConfigMap test: passed"
else
echo "deployment ConfigMap test: FAILED"
cleanup_and_exit
fi
# Check that the bind-mount volume is mounted.
log_output_5=$( $TEST_TARGET_SO deployment --dir $test_deployment_dir logs )
if [[ "$log_output_5" == *"/data: MOUNTED"* ]]; then
echo "deployment bind volumes test: passed"
else
echo "deployment bind volumes test: FAILED"
echo "$log_output_5"
cleanup_and_exit
fi
# Check that the provisioner managed volume is mounted.
log_output_6=$( $TEST_TARGET_SO deployment --dir $test_deployment_dir logs )
if [[ "$log_output_6" == *"/data2: MOUNTED"* ]]; then
echo "deployment provisioner volumes test: passed"
else
echo "deployment provisioner volumes test: FAILED"
echo "$log_output_6"
cleanup_and_exit
fi
# --- New feature tests: namespace, labels, jobs, secrets ---
# Check that the pod is in the deployment-specific namespace (not default)
ns_pod_count=$(kubectl get pods -n ${deployment_ns} -l app=${deployment_id} --no-headers 2>/dev/null | wc -l)
if [ "$ns_pod_count" -gt 0 ]; then
echo "namespace isolation test: passed"
else
echo "namespace isolation test: FAILED"
echo "Expected pod in namespace ${deployment_ns}"
cleanup_and_exit
fi
# Check that the stack label is set on the pod
stack_label_count=$(kubectl get pods -n ${deployment_ns} -l app.kubernetes.io/stack=test --no-headers 2>/dev/null | wc -l)
if [ "$stack_label_count" -gt 0 ]; then
echo "stack label test: passed"
else
echo "stack label test: FAILED"
cleanup_and_exit
fi
# Check that the job completed successfully
for i in {1..30}; do
  job_status=$(kubectl get job ${deployment_id}-job-test-job -n ${deployment_ns} -o jsonpath='{.status.succeeded}' 2>/dev/null || true)
  if [ "$job_status" == "1" ]; then
    break
  fi
  sleep 2
done
if [ "$job_status" == "1" ]; then
echo "job completion test: passed"
else
echo "job completion test: FAILED"
echo "Job status.succeeded: ${job_status}"
cleanup_and_exit
fi
# Check that the secrets spec results in an envFrom secretRef on the pod
secret_ref=$(kubectl get pod -n ${deployment_ns} -l app=${deployment_id} \
  -o jsonpath='{.items[0].spec.containers[0].envFrom[?(@.secretRef.name=="test-secret")].secretRef.name}' 2>/dev/null || true)
if [ "$secret_ref" == "test-secret" ]; then
  echo "secrets envFrom test: passed"
else
  echo "secrets envFrom test: FAILED"
  echo "Expected secretRef 'test-secret', got: ${secret_ref}"
  cleanup_and_exit
fi
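# For reference, the container spec fragment matched above has this shape:
#
#   envFrom:
#     - secretRef:
#         name: test-secret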
# Stop with --delete-volumes (but not --delete-namespace) and verify:
# - namespace stays Active (no termination race on restart)
# - stack-labeled workloads are gone
# - bind-mount data on the host survives; provisioner volumes are recreated
$TEST_TARGET_SO deployment --dir $test_deployment_dir stop --delete-volumes --skip-cluster-management
assert_ns_phase "Active"
echo "stop preserves namespace test: passed"
for kind in deployment job ingress service configmap secret pvc pod; do
  assert_no_labeled_resources "$kind"
done
echo "stop cleans labeled resources test: passed"
# Restart — no wait needed, the namespace is still Active.
$TEST_TARGET_SO deployment --dir $test_deployment_dir start --skip-cluster-management
wait_for_pods_started
wait_for_log_output
sleep 1
log_output_10=$( $TEST_TARGET_SO deployment --dir $test_deployment_dir logs )
if [[ "$log_output_10" == *"/data filesystem is old"* ]]; then
echo "Retain bind volumes test: passed"
else
echo "Retain bind volumes test: FAILED"
cleanup_and_exit
fi
# Provisioner volumes are destroyed when PVs are deleted (--delete-volumes on stop).
# Unlike bind-mount volumes whose data persists on the host, provisioner storage
# is gone, so the volume appears fresh after restart.
log_output_11=$( $TEST_TARGET_SO deployment --dir $test_deployment_dir logs )
if [[ "$log_output_11" == *"/data2 filesystem is fresh"* ]]; then
echo "Fresh provisioner volumes test: passed"
else
echo "Fresh provisioner volumes test: FAILED"
cleanup_and_exit
fi
# --- Caddy cert backup/restore E2E ---
# Seed a fake cert secret in caddy-system (simulates an LE-issued cert).
fake_cert_name="caddy.ingress--certificates.test-domain.test-domain.crt"
fake_cert_value="fake-cert-$(date +%s)"
kubectl create secret generic "$fake_cert_name" \
  -n caddy-system \
  --from-literal=value="$fake_cert_value"
kubectl label secret "$fake_cert_name" -n caddy-system manager=caddy
# Trigger the CronJob immediately (it fires every 5min on its own).
kubectl create job --from=cronjob/caddy-cert-backup \
  caddy-cert-backup-manual -n caddy-system
if ! kubectl wait --for=condition=complete \
    job/caddy-cert-backup-manual -n caddy-system --timeout=120s; then
  echo "caddy cert backup job test: FAILED (job did not complete)"
  echo "--- job description ---"
  kubectl describe job/caddy-cert-backup-manual -n caddy-system || true
  echo "--- pod list ---"
  kubectl get pod -n caddy-system -l job-name=caddy-cert-backup-manual -o wide || true
  echo "--- pod logs ---"
  kubectl logs -n caddy-system -l job-name=caddy-cert-backup-manual --tail=200 || true
  cleanup_and_exit
fi
# Backup file is root-owned (CronJob writes as root via kind bind mount).
# The secret's data.value is base64-encoded in YAML output, so assert on
# the secret name (which is plaintext in metadata). Value correctness is
# verified in the restore phase after a round-trip decode.
backup_file=$KIND_MOUNT_ROOT/caddy-cert-backup/caddy-secrets.yaml
if ! sudo test -f "$backup_file"; then
  echo "caddy cert backup file test: FAILED (missing $backup_file)"
  cleanup_and_exit
fi
if ! sudo grep -q "$fake_cert_name" "$backup_file"; then
  echo "caddy cert backup content test: FAILED (seeded secret not in backup)"
  sudo head -50 "$backup_file" || true
  cleanup_and_exit
fi
echo "caddy cert backup write test: passed"
# Full teardown including Kind cluster — --perform-cluster-management on stop
# destroys the cluster, simulating the "recreate from scratch" scenario.
$TEST_TARGET_SO deployment --dir $test_deployment_dir \
  stop --delete-volumes --delete-namespace --perform-cluster-management
# Recreate: new Kind cluster, Caddy install should restore from backup BEFORE
# the Caddy Deployment pod starts.
$TEST_TARGET_SO deployment --dir $test_deployment_dir start --perform-cluster-management
wait_for_pods_started
if ! kubectl get secret "$fake_cert_name" -n caddy-system >/dev/null 2>&1; then
  echo "caddy cert restore test: FAILED (secret missing from new cluster)"
  cleanup_and_exit
fi
restored_value=$(kubectl get secret "$fake_cert_name" -n caddy-system \
  -o jsonpath='{.data.value}' | base64 -d)
if [ "$restored_value" != "$fake_cert_value" ]; then
  echo "caddy cert restore test: FAILED (value mismatch: '$restored_value')"
  cleanup_and_exit
fi
echo "caddy cert restore test: passed"
# Final teardown: --delete-namespace removes the namespace, and
# --perform-cluster-management tears down the Kind cluster so the next test
# step in this CI workflow (e.g. run-restart-test.sh) starts from a clean
# host.
$TEST_TARGET_SO deployment --dir $test_deployment_dir \
  stop --delete-volumes --delete-namespace --perform-cluster-management
if kind get clusters 2>/dev/null | grep -q .; then
  echo "cluster teardown test: FAILED (kind cluster still present)"
  exit 1
fi
echo "cluster teardown test: passed"
echo "Test passed"