From 86462c940f73feb3b32076bc5b63fe17cdbec0b1 Mon Sep 17 00:00:00 2001 From: "A. F. Dudley" Date: Thu, 22 Jan 2026 02:12:11 -0500 Subject: [PATCH] Fix high-memlock spec to include complete OCI runtime config The base_runtime_spec for containerd requires a complete OCI spec, not just the rlimits section. The minimal spec was causing runc to fail with "open /proc/self/fd: no such file or directory" because essential mounts and namespaces were missing. This commit uses kind's default cri-base.json as the base and adds the rlimits configuration on top. The spec includes all necessary mounts, namespaces, capabilities, and kind-specific hooks. Co-Authored-By: Claude Opus 4.5 --- stack_orchestrator/deploy/k8s/helpers.py | 144 ++++++++++++++++++++++- 1 file changed, 142 insertions(+), 2 deletions(-) diff --git a/stack_orchestrator/deploy/k8s/helpers.py b/stack_orchestrator/deploy/k8s/helpers.py index 99876140..ef1fb922 100644 --- a/stack_orchestrator/deploy/k8s/helpers.py +++ b/stack_orchestrator/deploy/k8s/helpers.py @@ -336,19 +336,159 @@ def generate_high_memlock_spec_json(): The IPC_LOCK capability alone doesn't raise the RLIMIT_MEMLOCK limit - it only allows mlock() calls. We need to set the rlimit in the OCI runtime spec. + + IMPORTANT: This must be a complete OCI runtime spec, not just the rlimits + section. The spec is based on kind's default cri-base.json with rlimits added. """ import json # Use maximum 64-bit signed integer value for unlimited max_rlimit = 9223372036854775807 + # Based on kind's /etc/containerd/cri-base.json with rlimits added spec = { - "ociVersion": "1.0.2-dev", + "ociVersion": "1.1.0-rc.1", "process": { + "user": {"uid": 0, "gid": 0}, + "cwd": "/", + "capabilities": { + "bounding": [ + "CAP_CHOWN", + "CAP_DAC_OVERRIDE", + "CAP_FSETID", + "CAP_FOWNER", + "CAP_MKNOD", + "CAP_NET_RAW", + "CAP_SETGID", + "CAP_SETUID", + "CAP_SETFCAP", + "CAP_SETPCAP", + "CAP_NET_BIND_SERVICE", + "CAP_SYS_CHROOT", + "CAP_KILL", + "CAP_AUDIT_WRITE", + ], + "effective": [ + "CAP_CHOWN", + "CAP_DAC_OVERRIDE", + "CAP_FSETID", + "CAP_FOWNER", + "CAP_MKNOD", + "CAP_NET_RAW", + "CAP_SETGID", + "CAP_SETUID", + "CAP_SETFCAP", + "CAP_SETPCAP", + "CAP_NET_BIND_SERVICE", + "CAP_SYS_CHROOT", + "CAP_KILL", + "CAP_AUDIT_WRITE", + ], + "permitted": [ + "CAP_CHOWN", + "CAP_DAC_OVERRIDE", + "CAP_FSETID", + "CAP_FOWNER", + "CAP_MKNOD", + "CAP_NET_RAW", + "CAP_SETGID", + "CAP_SETUID", + "CAP_SETFCAP", + "CAP_SETPCAP", + "CAP_NET_BIND_SERVICE", + "CAP_SYS_CHROOT", + "CAP_KILL", + "CAP_AUDIT_WRITE", + ], + }, "rlimits": [ {"type": "RLIMIT_MEMLOCK", "hard": max_rlimit, "soft": max_rlimit}, {"type": "RLIMIT_NOFILE", "hard": 1048576, "soft": 1048576}, - ] + ], + "noNewPrivileges": True, }, + "root": {"path": "rootfs"}, + "mounts": [ + { + "destination": "/proc", + "type": "proc", + "source": "proc", + "options": ["nosuid", "noexec", "nodev"], + }, + { + "destination": "/dev", + "type": "tmpfs", + "source": "tmpfs", + "options": ["nosuid", "strictatime", "mode=755", "size=65536k"], + }, + { + "destination": "/dev/pts", + "type": "devpts", + "source": "devpts", + "options": [ + "nosuid", + "noexec", + "newinstance", + "ptmxmode=0666", + "mode=0620", + "gid=5", + ], + }, + { + "destination": "/dev/shm", + "type": "tmpfs", + "source": "shm", + "options": ["nosuid", "noexec", "nodev", "mode=1777", "size=65536k"], + }, + { + "destination": "/dev/mqueue", + "type": "mqueue", + "source": "mqueue", + "options": ["nosuid", "noexec", "nodev"], + }, + { + "destination": "/sys", + "type": "sysfs", + "source": "sysfs", + "options": ["nosuid", "noexec", "nodev", "ro"], + }, + { + "destination": "/run", + "type": "tmpfs", + "source": "tmpfs", + "options": ["nosuid", "strictatime", "mode=755", "size=65536k"], + }, + ], + "linux": { + "resources": {"devices": [{"allow": False, "access": "rwm"}]}, + "cgroupsPath": "/default", + "namespaces": [ + {"type": "pid"}, + {"type": "ipc"}, + {"type": "uts"}, + {"type": "mount"}, + {"type": "network"}, + ], + "maskedPaths": [ + "/proc/acpi", + "/proc/asound", + "/proc/kcore", + "/proc/keys", + "/proc/latency_stats", + "/proc/timer_list", + "/proc/timer_stats", + "/proc/sched_debug", + "/sys/firmware", + "/proc/scsi", + ], + "readonlyPaths": [ + "/proc/bus", + "/proc/fs", + "/proc/irq", + "/proc/sys", + "/proc/sysrq-trigger", + ], + }, + "hooks": {"createContainer": [{"path": "/kind/bin/mount-product-files.sh"}]}, } return json.dumps(spec, indent=2)