Add RuntimeClass support for unlimited RLIMIT_MEMLOCK

The previous approach of mounting cri-base.json into kind nodes failed
because we didn't tell containerd to use it via containerdConfigPatches.

RuntimeClass allows different stacks to have different rlimit profiles,
which is essential since kind only supports one cluster per host and
multiple stacks share the same cluster.

Changes:
- Add containerdConfigPatches to kind-config.yml to define runtime handlers
- Create RuntimeClass resources after cluster creation
- Add runtimeClassName to pod specs based on stack's security settings
- Rename cri-base.json to high-memlock-spec.json for clarity
- Add get_runtime_class() method to Spec that auto-derives from
  unlimited-memlock setting

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
helm-charts-with-caddy
A. F. Dudley 2026-01-22 01:58:38 -05:00
parent dd856af2d3
commit 87db167d7f
5 changed files with 134 additions and 25 deletions

View File

@ -41,3 +41,6 @@ kind_config_filename = "kind-config.yml"
kube_config_filename = "kubeconfig.yml" kube_config_filename = "kubeconfig.yml"
cri_base_filename = "cri-base.json" cri_base_filename = "cri-base.json"
unlimited_memlock_key = "unlimited-memlock" unlimited_memlock_key = "unlimited-memlock"
runtime_class_key = "runtime-class"
high_memlock_runtime = "high-memlock"
high_memlock_spec_filename = "high-memlock-spec.json"

View File

@ -531,6 +531,7 @@ class ClusterInfo:
volumes=volumes, volumes=volumes,
affinity=affinity, affinity=affinity,
tolerations=tolerations, tolerations=tolerations,
runtime_class_name=self.spec.get_runtime_class(),
), ),
) )
spec = client.V1DeploymentSpec( spec = client.V1DeploymentSpec(

View File

@ -37,7 +37,7 @@ from stack_orchestrator.deploy.k8s.helpers import (
) )
from stack_orchestrator.deploy.k8s.helpers import ( from stack_orchestrator.deploy.k8s.helpers import (
generate_kind_config, generate_kind_config,
generate_cri_base_json, generate_high_memlock_spec_json,
) )
from stack_orchestrator.deploy.k8s.cluster_info import ClusterInfo from stack_orchestrator.deploy.k8s.cluster_info import ClusterInfo
from stack_orchestrator.opts import opts from stack_orchestrator.opts import opts
@ -59,6 +59,36 @@ def _check_delete_exception(e: ApiException) -> None:
error_exit(f"k8s api error: {e}") error_exit(f"k8s api error: {e}")
def _create_runtime_class(name: str, handler: str):
    """Ensure a RuntimeClass exists for a custom containerd runtime handler.

    A RuntimeClass lets a pod choose which runtime handler runs it, so
    different pods can get different rlimit profiles (e.g., high-memlock).
    If the RuntimeClass is already present (HTTP 409) that is not an error.

    Args:
        name: The name of the RuntimeClass resource
        handler: The containerd runtime handler name
            (must match containerdConfigPatches)

    Raises:
        ApiException: for any API failure other than a 409 conflict.
    """
    api = client.NodeV1Api()
    manifest = client.V1RuntimeClass(
        api_version="node.k8s.io/v1",
        kind="RuntimeClass",
        metadata=client.V1ObjectMeta(name=name),
        handler=handler,
    )
    try:
        api.create_runtime_class(manifest)
    except ApiException as e:
        # Only "already exists" is tolerated; everything else propagates.
        if e.status != 409:
            raise
        outcome = f"RuntimeClass {name} already exists"
    else:
        outcome = f"Created RuntimeClass: {name}"
    # Both outcomes are reported only in debug mode.
    if opts.o.debug:
        print(outcome)
class K8sDeployer(Deployer): class K8sDeployer(Deployer):
name: str = "k8s" name: str = "k8s"
type: str type: str
@ -275,6 +305,12 @@ class K8sDeployer(Deployer):
# Wait for ingress to start # Wait for ingress to start
# (deployment provisioning will fail unless this is done) # (deployment provisioning will fail unless this is done)
wait_for_ingress_in_kind() wait_for_ingress_in_kind()
# Create RuntimeClass if unlimited_memlock is enabled
if self.cluster_info.spec.get_unlimited_memlock():
_create_runtime_class(
constants.high_memlock_runtime,
constants.high_memlock_runtime,
)
else: else:
print("Dry run mode enabled, skipping k8s API connect") print("Dry run mode enabled, skipping k8s API connect")
@ -669,17 +705,19 @@ class K8sDeployerConfigGenerator(DeployerConfigGenerator):
def generate(self, deployment_dir: Path): def generate(self, deployment_dir: Path):
# No need to do this for the remote k8s case # No need to do this for the remote k8s case
if self.type == "k8s-kind": if self.type == "k8s-kind":
# Generate cri-base.json if unlimited_memlock is enabled. # Generate high-memlock-spec.json if unlimited_memlock is enabled.
# Must be done before generate_kind_config() which references it. # Must be done before generate_kind_config() which references it.
if self.deployment_context.spec.get_unlimited_memlock(): if self.deployment_context.spec.get_unlimited_memlock():
cri_base_content = generate_cri_base_json() spec_content = generate_high_memlock_spec_json()
cri_base_file = deployment_dir.joinpath(constants.cri_base_filename) spec_file = deployment_dir.joinpath(
constants.high_memlock_spec_filename
)
if opts.o.debug: if opts.o.debug:
print( print(
f"Creating cri-base.json for unlimited memlock: {cri_base_file}" f"Creating high-memlock spec for unlimited memlock: {spec_file}"
) )
with open(cri_base_file, "w") as output_file: with open(spec_file, "w") as output_file:
output_file.write(cri_base_content) output_file.write(spec_content)
# Check the file isn't already there # Check the file isn't already there
# Get the config file contents # Get the config file contents

View File

@ -317,17 +317,19 @@ def _generate_kind_port_mappings(parsed_pod_files):
) )
def _generate_cri_base_mount(deployment_dir: Path): def _generate_high_memlock_spec_mount(deployment_dir: Path):
"""Generate the extraMount entry for cri-base.json to set RLIMIT_MEMLOCK.""" """Generate the extraMount entry for high-memlock-spec.json.
cri_base_path = deployment_dir.joinpath(constants.cri_base_filename).resolve()
return ( The spec file must be mounted at the same path inside the kind node
f" - hostPath: {cri_base_path}\n" as it appears on the host, because containerd's base_runtime_spec
f" containerPath: /etc/containerd/cri-base.json\n" references an absolute path.
) """
spec_path = deployment_dir.joinpath(constants.high_memlock_spec_filename).resolve()
return f" - hostPath: {spec_path}\n" f" containerPath: {spec_path}\n"
def generate_cri_base_json(): def generate_high_memlock_spec_json():
"""Generate cri-base.json content with unlimited RLIMIT_MEMLOCK. """Generate OCI spec JSON with unlimited RLIMIT_MEMLOCK.
This is needed for workloads like Solana validators that require large This is needed for workloads like Solana validators that require large
amounts of locked memory for memory-mapped files during snapshot decompression. amounts of locked memory for memory-mapped files during snapshot decompression.
@ -339,7 +341,7 @@ def generate_cri_base_json():
# Use maximum 64-bit signed integer value for unlimited # Use maximum 64-bit signed integer value for unlimited
max_rlimit = 9223372036854775807 max_rlimit = 9223372036854775807
cri_base = { spec = {
"ociVersion": "1.0.2-dev", "ociVersion": "1.0.2-dev",
"process": { "process": {
"rlimits": [ "rlimits": [
@ -348,7 +350,36 @@ def generate_cri_base_json():
] ]
}, },
} }
return json.dumps(cri_base, indent=2) return json.dumps(spec, indent=2)
# Backward compatibility: the old function name remains callable
def generate_cri_base_json():
    """Deprecated: Use generate_high_memlock_spec_json() instead.

    Returns exactly what generate_high_memlock_spec_json() returns.
    """
    spec_json = generate_high_memlock_spec_json()
    return spec_json
def _generate_containerd_config_patches(
    deployment_dir: Path, has_high_memlock: bool
) -> str:
    """Generate containerdConfigPatches YAML for custom runtime handlers.

    This configures containerd to have a runtime handler named after
    ``constants.high_memlock_runtime`` that uses the OCI base spec file
    ``constants.high_memlock_spec_filename`` located in ``deployment_dir``.

    Args:
        deployment_dir: Directory containing the high-memlock spec file; the
            resolved absolute path is written into ``base_runtime_spec``.
        has_high_memlock: When false, no patch is needed.

    Returns:
        A cluster-level ``containerdConfigPatches:`` YAML fragment terminated
        by a newline, or an empty string when ``has_high_memlock`` is false.
    """
    if not has_high_memlock:
        return ""

    spec_path = deployment_dir.joinpath(constants.high_memlock_spec_filename).resolve()
    runtime_name = constants.high_memlock_runtime
    plugin_path = 'plugins."io.containerd.grpc.v1.cri".containerd.runtimes'
    # The TOML patch is carried in a YAML literal block scalar ("|-"). Its
    # content lines must be indented deeper than the "- " list entry that
    # introduces them, otherwise the scalar is empty/invalid and containerd
    # never sees the handler. TOML itself ignores the leading whitespace.
    return (
        "containerdConfigPatches:\n"
        "  - |-\n"
        f"    [{plugin_path}.{runtime_name}]\n"
        '      runtime_type = "io.containerd.runc.v2"\n'
        f'      base_runtime_spec = "{spec_path}"\n'
    )
# Note: this makes any duplicate definition in b overwrite a # Note: this makes any duplicate definition in b overwrite a
@ -430,19 +461,30 @@ def generate_kind_config(deployment_dir: Path, deployment_context):
parsed_pod_files_map, deployment_dir, deployment_context parsed_pod_files_map, deployment_dir, deployment_context
) )
# Check if unlimited_memlock is enabled and add cri-base.json mount # Check if unlimited_memlock is enabled
unlimited_memlock = deployment_context.spec.get_unlimited_memlock() unlimited_memlock = deployment_context.spec.get_unlimited_memlock()
# Generate containerdConfigPatches for RuntimeClass support
containerd_patches_yml = _generate_containerd_config_patches(
deployment_dir, unlimited_memlock
)
# Add high-memlock spec file mount if needed
if unlimited_memlock: if unlimited_memlock:
cri_base_mount = _generate_cri_base_mount(deployment_dir) spec_mount = _generate_high_memlock_spec_mount(deployment_dir)
if mounts_yml: if mounts_yml:
# Append to existing mounts # Append to existing mounts
mounts_yml = mounts_yml.rstrip() + "\n" + cri_base_mount mounts_yml = mounts_yml.rstrip() + "\n" + spec_mount
else: else:
mounts_yml = f" extraMounts:\n{cri_base_mount}" mounts_yml = f" extraMounts:\n{spec_mount}"
return ( # Build the config - containerdConfigPatches must be at cluster level (before nodes)
"kind: Cluster\n" config = "kind: Cluster\n" "apiVersion: kind.x-k8s.io/v1alpha4\n"
"apiVersion: kind.x-k8s.io/v1alpha4\n"
if containerd_patches_yml:
config += containerd_patches_yml
config += (
"nodes:\n" "nodes:\n"
"- role: control-plane\n" "- role: control-plane\n"
" kubeadmConfigPatches:\n" " kubeadmConfigPatches:\n"
@ -454,3 +496,5 @@ def generate_kind_config(deployment_dir: Path, deployment_context):
f"{port_mappings_yml}\n" f"{port_mappings_yml}\n"
f"{mounts_yml}\n" f"{mounts_yml}\n"
) )
return config

View File

@ -153,6 +153,29 @@ class Spec:
).lower() ).lower()
) )
def get_runtime_class(self):
    """Return the runtime class name to set on pods, if any.

    The runtime class determines which containerd runtime handler to use,
    allowing different pods to have different rlimit profiles (e.g., for
    unlimited RLIMIT_MEMLOCK).

    Returns:
        The explicitly configured runtime class when the security section
        sets a truthy one; otherwise the high-memlock runtime name when
        unlimited memlock is enabled; otherwise None (default runtime).
    """
    security_section = self.obj.get(constants.security_key, {})
    configured = security_section.get(constants.runtime_class_key, None)
    # An explicit setting always wins over anything derived.
    if configured:
        return configured
    # No explicit value: derive from the unlimited-memlock setting.
    return constants.high_memlock_runtime if self.get_unlimited_memlock() else None
def get_deployment_type(self): def get_deployment_type(self):
return self.obj.get(constants.deploy_to_key) return self.obj.get(constants.deploy_to_key)