feat(k8s): per-service resource layering in deployer
Resolve container resources using layered priority:
1. spec.yml per-container override (resources.containers.<name>)
2. Compose file deploy.resources block
3. spec.yml global resources
4. DEFAULT_CONTAINER_RESOURCES fallback

This prevents monitoring sidecars from inheriting the validator's resource requests (e.g., 256G memory). Each service gets appropriate resources from its compose definition unless explicitly overridden.

Note: existing deployments with a global resources block in spec.yml can remove it once compose files declare per-service defaults.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
add/local-test-runner
parent
8a8b882e32
commit
eae4c3cdff
|
|
@ -394,13 +394,43 @@ class ClusterInfo:
|
||||||
result.append(pv)
|
result.append(pv)
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
def _any_service_has_host_network(self):
    """Report whether any compose service requests host networking.

    Scans every entry of ``self.parsed_pod_yaml_map`` and each service
    listed under its ``services`` key (a missing key is treated as no
    services).

    Returns:
        True if at least one service has ``network_mode`` equal to
        ``"host"``; False otherwise, including when the map is empty.
    """
    return any(
        service.get("network_mode") == "host"
        for pod_yaml in self.parsed_pod_yaml_map.values()
        for service in pod_yaml.get("services", {}).values()
    )
|
||||||
|
|
||||||
|
def _resolve_container_resources(
    self, container_name: str, service_info: dict, global_resources: Resources
) -> Resources:
    """Pick the resources for one container from the layered sources.

    Sources are consulted in order and the first non-empty one wins:

    1. the per-container override returned by
       ``self.spec.get_container_resources_for(container_name)``;
    2. the ``deploy.resources`` mapping of the compose ``service_info``;
    3. ``global_resources``, which the caller passes in already resolved
       (``get_deployment`` substitutes DEFAULT_CONTAINER_RESOURCES when
       the spec has no global block).

    Args:
        container_name: Name used to look up the spec override.
        service_info: Compose service definition for this container.
        global_resources: Fallback used when neither layer above applies.

    Returns:
        The ``Resources`` selected by the first matching layer.
    """
    # Layer 1: an explicit spec override beats everything else.
    spec_override = self.spec.get_container_resources_for(container_name)
    if spec_override:
        return spec_override

    # Layer 2: compose deploy.resources. A missing or empty "deploy"
    # section collapses to an empty mapping, so the lookup yields {}.
    compose_block = (service_info.get("deploy", {}) or {}).get("resources", {})

    # Layer 3: the caller-supplied global value when compose declares nothing.
    return Resources(compose_block) if compose_block else global_resources
|
||||||
|
|
||||||
# TODO: put things like image pull policy into an object-scope struct
|
# TODO: put things like image pull policy into an object-scope struct
|
||||||
def get_deployment(self, image_pull_policy: Optional[str] = None):
|
def get_deployment(self, image_pull_policy: Optional[str] = None):
|
||||||
containers = []
|
containers = []
|
||||||
services = {}
|
services = {}
|
||||||
resources = self.spec.get_container_resources()
|
global_resources = self.spec.get_container_resources()
|
||||||
if not resources:
|
if not global_resources:
|
||||||
resources = DEFAULT_CONTAINER_RESOURCES
|
global_resources = DEFAULT_CONTAINER_RESOURCES
|
||||||
for pod_name in self.parsed_pod_yaml_map:
|
for pod_name in self.parsed_pod_yaml_map:
|
||||||
pod = self.parsed_pod_yaml_map[pod_name]
|
pod = self.parsed_pod_yaml_map[pod_name]
|
||||||
services = pod["services"]
|
services = pod["services"]
|
||||||
|
|
@ -483,6 +513,9 @@ class ClusterInfo:
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
]
|
]
|
||||||
|
container_resources = self._resolve_container_resources(
|
||||||
|
container_name, service_info, global_resources
|
||||||
|
)
|
||||||
container = client.V1Container(
|
container = client.V1Container(
|
||||||
name=container_name,
|
name=container_name,
|
||||||
image=image_to_use,
|
image=image_to_use,
|
||||||
|
|
@ -501,7 +534,7 @@ class ClusterInfo:
|
||||||
if self.spec.get_capabilities()
|
if self.spec.get_capabilities()
|
||||||
else None,
|
else None,
|
||||||
),
|
),
|
||||||
resources=to_k8s_resource_requirements(resources),
|
resources=to_k8s_resource_requirements(container_resources),
|
||||||
)
|
)
|
||||||
containers.append(container)
|
containers.append(container)
|
||||||
volumes = volumes_for_pod_files(
|
volumes = volumes_for_pod_files(
|
||||||
|
|
@ -568,6 +601,7 @@ class ClusterInfo:
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
use_host_network = self._any_service_has_host_network()
|
||||||
template = client.V1PodTemplateSpec(
|
template = client.V1PodTemplateSpec(
|
||||||
metadata=client.V1ObjectMeta(annotations=annotations, labels=labels),
|
metadata=client.V1ObjectMeta(annotations=annotations, labels=labels),
|
||||||
spec=client.V1PodSpec(
|
spec=client.V1PodSpec(
|
||||||
|
|
@ -577,6 +611,8 @@ class ClusterInfo:
|
||||||
affinity=affinity,
|
affinity=affinity,
|
||||||
tolerations=tolerations,
|
tolerations=tolerations,
|
||||||
runtime_class_name=self.spec.get_runtime_class(),
|
runtime_class_name=self.spec.get_runtime_class(),
|
||||||
|
host_network=use_host_network or None,
|
||||||
|
dns_policy=("ClusterFirstWithHostNet" if use_host_network else None),
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
spec = client.V1DeploymentSpec(
|
spec = client.V1DeploymentSpec(
|
||||||
|
|
|
||||||
|
|
@ -120,6 +120,27 @@ class Spec:
|
||||||
self.obj.get(constants.resources_key, {}).get("containers", {})
|
self.obj.get(constants.resources_key, {}).get("containers", {})
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def get_container_resources_for(
    self, container_name: str
) -> typing.Optional[Resources]:
    """Return the per-container resource override for ``container_name``.

    Looks for an entry keyed by ``container_name`` inside the
    ``"containers"`` mapping of the spec's resources section
    (``constants.resources_key``). The entry counts as an override only
    when it is a dict that has a ``"reservations"`` or ``"limits"`` key;
    anything else under that name is ignored.

    Returns:
        ``Resources`` built from the matching entry, or None when no
        override exists so the caller can fall back to other sources.
    """
    resources_section = self.obj.get(constants.resources_key, {})
    overrides = resources_section.get("containers", {})

    if container_name not in overrides:
        return None

    candidate = overrides[container_name]
    # Non-dict values under this name are not per-container blocks.
    if not isinstance(candidate, dict):
        return None
    # A dict without reservations/limits is not a resource override either.
    if "reservations" not in candidate and "limits" not in candidate:
        return None

    return Resources(candidate)
|
||||||
|
|
||||||
def get_volume_resources(self):
|
def get_volume_resources(self):
|
||||||
return Resources(
|
return Resources(
|
||||||
self.obj.get(constants.resources_key, {}).get(constants.volumes_key, {})
|
self.obj.get(constants.resources_key, {}).get(constants.volumes_key, {})
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue