Spaces:

SandyTheAdventurer
/

openenv

No application file

App Files Files Community

MrShadowBlade committed 10 days ago

Commit

57c06cb

1 Parent(s): c8daa82

Implement Kubernetes action classes and execution logic

Browse files

Files changed (18) hide show

server/actions/__init__.py +58 -0
server/actions/delete_pod_action.py +7 -0
server/actions/describe_action.py +8 -0
server/actions/drain_action.py +7 -0
server/actions/hpa_action.py +17 -0
server/actions/patch_action.py +9 -0
server/actions/rollout_action.py +7 -0
server/actions/scale_action.py +8 -0
server/conditions/__init__.py +0 -0
server/executor.py +115 -0
server/graders/__init__.py +0 -0
server/models.py +84 -0
server/tasks/__init__.py +0 -0
server/validator.py +168 -0
server/worker.py +84 -0
tests/__init__.py +0 -0
tests/test_actions.py +211 -0
tests/test_executor.py +174 -0

server/actions/__init__.py ADDED Viewed

	@@ -0,0 +1,58 @@

+from .scale_action import ScaleAction
+from .patch_action import PatchAction
+from .delete_pod_action import DeletePodAction
+from .rollout_action import RolloutRestartAction
+from .hpa_action import SetHPAAction
+from .drain_action import DrainNodeAction
+from .describe_action import DescribeAction
+from typing import Union, Any, Dict, Literal
# Union of every concrete action model this package exposes.
KubeAction = Union[
    ScaleAction,
    PatchAction,
    DeletePodAction,
    RolloutRestartAction,
    SetHPAAction,
    DrainNodeAction,
    DescribeAction,
]

# Literal type listing the discriminator strings used in ``action_type``.
ActionType = Literal[
    "scale",
    "patch",
    "delete_pod",
    "rollout_restart",
    "set_hpa",
    "drain_node",
    "describe",
]
def parse_action(data: Dict[str, Any]) -> KubeAction:
    """Build the action model selected by ``data["action_type"]``.

    Args:
        data: Dict containing an ``action_type`` key plus the fields of the
            matching action model; it is passed to the model as keyword
            arguments.

    Returns:
        An instance of the action class registered for ``action_type``.

    Raises:
        ValueError: If ``data`` is not a dict, if ``action_type`` is missing
            or falsy, or if it does not name a registered action. Errors
            raised by the action class constructor are not caught here.
    """
    if not isinstance(data, dict):
        raise ValueError(f"Expected dict, got {type(data)}")
    action_type = data.get("action_type")
    if not action_type:
        raise ValueError("Missing 'action_type' field")
    # Only strings can match a registered key. Rejecting other values here
    # keeps an unhashable value (e.g. a list) from raising TypeError in the
    # dict lookup below instead of the ValueError callers expect.
    if not isinstance(action_type, str):
        raise ValueError(f"Unknown action_type: {action_type}")
    action_map = {
        "scale": ScaleAction,
        "patch": PatchAction,
        "delete_pod": DeletePodAction,
        "rollout_restart": RolloutRestartAction,
        "set_hpa": SetHPAAction,
        "drain_node": DrainNodeAction,
        "describe": DescribeAction,
    }
    action_class = action_map.get(action_type)
    if action_class is None:
        raise ValueError(f"Unknown action_type: {action_type}")
    return action_class(**data)
# Public API of the package. ``ActionType`` is defined above as a public
# alias, so it is exported together with ``KubeAction``.
__all__ = [
    "ScaleAction",
    "PatchAction",
    "DeletePodAction",
    "RolloutRestartAction",
    "SetHPAAction",
    "DrainNodeAction",
    "DescribeAction",
    "KubeAction",
    "ActionType",
    "parse_action",
]

server/actions/delete_pod_action.py ADDED Viewed

	@@ -0,0 +1,7 @@

+from pydantic import BaseModel, Field
+from typing import Literal
class DeletePodAction(BaseModel):
    """Action payload that names a single pod to delete."""

    # Discriminator; only the literal "delete_pod" is accepted.
    action_type: Literal["delete_pod"] = "delete_pod"
    pod_name: str = Field(
        ...,
        description="Exact name of the pod to delete",
    )

server/actions/describe_action.py ADDED Viewed

	@@ -0,0 +1,8 @@

+from pydantic import BaseModel, Field
+from typing import Literal
class DescribeAction(BaseModel):
    """Action payload that identifies one resource to inspect."""

    # Discriminator; only the literal "describe" is accepted.
    action_type: Literal["describe"] = "describe"
    resource_type: Literal[
        "deployment",
        "pod",
        "node",
        "service",
        "configmap",
    ] = Field(..., description="Resource type to inspect")
    name: str = Field(
        ...,
        description="Resource name to inspect",
    )

server/actions/drain_action.py ADDED Viewed

	@@ -0,0 +1,7 @@

+from pydantic import BaseModel, Field
+from typing import Literal
class DrainNodeAction(BaseModel):
    """Action payload that names a node to drain."""

    # Discriminator; only the literal "drain_node" is accepted.
    action_type: Literal["drain_node"] = "drain_node"
    node_name: str = Field(
        ...,
        description="Node to cordon and drain",
    )

server/actions/hpa_action.py ADDED Viewed

	@@ -0,0 +1,17 @@

+from pydantic import BaseModel, Field, field_validator
+from typing import Literal
class SetHPAAction(BaseModel):
    """Action payload describing autoscaler bounds for a deployment.

    Both replica bounds are limited to 1..20 and the CPU target to 10..90 by
    the field constraints; ``max_replicas`` must also be at least
    ``min_replicas``.
    """

    # Discriminator; only the literal "set_hpa" is accepted.
    action_type: Literal["set_hpa"] = "set_hpa"
    deployment: str = Field(
        ...,
        description="Target deployment name",
    )
    min_replicas: int = Field(
        ...,
        ge=1,
        le=20,
        description="Minimum replicas",
    )
    max_replicas: int = Field(
        ...,
        ge=1,
        le=20,
        description="Maximum replicas",
    )
    cpu_target_percent: int = Field(
        ...,
        ge=10,
        le=90,
        description="Target CPU percentage",
    )

    @field_validator("max_replicas")
    @classmethod
    def max_must_be_gte_min(cls, v, info):
        """Reject ``max_replicas`` smaller than a validated ``min_replicas``.

        The check is skipped when ``min_replicas`` is absent from
        ``info.data``.
        """
        lower_bound = info.data.get("min_replicas")
        if lower_bound is not None and v < lower_bound:
            raise ValueError("max_replicas must be >= min_replicas")
        return v

server/actions/patch_action.py ADDED Viewed

	@@ -0,0 +1,9 @@

+from pydantic import BaseModel, Field
+from typing import Literal, Dict, Any
class PatchAction(BaseModel):
    """Action payload carrying a partial update for a named resource."""

    # Discriminator; only the literal "patch" is accepted.
    action_type: Literal["patch"] = "patch"
    resource_type: Literal[
        "deployment",
        "configmap",
        "service",
    ] = Field(..., description="One of: deployment, configmap, service")
    name: str = Field(
        ...,
        description="Resource name",
    )
    patch: Dict[str, Any] = Field(
        ...,
        description="Fields to update (partial patch)",
    )

server/actions/rollout_action.py ADDED Viewed

	@@ -0,0 +1,7 @@

+from pydantic import BaseModel, Field
+from typing import Literal
class RolloutRestartAction(BaseModel):
    """Action payload that names a deployment to restart."""

    # Discriminator; only the literal "rollout_restart" is accepted.
    action_type: Literal["rollout_restart"] = "rollout_restart"
    deployment: str = Field(
        ...,
        description="Deployment to restart all pods for",
    )

server/actions/scale_action.py ADDED Viewed

	@@ -0,0 +1,8 @@

+from pydantic import BaseModel, Field
+from typing import Literal
class ScaleAction(BaseModel):
    """Action payload setting a deployment's replica count (1..20)."""

    # Discriminator; only the literal "scale" is accepted.
    action_type: Literal["scale"] = "scale"
    deployment: str = Field(
        ...,
        description="Name of the deployment to scale",
    )
    replicas: int = Field(
        ...,
        ge=1,
        le=20,
        description="Target replica count (1-20)",
    )

server/conditions/__init__.py ADDED Viewed

File without changes

server/executor.py ADDED Viewed

	@@ -0,0 +1,115 @@

+from pydantic import BaseModel
+from typing import Any, Dict, Optional
+from server.actions import (
+    KubeAction,
+    ScaleAction,
+    DeletePodAction,
+    PatchAction,
+    RolloutRestartAction,
+    SetHPAAction,
+    DrainNodeAction,
+    DescribeAction,
+)
+from server.models import ClusterObservation
class ExecutionResult(BaseModel):
    """Outcome returned by ``execute`` for one applied action."""

    # Cluster observation obtained from the world after the action.
    observation: ClusterObservation
    # Human-readable summary of what was done.
    action_applied: str
    # True when the handler called ``world.tick()``.
    tick_advanced: bool
    # Populated only by describe actions; ``None`` otherwise.
    describe_detail: Optional[Dict[str, Any]] = None
def execute(action: KubeAction, world) -> ExecutionResult:
    """Apply ``action`` to ``world`` using the handler for its class.

    Args:
        action: One of the action models in ``KubeAction``.
        world: Object exposing the methods the selected handler calls.

    Returns:
        The ``ExecutionResult`` produced by the matching handler.

    Raises:
        ValueError: If ``action`` is not an instance of any known action class.
    """
    # Checked in order with isinstance, so subclasses of an action model are
    # routed to the same handler as their base class.
    handlers = (
        (ScaleAction, _execute_scale),
        (DeletePodAction, _execute_delete_pod),
        (PatchAction, _execute_patch),
        (RolloutRestartAction, _execute_rollout_restart),
        (SetHPAAction, _execute_set_hpa),
        (DrainNodeAction, _execute_drain_node),
        (DescribeAction, _execute_describe),
    )
    for action_cls, handler in handlers:
        if isinstance(action, action_cls):
            return handler(action, world)
    raise ValueError(f"Unknown action type: {type(action)}")
def _execute_scale(action: ScaleAction, world) -> ExecutionResult:
    """Call ``world.scale``, tick the world, and report the new observation."""
    world.scale(action.deployment, action.replicas)
    world.tick()
    snapshot = world.get_observation()
    summary = f"Scaled '{action.deployment}' to {action.replicas} replicas"
    return ExecutionResult(
        observation=snapshot,
        action_applied=summary,
        tick_advanced=True,
    )
def _execute_delete_pod(action: DeletePodAction, world) -> ExecutionResult:
    """Call ``world.delete_pod``, tick the world, and report the observation."""
    world.delete_pod(action.pod_name)
    world.tick()
    snapshot = world.get_observation()
    summary = f"Deleted pod '{action.pod_name}'"
    return ExecutionResult(
        observation=snapshot,
        action_applied=summary,
        tick_advanced=True,
    )
def _execute_patch(action: PatchAction, world) -> ExecutionResult:
    """Call ``world.apply_patch``, tick the world, and report the observation."""
    world.apply_patch(action.resource_type, action.name, action.patch)
    world.tick()
    snapshot = world.get_observation()
    summary = f"Patched {action.resource_type} '{action.name}'"
    return ExecutionResult(
        observation=snapshot,
        action_applied=summary,
        tick_advanced=True,
    )
def _execute_rollout_restart(action: RolloutRestartAction, world) -> ExecutionResult:
    """Call ``world.rollout_restart``, tick the world, and report the observation."""
    world.rollout_restart(action.deployment)
    world.tick()
    snapshot = world.get_observation()
    summary = f"Rollout restarted '{action.deployment}'"
    return ExecutionResult(
        observation=snapshot,
        action_applied=summary,
        tick_advanced=True,
    )
def _execute_set_hpa(action: SetHPAAction, world) -> ExecutionResult:
    """Call ``world.set_hpa``, tick the world, and report the observation."""
    hpa_args = (
        action.deployment,
        action.min_replicas,
        action.max_replicas,
        action.cpu_target_percent,
    )
    world.set_hpa(*hpa_args)
    world.tick()
    snapshot = world.get_observation()
    summary = f"Set HPA for '{action.deployment}': {action.min_replicas}-{action.max_replicas} replicas, {action.cpu_target_percent}% CPU"
    return ExecutionResult(
        observation=snapshot,
        action_applied=summary,
        tick_advanced=True,
    )
def _execute_drain_node(action: DrainNodeAction, world) -> ExecutionResult:
    """Call ``world.drain_node``, tick the world, and report the observation."""
    world.drain_node(action.node_name)
    world.tick()
    snapshot = world.get_observation()
    summary = f"Drained node '{action.node_name}'"
    return ExecutionResult(
        observation=snapshot,
        action_applied=summary,
        tick_advanced=True,
    )
def _execute_describe(action: DescribeAction, world) -> ExecutionResult:
    """Fetch detail for one resource without ticking the world.

    Unlike the other handlers this one never calls ``world.tick()``, so the
    result reports ``tick_advanced=False`` and carries the value returned by
    ``world.describe`` in ``describe_detail``.
    """
    info = world.describe(action.resource_type, action.name)
    snapshot = world.get_observation()
    summary = f"Described {action.resource_type} '{action.name}'"
    return ExecutionResult(
        observation=snapshot,
        action_applied=summary,
        tick_advanced=False,
        describe_detail=info,
    )

server/graders/__init__.py ADDED Viewed

File without changes

server/models.py ADDED Viewed

	@@ -0,0 +1,84 @@

+from typing import Optional, List, Dict, Any, Literal
+from pydantic import BaseModel, Field
class PodStatus(BaseModel):
    """State of a single pod as reported in a cluster observation."""

    name: str
    namespace: str = "default"
    # Restricted to the listed phase strings.
    status: Literal[
        "Running",
        "Pending",
        "CrashLoopBackOff",
        "OOMKilled",
        "Terminating",
        "Unknown",
    ]
    # Name of the hosting node; ``None`` when not set.
    node: Optional[str] = None
    restarts: int = 0
    cpu_usage: float = 0.0
    mem_usage: float = 0.0
    container_image: str = "nginx:1.21"
    env_vars: Dict[str, str] = Field(default_factory=dict)
    # Each instance gets a fresh dict with empty "limits" and "requests".
    resources: Dict[str, Dict[str, str]] = Field(
        default_factory=lambda: dict(limits={}, requests={})
    )
class NodeStatus(BaseModel):
    """State of a single node as reported in a cluster observation."""

    name: str
    status: Literal[
        "Ready",
        "NotReady",
        "SchedulingDisabled",
    ] = "Ready"
    # Capacity and usage are plain floats; units are not defined here.
    cpu_capacity: float = 4.0
    mem_capacity: float = 8192.0
    cpu_usage: float = 0.0
    mem_usage: float = 0.0
    # Pod names associated with this node.
    pods: List[str] = Field(default_factory=list)
class DeploymentStatus(BaseModel):
    """State of a single deployment as reported in a cluster observation."""

    name: str
    namespace: str = "default"
    desired_replicas: int = 1
    available_replicas: int = 1
    image: str = "nginx:1.21"
    # List of string-to-string dicts (note: PodStatus uses a flat dict).
    env_vars: List[Dict[str, str]] = Field(default_factory=list)
    # Each instance gets a fresh dict with empty "limits" and "requests".
    resources: Dict[str, Dict[str, str]] = Field(
        default_factory=lambda: dict(limits={}, requests={})
    )
    # Free-form autoscaler settings; ``None`` when not set.
    hpa: Optional[Dict[str, Any]] = None
class ServiceStatus(BaseModel):
    """State of a single service as reported in a cluster observation."""

    name: str
    namespace: str = "default"
    service_type: str = "ClusterIP"
    selector: Dict[str, str] = Field(default_factory=dict)
    # Defaults to a single port entry mapping port 80 to targetPort 80.
    ports: List[Dict[str, Any]] = Field(
        default_factory=lambda: [dict(port=80, targetPort=80)]
    )
    external_ip: Optional[str] = None
    error_rate: float = 0.0
    latency_p95: float = 0.0
class ConfigMapStatus(BaseModel):
    """State of a single config map as reported in a cluster observation."""

    name: str
    namespace: str = "default"
    # String key/value payload; empty by default.
    data: Dict[str, str] = Field(default_factory=dict)
class HPAStatus(BaseModel):
    """State of a single autoscaler as reported in a cluster observation."""

    name: str
    namespace: str = "default"
    # Required: the deployment this autoscaler refers to.
    target_deployment: str
    min_replicas: int = 1
    max_replicas: int = 10
    cpu_target_percent: int = 80
    current_replicas: int = 1
class ClusterEvent(BaseModel):
    """A single event entry in a cluster observation."""

    message: str
    reason: str
    # Either "Normal" (the default) or "Warning".
    type: Literal["Normal", "Warning"] = "Normal"
    involved_object: str = ""
    first_timestamp: Optional[str] = None
    count: int = 1
class ClusterObservation(BaseModel):
    """Aggregate snapshot of cluster resources plus episode metadata.

    Every collection defaults to a fresh empty list.
    """

    nodes: List[NodeStatus] = Field(default_factory=list)
    pods: List[PodStatus] = Field(default_factory=list)
    deployments: List[DeploymentStatus] = Field(default_factory=list)
    services: List[ServiceStatus] = Field(default_factory=list)
    configmaps: List[ConfigMapStatus] = Field(default_factory=list)
    hpa: List[HPAStatus] = Field(default_factory=list)
    events: List[ClusterEvent] = Field(default_factory=list)
    # Step counter and objective text; both start out empty.
    step: int = 0
    objective: str = ""

server/tasks/__init__.py ADDED Viewed

File without changes

server/validator.py ADDED Viewed

	@@ -0,0 +1,168 @@

+from typing import Optional, Dict, Any, List
+from server.actions import (
+    KubeAction,
+    ScaleAction,
+    DeletePodAction,
+    PatchAction,
+    RolloutRestartAction,
+    SetHPAAction,
+    DrainNodeAction,
+    DescribeAction,
+)
def validate(action: KubeAction, world_state: Dict[str, Any]) -> Optional[str]:
    """Check ``action`` against ``world_state`` using its per-type validator.

    Args:
        action: The action model to check.
        world_state: Raw cluster state dict handed to the validator.

    Returns:
        An error message when the action is rejected, otherwise ``None``.
        Actions that match no known class are also answered with ``None``.
    """
    # Checked in order with isinstance so subclasses reuse the base validator.
    validators = (
        (ScaleAction, _validate_scale),
        (DeletePodAction, _validate_delete_pod),
        (PatchAction, _validate_patch),
        (RolloutRestartAction, _validate_rollout_restart),
        (SetHPAAction, _validate_set_hpa),
        (DrainNodeAction, _validate_drain_node),
        (DescribeAction, _validate_describe),
    )
    for action_cls, checker in validators:
        if isinstance(action, action_cls):
            return checker(action, world_state)
    return None
+def _validate_scale(action: ScaleAction, world_state: Dict[str, Any]) -> Optional[str]:
+    deployments = world_state.get("deployments", [])
+    deployment_names = [d.get("name") for d in deployments]
+    if action.deployment not in deployment_names:
+        return f"Deployment '{action.deployment}' not found. Available: {deployment_names}"
+    if action.replicas < 1 or action.replicas > 20:
+        return f"Replica count must be between 1 and 20, got {action.replicas}"
+    return None
+def _validate_delete_pod(action: DeletePodAction, world_state: Dict[str, Any]) -> Optional[str]:
+    pods = world_state.get("pods", [])
+    pod_names = [p.get("name") for p in pods]
+    if action.pod_name not in pod_names:
+        return f"Pod '{action.pod_name}' not found in cluster. Available: {pod_names}"
+    pod = next((p for p in pods if p.get("name") == action.pod_name), None)
+    if pod and pod.get("status") == "Terminating":
+        return f"Pod '{action.pod_name}' is already terminating"
+    return None
+def _validate_patch(action: PatchAction, world_state: Dict[str, Any]) -> Optional[str]:
+    resource_type = action.resource_type
+    name = action.name
+    if resource_type == "deployment":
+        deployments = world_state.get("deployments", [])
+        deployment_names = [d.get("name") for d in deployments]
+        if name not in deployment_names:
+            return f"Deployment '{name}' not found. Available: {deployment_names}"
+    elif resource_type == "configmap":
+        configmaps = world_state.get("configmaps", [])
+        configmap_names = [c.get("name") for c in configmaps]
+        if name not in configmap_names:
+            return f"ConfigMap '{name}' not found. Available: {configmap_names}"
+    elif resource_type == "service":
+        services = world_state.get("services", [])
+        service_names = [s.get("name") for s in services]
+        if name not in service_names:
+            return f"Service '{name}' not found. Available: {service_names}"
+    else:
+        return f"Invalid resource_type: {resource_type}. Must be one of: deployment, configmap, service"
+    return None
+def _validate_rollout_restart(action: RolloutRestartAction, world_state: Dict[str, Any]) -> Optional[str]:
+    deployments = world_state.get("deployments", [])
+    deployment_names = [d.get("name") for d in deployments]
+    if action.deployment not in deployment_names:
+        return f"Deployment '{action.deployment}' not found. Available: {deployment_names}"
+    return None
+def _validate_set_hpa(action: SetHPAAction, world_state: Dict[str, Any]) -> Optional[str]:
+    deployments = world_state.get("deployments", [])
+    deployment_names = [d.get("name") for d in deployments]
+    if action.deployment not in deployment_names:
+        return f"Deployment '{action.deployment}' not found. Available: {deployment_names}"
+    if action.max_replicas < action.min_replicas:
+        return f"max_replicas ({action.max_replicas}) must be >= min_replicas ({action.min_replicas})"
+    if action.cpu_target_percent < 10 or action.cpu_target_percent > 90:
+        return f"cpu_target_percent must be between 10 and 90, got {action.cpu_target_percent}"
+    return None
+def _validate_drain_node(action: DrainNodeAction, world_state: Dict[str, Any]) -> Optional[str]:
+    nodes = world_state.get("nodes", [])
+    node_names = [n.get("name") for n in nodes]
+    if action.node_name not in node_names:
+        return f"Node '{action.node_name}' not found. Available: {node_names}"
+    node = next((n for n in nodes if n.get("name") == action.node_name), None)
+    if node and node.get("status") == "SchedulingDisabled":
+        return f"Node '{action.node_name}' is already drained (SchedulingDisabled)"
+    ready_nodes = [n for n in nodes if n.get("status") == "Ready"]
+    if len(ready_nodes) <= 1 and node and node.get("status") == "Ready":
+        return "Cannot drain last healthy node — cluster would lose all capacity"
+    return None
+def _validate_describe(action: DescribeAction, world_state: Dict[str, Any]) -> Optional[str]:
+    resource_type = action.resource_type
+    name = action.name
+    if resource_type == "deployment":
+        deployments = world_state.get("deployments", [])
+        deployment_names = [d.get("name") for d in deployments]
+        if name not in deployment_names:
+            return f"Deployment '{name}' not found. Available: {deployment_names}"
+    elif resource_type == "pod":
+        pods = world_state.get("pods", [])
+        pod_names = [p.get("name") for p in pods]
+        if name not in pod_names:
+            return f"Pod '{name}' not found. Available: {pod_names}"
+    elif resource_type == "node":
+        nodes = world_state.get("nodes", [])
+        node_names = [n.get("name") for n in nodes]
+        if name not in node_names:
+            return f"Node '{name}' not found. Available: {node_names}"
+    elif resource_type == "service":
+        services = world_state.get("services", [])
+        service_names = [s.get("name") for s in services]
+        if name not in service_names:
+            return f"Service '{name}' not found. Available: {service_names}"
+    elif resource_type == "configmap":
+        configmaps = world_state.get("configmaps", [])
+        configmap_names = [c.get("name") for c in configmaps]
+        if name not in configmap_names:
+            return f"ConfigMap '{name}' not found. Available: {configmap_names}"
+    else:
+        return f"Invalid resource_type: {resource_type}. Must be one of: deployment, pod, node, service, configmap"
+    return None

server/worker.py ADDED Viewed

	@@ -0,0 +1,84 @@

+from dataclasses import dataclass, field
+from typing import List, Callable, Any, Optional, Dict
@dataclass
class StepRecord:
    """One entry of an episode's history."""

    # Step number within the episode.
    step: int
    # Summary of the applied action.
    action_applied: str
    # Reward recorded for this step.
    reward: float
    # Whether the episode was finished after this step.
    done: bool
    # Error message for this step, if any.
    error: Optional[str] = None
@dataclass
class EpisodeResult:
    """Summary of one episode: per-step rewards, outcome and history."""

    task_id: str
    steps_taken: int
    # One reward per recorded step.
    rewards: List[float]
    success: bool
    # Per-step records; each instance gets its own empty list by default.
    history: List[StepRecord] = field(default_factory=list)

    @property
    def total_reward(self) -> float:
        """Sum of all entries in ``rewards``."""
        episode_total = sum(self.rewards)
        return episode_total
class Worker:
    """Drives a world through one episode and records what happened."""

    def run_episode(
        self,
        task_id: str,
        world: Any,
        get_action: Callable[[Any], Any],
        max_steps: int,
        grader: Any
    ) -> EpisodeResult:
        """Run up to ``max_steps`` steps of ``task_id`` against ``world``.

        Each step asks ``get_action`` for an action given the latest
        observation and validates it against ``world.get_raw_state()``. An
        action that fails validation is not executed: the step is recorded as
        ``"invalid_action"`` with reward 0.0 and the validation message, and
        the observation is left unchanged. A valid action is executed and then
        scored with ``grader.grade``; the loop stops early once
        ``grader.is_done`` returns a truthy value.

        Args:
            task_id: Passed to ``world.reset(task=...)`` and stored in the result.
            world: Object providing ``reset``, ``get_raw_state`` and the
                methods used by the executor.
            get_action: Callable mapping an observation to an action.
            max_steps: Maximum number of steps to attempt.
            grader: Object providing ``grade(state, step, max_steps)`` and
                ``is_done(state)``.

        Returns:
            An ``EpisodeResult`` whose ``success`` is the last ``done`` value
            and whose ``steps_taken`` counts every recorded step, including
            rejected ones.
        """
        # Kept as function-scope imports like the original, but resolved once
        # per episode instead of being re-executed on every loop iteration.
        from server.validator import validate
        from server.executor import execute

        obs = world.reset(task=task_id)
        history: List[StepRecord] = []
        rewards: List[float] = []
        done = False
        for step in range(1, max_steps + 1):
            action = get_action(obs)
            validation_error = validate(action, world.get_raw_state())
            if validation_error:
                # A rejected action still consumes a step but never reaches the world.
                history.append(StepRecord(
                    step=step,
                    action_applied="invalid_action",
                    reward=0.0,
                    done=False,
                    error=validation_error
                ))
                rewards.append(0.0)
                continue
            result = execute(action, world)
            reward = grader.grade(world.get_raw_state(), step, max_steps)
            done = grader.is_done(world.get_raw_state())
            history.append(StepRecord(
                step=step,
                action_applied=result.action_applied,
                reward=reward,
                done=done,
                error=None
            ))
            rewards.append(reward)
            obs = result.observation
            if done:
                break
        return EpisodeResult(
            task_id=task_id,
            steps_taken=len(history),
            rewards=rewards,
            success=done,
            history=history
        )

tests/__init__.py ADDED Viewed

File without changes

tests/test_actions.py ADDED Viewed

	@@ -0,0 +1,211 @@

+import pytest
+from pydantic import ValidationError
+from server.actions import (
+    ScaleAction,
+    PatchAction,
+    DeletePodAction,
+    RolloutRestartAction,
+    SetHPAAction,
+    DrainNodeAction,
+    DescribeAction,
+    parse_action,
+)
class TestScaleAction:
    """Field validation of ``ScaleAction``."""

    def test_valid_scale_action(self):
        scale = ScaleAction(action_type="scale", deployment="frontend", replicas=3)
        assert scale.deployment == "frontend"
        assert scale.replicas == 3

    def test_scale_action_rejects_zero_replicas(self):
        payload = {"action_type": "scale", "deployment": "frontend", "replicas": 0}
        with pytest.raises(ValidationError):
            ScaleAction(**payload)

    def test_scale_action_rejects_negative_replicas(self):
        payload = {"action_type": "scale", "deployment": "frontend", "replicas": -1}
        with pytest.raises(ValidationError):
            ScaleAction(**payload)

    def test_scale_action_rejects_too_many_replicas(self):
        payload = {"action_type": "scale", "deployment": "frontend", "replicas": 21}
        with pytest.raises(ValidationError):
            ScaleAction(**payload)

    def test_scale_action_accepts_boundary_values(self):
        # Both ends of the allowed 1..20 range must be accepted.
        lowest = ScaleAction(action_type="scale", deployment="frontend", replicas=1)
        highest = ScaleAction(action_type="scale", deployment="frontend", replicas=20)
        assert (lowest.replicas, highest.replicas) == (1, 20)
class TestPatchAction:
    """Field validation of ``PatchAction``."""

    def test_valid_patch_action(self):
        env_patch = {"env": [{"name": "DB_HOST", "value": "db.prod.internal"}]}
        patch_action = PatchAction(
            action_type="patch",
            resource_type="deployment",
            name="frontend",
            patch=env_patch,
        )
        assert patch_action.resource_type == "deployment"
        assert patch_action.name == "frontend"

    def test_patch_action_rejects_invalid_resource_type(self):
        payload = {
            "action_type": "patch",
            "resource_type": "invalid",
            "name": "frontend",
            "patch": {},
        }
        with pytest.raises(ValidationError):
            PatchAction(**payload)
class TestDeletePodAction:
    """Construction of ``DeletePodAction``."""

    def test_valid_delete_pod_action(self):
        pod = "frontend-7d9f-xkp2"
        deletion = DeletePodAction(action_type="delete_pod", pod_name=pod)
        assert deletion.pod_name == "frontend-7d9f-xkp2"
class TestRolloutRestartAction:
    """Construction of ``RolloutRestartAction``."""

    def test_valid_rollout_restart_action(self):
        restart = RolloutRestartAction(
            action_type="rollout_restart",
            deployment="frontend",
        )
        assert restart.deployment == "frontend"
class TestSetHPAAction:
    """Field and cross-field validation of ``SetHPAAction``."""

    def test_valid_hpa_action(self):
        hpa = SetHPAAction(
            action_type="set_hpa",
            deployment="api",
            min_replicas=2,
            max_replicas=10,
            cpu_target_percent=70,
        )
        assert hpa.deployment == "api"
        assert (hpa.min_replicas, hpa.max_replicas) == (2, 10)

    def test_hpa_action_rejects_max_less_than_min(self):
        payload = {
            "action_type": "set_hpa",
            "deployment": "api",
            "min_replicas": 5,
            "max_replicas": 2,
            "cpu_target_percent": 60,
        }
        with pytest.raises(ValidationError):
            SetHPAAction(**payload)

    def test_hpa_action_rejects_invalid_cpu_target(self):
        payload = {
            "action_type": "set_hpa",
            "deployment": "api",
            "min_replicas": 1,
            "max_replicas": 10,
            "cpu_target_percent": 5,
        }
        with pytest.raises(ValidationError):
            SetHPAAction(**payload)

    def test_hpa_action_accepts_boundary_cpu_target(self):
        # Everything except the CPU target is shared between the two cases.
        common = {
            "action_type": "set_hpa",
            "deployment": "api",
            "min_replicas": 1,
            "max_replicas": 10,
        }
        lowest = SetHPAAction(cpu_target_percent=10, **common)
        highest = SetHPAAction(cpu_target_percent=90, **common)
        assert lowest.cpu_target_percent == 10
        assert highest.cpu_target_percent == 90
class TestDrainNodeAction:
    """Construction of ``DrainNodeAction``."""

    def test_valid_drain_node_action(self):
        drain = DrainNodeAction(action_type="drain_node", node_name="node-1")
        assert drain.node_name == "node-1"
class TestDescribeAction:
    """Field validation of ``DescribeAction``."""

    def test_valid_describe_action(self):
        describe = DescribeAction(
            action_type="describe",
            resource_type="deployment",
            name="frontend",
        )
        assert (describe.resource_type, describe.name) == ("deployment", "frontend")

    def test_describe_action_rejects_invalid_resource_type(self):
        payload = {
            "action_type": "describe",
            "resource_type": "invalid",
            "name": "frontend",
        }
        with pytest.raises(ValidationError):
            DescribeAction(**payload)
class TestParseAction:
    """``parse_action`` picks the right model and rejects bad input."""

    def test_parse_scale_action(self):
        parsed = parse_action(
            {"action_type": "scale", "deployment": "frontend", "replicas": 3}
        )
        assert isinstance(parsed, ScaleAction)
        assert (parsed.deployment, parsed.replicas) == ("frontend", 3)

    def test_parse_delete_pod_action(self):
        parsed = parse_action(
            {"action_type": "delete_pod", "pod_name": "frontend-7d9f-xkp2"}
        )
        assert isinstance(parsed, DeletePodAction)
        assert parsed.pod_name == "frontend-7d9f-xkp2"

    def test_parse_patch_action(self):
        payload = dict(
            action_type="patch",
            resource_type="deployment",
            name="frontend",
            patch={"env": [{"name": "DB_HOST", "value": "db.prod.internal"}]},
        )
        parsed = parse_action(payload)
        assert isinstance(parsed, PatchAction)
        assert parsed.name == "frontend"

    def test_parse_rollout_restart_action(self):
        parsed = parse_action(
            {"action_type": "rollout_restart", "deployment": "frontend"}
        )
        assert isinstance(parsed, RolloutRestartAction)
        assert parsed.deployment == "frontend"

    def test_parse_hpa_action(self):
        payload = dict(
            action_type="set_hpa",
            deployment="api",
            min_replicas=2,
            max_replicas=10,
            cpu_target_percent=70,
        )
        parsed = parse_action(payload)
        assert isinstance(parsed, SetHPAAction)
        assert parsed.deployment == "api"

    def test_parse_drain_node_action(self):
        parsed = parse_action({"action_type": "drain_node", "node_name": "node-1"})
        assert isinstance(parsed, DrainNodeAction)
        assert parsed.node_name == "node-1"

    def test_parse_describe_action(self):
        parsed = parse_action(
            {"action_type": "describe", "resource_type": "deployment", "name": "frontend"}
        )
        assert isinstance(parsed, DescribeAction)
        assert parsed.name == "frontend"

    def test_parse_unknown_action_type(self):
        payload = {"action_type": "unknown_action"}
        with pytest.raises(ValueError, match="Unknown action_type"):
            parse_action(payload)

    def test_parse_missing_action_type(self):
        payload = {"deployment": "frontend"}
        with pytest.raises(ValueError, match="Missing 'action_type'"):
            parse_action(payload)

tests/test_executor.py ADDED Viewed

	@@ -0,0 +1,174 @@

+import pytest
+from unittest.mock import MagicMock, call
+from server.actions import (
+    ScaleAction,
+    DeletePodAction,
+    PatchAction,
+    RolloutRestartAction,
+    SetHPAAction,
+    DrainNodeAction,
+    DescribeAction,
+)
+from server.executor import execute
+from server.models import ClusterObservation
class MockWorld:
    """Hand-rolled world double that records the arguments of each call."""

    def __init__(self):
        # One "<method>_called_with" recorder per mutating or inspecting call.
        for recorded in (
            "scale",
            "delete_pod",
            "apply_patch",
            "rollout_restart",
            "set_hpa",
            "drain_node",
            "describe",
        ):
            setattr(self, f"{recorded}_called_with", None)
        self.tick_called = False
        self._observation = ClusterObservation(
            nodes=[],
            pods=[],
            deployments=[],
            services=[],
            configmaps=[],
            hpa=[],
            events=[],
            step=0,
            objective="",
        )
        self._raw_state = {
            key: [] for key in ("nodes", "pods", "deployments", "services", "configmaps")
        }

    def scale(self, deployment, replicas):
        """Record the scale request."""
        self.scale_called_with = (deployment, replicas)

    def delete_pod(self, pod_name):
        """Record the deleted pod name."""
        self.delete_pod_called_with = pod_name

    def apply_patch(self, resource_type, name, patch):
        """Record the patch request."""
        self.apply_patch_called_with = (resource_type, name, patch)

    def rollout_restart(self, deployment):
        """Record the restarted deployment."""
        self.rollout_restart_called_with = deployment

    def set_hpa(self, deployment, min_replicas, max_replicas, cpu_target_percent):
        """Record the autoscaler settings."""
        self.set_hpa_called_with = (
            deployment,
            min_replicas,
            max_replicas,
            cpu_target_percent,
        )

    def drain_node(self, node_name):
        """Record the drained node name."""
        self.drain_node_called_with = node_name

    def describe(self, resource_type, name):
        """Record the request and return a canned detail dict."""
        self.describe_called_with = (resource_type, name)
        canned = {"type": resource_type, "name": name, "detail": "mock detail"}
        return canned

    def tick(self):
        """Flag that the world was ticked."""
        self.tick_called = True

    def get_observation(self):
        """Return the fixed empty observation."""
        return self._observation

    def get_raw_state(self):
        """Return the fixed empty raw state."""
        return self._raw_state
class TestExecutorScale:
    """``execute`` with a ``ScaleAction``."""

    def test_scale_calls_world_scale_and_ticks(self):
        world = MockWorld()
        outcome = execute(
            ScaleAction(action_type="scale", deployment="frontend", replicas=3),
            world,
        )
        assert world.scale_called_with == ("frontend", 3)
        assert world.tick_called is True
        assert outcome.tick_advanced is True
        assert "Scaled" in outcome.action_applied

    def test_scale_action_applied_message(self):
        world = MockWorld()
        outcome = execute(
            ScaleAction(action_type="scale", deployment="frontend", replicas=5),
            world,
        )
        assert outcome.action_applied == "Scaled 'frontend' to 5 replicas"
class TestExecutorDeletePod:
    """``execute`` with a ``DeletePodAction``."""

    def test_delete_pod_calls_world_and_ticks(self):
        world = MockWorld()
        deletion = DeletePodAction(
            action_type="delete_pod",
            pod_name="frontend-7d9f-xkp2",
        )
        outcome = execute(deletion, world)
        assert world.delete_pod_called_with == "frontend-7d9f-xkp2"
        assert world.tick_called is True
        assert outcome.tick_advanced is True
class TestExecutorPatch:
    """``execute`` with a ``PatchAction``."""

    def test_patch_calls_world_and_ticks(self):
        world = MockWorld()
        patch_action = PatchAction(
            action_type="patch",
            resource_type="deployment",
            name="frontend",
            patch={"env": [{"name": "DB_HOST", "value": "db.prod.internal"}]},
        )
        outcome = execute(patch_action, world)
        expected_call = (
            "deployment",
            "frontend",
            {"env": [{"name": "DB_HOST", "value": "db.prod.internal"}]},
        )
        assert world.apply_patch_called_with == expected_call
        assert world.tick_called is True
        assert outcome.tick_advanced is True
class TestExecutorRolloutRestart:
    """``execute`` with a ``RolloutRestartAction``."""

    def test_rollout_restart_calls_world_and_ticks(self):
        world = MockWorld()
        restart = RolloutRestartAction(
            action_type="rollout_restart",
            deployment="frontend",
        )
        outcome = execute(restart, world)
        assert world.rollout_restart_called_with == "frontend"
        assert world.tick_called is True
        assert outcome.tick_advanced is True
class TestExecutorSetHPA:
    """``execute`` with a ``SetHPAAction``."""

    def test_set_hpa_calls_world_and_ticks(self):
        world = MockWorld()
        hpa = SetHPAAction(
            action_type="set_hpa",
            deployment="api",
            min_replicas=2,
            max_replicas=10,
            cpu_target_percent=70,
        )
        outcome = execute(hpa, world)
        assert world.set_hpa_called_with == ("api", 2, 10, 70)
        assert world.tick_called is True
        assert outcome.tick_advanced is True
class TestExecutorDrainNode:
    """``execute`` with a ``DrainNodeAction``."""

    def test_drain_node_calls_world_and_ticks(self):
        world = MockWorld()
        drain = DrainNodeAction(action_type="drain_node", node_name="node-1")
        outcome = execute(drain, world)
        assert world.drain_node_called_with == "node-1"
        assert world.tick_called is True
        assert outcome.tick_advanced is True
class TestExecutorDescribe:
    """``execute`` with a ``DescribeAction`` (the only non-ticking action)."""

    def test_describe_does_not_tick(self):
        world = MockWorld()
        describe = DescribeAction(
            action_type="describe",
            resource_type="deployment",
            name="frontend",
        )
        outcome = execute(describe, world)
        assert world.describe_called_with == ("deployment", "frontend")
        assert world.tick_called is False
        assert outcome.tick_advanced is False

    def test_describe_returns_detail(self):
        world = MockWorld()
        describe = DescribeAction(
            action_type="describe",
            resource_type="deployment",
            name="frontend",
        )
        outcome = execute(describe, world)
        detail = outcome.describe_detail
        assert detail is not None
        assert detail["type"] == "deployment"