File size: 1,916 Bytes
05a686e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
"""
Incident Task - Hard difficulty
Handle multi-service cascading incident
"""

from typing import Dict, Any
from ..coenv_environment import World
from ..conditions.cascade_failure import CascadeFailureCondition


class IncidentTask:
    """Hard-difficulty task: handle a multi-service cascading incident.

    ``reset()`` restores the world to a healthy state and injects a cascade
    failure rooted at ``auth-service``. ``is_complete()`` then checks whether
    enough pods of the key services are running again.
    """

    # Services whose recovery decides task completion.
    _KEY_SERVICES = ("auth-service", "api-gateway", "frontend")
    # A service is healthy when running pods >= desired_replicas * this value.
    _MIN_RUNNING_FRACTION = 0.8
    # The task is complete when healthy services >= number of key services
    # times this value. With three key services that is 3 * 0.67 = 2.01, so
    # all three must be healthy.
    # NOTE(review): if "two out of three" was the intent, this threshold has
    # to drop to 2 / 3 or below -- confirm before changing.
    _MIN_HEALTHY_FRACTION = 0.67

    def __init__(self, world: "World", config: Dict[str, Any]):
        """Store the simulated world and the task configuration.

        Args:
            world: Environment the task resets, queries and injects into.
            config: Task configuration, passed on to the failure condition.
        """
        self.world = world
        self.config = config
        self.task_id = "incident"
        self.description = "Handle multi-service cascading incident"
        # Set here so get_observation() also works before the first reset();
        # previously the attribute only came into existence inside reset().
        self.objective: str = ""

    def reset(self) -> None:
        """Reset the world to healthy, inject the cascade and set the objective."""
        self.world.reset_to_healthy()

        cascade_condition = CascadeFailureCondition(self.world, self.config)
        cascade_condition.inject(
            root_cause_service="auth-service",
            failure_probability=0.6,
        )

        self.objective = (
            "Auth-service OOMKill has caused cascading failures. "
            "Identify the root cause, fix memory limits, restart workloads, "
            "and verify downstream recovery."
        )

    def is_complete(self) -> bool:
        """Return True when enough key services have recovered.

        A key service is healthy when its deployment exists and the number of
        its pods with status ``"Running"`` is at least 80% of the deployment's
        ``desired_replicas``. A key service without a deployment is never
        healthy.

        Returns:
            True if the healthy-service count reaches the completion
            threshold, False otherwise.
        """
        # Query the world once per call instead of once per key service.
        deployments = list(self.world.get_deployments())
        pods = list(self.world.get_pods())

        healthy_services = 0
        for service_name in self._KEY_SERVICES:
            deployment = next(
                (d for d in deployments if d.name == service_name), None
            )
            if deployment is None:
                continue

            running = sum(
                1
                for pod in pods
                if pod.deployment == service_name and pod.status == "Running"
            )
            # NOTE(review): a deployment with desired_replicas == 0 passes
            # this check with zero running pods -- confirm that is acceptable.
            if running >= deployment.desired_replicas * self._MIN_RUNNING_FRACTION:
                healthy_services += 1

        required = len(self._KEY_SERVICES) * self._MIN_HEALTHY_FRACTION
        return healthy_services >= required

    def get_observation(self) -> Dict[str, Any]:
        """Return the world's full state with the task objective added.

        The ``"objective"`` key is written directly into the mapping returned
        by ``world.get_full_state()``. It is an empty string until ``reset()``
        has been called.
        """
        observation = self.world.get_full_state()
        observation["objective"] = self.objective
        return observation