"""Pydantic models for service metrics, alerts, observations, actions and episode results.

Every class here is a plain ``pydantic.BaseModel`` with typed fields and no
custom validators or methods. String fields whose comments list a set of
values (for example ``P0 | P1 | P2``) are annotated as ``str``, so the type
itself does not restrict them to those values.
"""

from __future__ import annotations

from typing import Any, Dict, List, Optional

from pydantic import BaseModel, Field


class ServiceMetrics(BaseModel):
    """Metric readings and status for one service."""

    cpu_percent: float
    memory_mb: float
    error_rate: float  # errors per second
    p99_latency_ms: float
    request_queue: int
    # Stored as a string; the expected format (timestamp, version, ...) is not
    # visible in this file -- TODO confirm with the producer.
    last_deploy: str
    status: str  # healthy | degraded | critical | down


class Alert(BaseModel):
    """A single alert attached to a service."""

    alert_id: str
    severity: str  # P0 | P1 | P2
    service: str
    message: str
    triggered_at_step: int
    # Defaults to False, so alerts are treated as genuine unless flagged.
    is_red_herring: bool = False


class FileView(BaseModel):
    """The content of one file belonging to a service."""

    service: str
    filename: str
    content: str
    # NOTE(review): presumably the line count of the whole file rather than of
    # `content` -- confirm against the code that builds this model.
    total_lines: int


class EditRecord(BaseModel):
    """Record of one code edit: where it happened and the before/after code."""

    step: int
    service: str
    filename: str
    line_number: int
    old_code: str
    new_code: str


class TestResult(BaseModel):
    """Outcome of running one test suite."""

    suite: str  # unit | integration | load | security
    passed: bool
    output: str
    # default_factory gives each instance its own fresh list.
    errors: List[str] = Field(default_factory=list)


class Observation(BaseModel):
    """Snapshot of state at a given step of an incident."""

    step: int
    incident_id: str
    # Keyed by string; presumably the service name -- verify against caller.
    system_metrics: Dict[str, ServiceMetrics]
    active_alerts: List[Alert]
    open_file: Optional[FileView] = None
    terminal_output: str
    git_diff: Optional[str] = None
    # Maps a string to a list of strings; presumably service -> related
    # services, direction of the edge not shown here -- TODO confirm.
    dependency_graph: Dict[str, List[str]]
    sre_memory: List[str] = Field(default_factory=list)
    budget_remaining: int
    task_id: int
    task_description: str


class ActionRequest(BaseModel):
    """A request to invoke a named tool with optional parameters."""

    tool: str
    # Free-form parameters; defaults to a fresh empty dict per instance.
    params: Dict[str, Any] = Field(default_factory=dict)


class ActionResult(BaseModel):
    """Result of a tool invocation, bundled with the resulting observation."""

    tool: str
    output: Any
    reward_delta: float
    done: bool
    observation: Observation


class EpisodeResult(BaseModel):
    """Summary figures and logs for one completed episode."""

    incident_id: str
    task_id: int
    steps_taken: int
    total_reward: float
    normalized_score: float  # 0.0 – 1.0
    tests_passed: bool
    incident_report_accuracy: float
    fixed_within_sla: bool
    tool_call_log: List[Dict[str, Any]]
    weakness_tags: List[str]  # for DifficultyController


class IncidentReport(BaseModel):
    """Report fields describing an incident's cause, fix and impact."""

    root_cause: str
    fix_applied: str
    services_affected: List[str]
    severity_classification: str  # P0 | P1 | P2


class DifficultyState(BaseModel):
    """Five per-category float values, each defaulting to 0.5.

    NOTE(review): presumably the state used by the DifficultyController
    mentioned on ``EpisodeResult.weakness_tags``; the meaning and valid range
    of the values are not shown in this file -- confirm before relying on them.
    """

    async_bugs: float = 0.5
    data_corruption: float = 0.5
    security_bugs: float = 0.5
    cascading_failures: float = 0.5
    red_herrings: float = 0.5