"""Pydantic models for actions, rewards, observations, state and request/response payloads."""

from typing import Any, Dict, List, Literal, Optional

from pydantic import BaseModel, Field


class Action(BaseModel):
    """Action payload: required `optimized_code` plus optional metadata."""

    optimized_code: str
    # Both optional fields default to None when the caller omits them.
    strategy: Optional[str] = None
    expected_speedup: Optional[float] = None


class Reward(BaseModel):
    """Reward with a bounded scalar `value` and a named float breakdown."""

    # Validation rejects values outside the closed interval [0.0, 1.0].
    value: float = Field(ge=0.0, le=1.0)
    # Mapping of name -> float; presumably the per-component contributions
    # to `value` -- TODO confirm against the code that builds it.
    components: Dict[str, float]


class Observation(BaseModel):
    """Observation model; every field is required (no defaults)."""

    task_id: str
    task_name: str
    # Restricted to exactly these three string values.
    difficulty: Literal["easy", "medium", "hard"]
    baseline_code: str
    current_best_code: str
    current_best_speedup: float
    step_count: int
    max_steps: int
    pending_checks: List[str]
    completed_checks: List[str]
    done: bool


class EnvState(BaseModel):
    """Environment state; only `initialized` is required, the rest default."""

    initialized: bool
    task_id: Optional[str] = None
    step_count: int = 0
    max_steps: int = 0
    total_reward: float = 0.0
    best_code: str = ""
    best_speedup: float = 1.0
    # default_factory gives each instance its own fresh list.
    completed_checks: List[str] = Field(default_factory=list)
    action_history: List[Dict[str, Any]] = Field(default_factory=list)


class ResetRequest(BaseModel):
    """Reset request body; `task_id` is optional and defaults to None."""

    task_id: Optional[str] = None


class StepRequest(BaseModel):
    """Step request body wrapping a single `Action`."""

    action: Action


class StepResult(BaseModel):
    """Step result: observation, reward, done flag and free-form info dict."""

    observation: Observation
    reward: Reward
    done: bool
    info: Dict[str, Any]