# Pydantic models for observations, actions, rewards, and the request/response
# payloads assembled from them.
#
# NOTE: models are documented with ``#`` comments instead of class docstrings.
# Pydantic copies a model's docstring into the generated JSON schema as its
# ``description``, so comments keep the published schema unchanged.
from __future__ import annotations

from typing import Any, Dict, List, Optional, Sequence

from pydantic import BaseModel, Field


# Observation made of three float measurements plus a boolean error flag.
# The field order is also the positional layout of the flat vector form
# handled by ``from_vector`` / ``to_vector``.
class ObservationModel(BaseModel):
    code_length: float
    complexity_score: float
    runtime_s: float
    error_flag: bool

    @classmethod
    def from_vector(cls, values: Sequence[float]) -> "ObservationModel":
        """Build an observation from a flat sequence of exactly four values.

        The items are read positionally as ``code_length``,
        ``complexity_score``, ``runtime_s`` and ``error_flag``. The first
        three are coerced with ``float()`` and the last with ``bool()``.

        Raises:
            ValueError: If ``values`` does not contain exactly four items.
        """
        # Materialize first so any iterable-like input gets a reliable len().
        vector = list(values)
        if len(vector) != 4:
            raise ValueError(f"observation vector must have length 4, got {len(vector)}")

        length, complexity, runtime, flag = vector
        return cls(
            code_length=float(length),
            complexity_score=float(complexity),
            runtime_s=float(runtime),
            error_flag=bool(flag),
        )

    def to_vector(self) -> List[float]:
        """Return the observation as a four-element list of floats.

        The order mirrors ``from_vector``; ``error_flag`` is converted to an
        int before being converted to a float.
        """
        numeric = [
            float(self.code_length),
            float(self.complexity_score),
            float(self.runtime_s),
        ]
        numeric.append(float(int(self.error_flag)))
        return numeric


# An action index restricted to the inclusive range 0..4, with an optional
# name for it.
class ActionModel(BaseModel):
    action: int = Field(ge=0, le=4)
    action_name: Optional[str] = None


# A reward expressed as a raw value, a normalized value constrained to
# [0.0, 1.0], and a mapping of named float components.
class RewardModel(BaseModel):
    raw: float
    normalized: float = Field(ge=0.0, le=1.0)
    components: Dict[str, float]


# Health payload with three required string fields.
class HealthResponse(BaseModel):
    status: str
    env: str
    version: str


# Alternative health payload carrying only a status and a service name.
# NOTE(review): presumably kept for an older/compatibility endpoint -- confirm.
class CompatibilityHealthResponse(BaseModel):
    status: str
    service: str


# Reset request; every field is optional and defaults to None.
class ResetRequest(BaseModel):
    task_id: Optional[str] = None
    seed: Optional[int] = None
    code: Optional[str] = None


# Step request carrying one action index, bounded 0..4 like ActionModel.action.
class StepRequest(BaseModel):
    action: int = Field(ge=0, le=4)


# Grade request carrying the code string to grade.
class GradeRequest(BaseModel):
    code: str


# Description of a single task; all fields are required strings.
class TaskInfo(BaseModel):
    id: str
    name: str
    description: str
    difficulty: str
    initial_code: str


# Wrapper around a list of TaskInfo entries.
class TasksResponse(BaseModel):
    tasks: List[TaskInfo]


# Grading result: the task id, a float score, and a pass/fail flag.
class GradeResponse(BaseModel):
    task_id: str
    score: float
    passed: bool


# Snapshot of state: current code, step counters, the latest measurements,
# optional identifiers, and the observation in both model and vector form.
class StateResponse(BaseModel):
    current_code: str
    episode_steps: int
    max_steps: int
    complexity: float
    last_runtime: float
    last_error: bool
    # These Optional fields declare no explicit default.
    sample_id: Optional[str]
    language: Optional[str]
    task_id: Optional[str]
    observation: ObservationModel
    observation_vector: List[float]
    # Maps an integer action index to a string.
    action_meanings: Dict[int, str]


# Reset response: the observation (model and vector form), a free-form info
# mapping, an optional task id, and the accompanying state snapshot.
class ResetResponse(BaseModel):
    observation: ObservationModel
    observation_vector: List[float]
    info: Dict[str, Any]
    task_id: Optional[str]
    state: StateResponse


# Step response: the action, the observation (model and vector form), the
# reward, three boolean flags (done / terminated / truncated), a free-form
# info mapping, and the accompanying state snapshot.
class StepResponse(BaseModel):
    action: ActionModel
    observation: ObservationModel
    observation_vector: List[float]
    reward: RewardModel
    done: bool
    terminated: bool
    truncated: bool
    info: Dict[str, Any]
    state: StateResponse


# Optimize request. Only ``code`` is required; everything else has a default.
class OptimizeRequest(BaseModel):
    code: str
    task_id: Optional[str] = None
    # Defaults to 5 and must lie in the inclusive range 1..5.
    max_steps: int = Field(default=5, ge=1, le=5)
    use_rl: bool = True
    use_llm: bool = False
    fallback_to_llm: bool = True
    rl_model_path: Optional[str] = None
    api_base_url: Optional[str] = None
    model_name: Optional[str] = None
    api_token: Optional[str] = None


# Record of one optimization step; all fields are required.
class OptimizationStep(BaseModel):
    step: int
    action: int
    action_name: str
    reason: str
    source: str
    reward: float
    normalized_reward: float
    changed: bool
    complexity: float


# Optimize response: original and optimized code, a diff string, the list of
# steps, the cumulative reward, and optional task id / task score.
class OptimizeResponse(BaseModel):
    original_code: str
    optimized_code: str
    diff: str
    steps: List[OptimizationStep]
    cumulative_reward: float
    # These Optional fields declare no explicit default.
    task_id: Optional[str]
    task_score: Optional[float]