# PRANAV05092003's picture
# Fixed structure (moved files to root)
# bc5030f
from __future__ import annotations
from typing import Any, Dict, List, Optional, Sequence
from pydantic import BaseModel, Field
class ObservationModel(BaseModel):
    """Observation made of four features, convertible to and from a flat vector.

    The vector layout is ``[code_length, complexity_score, runtime_s,
    error_flag]``.
    """

    code_length: float
    complexity_score: float
    # NOTE(review): the ``_s`` suffix suggests seconds -- confirm with the producer.
    runtime_s: float
    error_flag: bool

    @classmethod
    def from_vector(cls, values: Sequence[float]) -> "ObservationModel":
        """Build an observation from a four-element vector.

        Args:
            values: Items ordered as code_length, complexity_score,
                runtime_s, error_flag.

        Returns:
            A model whose first three fields are ``float(...)`` of the
            matching item and whose ``error_flag`` is ``bool(...)`` of the
            fourth item.

        Raises:
            ValueError: If ``values`` does not hold exactly four items.
        """
        # Materialize once so any iterable can be measured and unpacked.
        vector = list(values)
        if len(vector) == 4:
            length_item, complexity_item, runtime_item, flag_item = vector
            return cls(
                code_length=float(length_item),
                complexity_score=float(complexity_item),
                runtime_s=float(runtime_item),
                error_flag=bool(flag_item),
            )
        raise ValueError(f"observation vector must have length 4, got {len(vector)}")

    def to_vector(self) -> List[float]:
        """Return the four fields as floats, in declaration order.

        The boolean flag goes through ``int`` first, so it is emitted as
        ``1.0`` or ``0.0``.
        """
        ordered_fields = (
            self.code_length,
            self.complexity_score,
            self.runtime_s,
            int(self.error_flag),
        )
        return [float(item) for item in ordered_fields]
class ActionModel(BaseModel):
    """An integer action together with an optional name for it."""

    # Required (no default); validated to lie in the inclusive range 0..4.
    action: int = Field(..., ge=0, le=4)
    # Optional label; ``None`` when the caller does not supply one.
    action_name: Optional[str] = None
class RewardModel(BaseModel):
    """A reward value in raw and normalized form, plus named components."""

    # Reward before normalization; no bounds are enforced here.
    raw: float
    # Required (no default); validated to lie in the closed interval [0.0, 1.0].
    normalized: float = Field(..., ge=0.0, le=1.0)
    # Mapping of component name to its float value.
    # NOTE(review): presumably these sum or combine into ``raw`` -- confirm at the producer.
    components: Dict[str, float]
class HealthResponse(BaseModel):
    """Schema with three string fields: ``status``, ``env`` and ``version``.

    NOTE(review): presumably the body returned by a health-check route --
    confirm against the handler that builds it.
    """

    # Status string; the set of allowed values is not constrained here.
    status: str
    # Environment identifier string.
    env: str
    # Version string.
    version: str
class CompatibilityHealthResponse(BaseModel):
    """Schema with two string fields: ``status`` and ``service``.

    NOTE(review): the name suggests an alternate health payload kept for
    compatibility with another client -- confirm against the route using it.
    """

    # Status string; the set of allowed values is not constrained here.
    status: str
    # Service name string.
    service: str
class ResetRequest(BaseModel):
    """Reset parameters; every field is optional and defaults to ``None``."""

    # Optional task identifier.
    task_id: Optional[str] = None
    # Optional integer seed.
    seed: Optional[int] = None
    # Optional code string.
    # NOTE(review): presumably the starting code for the episode -- confirm with the handler.
    code: Optional[str] = None
class StepRequest(BaseModel):
    """Request carrying a single integer action."""

    # Required (no default); validated to lie in the inclusive range 0..4.
    action: int = Field(..., ge=0, le=4)
class GradeRequest(BaseModel):
    """Request carrying one required string field, ``code``."""

    # NOTE(review): presumably the source text to be graded -- confirm with the handler.
    code: str
class TaskInfo(BaseModel):
    """Description of one task; all five fields are required strings."""

    # Task identifier.
    id: str
    # Task name.
    name: str
    # Free-text description.
    description: str
    # Difficulty label; the allowed values are not constrained here.
    difficulty: str
    # NOTE(review): presumably the code the task starts from -- confirm with the task registry.
    initial_code: str
class TasksResponse(BaseModel):
    """Wrapper holding a list of ``TaskInfo`` entries."""

    # The task entries; may be empty as no minimum length is enforced here.
    tasks: List[TaskInfo]
class GradeResponse(BaseModel):
    """Grading result: a task id, a float score and a pass/fail flag."""

    # Identifier of the graded task.
    task_id: str
    # Score as a float; no range is enforced here.
    score: float
    # Pass/fail flag.
    # NOTE(review): the threshold relating ``score`` to ``passed`` is not visible here.
    passed: bool
class StateResponse(BaseModel):
    """Snapshot of state: code, step counters, last-run metrics and observation.

    NOTE(review): ``sample_id``, ``language`` and ``task_id`` are ``Optional``
    with no default. Pydantic v1 treats such fields as defaulting to ``None``;
    pydantic v2 treats them as required but nullable. Confirm which major
    version this project targets.
    """

    # Current code text.
    current_code: str
    # Step counter and its upper limit.
    # NOTE(review): presumably episode_steps <= max_steps -- not enforced here.
    episode_steps: int
    max_steps: int
    # Complexity value as a float.
    complexity: float
    # Runtime and error flag of the last run.
    # NOTE(review): units of ``last_runtime`` are not stated -- confirm.
    last_runtime: float
    last_error: bool
    # Nullable identifiers / labels (see class note about defaults).
    sample_id: Optional[str]
    language: Optional[str]
    task_id: Optional[str]
    # Observation in structured form.
    observation: ObservationModel
    # Observation as a list of floats.
    # NOTE(review): presumably ``observation.to_vector()`` -- confirm at the construction site.
    observation_vector: List[float]
    # Mapping from integer key to string.
    # NOTE(review): presumably action id -> description -- confirm.
    action_meanings: Dict[int, str]
class ResetResponse(BaseModel):
    """Payload with an observation, free-form info, a task id and a state.

    NOTE(review): ``task_id`` is ``Optional`` with no default, so whether it
    may be omitted depends on the pydantic major version (v1: defaults to
    ``None``; v2: required but nullable) -- confirm.
    """

    # Observation in structured form.
    observation: ObservationModel
    # Observation as a list of floats.
    # NOTE(review): presumably ``observation.to_vector()`` -- confirm at the construction site.
    observation_vector: List[float]
    # Free-form dictionary with string keys; its schema is not defined here.
    info: Dict[str, Any]
    # Nullable task identifier.
    task_id: Optional[str]
    # Nested state snapshot.
    state: StateResponse
class StepResponse(BaseModel):
    """Payload describing one step: action, observation, reward, flags, state."""

    # The action, as an ``ActionModel``.
    action: ActionModel
    # Observation in structured form.
    observation: ObservationModel
    # Observation as a list of floats.
    # NOTE(review): presumably ``observation.to_vector()`` -- confirm at the construction site.
    observation_vector: List[float]
    # Reward in raw / normalized / component form.
    reward: RewardModel
    # Three boolean flags.
    # NOTE(review): the names mirror Gymnasium-style step results; presumably
    # ``done`` is ``terminated or truncated`` -- not enforced here, confirm.
    done: bool
    terminated: bool
    truncated: bool
    # Free-form dictionary with string keys; its schema is not defined here.
    info: Dict[str, Any]
    # Nested state snapshot.
    state: StateResponse
class OptimizeRequest(BaseModel):
    """Request with required ``code`` plus optional settings that all have defaults."""

    # The only required field.
    code: str
    # Optional task identifier.
    task_id: Optional[str] = None
    # Defaults to 5; validated to lie in the inclusive range 1..5.
    max_steps: int = Field(5, ge=1, le=5)
    # Boolean switches with their defaults.
    # NOTE(review): how these three flags interact is decided by the consumer
    # of this model, not here -- confirm precedence with the handler.
    use_rl: bool = True
    use_llm: bool = False
    fallback_to_llm: bool = True
    # Optional overrides; ``None`` when not supplied.
    rl_model_path: Optional[str] = None
    api_base_url: Optional[str] = None
    model_name: Optional[str] = None
    # NOTE(review): this looks like a credential held as a plain ``str``, so it
    # will show up in the model's repr and dumps -- confirm that is acceptable.
    api_token: Optional[str] = None
class OptimizationStep(BaseModel):
    """Record of a single step; all nine fields are required."""

    # Step number.
    # NOTE(review): whether numbering starts at 0 or 1 is not visible here.
    step: int
    # Integer action and its name.
    action: int
    action_name: str
    # Free-text reason string.
    reason: str
    # Source label string.
    # NOTE(review): presumably identifies which policy chose the action -- confirm.
    source: str
    # Reward values as floats; no bounds are enforced on either field here.
    reward: float
    normalized_reward: float
    # Boolean change flag.
    # NOTE(review): presumably "did this step alter the code" -- confirm.
    changed: bool
    # Complexity value as a float.
    complexity: float
class OptimizeResponse(BaseModel):
    """Result payload: original and optimized code, a diff, steps and scores.

    NOTE(review): ``task_id`` and ``task_score`` are ``Optional`` with no
    default, so whether they may be omitted depends on the pydantic major
    version (v1: defaults to ``None``; v2: required but nullable) -- confirm.
    """

    # Code text before and after.
    original_code: str
    optimized_code: str
    # Diff as a single string; its format is not specified here.
    diff: str
    # List of per-step records.
    steps: List[OptimizationStep]
    # Cumulative reward as a float.
    # NOTE(review): presumably the sum of the per-step ``reward`` values -- confirm.
    cumulative_reward: float
    # Nullable task identifier and score.
    task_id: Optional[str]
    task_score: Optional[float]