# Extracted from revision bc5030f (reported file size: 3,412 bytes).
from __future__ import annotations
from typing import Any, Dict, List, Optional, Sequence
from pydantic import BaseModel, Field
class ObservationModel(BaseModel):
    """Four-feature observation that converts to and from a flat vector.

    The vector layout is fixed:
    ``[code_length, complexity_score, runtime_s, error_flag]``.
    """

    code_length: float
    complexity_score: float
    runtime_s: float
    error_flag: bool

    @classmethod
    def from_vector(cls, values: Sequence[float]) -> "ObservationModel":
        """Build an observation from a 4-element numeric sequence.

        Args:
            values: Exactly four numbers, ordered as ``code_length``,
                ``complexity_score``, ``runtime_s``, ``error_flag``.

        Returns:
            A new ``ObservationModel`` populated from ``values``.

        Raises:
            ValueError: If ``values`` does not contain exactly four items.
        """
        items = list(values)
        if len(items) != 4:
            raise ValueError(f"observation vector must have length 4, got {len(items)}")

        length, complexity, runtime, errored = items
        return cls(
            code_length=float(length),
            complexity_score=float(complexity),
            runtime_s=float(runtime),
            # Plain truthiness: any non-zero entry counts as an error.
            error_flag=bool(errored),
        )

    def to_vector(self) -> List[float]:
        """Return the observation as four floats in the fixed vector layout.

        The boolean ``error_flag`` is emitted as ``0.0`` or ``1.0``.
        """
        readings = [
            float(reading)
            for reading in (self.code_length, self.complexity_score, self.runtime_s)
        ]
        readings.append(float(int(self.error_flag)))
        return readings
class ActionModel(BaseModel):
    """An action index paired with an optional name string."""

    # Required; validation rejects values outside the inclusive range 0..4.
    action: int = Field(..., ge=0, le=4)
    # Optional label; defaults to None when not supplied.
    action_name: Optional[str] = None
class RewardModel(BaseModel):
    """A reward expressed as a raw value, a bounded value and named parts."""

    # Unbounded reward value.
    raw: float
    # Required; validation rejects values outside the inclusive range 0.0..1.0.
    normalized: float = Field(..., ge=0.0, le=1.0)
    # Named float contributions; the key set is not constrained by this schema.
    components: Dict[str, float]
class HealthResponse(BaseModel):
    """Schema with three required string fields: status, env and version.

    NOTE(review): presumably the payload of a health-check endpoint --
    confirm against the route definitions.
    """

    status: str
    env: str
    version: str
class CompatibilityHealthResponse(BaseModel):
    """Schema with two required string fields: status and service.

    NOTE(review): the name suggests an alternate health payload kept for
    compatibility with another client -- confirm against the routes.
    """

    status: str
    service: str
class ResetRequest(BaseModel):
    """Request schema whose fields are all optional and default to None.

    NOTE(review): how the consumer treats a missing task_id, seed or code
    is not visible here -- confirm against the handler.
    """

    task_id: Optional[str] = None
    seed: Optional[int] = None
    code: Optional[str] = None
class StepRequest(BaseModel):
    """Request schema carrying a single bounded action index."""

    # Required; validation rejects values outside the inclusive range 0..4.
    action: int = Field(..., ge=0, le=4)
class GradeRequest(BaseModel):
    """Request schema with one required string field, ``code``."""

    code: str
class TaskInfo(BaseModel):
    """Description of a task as five required string fields."""

    id: str
    name: str
    description: str
    # Free-form string; allowed values are not constrained by this schema.
    difficulty: str
    initial_code: str
class TasksResponse(BaseModel):
    """Response schema wrapping a list of ``TaskInfo`` entries."""

    tasks: List[TaskInfo]
class GradeResponse(BaseModel):
    """Response schema with a task id, a float score and a pass flag.

    NOTE(review): the score range and the pass threshold are not defined
    by this schema -- confirm against the grader.
    """

    task_id: str
    score: float
    passed: bool
class StateResponse(BaseModel):
    """Snapshot schema combining code, step counters and an observation.

    NOTE(review): sample_id, language and task_id are Optional but declare
    no default. Pydantic v1 treats such fields as defaulting to None, while
    v2 treats them as required -- confirm which major version is targeted.
    """

    current_code: str
    episode_steps: int
    max_steps: int
    complexity: float
    last_runtime: float
    last_error: bool
    sample_id: Optional[str]
    language: Optional[str]
    task_id: Optional[str]
    # Structured observation and a flat list of floats side by side.
    # Presumably the list equals observation.to_vector() -- verify with the
    # code that builds this response.
    observation: ObservationModel
    observation_vector: List[float]
    # Integer keys mapped to strings; presumably action index -> name.
    action_meanings: Dict[int, str]
class ResetResponse(BaseModel):
    """Response schema holding an observation, free-form info and a state.

    NOTE(review): task_id is Optional with no default. Pydantic v1 treats
    it as defaulting to None, v2 as required -- confirm the target version.
    """

    observation: ObservationModel
    observation_vector: List[float]
    # Arbitrary string-keyed payload; its contents are not constrained here.
    info: Dict[str, Any]
    task_id: Optional[str]
    state: StateResponse
class StepResponse(BaseModel):
    """Response schema describing the outcome of one step.

    Bundles the action, the observation (structured and flat), the reward,
    three boolean end-of-episode flags, free-form info and a state snapshot.
    """

    action: ActionModel
    observation: ObservationModel
    observation_vector: List[float]
    reward: RewardModel
    # NOTE(review): how done relates to terminated/truncated is not
    # enforced by this schema -- confirm with the producer.
    done: bool
    terminated: bool
    truncated: bool
    # Arbitrary string-keyed payload; its contents are not constrained here.
    info: Dict[str, Any]
    state: StateResponse
class OptimizeRequest(BaseModel):
    """Request schema for an optimisation run over a piece of code.

    Only ``code`` is required; every other field has a default.
    """

    code: str
    task_id: Optional[str] = None
    # Defaults to 5; validation rejects values outside the inclusive range 1..5.
    max_steps: int = Field(5, ge=1, le=5)
    # Strategy toggles; how they interact is decided by the consumer.
    use_rl: bool = True
    use_llm: bool = False
    fallback_to_llm: bool = True
    # Optional overrides; None leaves the choice to the consumer.
    rl_model_path: Optional[str] = None
    api_base_url: Optional[str] = None
    model_name: Optional[str] = None
    api_token: Optional[str] = None
class OptimizationStep(BaseModel):
    """Record of one optimisation step; every field is required."""

    step: int
    # Unlike ActionModel.action, this integer carries no range constraint.
    action: int
    action_name: str
    reason: str
    # Free-form string; allowed values are not constrained by this schema.
    source: str
    reward: float
    # Unlike RewardModel.normalized, no 0..1 bound is enforced here.
    normalized_reward: float
    changed: bool
    complexity: float
class OptimizeResponse(BaseModel):
    """Response schema with original and optimised code, a diff and steps.

    NOTE(review): task_id and task_score are Optional with no default.
    Pydantic v1 treats them as defaulting to None, v2 as required --
    confirm which major version is targeted.
    """

    original_code: str
    optimized_code: str
    # Diff as a single string; its format is not specified by this schema.
    diff: str
    steps: List[OptimizationStep]
    cumulative_reward: float
    task_id: Optional[str]
    task_score: Optional[float]
|