File size: 3,412 Bytes
bc5030f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
from __future__ import annotations

from typing import Any, Dict, List, Optional, Sequence

from pydantic import BaseModel, Field


class ObservationModel(BaseModel):
    """Four-field observation that converts to and from a flat vector.

    The vector layout shared by ``from_vector`` and ``to_vector`` is
    ``[code_length, complexity_score, runtime_s, error_flag]``.
    """

    code_length: float
    complexity_score: float
    runtime_s: float
    error_flag: bool

    @classmethod
    def from_vector(cls, values: Sequence[float]) -> "ObservationModel":
        """Build an observation from a four-item vector.

        Args:
            values: Items ordered as code_length, complexity_score,
                runtime_s, error_flag.

        Returns:
            A new model whose first three fields are the items converted
            with ``float`` and whose flag is the last item converted with
            ``bool``.

        Raises:
            ValueError: If ``values`` does not hold exactly four items.
        """
        vector = list(values)
        if len(vector) == 4:
            length, complexity, runtime, errored = vector
            return cls(
                code_length=float(length),
                complexity_score=float(complexity),
                runtime_s=float(runtime),
                error_flag=bool(errored),
            )
        raise ValueError(f"observation vector must have length 4, got {len(vector)}")

    def to_vector(self) -> List[float]:
        """Return the fields as a list of four floats in vector layout.

        The boolean flag goes through ``int`` first, so it is emitted as
        ``1.0`` or ``0.0``.
        """
        components = (
            self.code_length,
            self.complexity_score,
            self.runtime_s,
            int(self.error_flag),
        )
        return [float(component) for component in components]


class ActionModel(BaseModel):
    """An integer action together with an optional name for it."""

    # Required; validation restricts the value to 0..4 inclusive.
    action: int = Field(ge=0, le=4)
    # Optional name for the action; None when not supplied.
    action_name: Optional[str] = None


class RewardModel(BaseModel):
    """A reward in raw and normalized form, plus named float components."""

    # Reward value with no range constraint applied by this model.
    raw: float
    # Validation restricts the value to the closed interval [0.0, 1.0].
    normalized: float = Field(ge=0.0, le=1.0)
    # Name -> value mapping; presumably the terms the reward is built
    # from -- TODO confirm against the code that fills it.
    components: Dict[str, float]


class HealthResponse(BaseModel):
    """Health payload made of three required strings."""

    status: str
    # presumably the environment name -- TODO confirm with the endpoint
    env: str
    version: str


class CompatibilityHealthResponse(BaseModel):
    """Health payload carrying ``status`` and ``service`` strings.

    NOTE(review): the name suggests this shape exists for compatibility
    with another health-check format; confirm with the endpoint using it.
    """

    status: str
    service: str


class ResetRequest(BaseModel):
    """Reset parameters; every field is optional and defaults to None."""

    # Optional task identifier.
    task_id: Optional[str] = None
    # Optional integer seed; presumably for reproducible resets -- TODO confirm.
    seed: Optional[int] = None
    # Optional code string; presumably the starting code -- TODO confirm.
    code: Optional[str] = None


class StepRequest(BaseModel):
    """Step parameters: a single required action."""

    # Validation restricts the value to 0..4 inclusive, the same bounds
    # that ActionModel.action declares.
    action: int = Field(ge=0, le=4)


class GradeRequest(BaseModel):
    """Grading parameters: a single required ``code`` string."""

    code: str


class TaskInfo(BaseModel):
    """Description of one task; all five fields are required strings."""

    id: str
    name: str
    description: str
    # Free-form string; no set of allowed values is enforced here.
    difficulty: str
    initial_code: str


class TasksResponse(BaseModel):
    """Wrapper holding a list of ``TaskInfo`` entries."""

    tasks: List[TaskInfo]


class GradeResponse(BaseModel):
    """Grading result: task identifier, float score, and pass flag."""

    task_id: str
    # No range constraint is applied to the score by this model.
    score: float
    passed: bool


class StateResponse(BaseModel):
    """Snapshot of state: code, step counters, metrics, and observation.

    NOTE(review): ``sample_id``, ``language`` and ``task_id`` are
    ``Optional`` without a default. Pydantic v1 treats such fields as
    defaulting to None, while pydantic v2 requires the key to be present
    (None is still accepted as a value) -- confirm which version is used.
    """

    current_code: str
    episode_steps: int
    max_steps: int
    complexity: float
    last_runtime: float
    last_error: bool
    sample_id: Optional[str]
    language: Optional[str]
    task_id: Optional[str]
    observation: ObservationModel
    # presumably observation.to_vector() -- TODO confirm with the producer
    observation_vector: List[float]
    # Integer key -> string; presumably action index -> name -- TODO confirm.
    action_meanings: Dict[int, str]


class ResetResponse(BaseModel):
    """Reset result: observation, free-form info, task id, and state.

    NOTE(review): ``task_id`` is ``Optional`` without a default; pydantic
    v1 defaults it to None, pydantic v2 requires the key -- confirm which
    version is used.
    """

    observation: ObservationModel
    # presumably observation.to_vector() -- TODO confirm with the producer
    observation_vector: List[float]
    # String-keyed mapping with values of any type.
    info: Dict[str, Any]
    task_id: Optional[str]
    state: StateResponse


class StepResponse(BaseModel):
    """Step result: action, observation, reward, end flags, info, state."""

    action: ActionModel
    observation: ObservationModel
    # presumably observation.to_vector() -- TODO confirm with the producer
    observation_vector: List[float]
    reward: RewardModel
    # Three separate required flags; presumably done == terminated or
    # truncated -- TODO confirm, nothing in this model enforces it.
    done: bool
    terminated: bool
    truncated: bool
    # String-keyed mapping with values of any type.
    info: Dict[str, Any]
    state: StateResponse


class OptimizeRequest(BaseModel):
    """Optimization parameters; only ``code`` is required."""

    code: str
    task_id: Optional[str] = None
    # Defaults to 5; validation restricts the value to 1..5 inclusive.
    max_steps: int = Field(default=5, ge=1, le=5)
    # Boolean switches: use_rl and fallback_to_llm default to True,
    # use_llm defaults to False. How they interact is decided by the
    # consumer of this model, not here.
    use_rl: bool = True
    use_llm: bool = False
    fallback_to_llm: bool = True
    # Optional string overrides; each defaults to None.
    rl_model_path: Optional[str] = None
    api_base_url: Optional[str] = None
    model_name: Optional[str] = None
    # NOTE(review): presumably a credential -- avoid logging or echoing
    # this field; confirm how callers handle it.
    api_token: Optional[str] = None


class OptimizationStep(BaseModel):
    """Record of one optimization step; every field is required."""

    step: int
    # Unlike ActionModel.action, no range constraint is declared here.
    action: int
    action_name: str
    reason: str
    # Free-form string; presumably what chose the action -- TODO confirm.
    source: str
    reward: float
    # Unlike RewardModel.normalized, no [0.0, 1.0] bound is declared here.
    normalized_reward: float
    changed: bool
    complexity: float


class OptimizeResponse(BaseModel):
    """Optimization result: original and optimized code, diff, and steps.

    NOTE(review): ``task_id`` and ``task_score`` are ``Optional`` without
    a default; pydantic v1 defaults them to None, pydantic v2 requires the
    keys -- confirm which version is used.
    """

    original_code: str
    optimized_code: str
    # Diff text as a string; the format is not fixed by this model.
    diff: str
    steps: List[OptimizationStep]
    cumulative_reward: float
    task_id: Optional[str]
    task_score: Optional[float]