Spaces:
Sleeping
Sleeping
Upload folder using huggingface_hub
Browse files
- inference.py +16 -11
- models.py +21 -15
- server/env.py +25 -15
inference.py
CHANGED
|
@@ -164,17 +164,22 @@ def run_task_episode(
|
|
| 164 |
action = PythonCodeReviewAction(action_type="analyze_code")
|
| 165 |
|
| 166 |
# Execute action
|
| 167 |
-
observation = env.step(action)
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 178 |
|
| 179 |
final_score = observation.score
|
| 180 |
if verbose:
|
|
|
|
| 164 |
action = PythonCodeReviewAction(action_type="analyze_code")
|
| 165 |
|
| 166 |
# Execute action
|
| 167 |
+
observation = env.step(action)
|
| 168 |
+
step_reward = float(observation.reward or 0.0)
|
| 169 |
+
total_reward += step_reward
|
| 170 |
+
|
| 171 |
+
if verbose:
|
| 172 |
+
print(f"Step {step_count}: {action.action_type}")
|
| 173 |
+
print(f" Reward: {step_reward:+.4f}")
|
| 174 |
+
print(f" Done: {observation.done}")
|
| 175 |
+
if step_reward != 0 or observation.reward_details.reason:
|
| 176 |
+
print(f" Reward Details: {observation.reward_details.reason}")
|
| 177 |
+
if observation.last_action_status:
|
| 178 |
+
print(f" Status: {observation.last_action_status}")
|
| 179 |
+
if observation.errors:
|
| 180 |
+
print(f" Errors: {observation.errors}")
|
| 181 |
+
if observation.test_results:
|
| 182 |
+
print(f" Tests: {observation.test_results}")
|
| 183 |
|
| 184 |
final_score = observation.score
|
| 185 |
if verbose:
|
models.py
CHANGED
|
@@ -43,20 +43,26 @@ class PythonCodeReviewAction(Action):
|
|
| 43 |
code: Optional[str] = Field(default=None, description="New code for edit_code actions")
|
| 44 |
|
| 45 |
|
| 46 |
-
class PythonCodeReviewObservation(Observation):
|
| 47 |
-
"""Observation returned by reset() and step()."""
|
| 48 |
-
|
| 49 |
-
task_id: str = Field(..., description="Current task identifier")
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 60 |
|
| 61 |
|
| 62 |
class PythonCodeReviewState(State):
|
|
@@ -106,4 +112,4 @@ class HealthResponse(BaseModel):
|
|
| 106 |
|
| 107 |
status: Literal["ok"] = "ok"
|
| 108 |
environment: str = "python_code_review_env"
|
| 109 |
-
task_count: int = Field(default=0, ge=0)
|
|
|
|
| 43 |
code: Optional[str] = Field(default=None, description="New code for edit_code actions")
|
| 44 |
|
| 45 |
|
| 46 |
+
class PythonCodeReviewObservation(Observation):
|
| 47 |
+
"""Observation returned by reset() and step()."""
|
| 48 |
+
|
| 49 |
+
task_id: str = Field(..., description="Current task identifier")
|
| 50 |
+
title: str = Field(default="", description="Human-readable task title")
|
| 51 |
+
difficulty: Difficulty = Field(..., description="Task difficulty level")
|
| 52 |
+
task_kind: Optional[TaskKind] = Field(default=None, description="Task type")
|
| 53 |
+
task_description: str = Field(..., description="Detailed task description")
|
| 54 |
+
current_code: str = Field(..., description="Current code state")
|
| 55 |
+
errors: str = Field(..., description="Syntax/compilation errors, if any")
|
| 56 |
+
test_results: str = Field(..., description="Results from test execution")
|
| 57 |
+
visible_tests: List[str] = Field(default_factory=list, description="Public test cases")
|
| 58 |
+
history: List[HistoryEntry] = Field(default_factory=list, description="Action history")
|
| 59 |
+
attempts_remaining: int = Field(..., ge=0, description="Actions left in episode")
|
| 60 |
+
last_action_status: str = Field(default="", description="Outcome message from the last action")
|
| 61 |
+
score: float = Field(..., ge=0.0, le=1.0, description="Current episode score")
|
| 62 |
+
reward_details: RewardDetails = Field(
|
| 63 |
+
default_factory=lambda: RewardDetails(value=0.0, reason="Reset"),
|
| 64 |
+
description="Detailed reward breakdown for the last action",
|
| 65 |
+
)
|
| 66 |
|
| 67 |
|
| 68 |
class PythonCodeReviewState(State):
|
|
|
|
| 112 |
|
| 113 |
status: Literal["ok"] = "ok"
|
| 114 |
environment: str = "python_code_review_env"
|
| 115 |
+
task_count: int = Field(default=0, ge=0)
|
server/env.py
CHANGED
|
@@ -171,21 +171,31 @@ class PythonCodeReviewEnvironment(
|
|
| 171 |
"""Expose deterministic grading outside of an active episode."""
|
| 172 |
return grade_task(code, get_task(task_id), include_hidden=True)
|
| 173 |
|
| 174 |
-
def _build_observation(self) -> PythonCodeReviewObservation:
|
| 175 |
-
"""Build current observation from state."""
|
| 176 |
-
return PythonCodeReviewObservation(
|
| 177 |
-
task_id=self._state.task_id or "",
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 189 |
|
| 190 |
def _handle_analyze(self) -> tuple[RewardDetails, str]:
|
| 191 |
"""Analyze code for errors and test status."""
|
|
|
|
| 171 |
"""Expose deterministic grading outside of an active episode."""
|
| 172 |
return grade_task(code, get_task(task_id), include_hidden=True)
|
| 173 |
|
| 174 |
+
def _build_observation(self) -> PythonCodeReviewObservation:
|
| 175 |
+
"""Build current observation from state."""
|
| 176 |
+
return PythonCodeReviewObservation(
|
| 177 |
+
task_id=self._state.task_id or "",
|
| 178 |
+
title=self._task.title if self._task else "",
|
| 179 |
+
difficulty=self._state.difficulty or "easy",
|
| 180 |
+
task_kind=self._state.task_kind,
|
| 181 |
+
task_description=self._task.task_description if self._task else "",
|
| 182 |
+
current_code=self._state.current_code,
|
| 183 |
+
errors=self._state.errors,
|
| 184 |
+
test_results=self._state.test_results,
|
| 185 |
+
visible_tests=self._task.visible_tests if self._task else [],
|
| 186 |
+
history=self._state.history,
|
| 187 |
+
attempts_remaining=self._state.attempts_remaining,
|
| 188 |
+
last_action_status=self._last_status,
|
| 189 |
+
score=self._state.score,
|
| 190 |
+
reward=self._last_reward.value,
|
| 191 |
+
reward_details=self._last_reward,
|
| 192 |
+
done=self._done,
|
| 193 |
+
metadata={
|
| 194 |
+
"episode_id": self._state.episode_id,
|
| 195 |
+
"step_count": self._state.step_count,
|
| 196 |
+
"task_kind": self._state.task_kind,
|
| 197 |
+
},
|
| 198 |
+
)
|
| 199 |
|
| 200 |
def _handle_analyze(self) -> tuple[RewardDetails, str]:
|
| 201 |
"""Analyze code for errors and test status."""
|