uvpatel7271 committed on
Commit
a954add
·
verified ·
1 Parent(s): 4ae018d

Upload folder using huggingface_hub

Browse files
Files changed (3) hide show
  1. inference.py +16 -11
  2. models.py +21 -15
  3. server/env.py +25 -15
inference.py CHANGED
@@ -164,17 +164,22 @@ def run_task_episode(
164
  action = PythonCodeReviewAction(action_type="analyze_code")
165
 
166
  # Execute action
167
- observation = env.step(action)
168
- total_reward += observation.reward.value
169
-
170
- if verbose:
171
- print(f"Step {step_count}: {action.action_type}")
172
- if observation.reward.value != 0:
173
- print(f" Reward: {observation.reward.value:+.4f} ({observation.reward.reason})")
174
- if observation.errors:
175
- print(f" Errors: {observation.errors}")
176
- if observation.test_results:
177
- print(f" Tests: {observation.test_results}")
 
 
 
 
 
178
 
179
  final_score = observation.score
180
  if verbose:
 
164
  action = PythonCodeReviewAction(action_type="analyze_code")
165
 
166
  # Execute action
167
+ observation = env.step(action)
168
+ step_reward = float(observation.reward or 0.0)
169
+ total_reward += step_reward
170
+
171
+ if verbose:
172
+ print(f"Step {step_count}: {action.action_type}")
173
+ print(f" Reward: {step_reward:+.4f}")
174
+ print(f" Done: {observation.done}")
175
+ if step_reward != 0 or observation.reward_details.reason:
176
+ print(f" Reward Details: {observation.reward_details.reason}")
177
+ if observation.last_action_status:
178
+ print(f" Status: {observation.last_action_status}")
179
+ if observation.errors:
180
+ print(f" Errors: {observation.errors}")
181
+ if observation.test_results:
182
+ print(f" Tests: {observation.test_results}")
183
 
184
  final_score = observation.score
185
  if verbose:
models.py CHANGED
@@ -43,20 +43,26 @@ class PythonCodeReviewAction(Action):
43
  code: Optional[str] = Field(default=None, description="New code for edit_code actions")
44
 
45
 
46
- class PythonCodeReviewObservation(Observation):
47
- """Observation returned by reset() and step()."""
48
-
49
- task_id: str = Field(..., description="Current task identifier")
50
- difficulty: Difficulty = Field(..., description="Task difficulty level")
51
- task_description: str = Field(..., description="Detailed task description")
52
- current_code: str = Field(..., description="Current code state")
53
- errors: str = Field(..., description="Syntax/compilation errors, if any")
54
- test_results: str = Field(..., description="Results from test execution")
55
- visible_tests: List[str] = Field(default_factory=list, description="Public test cases")
56
- history: List[HistoryEntry] = Field(default_factory=list, description="Action history")
57
- attempts_remaining: int = Field(..., ge=0, description="Actions left in episode")
58
- score: float = Field(..., ge=0.0, le=1.0, description="Current episode score")
59
- reward: RewardDetails = Field(default_factory=lambda: RewardDetails(value=0.0, reason="Reset"))
 
 
 
 
 
 
60
 
61
 
62
  class PythonCodeReviewState(State):
@@ -106,4 +112,4 @@ class HealthResponse(BaseModel):
106
 
107
  status: Literal["ok"] = "ok"
108
  environment: str = "python_code_review_env"
109
- task_count: int = Field(default=0, ge=0)
 
43
  code: Optional[str] = Field(default=None, description="New code for edit_code actions")
44
 
45
 
46
+ class PythonCodeReviewObservation(Observation):
47
+ """Observation returned by reset() and step()."""
48
+
49
+ task_id: str = Field(..., description="Current task identifier")
50
+ title: str = Field(default="", description="Human-readable task title")
51
+ difficulty: Difficulty = Field(..., description="Task difficulty level")
52
+ task_kind: Optional[TaskKind] = Field(default=None, description="Task type")
53
+ task_description: str = Field(..., description="Detailed task description")
54
+ current_code: str = Field(..., description="Current code state")
55
+ errors: str = Field(..., description="Syntax/compilation errors, if any")
56
+ test_results: str = Field(..., description="Results from test execution")
57
+ visible_tests: List[str] = Field(default_factory=list, description="Public test cases")
58
+ history: List[HistoryEntry] = Field(default_factory=list, description="Action history")
59
+ attempts_remaining: int = Field(..., ge=0, description="Actions left in episode")
60
+ last_action_status: str = Field(default="", description="Outcome message from the last action")
61
+ score: float = Field(..., ge=0.0, le=1.0, description="Current episode score")
62
+ reward_details: RewardDetails = Field(
63
+ default_factory=lambda: RewardDetails(value=0.0, reason="Reset"),
64
+ description="Detailed reward breakdown for the last action",
65
+ )
66
 
67
 
68
  class PythonCodeReviewState(State):
 
112
 
113
  status: Literal["ok"] = "ok"
114
  environment: str = "python_code_review_env"
115
+ task_count: int = Field(default=0, ge=0)
server/env.py CHANGED
@@ -171,21 +171,31 @@ class PythonCodeReviewEnvironment(
171
  """Expose deterministic grading outside of an active episode."""
172
  return grade_task(code, get_task(task_id), include_hidden=True)
173
 
174
- def _build_observation(self) -> PythonCodeReviewObservation:
175
- """Build current observation from state."""
176
- return PythonCodeReviewObservation(
177
- task_id=self._state.task_id or "",
178
- difficulty=self._state.difficulty or "easy",
179
- task_description=self._task.task_description if self._task else "",
180
- current_code=self._state.current_code,
181
- errors=self._state.errors,
182
- test_results=self._state.test_results,
183
- visible_tests=self._task.visible_tests if self._task else [],
184
- history=self._state.history,
185
- attempts_remaining=self._state.attempts_remaining,
186
- score=self._state.score,
187
- reward=self._last_reward,
188
- )
 
 
 
 
 
 
 
 
 
 
189
 
190
  def _handle_analyze(self) -> tuple[RewardDetails, str]:
191
  """Analyze code for errors and test status."""
 
171
  """Expose deterministic grading outside of an active episode."""
172
  return grade_task(code, get_task(task_id), include_hidden=True)
173
 
174
+ def _build_observation(self) -> PythonCodeReviewObservation:
175
+ """Build current observation from state."""
176
+ return PythonCodeReviewObservation(
177
+ task_id=self._state.task_id or "",
178
+ title=self._task.title if self._task else "",
179
+ difficulty=self._state.difficulty or "easy",
180
+ task_kind=self._state.task_kind,
181
+ task_description=self._task.task_description if self._task else "",
182
+ current_code=self._state.current_code,
183
+ errors=self._state.errors,
184
+ test_results=self._state.test_results,
185
+ visible_tests=self._task.visible_tests if self._task else [],
186
+ history=self._state.history,
187
+ attempts_remaining=self._state.attempts_remaining,
188
+ last_action_status=self._last_status,
189
+ score=self._state.score,
190
+ reward=self._last_reward.value,
191
+ reward_details=self._last_reward,
192
+ done=self._done,
193
+ metadata={
194
+ "episode_id": self._state.episode_id,
195
+ "step_count": self._state.step_count,
196
+ "task_kind": self._state.task_kind,
197
+ },
198
+ )
199
 
200
  def _handle_analyze(self) -> tuple[RewardDetails, str]:
201
  """Analyze code for errors and test status."""