Update environment.py
Browse files
- environment.py +23 -23
environment.py
CHANGED
|
@@ -247,39 +247,39 @@ class CodeReviewEnv:
|
|
| 247 |
if self._last_action_type in ("write_comment", "ask_question", "propose_fix"):
|
| 248 |
author_response = self._test_results or ""
|
| 249 |
else:
|
| 250 |
-
|
| 251 |
|
| 252 |
return EnhancedObservation(
|
| 253 |
-
|
| 254 |
-
|
| 255 |
-
|
| 256 |
|
| 257 |
-
|
| 258 |
-
|
| 259 |
-
|
| 260 |
|
| 261 |
-
|
| 262 |
-
|
| 263 |
|
| 264 |
-
|
| 265 |
-
|
| 266 |
|
| 267 |
-
|
| 268 |
-
|
| 269 |
-
|
| 270 |
|
| 271 |
-
|
| 272 |
-
|
| 273 |
-
|
| 274 |
|
| 275 |
-
|
| 276 |
-
|
| 277 |
|
| 278 |
-
|
| 279 |
|
| 280 |
-
|
| 281 |
-
|
| 282 |
-
|
| 283 |
# ===================================================================
|
| 284 |
def _compute_dense_reward(
|
| 285 |
self,
|
|
|
|
| 247 |
if self._last_action_type in ("write_comment", "ask_question", "propose_fix"):
|
| 248 |
author_response = self._test_results or ""
|
| 249 |
else:
|
| 250 |
+
author_response = ""
|
| 251 |
|
| 252 |
return EnhancedObservation(
|
| 253 |
+
code_snippet=self._current_code,
|
| 254 |
+
last_tool_output=self._test_results or "",
|
| 255 |
+
author_response=author_response, # ← fixed
|
| 256 |
|
| 257 |
+
current_test_score=self._current_test_score,
|
| 258 |
+
current_lint_score=self._current_lint_score,
|
| 259 |
+
negotiation_score=self._author.get_negotiation_score(),
|
| 260 |
|
| 261 |
+
previous_test_score=self._previous_test_score,
|
| 262 |
+
previous_lint_score=self._previous_lint_score,
|
| 263 |
|
| 264 |
+
author_confidence=self._author._confidence,
|
| 265 |
+
author_threshold=self._author.thresholds.get(self._author.personality, 0.5),
|
| 266 |
|
| 267 |
+
step=self._step_count,
|
| 268 |
+
max_steps=self.max_steps,
|
| 269 |
+
progress_ratio=self._step_count / self.max_steps,
|
| 270 |
|
| 271 |
+
tests_run=self._tests_run,
|
| 272 |
+
linter_run=self._linter_run,
|
| 273 |
+
docs_queried=self._docs_queried,
|
| 274 |
|
| 275 |
+
last_action_type=self._last_action_type,
|
| 276 |
+
action_history=self._action_history[-5:],
|
| 277 |
|
| 278 |
+
done=self._done,
|
| 279 |
|
| 280 |
+
bug_description=self._bug_description,
|
| 281 |
+
comments_count=len(self._comments),
|
| 282 |
+
)
|
| 283 |
# ===================================================================
|
| 284 |
def _compute_dense_reward(
|
| 285 |
self,
|