100XZX001 committed on
Commit
bc6d6d6
·
verified ·
1 Parent(s): 031d78c

Update environment.py

Browse files
Files changed (1) hide show
  1. environment.py +23 -23
environment.py CHANGED
@@ -247,39 +247,39 @@ class CodeReviewEnv:
247
  if self._last_action_type in ("write_comment", "ask_question", "propose_fix"):
248
  author_response = self._test_results or ""
249
  else:
250
- author_response = ""
251
 
252
  return EnhancedObservation(
253
- code_snippet=self._current_code,
254
- last_tool_output=self._test_results or "",
255
- author_response=author_response, # ← fixed
256
 
257
- current_test_score=self._current_test_score,
258
- current_lint_score=self._current_lint_score,
259
- negotiation_score=self._author.get_negotiation_score(),
260
 
261
- previous_test_score=self._previous_test_score,
262
- previous_lint_score=self._previous_lint_score,
263
 
264
- author_confidence=self._author._confidence,
265
- author_threshold=self._author.thresholds.get(self._author.personality, 0.5),
266
 
267
- step=self._step_count,
268
- max_steps=self.max_steps,
269
- progress_ratio=self._step_count / self.max_steps,
270
 
271
- tests_run=self._tests_run,
272
- linter_run=self._linter_run,
273
- docs_queried=self._docs_queried,
274
 
275
- last_action_type=self._last_action_type,
276
- action_history=self._action_history[-5:],
277
 
278
- done=self._done,
279
 
280
- bug_description=self._bug_description,
281
- comments_count=len(self._comments),
282
- )
283
  # ===================================================================
284
  def _compute_dense_reward(
285
  self,
 
247
  if self._last_action_type in ("write_comment", "ask_question", "propose_fix"):
248
  author_response = self._test_results or ""
249
  else:
250
+ author_response = ""
251
 
252
  return EnhancedObservation(
253
+ code_snippet=self._current_code,
254
+ last_tool_output=self._test_results or "",
255
+ author_response=author_response, # ← fixed
256
 
257
+ current_test_score=self._current_test_score,
258
+ current_lint_score=self._current_lint_score,
259
+ negotiation_score=self._author.get_negotiation_score(),
260
 
261
+ previous_test_score=self._previous_test_score,
262
+ previous_lint_score=self._previous_lint_score,
263
 
264
+ author_confidence=self._author._confidence,
265
+ author_threshold=self._author.thresholds.get(self._author.personality, 0.5),
266
 
267
+ step=self._step_count,
268
+ max_steps=self.max_steps,
269
+ progress_ratio=self._step_count / self.max_steps,
270
 
271
+ tests_run=self._tests_run,
272
+ linter_run=self._linter_run,
273
+ docs_queried=self._docs_queried,
274
 
275
+ last_action_type=self._last_action_type,
276
+ action_history=self._action_history[-5:],
277
 
278
+ done=self._done,
279
 
280
+ bug_description=self._bug_description,
281
+ comments_count=len(self._comments),
282
+ )
283
  # ===================================================================
284
  def _compute_dense_reward(
285
  self,