Spaces:

100XZX001
/

code_review_env

Sleeping

App Files Files Community

100XZX001 committed on Mar 28

Commit

8be8d83

verified ·

1 Parent(s): 3ec5e61

Update environment.py

Browse files

Files changed (1) hide show

environment.py +1 -11

environment.py CHANGED Viewed

@@ -7,13 +7,11 @@ class CodeReviewEnv:
         self.reset()
     def set_task(self, task: str):
-        """Set the task before resetting."""
         if task not in ["easy", "medium", "hard"]:
             raise ValueError(f"Unknown task: {task}")
         self.task = task
     def reset(self) -> Observation:
-        """Reset environment to start a new episode for the current task."""
         if self.task is None:
             raise RuntimeError("Task not set. Call set_task() first.")
         self.step_count = 0
@@ -33,7 +31,6 @@ class CodeReviewEnv:
             raise RuntimeError(f"Invalid task: {self.task}")
         return self._get_observation()
     def step(self, action: Action) -> Tuple[Observation, Reward, bool, Dict[str, Any]]:
         if self.done:
@@ -44,12 +41,9 @@ class CodeReviewEnv:
         if action.action_type == "write_comment":
             self.agent_comment = action.comment_text or ""
-            # Dense reward: +0.2 for writing any comment
             reward = 0.2
-            # Then evaluate quality and add the final score (0.0–1.0)
             quality_score = self._grade_comment(self.agent_comment)
             reward += quality_score
-            # But cap at 1.0
             if reward > 1.0:
                 reward = 1.0
             self.done = True
@@ -68,14 +62,10 @@ class CodeReviewEnv:
         return obs, Reward(value=reward), self.done, info
     def _grade_comment(self, comment: str) -> float:
-        """
-        Return a score in [0.0, 1.0] based on how helpful the comment is.
-        Uses keyword matching with partial credit.
-        """
         if self.task == "easy":
             keywords = ["null", "key", "missing", "check", "exists", "handle"]
             matched = sum(1 for kw in keywords if kw in comment.lower())
-            return min(1.0, matched / 3)  # up to 3 keywords for full score
         elif self.task == "medium":
             keywords = ["enumerate", "for item in", "range", "inefficient", "optimize"]
             matched = sum(1 for kw in keywords if kw in comment.lower())

         self.reset()
     def set_task(self, task: str):
         """Record which task the environment should run.

         Args:
             task: One of "easy", "medium" or "hard".

         Raises:
             ValueError: If ``task`` is not one of the recognised names;
                 ``self.task`` is left untouched in that case.
         """
         recognised = ("easy", "medium", "hard")
         if task in recognised:
             self.task = task
             return
         raise ValueError(f"Unknown task: {task}")
     def reset(self) -> Observation:
         if self.task is None:
             raise RuntimeError("Task not set. Call set_task() first.")
         self.step_count = 0
             raise RuntimeError(f"Invalid task: {self.task}")
         return self._get_observation()
     def step(self, action: Action) -> Tuple[Observation, Reward, bool, Dict[str, Any]]:
         if self.done:
         if action.action_type == "write_comment":
             self.agent_comment = action.comment_text or ""
             reward = 0.2
             quality_score = self._grade_comment(self.agent_comment)
             reward += quality_score
             if reward > 1.0:
                 reward = 1.0
             self.done = True
         return obs, Reward(value=reward), self.done, info
     def _grade_comment(self, comment: str) -> float:
         if self.task == "easy":
             keywords = ["null", "key", "missing", "check", "exists", "handle"]
             matched = sum(1 for kw in keywords if kw in comment.lower())
+            return min(1.0, matched / 3)
         elif self.task == "medium":
             keywords = ["enumerate", "for item in", "range", "inefficient", "optimize"]
             matched = sum(1 for kw in keywords if kw in comment.lower())