100XZX001 committed on
Commit
8be8d83
·
verified ·
1 Parent(s): 3ec5e61

Update environment.py

Browse files
Files changed (1) hide show
  1. environment.py +1 -11
environment.py CHANGED
@@ -7,13 +7,11 @@ class CodeReviewEnv:
7
  self.reset()
8
 
9
  def set_task(self, task: str):
10
- """Set the task before resetting."""
11
  if task not in ["easy", "medium", "hard"]:
12
  raise ValueError(f"Unknown task: {task}")
13
  self.task = task
14
 
15
  def reset(self) -> Observation:
16
- """Reset environment to start a new episode for the current task."""
17
  if self.task is None:
18
  raise RuntimeError("Task not set. Call set_task() first.")
19
  self.step_count = 0
@@ -33,7 +31,6 @@ class CodeReviewEnv:
33
  raise RuntimeError(f"Invalid task: {self.task}")
34
 
35
  return self._get_observation()
36
-
37
 
38
  def step(self, action: Action) -> Tuple[Observation, Reward, bool, Dict[str, Any]]:
39
  if self.done:
@@ -44,12 +41,9 @@ class CodeReviewEnv:
44
 
45
  if action.action_type == "write_comment":
46
  self.agent_comment = action.comment_text or ""
47
- # Dense reward: +0.2 for writing any comment
48
  reward = 0.2
49
- # Then evaluate quality and add the final score (0.0–1.0)
50
  quality_score = self._grade_comment(self.agent_comment)
51
  reward += quality_score
52
- # But cap at 1.0
53
  if reward > 1.0:
54
  reward = 1.0
55
  self.done = True
@@ -68,14 +62,10 @@ class CodeReviewEnv:
68
  return obs, Reward(value=reward), self.done, info
69
 
70
  def _grade_comment(self, comment: str) -> float:
71
- """
72
- Return a score in [0.0, 1.0] based on how helpful the comment is.
73
- Uses keyword matching with partial credit.
74
- """
75
  if self.task == "easy":
76
  keywords = ["null", "key", "missing", "check", "exists", "handle"]
77
  matched = sum(1 for kw in keywords if kw in comment.lower())
78
- return min(1.0, matched / 3) # up to 3 keywords for full score
79
  elif self.task == "medium":
80
  keywords = ["enumerate", "for item in", "range", "inefficient", "optimize"]
81
  matched = sum(1 for kw in keywords if kw in comment.lower())
 
7
  self.reset()
8
 
9
  def set_task(self, task: str):
 
10
  if task not in ["easy", "medium", "hard"]:
11
  raise ValueError(f"Unknown task: {task}")
12
  self.task = task
13
 
14
  def reset(self) -> Observation:
 
15
  if self.task is None:
16
  raise RuntimeError("Task not set. Call set_task() first.")
17
  self.step_count = 0
 
31
  raise RuntimeError(f"Invalid task: {self.task}")
32
 
33
  return self._get_observation()
 
34
 
35
  def step(self, action: Action) -> Tuple[Observation, Reward, bool, Dict[str, Any]]:
36
  if self.done:
 
41
 
42
  if action.action_type == "write_comment":
43
  self.agent_comment = action.comment_text or ""
 
44
  reward = 0.2
 
45
  quality_score = self._grade_comment(self.agent_comment)
46
  reward += quality_score
 
47
  if reward > 1.0:
48
  reward = 1.0
49
  self.done = True
 
62
  return obs, Reward(value=reward), self.done, info
63
 
64
  def _grade_comment(self, comment: str) -> float:
 
 
 
 
65
  if self.task == "easy":
66
  keywords = ["null", "key", "missing", "check", "exists", "handle"]
67
  matched = sum(1 for kw in keywords if kw in comment.lower())
68
+ return min(1.0, matched / 3)
69
  elif self.task == "medium":
70
  keywords = ["enumerate", "for item in", "range", "inefficient", "optimize"]
71
  matched = sum(1 for kw in keywords if kw in comment.lower())