Spaces:

ArshVerma
/

CodeLens

Sleeping

ArshVerma committed on Apr 2

Commit

0320a8d

1 Parent(s): 99cbab5

Add action validation and reward recording

Add validation and tighten action handling across the env and inference code.

- codereview_env/models.py: add a pydantic model_validator on Action to enforce required fields for FLAG_ISSUE (body, filename, line_number, category, severity) and require verdict/body for APPROVE/REQUEST_CHANGES actions.
- codereview_env/env.py: create the ActionRecord with reward=0.0 and append it to history before applying the action; after computing the reward delta and updating running_score, set the record's reward to the delta rounded to 4 decimals. Also guard against an empty history when determining the termination reason, and initialize terminated_reason to an empty string, falling back to "max_steps" when the episode is done for any other reason.
- codereview_env/scenarios.py: change required_verdict for arch_003, arch_005, and arch_008 from NEEDS_DISCUSSION to REQUEST_CHANGES.
- inference.py: normalize verdict strings to lowercase ("lgtm" and "request_changes").

These changes improve data validation, ensure accurate per-action reward recording in history, and standardize verdict values.

Files changed (4) hide show

codereview_env/env.py +11 -6
codereview_env/models.py +15 -1
codereview_env/scenarios.py +3 -3
inference.py +2 -2

codereview_env/env.py CHANGED Viewed

@@ -49,8 +49,8 @@ class CodeReviewEnv:
         s = self._state
         s["step_count"] += 1
-        # Record action in history
-        s["history"].append(ActionRecord(
             action_type=action.action_type,
             body=action.body,
             filename=action.filename,
@@ -58,14 +58,19 @@ class CodeReviewEnv:
             severity=action.severity,
             category=action.category,
             verdict=action.verdict,
             timestamp=datetime.now(timezone.utc).isoformat()
-        ))
         # Apply action logic and compute incremental reward delta
         prev_score    = s["running_score"]
         reward_delta  = self._apply_action(action)
         s["running_score"] = prev_score + reward_delta
         # Check termination
         s["done"] = (
             action.action_type in (ActionType.APPROVE, ActionType.REQUEST_CHANGES)
@@ -150,13 +155,13 @@ class CodeReviewEnv:
         missed_ids = list(all_gt_ids - s["issues_found"])
         final_score = self._grade(sc, s)
-        terminated_reason = "max_steps"
         if s["done"]:
             if s["noise_budget"] <= 0:
                 terminated_reason = "noise_exhausted"
-            elif s["history"][-1].action_type in (ActionType.APPROVE, ActionType.REQUEST_CHANGES):
                 terminated_reason = "terminal_action"
-            elif s["step_count"] >= s["max_steps"]:
                 terminated_reason = "max_steps"
         return EpisodeResult(

         s = self._state
         s["step_count"] += 1
+        # Record action in history (reward will be updated after calculation)
+        record = ActionRecord(
             action_type=action.action_type,
             body=action.body,
             filename=action.filename,
             severity=action.severity,
             category=action.category,
             verdict=action.verdict,
+            reward=0.0,
             timestamp=datetime.now(timezone.utc).isoformat()
+        )
+        s["history"].append(record)
         # Apply action logic and compute incremental reward delta
         prev_score    = s["running_score"]
         reward_delta  = self._apply_action(action)
         s["running_score"] = prev_score + reward_delta
+        # Update the history record with the actual reward
+        record.reward = round(reward_delta, 4)
         # Check termination
         s["done"] = (
             action.action_type in (ActionType.APPROVE, ActionType.REQUEST_CHANGES)
         missed_ids = list(all_gt_ids - s["issues_found"])
         final_score = self._grade(sc, s)
+        terminated_reason = ""
         if s["done"]:
             if s["noise_budget"] <= 0:
                 terminated_reason = "noise_exhausted"
+            elif s["history"] and s["history"][-1].action_type in (ActionType.APPROVE, ActionType.REQUEST_CHANGES):
                 terminated_reason = "terminal_action"
+            else:
                 terminated_reason = "max_steps"
         return EpisodeResult(

codereview_env/models.py CHANGED Viewed

@@ -1,6 +1,6 @@
 from enum import Enum
 from typing import List, Optional, Union
-from pydantic import BaseModel
 class TaskId(str, Enum):
     BUG_DETECTION = "bug_detection"
@@ -73,6 +73,20 @@ class Action(BaseModel):
     severity: Optional[Severity] = None
     verdict: Optional[Verdict] = None
 class ActionRecord(BaseModel):
     """Immutable record of a step taken — stored in episode history."""
     action_type: ActionType

 from enum import Enum
 from typing import List, Optional, Union
+from pydantic import BaseModel, model_validator
 class TaskId(str, Enum):
     BUG_DETECTION = "bug_detection"
     severity: Optional[Severity] = None
     verdict: Optional[Verdict] = None
+    @model_validator(mode="after")
+    def validate_action_fields(self) -> "Action":
+        if self.action_type == ActionType.FLAG_ISSUE:
+            if not self.body or not self.filename or self.line_number is None:
+                raise ValueError("flag_issue requires body, filename, and line_number")
+            if not self.category or not self.severity:
+                raise ValueError("flag_issue requires category and severity")
+        elif self.action_type in (ActionType.APPROVE, ActionType.REQUEST_CHANGES):
+            if not self.verdict:
+                raise ValueError(f"{self.action_type.value} action requires a verdict")
+            if not self.body:
+                raise ValueError(f"{self.action_type.value} action requires a body summary")
+        return self
 class ActionRecord(BaseModel):
     """Immutable record of a step taken — stored in episode history."""
     action_type: ActionType

codereview_env/scenarios.py CHANGED Viewed

@@ -810,7 +810,7 @@ arch_003 = Scenario(
             line_number=2,
             description="Tight coupling: service depends on concrete implementation instead of abstraction",
             keywords=["tight coupling", "dependency injection"],
-            required_verdict=Verdict.NEEDS_DISCUSSION
         )
     ],
     hash="arch_003",
@@ -881,7 +881,7 @@ arch_005 = Scenario(
             line_number=6,
             description="Missing resilience (retry, timeout, circuit breaker) on external API dependency",
             keywords=["retry", "resilience"],
-            required_verdict=Verdict.NEEDS_DISCUSSION
         )
     ],
     hash="arch_005",
@@ -984,7 +984,7 @@ arch_008 = Scenario(
             line_number=2,
             description="Secret leaked in code comment; should be in environment variables only",
             keywords=["secret", "comment"],
-            required_verdict=Verdict.NEEDS_DISCUSSION
         )
     ],
     hash="arch_008",

             line_number=2,
             description="Tight coupling: service depends on concrete implementation instead of abstraction",
             keywords=["tight coupling", "dependency injection"],
+            required_verdict=Verdict.REQUEST_CHANGES
         )
     ],
     hash="arch_003",
             line_number=6,
             description="Missing resilience (retry, timeout, circuit breaker) on external API dependency",
             keywords=["retry", "resilience"],
+            required_verdict=Verdict.REQUEST_CHANGES
         )
     ],
     hash="arch_005",
             line_number=2,
             description="Secret leaked in code comment; should be in environment variables only",
             keywords=["secret", "comment"],
+            required_verdict=Verdict.REQUEST_CHANGES
         )
     ],
     hash="arch_008",

inference.py CHANGED Viewed

@@ -165,9 +165,9 @@ def sanitize_action(action_dict: dict, task_id: str) -> dict:
     elif action_type in ("approve", "request_changes"):
         if action_type == "approve":
-            action_dict["verdict"] = "LGTM"
         else:
-            action_dict["verdict"] = "REQUEST_CHANGES"
         if "body" not in action_dict:
             action_dict["body"] = "Review complete."

     elif action_type in ("approve", "request_changes"):
         if action_type == "approve":
+            action_dict["verdict"] = "lgtm"
         else:
+            action_dict["verdict"] = "request_changes"
         if "body" not in action_dict:
             action_dict["body"] = "Review complete."