Add codereview_env/models.py
Browse files- codereview_env/models.py +123 -0
codereview_env/models.py
ADDED
|
@@ -0,0 +1,123 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from enum import Enum
|
| 2 |
+
from typing import List, Optional, Dict, Any
|
| 3 |
+
from pydantic import BaseModel, model_validator
|
| 4 |
+
|
| 5 |
+
class TaskId(str, Enum):
    """String-valued identifiers for the available review tasks.

    Members are also ``str`` instances, so they compare equal to their
    raw string values.
    """

    BUG_DETECTION = "bug_detection"
    SECURITY_AUDIT = "security_audit"
    ARCHITECTURAL_REVIEW = "architectural_review"
|
| 9 |
+
|
| 10 |
+
class ActionType(str, Enum):
    """String-valued kinds of action that can be submitted.

    Members are also ``str`` instances, so they compare equal to their
    raw string values.
    """

    COMMENT = "comment"
    FLAG_ISSUE = "flag_issue"
    REQUEST_CHANGES = "request_changes"
    APPROVE = "approve"
    ASK_QUESTION = "ask_question"
|
| 16 |
+
|
| 17 |
+
class Severity(str, Enum):
    """String-valued severity levels, declared from low to critical.

    Members are also ``str`` instances, so they compare equal to their
    raw string values.
    """

    LOW = "low"
    MEDIUM = "medium"
    HIGH = "high"
    CRITICAL = "critical"
|
| 22 |
+
|
| 23 |
+
class Category(str, Enum):
    """String-valued categories an issue can be filed under.

    Members are also ``str`` instances, so they compare equal to their
    raw string values.
    """

    BUG = "bug"
    SECURITY = "security"
    STYLE = "style"
    PERFORMANCE = "performance"
    ARCHITECTURE = "architecture"
    DESIGN = "design"
|
| 30 |
+
|
| 31 |
+
class Verdict(str, Enum):
    """String-valued review verdicts.

    Unlike the other enums in this module, the values here are upper-case
    and identical to the member names.
    """

    LGTM = "LGTM"
    REQUEST_CHANGES = "REQUEST_CHANGES"
    NEEDS_DISCUSSION = "NEEDS_DISCUSSION"
|
| 35 |
+
|
| 36 |
+
class FileChange(BaseModel):
    """One changed file: its name, patch text, and line-change counts."""

    filename: str
    patch: str
    # Both counters are optional on input and default to zero.
    additions: int = 0
    deletions: int = 0
|
| 41 |
+
|
| 42 |
+
class GroundTruthIssue(BaseModel):
    """A known issue attached to a scenario, located by file and line."""

    id: str
    category: Category
    severity: Severity
    filename: str
    line_number: int
    description: str
    keywords: List[str]
    # Optional; None when no particular verdict is tied to this issue.
    required_verdict: Optional[Verdict] = None
|
| 51 |
+
|
| 52 |
+
class ActionRecord(BaseModel):
    """Stored form of an action, as kept in ``Observation.history``.

    Declares the same fields as ``Action`` but has no cross-field
    validator, so any combination of the optional fields is accepted.
    """

    action_type: ActionType
    body: str
    # Everything below is optional and defaults to None.
    filename: Optional[str] = None
    line_number: Optional[int] = None
    severity: Optional[Severity] = None
    category: Optional[Category] = None
    verdict: Optional[Verdict] = None
|
| 60 |
+
|
| 61 |
+
class Action(BaseModel):
    """A single review action with per-action-type required fields.

    ``flag_issue`` actions must carry ``severity``, ``category``,
    ``filename`` and ``line_number``. ``approve`` and ``request_changes``
    actions must carry a ``verdict``. Other action types have no extra
    requirements in this model.
    """

    action_type: ActionType
    body: str
    filename: Optional[str] = None
    line_number: Optional[int] = None
    severity: Optional[Severity] = None
    category: Optional[Category] = None
    verdict: Optional[Verdict] = None

    @model_validator(mode='after')
    def validate_action(self) -> 'Action':
        """Check that the fields required by ``action_type`` are present.

        Returns:
            The same instance, unchanged, when all checks pass.

        Raises:
            ValueError: If a field required for the action type is missing.
        """
        if self.action_type == ActionType.FLAG_ISSUE:
            if not self.severity or not self.category:
                raise ValueError("flag_issue requires severity and category")
            # Truthiness check: an empty filename or a line_number of 0 is
            # rejected exactly like a missing (None) value.
            if not self.filename or not self.line_number:
                raise ValueError("flag_issue requires filename and line_number")

        if self.action_type in (ActionType.APPROVE, ActionType.REQUEST_CHANGES):
            # Only presence is checked; the verdict value itself is not
            # compared against the action type.
            if not self.verdict:
                raise ValueError(f"{self.action_type.value} requires a verdict")

        return self
|
| 83 |
+
|
| 84 |
+
class Observation(BaseModel):
    """Snapshot of a review episode: PR content, step counters, history."""

    task_id: TaskId

    # Pull-request content under review.
    pr_title: str
    pr_description: str
    diff: str
    files_changed: List[FileChange]

    # Step counters and the actions recorded so far.
    step_count: int
    max_steps: int
    history: List[ActionRecord]
    noise_budget: int
|
| 94 |
+
|
| 95 |
+
class ResetResult(BaseModel):
    """Reset payload: an observation plus task id, seed and scenario hash."""

    observation: Observation
    task_id: TaskId
    seed: int
    scenario_hash: str
|
| 100 |
+
|
| 101 |
+
class StepResult(BaseModel):
    """Step payload: observation, reward, done flag and an info mapping."""

    observation: Observation
    reward: float
    done: bool
    # Free-form mapping; values are not constrained by this model.
    info: Dict[str, Any]
|
| 106 |
+
|
| 107 |
+
class EpisodeResult(BaseModel):
    """Summary of a finished episode: score, issue matches, verdict check."""

    task_id: TaskId
    seed: int
    total_steps: int
    final_score: float
    issues_found: List[str]  # IDs of ground truth issues found
    issues_missed: List[str]  # IDs of ground truth issues missed
    false_positives: List[str]  # descriptions of actions that were FP
    # Optional; defaults to None when no verdict result is supplied.
    verdict_correct: Optional[bool] = None
|
| 116 |
+
|
| 117 |
+
class Scenario(BaseModel):
    """A review scenario: PR content, its ground-truth issues and a hash."""

    task_id: TaskId
    pr_title: str
    pr_description: str
    files_changed: List[FileChange]
    ground_truth_issues: List[GroundTruthIssue]
    # Field name matches the builtin ``hash`` only inside this class body;
    # kept as-is because renaming it would change the model's interface.
    hash: str
|