from enum import Enum
from typing import List, Optional, Union
from pydantic import BaseModel, model_validator
class TaskId(str, Enum):
    """Identifier of the kind of review task.

    Members are ``str`` instances as well as enum members, so each one
    compares equal to its raw string value.
    """

    BUG_DETECTION = "bug_detection"
    SECURITY_AUDIT = "security_audit"
    ARCHITECTURAL_REVIEW = "architectural_review"
class ActionType(str, Enum):
    """Kinds of action that can be submitted as a step.

    Members are ``str`` instances as well as enum members, so each one
    compares equal to its raw string value.
    """

    FLAG_ISSUE = "flag_issue"
    COMMENT = "comment"
    APPROVE = "approve"
    REQUEST_CHANGES = "request_changes"
    ASK_QUESTION = "ask_question"
class Category(str, Enum):
    """Classification assigned to an issue.

    Members are ``str`` instances as well as enum members, so each one
    compares equal to its raw string value.
    """

    BUG = "bug"
    SECURITY = "security"
    ARCHITECTURE = "architecture"
    STYLE = "style"
    PERFORMANCE = "performance"
class Severity(str, Enum):
    """Severity level of an issue, from ``CRITICAL`` down to ``INFO``.

    Members are ``str`` instances as well as enum members, so each one
    compares equal to its raw string value. Use :meth:`ordinal` to compare
    two severities numerically.
    """

    CRITICAL = "critical"  # ordinal 4
    HIGH = "high"  # ordinal 3
    MEDIUM = "medium"  # ordinal 2
    LOW = "low"  # ordinal 1
    INFO = "info"  # ordinal 0

    @classmethod
    def ordinal(cls, sev: Union["Severity", str]) -> int:
        """Return the numeric rank of *sev* (4 = critical ... 0 = info).

        Args:
            sev: A ``Severity`` member, or the raw string value of one
                (for example ``"high"``).

        Returns:
            An integer in ``0..4``; a larger number means a more severe issue.

        Raises:
            ValueError: If *sev* is not a valid severity value.
        """
        # The table lives inside the method on purpose: a dict assigned in the
        # class body of an Enum would be treated as an enum member.
        ranking = {
            cls.CRITICAL: 4,
            cls.HIGH: 3,
            cls.MEDIUM: 2,
            cls.LOW: 1,
            cls.INFO: 0,
        }
        # cls(...) returns members unchanged and coerces raw strings, so the
        # lookup no longer depends on the argument having a ``.value``.
        return ranking[cls(sev)]
class Verdict(str, Enum):
    """Overall verdict that can be attached to a review.

    Members are ``str`` instances as well as enum members, so each one
    compares equal to its raw string value.
    """

    LGTM = "lgtm"
    REQUEST_CHANGES = "request_changes"
    NEEDS_DISCUSSION = "needs_discussion"
class FileChanged(BaseModel):
    """A single changed file together with its diff."""

    filename: str
    language: str
    # Unified diff covering this file only.
    patch: str
    # Both counters default to 0 when not supplied.
    additions: int = 0
    deletions: int = 0
class GroundTruthIssue(BaseModel):
    """A known issue in a scenario, identified by file and line."""

    id: str
    category: Category
    severity: Severity
    filename: str
    line_number: int
    description: str
    # At least 2 keywords the agent body must contain.
    # NOTE(review): the minimum of 2 is not enforced by this model.
    keywords: List[str]
    # When set, the terminal verdict is graded against this value.
    required_verdict: Optional[Verdict] = None
class Scenario(BaseModel):
    """A review scenario: PR metadata, changed files and ground-truth issues."""

    task_id: TaskId
    pr_title: str
    pr_description: str
    files_changed: List[FileChanged]
    ground_truth_issues: List[GroundTruthIssue]
    # Deterministic identifier, e.g. "bug_001".
    hash: str
    # Expected values: easy | medium | hard (free-form str, not validated here).
    difficulty: str = "medium"
    tags: List[str] = []
class Action(BaseModel):
    """An action submitted as a step, validated according to its type.

    Only ``action_type`` is mandatory at the field level; the validator below
    adds per-type requirements:

    * ``flag_issue`` needs ``body``, ``filename``, ``line_number``,
      ``category`` and ``severity``.
    * ``approve`` and ``request_changes`` need ``verdict`` and ``body``.
    * Other action types have no extra requirements.
    """

    action_type: ActionType
    body: str = ""
    filename: Optional[str] = None
    line_number: Optional[int] = None
    category: Optional[Category] = None
    severity: Optional[Severity] = None
    verdict: Optional[Verdict] = None

    @model_validator(mode="after")
    def validate_action_fields(self) -> "Action":
        """Check the fields required by ``action_type`` are present.

        Returns:
            The validated instance, unchanged.

        Raises:
            ValueError: If a field required for the action type is missing
                or empty.
        """
        if self.action_type == ActionType.FLAG_ISSUE:
            # line_number is compared against None so a falsy number is not
            # treated as missing.
            location_missing = self.line_number is None or not self.filename
            if location_missing or not self.body:
                raise ValueError("flag_issue requires body, filename, and line_number")
            if not (self.category and self.severity):
                raise ValueError("flag_issue requires category and severity")
            return self

        if self.action_type in (ActionType.APPROVE, ActionType.REQUEST_CHANGES):
            # The verdict is checked before the body.
            if not self.verdict:
                raise ValueError(f"{self.action_type.value} action requires a verdict")
            if not self.body:
                raise ValueError(f"{self.action_type.value} action requires a body summary")

        return self
class ActionRecord(BaseModel):
    """Record of a step taken, stored in episode history.

    NOTE(review): intended to be immutable, but the model is not declared
    frozen, so nothing here prevents mutation.
    """

    # Same fields as an Action.
    action_type: ActionType
    body: str = ""
    filename: Optional[str] = None
    line_number: Optional[int] = None
    category: Optional[Category] = None
    severity: Optional[Severity] = None
    verdict: Optional[Verdict] = None

    # Bookkeeping stored alongside the action.
    reward: float = 0.0
    # ISO format, set by the env.
    timestamp: str = ""
class Observation(BaseModel):
    """Observation payload: the PR under review plus episode progress."""

    task_id: TaskId
    scenario_hash: str
    pr_title: str
    pr_description: str
    # Full unified diff (all files concatenated).
    diff: str
    files_changed: List[FileChanged]

    # Progress counters.
    step_count: int
    max_steps: int
    noise_budget: int
    max_noise_budget: int = 5
    issues_flagged: int = 0
    done: bool = False
class Reward(BaseModel):
    """
    Typed reward signal returned at each step (OpenEnv spec).

    Values are meant to be normalised to the 0.0 - 1.0 range.
    NOTE(review): this model does not enforce that range.
    """

    # Normalised score, 0.0 - 1.0.
    value: float
    # Human-readable explanation.
    reason: str = ""
    # True on the final step.
    is_terminal: bool = False
class ResetResult(BaseModel):
    """Result of a reset: task, seed, scenario hash and an observation."""

    task_id: TaskId
    seed: int
    scenario_hash: str
    observation: Observation
class StepResult(BaseModel):
    """Result of a single step: observation, reward and completion flag."""

    observation: Observation
    # Scalar reward for the step.
    reward: float
    # Typed Reward model (OpenEnv spec).
    reward_info: Reward
    done: bool
    # Free-form extra data; empty by default.
    info: dict = {}
class EpisodeResult(BaseModel):
    """Summary of an episode: identity, score, counters and action history."""

    episode_id: str = ""
    task_id: TaskId
    scenario_hash: str
    seed: int

    # Outcome counters.
    final_score: float
    steps_taken: int
    issues_found: int
    issues_total: int
    noise_penalties: int

    # Recorded steps; empty by default.
    history: List[ActionRecord] = []
    # One of "terminal_action" | "max_steps" | "noise_exhausted"
    # (free-form str, not validated here).
    terminated_reason: str = ""
|