from enum import Enum
from typing import List, Optional, Union

from pydantic import BaseModel, model_validator


# Identifies which review task a scenario / episode belongs to.
class TaskId(str, Enum):
    BUG_DETECTION = "bug_detection"
    SECURITY_AUDIT = "security_audit"
    ARCHITECTURAL_REVIEW = "architectural_review"


# The kinds of step an agent may submit (see Action for per-kind requirements).
class ActionType(str, Enum):
    FLAG_ISSUE = "flag_issue"
    COMMENT = "comment"
    APPROVE = "approve"
    REQUEST_CHANGES = "request_changes"
    ASK_QUESTION = "ask_question"


# Classification attached to a flagged issue or a ground-truth issue.
class Category(str, Enum):
    BUG = "bug"
    SECURITY = "security"
    ARCHITECTURE = "architecture"
    STYLE = "style"
    PERFORMANCE = "performance"


# Numeric rank for each Severity value; a higher number is more severe.
# Kept at module level because a plain assignment inside the Enum body
# would be turned into an enum member.
_SEVERITY_ORDINALS = {
    "critical": 4,
    "high": 3,
    "medium": 2,
    "low": 1,
    "info": 0,
}


# Severity levels, listed from most to least severe.
class Severity(str, Enum):
    CRITICAL = "critical"  # ordinal 4
    HIGH = "high"  # ordinal 3
    MEDIUM = "medium"  # ordinal 2
    LOW = "low"  # ordinal 1
    INFO = "info"  # ordinal 0

    @classmethod
    def ordinal(cls, sev: "Severity") -> int:
        """Return the integer rank of *sev* (0 for info up to 4 for critical).

        Raises:
            KeyError: if ``sev.value`` is not one of the five known values.
        """
        rank = _SEVERITY_ORDINALS[sev.value]
        return rank


# Overall review outcome.
class Verdict(str, Enum):
    LGTM = "lgtm"
    REQUEST_CHANGES = "request_changes"
    NEEDS_DISCUSSION = "needs_discussion"


# One file touched by the pull request under review.
class FileChanged(BaseModel):
    filename: str
    language: str
    # Unified diff for this file only.
    patch: str
    additions: int = 0
    deletions: int = 0


# A known issue in a scenario.
class GroundTruthIssue(BaseModel):
    id: str
    category: Category
    severity: Severity
    filename: str
    line_number: int
    description: str
    # At least 2 keywords the agent body must contain
    # (the minimum count is not validated by this model).
    keywords: List[str]
    # When set, the terminal verdict is graded.
    required_verdict: Optional[Verdict] = None


# A complete review scenario: PR metadata, changed files, and expected issues.
class Scenario(BaseModel):
    task_id: TaskId
    pr_title: str
    pr_description: str
    files_changed: List[FileChanged]
    ground_truth_issues: List[GroundTruthIssue]
    # Deterministic identifier, e.g. "bug_001".
    hash: str
    # One of: easy | medium | hard (free-form str; not validated here).
    difficulty: str = "medium"
    tags: List[str] = []


# A single step submitted by the agent; cross-field rules are enforced by
# validate_action_fields below.
class Action(BaseModel):
    action_type: ActionType
    body: str = ""
    filename: Optional[str] = None
    line_number: Optional[int] = None
    category: Optional[Category] = None
    severity: Optional[Severity] = None
    verdict: Optional[Verdict] = None

    @model_validator(mode="after")
    def validate_action_fields(self) -> "Action":
        """Check that the fields required by ``action_type`` are present.

        * ``flag_issue`` needs a non-empty body and filename, a line number
          that is not ``None``, plus a category and a severity.
        * ``approve`` / ``request_changes`` need a verdict and a non-empty
          body.
        * Every other action type is accepted without further checks.

        Returns:
            The validated instance itself.

        Raises:
            ValueError: when a required field is missing or empty.
        """
        kind = self.action_type

        if kind == ActionType.FLAG_ISSUE:
            # line_number is compared against None so that 0 is accepted.
            located = bool(self.body and self.filename) and self.line_number is not None
            if not located:
                raise ValueError("flag_issue requires body, filename, and line_number")
            if not (self.category and self.severity):
                raise ValueError("flag_issue requires category and severity")
            return self

        if kind in (ActionType.APPROVE, ActionType.REQUEST_CHANGES):
            # The verdict check runs first, so it wins when both are missing.
            if not self.verdict:
                raise ValueError(f"{kind.value} action requires a verdict")
            if not self.body:
                raise ValueError(f"{kind.value} action requires a body summary")

        return self


# NOTE(review): the docstring below says "immutable", but no frozen model
# config is set in this class — confirm whether callers rely on mutation.
class ActionRecord(BaseModel):
    """Immutable record of a step taken — stored in episode history."""

    action_type: ActionType
    body: str = ""
    filename: Optional[str] = None
    line_number: Optional[int] = None
    category: Optional[Category] = None
    severity: Optional[Severity] = None
    verdict: Optional[Verdict] = None
    reward: float = 0.0
    # ISO format, set by env.
    timestamp: str = ""


# Snapshot of the episode state.
class Observation(BaseModel):
    task_id: TaskId
    scenario_hash: str
    pr_title: str
    pr_description: str
    # Full unified diff (all files concatenated).
    diff: str
    files_changed: List[FileChanged]
    step_count: int
    max_steps: int
    noise_budget: int
    max_noise_budget: int = 5
    issues_flagged: int = 0
    done: bool = False


class Reward(BaseModel):
    """
    Typed reward signal returned at each step (OpenEnv spec).
    All values are normalized in the 0.0 – 1.0 range.
    """

    # 0.0 – 1.0 normalised score (the range is not validated by this model).
    value: float
    # Human-readable explanation.
    reason: str = ""
    # True on the final step.
    is_terminal: bool = False


# Payload describing a freshly reset episode.
class ResetResult(BaseModel):
    task_id: TaskId
    seed: int
    scenario_hash: str
    observation: Observation


# Payload describing the outcome of a single step.
class StepResult(BaseModel):
    observation: Observation
    reward: float
    # Typed Reward model (OpenEnv spec).
    reward_info: Reward
    done: bool
    info: dict = {}


# Summary of a finished episode.
class EpisodeResult(BaseModel):
    episode_id: str = ""
    task_id: TaskId
    scenario_hash: str
    seed: int
    final_score: float
    steps_taken: int
    issues_found: int
    issues_total: int
    noise_penalties: int
    history: List[ActionRecord] = []
    # One of: "terminal_action" | "max_steps" | "noise_exhausted".
    terminated_reason: str = ""