Spaces:
Runtime error
Runtime error
File size: 5,765 Bytes
12d85aa c80e50a 12d85aa c80e50a 12d85aa ad2823c 12d85aa ad2823c 12d85aa c80e50a 12d85aa c80e50a 12d85aa 104c835 12d85aa c80e50a 12d85aa c80e50a 12d85aa 104c835 ad2823c c80e50a 12d85aa ad2823c c80e50a ad2823c c80e50a ad2823c c80e50a 12d85aa c80e50a 104c835 12d85aa c80e50a 12d85aa c80e50a 12d85aa c80e50a 12d85aa fa66cd4 ad2823c 12d85aa c80e50a 12d85aa ad2823c c80e50a ad2823c 104c835 c80e50a | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 | """
Data models for the PRobe Environment.
An agent reviews Python source files, identifies bugs, security issues,
and design problems, then submits a structured review.
"""
from __future__ import annotations
from enum import Enum
from typing import Any
from openenv.core.env_server.types import Action, Observation
from pydantic import BaseModel, ConfigDict, Field
class ActionType(str, Enum):
    """Closed set of moves an agent can make within one review episode.

    Members also subclass ``str``, so each one compares equal to its
    lowercase wire value.
    """

    # Annotating and gathering information.
    ADD_COMMENT = "add_comment"
    GET_CONTEXT = "get_context"  # probe a line for deeper causal context
    RUN_SCANNER = "run_scanner"  # invoke external static-analysis tool

    # Review outcomes.
    REQUEST_CHANGES = "request_changes"
    APPROVE = "approve"
    SUBMIT_REVIEW = "submit_review"

    # Adversarial tasks.
    ESCALATE_TO_SECURITY_REVIEW = "escalate_to_security_review"
class Severity(str, Enum):
    """How serious a review comment is, from ``INFO`` up to ``CRITICAL``.

    Members also subclass ``str`` and compare equal to their lowercase value.
    """

    INFO = "info"
    WARNING = "warning"
    ERROR = "error"
    CRITICAL = "critical"
class IssueCategory(str, Enum):
    """Taxonomy of issue kinds that a review comment can be filed under.

    Members also subclass ``str`` and compare equal to their lowercase value.
    """

    BUG = "bug"
    SECURITY = "security"
    PERFORMANCE = "performance"
    STYLE = "style"
    DESIGN = "design"
class IssueClassification(str, Enum):
    """Intent behind an issue: an honest mistake or a deliberate backdoor.

    Members also subclass ``str`` and compare equal to their lowercase value.
    """

    ACCIDENTAL_BUG = "accidental_bug"
    INTENTIONAL_BACKDOOR = "intentional_backdoor"
class RewardType(BaseModel):
    """
    Immutable, structured reward handed back by ``step()``.

    Attributes:
        total: Final clamped score; validated to lie in ``[-1.0, 1.0]``.
        components: Named sub-scores before clamping (their sum may fall
            outside ``[-1, 1]``). Defaults to an empty dict.
        passed: ``True`` when the action produced a clear positive signal.
        explanation: Human-readable breakdown for logging / debugging.
        step: Environment step at which this reward was issued (``>= 0``).
        terminal: ``True`` only on the ``SUBMIT_REVIEW`` step.
    """

    # Instances are frozen: attributes cannot be reassigned after creation.
    model_config = ConfigDict(frozen=True)

    # Required, range-checked score.
    total: float = Field(..., ge=-1.0, le=1.0)

    # Optional details; each instance gets its own fresh dict.
    components: dict[str, float] = Field(default_factory=dict)
    passed: bool = False
    explanation: str = ""
    step: int = Field(default=0, ge=0)
    terminal: bool = False
class ProbeAction(Action):
    """
    A single action submitted by the agent during a review episode.

    Only ``action_type`` is required; every other field defaults to ``None``.

    Action types:
        ADD_COMMENT      — annotate a specific line with a review comment.
        GET_CONTEXT      — reveal ±5 lines of context around a line number.
        RUN_SCANNER      — invoke a simulated static-analysis tool; returns
                           noisy findings (partial recall, possible FPs) that
                           the agent must verify before commenting.
        REQUEST_CHANGES  — mark the PR as requiring changes before merge.
        APPROVE          — approve the PR (penalised if issues remain).
        SUBMIT_REVIEW    — finalise and submit the review (ends the episode).
        ESCALATE_TO_SECURITY_REVIEW — also defined on ``ActionType``, where it
                           is marked as being for adversarial tasks.
    """

    # Which kind of action this is (required).
    action_type: ActionType = Field(..., description="Type of review action")

    # Target line; when given it must be >= 1 (1-based numbering).
    line_number: int | None = Field(
        default=None,
        description="1-based source line being commented on or probed",
        ge=1,
    )

    # Free-text body and its optional labels.
    comment: str | None = Field(
        default=None,
        description="Review comment text",
    )
    severity: Severity | None = Field(
        default=None,
        description="Issue severity level",
    )
    category: IssueCategory | None = Field(
        default=None,
        description="Issue category",
    )
    classification: IssueClassification | None = Field(
        default=None,
        description="Whether this issue is an accidental_bug or intentional_backdoor",
    )
class ProbeObservation(Observation):
    """
    The observation returned to the agent after every ``reset()`` / ``step()``.

    The ``reward`` field mirrors ``RewardType.total`` for the most recent step
    as a convenience; the authoritative reward object is returned by ``step()``.

    Every field has a default, so an observation can be built with no
    arguments. Counters are validated as non-negative, ``max_steps`` must be
    at least 1, and ``reward`` must lie in ``[-1.0, 1.0]``.
    """

    # --- Task under review -------------------------------------------------
    code_snippet: str = Field(default="", description="Python source code to review (mutated each episode)")
    task_description: str = Field(default="", description="Review instructions and goals")
    file_name: str = Field(default="", description="Name of the file being reviewed")
    # Description text repaired: the range dash had been mis-encoded.
    task_id: int = Field(default=0, ge=0, description="Current task index (0–9)")
    task_difficulty: str = Field(default="ultra-easy", description="Task difficulty label")

    # --- Episode progress --------------------------------------------------
    review_history: list[dict[str, Any]] = Field(
        default_factory=list,
        description="Ordered list of all actions taken so far this episode",
    )
    step_count: int = Field(default=0, ge=0, description="Steps taken in current episode")
    max_steps: int = Field(default=6, ge=1, description="Step budget for this task")
    issues_found_count: int = Field(default=0, ge=0, description="Distinct issues identified so far")
    total_issues: int = Field(default=0, ge=0, description="Total ground-truth issues in this task")
    # Description text repaired: the clause dash had been mis-encoded.
    context_hints: list[str] = Field(
        default_factory=list,
        description="Causal context unlocked by finding key issues — read these before continuing",
    )

    # --- Step outcome ------------------------------------------------------
    done: bool = Field(default=False, description="Whether the episode has ended")
    reward: float = Field(
        default=0.0,
        ge=-1.0,
        le=1.0,
        description="Most recent step reward (mirrors RewardType.total)",
    )
    metadata: dict[str, Any] = Field(default_factory=dict, description="Extra episode metadata")

    # --- Adversarial tasks -------------------------------------------------
    adversarial_hint: str = Field(
        default="",
        description="Contributor context hint for adversarial tasks (partial observability)",
    )
# Public API of this module, kept in alphabetical order.
# ``IssueClassification`` is listed because it is the type of the public
# ``ProbeAction.classification`` field and must be importable alongside it.
__all__ = [
    "ActionType",
    "IssueCategory",
    "IssueClassification",
    "ProbeAction",
    "ProbeObservation",
    "RewardType",
    "Severity",
]
|