File size: 5,765 Bytes
12d85aa
c80e50a
12d85aa
 
 
 
 
c80e50a
 
12d85aa
ad2823c
12d85aa
 
ad2823c
12d85aa
 
 
c80e50a
 
12d85aa
c80e50a
 
12d85aa
 
 
104c835
12d85aa
 
 
c80e50a
 
12d85aa
 
 
 
 
 
 
c80e50a
 
12d85aa
 
 
 
 
 
 
104c835
 
 
 
 
 
 
ad2823c
 
c80e50a
 
 
 
 
 
 
 
 
12d85aa
 
ad2823c
 
 
 
c80e50a
 
 
 
ad2823c
 
c80e50a
ad2823c
c80e50a
 
 
 
 
 
 
 
 
 
 
12d85aa
 
 
c80e50a
 
 
 
 
 
 
 
104c835
 
 
 
12d85aa
 
c80e50a
12d85aa
c80e50a
 
 
 
12d85aa
 
c80e50a
12d85aa
 
fa66cd4
ad2823c
 
12d85aa
c80e50a
 
 
 
 
 
 
 
 
12d85aa
ad2823c
c80e50a
 
 
 
 
 
ad2823c
104c835
 
 
 
c80e50a
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
"""
Data models for the PRobe Environment.

An agent reviews Python source files, identifies bugs, security issues,
and design problems, then submits a structured review.
"""

from __future__ import annotations

from enum import Enum
from typing import Any

from openenv.core.env_server.types import Action, Observation
from pydantic import BaseModel, ConfigDict, Field


class ActionType(str, Enum):
    """Every move available to the agent while a review episode is running.

    Members also subclass ``str``, so each compares equal to its string value.
    """

    ADD_COMMENT = "add_comment"
    # Probe a line for deeper causal context.
    GET_CONTEXT = "get_context"
    # Invoke the external static-analysis tool.
    RUN_SCANNER = "run_scanner"
    REQUEST_CHANGES = "request_changes"
    APPROVE = "approve"
    SUBMIT_REVIEW = "submit_review"
    # Used by adversarial tasks.
    ESCALATE_TO_SECURITY_REVIEW = "escalate_to_security_review"


class Severity(str, Enum):
    """How serious a review comment is, listed from ``info`` to ``critical``.

    Members also subclass ``str``, so each compares equal to its string value.
    """

    INFO = "info"
    WARNING = "warning"
    ERROR = "error"
    CRITICAL = "critical"


class IssueCategory(str, Enum):
    """The kinds of problem a review comment can be filed under.

    Members also subclass ``str``, so each compares equal to its string value.
    """

    BUG = "bug"
    SECURITY = "security"
    PERFORMANCE = "performance"
    STYLE = "style"
    DESIGN = "design"


class IssueClassification(str, Enum):
    """Intent behind an issue: an honest mistake or a deliberate backdoor.

    Members also subclass ``str``, so each compares equal to its string value.
    """

    ACCIDENTAL_BUG = "accidental_bug"
    INTENTIONAL_BACKDOOR = "intentional_backdoor"


class RewardType(BaseModel):
    """
    Immutable reward record handed back by ``step()``.

    The model is frozen, so fields cannot be reassigned after construction.

    Attributes:
        total:       Final clamped score; validated to lie in ``[-1.0, 1.0]``.
        components:  Named sub-scores before clamping (their sum may fall
                     outside ``[-1, 1]``). Defaults to an empty dict.
        passed:      ``True`` when the action produced a clear positive signal.
        explanation: Human-readable breakdown for logging / debugging.
        step:        Environment step at which this reward was issued (>= 0).
        terminal:    ``True`` only on the ``SUBMIT_REVIEW`` step.
    """

    model_config = ConfigDict(frozen=True)

    # Required field; no default is provided.
    total: float = Field(..., ge=-1.0, le=1.0)
    # A factory is used so every instance gets its own dict.
    components: dict[str, float] = Field(default_factory=dict)
    passed: bool = False
    explanation: str = ""
    step: int = Field(0, ge=0)
    terminal: bool = False


class ProbeAction(Action):
    """
    A single action submitted by the agent during a review episode.

    Only ``action_type`` is required; every other field defaults to ``None``.

    Action types:
        ADD_COMMENT     — annotate a specific line with a review comment.
        GET_CONTEXT     — reveal ±5 lines of context around a line number.
        RUN_SCANNER     — invoke a simulated static-analysis tool; returns
                          noisy findings (partial recall, possible FPs) that
                          the agent must verify before commenting.
        REQUEST_CHANGES — mark the PR as requiring changes before merge.
        APPROVE         — approve the PR (penalised if issues remain).
        SUBMIT_REVIEW   — finalise and submit the review (ends the episode).
        ESCALATE_TO_SECURITY_REVIEW
                        — escalation action used by adversarial tasks.
    """

    action_type: ActionType = Field(
        ...,
        description="Type of review action",
    )
    # ``ge=1`` rejects zero and negative line numbers.
    line_number: int | None = Field(
        None, ge=1, description="1-based source line being commented on or probed"
    )
    comment: str | None = Field(
        None,
        description="Review comment text",
    )
    severity: Severity | None = Field(
        None,
        description="Issue severity level",
    )
    category: IssueCategory | None = Field(
        None,
        description="Issue category",
    )
    classification: IssueClassification | None = Field(
        None, description="Whether this issue is an accidental_bug or intentional_backdoor"
    )


class ProbeObservation(Observation):
    """
    The observation returned to the agent after every ``reset()`` / ``step()``.

    It carries the code under review, the task framing, the running episode
    progress (history, step budget, issue counts, unlocked hints) and the
    latest reward signal. Every field has a default, so an empty observation
    can be constructed without arguments.

    The ``reward`` field mirrors ``RewardType.total`` for the most recent step
    as a convenience; the authoritative reward object is returned by ``step()``.
    """

    # --- What is being reviewed -------------------------------------------
    code_snippet: str = Field(
        default="",
        description="Python source code to review (mutated each episode)",
    )
    task_description: str = Field(default="", description="Review instructions and goals")
    file_name: str = Field(default="", description="Name of the file being reviewed")
    # Only the lower bound is validated; the "0–9" range is descriptive.
    task_id: int = Field(default=0, ge=0, description="Current task index (0–9)")
    task_difficulty: str = Field(default="ultra-easy", description="Task difficulty label")

    # --- Episode progress -------------------------------------------------
    review_history: list[dict[str, Any]] = Field(
        default_factory=list,
        description="Ordered list of all actions taken so far this episode",
    )
    step_count: int = Field(default=0, ge=0, description="Steps taken in current episode")
    max_steps: int = Field(default=6, ge=1, description="Step budget for this task")
    issues_found_count: int = Field(
        default=0,
        ge=0,
        description="Distinct issues identified so far",
    )
    total_issues: int = Field(
        default=0,
        ge=0,
        description="Total ground-truth issues in this task",
    )
    # The description is emitted in the model's schema, so the em dash must be
    # a real "—" rather than a mis-decoded byte sequence.
    context_hints: list[str] = Field(
        default_factory=list,
        description="Causal context unlocked by finding key issues — read these before continuing",
    )

    # --- Step outcome -----------------------------------------------------
    # NOTE(review): done / reward / metadata may also be declared on the base
    # ``Observation`` class — confirm that redeclaring them here is intended.
    done: bool = Field(default=False, description="Whether the episode has ended")
    reward: float = Field(
        default=0.0,
        ge=-1.0,
        le=1.0,
        description="Most recent step reward (mirrors RewardType.total)",
    )
    metadata: dict[str, Any] = Field(default_factory=dict, description="Extra episode metadata")
    adversarial_hint: str = Field(
        default="",
        description="Contributor context hint for adversarial tasks (partial observability)",
    )


# Public API of this module, kept in alphabetical order.
# ``IssueClassification`` is exported because it is the type of
# ``ProbeAction.classification`` and callers need it to build actions.
__all__ = [
    "ActionType",
    "IssueCategory",
    "IssueClassification",
    "ProbeAction",
    "ProbeObservation",
    "RewardType",
    "Severity",
]