File size: 13,503 Bytes
38ab410
666b49a
 
 
38ab410
703aa57
 
38ab410
 
6ff231e
ca4e18e
b8396ef
ca4e18e
703aa57
 
46680d3
703aa57
 
7eb0325
703aa57
 
 
 
7eb0325
703aa57
 
 
 
7eb0325
703aa57
 
46680d3
 
703aa57
 
 
38ab410
703aa57
38ab410
46680d3
 
38ab410
6174aa3
b8396ef
 
703aa57
 
 
 
 
 
 
 
 
 
 
 
a6bc27b
 
 
 
703aa57
a6bc27b
703aa57
6ff231e
38ab410
46680d3
 
 
 
 
703aa57
 
 
46680d3
 
 
 
 
 
703aa57
 
46680d3
 
 
 
703aa57
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b8396ef
 
 
 
 
703aa57
 
 
 
 
 
 
 
 
 
 
 
 
a6bc27b
46680d3
b8396ef
6ff231e
703aa57
6ff231e
703aa57
6ff231e
703aa57
 
 
 
 
 
 
 
 
 
 
38ab410
 
 
703aa57
ca4e18e
703aa57
 
ca4e18e
703aa57
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ca4e18e
 
703aa57
 
 
 
 
 
 
 
 
38ab410
703aa57
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b8396ef
38ab410
703aa57
b8396ef
38ab410
703aa57
 
38ab410
 
 
 
ca4e18e
 
 
703aa57
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
# server/environment.py
import sys
sys.path.insert(0, "/app")
sys.path.insert(0, "/app/server")
import uuid
import time
from typing import Dict, Optional, Tuple
from openenv.core.env_server.interfaces import Environment
from model import TriageAction, TriageObservation, TriageState, BugReport
from task import grade_action, sample_bug

VALID_TASKS = ["easy", "medium", "hard"]

MAX_STEPS_PER_TASK = {"easy": 4, "medium": 5, "hard": 6}

TASKS_META = [
    {"id": "easy", "name": "Priority Assignment",
     "grader": "server.task:priority_match",
     "difficulty": "easy", "reward_range": [0.0, 1.0],
     "description": "Investigate a bug report and assign a P0-P3 priority. "
                    "Use investigation actions to gather info before submitting."},
    {"id": "medium", "name": "Priority Labels and Team",
     "grader": "server.task:priority_label_team",
     "difficulty": "medium", "reward_range": [0.0, 1.0],
     "description": "Investigate and assign priority, labels, and team routing. "
                    "More investigation steps available."},
    {"id": "hard", "name": "Full Triage",
     "grader": "server.task:full_triage",
     "difficulty": "hard", "reward_range": [0.0, 1.0],
     "description": "Full triage with priority, labels, team, milestone, "
                    "and security escalation penalty. Investigation is critical."},
]

INVESTIGATION_ACTIONS = {"read_body", "read_comments", "check_logs", "check_similar"}


class BugTriageEnvironment(Environment):
    """Multi-step bug triage environment with progressive information reveal."""

    SUPPORTS_CONCURRENT_SESSIONS = True

    def __init__(self):
        super().__init__()
        self._current_task_key: str = "easy"
        self._episode_done: bool = False
        self._current_bug: Optional[BugReport] = None
        self._current_answer: Optional[dict] = None
        self._step_count: int = 0
        self._max_steps: int = 4
        self._actions_taken: list = []

        # Progressive visibility
        self._body_visible: bool = False
        self._comments_visible: bool = False
        self._logs_visible: bool = False
        self._similar_visible: bool = False

        self._state = TriageState(
            episode_id=str(uuid.uuid4()),
            current_task="easy",
            step_count=0,
            total_score=0.0,
            tasks_completed=[],
            actions_taken=[],
        )

    def get_metadata(self):
        try:
            from openenv.core.env_server.types import EnvironmentMetadata
            return EnvironmentMetadata(
                name="bug-triage-env",
                description="Multi-step bug triage RL environment with progressive "
                            "information reveal and 3 difficulty levels",
                version="2.0.0",
                author="Siteshcodes",
                tasks=TASKS_META,
            )
        except Exception:
            return {
                "name": "bug-triage-env",
                "description": "Multi-step bug triage RL environment",
                "version": "2.0.0",
                "author": "Siteshcodes",
                "tasks": TASKS_META,
            }

    def _build_observation(self, score=0.0, feedback="", done=False,
                           reward=0.0) -> TriageObservation:
        """Build observation with current visibility state."""
        bug = self._current_bug

        # Create a visibility-filtered view of the bug
        visible_bug = BugReport(
            id=bug.id,
            title=bug.title,
            body=bug.body if self._body_visible else bug.body[:120] + "..." if len(bug.body) > 120 else bug.body,
            author=bug.author,
            labels_hint=bug.labels_hint,
            comments=bug.comments if self._comments_visible else [],
            severity_signals=bug.severity_signals if self._logs_visible else [],
            related_bugs=bug.related_bugs if self._similar_visible else [],
            stack_trace=bug.stack_trace if self._logs_visible else "",
            affected_component=bug.affected_component if self._logs_visible else "",
        )

        return TriageObservation(
            bug_report=visible_bug,
            task_id=self._current_task_key,
            score=round(score, 3),
            feedback=feedback,
            done=done,
            reward=round(reward, 3),
            body_visible=self._body_visible,
            comments_visible=self._comments_visible,
            logs_visible=self._logs_visible,
            similar_visible=self._similar_visible,
            steps_taken=self._step_count,
            max_steps=self._max_steps,
        )

    def reset(self, task_id: str = "easy", seed: int = None,
              episode_id: str = None, **kwargs) -> TriageObservation:
        """Start a fresh episode for the given task."""
        if task_id not in VALID_TASKS:
            task_id = "easy"

        self._current_task_key = task_id
        self._episode_done = False
        self._step_count = 0
        self._max_steps = MAX_STEPS_PER_TASK.get(task_id, 4)
        self._actions_taken = []

        # Reset visibility β€” title + truncated body are always visible
        self._body_visible = False
        self._comments_visible = False
        self._logs_visible = False
        self._similar_visible = False

        # Sample a bug and its answer
        self._current_bug, self._current_answer = sample_bug(task_id, seed=seed)

        self._state = TriageState(
            episode_id=episode_id or str(uuid.uuid4()),
            current_task=task_id,
            step_count=0,
            total_score=0.0,
            tasks_completed=[],
            actions_taken=[],
        )

        feedback = (
            f"Episode started for task: {task_id}. "
            f"You see the bug title and a preview. "
            f"Use investigation actions (read_body, read_comments, check_logs, check_similar) "
            f"to reveal more information, then submit your triage. "
            f"You have {self._max_steps} steps max."
        )

        return self._build_observation(
            score=0.0, feedback=feedback, done=False, reward=0.0,
        )

    def step(self, action: TriageAction) -> TriageObservation:
        """Process agent's action β€” either investigate or submit final triage."""
        if self._episode_done:
            return self._build_observation(
                score=0.0,
                feedback="Episode already complete. Call reset() to start a new episode.",
                done=True, reward=0.0,
            )

        self._step_count += 1
        self._state.step_count = self._step_count
        action_type = getattr(action, "action_type", "submit")
        self._actions_taken.append(action_type)
        self._state.actions_taken = list(self._actions_taken)

        # Check if max steps reached β€” force submission
        if self._step_count >= self._max_steps and action_type != "submit":
            action_type = "submit"

        # --- Investigation actions ---
        if action_type in INVESTIGATION_ACTIONS:
            feedback = self._handle_investigation(action_type)
            return self._build_observation(
                score=0.0, feedback=feedback, done=False, reward=0.0,
            )

        # --- Submit action ---
        return self._handle_submission(action)

    def _handle_investigation(self, action_type: str) -> str:
        """Reveal information based on the investigation action."""
        if action_type == "read_body":
            if self._body_visible:
                return "Full body already revealed. Choose another action or submit."
            self._body_visible = True
            return (
                f"Full bug description revealed. "
                f"Steps used: {self._step_count}/{self._max_steps}."
            )

        elif action_type == "read_comments":
            if self._comments_visible:
                return "Comments already revealed. Choose another action or submit."
            self._comments_visible = True
            n = len(self._current_bug.comments)
            return (
                f"Revealed {n} comment(s). "
                f"Steps used: {self._step_count}/{self._max_steps}."
            )

        elif action_type == "check_logs":
            if self._logs_visible:
                return "Logs already revealed. Choose another action or submit."
            self._logs_visible = True
            has_trace = bool(self._current_bug.stack_trace)
            return (
                f"System logs revealed. {'Stack trace available.' if has_trace else 'No stack trace.'} "
                f"Steps used: {self._step_count}/{self._max_steps}."
            )

        elif action_type == "check_similar":
            if self._similar_visible:
                return "Similar bugs already revealed. Choose another action or submit."
            self._similar_visible = True
            n = len(self._current_bug.related_bugs)
            return (
                f"Found {n} related bug(s). "
                f"Steps used: {self._step_count}/{self._max_steps}."
            )

        return f"Unknown investigation action: {action_type}"

    def _handle_submission(self, action: TriageAction) -> TriageObservation:
        """Grade the agent's final triage submission."""
        score, feedback = grade_action(
            self._current_task_key, self._current_bug, action,
            answer=self._current_answer,
        )

        # Apply time efficiency bonus/penalty
        # Fewer steps = better (if the answer is good)
        investigation_steps = self._step_count - 1  # subtract the submit step
        if investigation_steps == 0 and score >= 0.7:
            # Got it right without investigating β€” impressive!
            efficiency_bonus = 0.05
            feedback += " | ⚑ Efficiency bonus: +0.05 (correct with minimal investigation)"
        elif investigation_steps >= 3 and score >= 0.7:
            # Took many steps but got it right β€” slight penalty for slowness
            efficiency_penalty = 0.02 * (investigation_steps - 2)
            score = score - efficiency_penalty
            feedback += f" | ⏱ Time penalty: -{efficiency_penalty:.2f} ({investigation_steps} investigation steps)"
        elif investigation_steps == 0 and score < 0.5:
            # Rushed and got it wrong β€” penalty
            feedback += " | ⚠ Consider investigating before submitting next time"

        if investigation_steps == 0 and score >= 0.7:
            score += 0.05

        score = max(0.01, min(0.99, score))

        self._state.total_score += score
        self._state.tasks_completed.append(self._current_task_key)
        self._episode_done = True

        return self._build_observation(
            score=score, feedback=feedback, done=True, reward=score,
        )

    @property
    def state(self) -> TriageState:
        return self._state

    def get_state(self) -> TriageState:
        return self._state


# ---------------------------------------------------------------------------
#  SESSION MANAGER β€” handles concurrent sessions safely
# ---------------------------------------------------------------------------

class SessionManager:
    """Thread-safe session management for multiple concurrent agents."""

    def __init__(self, max_sessions: int = 1000, ttl_seconds: int = 600):
        self._sessions: Dict[str, BugTriageEnvironment] = {}
        self._timestamps: Dict[str, float] = {}
        self._max_sessions = max_sessions
        self._ttl = ttl_seconds

    def create_session(self) -> Tuple[str, BugTriageEnvironment]:
        """Create a new session and return (session_id, env)."""
        self._cleanup_expired()
        session_id = str(uuid.uuid4())
        env = BugTriageEnvironment()
        self._sessions[session_id] = env
        self._timestamps[session_id] = time.time()
        # Enforce max after adding
        while len(self._sessions) > self._max_sessions:
            oldest = min(self._timestamps, key=self._timestamps.get)
            if oldest == session_id:
                break
            self._sessions.pop(oldest, None)
            self._timestamps.pop(oldest, None)
        return session_id, env

    def get_session(self, session_id: str) -> Optional[BugTriageEnvironment]:
        """Get an existing session's environment, or None if expired/missing."""
        if session_id not in self._sessions:
            return None
        # Refresh TTL on access
        self._timestamps[session_id] = time.time()
        return self._sessions[session_id]

    def remove_session(self, session_id: str) -> None:
        """Remove a session after episode completes."""
        self._sessions.pop(session_id, None)
        self._timestamps.pop(session_id, None)

    def _cleanup_expired(self) -> None:
        """Remove sessions that exceeded TTL."""
        now = time.time()
        expired = [
            sid for sid, ts in self._timestamps.items()
            if now - ts > self._ttl
        ]
        for sid in expired:
            self._sessions.pop(sid, None)
            self._timestamps.pop(sid, None)

        # Also enforce max sessions (remove oldest)
        while len(self._sessions) > self._max_sessions:
            oldest = min(self._timestamps, key=self._timestamps.get)
            self._sessions.pop(oldest, None)
            self._timestamps.pop(oldest, None)

    @property
    def active_count(self) -> int:
        return len(self._sessions)