RAHUL-13 commited on
Commit
afedf8c
·
verified ·
1 Parent(s): 36c2b7d

Upload environment.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. environment.py +199 -0
environment.py ADDED
@@ -0,0 +1,199 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Bug Report Structuring Environment - Core Environment Logic
3
+
4
+ Implements the OpenEnv 3-method interface:
5
+ - reset(task_id) → initial observation with messy bug report
6
+ - step(action) → graded observation with score + feedback
7
+ - state() → episode metadata
8
+
9
+ The environment challenges an LLM agent to convert messy, unstructured
10
+ bug reports into well-organized structured formats.
11
+ """
12
+
13
+ import uuid
14
+ import random
15
+ from typing import Optional
16
+
17
+ from models import (
18
+ BugReportAction,
19
+ BugReportObservation,
20
+ BugReportState,
21
+ )
22
+ from tasks import get_task, get_all_task_ids
23
+ from graders import grade_submission
24
+
25
+
26
class BugReportEnvironment:
    """
    OpenEnv-compatible environment for bug report structuring.

    An episode begins with reset(), which returns the raw report of the
    selected task. Each step() grades one structured submission through
    grade_submission(). The episode ends once the task's max_steps is
    reached or a submission scores at least SUCCESS_THRESHOLD.

    Supports concurrent sessions via per-instance state: every piece of
    episode bookkeeping, including the random generator used for task
    selection, lives on the instance.
    """

    SUPPORTS_CONCURRENT_SESSIONS = True

    # A submission scoring at or above this value ends the episode early.
    SUCCESS_THRESHOLD = 0.95

    def __init__(self):
        """Create an environment with no active episode."""
        self._state = BugReportState()
        self._task = None
        self._done = False
        self._best_score = 0.0
        self._rewards = []
        self._raw_report = ""
        # Private generator so that seeding one session never touches the
        # process-global `random` state used by other sessions.
        self._rng = random.Random()

    def reset(
        self,
        task_id: Optional[str] = None,
        seed: Optional[int] = None,
        episode_id: Optional[str] = None,
    ) -> BugReportObservation:
        """
        Initialize a new episode.

        Args:
            task_id: 'easy', 'medium', or 'hard'. Random if None.
            seed: Random seed for reproducibility. Seeds this instance's own
                generator only; the global `random` module is left untouched.
            episode_id: Custom episode ID. A UUID4 string is generated when
                omitted (or empty).

        Returns:
            Initial observation containing the messy bug report.
        """
        if seed is not None:
            self._rng.seed(seed)

        if task_id is None:
            task_id = self._rng.choice(get_all_task_ids())

        # Read everything we need from the task before mutating the instance,
        # so a failing lookup cannot leave a half-reset episode behind.
        task = get_task(task_id)
        raw_report = task["raw_report"]
        max_steps = task["max_steps"]

        self._task = task
        self._raw_report = raw_report
        self._done = False
        self._best_score = 0.0
        self._rewards = []

        eid = episode_id or str(uuid.uuid4())
        self._state = BugReportState(
            episode_id=eid,
            step_count=0,
            task_id=task_id,
            max_steps=max_steps,
            current_score=0.0,
            best_score=0.0,
            done=False,
            rewards=[],
        )

        return BugReportObservation(
            raw_report=raw_report,
            feedback=(
                f"New episode started. Task: {task_id} "
                f"(max {max_steps} steps).\n"
                f"Read the messy bug report below and submit a structured version.\n\n"
                f"Required fields: title, steps_to_reproduce, expected_behavior, "
                f"actual_behavior, severity (low/medium/high/critical), environment"
            ),
            score=0.0,
            field_scores={},
            done=False,
            reward=0.0,
            step_count=0,
            task_id=task_id,
            max_steps=max_steps,
        )

    def step(self, action: BugReportAction) -> BugReportObservation:
        """
        Process an agent's structured bug report submission.

        Calling step() before reset(), or after the episode has finished,
        does not grade anything: it returns a terminal observation with an
        explanatory feedback message and zero reward.

        Args:
            action: BugReportAction with structured fields. It is converted
                with model_dump() and passed to grade_submission().

        Returns:
            Observation with score, feedback, and done status. The reward is
            the full score on the first step and, afterwards, the improvement
            over the previous best score (never negative), rounded to two
            decimals.
        """
        # No episode has been started yet.
        if self._task is None:
            return BugReportObservation(
                raw_report="",
                feedback="Error: No active episode. Call reset() first.",
                score=0.0,
                field_scores={},
                done=True,
                reward=0.0,
                step_count=0,
                task_id="",
                max_steps=0,
            )

        # Episode already finished: report the best score, grade nothing.
        if self._done:
            return BugReportObservation(
                raw_report=self._raw_report,
                feedback=(
                    f"Episode already completed. Best score: {self._best_score:.2f}. "
                    f"Call reset() for a new episode."
                ),
                score=self._best_score,
                field_scores={},
                done=True,
                reward=0.0,
                step_count=self._state.step_count,
                task_id=self._state.task_id,
                max_steps=self._state.max_steps,
            )

        self._state.step_count += 1

        score, field_scores, feedback = grade_submission(
            action.model_dump(), self._task
        )

        # Capture the previous best *before* updating it, so the reward does
        # not depend on when the state mirror gets refreshed further down.
        prev_best = self._best_score
        self._best_score = max(prev_best, score)

        # Reward shaping: the first step earns its full score, later steps
        # only earn the improvement over the previous best.
        if self._state.step_count == 1:
            reward = score
        else:
            reward = max(0.0, score - prev_best)
        reward = round(reward, 2)
        self._rewards.append(reward)

        at_max_steps = self._state.step_count >= self._state.max_steps
        high_score = score >= self.SUCCESS_THRESHOLD
        self._done = at_max_steps or high_score

        # Mirror the bookkeeping into the state object exposed via `state`.
        self._state.current_score = score
        self._state.best_score = self._best_score
        self._state.done = self._done
        self._state.rewards = self._rewards.copy()

        if self._done:
            if high_score:
                feedback += f"\n\n🎉 Excellent! Score {score:.2f} achieved. Episode complete."
            else:
                feedback += (
                    f"\n\nMax steps reached. Best score: {self._best_score:.2f}. "
                    f"Episode complete."
                )

        return BugReportObservation(
            raw_report=self._raw_report,
            feedback=feedback,
            score=score,
            field_scores=field_scores,
            done=self._done,
            reward=reward,
            step_count=self._state.step_count,
            task_id=self._state.task_id,
            max_steps=self._state.max_steps,
        )

    @property
    def state(self) -> BugReportState:
        """Return current episode state."""
        return self._state