RAHUL-13 committed on
Commit
3888b41
·
verified ·
1 Parent(s): 11fd923

Upload tasks.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. tasks.py +247 -0
tasks.py ADDED
@@ -0,0 +1,247 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Bug Report Structuring Environment - Task Definitions
3
+
4
+ Defines 3 tasks (easy, medium, hard) with:
5
+ - Raw messy bug reports (input to the agent)
6
+ - Ground truth structured versions (for grading)
7
+ - Keyword lists per field (for deterministic scoring)
8
+ """
9
+
10
# Severity labels, ordered from least to most severe.
SEVERITY_LEVELS = ["low", "medium", "high", "critical"]

# Similarity score between any two severity labels, keyed as
# SEVERITY_ADJACENCY[level][other]:
#   1.0 when the labels are identical,
#   0.5 when they are direct neighbours in SEVERITY_LEVELS,
#   0.0 when they are two or more positions apart.
# The table is symmetric, so lookup order does not matter.
# NOTE(review): presumably used to award partial credit when grading the
# "severity" field -- confirm against the grader.
SEVERITY_ADJACENCY = {
    level: {
        other: max(0.0, 1.0 - 0.5 * abs(level_pos - other_pos))
        for other_pos, other in enumerate(SEVERITY_LEVELS)
    }
    for level_pos, level in enumerate(SEVERITY_LEVELS)
}
18
+
19
# Task registry, keyed by difficulty ("easy", "medium", "hard").
#
# Every task entry has the same shape:
#   "task_id"      - the task's own key, repeated inside the entry
#   "max_steps"    - integer step budget for the task
#   "raw_report"   - the unstructured bug report text given to the agent
#   "ground_truth" - reference structured report with the fields
#                    title, steps_to_reproduce, expected_behavior,
#                    actual_behavior, severity, environment
#   "keywords"     - per-field keyword lists used for scoring; note that
#                    "severity" is a single string here, not a list
#
# The report strings are plain literals (never %-formatted), so a percent
# sign is written as a single "%".
TASKS = {
    # ─────────────────────────────────────────────────────────────
    # EASY: Single clear bug, all info present but messy formatting
    # ─────────────────────────────────────────────────────────────
    "easy": {
        "task_id": "easy",
        "max_steps": 3,
        "raw_report": (
            "hey so the login button doesnt work. im on chrome on windows 10 "
            "(version 120.0.6099.130). when i click the login button after entering "
            "my username and password the whole page just freezes. cant click anything, "
            "nothing responds at all. i have to force close the tab and reopen the browser. "
            "i expected it to log me in and redirect to the dashboard like it used to. "
            "this has been happening since the last update on monday. my friend on firefox "
            "says its fine for him so it might be a chrome-only issue. this is pretty bad since "
            "nobody on chrome can login right now. my email is user@test.com if you need to "
            "test with my account."
        ),
        "ground_truth": {
            "title": "Login button causes page freeze on Chrome",
            "steps_to_reproduce": (
                "1. Open the login page in Chrome\n"
                "2. Enter username and password\n"
                "3. Click the login button\n"
                "4. Observe page becomes unresponsive"
            ),
            "expected_behavior": (
                "User should be logged in successfully and redirected to the dashboard"
            ),
            "actual_behavior": (
                "Page freezes completely after clicking login button, "
                "requiring force close of the browser tab"
            ),
            "severity": "high",
            "environment": "Windows 10, Chrome 120.0.6099.130",
        },
        "keywords": {
            "title": ["login", "button", "freeze", "chrome"],
            "steps_to_reproduce": [
                "login", "page", "click", "button", "username", "password",
                "enter", "freeze"
            ],
            "expected_behavior": [
                "log in", "redirect", "dashboard", "success"
            ],
            "actual_behavior": [
                "freeze", "unresponsive", "close", "not working", "stuck"
            ],
            "severity": "high",
            "environment": ["windows", "chrome", "120"],
        },
    },

    # ─────────────────────────────────────────────────────────────
    # MEDIUM: Multiple symptoms, some ambiguity, partial info
    # ─────────────────────────────────────────────────────────────
    "medium": {
        "task_id": "medium",
        "max_steps": 4,
        "raw_report": (
            "Search is acting weird. Sometimes when I search for products the results are "
            "from like 2 days ago?? Not showing the newest items we added yesterday. Also "
            "the price filter seems broken - I set max price to $50 but still get items "
            "showing $75, $100 etc in the results. Oh and one more thing the search "
            "suggestions dropdown sometimes shows raw HTML tags like <b>keyword</b> instead "
            "of just the keyword text.\n\n"
            "I'm on a MacBook Pro, using Safari 17.2. Not sure about the exact macOS version "
            "but it's the latest one I think (Sonoma maybe?). My colleague on Chrome doesn't "
            "see the stale results issue but does see the filter problem so that seems "
            "cross-browser.\n\n"
            "This is medium priority I guess? But the stale results thing could be really bad "
            "if customers see old prices that we've already updated. The HTML tags thing looks "
            "unprofessional too. We first noticed this after the search index migration last week."
        ),
        "ground_truth": {
            "title": "Search returns stale results, price filter ignores max value, and HTML tags visible in suggestions",
            "steps_to_reproduce": (
                "1. Search for any product in the search bar\n"
                "2. Observe results showing items from 2+ days ago instead of current data\n"
                "3. Set price filter maximum to $50\n"
                "4. Observe results still showing items priced above $50\n"
                "5. Type in search bar and observe suggestion dropdown\n"
                "6. Notice raw HTML tags (<b></b>) appearing in suggestions"
            ),
            "expected_behavior": (
                "Search should return current/up-to-date results, price filter should "
                "correctly exclude items above the set maximum, and search suggestions "
                "should display clean text without HTML markup"
            ),
            "actual_behavior": (
                "Three issues: (1) Search results are stale/outdated by ~2 days, "
                "(2) Price filter max value is ignored showing items above the limit, "
                "(3) Search suggestion dropdown shows raw HTML tags instead of formatted text"
            ),
            "severity": "medium",
            "environment": "macOS Sonoma, Safari 17.2, MacBook Pro",
        },
        "keywords": {
            "title": [
                "search", "stale", "filter", "price", "html", "result"
            ],
            "steps_to_reproduce": [
                "search", "product", "result", "filter", "price", "$50",
                "suggestion", "html", "tag", "dropdown"
            ],
            "expected_behavior": [
                "current", "up-to-date", "filter", "maximum",
                "clean", "text", "result"
            ],
            "actual_behavior": [
                "stale", "outdated", "old", "filter", "ignored", "html",
                "tag", "price", "above"
            ],
            "severity": "medium",
            "environment": ["mac", "safari", "17"],
        },
    },

    # ─────────────────────────────────────────────────────────────
    # HARD: Multiple distinct bugs, technical details, compound report
    # ─────────────────────────────────────────────────────────────
    "hard": {
        "task_id": "hard",
        "max_steps": 5,
        "raw_report": (
            "OK so there's multiple issues with the analytics dashboard that I need to report.\n\n"
            "1) The real-time chart stopped updating after we deployed v2.3.1 last Thursday "
            "(March 15th). It used to refresh every 5 seconds showing live user counts but now "
            "it's completely frozen showing data from March 15th 10:00 AM. I checked the browser "
            "DevTools and the WebSocket connection is throwing 1006 errors (abnormal closure). "
            "This affects all users not just me. Before v2.3.1 (on v2.3.0) it worked perfectly.\n\n"
            "2) Export to CSV is generating corrupted number formats. For example a value that "
            "should be 1,234.56 shows up as 1234.56000000001 in the exported file. This is "
            "clearly a floating point precision issue. It happens with all numeric columns but "
            "only in the CSV export - the on-screen values look fine. We use this for our "
            "quarterly finance report so accuracy is critical.\n\n"
            "3) CRITICAL BUG: When switching between the 'Daily' and 'Monthly' view tabs WHILE "
            "data is still loading (you can see the spinner), the entire application crashes to "
            "a white screen. Console shows: \"Uncaught TypeError: Cannot read properties of "
            "undefined (reading 'map')\" at DashboardView.jsx line 142. This is reproducible "
            # Single "%": this literal is not %-formatted, so "%%" would
            # reach the agent as two percent signs.
            "100% of the time. If you wait for data to finish loading before switching tabs "
            "it works fine.\n\n"
            "Environment: React 18.2.0, Node.js 18.17.0, deployed on AWS ECS (Fargate), "
            "Chrome 122.0.6261.94 on Ubuntu 22.04. The dashboard is accessed at "
            "https://analytics.internal.company.com\n\n"
            "I'd rate this as critical overall since issue #3 causes complete app failure "
            "and issue #2 is blocking our finance team. Issue #1 is high priority since "
            "we need real-time monitoring for ops.\n\n"
            "Contact: devops-team@company.com, slack channel: #dashboard-issues"
        ),
        "ground_truth": {
            "title": (
                "Analytics Dashboard: Real-time chart frozen after v2.3.1, "
                "CSV export floating point corruption, crash on view tab switching during load"
            ),
            "steps_to_reproduce": (
                "Issue 1 - Chart Frozen:\n"
                "1. Open analytics dashboard\n"
                "2. Observe real-time chart is not updating (stuck at March 15th 10:00 AM data)\n"
                "3. Check DevTools Network tab - WebSocket shows 1006 errors\n\n"
                "Issue 2 - CSV Export Corruption:\n"
                "1. Navigate to any data view with numeric values\n"
                "2. Click Export to CSV\n"
                "3. Open CSV file and check numeric columns\n"
                "4. Observe floating point precision errors (e.g., 1234.56000000001)\n\n"
                "Issue 3 - Crash on Tab Switch:\n"
                "1. Open dashboard with Daily view\n"
                "2. While data is still loading (spinner visible), click Monthly tab\n"
                "3. Application crashes to white screen\n"
                "4. Console shows TypeError: Cannot read properties of undefined (reading 'map')"
            ),
            "expected_behavior": (
                "1. Real-time chart should refresh every 5 seconds with live data\n"
                "2. CSV export should preserve exact numeric formatting (e.g., 1,234.56)\n"
                "3. Switching between Daily/Monthly views should work smoothly even during loading"
            ),
            "actual_behavior": (
                "1. Chart is frozen since v2.3.1 deployment, WebSocket connection fails with 1006\n"
                "2. CSV numbers have floating point errors (1234.56000000001 instead of 1,234.56)\n"
                "3. App crashes with white screen and TypeError when switching tabs during data load"
            ),
            "severity": "critical",
            "environment": (
                "React 18.2.0, Node.js 18.17.0, AWS ECS (Fargate), "
                "Chrome 122.0.6261.94, Ubuntu 22.04"
            ),
        },
        "keywords": {
            "title": [
                "dashboard", "chart", "csv", "export", "crash",
                "frozen", "tab", "switch"
            ],
            "steps_to_reproduce": [
                "chart", "update", "websocket", "1006", "csv", "export",
                "numeric", "floating point", "precision", "daily", "monthly",
                "switch", "tab", "loading", "spinner", "white screen",
                "typeerror", "map", "v2.3.1"
            ],
            "expected_behavior": [
                "refresh", "5 seconds", "live", "data", "csv", "accurate",
                "numeric", "format", "switch", "smooth", "loading"
            ],
            "actual_behavior": [
                "frozen", "stuck", "1006", "websocket", "floating point",
                "precision", "1234.56000000001", "white screen", "crash",
                "typeerror", "undefined", "map"
            ],
            "severity": "critical",
            "environment": [
                "react", "18.2", "node", "18.17", "aws", "ecs", "fargate",
                "chrome", "122", "ubuntu", "22.04"
            ],
        },
    },
}
234
+
235
+
236
def get_task(task_id: str) -> dict:
    """Look up a task definition in ``TASKS``.

    Args:
        task_id: Key of the task to fetch.

    Returns:
        The task dictionary stored under ``task_id``. This is the same
        object held in ``TASKS``, not a copy.

    Raises:
        ValueError: If ``task_id`` is not a key of ``TASKS``; the message
            lists the valid IDs.
    """
    if task_id in TASKS:
        return TASKS[task_id]

    valid_ids = list(TASKS.keys())
    raise ValueError(
        f"Invalid task_id '{task_id}'. Must be one of: {valid_ids}"
    )
243
+
244
+
245
def get_all_task_ids() -> list:
    """List the IDs of every task registered in ``TASKS``.

    Returns:
        A new list of the ``TASKS`` keys, in the dictionary's own order.
    """
    # Iterating a dict yields its keys, so this matches list(TASKS.keys()).
    return list(TASKS)