RAHUL-13 committed on
Commit
1fc49ff
·
verified ·
1 Parent(s): 2f5775d

Upload app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +220 -0
app.py ADDED
@@ -0,0 +1,220 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Bug Report Structuring Environment - FastAPI Server
3
+
4
+ Exposes the environment via HTTP endpoints:
5
+ POST /reset → start new episode
6
+ POST /step → submit structured bug report
7
+ GET /state → get episode metadata
8
+ GET /health → health check
9
+ GET / → landing page
10
+ """
11
+
12
+ import os
13
+ import uuid
14
+ from contextlib import asynccontextmanager
15
+ from typing import Dict
16
+
17
+ from fastapi import FastAPI, HTTPException
18
+ from fastapi.responses import HTMLResponse, JSONResponse
19
+
20
+ from models import (
21
+ ResetRequest,
22
+ StepRequest,
23
+ BugReportAction,
24
+ BugReportObservation,
25
+ BugReportState,
26
+ )
27
+ from environment import BugReportEnvironment
28
+ from tasks import get_all_task_ids
29
+
30
+
31
+ # ─── Session Management ──────────────────────────────────────────
32
# Each session gets its own environment instance.
# Maps a session key to its BugReportEnvironment. Entries are added lazily by
# get_or_create_env() and are only dropped when lifespan() clears the dict at
# shutdown, so the mapping grows with every distinct key seen.
_sessions: Dict[str, BugReportEnvironment] = {}
# Key used by get_or_create_env() when the caller passes no (or an empty)
# session id.
_default_session_id = "default"
35
+
36
+
37
def get_or_create_env(session_id: str = None) -> BugReportEnvironment:
    """Return the environment bound to a session, creating it on first use.

    Args:
        session_id: Key identifying the session. Any falsy value (``None``
            or an empty string) selects the shared default session.

    Returns:
        The ``BugReportEnvironment`` stored under the resolved key.
    """
    key = session_id if session_id else _default_session_id
    if key in _sessions:
        return _sessions[key]

    # First request for this key: build the environment lazily and keep it
    # so later calls with the same key get the same instance back.
    created = BugReportEnvironment()
    _sessions[key] = created
    return created
43
+
44
+
45
+ # ─── FastAPI App ──────────────────────────────────────────────────
46
+
47
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Run startup and shutdown logic around the application's lifetime.

    Before yielding, prints a startup banner and the available task ids.
    After the application stops, prints a shutdown message and drops every
    stored session environment.

    Args:
        app: The FastAPI application this lifespan is attached to (unused
            here, but required by the lifespan signature).
    """
    print("🚀 Bug Report Structuring Environment starting up...")
    print(f"📋 Available tasks: {get_all_task_ids()}")
    try:
        yield
    finally:
        # Run cleanup even if an exception is thrown into the generator at
        # the yield point; otherwise the sessions would never be cleared.
        print("👋 Shutting down...")
        _sessions.clear()
55
+
56
+
57
# The ASGI application object. Route decorators below register against it and
# the __main__ guard hands it to uvicorn; `lifespan` supplies the
# startup/shutdown hooks.
app = FastAPI(
    lifespan=lifespan,
    version="1.0.0",
    title="Bug Report Structuring Environment",
    description=(
        "An OpenEnv environment that challenges LLM agents to convert "
        "messy, unstructured bug reports into well-organized structured formats. "
        "Supports 3 difficulty levels: easy, medium, hard."
    ),
)
67
+
68
+
69
+ # ─── Endpoints ────────────────────────────────────────────────────
70
+
71
@app.get("/", response_class=HTMLResponse)
async def landing_page():
    """Landing page with environment info."""
    # Static HTML: task tiers, the four API endpoints, a link to /docs and the
    # scoring weights. The page contains non-ASCII characters (emoji, dashes),
    # so the document declares its charset explicitly.
    return """
    <!DOCTYPE html>
    <html>
    <head>
    <meta charset="utf-8">
    <title>Bug Report Structuring Environment</title>
    <style>
    body {
    font-family: 'Segoe UI', system-ui, -apple-system, sans-serif;
    max-width: 800px; margin: 50px auto; padding: 20px;
    background: #0f0f23; color: #e0e0e0;
    }
    h1 { color: #00d4aa; font-size: 2em; }
    h2 { color: #ffd700; margin-top: 30px; }
    .endpoint { background: #1a1a3e; padding: 15px; border-radius: 8px;
    margin: 10px 0; border-left: 4px solid #00d4aa; }
    code { background: #2a2a4e; padding: 2px 8px; border-radius: 4px;
    color: #00d4aa; font-size: 0.95em; }
    .badge { display: inline-block; padding: 3px 10px; border-radius: 4px;
    font-size: 0.8em; font-weight: 600; margin-right: 8px; }
    .get { background: #1e4620; color: #4ade80; }
    .post { background: #1e3a5f; color: #60a5fa; }
    a { color: #00d4aa; }
    .task { background: #1a1a3e; padding: 10px 15px; border-radius: 6px;
    margin: 5px 0; }
    .easy { border-left: 4px solid #4ade80; }
    .medium { border-left: 4px solid #fbbf24; }
    .hard { border-left: 4px solid #ef4444; }
    </style>
    </head>
    <body>
    <h1>🐛 Bug Report Structuring Environment</h1>
    <p>An OpenEnv environment that challenges LLM agents to convert messy,
    unstructured bug reports into well-organized structured formats.</p>

    <h2>📋 Tasks</h2>
    <div class="task easy"><strong>Easy</strong> — Single clear bug, all info present but unstructured</div>
    <div class="task medium"><strong>Medium</strong> — Multiple symptoms, some ambiguity, partial info</div>
    <div class="task hard"><strong>Hard</strong> — Multiple distinct bugs, technical details, compound report</div>

    <h2>🔌 API Endpoints</h2>
    <div class="endpoint">
    <span class="badge post">POST</span> <code>/reset</code>
    <p>Start a new episode. Body: <code>{"task_id": "easy|medium|hard"}</code></p>
    </div>
    <div class="endpoint">
    <span class="badge post">POST</span> <code>/step</code>
    <p>Submit a structured bug report. Returns score and feedback.</p>
    </div>
    <div class="endpoint">
    <span class="badge get">GET</span> <code>/state</code>
    <p>Get current episode metadata.</p>
    </div>
    <div class="endpoint">
    <span class="badge get">GET</span> <code>/health</code>
    <p>Health check endpoint.</p>
    </div>

    <h2>📖 Docs</h2>
    <p>Interactive API docs: <a href="/docs">/docs</a></p>

    <h2>📊 Scoring</h2>
    <p>Reports are graded on 7 dimensions (0.0–1.0 each):</p>
    <ul>
    <li><strong>Title</strong> (15%) — Clear, descriptive title</li>
    <li><strong>Steps to Reproduce</strong> (25%) — Complete reproduction steps</li>
    <li><strong>Expected Behavior</strong> (15%) — What should happen</li>
    <li><strong>Actual Behavior</strong> (15%) — What actually happens</li>
    <li><strong>Severity</strong> (15%) — Correct classification</li>
    <li><strong>Environment</strong> (10%) — Platform/version info</li>
    <li><strong>Format</strong> (5%) — Structural completeness</li>
    </ul>
    </body>
    </html>
    """
148
+
149
+
150
@app.get("/health")
async def health_check():
    """Health check — returns 200 OK if the service is running."""
    return {
        "status": "healthy",
        "environment": "bug_report_structuring",
        # Read the version from the application object so this payload cannot
        # drift from the value passed to FastAPI(...).
        "version": app.version,
        "tasks": get_all_task_ids(),
    }
159
+
160
+
161
@app.post("/reset", response_model=BugReportObservation)
async def reset_endpoint(request: ResetRequest = None):
    """
    Start a new episode.

    Resets the environment with the specified task (or random).
    Returns the messy bug report as the initial observation.
    """
    # The request body is optional; fall back to ResetRequest's defaults.
    if request is None:
        request = ResetRequest()

    try:
        env = get_or_create_env(request.episode_id)
        observation = env.reset(
            task_id=request.task_id,
            seed=request.seed,
            episode_id=request.episode_id,
        )
    except ValueError as e:
        # ValueError from the environment is reported as a client error.
        raise HTTPException(status_code=400, detail=str(e)) from e
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Reset failed: {e}") from e

    # /step and /state look up the default session only. When the caller
    # supplied an episode_id the environment lives under that key, so also
    # expose it as the default session; otherwise the episode that was just
    # reset could never be stepped or inspected.
    # NOTE(review): with several concurrent episode ids the most recent reset
    # wins for /step and /state — confirm that is acceptable.
    _sessions[_default_session_id] = env
    return observation
184
+
185
+
186
@app.post("/step", response_model=BugReportObservation)
async def step_endpoint(request: StepRequest):
    """
    Submit a structured bug report and receive grading.

    The agent sends a structured version of the messy bug report.
    The environment returns a score (0.0-1.0) with detailed feedback.
    """
    try:
        # No session key is passed, so this always acts on the default
        # session's environment.
        env = get_or_create_env()
        return env.step(request.action)
    except ValueError as e:
        # Same mapping as /reset: ValueError is reported as a client error
        # rather than a generic 500.
        raise HTTPException(status_code=400, detail=str(e)) from e
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Step failed: {e}") from e
200
+
201
+
202
@app.get("/state", response_model=BugReportState)
async def state_endpoint():
    """
    Get current episode state metadata.

    Returns episode_id, step_count, task_id, scores, and done status.
    """
    try:
        # No session key is passed, so this reads the default session's
        # environment (creating it if it does not exist yet).
        return get_or_create_env().state
    except Exception as e:
        # Chain the original exception so the traceback keeps the root cause.
        raise HTTPException(
            status_code=500, detail=f"State retrieval failed: {e}"
        ) from e
214
+
215
+
216
+ # ─── Run directly ─────────────────────────────────────────────────
217
if __name__ == "__main__":
    # Imported here so uvicorn is only required when the module is executed
    # as a script.
    import uvicorn

    # Listen on the port named by $PORT, falling back to 7860 when unset.
    listen_port = int(os.getenv("PORT", "7860"))
    uvicorn.run(app, host="0.0.0.0", port=listen_port)