Spaces:
Sleeping
Sleeping
"""
Bug Report Structuring Environment - FastAPI Server

Exposes the environment via HTTP endpoints:
    POST /reset  → start new episode
    POST /step   → submit structured bug report
    GET  /state  → get episode metadata
    GET  /health → health check
    GET  /       → landing page
"""
import os
import uuid
from contextlib import asynccontextmanager
from typing import Dict, Optional

from fastapi import FastAPI, HTTPException
from fastapi.responses import HTMLResponse, JSONResponse

from models import (
    ResetRequest,
    StepRequest,
    BugReportAction,
    BugReportObservation,
    BugReportState,
)
from environment import BugReportEnvironment
from tasks import get_all_task_ids
# ─── Session Management ──────────────────────────────────────────
# Registry of live environments: one BugReportEnvironment per session id,
# created lazily on first use.
# NOTE(review): entries are only dropped by the `_sessions.clear()` call at
# shutdown, so the registry grows with every distinct session id seen.
_sessions: Dict[str, BugReportEnvironment] = {}
_default_session_id = "default"


def get_or_create_env(session_id: Optional[str] = None) -> BugReportEnvironment:
    """Return the environment bound to a session, creating it on first use.

    Args:
        session_id: Key identifying the session. ``None`` or an empty string
            selects the shared default session.

    Returns:
        The ``BugReportEnvironment`` stored in the registry for that session.
    """
    # Any falsy id (None, "") falls back to the shared default session.
    sid = session_id or _default_session_id
    if sid not in _sessions:
        _sessions[sid] = BugReportEnvironment()
    return _sessions[sid]
# ─── FastAPI App ──────────────────────────────────────────────────
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Manage application startup and shutdown.

    On startup, prints a banner and the task ids reported by
    ``get_all_task_ids()``. On shutdown, prints a message and empties the
    session registry.

    Args:
        app: The FastAPI application being started (not used by the body).

    Yields:
        None. Control is handed back to the server while the app is running.
    """
    # NOTE(review): the leading "π" in these messages looks like a
    # mis-decoded emoji; confirm against the original file before changing.
    print("π Bug Report Structuring Environment starting up...")
    print(f"π Available tasks: {get_all_task_ids()}")
    try:
        yield
    finally:
        # Runs even if the serving phase ends with an exception, so the
        # session registry is always released.
        print("π Shutting down...")
        _sessions.clear()
# Application object. Startup and shutdown behaviour comes from `lifespan`;
# title, description and version are the metadata passed to FastAPI.
app = FastAPI(
    title="Bug Report Structuring Environment",
    version="1.0.0",
    description=(
        "An OpenEnv environment that challenges LLM agents to convert messy, "
        "unstructured bug reports into well-organized structured formats. "
        "Supports 3 difficulty levels: easy, medium, hard."
    ),
    lifespan=lifespan,
)
# ─── Endpoints ────────────────────────────────────────────────────
@app.get("/", response_class=HTMLResponse)
async def landing_page():
    """Serve the static landing page describing the environment.

    Returns:
        An HTML document (as a string) listing the tasks, the API endpoints,
        a link to the interactive docs, and the scoring dimensions.
    """
    # Dashes are written as HTML entities so the page does not depend on the
    # encoding of this source file.
    # NOTE(review): the "π" glyphs in the headings look like mis-decoded
    # emoji; restore them from the original file if it is available.
    return """
    <!DOCTYPE html>
    <html>
    <head>
        <title>Bug Report Structuring Environment</title>
        <style>
            body {
                font-family: 'Segoe UI', system-ui, -apple-system, sans-serif;
                max-width: 800px; margin: 50px auto; padding: 20px;
                background: #0f0f23; color: #e0e0e0;
            }
            h1 { color: #00d4aa; font-size: 2em; }
            h2 { color: #ffd700; margin-top: 30px; }
            .endpoint { background: #1a1a3e; padding: 15px; border-radius: 8px;
                        margin: 10px 0; border-left: 4px solid #00d4aa; }
            code { background: #2a2a4e; padding: 2px 8px; border-radius: 4px;
                   color: #00d4aa; font-size: 0.95em; }
            .badge { display: inline-block; padding: 3px 10px; border-radius: 4px;
                     font-size: 0.8em; font-weight: 600; margin-right: 8px; }
            .get { background: #1e4620; color: #4ade80; }
            .post { background: #1e3a5f; color: #60a5fa; }
            a { color: #00d4aa; }
            .task { background: #1a1a3e; padding: 10px 15px; border-radius: 6px;
                    margin: 5px 0; }
            .easy { border-left: 4px solid #4ade80; }
            .medium { border-left: 4px solid #fbbf24; }
            .hard { border-left: 4px solid #ef4444; }
        </style>
    </head>
    <body>
        <h1>π Bug Report Structuring Environment</h1>
        <p>An OpenEnv environment that challenges LLM agents to convert messy,
        unstructured bug reports into well-organized structured formats.</p>
        <h2>π Tasks</h2>
        <div class="task easy"><strong>Easy</strong> &mdash; Single clear bug, all info present but unstructured</div>
        <div class="task medium"><strong>Medium</strong> &mdash; Multiple symptoms, some ambiguity, partial info</div>
        <div class="task hard"><strong>Hard</strong> &mdash; Multiple distinct bugs, technical details, compound report</div>
        <h2>π API Endpoints</h2>
        <div class="endpoint">
            <span class="badge post">POST</span> <code>/reset</code>
            <p>Start a new episode. Body: <code>{"task_id": "easy|medium|hard"}</code></p>
        </div>
        <div class="endpoint">
            <span class="badge post">POST</span> <code>/step</code>
            <p>Submit a structured bug report. Returns score and feedback.</p>
        </div>
        <div class="endpoint">
            <span class="badge get">GET</span> <code>/state</code>
            <p>Get current episode metadata.</p>
        </div>
        <div class="endpoint">
            <span class="badge get">GET</span> <code>/health</code>
            <p>Health check endpoint.</p>
        </div>
        <h2>π Docs</h2>
        <p>Interactive API docs: <a href="/docs">/docs</a></p>
        <h2>π Scoring</h2>
        <p>Reports are graded on 7 dimensions (0.0&ndash;1.0 each):</p>
        <ul>
            <li><strong>Title</strong> (15%) &mdash; Clear, descriptive title</li>
            <li><strong>Steps to Reproduce</strong> (25%) &mdash; Complete reproduction steps</li>
            <li><strong>Expected Behavior</strong> (15%) &mdash; What should happen</li>
            <li><strong>Actual Behavior</strong> (15%) &mdash; What actually happens</li>
            <li><strong>Severity</strong> (15%) &mdash; Correct classification</li>
            <li><strong>Environment</strong> (10%) &mdash; Platform/version info</li>
            <li><strong>Format</strong> (5%) &mdash; Structural completeness</li>
        </ul>
    </body>
    </html>
    """
@app.get("/health")
async def health_check():
    """Report that the service is up.

    Returns:
        A dict with a fixed ``status``, the environment name, the version
        string, and the task ids reported by ``get_all_task_ids()``.
    """
    return {
        "status": "healthy",
        "environment": "bug_report_structuring",
        "version": "1.0.0",
        "tasks": get_all_task_ids(),
    }
@app.post("/reset")
async def reset_endpoint(request: Optional[ResetRequest] = None):
    """Start a new episode and return its initial observation.

    The environment is looked up (or created) under ``request.episode_id``
    and reset with the requested task, seed and episode id.

    Args:
        request: Reset parameters. When the body is omitted, a default
            ``ResetRequest()`` is used.

    Returns:
        Whatever ``env.reset(...)`` returns, i.e. the initial observation.

    Raises:
        HTTPException: 400 when the reset raises ``ValueError``; 500 for any
            other failure.
    """
    if request is None:
        request = ResetRequest()
    try:
        env = get_or_create_env(request.episode_id)
        return env.reset(
            task_id=request.task_id,
            seed=request.seed,
            episode_id=request.episode_id,
        )
    except ValueError as e:
        # Treated as a client error (bad request parameters).
        raise HTTPException(status_code=400, detail=str(e)) from e
    except Exception as e:
        # Top-level boundary: surface anything else as a 500, keeping the
        # original exception chained for the server log.
        raise HTTPException(status_code=500, detail=f"Reset failed: {e}") from e
@app.post("/step")
async def step_endpoint(request: StepRequest):
    """Submit a structured bug report and return the graded observation.

    Args:
        request: Step payload; ``request.action`` is passed to ``env.step``.

    Returns:
        Whatever ``env.step(request.action)`` returns.

    Raises:
        HTTPException: 500 when the step fails for any reason.
    """
    try:
        # /reset stores the environment under the request's episode_id, so
        # step must look in the same place. StepRequest is declared outside
        # this file; if it has no episode_id (or it is None), this falls back
        # to the default session exactly as before.
        env = get_or_create_env(getattr(request, "episode_id", None))
        return env.step(request.action)
    except Exception as e:
        # Top-level boundary: report as a 500, keeping the cause chained.
        raise HTTPException(status_code=500, detail=f"Step failed: {e}") from e
@app.get("/state")
async def state_endpoint(episode_id: Optional[str] = None):
    """Return the state metadata of a session's environment.

    Args:
        episode_id: Session to inspect, matching the ``episode_id`` used on
            ``/reset``. Omit it to inspect the default session.

    Returns:
        The environment's ``state`` attribute.

    Raises:
        HTTPException: 500 when the state cannot be retrieved.
    """
    try:
        # Kept inside the try because reading `state` may itself raise.
        return get_or_create_env(episode_id).state
    except Exception as e:
        # Top-level boundary: report as a 500, keeping the cause chained.
        raise HTTPException(
            status_code=500, detail=f"State retrieval failed: {e}"
        ) from e
# ─── Run directly ─────────────────────────────────────────────────
if __name__ == "__main__":
    # Imported here so that importing this module does not require uvicorn.
    import uvicorn

    # Listen on all interfaces; the PORT environment variable overrides the
    # 7860 default.
    uvicorn.run(app, host="0.0.0.0", port=int(os.getenv("PORT", "7860")))