""" Bug Report Structuring Environment - FastAPI Server Exposes the environment via HTTP endpoints: POST /reset → start new episode POST /step → submit structured bug report GET /state → get episode metadata GET /health → health check GET / → landing page """ import os import uuid from contextlib import asynccontextmanager from typing import Dict from fastapi import FastAPI, HTTPException from fastapi.responses import HTMLResponse, JSONResponse from models import ( ResetRequest, StepRequest, BugReportAction, BugReportObservation, BugReportState, ) from environment import BugReportEnvironment from tasks import get_all_task_ids # ─── Session Management ────────────────────────────────────────── # Each session gets its own environment instance _sessions: Dict[str, BugReportEnvironment] = {} _default_session_id = "default" def get_or_create_env(session_id: str = None) -> BugReportEnvironment: """Get or create an environment for a session.""" sid = session_id or _default_session_id if sid not in _sessions: _sessions[sid] = BugReportEnvironment() return _sessions[sid] # ─── FastAPI App ────────────────────────────────────────────────── @asynccontextmanager async def lifespan(app: FastAPI): """Startup and shutdown events.""" print("🚀 Bug Report Structuring Environment starting up...") print(f"📋 Available tasks: {get_all_task_ids()}") yield print("👋 Shutting down...") _sessions.clear() app = FastAPI( title="Bug Report Structuring Environment", description=( "An OpenEnv environment that challenges LLM agents to convert " "messy, unstructured bug reports into well-organized structured formats. " "Supports 3 difficulty levels: easy, medium, hard." ), version="1.0.0", lifespan=lifespan, ) # ─── Endpoints ──────────────────────────────────────────────────── @app.get("/", response_class=HTMLResponse) async def landing_page(): """Landing page with environment info.""" return """ Bug Report Structuring Environment

🐛 Bug Report Structuring Environment

An OpenEnv environment that challenges LLM agents to convert messy, unstructured bug reports into well-organized structured formats.

📋 Tasks

Easy — Single clear bug, all info present but unstructured

Medium — Multiple symptoms, some ambiguity, partial info

Hard — Multiple distinct bugs, technical details, compound report

🔌 API Endpoints

POST /reset

Start a new episode. Body: {"task_id": "easy|medium|hard"}

POST /step

Submit a structured bug report. Returns score and feedback.

GET /state

Get current episode metadata.

GET /health

Health check endpoint.

📖 Docs

Interactive API docs: /docs

📊 Scoring

Reports are graded on 7 dimensions (0.0–1.0 each):

Title (15%) — Clear, descriptive title
Steps to Reproduce (25%) — Complete reproduction steps
Expected Behavior (15%) — What should happen
Actual Behavior (15%) — What actually happens
Severity (15%) — Correct classification
Environment (10%) — Platform/version info
Format (5%) — Structural completeness

""" @app.get("/health") async def health_check(): """Health check — returns 200 OK if the service is running.""" return { "status": "healthy", "environment": "bug_report_structuring", "version": "1.0.0", "tasks": get_all_task_ids(), } @app.post("/reset", response_model=BugReportObservation) async def reset_endpoint(request: ResetRequest = None): """ Start a new episode. Resets the environment with the specified task (or random). Returns the messy bug report as the initial observation. """ if request is None: request = ResetRequest() try: env = get_or_create_env(request.episode_id) observation = env.reset( task_id=request.task_id, seed=request.seed, episode_id=request.episode_id, ) return observation except ValueError as e: raise HTTPException(status_code=400, detail=str(e)) except Exception as e: raise HTTPException(status_code=500, detail=f"Reset failed: {str(e)}") @app.post("/step", response_model=BugReportObservation) async def step_endpoint(request: StepRequest): """ Submit a structured bug report and receive grading. The agent sends a structured version of the messy bug report. The environment returns a score (0.0-1.0) with detailed feedback. """ try: env = get_or_create_env() observation = env.step(request.action) return observation except Exception as e: raise HTTPException(status_code=500, detail=f"Step failed: {str(e)}") @app.get("/state", response_model=BugReportState) async def state_endpoint(): """ Get current episode state metadata. Returns episode_id, step_count, task_id, scores, and done status. """ try: env = get_or_create_env() return env.state except Exception as e: raise HTTPException(status_code=500, detail=f"State retrieval failed: {str(e)}") # ─── Run directly ───────────────────────────────────────────────── if __name__ == "__main__": import uvicorn port = int(os.environ.get("PORT", 7860)) uvicorn.run(app, host="0.0.0.0", port=port)