"""
server/app.py -- FastAPI application for the Hypothesis Lab Environment.

Uses openenv's create_app() to produce the standard HTTP + WebSocket server
with /reset, /step, /state, /health, /schema, and /ws endpoints.

Additional endpoints for hackathon submission:
    /tasks     -- list all tasks and action schema
    /grader    -- score an episode result for a given task
    /baseline  -- trigger baseline inference and return scores
"""

import traceback
from typing import Any, Dict, Optional

from fastapi import Body

try:
    from openenv.core.env_server.http_server import create_app
except ImportError as exc:
    # Chain the original failure so the real cause (missing package vs. a
    # broken transitive import) stays visible in the traceback.
    raise ImportError(
        "openenv-core is required. Install with: pip install openenv-core"
    ) from exc

# Support both package-style imports and running with the project root on
# sys.path: try the relative form first, then fall back to absolute names.
try:
    from ..models import HypLabAction, HypLabObservation
    from .hypothesis_lab_environment import HypothesisLabEnvironment
except ImportError:
    from models import HypLabAction, HypLabObservation
    from server.hypothesis_lab_environment import HypothesisLabEnvironment

app = create_app(
    HypothesisLabEnvironment,
    HypLabAction,
    HypLabObservation,
    env_name="hypothesis_lab",
    max_concurrent_envs=200,
)


# ---------------------------------------------------------------------------
# /tasks -- list available tasks and the action schema
# ---------------------------------------------------------------------------
@app.get("/tasks", tags=["Hackathon"])
def list_tasks() -> Dict[str, Any]:
    """List every available task together with the action JSON schema.

    Returns:
        A dict with two keys: ``"tasks"``, a list of per-task dicts holding
        ``id``, ``name``, ``description``, ``difficulty`` and
        ``reset_kwargs``; and ``"action_schema"``, the JSON schema produced
        by ``HypLabAction.model_json_schema()``.
    """
    # NOTE(review): the previous version wrapped this import in try/except
    # ImportError and then retried the identical absolute import, which could
    # never succeed where the first attempt failed. A relative import may
    # have been intended for package-style deployment -- confirm against the
    # project layout before adding one.
    from tasks import ALL_TASKS

    exposed_fields = ("id", "name", "description", "difficulty", "reset_kwargs")
    return {
        "tasks": [
            {field: task[field] for field in exposed_fields}
            for task in ALL_TASKS
        ],
        "action_schema": HypLabAction.model_json_schema(),
    }


# ---------------------------------------------------------------------------
# /grader -- score an episode result for a specific task
# ---------------------------------------------------------------------------
@app.post("/grader", tags=["Hackathon"])
def grade_episode(
    body: Dict[str, Any] = Body(
        ...,
        examples=[{
            "task_id": "easy",
            "episode_result": {
                "accuracy_score": 0.7,
                "precision_bonus": 0.1,
                "calibration_score": 0.15,
                "efficiency_bonus": 0.1,
                "contradiction_penalty": 0.0,
            },
        }],
    ),
) -> Dict[str, Any]:
    """Score an episode result with the grader of the requested task.

    Args:
        body: JSON object read for two keys: ``task_id`` (one of ``"easy"``,
            ``"medium"``, ``"hard"``) and ``episode_result`` (passed
            unchanged to the task's grader; defaults to ``{}`` when absent).

    Returns:
        ``{"task_id": ..., "score": ...}`` on success, or ``{"error": ...}``
        when ``task_id`` does not name a known task.
    """
    # Imported lazily so the task modules are only loaded when grading is
    # actually requested.
    from tasks.task_easy import grade_easy
    from tasks.task_medium import grade_medium
    from tasks.task_hard import grade_hard

    graders = {"easy": grade_easy, "medium": grade_medium, "hard": grade_hard}

    task_id = body.get("task_id", "")
    episode_result = body.get("episode_result", {})

    # The body is arbitrary JSON, so task_id may be a list or dict. Testing an
    # unhashable value for dict membership raises TypeError (an HTTP 500), so
    # reject non-strings up front with the same "unknown task" response.
    if not isinstance(task_id, str) or task_id not in graders:
        return {"error": f"Unknown task_id '{task_id}'. Choose from: {list(graders.keys())}"}

    score = graders[task_id](episode_result)
    return {"task_id": task_id, "score": score}


# ---------------------------------------------------------------------------
# /baseline -- run the baseline agent on all tasks and return scores
# ---------------------------------------------------------------------------
@app.post("/baseline", tags=["Hackathon"])
def run_baseline(
    body: Optional[Dict[str, Any]] = Body(default=None),
) -> Dict[str, Any]:
    """Run the baseline agent on all tasks and return its results.

    Args:
        body: Optional JSON body. It is accepted but not read by this
            handler.

    Returns:
        ``{"status": "ok", "results": ...}`` on success. On failure a dict
        with an ``"error"`` message, plus a ``"traceback"`` string when the
        baseline run itself raised.
    """
    try:
        from baseline_inference import run_all_tasks
    except ImportError:
        return {"error": "baseline_inference module not found or missing dependencies (openai)."}
    except Exception as e:
        return {"error": f"Failed to import baseline: {e}"}

    try:
        results = run_all_tasks()
    except Exception as e:
        # NOTE(review): the full traceback is returned to the HTTP client,
        # which exposes internal paths and code details. Kept because callers
        # may rely on the key; consider logging it server-side instead.
        return {"error": str(e), "traceback": traceback.format_exc()}
    return {"status": "ok", "results": results}


def main(host: str = "0.0.0.0", port: int = 8000):
    """Serve ``app`` with uvicorn on the given host and port."""
    import uvicorn

    uvicorn.run(app, host=host, port=port)


if __name__ == "__main__":
    main()