Spaces:
Sleeping
Sleeping
| """ | |
| server/app.py -- FastAPI application for the Hypothesis Lab Environment. | |
| Uses openenv's create_app() to produce the standard HTTP + WebSocket server | |
| with /reset, /step, /state, /health, /schema, and /ws endpoints. | |
| Additional endpoints for hackathon submission: | |
| /tasks -- list all tasks and action schema | |
| /grader -- score an episode result for a given task | |
| /baseline -- trigger baseline inference and return scores | |
| """ | |
| import traceback | |
| from typing import Any, Dict, Optional | |
| from fastapi import Body | |
| try: | |
| from openenv.core.env_server.http_server import create_app | |
| except ImportError: | |
| raise ImportError( | |
| "openenv-core is required. Install with: pip install openenv-core" | |
| ) | |
| try: | |
| from ..models import HypLabAction, HypLabObservation | |
| from .hypothesis_lab_environment import HypothesisLabEnvironment | |
| except ImportError: | |
| from models import HypLabAction, HypLabObservation | |
| from server.hypothesis_lab_environment import HypothesisLabEnvironment | |
# Build the ASGI application through openenv's factory. Per the module
# docstring, this supplies the standard /reset, /step, /state, /health,
# /schema and /ws endpoints for the Hypothesis Lab environment.
# NOTE(review): max_concurrent_envs presumably caps the number of simultaneous
# environment instances -- confirm against the openenv-core documentation.
app = create_app(
    HypothesisLabEnvironment,
    HypLabAction,
    HypLabObservation,
    env_name="hypothesis_lab",
    max_concurrent_envs=200,
)
| # --------------------------------------------------------------------------- | |
| # /tasks -- list available tasks and the action schema | |
| # --------------------------------------------------------------------------- | |
@app.get("/tasks")
def list_tasks() -> Dict[str, Any]:
    """Return the task catalogue together with the action JSON schema.

    Exposed as ``GET /tasks`` so that the endpoint advertised in the module
    docstring is actually reachable on ``app``.

    Returns:
        A dict with two keys:

        * ``"tasks"`` -- one entry per item of ``tasks.ALL_TASKS``, reduced to
          its ``id``, ``name``, ``description``, ``difficulty`` and
          ``reset_kwargs`` fields.
        * ``"action_schema"`` -- the result of
          ``HypLabAction.model_json_schema()``.

    Raises:
        ImportError: If the ``tasks`` package cannot be imported.
        KeyError: If a task entry lacks one of the exposed fields.
    """
    # Imported at call time rather than at module import. A single import is
    # enough: retrying the identical statement after an ImportError cannot
    # succeed, so no fallback branch is kept.
    from tasks import ALL_TASKS

    action_schema = HypLabAction.model_json_schema()

    # Only these fields are published; any other keys on a task stay private.
    exposed_fields = ("id", "name", "description", "difficulty", "reset_kwargs")
    return {
        "tasks": [
            {field: task[field] for field in exposed_fields}
            for task in ALL_TASKS
        ],
        "action_schema": action_schema,
    }
| # --------------------------------------------------------------------------- | |
| # /grader -- score an episode result for a specific task | |
| # --------------------------------------------------------------------------- | |
@app.post("/grader")
def grade_episode(
    body: Dict[str, Any] = Body(
        ...,
        examples=[{
            "task_id": "easy",
            "episode_result": {
                "accuracy_score": 0.7,
                "precision_bonus": 0.1,
                "calibration_score": 0.15,
                "efficiency_bonus": 0.1,
                "contradiction_penalty": 0.0,
            },
        }],
    ),
) -> Dict[str, Any]:
    """Score one episode result with the grader of the requested task.

    Exposed as ``POST /grader`` so that the endpoint advertised in the module
    docstring is actually reachable on ``app``.

    Args:
        body: Request JSON object. ``"task_id"`` selects the grader
            (``"easy"``, ``"medium"`` or ``"hard"``); ``"episode_result"`` is
            handed to that grader unchanged and defaults to ``{}`` when the
            key is absent.

    Returns:
        ``{"task_id": ..., "score": ...}`` on success, or ``{"error": ...}``
        when ``task_id`` does not name a known grader.
    """
    from tasks.task_easy import grade_easy
    from tasks.task_medium import grade_medium
    from tasks.task_hard import grade_hard

    graders = {"easy": grade_easy, "medium": grade_medium, "hard": grade_hard}
    task_id = body.get("task_id", "")
    episode_result = body.get("episode_result", {})

    # task_id comes straight from request JSON and may be a list or an object.
    # Testing an unhashable value for dict membership raises TypeError, so
    # non-strings are rejected first and reported like any other unknown id.
    if not isinstance(task_id, str) or task_id not in graders:
        return {"error": f"Unknown task_id '{task_id}'. Choose from: {list(graders.keys())}"}

    score = graders[task_id](episode_result)
    return {"task_id": task_id, "score": score}
| # --------------------------------------------------------------------------- | |
| # /baseline -- run the baseline agent on all tasks and return scores | |
| # --------------------------------------------------------------------------- | |
@app.post("/baseline")
def run_baseline(
    body: Optional[Dict[str, Any]] = Body(default=None),
) -> Dict[str, Any]:
    """Run the baseline agent on all tasks and report its results.

    Exposed as ``POST /baseline`` so that the endpoint advertised in the
    module docstring is actually reachable on ``app``.

    Args:
        body: Optional request JSON. It is accepted but not read by this
            handler.

    Returns:
        ``{"status": "ok", "results": ...}`` carrying whatever
        ``baseline_inference.run_all_tasks()`` returns, or a dict with an
        ``"error"`` key when the module cannot be imported or the run fails.
        A failed run additionally includes a ``"traceback"`` key.
    """
    # Imported at call time so the server can start even when the baseline
    # module or its dependencies are unavailable.
    try:
        from baseline_inference import run_all_tasks
    except ImportError:
        return {"error": "baseline_inference module not found or missing dependencies (openai)."}
    except Exception as e:
        # Anything else raised while the module executes its top-level code.
        return {"error": f"Failed to import baseline: {e}"}

    try:
        results = run_all_tasks()
        return {"status": "ok", "results": results}
    except Exception as e:
        # NOTE(review): the full traceback is sent back to the HTTP client,
        # which exposes file paths and internals. Kept because callers may
        # rely on the key -- consider logging it server-side instead.
        return {"error": str(e), "traceback": traceback.format_exc()}
def main(host: str = "0.0.0.0", port: int = 8000):
    """Serve ``app`` with uvicorn.

    Args:
        host: Interface address to bind; defaults to ``"0.0.0.0"``.
        port: TCP port to listen on; defaults to ``8000``.
    """
    # uvicorn is imported here so that merely importing this module does not
    # require it.
    from uvicorn import run as serve

    serve(app, host=host, port=port)


# Start the server with the default host and port when run as a script.
if __name__ == "__main__":
    main()