"""
server/app.py -- FastAPI application for the Hypothesis Lab Environment.
Uses openenv's create_app() to produce the standard HTTP + WebSocket server
with /reset, /step, /state, /health, /schema, and /ws endpoints.

Additional endpoints for hackathon submission:
    /tasks    -- list all tasks and the action schema
    /grader   -- score an episode result for a given task
    /baseline -- trigger baseline inference and return scores
"""
import traceback
from typing import Any, Dict, Optional
from fastapi import Body
try:
from openenv.core.env_server.http_server import create_app
except ImportError:
raise ImportError(
"openenv-core is required. Install with: pip install openenv-core"
)
try:
from ..models import HypLabAction, HypLabObservation
from .hypothesis_lab_environment import HypothesisLabEnvironment
except ImportError:
from models import HypLabAction, HypLabObservation
from server.hypothesis_lab_environment import HypothesisLabEnvironment
# Build the application object with openenv's create_app(). Per the module
# docstring this yields the standard HTTP + WebSocket server exposing
# /reset, /step, /state, /health, /schema and /ws; the extra hackathon
# routes below are registered on this same `app` object.
app = create_app(
    HypothesisLabEnvironment,
    HypLabAction,
    HypLabObservation,
    env_name="hypothesis_lab",
    # NOTE(review): presumably caps the number of simultaneously live
    # environment instances -- confirm against the openenv documentation.
    max_concurrent_envs=200,
)
# ---------------------------------------------------------------------------
# /tasks -- list available tasks and the action schema
# ---------------------------------------------------------------------------
@app.get("/tasks", tags=["Hackathon"])
def list_tasks() -> Dict[str, Any]:
    """List every available task together with the action JSON schema.

    Returns:
        A dict with two keys:
        ``"tasks"`` -- one entry per item of ``tasks.ALL_TASKS`` holding its
        ``id``, ``name``, ``description``, ``difficulty`` and
        ``reset_kwargs`` values;
        ``"action_schema"`` -- the result of
        ``HypLabAction.model_json_schema()``.

    Raises:
        ImportError: if the ``tasks`` package cannot be imported.
        KeyError: if a task entry lacks one of the exposed keys.
    """
    # Imported at call time rather than at module import time.
    # NOTE(review): only the top-level ``tasks`` package is tried. A previous
    # except-branch retried this exact same import, which could never
    # succeed where the first attempt failed; add a package-relative
    # fallback here if one is actually needed -- confirm.
    from tasks import ALL_TASKS

    action_schema = HypLabAction.model_json_schema()

    # Keys copied verbatim from each task entry, in response order.
    exposed_keys = ("id", "name", "description", "difficulty", "reset_kwargs")
    return {
        "tasks": [{key: task[key] for key in exposed_keys} for task in ALL_TASKS],
        "action_schema": action_schema,
    }
# ---------------------------------------------------------------------------
# /grader -- score an episode result for a specific task
# ---------------------------------------------------------------------------
@app.post("/grader", tags=["Hackathon"])
def grade_episode(
    body: Dict[str, Any] = Body(
        ...,
        examples=[{
            "task_id": "easy",
            "episode_result": {
                "accuracy_score": 0.7,
                "precision_bonus": 0.1,
                "calibration_score": 0.15,
                "efficiency_bonus": 0.1,
                "contradiction_penalty": 0.0,
            },
        }],
    ),
) -> Dict[str, Any]:
    """Score an episode result with the grader registered for its task.

    Args:
        body: Request JSON object. ``body["task_id"]`` selects the grader
            (``"easy"``, ``"medium"`` or ``"hard"``); ``body["episode_result"]``
            is passed to that grader unchanged and defaults to ``{}`` when
            the key is absent.

    Returns:
        ``{"task_id": ..., "score": ...}`` on success, or a dict with a single
        ``"error"`` key when ``task_id`` does not name a known grader.
    """
    from tasks.task_easy import grade_easy
    from tasks.task_medium import grade_medium
    from tasks.task_hard import grade_hard

    graders = {"easy": grade_easy, "medium": grade_medium, "hard": grade_hard}
    task_id = body.get("task_id", "")

    # The body is untyped JSON, so task_id may be a list or object. Those are
    # unhashable and would make the dict membership test raise TypeError
    # (surfacing as a 500); treat any non-string as an unknown task instead.
    if not isinstance(task_id, str) or task_id not in graders:
        return {"error": f"Unknown task_id '{task_id}'. Choose from: {list(graders)}"}

    score = graders[task_id](body.get("episode_result", {}))
    return {"task_id": task_id, "score": score}
# ---------------------------------------------------------------------------
# /baseline -- run the baseline agent on all tasks and return scores
# ---------------------------------------------------------------------------
@app.post("/baseline", tags=["Hackathon"])
def run_baseline(
    body: Optional[Dict[str, Any]] = Body(default=None),
) -> Dict[str, Any]:
    """Run the baseline agent via ``baseline_inference.run_all_tasks``.

    Args:
        body: Optional request JSON; accepted but not read by this handler.

    Returns:
        ``{"status": "ok", "results": ...}`` when the run completes.
        Otherwise a dict with an ``"error"`` message; a failure raised by the
        run itself additionally carries the formatted ``"traceback"``.
    """
    # Stage 1: load the baseline entry point, reporting import problems as a
    # regular response payload instead of letting them propagate.
    try:
        from baseline_inference import run_all_tasks as baseline_runner
    except ImportError:
        return {"error": "baseline_inference module not found or missing dependencies (openai)."}
    except Exception as import_failure:
        return {"error": f"Failed to import baseline: {import_failure}"}

    # Stage 2: execute the baseline; any failure is returned with its traceback.
    try:
        scores = baseline_runner()
    except Exception as run_failure:
        return {"error": str(run_failure), "traceback": traceback.format_exc()}

    return {"status": "ok", "results": scores}
def main(host: str = "0.0.0.0", port: int = 8000):
    """Serve the module-level ``app`` with uvicorn.

    Args:
        host: Interface address handed to ``uvicorn.run``.
        port: TCP port handed to ``uvicorn.run``.
    """
    # uvicorn is imported only when the server is actually started.
    from uvicorn import run as serve

    serve(app, host=host, port=port)


if __name__ == "__main__":
    # Direct execution starts the server with main()'s default host and port.
    main()
|