File size: 4,146 Bytes
aab0192
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
"""
server/app.py -- FastAPI application for the Hypothesis Lab Environment.

Uses openenv's create_app() to produce the standard HTTP + WebSocket server
with /reset, /step, /state, /health, /schema, and /ws endpoints.

Additional endpoints for hackathon submission:
  /tasks    -- list all tasks and action schema
  /grader   -- score an episode result for a given task
  /baseline -- trigger baseline inference and return scores
"""

import traceback
from typing import Any, Dict, Optional

from fastapi import Body

try:
    from openenv.core.env_server.http_server import create_app
except ImportError:
    raise ImportError(
        "openenv-core is required. Install with: pip install openenv-core"
    )

try:
    from ..models import HypLabAction, HypLabObservation
    from .hypothesis_lab_environment import HypothesisLabEnvironment
except ImportError:
    from models import HypLabAction, HypLabObservation
    from server.hypothesis_lab_environment import HypothesisLabEnvironment


# Build the server application through openenv's create_app() factory, wiring
# in this project's environment class and its action/observation models.
# The hackathon endpoints defined below are registered on this same object.
# NOTE(review): max_concurrent_envs presumably caps simultaneously active
# environment instances -- confirm against the openenv-core documentation.
app = create_app(
    HypothesisLabEnvironment,
    HypLabAction,
    HypLabObservation,
    env_name="hypothesis_lab",
    max_concurrent_envs=200,
)


# ---------------------------------------------------------------------------
# /tasks -- list available tasks and the action schema
# ---------------------------------------------------------------------------
@app.get("/tasks", tags=["Hackathon"])
def list_tasks() -> Dict[str, Any]:
    """Return the catalogue of available tasks plus the action JSON schema.

    Returns:
        A dict with two keys:
          * ``"tasks"`` -- one entry per item of ``tasks.ALL_TASKS`` carrying
            its ``id``, ``name``, ``description``, ``difficulty`` and
            ``reset_kwargs``.
          * ``"action_schema"`` -- the JSON schema produced by
            ``HypLabAction.model_json_schema()``.

    Raises:
        ImportError: if the ``tasks`` module cannot be imported.
        KeyError: if a task entry lacks one of the exposed fields.
    """
    # Imported lazily so the core server can start even when the hackathon
    # task definitions are unavailable. The previous try/except retried this
    # exact same import in its fallback branch, so it could never recover
    # from a failure; a single import is equivalent.
    from tasks import ALL_TASKS

    # Only these fields of each task are exposed, in this order.
    exposed_fields = ("id", "name", "description", "difficulty", "reset_kwargs")

    return {
        "tasks": [
            {field: task[field] for field in exposed_fields}
            for task in ALL_TASKS
        ],
        "action_schema": HypLabAction.model_json_schema(),
    }


# ---------------------------------------------------------------------------
# /grader -- score an episode result for a specific task
# ---------------------------------------------------------------------------
@app.post("/grader", tags=["Hackathon"])
def grade_episode(
    body: Dict[str, Any] = Body(
        ...,
        examples=[{
            "task_id": "easy",
            "episode_result": {
                "accuracy_score": 0.7,
                "precision_bonus": 0.1,
                "calibration_score": 0.15,
                "efficiency_bonus": 0.1,
                "contradiction_penalty": 0.0,
            },
        }],
    ),
) -> Dict[str, Any]:
    """Score an episode result with the grader of the requested task.

    Args:
        body: JSON object with a ``"task_id"`` (one of ``"easy"``,
            ``"medium"``, ``"hard"``) and an ``"episode_result"`` object that
            is handed unchanged to the matching grader. A missing
            ``"episode_result"`` defaults to an empty dict.

    Returns:
        ``{"task_id": ..., "score": ...}`` on success, or ``{"error": ...}``
        when ``task_id`` does not name a known grader. The error is returned
        as a regular response body rather than raised.
    """
    # Imported lazily so the core server does not depend on the task modules.
    from tasks.task_easy import grade_easy
    from tasks.task_medium import grade_medium
    from tasks.task_hard import grade_hard

    graders = {"easy": grade_easy, "medium": grade_medium, "hard": grade_hard}

    task_id = body.get("task_id", "")
    episode_result = body.get("episode_result", {})

    # task_id comes from arbitrary JSON, so it may be a list or an object.
    # Those are unhashable and would make the dict membership test raise
    # TypeError; check the type first so such requests get the same
    # "Unknown task_id" answer as any other invalid value.
    if not isinstance(task_id, str) or task_id not in graders:
        return {"error": f"Unknown task_id '{task_id}'. Choose from: {list(graders)}"}

    score = graders[task_id](episode_result)
    return {"task_id": task_id, "score": score}


# ---------------------------------------------------------------------------
# /baseline -- run the baseline agent on all tasks and return scores
# ---------------------------------------------------------------------------
@app.post("/baseline", tags=["Hackathon"])
def run_baseline(
    body: Optional[Dict[str, Any]] = Body(default=None),
) -> Dict[str, Any]:
    """Run the baseline agent over every task and report the outcome.

    Args:
        body: Optional JSON payload; accepted but not read by this handler.

    Returns:
        ``{"status": "ok", "results": ...}`` when ``run_all_tasks()``
        completes, otherwise a dict with an ``"error"`` message (plus a
        ``"traceback"`` string when the run itself raised).
    """
    # Stage 1: load the baseline lazily; it is optional for the core server.
    try:
        from baseline_inference import run_all_tasks
    except ImportError:
        return {"error": "baseline_inference module not found or missing dependencies (openai)."}
    except Exception as import_failure:
        # Any non-import failure raised while the module was loading.
        return {"error": f"Failed to import baseline: {import_failure}"}

    # Stage 2: execute the baseline and turn any failure into an error payload.
    try:
        outcome = run_all_tasks()
    except Exception as run_failure:
        return {"error": str(run_failure), "traceback": traceback.format_exc()}
    else:
        return {"status": "ok", "results": outcome}


def main(host: str = "0.0.0.0", port: int = 8000):
    """Serve the module-level ``app`` with uvicorn.

    Args:
        host: Interface address handed to uvicorn.
        port: TCP port handed to uvicorn.
    """
    # uvicorn is imported here so that importing this module does not need it.
    from uvicorn import run as serve

    serve(app, host=host, port=port)


# Script entry point: start the server with main()'s default host and port
# when this file is executed directly rather than imported.
if __name__ == "__main__":
    main()