Spaces:
Sleeping
Sleeping
| """ | |
| app.py — FastAPI server for the BEACON reinforcement learning environment. | |
| Exposes the BEACON environment as a REST API so that agents, dashboards, | |
| and evaluation pipelines can interact with it over HTTP. | |
| Endpoints: | |
| POST /reset — initialise / reset the environment | |
| POST /step — submit an action and advance one period | |
| GET /state — inspect the full current environment state | |
| GET /tasks — list all available evaluation tasks | |
| POST /grader — run a specific grader and get a score | |
| GET /baseline — run all graders and return all scores | |
| GET /health — liveness check | |
| Usage: | |
| python app.py | |
| # or | |
| uvicorn beacon_env.app:app --reload | |
| """ | |
| import os | |
| import sys | |
# ---------------------------------------------------------------------------
# Make the parent directory of this file's directory importable so that
# environment.py, models.py, and graders.py can be imported as top-level
# modules from this subdirectory.
# ---------------------------------------------------------------------------
_PARENT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
if _PARENT_DIR not in sys.path:
    # Prepend (rather than append) so these project modules are found first.
    sys.path.insert(0, _PARENT_DIR)
| # --------------------------------------------------------------------------- | |
| # BEACON imports (resolved via sys.path above) | |
| # --------------------------------------------------------------------------- | |
| from environment import BEACONEnvironment # noqa: E402 | |
| from models import Action # noqa: E402 | |
| from graders import ( # noqa: E402 | |
| grade_task1, | |
| grade_task2, | |
| grade_task3, | |
| run_all_graders, | |
| ) | |
| # --------------------------------------------------------------------------- | |
| # FastAPI imports | |
| # --------------------------------------------------------------------------- | |
| import uvicorn | |
| from fastapi import FastAPI, HTTPException | |
| from fastapi.middleware.cors import CORSMiddleware | |
| from pydantic import BaseModel, Field | |
| # --------------------------------------------------------------------------- | |
| # App setup | |
| # --------------------------------------------------------------------------- | |
app = FastAPI(
    title="BEACON Environment API",
    description=(
        "REST API for the BEACON dual-scale budget management "
        "reinforcement learning environment."
    ),
    version="1.0.0",
)

# Allow all origins so browser-based agents and dashboards can connect freely.
# Credentials are disabled on purpose: a wildcard origin must not be combined
# with credentialed requests, and none of the endpoints in this file read
# cookies or authorization headers.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)
# ---------------------------------------------------------------------------
# Global environment instance
# A default instance is created at import time so GET endpoints work
# immediately without requiring a prior reset. POST /reset replaces it
# with a freshly configured environment.
# ---------------------------------------------------------------------------
_env: BEACONEnvironment = BEACONEnvironment(mode="household", seed=42)
def _require_env() -> BEACONEnvironment:
    """Return the global environment, raising 503 if it is uninitialised.

    Raises:
        HTTPException: with status 503 when the module-level ``_env`` is
            ``None``.
    """
    if _env is not None:
        return _env
    raise HTTPException(
        status_code=503,
        detail="Environment not initialised. Call POST /reset first.",
    )
| # --------------------------------------------------------------------------- | |
| # Request / response schemas | |
| # --------------------------------------------------------------------------- | |
class ResetRequest(BaseModel):
    """Request body for POST /reset.

    Every field has a default, so an empty JSON object is a valid body.
    """

    mode: str = Field(default="household", description="'household' or 'corporate'")
    seed: int = Field(default=42, description="Random seed for reproducibility")
    total_periods: int = Field(
        default=6,
        description="Number of budget periods per episode",
    )
class GraderRequest(BaseModel):
    """Request body for POST /grader."""

    # Required: there is no default, so the request must name a task.
    task_id: str = Field(description="One of: 'task1', 'task2', 'task3'")
| # --------------------------------------------------------------------------- | |
| # Task catalogue (static metadata) | |
| # --------------------------------------------------------------------------- | |
# Human-readable description of the action payload, attached to every task
# entry below. The values are type names as strings, not real types.
ACTION_SCHEMA = {
    "allocations": "dict[str, float]",
    "savings_contribution": "float",
}

# Static metadata for the three evaluation tasks, ordered easy -> hard.
# Every entry shares the same ACTION_SCHEMA object.
TASK_CATALOGUE = [
    {
        "task_id": "task1",
        "name": "Bill Coverage",
        "difficulty": "easy",
        "description": "Allocate income to cover all essential bills in a single period.",
        "mode": "household",
        "periods": 1,
        "seed": 42,
        "action_schema": ACTION_SCHEMA,
    },
    {
        "task_id": "task2",
        "name": "Shock Absorption",
        "difficulty": "medium",
        "description": (
            "Maintain essential spending while absorbing unexpected "
            "financial shocks across 3 periods."
        ),
        "mode": "household",
        "periods": 3,
        "seed": 99,
        "action_schema": ACTION_SCHEMA,
    },
    {
        "task_id": "task3",
        "name": "6-Month Goal Planning",
        "difficulty": "hard",
        "description": (
            "Manage a corporate budget over 6 periods, covering all "
            "essential categories while reaching the savings goal."
        ),
        "mode": "corporate",
        "periods": 6,
        "seed": 7,
        "action_schema": ACTION_SCHEMA,
    },
]
# Map task_id -> grader function for quick lookup
_GRADER_MAP = {
    "task1": grade_task1,
    "task2": grade_task2,
    "task3": grade_task3,
}
| # --------------------------------------------------------------------------- | |
| # Endpoints | |
| # --------------------------------------------------------------------------- | |
@app.get("/health")
def health():
    """
    Liveness check.

    Returns a simple status object confirming the service is running.
    """
    return {"status": "ok", "environment": "BEACON"}
@app.post("/reset")
def reset(body: ResetRequest = ResetRequest()):
    """
    Initialise / reset the environment.

    Create a fresh BEACONEnvironment with the given parameters and call
    reset(). Returns the initial Observation as JSON.

    - **mode**: `"household"` or `"corporate"` (default: `"household"`)
    - **seed**: random seed for reproducibility (default: `42`)
    - **total_periods**: episode length (default: `6`)

    A `ValueError` raised while building or resetting the environment is
    reported as HTTP 400, and the previously active environment is kept.
    """
    global _env
    try:
        # Build and reset into a local first so that a failure does not
        # replace the working global with a half-initialised environment.
        fresh_env = BEACONEnvironment(
            mode=body.mode,
            total_periods=body.total_periods,
            seed=body.seed,
        )
        obs = fresh_env.reset()
    except ValueError as exc:
        raise HTTPException(status_code=400, detail=str(exc)) from exc
    _env = fresh_env
    return obs.model_dump()
@app.post("/step")
def step(action: Action):
    """
    Submit an action and advance one period.

    Apply the agent's Action to the current environment and advance by one
    budget period.
    Returns the resulting Observation, Reward, done flag, and info dict.

    - **allocations**: `{category: amount, ...}` — must cover all categories
    - **savings_contribution**: amount added to savings this period

    Responds with HTTP 503 if no environment is available and HTTP 400 if
    the environment rejects the action with a `ValueError`.
    """
    env = _require_env()
    try:
        obs, reward, done, info = env.step(action)
    except ValueError as exc:
        # Same mapping as POST /reset: a rejected input is a client error,
        # not an internal server error.
        raise HTTPException(status_code=400, detail=str(exc)) from exc
    return {
        "observation": obs.model_dump(),
        "reward": reward.model_dump(),
        "done": done,
        "info": info,
    }
@app.get("/state")
def state():
    """
    Inspect the full current environment state.

    Returns whatever the environment's `state()` method produces. This
    handler itself does not call `step()` or `reset()`.
    """
    return _require_env().state()
@app.get("/tasks")
def tasks():
    """
    List all available evaluation tasks.

    Return metadata for all three BEACON evaluation tasks, including their
    difficulty, mode, episode length, and expected action schema.
    """
    return TASK_CATALOGUE
@app.post("/grader")
def grader(body: GraderRequest):
    """
    Run a specific grader and get a score.

    Execute the grader for the requested task and return the normalised
    score in [0.0, 1.0].

    - **task_id**: one of `"task1"`, `"task2"`, `"task3"`

    Responds with HTTP 404 when `task_id` is not a known task.
    """
    grader_fn = _GRADER_MAP.get(body.task_id)
    if grader_fn is None:
        raise HTTPException(
            status_code=404,
            detail=f"Unknown task_id '{body.task_id}'. "
            f"Valid options: {list(_GRADER_MAP.keys())}",
        )
    return {"task_id": body.task_id, "score": grader_fn()}
@app.get("/baseline")
def baseline():
    """
    Run all graders and return all scores.

    Execute all three BEACON graders sequentially and return their scores.
    This endpoint is deterministic — scores are identical on every call.
    """
    return run_all_graders()
| # --------------------------------------------------------------------------- | |
| # Entry point | |
| # --------------------------------------------------------------------------- | |
if __name__ == "__main__":
    # Serve the app directly when run as a script: listen on all
    # interfaces, port 7860.
    uvicorn.run(app, host="0.0.0.0", port=7860)