# beacon-env / server / app.py
# sakthivarshans's picture
# Initial BEACON environment
# 5a37ff6
"""
app.py — FastAPI server for the BEACON reinforcement learning environment.

Exposes the BEACON environment as a REST API so that agents, dashboards,
and evaluation pipelines can interact with it over HTTP.

Endpoints:
    POST /reset    — initialise / reset the environment
    POST /step     — submit an action and advance one period
    GET  /state    — inspect the full current environment state
    GET  /tasks    — list all available evaluation tasks
    POST /grader   — run a specific grader and get a score
    GET  /baseline — run all graders and return all scores
    GET  /health   — liveness check

Usage:
    python app.py
    # or
    uvicorn beacon_env.app:app --reload
"""
import os
import sys
# ---------------------------------------------------------------------------
# Make the directory one level above this file importable, so that
# environment.py, models.py, and graders.py resolve as top-level modules
# even though this server lives in a subdirectory.
# ---------------------------------------------------------------------------
_PARENT_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir))
# Prepend only once so repeated imports do not grow sys.path.
if sys.path.count(_PARENT_DIR) == 0:
    sys.path.insert(0, _PARENT_DIR)
# ---------------------------------------------------------------------------
# BEACON imports (resolved via sys.path above)
# ---------------------------------------------------------------------------
from environment import BEACONEnvironment # noqa: E402
from models import Action # noqa: E402
from graders import ( # noqa: E402
grade_task1,
grade_task2,
grade_task3,
run_all_graders,
)
# ---------------------------------------------------------------------------
# FastAPI imports
# ---------------------------------------------------------------------------
import uvicorn
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel, Field
# ---------------------------------------------------------------------------
# App setup
# ---------------------------------------------------------------------------
# The ASGI application object; every route in this module is registered on it.
# Title, description, and version are passed to FastAPI as API metadata.
app = FastAPI(
    title="BEACON Environment API",
    description=(
        "REST API for the BEACON dual-scale budget management reinforcement "
        "learning environment."
    ),
    version="1.0.0",
)
# Allow all origins so browser-based agents and dashboards can connect freely.
#
# Credentials are deliberately NOT enabled: FastAPI's CORSMiddleware docs state
# that allow_origins / allow_methods / allow_headers must not be ["*"] when
# allow_credentials=True, and a wildcard origin combined with credentials lets
# any website make credentialed cross-origin calls. None of the endpoints in
# this file read cookies or auth headers, so nothing here depends on them.
# If credentialed access is ever needed, list the allowed origins explicitly.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)
# ---------------------------------------------------------------------------
# Global environment instance
# A default household environment (seed 42) is created at import time, so the
# endpoints that read `_env` have an instance to work with before any reset.
# POST /reset replaces it with a freshly configured environment.
# NOTE(review): reset() is not called on this default instance here — confirm
# the constructor alone leaves it usable for /step and /state.
# ---------------------------------------------------------------------------
_env: BEACONEnvironment = BEACONEnvironment(mode="household", seed=42)
def _require_env() -> BEACONEnvironment:
    """Fetch the module-level environment, failing with HTTP 503 when it is unset.

    Returns:
        The current global ``_env`` instance.

    Raises:
        HTTPException: status 503 if ``_env`` is ``None``.
    """
    if _env is not None:
        return _env
    raise HTTPException(
        status_code=503,
        detail="Environment not initialised. Call POST /reset first.",
    )
# ---------------------------------------------------------------------------
# Request / response schemas
# ---------------------------------------------------------------------------
class ResetRequest(BaseModel):
    """JSON payload accepted by POST /reset; every field carries a default."""

    # The first positional argument of Field() is the field's default value.
    mode: str = Field("household", description="'household' or 'corporate'")
    seed: int = Field(42, description="Random seed for reproducibility")
    total_periods: int = Field(6, description="Number of budget periods per episode")
class GraderRequest(BaseModel):
    """JSON payload for POST /grader, naming the task whose grader should run."""

    # `...` (Ellipsis) marks the field as required, same as supplying no default.
    task_id: str = Field(..., description="One of: 'task1', 'task2', 'task3'")
# ---------------------------------------------------------------------------
# Task catalogue (static metadata)
# ---------------------------------------------------------------------------
# Shape of the action payload advertised with every task. The values are
# human-readable type descriptions, not machine-enforced schemas.
ACTION_SCHEMA = {"allocations": "dict[str, float]", "savings_contribution": "float"}

# Static metadata served by GET /tasks. Every entry points at the same
# ACTION_SCHEMA object rather than a copy.
TASK_CATALOGUE = [
    # Easy: one household period.
    {
        "task_id": "task1",
        "name": "Bill Coverage",
        "difficulty": "easy",
        "description": (
            "Allocate income to cover all essential bills "
            "in a single period."
        ),
        "mode": "household",
        "periods": 1,
        "seed": 42,
        "action_schema": ACTION_SCHEMA,
    },
    # Medium: three household periods.
    {
        "task_id": "task2",
        "name": "Shock Absorption",
        "difficulty": "medium",
        "description": (
            "Maintain essential spending while absorbing "
            "unexpected financial shocks across 3 periods."
        ),
        "mode": "household",
        "periods": 3,
        "seed": 99,
        "action_schema": ACTION_SCHEMA,
    },
    # Hard: six corporate periods.
    {
        "task_id": "task3",
        "name": "6-Month Goal Planning",
        "difficulty": "hard",
        "description": (
            "Manage a corporate budget over 6 periods, "
            "covering all essential categories while "
            "reaching the savings goal."
        ),
        "mode": "corporate",
        "periods": 6,
        "seed": 7,
        "action_schema": ACTION_SCHEMA,
    },
]
# Map task_id -> grader function for quick lookup by POST /grader.
# Insertion order (task1, task2, task3) is what the 404 message lists.
_GRADER_MAP = dict(
    task1=grade_task1,
    task2=grade_task2,
    task3=grade_task3,
)
# ---------------------------------------------------------------------------
# Endpoints
# ---------------------------------------------------------------------------
@app.get("/health", summary="Liveness check")
def health():
    """
    Report that the service is up by returning a fixed status object.
    """
    liveness = {"status": "ok", "environment": "BEACON"}
    return liveness
@app.post("/reset", summary="Initialise or reset the environment")
def reset(body: ResetRequest = ResetRequest()):
    """
    Create a fresh BEACONEnvironment with the given parameters, call
    reset() on it, and return the initial Observation as JSON.

    The new environment replaces the global one only after both construction
    and reset() have succeeded, so a failed request leaves the previously
    active environment in place.

    - **mode**: `"household"` or `"corporate"` (default: `"household"`)
    - **seed**: random seed for reproducibility (default: `42`)
    - **total_periods**: episode length (default: `6`)

    Responds with HTTP 400 (detail = the error message) when the environment
    raises `ValueError` during construction or reset.
    """
    global _env
    try:
        # Build and initialise into a local first; the global is untouched
        # until the new environment is known to be fully usable.
        fresh_env = BEACONEnvironment(
            mode=body.mode,
            total_periods=body.total_periods,
            seed=body.seed,
        )
        obs = fresh_env.reset()
    except ValueError as exc:
        # Chain the cause so the original traceback is kept for debugging.
        raise HTTPException(status_code=400, detail=str(exc)) from exc
    _env = fresh_env
    return obs.model_dump()
@app.post("/step", summary="Submit an action and advance one period")
def step(action: Action):
    """
    Apply the agent's Action to the current environment and advance by one
    budget period.

    Returns the resulting Observation, Reward, done flag, and info dict.

    - **allocations**: `{category: amount, ...}` — must cover all categories
    - **savings_contribution**: amount added to savings this period

    Responds with HTTP 503 if no environment is available, and with HTTP 400
    (detail = the error message) if the environment raises `ValueError` for
    the action, matching how POST /reset reports the same error type.
    """
    env = _require_env()
    try:
        # Only the environment call is guarded, so a malformed return value
        # (e.g. a wrong-length tuple) is not misreported as a client error.
        outcome = env.step(action)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail=str(exc)) from exc
    obs, reward, done, info = outcome
    return {
        "observation": obs.model_dump(),
        "reward": reward.model_dump(),
        "done": done,
        "info": info,
    }
@app.get("/state", summary="Inspect the current environment state")
def state():
    """
    Return whatever the current environment's state() method reports.
    Only state() is called here; step() is never invoked by this endpoint.

    Responds with HTTP 503 if no environment is available.
    """
    return _require_env().state()
@app.get("/tasks", summary="List all available evaluation tasks")
def tasks():
    """
    Serve the static task catalogue: one entry per BEACON evaluation task,
    carrying its id, name, difficulty, description, mode, number of periods,
    seed, and expected action schema.
    """
    catalogue = TASK_CATALOGUE
    return catalogue
@app.post("/grader", summary="Run a specific grader and return its score")
def grader(body: GraderRequest):
    """
    Look up the grader registered for the requested task, run it, and return
    its score together with the task id. The score is passed through as-is;
    it is expected to be normalised to [0.0, 1.0] (not enforced here).

    - **task_id**: one of `"task1"`, `"task2"`, `"task3"`

    Responds with HTTP 404 when the task id has no registered grader.
    """
    requested = body.task_id
    if requested not in _GRADER_MAP:
        valid_ids = list(_GRADER_MAP.keys())
        raise HTTPException(
            status_code=404,
            detail=f"Unknown task_id '{requested}'. Valid options: {valid_ids}",
        )
    return {"task_id": requested, "score": _GRADER_MAP[requested]()}
@app.get("/baseline", summary="Run all graders and return all scores")
def baseline():
    """
    Run every BEACON grader through run_all_graders() and return its result
    unchanged.

    NOTE(review): this endpoint has been described as deterministic (identical
    scores on every call); that depends on the graders and is not checked here.
    """
    return run_all_graders()
# ---------------------------------------------------------------------------
# Entry point
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    # Run the server directly (`python app.py`), listening on all interfaces
    # at port 7860.
    uvicorn.run(app=app, host="0.0.0.0", port=7860)