Spaces:

sakthivarshans
/

beacon-env

Sleeping

App Files Files Community

beacon-env / server /app.py

sakthivarshans

Initial BEACON environment

5a37ff6 11 days ago

raw

history blame contribute delete

9.21 kB

	"""
	app.py — FastAPI server for the BEACON reinforcement learning environment.

	Exposes the BEACON environment as a REST API so that agents, dashboards,
	and evaluation pipelines can interact with it over HTTP.

	Endpoints:
	POST /reset — initialise / reset the environment
	POST /step — submit an action and advance one period
	GET /state — inspect the full current environment state
	GET /tasks — list all available evaluation tasks
	POST /grader — run a specific grader and get a score
	GET /baseline — run all graders and return all scores
	GET /health — liveness check

	Usage:
	python app.py
	# or, from the directory that contains server/:
	uvicorn server.app:app --reload
	"""

	import os
	import sys

	# ---------------------------------------------------------------------------
	# Make the directory one level above this file importable, so that
	# environment.py, models.py, and graders.py can be imported as top-level
	# modules even though this file lives in a subdirectory.
	# ---------------------------------------------------------------------------
	_PARENT_DIR = os.path.abspath(
	    os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir)
	)
	if _PARENT_DIR not in sys.path:
	    # Prepend so these modules win over same-named ones later on the path.
	    sys.path.insert(0, _PARENT_DIR)

	# ---------------------------------------------------------------------------
	# BEACON imports (resolved via sys.path above)
	# ---------------------------------------------------------------------------
	from environment import BEACONEnvironment # noqa: E402
	from models import Action # noqa: E402
	from graders import ( # noqa: E402
	grade_task1,
	grade_task2,
	grade_task3,
	run_all_graders,
	)

	# ---------------------------------------------------------------------------
	# FastAPI imports
	# ---------------------------------------------------------------------------
	import uvicorn
	from fastapi import FastAPI, HTTPException
	from fastapi.middleware.cors import CORSMiddleware
	from pydantic import BaseModel, Field


	# ---------------------------------------------------------------------------
	# App setup
	# ---------------------------------------------------------------------------

	# The single ASGI application object; every endpoint below registers on it.
	# title / description / version are the metadata handed to FastAPI.
	app = FastAPI(
	    title="BEACON Environment API",
	    description="REST API for the BEACON dual-scale budget management reinforcement learning environment.",
	    version="1.0.0",
	)

	# Allow all origins so browser-based agents and dashboards can connect freely.
	# Credentials are deliberately disabled: a wildcard origin must not be
	# combined with credentialed (cookie / HTTP-auth) requests, and none of the
	# endpoints in this file rely on cookies or authentication.
	app.add_middleware(
	    CORSMiddleware,
	    allow_origins=["*"],
	    allow_credentials=False,
	    allow_methods=["*"],
	    allow_headers=["*"],
	)


	# ---------------------------------------------------------------------------
	# Global environment instance
	# A default household environment (seed 42) is created at import time, so
	# handlers that call _require_env() have an instance to work with even
	# before any POST /reset. POST /reset replaces it with a newly configured one.
	# NOTE(review): reset() is never called on this default instance here —
	# confirm BEACONEnvironment is usable for /state and /step without it.
	# ---------------------------------------------------------------------------

	_env: BEACONEnvironment = BEACONEnvironment(mode="household", seed=42)


	def _require_env() -> BEACONEnvironment:
	    """Fetch the module-level environment for a request handler.

	    Returns:
	        The current global ``BEACONEnvironment`` instance.

	    Raises:
	        HTTPException: status 503 when the global environment is ``None``.
	    """
	    if _env is not None:
	        return _env

	    raise HTTPException(
	        status_code=503,
	        detail="Environment not initialised. Call POST /reset first.",
	    )


	# ---------------------------------------------------------------------------
	# Request / response schemas
	# ---------------------------------------------------------------------------

	class ResetRequest(BaseModel):
	    """Request body for POST /reset."""

	    # Every field has a default, so an empty JSON object is a valid body.
	    mode: str = Field(
	        "household",
	        description="'household' or 'corporate'",
	    )
	    seed: int = Field(
	        42,
	        description="Random seed for reproducibility",
	    )
	    total_periods: int = Field(
	        6,
	        description="Number of budget periods per episode",
	    )


	class GraderRequest(BaseModel):
	    """Request body for POST /grader."""

	    # Required field (no default): the caller must name the task to grade.
	    task_id: str = Field(
	        ...,
	        description="One of: 'task1', 'task2', 'task3'",
	    )


	# ---------------------------------------------------------------------------
	# Task catalogue (static metadata)
	# ---------------------------------------------------------------------------

	# Textual description of the action payload, attached to every task entry.
	ACTION_SCHEMA = {
	    "allocations": "dict[str, float]",
	    "savings_contribution": "float",
	}


	def _task_entry(
	    task_id: str,
	    name: str,
	    difficulty: str,
	    description: str,
	    mode: str,
	    periods: int,
	    seed: int,
	) -> dict:
	    """Build one catalogue record; all records share the same ACTION_SCHEMA object."""
	    return {
	        "task_id": task_id,
	        "name": name,
	        "difficulty": difficulty,
	        "description": description,
	        "mode": mode,
	        "periods": periods,
	        "seed": seed,
	        "action_schema": ACTION_SCHEMA,
	    }


	# Static metadata returned verbatim by GET /tasks, ordered easy → hard.
	TASK_CATALOGUE = [
	    _task_entry(
	        task_id="task1",
	        name="Bill Coverage",
	        difficulty="easy",
	        description="Allocate income to cover all essential bills in a single period.",
	        mode="household",
	        periods=1,
	        seed=42,
	    ),
	    _task_entry(
	        task_id="task2",
	        name="Shock Absorption",
	        difficulty="medium",
	        description=(
	            "Maintain essential spending while absorbing unexpected "
	            "financial shocks across 3 periods."
	        ),
	        mode="household",
	        periods=3,
	        seed=99,
	    ),
	    _task_entry(
	        task_id="task3",
	        name="6-Month Goal Planning",
	        difficulty="hard",
	        description=(
	            "Manage a corporate budget over 6 periods, covering all "
	            "essential categories while reaching the savings goal."
	        ),
	        mode="corporate",
	        periods=6,
	        seed=7,
	    ),
	]

	# Dispatch table used by POST /grader: task_id string -> grader callable.
	_GRADER_MAP = dict(
	    zip(
	        ("task1", "task2", "task3"),
	        (grade_task1, grade_task2, grade_task3),
	    )
	)


	# ---------------------------------------------------------------------------
	# Endpoints
	# ---------------------------------------------------------------------------

	@app.get("/health", summary="Liveness check")
	def health():
	    """
	    Returns a simple status object confirming the service is running.
	    """
	    # Constant payload: does not touch the environment at all.
	    payload = {"status": "ok", "environment": "BEACON"}
	    return payload


	@app.post("/reset", summary="Initialise or reset the environment")
	def reset(body: ResetRequest = ResetRequest()):
	    """
	    Create a fresh BEACONEnvironment with the given parameters and call
	    reset(). Returns the initial Observation as JSON.

	    - mode: `"household"` or `"corporate"` (default: `"household"`)
	    - seed: random seed for reproducibility (default: `42`)
	    - total_periods: episode length (default: `6`)

	    Responds with HTTP 400 if the environment raises a ValueError while
	    being constructed or reset; in that case the previously active
	    environment is left in place.
	    """
	    global _env
	    try:
	        # Build and reset a local instance first, so that a failure at either
	        # step cannot replace the currently active global environment.
	        fresh_env = BEACONEnvironment(
	            mode=body.mode,
	            total_periods=body.total_periods,
	            seed=body.seed,
	        )
	        obs = fresh_env.reset()
	    except ValueError as exc:
	        # Chain the cause so the original traceback is kept for debugging.
	        raise HTTPException(status_code=400, detail=str(exc)) from exc

	    # Publish the new environment only after it has been reset successfully.
	    _env = fresh_env
	    return obs.model_dump()


	@app.post("/step", summary="Submit an action and advance one period")
	def step(action: Action):
	    """
	    Apply the agent's Action to the current environment and advance by one
	    budget period.

	    Returns the resulting Observation, Reward, done flag, and info dict.

	    - allocations: `{category: amount, ...}` — must cover all categories
	    - savings_contribution: amount added to savings this period

	    Responds with HTTP 400 if the environment raises a ValueError for the
	    submitted action, and HTTP 503 if no environment is available.
	    """
	    env = _require_env()
	    try:
	        obs, reward, done, info = env.step(action)
	    except ValueError as exc:
	        # Same convention as POST /reset: a ValueError from the environment is
	        # reported as a client error rather than an unhandled 500.
	        # NOTE(review): presumably this is how step() rejects a bad action —
	        # verify against BEACONEnvironment.step.
	        raise HTTPException(status_code=400, detail=str(exc)) from exc

	    return {
	        "observation": obs.model_dump(),
	        "reward": reward.model_dump(),
	        "done": done,
	        "info": info,
	    }


	@app.get("/state", summary="Inspect the current environment state")
	def state():
	    """
	    Return the full internal state of the current environment as a plain
	    dictionary. Does not advance the episode.
	    """
	    # _require_env() raises 503 itself when no environment is available.
	    return _require_env().state()


	@app.get("/tasks", summary="List all available evaluation tasks")
	def tasks():
	    """
	    Return metadata for all three BEACON evaluation tasks, including their
	    difficulty, mode, episode length, and expected action schema.
	    """
	    # The catalogue is static module-level data; it is returned as-is.
	    catalogue = TASK_CATALOGUE
	    return catalogue


	@app.post("/grader", summary="Run a specific grader and return its score")
	def grader(body: GraderRequest):
	    """
	    Execute the grader for the requested task and return the normalised
	    score in [0.0, 1.0].

	    - task_id: one of `"task1"`, `"task2"`, `"task3"`
	    """
	    task_id = body.task_id

	    # Reject unknown ids up front, listing the ids that are registered.
	    if task_id not in _GRADER_MAP:
	        raise HTTPException(
	            status_code=404,
	            detail=f"Unknown task_id '{task_id}'. "
	            f"Valid options: {list(_GRADER_MAP)}",
	        )

	    # Graders take no arguments; call the one registered for this task.
	    return {"task_id": task_id, "score": _GRADER_MAP[task_id]()}


	@app.get("/baseline", summary="Run all graders and return all scores")
	def baseline():
	    """
	    Execute all three BEACON graders sequentially and return their scores.

	    This endpoint is deterministic — scores are identical on every call.
	    """
	    # The result of run_all_graders() is passed straight through as the body.
	    return run_all_graders()


	# ---------------------------------------------------------------------------
	# Entry point
	# ---------------------------------------------------------------------------

	def main() -> None:
	    """Serve ``app`` with uvicorn, bound to 0.0.0.0 on port 7860."""
	    uvicorn.run(app, host="0.0.0.0", port=7860)


	if __name__ == "__main__":
	    main()