"""
app.py — FastAPI server for the BEACON reinforcement learning environment.

Exposes the BEACON environment as a REST API so that agents, dashboards,
and evaluation pipelines can interact with it over HTTP.

Endpoints:
    POST /reset      — initialise / reset the environment
    POST /step       — submit an action and advance one period
    GET  /state      — inspect the full current environment state
    GET  /tasks      — list all available evaluation tasks
    POST /grader     — run a specific grader and get a score
    GET  /baseline   — run all graders and return all scores
    GET  /health     — liveness check

Usage:
    python app.py
    # or
    uvicorn beacon_env.app:app --reload
"""

import os
import sys

# ---------------------------------------------------------------------------
# Ensure parent directory (d:/meta) is on the Python path so that
# environment.py, models.py, and graders.py can be imported as top-level
# modules from this subdirectory.
# ---------------------------------------------------------------------------
_PARENT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
if _PARENT_DIR not in sys.path:
    sys.path.insert(0, _PARENT_DIR)

# ---------------------------------------------------------------------------
# BEACON imports (resolved via sys.path above)
# ---------------------------------------------------------------------------
from environment import BEACONEnvironment          # noqa: E402
from models import Action                           # noqa: E402
from graders import (                               # noqa: E402
    grade_task1,
    grade_task2,
    grade_task3,
    run_all_graders,
)

# ---------------------------------------------------------------------------
# FastAPI imports
# ---------------------------------------------------------------------------
import uvicorn
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel, Field


# ---------------------------------------------------------------------------
# App setup
# ---------------------------------------------------------------------------

app = FastAPI(
    title="BEACON Environment API",
    description=(
        "REST API for the BEACON dual-scale budget management "
        "reinforcement learning environment."
    ),
    version="1.0.0",
)

# Allow all origins so browser-based agents and dashboards can connect freely
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)


# ---------------------------------------------------------------------------
# Global environment instance
# Starts as None; created / replaced on the first POST /reset call.
# A default instance is also created at startup so GET endpoints work
# immediately without requiring a prior reset.
# ---------------------------------------------------------------------------

_env: BEACONEnvironment = BEACONEnvironment(mode="household", seed=42)


def _require_env() -> BEACONEnvironment:
    """Return the global environment, raising 503 if it is uninitialised."""
    if _env is None:
        raise HTTPException(
            status_code=503,
            detail="Environment not initialised. Call POST /reset first.",
        )
    return _env


# ---------------------------------------------------------------------------
# Request / response schemas
# ---------------------------------------------------------------------------

class ResetRequest(BaseModel):
    """Request body for POST /reset."""
    mode:          str = Field(default="household", description="'household' or 'corporate'")
    seed:          int = Field(default=42,          description="Random seed for reproducibility")
    total_periods: int = Field(default=6,           description="Number of budget periods per episode")


class GraderRequest(BaseModel):
    """Request body for POST /grader."""
    task_id: str = Field(description="One of: 'task1', 'task2', 'task3'")


# ---------------------------------------------------------------------------
# Task catalogue (static metadata)
# ---------------------------------------------------------------------------

ACTION_SCHEMA = {
    "allocations":          "dict[str, float]",
    "savings_contribution": "float",
}

TASK_CATALOGUE = [
    {
        "task_id":     "task1",
        "name":        "Bill Coverage",
        "difficulty":  "easy",
        "description": "Allocate income to cover all essential bills in a single period.",
        "mode":        "household",
        "periods":     1,
        "seed":        42,
        "action_schema": ACTION_SCHEMA,
    },
    {
        "task_id":     "task2",
        "name":        "Shock Absorption",
        "difficulty":  "medium",
        "description": (
            "Maintain essential spending while absorbing unexpected "
            "financial shocks across 3 periods."
        ),
        "mode":        "household",
        "periods":     3,
        "seed":        99,
        "action_schema": ACTION_SCHEMA,
    },
    {
        "task_id":     "task3",
        "name":        "6-Month Goal Planning",
        "difficulty":  "hard",
        "description": (
            "Manage a corporate budget over 6 periods, covering all "
            "essential categories while reaching the savings goal."
        ),
        "mode":        "corporate",
        "periods":     6,
        "seed":        7,
        "action_schema": ACTION_SCHEMA,
    },
]

# Map task_id → grader function for quick lookup
_GRADER_MAP = {
    "task1": grade_task1,
    "task2": grade_task2,
    "task3": grade_task3,
}


# ---------------------------------------------------------------------------
# Endpoints
# ---------------------------------------------------------------------------

@app.get("/health", summary="Liveness check")
def health():
    """
    Returns a simple status object confirming the service is running.
    """
    return {"status": "ok", "environment": "BEACON"}


@app.post("/reset", summary="Initialise or reset the environment")
def reset(body: ResetRequest = ResetRequest()):
    """
    Create a fresh BEACONEnvironment with the given parameters and call
    reset(). Returns the initial Observation as JSON.

    - **mode**: `"household"` or `"corporate"` (default: `"household"`)
    - **seed**: random seed for reproducibility (default: `42`)
    - **total_periods**: episode length (default: `6`)
    """
    global _env
    try:
        _env = BEACONEnvironment(
            mode=body.mode,
            total_periods=body.total_periods,
            seed=body.seed,
        )
        obs = _env.reset()
    except ValueError as exc:
        raise HTTPException(status_code=400, detail=str(exc))

    return obs.model_dump()


@app.post("/step", summary="Submit an action and advance one period")
def step(action: Action):
    """
    Apply the agent's Action to the current environment and advance by one
    budget period.

    Returns the resulting Observation, Reward, done flag, and info dict.

    - **allocations**: `{category: amount, ...}` — must cover all categories
    - **savings_contribution**: amount added to savings this period
    """
    env = _require_env()
    obs, reward, done, info = env.step(action)

    return {
        "observation": obs.model_dump(),
        "reward":      reward.model_dump(),
        "done":        done,
        "info":        info,
    }


@app.get("/state", summary="Inspect the current environment state")
def state():
    """
    Return the full internal state of the current environment as a plain
    dictionary. Does not advance the episode.
    """
    env = _require_env()
    return env.state()


@app.get("/tasks", summary="List all available evaluation tasks")
def tasks():
    """
    Return metadata for all three BEACON evaluation tasks, including their
    difficulty, mode, episode length, and expected action schema.
    """
    return TASK_CATALOGUE


@app.post("/grader", summary="Run a specific grader and return its score")
def grader(body: GraderRequest):
    """
    Execute the grader for the requested task and return the normalised
    score in [0.0, 1.0].

    - **task_id**: one of `"task1"`, `"task2"`, `"task3"`
    """
    grader_fn = _GRADER_MAP.get(body.task_id)
    if grader_fn is None:
        raise HTTPException(
            status_code=404,
            detail=f"Unknown task_id '{body.task_id}'. "
                   f"Valid options: {list(_GRADER_MAP.keys())}",
        )

    score = grader_fn()
    return {"task_id": body.task_id, "score": score}


@app.get("/baseline", summary="Run all graders and return all scores")
def baseline():
    """
    Execute all three BEACON graders sequentially and return their scores.

    This endpoint is deterministic — scores are identical on every call.
    """
    scores = run_all_graders()
    return scores


# ---------------------------------------------------------------------------
# Entry point
# ---------------------------------------------------------------------------

if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=7860)