""" server/app.py FastAPI server exposing the OpenEnv HTTP interface: POST /reset POST /step GET /state GET /tasks POST /grade """ from __future__ import annotations import html import importlib import os import re from pathlib import Path from typing import Any, Dict from fastapi import FastAPI, HTTPException from fastapi.middleware.cors import CORSMiddleware from fastapi.responses import HTMLResponse from pydantic import BaseModel from env.environment import ExecAssistEnv from env.models import ExecAssistAction, ExecAssistObservation, StepResult import graders.task_easy as grader_easy import graders.task_medium as grader_medium import graders.task_hard as grader_hard # --------------------------------------------------------------------------- # App setup # --------------------------------------------------------------------------- app = FastAPI( title="Enterprise Agents", description="OpenEnv-compliant closed-loop enterprise agent environment.", version="1.0.0", ) app.add_middleware( CORSMiddleware, allow_origins=["*"], allow_methods=["*"], allow_headers=["*"], ) # --------------------------------------------------------------------------- # Global env registry (one env per task) # --------------------------------------------------------------------------- ENVS: Dict[str, ExecAssistEnv] = { "easy": ExecAssistEnv(task_name="easy", seed=42), "medium": ExecAssistEnv(task_name="medium", seed=42), "hard": ExecAssistEnv(task_name="hard", seed=42), } GRADERS = { "easy": grader_easy.grade, "medium": grader_medium.grade, "hard": grader_hard.grade, } _active_task: str = "easy" _active_obs: ExecAssistObservation | None = None PROJECT_ROOT = Path(__file__).resolve().parents[1] README_PATH = PROJECT_ROOT / "README.md" # Must not use str.format() on the landing HTML: embedded JavaScript contains `{` / `}` # which breaks format() and causes 500 errors on `/`. _README_HTML_PLACEHOLDER = "__OFFICEAGENT_README_HTML__" def _strip_front_matter(text: str) -> str: if text.startswith("---\n"): parts = text.split("\n---\n", 1) if len(parts) == 2: return parts[1] return text def _render_readme_to_html() -> str: try: readme_text = README_PATH.read_text(encoding="utf-8") readme_text = _strip_front_matter(readme_text) except Exception: readme_text = "# Enterprise Agents\n\nREADME.md not found." try: md = importlib.import_module("markdown") rendered = md.markdown( readme_text, extensions=["fenced_code", "tables", "toc", "sane_lists"], ) except Exception: # Fallback keeps content visible even if markdown package is unavailable. escaped = html.escape(readme_text) rendered = f"
{escaped}
" # Force links in README to open safely in new tab. rendered = re.sub(r" str: readme_html = _render_readme_to_html() return ( """ Enterprise Agents - Closed-Loop OpenEnv Benchmark

📋 Available Tasks

easy Easy
10
Max Steps
~0.70
Baseline

Deterministic classification of 5 emails into correct categories (meeting_request, urgent_task, spam, general_query).

medium Medium
15
Max Steps
~0.50
Baseline

Mixed inbox triage with classification + conflict-aware meeting scheduling. Tests planning and constraint reasoning.

hard Hard
12
Max Steps
~0.38
Baseline

Full assistant workflow: classify, reply, schedule, and ignore spam. The ultimate test of multi-step reasoning.

⚙️ API Endpoints

Quick Reference
POST /reset
Reset environment for task. Request: {{\"task\": \"easy\"|\"medium\"|\"hard\", \"seed\": 42}}
POST /step
Execute one action: classify_email, reply_email, schedule_meeting, or ignore_email
GET /state
Get current environment state (pending_emails, calendar_events, step count)
GET /tasks
List all available tasks with difficulty, max_steps, and descriptions
POST /grade
Score the current episode. Returns score (0-1) for the task

📚 Full Documentation

""" + _README_HTML_PLACEHOLDER + """
""" ).replace("{{", "{").replace("}}", "}").replace(_README_HTML_PLACEHOLDER, readme_html) @app.post("/reset") def reset(req: ResetRequest | None = None) -> Dict[str, Any]: global _active_task, _active_obs if req is None: req = ResetRequest() requested_task = req.task_name or req.task if requested_task not in ENVS: raise HTTPException(status_code=400, detail=f"Unknown task '{requested_task}'. Choose from: {list(ENVS)}") _active_task = requested_task ENVS[requested_task] = ExecAssistEnv(task_name=requested_task, seed=req.seed) obs = ENVS[requested_task].reset() _active_obs = obs return {"observation": obs.model_dump(), "done": False, "reward": 0.0, "info": {}} @app.post("/step") def step(action: ExecAssistAction) -> Dict[str, Any]: global _active_obs env = ENVS.get(_active_task) if env is None: raise HTTPException(status_code=400, detail="No active environment. Call /reset first.") result: StepResult = env.step(action) _active_obs = result.observation return result.model_dump() @app.get("/state") def state() -> Dict[str, Any]: env = ENVS.get(_active_task) if env is None: raise HTTPException(status_code=400, detail="No active environment.") return env.state() @app.get("/tasks") def list_tasks() -> Dict[str, Any]: return { "tasks": [ { "name": "easy", "description": "Classify 5 deterministic emails into correct categories.", "difficulty": "easy", "max_steps": 10, }, { "name": "medium", "description": "Classify emails AND schedule conflict-free meetings from a mixed inbox.", "difficulty": "medium", "max_steps": 15, }, { "name": "hard", "description": "Full workflow: classify, reply, schedule, and ignore spam across a noisy inbox.", "difficulty": "hard", "max_steps": 12, }, ] } @app.post("/grade") def grade(req: GradeRequest | None = None) -> Dict[str, Any]: if req is None: req = GradeRequest() requested_task = req.task_name or req.task env = ENVS.get(requested_task) if env is None: raise HTTPException(status_code=400, detail=f"Unknown task '{requested_task}'.") obs = env._make_obs_internal() # internal state for deterministic grading grader = GRADERS[requested_task] score = grader(obs) return { "task": requested_task, "score": score, "state": env.state(), } def main() -> None: """CLI entrypoint required by OpenEnv validation for server launch.""" import uvicorn host = os.getenv("HOST", "0.0.0.0") port = int(os.getenv("PORT", "7860")) uvicorn.run("server.app:app", host=host, port=port) if __name__ == "__main__": main()