"""
server/app.py

HTTP front end for the environment, built on FastAPI. It serves the OpenEnv
interface:

    POST /reset
    POST /step
    GET  /state
    GET  /tasks
    POST /grade
"""
from __future__ import annotations

import html
import importlib
import os
import re
from pathlib import Path
from typing import Any, Dict

from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import HTMLResponse
from pydantic import BaseModel

from env.environment import ExecAssistEnv
from env.models import ExecAssistAction, ExecAssistObservation, StepResult
import graders.task_easy as grader_easy
import graders.task_medium as grader_medium
import graders.task_hard as grader_hard

# ---------------------------------------------------------------------------
# Application object and middleware
# ---------------------------------------------------------------------------

app = FastAPI(
    title="Enterprise Agents",
    description="OpenEnv-compliant closed-loop enterprise agent environment.",
    version="1.0.0",
)

# The CORS layer is configured with wildcards for origins, methods and headers.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)

# ---------------------------------------------------------------------------
# Per-task registries: one environment and one grader for each task name
# ---------------------------------------------------------------------------

# Environments are created once at import time, all with the same fixed seed.
ENVS: Dict[str, ExecAssistEnv] = {
    task: ExecAssistEnv(task_name=task, seed=42)
    for task in ("easy", "medium", "hard")
}

# Maps each task name to the `grade` callable of its grader module.
GRADERS = dict(
    easy=grader_easy.grade,
    medium=grader_medium.grade,
    hard=grader_hard.grade,
)

# Module-level state: the task starts as "easy" and no observation is stored.
_active_task: str = "easy"
_active_obs: ExecAssistObservation | None = None

# The project root is the directory one level above the one holding this file.
PROJECT_ROOT = Path(__file__).resolve().parents[1]
README_PATH = PROJECT_ROOT / "README.md"

# Must not use str.format() on the landing HTML: embedded JavaScript contains `{` / `}`
# which breaks format() and causes 500 errors on `/`.
_README_HTML_PLACEHOLDER = "__OFFICEAGENT_README_HTML__"

# Leading front-matter block: an opening `---` line (optionally preceded by a
# UTF-8 BOM), the shortest possible run of lines, then a closing `---` line
# that ends with a newline or with the end of the text. LF and CRLF both match.
_FRONT_MATTER_RE = re.compile(
    r"\A\ufeff?---\r?\n(?:.*?\n)??---(?:\r?\n|\Z)",
    re.DOTALL,
)


def _strip_front_matter(text: str) -> str:
    """Return *text* without a leading ``---``-delimited front-matter block.

    The block must start on the very first line. Everything up to and
    including the first following line that consists of ``---`` is removed.
    A leading byte-order mark, CRLF line endings, and a closing delimiter on
    the final line without a trailing newline are all accepted.

    Args:
        text: Raw document text.

    Returns:
        The text after the front matter, or *text* unchanged when it has no
        complete front-matter block at the start.
    """
    match = _FRONT_MATTER_RE.match(text)
    if match is None:
        # No opening delimiter, or the block is never closed: leave as-is.
        return text
    return text[match.end():]


def _render_readme_to_html() -> str:
    """Render the project README as an HTML fragment.

    README_PATH is read as UTF-8 and its front matter is stripped; if that
    fails for any reason a short placeholder document is used instead. The
    text is converted with the ``markdown`` package, imported lazily so the
    server still starts without it; if the import or the conversion fails,
    the HTML-escaped text is used.

    Only the first part of the function is in this block; the link rewriting
    step follows it in the file.
    """
    try:
        readme_text = README_PATH.read_text(encoding="utf-8")
        readme_text = _strip_front_matter(readme_text)
    except Exception:
        # Deliberately best-effort: the landing page must render regardless.
        readme_text = "# Enterprise Agents\n\nREADME.md not found."

    try:
        md = importlib.import_module("markdown")
        rendered = md.markdown(
            readme_text,
            extensions=["fenced_code", "tables", "toc", "sane_lists"],
        )
    except Exception:
        # Fallback keeps content visible even if markdown package is unavailable.
        escaped = html.escape(readme_text)
        # NOTE(review): the markup wrapping {escaped} was not legible in the
        # reviewed copy of this file; <pre>...</pre> is a reconstruction.
        # Confirm against the intended literal.
        rendered = f"<pre>{escaped}</pre>"
    # The function body continues on the following lines of the file.
# Force links in the README to open safely in a new tab.
rendered = re.sub(r" str:
readme_html = _render_readme_to_html()
return (
"""
A closed-loop OpenEnv benchmark for training LLM agents on enterprise workflows with partial observability, delayed consequences, and multi-objective rewards.
Unlike workflow demos that follow fixed scripts, this environment keeps changing as the agent acts, so the model is pushed to make practical decisions under pressure instead of just producing fluent answers.
Deterministic classification of 5 emails into correct categories (meeting_request, urgent_task, spam, general_query).
Mixed inbox triage with classification + conflict-aware meeting scheduling. Tests planning and constraint reasoning.
Full assistant workflow: classify, reply, schedule, and ignore spam. The ultimate test of multi-step reasoning.