"""FastAPI server exposing HelpdeskEnv over HTTP.""" import json from typing import Any, Dict, List, Literal, Optional from fastapi import FastAPI from fastapi.responses import HTMLResponse from pydantic import BaseModel import uvicorn from .helpdesk_environment import HelpdeskEnv from ..models import Action, Reward, normalize_action app = FastAPI(title="Helpdesk OpenEnv") _env: Optional[HelpdeskEnv] = None CATEGORIES = [ "payment_failure", "refund_delay", "fraud_complaint", "kyc_account_restriction", "upi_pin_or_bank_linking", ] FAQ_OPTIONS = [f"faq_{idx:03d}" for idx in range(1, 41)] TASKS: List[Dict[str, Any]] = [ { "id": "easy", "difficulty": "easy", "description": "Classify the customer's issue into the correct support category.", "max_steps": 1, "grader": { "type": "llm", "prompt_template": ( "Score the agent's performance for the easy helpdesk task on a scale " "from 0.001 to 0.999. Reward correct issue classification, safe " "behavior, and efficient completion. Penalize incorrect categories, " "unsafe requests for sensitive information, or invalid actions. " "Return only a numeric score." ), }, }, { "id": "medium", "difficulty": "medium", "description": "Select the correct FAQ or escalate cases that require manual handling.", "max_steps": 3, "grader": { "type": "llm", "prompt_template": ( "Score the agent's performance for the medium helpdesk task on a scale " "from 0.001 to 0.999. Reward selecting the correct FAQ or making the " "correct escalation decision, while maintaining safe guidance and good " "efficiency. Penalize incorrect retrieval, missed escalation, unsafe " "behavior, or unnecessary extra steps. Return only a numeric score." ), }, }, { "id": "hard", "difficulty": "hard", "description": ( "Run a multi-turn support conversation with clarification, guidance, " "and safe closure." ), "max_steps": 8, "grader": { "type": "llm", "prompt_template": ( "Score the agent's performance for the hard helpdesk task on a scale " "from 0.001 to 0.999. Reward appropriate clarification, correct FAQ " "retrieval, safe and useful guidance, and closing the case only when " "the issue is actually resolved. Penalize unsafe behavior, premature " "closure, missing clarification, or poor multi-turn handling. Return " "only a numeric score." ), }, }, ] def get_env() -> HelpdeskEnv: global _env if _env is None: _env = HelpdeskEnv() return _env class ResetBody(BaseModel): task_id: Literal["easy", "medium", "hard"] = "easy" def _zero_reward() -> Dict[str, Any]: return Reward( value=0.0, correctness=0.0, safety=1.0, resolution=0.0, efficiency=0.0, penalties=0.0, done=False, info={}, ).model_dump() def _ui_template() -> str: tasks_json = json.dumps(TASKS) categories_json = json.dumps(CATEGORIES) faq_json = json.dumps(FAQ_OPTIONS) return f"""
Run the benchmark like an operator: reset an episode, choose the exact action your agent would take, and inspect the live observation, conversation, and current reward after each step.
{{\n "done": false\n}}
-