"""FastAPI server exposing HelpdeskEnv over HTTP.""" import json from typing import Any, Dict, List, Literal, Optional from fastapi import FastAPI from fastapi.responses import HTMLResponse from pydantic import BaseModel import uvicorn from .helpdesk_environment import HelpdeskEnv from ..models import Action, Reward, normalize_action app = FastAPI(title="Helpdesk OpenEnv") _env: Optional[HelpdeskEnv] = None CATEGORIES = [ "payment_failure", "refund_delay", "fraud_complaint", "kyc_account_restriction", "upi_pin_or_bank_linking", ] FAQ_OPTIONS = [f"faq_{idx:03d}" for idx in range(1, 41)] TASKS: List[Dict[str, Any]] = [ { "id": "easy", "difficulty": "easy", "description": "Classify the customer's issue into the correct support category.", "max_steps": 1, "grader": { "type": "llm", "prompt_template": ( "Score the agent's performance for the easy helpdesk task on a scale " "from 0.001 to 0.999. Reward correct issue classification, safe " "behavior, and efficient completion. Penalize incorrect categories, " "unsafe requests for sensitive information, or invalid actions. " "Return only a numeric score." ), }, }, { "id": "medium", "difficulty": "medium", "description": "Select the correct FAQ or escalate cases that require manual handling.", "max_steps": 3, "grader": { "type": "llm", "prompt_template": ( "Score the agent's performance for the medium helpdesk task on a scale " "from 0.001 to 0.999. Reward selecting the correct FAQ or making the " "correct escalation decision, while maintaining safe guidance and good " "efficiency. Penalize incorrect retrieval, missed escalation, unsafe " "behavior, or unnecessary extra steps. Return only a numeric score." ), }, }, { "id": "hard", "difficulty": "hard", "description": ( "Run a multi-turn support conversation with clarification, guidance, " "and safe closure." ), "max_steps": 8, "grader": { "type": "llm", "prompt_template": ( "Score the agent's performance for the hard helpdesk task on a scale " "from 0.001 to 0.999. Reward appropriate clarification, correct FAQ " "retrieval, safe and useful guidance, and closing the case only when " "the issue is actually resolved. Penalize unsafe behavior, premature " "closure, missing clarification, or poor multi-turn handling. Return " "only a numeric score." ), }, }, ] def get_env() -> HelpdeskEnv: global _env if _env is None: _env = HelpdeskEnv() return _env class ResetBody(BaseModel): task_id: Literal["easy", "medium", "hard"] = "easy" def _zero_reward() -> Dict[str, Any]: return Reward( value=0.0, correctness=0.0, safety=1.0, resolution=0.0, efficiency=0.0, penalties=0.0, done=False, info={}, ).model_dump() def _ui_template() -> str: tasks_json = json.dumps(TASKS) categories_json = json.dumps(CATEGORIES) faq_json = json.dumps(FAQ_OPTIONS) return f""" UPI Banking Support Environment

HF Space Dashboard

UPI Banking Support Environment

Run the benchmark like an operator: reset an episode, choose the exact action your agent would take, and inspect the live observation, conversation, and current reward after each step.

Health API Docs Raw State

Environment

checking

FastAPI + OpenEnv-style benchmark runtime

Current Reward

0.000

Most recent reward value

Difficulty

-

Current episode track

Turn

0

Current step count

Status

Idle

Episode completion

Current Ticket

Customer Message

Reset the environment to load a ticket.

Case

-

Required Slots

None

Available Actions

None

Collected Facts

-

Action Console

Difficulty

Action Type

Category

Message

Predict the banking issue category for the current ticket.

Conversation Timeline

Waiting

No actions yet. Reset the env, then execute a step.

Step Details

Current Reward Breakdown

No step executed yet.

Episode Info

{{\n  "done": false\n}}

Observation Snapshot

""" @app.get("/health") def health() -> Dict[str, str]: return {"status": "healthy"} @app.get("/", response_class=HTMLResponse) def root() -> HTMLResponse: return HTMLResponse(_ui_template()) @app.get("/web", response_class=HTMLResponse) def web_dashboard() -> HTMLResponse: return HTMLResponse(_ui_template()) @app.get("/metadata") def metadata() -> Dict[str, Any]: return { "name": "helpdesk_env", "description": "UPI banking customer support environment with 3 graded tasks.", "task_count": len(TASKS), "tasks": TASKS, } @app.get("/tasks") def tasks() -> Dict[str, Any]: return {"tasks": TASKS} @app.post("/reset") def reset(body: ResetBody = ResetBody()) -> Dict[str, Any]: obs = get_env().reset(body.task_id) return { "observation": obs.model_dump(), "reward": _zero_reward(), "done": False, "info": {}, } @app.post("/step") def step(body: Dict[str, Any]) -> Dict[str, Any]: action = normalize_action(body["action"]) obs, reward, done, info = get_env().step(action) return { "observation": obs.model_dump(), "reward": reward.model_dump(), "done": done, "info": info, } @app.get("/state") def state() -> Dict[str, Any]: obs = get_env().state() return {"observation": obs.model_dump()} def main() -> None: uvicorn.run("helpdesk_env.server.app:app", host="0.0.0.0", port=8000) if __name__ == "__main__": main()