Spaces:

Freakdivi
/

helpdesk_env

Sleeping

App Files Community

Freakdivi committed on Apr 8

Commit

456ff39

verified ·

1 Parent(s): 4591acc

Upload folder using huggingface_hub

Browse files

Files changed (2) hide show

openenv.yaml +0 -4
server/app.py +73 -3

openenv.yaml CHANGED Viewed

@@ -18,13 +18,11 @@ type: space
 runtime: fastapi
 app: server.app:app
 port: 8000
-default_task: medium
 tasks:
   - id: easy
     difficulty: easy
     description: Classify the customer's issue into the correct support category
-    dataset: data/tickets/easy.json
     max_steps: 1
     reward_range: [0.0, 1.0]
     grader:
@@ -38,7 +36,6 @@ tasks:
   - id: medium
     difficulty: medium
     description: Select the correct FAQ or escalate cases that require manual handling
-    dataset: data/tickets/medium.json
     max_steps: 3
     reward_range: [0.0, 1.0]
     grader:
@@ -53,7 +50,6 @@ tasks:
   - id: hard
     difficulty: hard
     description: Run a multi-turn support conversation with clarification, guidance, and safe closure
-    dataset: data/tickets/hard.json
     max_steps: 8
     reward_range: [0.0, 1.0]
     grader:

 runtime: fastapi
 app: server.app:app
 port: 8000
 tasks:
   - id: easy
     difficulty: easy
     description: Classify the customer's issue into the correct support category
     max_steps: 1
     reward_range: [0.0, 1.0]
     grader:
   - id: medium
     difficulty: medium
     description: Select the correct FAQ or escalate cases that require manual handling
     max_steps: 3
     reward_range: [0.0, 1.0]
     grader:
   - id: hard
     difficulty: hard
     description: Run a multi-turn support conversation with clarification, guidance, and safe closure
     max_steps: 8
     reward_range: [0.0, 1.0]
     grader:

server/app.py CHANGED Viewed

@@ -1,6 +1,6 @@
 """FastAPI server exposing HelpdeskEnv over HTTP."""
-from typing import Any, Dict, Optional
 from fastapi import FastAPI
 from pydantic import BaseModel
@@ -12,6 +12,61 @@ from ..models import Action, Reward, normalize_action
 app = FastAPI(title="Helpdesk OpenEnv")
 _env: Optional[HelpdeskEnv] = None
 def get_env() -> HelpdeskEnv:
     global _env
@@ -21,7 +76,7 @@ def get_env() -> HelpdeskEnv:
 class ResetBody(BaseModel):
-    task_id: str = "easy"
 def _zero_reward() -> Dict[str, Any]:
@@ -47,10 +102,25 @@ def root() -> Dict[str, Any]:
     return {
         "name": "UPI Banking Support Environment",
         "status": "running",
-        "endpoints": ["/health", "/reset", "/step", "/state"],
     }
 @app.post("/reset")
 def reset(body: ResetBody = ResetBody()) -> Dict[str, Any]:
     obs = get_env().reset(body.task_id)

 """FastAPI server exposing HelpdeskEnv over HTTP."""
+from typing import Any, Dict, List, Literal, Optional
 from fastapi import FastAPI
 from pydantic import BaseModel
 app = FastAPI(title="Helpdesk OpenEnv")
 _env: Optional[HelpdeskEnv] = None
+TASKS: List[Dict[str, Any]] = [
+    {
+        "id": "easy",
+        "difficulty": "easy",
+        "description": "Classify the customer's issue into the correct support category.",
+        "max_steps": 1,
+        "grader": {
+            "type": "llm",
+            "prompt_template": (
+                "Score the agent's performance for the easy helpdesk task on a scale "
+                "from 0.001 to 0.999. Reward correct issue classification, safe "
+                "behavior, and efficient completion. Penalize incorrect categories, "
+                "unsafe requests for sensitive information, or invalid actions. "
+                "Return only a numeric score."
+            ),
+        },
+    },
+    {
+        "id": "medium",
+        "difficulty": "medium",
+        "description": "Select the correct FAQ or escalate cases that require manual handling.",
+        "max_steps": 3,
+        "grader": {
+            "type": "llm",
+            "prompt_template": (
+                "Score the agent's performance for the medium helpdesk task on a scale "
+                "from 0.001 to 0.999. Reward selecting the correct FAQ or making the "
+                "correct escalation decision, while maintaining safe guidance and good "
+                "efficiency. Penalize incorrect retrieval, missed escalation, unsafe "
+                "behavior, or unnecessary extra steps. Return only a numeric score."
+            ),
+        },
+    },
+    {
+        "id": "hard",
+        "difficulty": "hard",
+        "description": (
+            "Run a multi-turn support conversation with clarification, guidance, "
+            "and safe closure."
+        ),
+        "max_steps": 8,
+        "grader": {
+            "type": "llm",
+            "prompt_template": (
+                "Score the agent's performance for the hard helpdesk task on a scale "
+                "from 0.001 to 0.999. Reward appropriate clarification, correct FAQ "
+                "retrieval, safe and useful guidance, and closing the case only when "
+                "the issue is actually resolved. Penalize unsafe behavior, premature "
+                "closure, missing clarification, or poor multi-turn handling. Return "
+                "only a numeric score."
+            ),
+        },
+    },
+]
 def get_env() -> HelpdeskEnv:
     global _env
 class ResetBody(BaseModel):
+    task_id: Literal["easy", "medium", "hard"] = "easy"
 def _zero_reward() -> Dict[str, Any]:
     return {
         "name": "UPI Banking Support Environment",
         "status": "running",
+        "endpoints": ["/health", "/metadata", "/tasks", "/reset", "/step", "/state"],
     }
+@app.get("/metadata")
+def metadata() -> Dict[str, Any]:
+    return {
+        "name": "helpdesk_env",
+        "description": "UPI banking customer support environment with 3 graded tasks.",
+        "task_count": len(TASKS),
+        "tasks": TASKS,
+    }
+@app.get("/tasks")
+def tasks() -> Dict[str, Any]:
+    return {"tasks": TASKS}
 @app.post("/reset")
 def reset(body: ResetBody = ResetBody()) -> Dict[str, Any]:
     obs = get_env().reset(body.task_id)