Freakdivi committed on
Commit
456ff39
·
verified ·
1 Parent(s): 4591acc

Upload folder using huggingface_hub

Browse files
Files changed (2) hide show
  1. openenv.yaml +0 -4
  2. server/app.py +73 -3
openenv.yaml CHANGED
@@ -18,13 +18,11 @@ type: space
18
  runtime: fastapi
19
  app: server.app:app
20
  port: 8000
21
- default_task: medium
22
 
23
  tasks:
24
  - id: easy
25
  difficulty: easy
26
  description: Classify the customer's issue into the correct support category
27
- dataset: data/tickets/easy.json
28
  max_steps: 1
29
  reward_range: [0.0, 1.0]
30
  grader:
@@ -38,7 +36,6 @@ tasks:
38
  - id: medium
39
  difficulty: medium
40
  description: Select the correct FAQ or escalate cases that require manual handling
41
- dataset: data/tickets/medium.json
42
  max_steps: 3
43
  reward_range: [0.0, 1.0]
44
  grader:
@@ -53,7 +50,6 @@ tasks:
53
  - id: hard
54
  difficulty: hard
55
  description: Run a multi-turn support conversation with clarification, guidance, and safe closure
56
- dataset: data/tickets/hard.json
57
  max_steps: 8
58
  reward_range: [0.0, 1.0]
59
  grader:
 
18
  runtime: fastapi
19
  app: server.app:app
20
  port: 8000
 
21
 
22
  tasks:
23
  - id: easy
24
  difficulty: easy
25
  description: Classify the customer's issue into the correct support category
 
26
  max_steps: 1
27
  reward_range: [0.0, 1.0]
28
  grader:
 
36
  - id: medium
37
  difficulty: medium
38
  description: Select the correct FAQ or escalate cases that require manual handling
 
39
  max_steps: 3
40
  reward_range: [0.0, 1.0]
41
  grader:
 
50
  - id: hard
51
  difficulty: hard
52
  description: Run a multi-turn support conversation with clarification, guidance, and safe closure
 
53
  max_steps: 8
54
  reward_range: [0.0, 1.0]
55
  grader:
server/app.py CHANGED
@@ -1,6 +1,6 @@
1
  """FastAPI server exposing HelpdeskEnv over HTTP."""
2
 
3
- from typing import Any, Dict, Optional
4
 
5
  from fastapi import FastAPI
6
  from pydantic import BaseModel
@@ -12,6 +12,61 @@ from ..models import Action, Reward, normalize_action
12
  app = FastAPI(title="Helpdesk OpenEnv")
13
  _env: Optional[HelpdeskEnv] = None
14
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
 
16
  def get_env() -> HelpdeskEnv:
17
  global _env
@@ -21,7 +76,7 @@ def get_env() -> HelpdeskEnv:
21
 
22
 
23
  class ResetBody(BaseModel):
24
- task_id: str = "easy"
25
 
26
 
27
  def _zero_reward() -> Dict[str, Any]:
@@ -47,10 +102,25 @@ def root() -> Dict[str, Any]:
47
  return {
48
  "name": "UPI Banking Support Environment",
49
  "status": "running",
50
- "endpoints": ["/health", "/reset", "/step", "/state"],
51
  }
52
 
53
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54
  @app.post("/reset")
55
  def reset(body: ResetBody = ResetBody()) -> Dict[str, Any]:
56
  obs = get_env().reset(body.task_id)
 
1
  """FastAPI server exposing HelpdeskEnv over HTTP."""
2
 
3
+ from typing import Any, Dict, List, Literal, Optional
4
 
5
  from fastapi import FastAPI
6
  from pydantic import BaseModel
 
12
  app = FastAPI(title="Helpdesk OpenEnv")
13
  _env: Optional[HelpdeskEnv] = None
14
 
15
# Catalogue of the tasks this server advertises; returned by the /metadata
# and /tasks endpoints. Each row below is (id, description, max_steps,
# grader prompt) and is expanded into the full task dictionary.
TASKS: List[Dict[str, Any]] = [
    {
        "id": level,
        # Every task's difficulty label is the same string as its id.
        "difficulty": level,
        "description": summary,
        "max_steps": step_limit,
        "grader": {"type": "llm", "prompt_template": grading_prompt},
    }
    for level, summary, step_limit, grading_prompt in (
        (
            "easy",
            "Classify the customer's issue into the correct support category.",
            1,
            "Score the agent's performance for the easy helpdesk task on a scale "
            "from 0.001 to 0.999. Reward correct issue classification, safe "
            "behavior, and efficient completion. Penalize incorrect categories, "
            "unsafe requests for sensitive information, or invalid actions. "
            "Return only a numeric score.",
        ),
        (
            "medium",
            "Select the correct FAQ or escalate cases that require manual handling.",
            3,
            "Score the agent's performance for the medium helpdesk task on a scale "
            "from 0.001 to 0.999. Reward selecting the correct FAQ or making the "
            "correct escalation decision, while maintaining safe guidance and good "
            "efficiency. Penalize incorrect retrieval, missed escalation, unsafe "
            "behavior, or unnecessary extra steps. Return only a numeric score.",
        ),
        (
            "hard",
            "Run a multi-turn support conversation with clarification, guidance, "
            "and safe closure.",
            8,
            "Score the agent's performance for the hard helpdesk task on a scale "
            "from 0.001 to 0.999. Reward appropriate clarification, correct FAQ "
            "retrieval, safe and useful guidance, and closing the case only when "
            "the issue is actually resolved. Penalize unsafe behavior, premature "
            "closure, missing clarification, or poor multi-turn handling. Return "
            "only a numeric score.",
        ),
    )
]
69
+
70
 
71
  def get_env() -> HelpdeskEnv:
72
  global _env
 
76
 
77
 
78
class ResetBody(BaseModel):
    # Request payload for POST /reset. Documented with comments rather than
    # a docstring so the model's generated schema is left untouched.
    #
    # task_id: which task to start; restricted to the three ids below and
    # falling back to "easy" when the field is omitted.
    task_id: Literal["easy", "medium", "hard"] = "easy"
80
 
81
 
82
  def _zero_reward() -> Dict[str, Any]:
 
102
  return {
103
  "name": "UPI Banking Support Environment",
104
  "status": "running",
105
+ "endpoints": ["/health", "/metadata", "/tasks", "/reset", "/step", "/state"],
106
  }
107
 
108
 
109
@app.get("/metadata")
def metadata() -> Dict[str, Any]:
    """Describe the environment and list every task it exposes.

    Returns:
        A dict with the environment ``name``, a human-readable
        ``description``, the ``task_count`` and the full ``tasks`` list.
    """
    task_count = len(TASKS)
    return {
        "name": "helpdesk_env",
        # The count in the text is derived from TASKS so the description can
        # never disagree with ``task_count`` when tasks are added or removed.
        "description": (
            f"UPI banking customer support environment with {task_count} graded tasks."
        ),
        "task_count": task_count,
        "tasks": TASKS,
    }
117
+
118
+
119
@app.get("/tasks")
def tasks() -> Dict[str, Any]:
    # Wrap the module-level task catalogue under a single "tasks" key.
    # Comment instead of docstring: FastAPI publishes handler docstrings in
    # the generated API description, which would change observable output.
    payload: Dict[str, Any] = dict(tasks=TASKS)
    return payload
122
+
123
+
124
  @app.post("/reset")
125
  def reset(body: ResetBody = ResetBody()) -> Dict[str, Any]:
126
  obs = get_env().reset(body.task_id)