File size: 5,656 Bytes
d34f0ce
 
 
 
5f2ce8f
 
 
 
d34f0ce
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5f2ce8f
d34f0ce
 
 
 
 
5f2ce8f
d34f0ce
 
 
 
 
 
 
 
 
 
 
 
5f2ce8f
d34f0ce
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5f2ce8f
d34f0ce
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c74d5fa
 
d34f0ce
c74d5fa
d34f0ce
 
 
 
c74d5fa
 
d34f0ce
c74d5fa
d34f0ce
 
 
 
c74d5fa
 
d34f0ce
c74d5fa
d34f0ce
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
# Identity of this configuration.
name: customer_support_env
version: '1.0.0'

# Folded scalar: the lines below are joined with single spaces into one
# paragraph that ends with a single newline.
description: >
  Multi-step Customer Support Email Workflow Environment. Agents must
  complete a 5-step workflow:
  classify → prioritize → decide_strategy → respond → escalate. Each
  episode requires sequential decision-making with memory of previous
  steps.

# Episode-level settings plus the names of the action, observation and
# state types that are described in the `actions`, `observations` and
# `states` sections of this file.
environment:
  type: episodic
  max_steps_per_episode: 5
  description: >-
    Multi-step customer support workflow with classification,
    prioritization, strategy, response, and optional escalation.
  # Lower and upper bound of the reward; same values as `reward.range`.
  reward_range:
    - 0.0
    - 1.0
  deterministic: true
  action_space: EmailAction
  observation_space: EmailObservation
  state_space: EmailState
  # NOTE(review): the `tasks` list in this file has 3 entries, not 12 —
  # confirm what this count refers to.
  task_count: 12
  episode_type: multi_step
  api_version: 1
  action_schema:
    tool_support: true

# Fields of the EmailAction type.
actions:
  type: EmailAction
  fields:
    # Which kind of action is being taken; one of the listed values.
    - name: action_type
      type: string
      description: 'Workflow step action type'
      valid_values:
        - classify
        - prioritize
        - decide_strategy
        - respond
        - escalate
        - use_tool
      required: true
    # Free-text payload, bounded to 1..2000 characters.
    # NOTE(review): marked required even for `use_tool` actions — confirm
    # that is intended.
    - name: content
      type: string
      description: 'Action content or response text'
      min_length: 1
      max_length: 2000
      required: true
    # Optional structured payload of type ToolAction (not defined in this
    # file).
    - name: tool_action
      type: ToolAction
      description: 'Optional tool action payload'
      required: false

# Fields of the EmailObservation type.
observations:
  type: EmailObservation
  fields:
    # --- Email content ---
    - name: email_id
      type: string
      description: 'Unique email identifier'
    - name: subject
      type: string
      description: 'Email subject line'
    - name: body
      type: string
      description: 'Email body content'
    - name: customer_history
      type: string
      description: 'Summary of customer relationship history'

    # --- Workflow progress ---
    - name: step_count
      type: integer
      description: 'Current step count in the workflow'
    - name: workflow_step
      type: string
      description: 'Current workflow step name'
      valid_values:
        - classification
        - prioritization
        - strategy_decision
        - response_generation
        - escalation_decision
        - completed
    - name: available_actions
      type: list
      item_type: string
      description: 'Permitted action types for the current step'
    - name: available_tools
      type: list
      item_type: string
      description: 'Available tool names for the agent'
    - name: previous_decisions
      type: object
      description: 'Agent decisions made so far in this episode'

    # --- Signals derived from the email ---
    - name: customer_sentiment
      type: string
      description: 'Detected sentiment of the customer email'
      valid_values:
        - positive
        - neutral
        - negative
        - angry
    - name: urgency_indicators
      type: list
      item_type: string
      description: 'Detected urgency-related keywords from the email'

# Fields of the EmailState type.
states:
  type: EmailState
  fields:
    # --- Episode bookkeeping ---
    - name: episode_id
      type: string
      description: 'Unique identifier for current episode'
    - name: step_count
      type: integer
      description: 'Number of steps taken'
    - name: done
      type: boolean
      description: 'Whether episode is complete'
    - name: current_email
      type: string
      description: 'Current email identifier'
    - name: total_reward
      type: float
      description: 'Cumulative episode reward'

    # --- Per-step decisions; each is marked `required: false` ---
    - name: classification
      type: string
      description: 'Classification decision'
      required: false
    - name: priority
      type: string
      description: 'Priority decision'
      required: false
    - name: strategy
      type: string
      description: 'Strategy decision'
      required: false
    - name: response
      type: string
      description: 'Response content'
      required: false
    - name: escalation
      type: object
      description: 'Escalation decision payload'
      required: false

# Reward definition: overall range, a human-readable summary, and the
# weighted components that make up the signal.
reward:
  range: [0.0, 1.0]
  # Literal block scalar (`|`) so each bullet stays on its own line; a
  # folded scalar (`>`) would join the bullets into one run-on line.
  description: |
    Continuous reward signal combining multiple workflow components:
    - Classification correctness
    - Priority correctness
    - Strategy alignment
    - Response quality
    - Escalation suitability
  # Component weights sum to 1.0 (0.30 + 0.20 + 0.20 + 0.20 + 0.10).
  components:
    - name: classification_score
      weight: 0.30
      type: binary
      description: "Correct email category classification"
    - name: priority_score
      weight: 0.20
      type: binary
      description: "Correct urgency/priority selection"
    - name: strategy_score
      weight: 0.20
      type: continuous
      range: [0.0, 1.0]
      description: "Strategy choice alignment with deterministic rubric"
    - name: response_score
      weight: 0.20
      type: continuous
      range: [0.0, 1.0]
      description: "Response quality based on tone, relevance, and memory use"
    # NOTE(review): this range extends below 0 while the overall `range`
    # above is [0.0, 1.0]; presumably the total is clamped — confirm
    # against the implementation.
    - name: escalation_bonus
      weight: 0.10
      type: continuous
      range: [-0.2, 0.1]
      description: "Escalation bonus or penalty for appropriate decision"

# Task catalogue. Each entry carries an id, a display name, a difficulty
# label, a description, and the ground-truth category and priority.
# NOTE(review): `environment.task_count` is 12 but only 3 tasks are listed
# here — confirm which figure is intended.
tasks:
  - id: easy_refund
    name: Easy Refund Task
    difficulty: easy
    description: Handle a straightforward billing refund request for a duplicate charge.
    ground_truth:
      category: billing
      priority: high

  - id: medium_tech
    name: Medium Tech Task
    difficulty: medium
    description: Resolve a technical issue regarding app crashes and provide instructions.
    ground_truth:
      category: tech
      priority: medium

  - id: hard_escalation
    name: Hard Escalation Task
    difficulty: hard
    description: Handle a high-value enterprise complaint that requires escalation or financial compensation.
    ground_truth:
      category: complaint
      priority: high

# HTTP endpoints, written as "<METHOD> <path>".
api:
  # State-changing calls.
  reset: 'POST /reset'
  step: 'POST /step'
  # Read-only calls.
  state: 'GET /state'
  info: 'GET /info'
  stats: 'GET /stats'
  health: 'GET /health'

# Evaluation settings.
# NOTE(review): presumably a run succeeds when the metric reaches
# `success_threshold` — confirm against the evaluator.
evaluation_metric: 'average_reward'
success_threshold: 0.5
episodes_per_run: 3