# Spaces:
# Sleeping
# Sleeping
# File size: 5,656 Bytes
# Manifest metadata for the customer support email environment.
name: customer_support_env
# Quoted so the version is always loaded as a string.
version: "1.0.0"
# Folded scalar: the indented lines below are joined with spaces into a
# single paragraph when loaded.
description: >
  Multi-step Customer Support Email Workflow Environment.
  Agents must complete a 5-step workflow:
  classify → prioritize → decide_strategy → respond → escalate.
  Each episode requires sequential decision-making with memory of previous steps.
# Episode-level settings and the names of the action, observation, and
# state types described elsewhere in this file.
# NOTE(review): the nesting here was reconstructed from a copy whose
# indentation was lost; confirm that `api_version` and `action_schema`
# belong under `environment` rather than at the top level.
environment:
  type: episodic
  max_steps_per_episode: 5
  description: >-
    Multi-step customer support workflow with classification,
    prioritization, strategy, response, and optional escalation.
  reward_range: [0.0, 1.0]
  deterministic: true
  action_space: EmailAction
  observation_space: EmailObservation
  state_space: EmailState
  # NOTE(review): only three entries are listed under `tasks:` in this
  # file; confirm that 12 is the intended count.
  task_count: 12
  episode_type: multi_step
  api_version: 1
  action_schema:
    tool_support: true
# Fields of the EmailAction type.
actions:
  type: EmailAction
  fields:
    - name: action_type
      type: string
      description: "Workflow step action type"
      # Five workflow actions plus `use_tool`.
      valid_values:
        - "classify"
        - "prioritize"
        - "decide_strategy"
        - "respond"
        - "escalate"
        - "use_tool"
      required: true
    - name: content
      type: string
      description: "Action content or response text"
      min_length: 1
      max_length: 2000
      required: true
    # The only field in this schema marked as not required.
    - name: tool_action
      type: ToolAction
      description: "Optional tool action payload"
      required: false
# Fields of the EmailObservation type.
observations:
  type: EmailObservation
  fields:
    - name: email_id
      type: string
      description: "Unique email identifier"
    - name: subject
      type: string
      description: "Email subject line"
    - name: body
      type: string
      description: "Email body content"
    - name: customer_history
      type: string
      description: "Summary of customer relationship history"
    - name: step_count
      type: integer
      description: "Current step count in the workflow"
    - name: workflow_step
      type: string
      description: "Current workflow step name"
      # Step names use noun forms and include a terminal `completed` value.
      valid_values:
        - "classification"
        - "prioritization"
        - "strategy_decision"
        - "response_generation"
        - "escalation_decision"
        - "completed"
    - name: available_actions
      type: list
      item_type: string
      description: "Permitted action types for the current step"
    - name: available_tools
      type: list
      item_type: string
      description: "Available tool names for the agent"
    - name: previous_decisions
      type: object
      description: "Agent decisions made so far in this episode"
    - name: customer_sentiment
      type: string
      description: "Detected sentiment of the customer email"
      valid_values: ["positive", "neutral", "negative", "angry"]
    - name: urgency_indicators
      type: list
      item_type: string
      description: "Detected urgency-related keywords from the email"
# Fields of the EmailState type.
states:
  type: EmailState
  fields:
    - name: episode_id
      type: string
      description: "Unique identifier for current episode"
    - name: step_count
      type: integer
      description: "Number of steps taken"
    - name: done
      type: boolean
      description: "Whether episode is complete"
    - name: current_email
      type: string
      description: "Current email identifier"
    - name: total_reward
      type: float
      description: "Cumulative episode reward"
    # The per-step decision fields below are all marked `required: false`.
    - name: classification
      type: string
      description: "Classification decision"
      required: false
    - name: priority
      type: string
      description: "Priority decision"
      required: false
    - name: strategy
      type: string
      description: "Strategy decision"
      required: false
    - name: response
      type: string
      description: "Response content"
      required: false
    - name: escalation
      type: object
      description: "Escalation decision payload"
      required: false
# Reward definition: overall range plus weighted components.
reward:
  range: [0.0, 1.0]
  # NOTE(review): with `>` the bullet lines below are folded into a single
  # line; switch to `|` if the line breaks should be preserved.
  description: >
    Continuous reward signal combining multiple workflow components:
    - Classification correctness
    - Priority correctness
    - Strategy alignment
    - Response quality
    - Escalation suitability
  # Component weights sum to 1.00 (0.30 + 0.20 + 0.20 + 0.20 + 0.10).
  components:
    - name: classification_score
      weight: 0.30
      type: binary
      description: "Correct email category classification"
    - name: priority_score
      weight: 0.20
      type: binary
      description: "Correct urgency/priority selection"
    - name: strategy_score
      weight: 0.20
      type: continuous
      range: [0.0, 1.0]
      description: "Strategy choice alignment with deterministic rubric"
    - name: response_score
      weight: 0.20
      type: continuous
      range: [0.0, 1.0]
      description: "Response quality based on tone, relevance, and memory use"
    # NOTE(review): this range admits negative values while the overall
    # reward range is [0.0, 1.0]; presumably the total is clamped — confirm.
    - name: escalation_bonus
      weight: 0.10
      type: continuous
      range: [-0.2, 0.1]
      description: "Escalation bonus or penalty for appropriate decision"
# Task catalogue: one entry per difficulty level, each with the expected
# category and priority.
tasks:
  - id: easy_refund
    name: Easy Refund Task
    difficulty: easy
    description: Handle a straightforward billing refund request for a duplicate charge.
    ground_truth:
      category: billing
      priority: high
  - id: medium_tech
    name: Medium Tech Task
    difficulty: medium
    description: Resolve a technical issue regarding app crashes and provide instructions.
    ground_truth:
      category: tech
      priority: medium
  - id: hard_escalation
    name: Hard Escalation Task
    difficulty: hard
    description: Handle a high-value enterprise complaint that requires escalation or financial compensation.
    ground_truth:
      category: complaint
      priority: high
# HTTP endpoints, written as "<METHOD> <path>".
api:
  reset: POST /reset
  step: POST /step
  state: GET /state
  info: GET /info
  stats: GET /stats
  health: GET /health
# Evaluation settings.
# NOTE(review): presumably a run passes when the average reward reaches
# `success_threshold` — confirm against the evaluator.
evaluation_metric: average_reward
success_threshold: 0.5
episodes_per_run: 3