# Page-status text captured along with this file ("Spaces: Sleeping, Sleeping");
# it is not part of the configuration.
---
# Manifest header: identity and summary of the customer support email
# workflow environment.
# NOTE(review): the source was wrapped in table-cell markers ("| ... | |") with
# indentation stripped; the structure here is reconstructed — confirm against
# the original file.
name: customer_support_env
# Quoted so the version is always loaded as a string.
version: "1.0.0"
# Folded scalar: the lines below load as a single paragraph.
description: >
  Multi-step Customer Support Email Workflow Environment.
  Agents must complete a 5-step workflow:
  classify → prioritize → decide_strategy → respond → escalate.
  Each episode requires sequential decision-making with memory of previous steps.
# Episode-level settings for the environment.
# NOTE(review): indentation was lost in the source rendering. Every key from
# `type` through `api_version` is placed under `environment` here because the
# original nesting cannot be recovered — confirm against the consumer.
environment:
  type: episodic
  max_steps_per_episode: 5
  # Folded and stripped: loads as one line with no trailing newline.
  description: >-
    Multi-step customer support workflow with classification,
    prioritization, strategy, response, and optional escalation.
  reward_range: [0.0, 1.0]
  deterministic: true
  # Type names for the action, observation, and state payloads.
  action_space: EmailAction
  observation_space: EmailObservation
  state_space: EmailState
  # NOTE(review): the `tasks` list in this file defines three entries while
  # task_count is 12 — confirm which is intended.
  task_count: 12
  episode_type: multi_step
  api_version: 1
# Capability flags for the action schema.
# NOTE(review): only `tool_support` is nested here. The original indentation
# was lost, so confirm whether the `actions` definition that follows was also
# meant to be a child of this key.
action_schema:
  tool_support: true
# Schema of the EmailAction payload: one entry per field.
# NOTE(review): reconstructed as a top-level key, parallel to `observations`
# and `states`; the original nesting was lost — confirm.
actions:
  type: EmailAction
  fields:
    - name: action_type
      type: string
      description: "Workflow step action type"
      # Block sequence keeps the line within the length limit.
      valid_values:
        - "classify"
        - "prioritize"
        - "decide_strategy"
        - "respond"
        - "escalate"
        - "use_tool"
      required: true
    - name: content
      type: string
      description: "Action content or response text"
      min_length: 1
      max_length: 2000
      required: true
    - name: tool_action
      type: ToolAction
      description: "Optional tool action payload"
      required: false
# Schema of the EmailObservation payload: one entry per field.
# NOTE(review): reconstructed as a top-level key; the original nesting was
# lost in the source rendering — confirm.
observations:
  type: EmailObservation
  fields:
    - name: email_id
      type: string
      description: "Unique email identifier"
    - name: subject
      type: string
      description: "Email subject line"
    - name: body
      type: string
      description: "Email body content"
    - name: customer_history
      type: string
      description: "Summary of customer relationship history"
    - name: step_count
      type: integer
      description: "Current step count in the workflow"
    - name: workflow_step
      type: string
      description: "Current workflow step name"
      # Block sequence keeps the line within the length limit.
      valid_values:
        - "classification"
        - "prioritization"
        - "strategy_decision"
        - "response_generation"
        - "escalation_decision"
        - "completed"
    - name: available_actions
      type: list
      item_type: string
      description: "Permitted action types for the current step"
    - name: available_tools
      type: list
      item_type: string
      description: "Available tool names for the agent"
    - name: previous_decisions
      type: object
      description: "Agent decisions made so far in this episode"
    - name: customer_sentiment
      type: string
      description: "Detected sentiment of the customer email"
      valid_values: ["positive", "neutral", "negative", "angry"]
    - name: urgency_indicators
      type: list
      item_type: string
      description: "Detected urgency-related keywords from the email"
# Schema of the EmailState payload: one entry per field.
# Fields marked `required: false` hold the per-step decisions.
# NOTE(review): reconstructed as a top-level key; the original nesting was
# lost in the source rendering — confirm.
states:
  type: EmailState
  fields:
    - name: episode_id
      type: string
      description: "Unique identifier for current episode"
    - name: step_count
      type: integer
      description: "Number of steps taken"
    - name: done
      type: boolean
      description: "Whether episode is complete"
    - name: current_email
      type: string
      description: "Current email identifier"
    - name: total_reward
      type: float
      description: "Cumulative episode reward"
    - name: classification
      type: string
      description: "Classification decision"
      required: false
    - name: priority
      type: string
      description: "Priority decision"
      required: false
    - name: strategy
      type: string
      description: "Strategy decision"
      required: false
    - name: response
      type: string
      description: "Response content"
      required: false
    - name: escalation
      type: object
      description: "Escalation decision payload"
      required: false
# Reward definition: overall range plus the weighted components.
# The component weights sum to 1.0 (0.30 + 0.20 + 0.20 + 0.20 + 0.10).
# NOTE(review): escalation_bonus can be negative while the overall range
# starts at 0.0; presumably the total is clamped — confirm.
reward:
  range: [0.0, 1.0]
  # Literal block (|) so each bullet stays on its own line; a folded block
  # (>) would join same-indent bullets into a single paragraph.
  description: |
    Continuous reward signal combining multiple workflow components:
    - Classification correctness
    - Priority correctness
    - Strategy alignment
    - Response quality
    - Escalation suitability
  components:
    - name: classification_score
      weight: 0.30
      type: binary
      description: "Correct email category classification"
    - name: priority_score
      weight: 0.20
      type: binary
      description: "Correct urgency/priority selection"
    - name: strategy_score
      weight: 0.20
      type: continuous
      range: [0.0, 1.0]
      description: "Strategy choice alignment with deterministic rubric"
    - name: response_score
      weight: 0.20
      type: continuous
      range: [0.0, 1.0]
      description: "Response quality based on tone, relevance, and memory use"
    - name: escalation_bonus
      weight: 0.10
      type: continuous
      range: [-0.2, 0.1]
      description: "Escalation bonus or penalty for appropriate decision"
# Task catalogue: each entry names a scenario, its difficulty, and the
# expected category/priority labels under `ground_truth`.
# NOTE(review): indentation was lost in the source rendering; `ground_truth`
# is reconstructed as a mapping holding `category` and `priority` — confirm.
tasks:
  - id: easy_refund
    name: Easy Refund Task
    difficulty: easy
    description: "Handle a straightforward billing refund request for a duplicate charge."
    ground_truth:
      category: billing
      priority: high
  - id: medium_tech
    name: Medium Tech Task
    difficulty: medium
    description: "Resolve a technical issue regarding app crashes and provide instructions."
    ground_truth:
      category: tech
      priority: medium
  - id: hard_escalation
    name: Hard Escalation Task
    difficulty: hard
    # Folded and stripped: loads as one line with no trailing newline.
    description: >-
      Handle a high-value enterprise complaint that requires escalation
      or financial compensation.
    ground_truth:
      category: complaint
      priority: high
# Endpoint map: each value is an HTTP method followed by a path.
api:
  reset: POST /reset
  step: POST /step
  state: GET /state
  info: GET /info
  stats: GET /stats
  health: GET /health
# Run-level evaluation settings.
# NOTE(review): reconstructed as top-level keys; the original nesting was lost
# in the source rendering — confirm they were not children of `api`.
evaluation_metric: average_reward
success_threshold: 0.5
episodes_per_run: 3