---
# Manifest for the customer_support_env environment: metadata, the
# action / observation / state schemas, reward components, tasks, and the
# HTTP API routes.
#
# NOTE(review): this file had lost its line breaks and indentation, which
# made it unparseable as YAML. The nesting below was reconstructed from key
# order and naming; confirm the placements marked inline against whatever
# code loads this file.

name: customer_support_env
# Quoted so the version is always loaded as a string.
version: "1.0.0"
description: >
  Multi-step Customer Support Email Workflow Environment.
  Agents must complete a 5-step workflow:
  classify → prioritize → decide_strategy → respond → escalate.
  Each episode requires sequential decision-making with memory of previous
  steps.

environment:
  type: episodic
  # NOTE(review): action_type below also allows "use_tool"; this file does
  # not say whether tool calls count toward the step budget — confirm.
  max_steps_per_episode: 5
  description: >-
    Multi-step customer support workflow with classification, prioritization,
    strategy, response, and optional escalation.
  reward_range: [0.0, 1.0]
  deterministic: true
  # Type names; each is described by a schema section further down.
  action_space: EmailAction
  observation_space: EmailObservation
  state_space: EmailState
  # NOTE(review): only three entries are listed under `tasks` in this file —
  # confirm that 12 is the intended count.
  task_count: 12
  episode_type: multi_step
  api_version: 1

# NOTE(review): `actions` is assumed to be nested under `action_schema`
# (next to `tool_support`); it may instead be a top-level sibling of
# `observations` and `states` — confirm.
action_schema:
  tool_support: true
  actions:
    type: EmailAction
    fields:
      - name: action_type
        type: string
        description: "Workflow step action type"
        valid_values:
          - "classify"
          - "prioritize"
          - "decide_strategy"
          - "respond"
          - "escalate"
          - "use_tool"
        required: true
      - name: content
        type: string
        description: "Action content or response text"
        min_length: 1
        max_length: 2000
        required: true
      - name: tool_action
        type: ToolAction
        description: "Optional tool action payload"
        required: false

# Fields of the observation (EmailObservation).
observations:
  type: EmailObservation
  fields:
    - name: email_id
      type: string
      description: "Unique email identifier"
    - name: subject
      type: string
      description: "Email subject line"
    - name: body
      type: string
      description: "Email body content"
    - name: customer_history
      type: string
      description: "Summary of customer relationship history"
    - name: step_count
      type: integer
      description: "Current step count in the workflow"
    - name: workflow_step
      type: string
      description: "Current workflow step name"
      valid_values:
        - "classification"
        - "prioritization"
        - "strategy_decision"
        - "response_generation"
        - "escalation_decision"
        - "completed"
    - name: available_actions
      type: list
      item_type: string
      description: "Permitted action types for the current step"
    - name: available_tools
      type: list
      item_type: string
      description: "Available tool names for the agent"
    - name: previous_decisions
      type: object
      description: "Agent decisions made so far in this episode"
    - name: customer_sentiment
      type: string
      description: "Detected sentiment of the customer email"
      valid_values:
        - "positive"
        - "neutral"
        - "negative"
        - "angry"
    - name: urgency_indicators
      type: list
      item_type: string
      description: "Detected urgency-related keywords from the email"

# Fields of the episode state (EmailState). The five decision fields are
# marked `required: false`.
states:
  type: EmailState
  fields:
    - name: episode_id
      type: string
      description: "Unique identifier for current episode"
    - name: step_count
      type: integer
      description: "Number of steps taken"
    - name: done
      type: boolean
      description: "Whether episode is complete"
    - name: current_email
      type: string
      description: "Current email identifier"
    - name: total_reward
      type: float
      description: "Cumulative episode reward"
    - name: classification
      type: string
      description: "Classification decision"
      required: false
    - name: priority
      type: string
      description: "Priority decision"
      required: false
    - name: strategy
      type: string
      description: "Strategy decision"
      required: false
    - name: response
      type: string
      description: "Response content"
      required: false
    - name: escalation
      type: object
      description: "Escalation decision payload"
      required: false

reward:
  range: [0.0, 1.0]
  # Folded scalar: the dash-prefixed lines below are joined into one line of
  # text when loaded; they are not a YAML list.
  description: >
    Continuous reward signal combining multiple workflow components:
    - Classification correctness
    - Priority correctness
    - Strategy alignment
    - Response quality
    - Escalation suitability
  # Component weights sum to 1.00 (0.30 + 0.20 + 0.20 + 0.20 + 0.10).
  components:
    - name: classification_score
      weight: 0.30
      type: binary
      description: "Correct email category classification"
    - name: priority_score
      weight: 0.20
      type: binary
      description: "Correct urgency/priority selection"
    - name: strategy_score
      weight: 0.20
      type: continuous
      range: [0.0, 1.0]
      description: "Strategy choice alignment with deterministic rubric"
    - name: response_score
      weight: 0.20
      type: continuous
      range: [0.0, 1.0]
      description: "Response quality based on tone, relevance, and memory use"
    - name: escalation_bonus
      weight: 0.10
      type: continuous
      # NOTE(review): this range goes below 0.0 while the overall reward
      # range is [0.0, 1.0]; presumably the total is clamped — confirm.
      range: [-0.2, 0.1]
      description: "Escalation bonus or penalty for appropriate decision"

tasks:
  - id: easy_refund
    name: Easy Refund Task
    difficulty: easy
    description: "Handle a straightforward billing refund request for a duplicate charge."
    ground_truth:
      category: billing
      priority: high
  - id: medium_tech
    name: Medium Tech Task
    difficulty: medium
    description: "Resolve a technical issue regarding app crashes and provide instructions."
    ground_truth:
      category: tech
      priority: medium
  - id: hard_escalation
    name: Hard Escalation Task
    difficulty: hard
    description: "Handle a high-value enterprise complaint that requires escalation or financial compensation."
    ground_truth:
      category: complaint
      priority: high

# HTTP method and path for each endpoint.
api:
  reset: POST /reset
  step: POST /step
  state: GET /state
  info: GET /info
  stats: GET /stats
  health: GET /health

# NOTE(review): assumed to be top-level keys, not part of `api` — confirm.
evaluation_metric: average_reward
success_threshold: 0.5
episodes_per_run: 3