# Manifest header: identifier, version, and human-readable summary of the
# customer support email workflow environment.
name: customer_support_env
# Quoted so the value is always read as a string, never as a number.
version: '1.0.0'

# Folded scalar: the wrapped lines below are joined with single spaces.
description: >
  Multi-step Customer Support Email Workflow Environment. Agents must
  complete a 5-step workflow: classify → prioritize → decide_strategy →
  respond → escalate. Each episode requires sequential decision-making
  with memory of previous steps.

# Environment descriptor: episode shape, reward bounds, and the names of the
# action / observation / state schemas declared under `actions`,
# `observations`, and `states`.
environment:
  type: episodic
  max_steps_per_episode: 5
  # Folded and stripped (`>-`): parses to one line with no trailing newline.
  description: >-
    Multi-step customer support workflow with classification, prioritization,
    strategy, response, and optional escalation.
  reward_range: [0.0, 1.0]
  deterministic: true
  action_space: EmailAction
  observation_space: EmailObservation
  state_space: EmailState
  # NOTE(review): only three entries are listed under `tasks` in this file;
  # confirm what this count of 12 refers to.
  task_count: 12
  episode_type: multi_step
  api_version: 1
  action_schema:
    tool_support: true

# Field schema for the EmailAction type.
actions:
  type: EmailAction
  fields:
    # `use_tool` is listed in addition to the five workflow steps named in
    # the top-level description.
    - name: action_type
      type: string
      description: 'Workflow step action type'
      valid_values:
        - classify
        - prioritize
        - decide_strategy
        - respond
        - escalate
        - use_tool
      required: true
    # Free-text payload, bounded to 1..2000 characters by the limits below.
    - name: content
      type: string
      description: 'Action content or response text'
      min_length: 1
      max_length: 2000
      required: true
    # NOTE(review): ToolAction is not defined in this file; presumably it is
    # declared by the implementation — confirm.
    - name: tool_action
      type: ToolAction
      description: 'Optional tool action payload'
      required: false

# Field schema for the EmailObservation type.
observations:
  type: EmailObservation
  fields:
    # --- Email content ---
    - name: email_id
      type: string
      description: 'Unique email identifier'
    - name: subject
      type: string
      description: 'Email subject line'
    - name: body
      type: string
      description: 'Email body content'
    - name: customer_history
      type: string
      description: 'Summary of customer relationship history'

    # --- Workflow progress ---
    - name: step_count
      type: integer
      description: 'Current step count in the workflow'
    # These step names differ from the action_type values declared under
    # `actions` (e.g. "classification" here vs "classify" there).
    - name: workflow_step
      type: string
      description: 'Current workflow step name'
      valid_values:
        - classification
        - prioritization
        - strategy_decision
        - response_generation
        - escalation_decision
        - completed
    - name: available_actions
      type: list
      item_type: string
      description: 'Permitted action types for the current step'
    - name: available_tools
      type: list
      item_type: string
      description: 'Available tool names for the agent'
    - name: previous_decisions
      type: object
      description: 'Agent decisions made so far in this episode'

    # --- Signals derived from the email ---
    - name: customer_sentiment
      type: string
      description: 'Detected sentiment of the customer email'
      valid_values:
        - positive
        - neutral
        - negative
        - angry
    - name: urgency_indicators
      type: list
      item_type: string
      description: 'Detected urgency-related keywords from the email'

# Field schema for the EmailState type.
states:
  type: EmailState
  fields:
    # --- Episode bookkeeping (no `required` flag given for these) ---
    - name: episode_id
      type: string
      description: 'Unique identifier for current episode'
    - name: step_count
      type: integer
      description: 'Number of steps taken'
    - name: done
      type: boolean
      description: 'Whether episode is complete'
    - name: current_email
      type: string
      description: 'Current email identifier'
    - name: total_reward
      type: float
      description: 'Cumulative episode reward'

    # --- Per-step decisions, each explicitly marked optional ---
    - name: classification
      type: string
      description: 'Classification decision'
      required: false
    - name: priority
      type: string
      description: 'Priority decision'
      required: false
    - name: strategy
      type: string
      description: 'Strategy decision'
      required: false
    - name: response
      type: string
      description: 'Response content'
      required: false
    - name: escalation
      type: object
      description: 'Escalation decision payload'
      required: false

# Reward specification: overall range plus the weighted components that
# make up the signal. The component weights below sum to 1.00.
reward:
  # Same bounds as `environment.reward_range`.
  range: [0.0, 1.0]
  # Literal block scalar (`|`) so each bullet stays on its own line; a folded
  # scalar (`>`) would join the bullets into a single run-on line.
  description: |
    Continuous reward signal combining multiple workflow components:
    - Classification correctness
    - Priority correctness
    - Strategy alignment
    - Response quality
    - Escalation suitability
  components:
    - name: classification_score
      weight: 0.30
      type: binary
      description: "Correct email category classification"
    - name: priority_score
      weight: 0.20
      type: binary
      description: "Correct urgency/priority selection"
    - name: strategy_score
      weight: 0.20
      type: continuous
      range: [0.0, 1.0]
      description: "Strategy choice alignment with deterministic rubric"
    - name: response_score
      weight: 0.20
      type: continuous
      range: [0.0, 1.0]
      description: "Response quality based on tone, relevance, and memory use"
    # NOTE(review): the lower bound here is negative, so a combined score can
    # fall below the overall 0.0 minimum unless the implementation clamps it;
    # also unclear whether `weight` scales this range — confirm.
    - name: escalation_bonus
      weight: 0.10
      type: continuous
      range: [-0.2, 0.1]
      description: "Escalation bonus or penalty for appropriate decision"

# Task catalogue: one entry per difficulty tier, each with the expected
# category and priority labels.
# NOTE(review): `environment.task_count` is 12 but three tasks are listed
# here — confirm which is authoritative.
# NOTE(review): ground truth covers category and priority only; expected
# values for strategy, response, and escalation are not given in this file.
tasks:
  - id: easy_refund
    name: Easy Refund Task
    difficulty: easy
    description: Handle a straightforward billing refund request for a duplicate charge.
    ground_truth:
      category: billing
      priority: high

  - id: medium_tech
    name: Medium Tech Task
    difficulty: medium
    description: Resolve a technical issue regarding app crashes and provide instructions.
    ground_truth:
      category: tech
      priority: medium

  - id: hard_escalation
    name: Hard Escalation Task
    difficulty: hard
    description: Handle a high-value enterprise complaint that requires escalation or financial compensation.
    ground_truth:
      category: complaint
      priority: high

# HTTP endpoints, each written as "<METHOD> <path>".
api:
  # Mutating calls (POST).
  reset: 'POST /reset'
  step: 'POST /step'
  # Read-only calls (GET).
  state: 'GET /state'
  info: 'GET /info'
  stats: 'GET /stats'
  health: 'GET /health'

# Run-level evaluation settings.
# Name of the metric used for evaluation.
evaluation_metric: average_reward
# NOTE(review): presumably the minimum average_reward for a run to count as
# a success; whether the comparison is inclusive is not stated — confirm.
success_threshold: 0.5
# NOTE(review): equals the number of entries under `tasks`; presumably one
# episode per task — confirm.
episodes_per_run: 3