Spaces:

Mmanikandan
/

SupportFlowAI

Sleeping

App Files Files Community

SupportFlowAI / openenv.yaml

Mmanikandan

phase 2 fix

c74d5fa 29 days ago

raw

history blame contribute delete

5.66 kB

	# Name and version of this environment definition.
	name: "customer_support_env"
	# Quoted so the value is always loaded as a string.
	version: "1.0.0"

	# Summary of the environment. Folded scalar (`>`): the indented lines below
	# are joined with single spaces into one paragraph ending in a newline.
	# The content must be indented deeper than the key to belong to the scalar.
	description: >
	  Multi-step Customer Support Email Workflow Environment.
	  Agents must complete a 5-step workflow:
	  classify → prioritize → decide_strategy → respond → escalate.
	  Each episode requires sequential decision-making with memory of previous steps.

	# Episode-level settings for the environment.
	environment:
	  type: episodic
	  max_steps_per_episode: 5
	  description: "Multi-step customer support workflow with classification, prioritization, strategy, response, and optional escalation."
	  reward_range: [0.0, 1.0]
	  deterministic: true
	  # These names match the `type` values declared under the top-level
	  # `actions`, `observations`, and `states` sections.
	  action_space: EmailAction
	  observation_space: EmailObservation
	  state_space: EmailState
	  # NOTE(review): only three entries are visible under `tasks` in this
	  # file; confirm that 12 is the intended count.
	  task_count: 12
	  episode_type: multi_step
	  api_version: 1
	  # NOTE(review): `action_schema` had no value of its own, so `tool_support`
	  # is nested under it here. Confirm against the consumer's schema; if
	  # `tool_support` is meant to be a sibling, give `action_schema` an
	  # explicit value instead of leaving it empty.
	  action_schema:
	    tool_support: true

	# Schema of the action object an agent submits.
	actions:
	  type: EmailAction
	  fields:
	    # Which workflow step (or tool call) this action performs.
	    - name: action_type
	      type: string
	      description: "Workflow step action type"
	      valid_values:
	        - "classify"
	        - "prioritize"
	        - "decide_strategy"
	        - "respond"
	        - "escalate"
	        - "use_tool"
	      required: true
	    # Free-text payload, bounded to 1..2000 characters.
	    - name: content
	      type: string
	      description: "Action content or response text"
	      min_length: 1
	      max_length: 2000
	      required: true
	    # Optional; `ToolAction` is not defined in this file.
	    - name: tool_action
	      type: ToolAction
	      description: "Optional tool action payload"
	      required: false

	# Schema of the observation object. Each entry under `fields` is one
	# attribute; list-typed fields declare their element type via `item_type`.
	observations:
	  type: EmailObservation
	  fields:
	    - name: email_id
	      type: string
	      description: "Unique email identifier"
	    - name: subject
	      type: string
	      description: "Email subject line"
	    - name: body
	      type: string
	      description: "Email body content"
	    - name: customer_history
	      type: string
	      description: "Summary of customer relationship history"
	    - name: step_count
	      type: integer
	      description: "Current step count in the workflow"
	    - name: workflow_step
	      type: string
	      description: "Current workflow step name"
	      valid_values:
	        - "classification"
	        - "prioritization"
	        - "strategy_decision"
	        - "response_generation"
	        - "escalation_decision"
	        - "completed"
	    - name: available_actions
	      type: list
	      item_type: string
	      description: "Permitted action types for the current step"
	    - name: available_tools
	      type: list
	      item_type: string
	      description: "Available tool names for the agent"
	    - name: previous_decisions
	      type: object
	      description: "Agent decisions made so far in this episode"
	    - name: customer_sentiment
	      type: string
	      description: "Detected sentiment of the customer email"
	      valid_values: ["positive", "neutral", "negative", "angry"]
	    - name: urgency_indicators
	      type: list
	      item_type: string
	      description: "Detected urgency-related keywords from the email"

	# Schema of the episode state object. The first five fields carry no
	# `required` flag; the per-step decision fields are marked
	# `required: false`.
	states:
	  type: EmailState
	  fields:
	    - name: episode_id
	      type: string
	      description: "Unique identifier for current episode"
	    - name: step_count
	      type: integer
	      description: "Number of steps taken"
	    - name: done
	      type: boolean
	      description: "Whether episode is complete"
	    - name: current_email
	      type: string
	      description: "Current email identifier"
	    - name: total_reward
	      type: float
	      description: "Cumulative episode reward"
	    # Per-step decisions, all optional.
	    - name: classification
	      type: string
	      description: "Classification decision"
	      required: false
	    - name: priority
	      type: string
	      description: "Priority decision"
	      required: false
	    - name: strategy
	      type: string
	      description: "Strategy decision"
	      required: false
	    - name: response
	      type: string
	      description: "Response content"
	      required: false
	    - name: escalation
	      type: object
	      description: "Escalation decision payload"
	      required: false

	# Reward definition. The component weights below sum to 1.00
	# (0.30 + 0.20 + 0.20 + 0.20 + 0.10).
	reward:
	  range: [0.0, 1.0]
	  # Folded scalar: the bullet lines are plain text and are joined with
	  # spaces into a single paragraph when loaded.
	  description: >
	    Continuous reward signal combining multiple workflow components:
	    - Classification correctness
	    - Priority correctness
	    - Strategy alignment
	    - Response quality
	    - Escalation suitability
	  components:
	    - name: classification_score
	      weight: 0.30
	      type: binary
	      description: "Correct email category classification"
	    - name: priority_score
	      weight: 0.20
	      type: binary
	      description: "Correct urgency/priority selection"
	    - name: strategy_score
	      weight: 0.20
	      type: continuous
	      range: [0.0, 1.0]
	      description: "Strategy choice alignment with deterministic rubric"
	    - name: response_score
	      weight: 0.20
	      type: continuous
	      range: [0.0, 1.0]
	      description: "Response quality based on tone, relevance, and memory use"
	    # NOTE(review): this range includes negative values while the overall
	    # reward range is [0.0, 1.0]; presumably the total is clamped — confirm.
	    - name: escalation_bonus
	      weight: 0.10
	      type: continuous
	      range: [-0.2, 0.1]
	      description: "Escalation bonus or penalty for appropriate decision"

	# Task entries. Each one carries an id, a display name, a difficulty
	# label, a description, and the expected category and priority under
	# `ground_truth`.
	tasks:
	  - id: easy_refund
	    name: Easy Refund Task
	    difficulty: easy
	    description: "Handle a straightforward billing refund request for a duplicate charge."
	    ground_truth:
	      category: billing
	      priority: high

	  - id: medium_tech
	    name: Medium Tech Task
	    difficulty: medium
	    description: "Resolve a technical issue regarding app crashes and provide instructions."
	    ground_truth:
	      category: tech
	      priority: medium

	  - id: hard_escalation
	    name: Hard Escalation Task
	    difficulty: hard
	    description: "Handle a high-value enterprise complaint that requires escalation or financial compensation."
	    ground_truth:
	      category: complaint
	      priority: high

	# HTTP endpoints, written as "<METHOD> <path>" for each operation.
	api:
	  reset: POST /reset
	  step: POST /step
	  state: GET /state
	  info: GET /info
	  stats: GET /stats
	  health: GET /health

	# Run-level evaluation settings.
	evaluation_metric: "average_reward"
	# Presumably the minimum value of the metric that counts as success;
	# verify against the evaluator.
	success_threshold: 0.5
	episodes_per_run: 3