Spaces:

exploring-solver
/

my-env

Sleeping

App Files Files Community

my-env / openenv.yaml

exploring-solver

scoring components rebuilt 0-1

a3c6db2 3 months ago

Raw

History Blame Contribute Delete

5.27 kB

	# Environment metadata: identity, summary, and discovery tags.
	name: SupportEnv
	version: "1.0.0"
	# Folded scalar: the indented lines below are joined into one paragraph.
	description: >
	  An OpenEnv-compliant customer support ticket triage environment for SaaS platforms.
	  Agents learn to classify tickets, extract structured information, and generate
	  professional resolutions — skills directly transferable to real-world support automation.

	domain: customer_support
	tags:
	  - openenv
	  - customer-support
	  - nlp
	  - information-extraction
	  - classification
	  - generation
	  - real-world

	license: MIT
	author: SupportEnv Contributors

	# -------------------------------------------------
	# Environment interface
	# -------------------------------------------------
	# One entry per HTTP endpoint. Each entry declares its method and path,
	# plus the request body / query params and response model where given.
	interface:
	  reset:
	    method: POST
	    path: /reset
	    request:
	      task_id: string  # task1 | task2 | task3
	      ticket_index: integer  # optional, 0-4
	    response: Observation

	  step:
	    method: POST
	    path: /step
	    request:
	      episode_id: string
	      action: Action
	    response: StepResult  # {observation, reward, done, info}

	  state:
	    method: GET
	    path: /state
	    params:
	      episode_id: string
	    response: State

	  tasks:
	    method: GET
	    path: /tasks
	    response: List[TaskInfo]

	  grader:
	    method: POST
	    path: /grader
	    request:
	      episode_id: string
	    response: GraderResponse

	  # No request or response shape is declared for the health endpoint.
	  health:
	    method: GET
	    path: /health

	# -------------------------------------------------
	# Typed models
	# -------------------------------------------------
	# Each model maps field name -> type string. A "| null" suffix marks a
	# field whose value may be null.
	models:
	  Observation:
	    task_id: string
	    task_description: string
	    episode_id: string
	    ticket: TicketInfo
	    thread_history: list[dict]
	    available_actions: list[string]
	    step_number: integer
	    max_steps: integer
	    hint: string | null

	  TicketInfo:
	    ticket_id: string
	    subject: string
	    body: string
	    customer_tier: string  # free | pro | enterprise
	    account_age_days: integer
	    previous_tickets: integer
	    attachments: list[string]

	  Action:
	    action_type: string  # classify | extract | respond | resolve | escalate | submit
	    category: string | null
	    priority: string | null
	    extracted_entities: dict | null
	    required_actions: list[string] | null
	    response_text: string | null
	    resolution_steps: list[string] | null
	    escalation_team: string | null
	    escalation_reason: string | null

	  Reward:
	    step_reward: float
	    total_reward: float
	    explanation: string

	  StepResult:
	    observation: Observation
	    reward: Reward
	    done: boolean
	    info: dict

	  State:
	    task_id: string
	    episode_id: string
	    step_number: integer
	    max_steps: integer
	    done: boolean
	    total_reward: float
	    history: list[dict]
	    final_score: float | null

	  GraderResponse:
	    episode_id: string
	    task_id: string
	    score: float  # 0.0 – 1.0
	    breakdown: dict[string, float]
	    feedback: string

	# -------------------------------------------------
	# Tasks
	# -------------------------------------------------
	# Each task declares a name, difficulty, step budget, description,
	# scoring weights, and ticket count.
	# NOTE(review): in every task, baseline plus the component weights sums
	# to 0.99, not 1.0 — presumably intentional; confirm against the grader.
	tasks:
	  task1:
	    name: "Ticket Classification"
	    difficulty: easy
	    max_steps: 3
	    description: >
	      Given a customer support ticket, classify it by category
	      (billing | technical | account | feature_request | complaint | general)
	      and priority (low | medium | high | critical).
	    scoring:
	      baseline: 0.01
	      category_correct: 0.49
	      priority_correct: 0.40
	      efficiency: 0.09
	    tickets: 5

	  task2:
	    name: "Information Extraction"
	    difficulty: medium
	    max_steps: 5
	    description: >
	      Extract structured entities (account IDs, names, amounts, dates, domains)
	      from the ticket body and identify the list of required actions.
	    scoring:
	      baseline: 0.01
	      entity_coverage: 0.59
	      action_coverage: 0.30
	      no_hallucination: 0.09
	    tickets: 5

	  task3:
	    name: "Resolution Generation"
	    difficulty: hard
	    max_steps: 8
	    description: >
	      Generate a professional customer-facing response (response_text) and
	      an ordered list of resolution steps. Scored on keyword coverage,
	      step completeness, tone (apology, urgency, timeline), and response length.
	    scoring:
	      baseline: 0.01
	      keyword_coverage: 0.29
	      step_coverage: 0.30
	      tone_compliance: 0.25
	      length_adequate: 0.10
	      no_empty_steps: 0.04
	    tickets: 5

	# -------------------------------------------------
	# Reward design
	# -------------------------------------------------
	# Per-step and terminal reward terms.
	reward:
	  type: dense
	  step_cost: -0.02  # small cost per step (encourages efficiency)
	  submit_bonus: 0.05  # bonus for explicit submit action
	  max_step_penalty: -0.10  # penalty for exhausting max_steps
	  # A descriptive string, not a number.
	  grader_bonus: up_to_1.0  # grader score (0–1) added as terminal bonus

	# -------------------------------------------------
	# Reproducibility
	# -------------------------------------------------
	# Declares how the dataset, graders, and baseline are kept repeatable.
	reproducibility:
	  dataset: static  # all 15 tickets are fixed, no randomisation
	  graders: deterministic  # rule-based, no LLM judge
	  baseline_mode: heuristic  # no API key required for reference scores

	# -------------------------------------------------
	# Deployment
	# -------------------------------------------------
	# Runtime and hosting settings.
	deployment:
	  framework: FastAPI
	  # Quoted so the version constraint stays a string.
	  python: ">=3.10"
	  port: 7860
	  dockerfile: Dockerfile
	  huggingface_space: true
	  space_sdk: docker