# my-env / openenv.yaml
# exploring-solver's picture
# scoring components rebuilt 0-1
# a3c6db2
# Raw
# History Blame Contribute Delete
# 5.27 kB
# Environment metadata: identity, human-readable summary, and discovery tags.
name: SupportEnv
# Quoted so the version stays a string rather than being typed by the parser.
version: "1.0.0"
# Folded scalar (`>`): the body must be indented under the key; its line
# breaks fold into single spaces when loaded.
description: >
  An OpenEnv-compliant customer support ticket triage environment for SaaS platforms.
  Agents learn to classify tickets, extract structured information, and generate
  professional resolutions — skills directly transferable to real-world support automation.
domain: customer_support
tags:
  - openenv
  - customer-support
  - nlp
  - information-extraction
  - classification
  - generation
  - real-world
license: MIT
author: SupportEnv Contributors
# -------------------------------------------------
# Environment interface
# -------------------------------------------------
# HTTP endpoints exposed by the environment. Each entry lists the HTTP method,
# the route, and the request/params/response shapes; shape names refer to the
# entries declared under `models`.
interface:
  # Start an episode.
  reset:
    method: POST
    path: /reset
    request:
      task_id: string  # task1 | task2 | task3
      ticket_index: integer  # optional, 0-4
    response: Observation
  # Apply one action to a running episode.
  step:
    method: POST
    path: /step
    request:
      episode_id: string
      action: Action
    response: StepResult  # {observation, reward, done, info}
  # Read the current state of an episode.
  state:
    method: GET
    path: /state
    params:
      episode_id: string
    response: State
  # List the available tasks.
  # NOTE(review): `TaskInfo` is not declared under `models` in this file, and
  # `List[...]` is capitalised unlike the `list[...]` spellings used there —
  # confirm against the server implementation before relying on it.
  tasks:
    method: GET
    path: /tasks
    response: List[TaskInfo]
  # Score an episode.
  grader:
    method: POST
    path: /grader
    request:
      episode_id: string
    response: GraderResponse
  # Health probe; no request or response shape is declared for it.
  health:
    method: GET
    path: /health
# -------------------------------------------------
# Typed models
# -------------------------------------------------
# Field-level schemas for the payloads named in `interface`. Values are type
# descriptions (plain strings), not YAML types; `| null` marks a nullable field.
models:
  # Payload returned by /reset and embedded in StepResult.
  Observation:
    task_id: string
    task_description: string
    episode_id: string
    ticket: TicketInfo
    thread_history: list[dict]
    available_actions: list[string]
    step_number: integer
    max_steps: integer
    hint: string | null
  # The support ticket presented to the agent.
  TicketInfo:
    ticket_id: string
    subject: string
    body: string
    customer_tier: string  # free | pro | enterprise
    account_age_days: integer
    previous_tickets: integer
    attachments: list[string]
  # Agent action; every field other than action_type is nullable.
  Action:
    action_type: string  # classify | extract | respond | resolve | escalate | submit
    category: string | null
    priority: string | null
    extracted_entities: dict | null
    required_actions: list[string] | null
    response_text: string | null
    resolution_steps: list[string] | null
    escalation_team: string | null
    escalation_reason: string | null
  # Reward record carried inside StepResult.
  Reward:
    step_reward: float
    total_reward: float
    explanation: string
  # Response of /step.
  StepResult:
    observation: Observation
    reward: Reward
    done: boolean
    info: dict
  # Response of /state.
  State:
    task_id: string
    episode_id: string
    step_number: integer
    max_steps: integer
    done: boolean
    total_reward: float
    history: list[dict]
    final_score: float | null
  # Response of /grader.
  GraderResponse:
    episode_id: string
    task_id: string
    score: float  # 0.0 – 1.0
    breakdown: dict[string, float]
    feedback: string
# -------------------------------------------------
# Tasks
# -------------------------------------------------
# Task catalogue. Each task declares a display name, difficulty, step budget,
# description, scoring weights, and the number of tickets it contains.
# In every task the listed scoring weights (including `baseline`) sum to 0.99.
# NOTE(review): presumably `baseline` is an additive floor so scores fall in
# 0.01–0.99 — confirm against the grader implementation.
tasks:
  task1:
    name: "Ticket Classification"
    difficulty: easy
    max_steps: 3
    description: >
      Given a customer support ticket, classify it by category
      (billing | technical | account | feature_request | complaint | general)
      and priority (low | medium | high | critical).
    scoring:
      baseline: 0.01
      category_correct: 0.49
      priority_correct: 0.40
      efficiency: 0.09
    tickets: 5
  task2:
    name: "Information Extraction"
    difficulty: medium
    max_steps: 5
    description: >
      Extract structured entities (account IDs, names, amounts, dates, domains)
      from the ticket body and identify the list of required actions.
    scoring:
      baseline: 0.01
      entity_coverage: 0.59
      action_coverage: 0.30
      no_hallucination: 0.09
    tickets: 5
  task3:
    name: "Resolution Generation"
    difficulty: hard
    max_steps: 8
    description: >
      Generate a professional customer-facing response (response_text) and
      an ordered list of resolution steps. Scored on keyword coverage,
      step completeness, tone (apology, urgency, timeline), and response length.
    scoring:
      baseline: 0.01
      keyword_coverage: 0.29
      step_coverage: 0.30
      tone_compliance: 0.25
      length_adequate: 0.10
      no_empty_steps: 0.04
    tickets: 5
# -------------------------------------------------
# Reward design
# -------------------------------------------------
# Per-step reward shaping parameters.
reward:
  type: dense
  step_cost: -0.02  # small cost per step (encourages efficiency)
  submit_bonus: 0.05  # bonus for explicit submit action
  max_step_penalty: -0.10  # penalty for exhausting max_steps
  # Descriptive string, not a number: it loads as the text "up_to_1.0".
  grader_bonus: up_to_1.0  # grader score (0–1) added as terminal bonus
# -------------------------------------------------
# Reproducibility
# -------------------------------------------------
# Declarations about how repeatable runs of the environment are.
reproducibility:
  dataset: static  # all 15 tickets are fixed, no randomisation
  graders: deterministic  # rule-based, no LLM judge
  baseline_mode: heuristic  # no API key required for reference scores
# -------------------------------------------------
# Deployment
# -------------------------------------------------
# Runtime and hosting settings.
deployment:
  framework: FastAPI
  # Quoted: the value starts with `>`, a YAML indicator, and is a version
  # constraint that must stay a string.
  python: ">=3.10"
  port: 7860
  dockerfile: Dockerfile
  huggingface_space: true
  space_sdk: docker