teja944 commited on
Commit
9fdf681
·
verified ·
1 Parent(s): 94dcee0

Upload 13 files

Browse files
Files changed (13) hide show
  1. Dockerfile +28 -0
  2. README.md +39 -6
  3. env.py +121 -0
  4. full_validate.sh +185 -0
  5. inference.py +102 -0
  6. local_validate.sh +15 -0
  7. models.py +27 -0
  8. openenv.yaml +13 -0
  9. pyproject.toml +22 -0
  10. requirements.txt +6 -0
  11. server/app.py +21 -0
  12. tasks.py +46 -0
  13. uv.lock +0 -0
Dockerfile ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Use lightweight python image
2
+ FROM python:3.10-slim
3
+
4
+ # Set strict limits awareness
5
+ ENV PYTHONUNBUFFERED=1
6
+ ENV API_BASE_URL="https://api.openai.com/v1"
7
+ ENV MODEL_NAME="gpt-4.1-mini"
8
+ ENV ENABLE_WEB_INTERFACE=true
9
+
10
+ # Setup working directory
11
+ WORKDIR /app
12
+ ENV PYTHONPATH=/app
13
+
14
+ # Install dependencies securely
15
+ COPY requirements.txt .
16
+ RUN pip install --no-cache-dir -r requirements.txt
17
+
18
+ # Copy all core files
19
+ COPY . /app
20
+
21
+ # Ensure HF Space port exposes correctly
22
+ EXPOSE 7860
23
+
24
+ # Alternative CMD for testing locally:
25
+ # CMD ["python", "inference.py"]
26
+
27
+ # CMD strictly runs the FastAPI server
28
+ CMD ["uvicorn", "server.app:app", "--host", "0.0.0.0", "--port", "7860"]
README.md CHANGED
@@ -1,11 +1,44 @@
1
  ---
2
- title: Meta
3
- emoji: 🐢
4
- colorFrom: indigo
5
- colorTo: pink
6
  sdk: docker
 
 
 
7
  pinned: false
8
- license: mit
9
  ---
 
10
 
11
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: Customer Support Triage
3
+ emoji: 🎧
4
+ colorFrom: blue
5
+ colorTo: indigo
6
  sdk: docker
7
+ sdk_version: "1.0.0"
8
+ python_version: "3.10.0"
9
+ app_file: app.py
10
  pinned: false
 
11
  ---
12
+ # Customer Support Triage Environment
13
 
14
+ A real-world customer support simulation environment requiring information extraction, system actions, and routing.
15
+ Built using the official OpenEnv interface specification.
16
+
17
+ ## Overview
18
+ This environment simulates a text-based customer support chat system. The agent acts on incoming support tickets and chooses actions such as asking the user for missing info, processing refunds, or routing the ticket to specialized teams.
19
+
20
+ ## Action & Observation Spaces
21
+ - **Observation:** `ticket_id`, `customer_message`, `history`, `missing_info`, `status`, `refund_processed`, `done`, `reward`.
22
+ - **Action:**
23
+ - `action_type`: One of 'ROUTE', 'ASK_INFO', 'REFUND', 'CLOSE'
24
+ - `argument`: The specific queue (e.g., 'BILLING'), the info to ask for (e.g., 'serial_number'), or order to refund.
25
+
26
+ ## Tasks
27
+ 1. **easy_password_reset** (Easy): A simple IT support routing task requiring no additional info.
28
+ 2. **medium_hardware_issue** (Medium): Requires the agent to first ask for the `serial_number` before routing to hardware support.
29
+ 3. **hard_refund_processing** (Hard): Requires the agent to ask for `order_id` and `photo_evidence`, process a refund by taking the REFUND action, and finally route the ticket to billing.
30
+
31
+ ## Usage
32
+ Start the environment server using Docker:
33
+ ```bash
34
+ docker build -t customer-support-env .
35
+ docker run -p 7860:7860 customer-support-env
36
+ ```
37
+ Or start directly via python:
38
+ ```bash
39
+ uvicorn server.app:app --port 8000
40
+ ```
41
+ Run the baseline inference script:
42
+ ```bash
43
+ python inference.py
44
+ ```
env.py ADDED
@@ -0,0 +1,121 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from models import Observation, Action, State
2
+ from tasks import TASKS, grader
3
+ import copy
4
+ from typing import Any, Optional
5
+ import openenv.core.env_server as es
6
+
7
class CustomerSupportEnv(es.Environment):
    """Simulated customer-support ticket triage environment.

    Each episode presents one ticket taken from ``TASKS``. The agent can ask
    the customer for information (``ASK_INFO``), process a refund
    (``REFUND``), route the ticket to a queue (``ROUTE``) or close it
    (``CLOSE``). A ``ROUTE`` issued once no information is missing, or any
    ``CLOSE``, ends the episode and is scored by ``grader``. The episode is
    also ended, with a penalty, once ``max_steps`` actions have been taken
    without finishing.
    """

    def __init__(self, **kwargs):
        """Create the environment and load task 0 via ``reset``."""
        super().__init__(**kwargs)
        self.current_task_idx = 0
        self.state_data = {}
        self.step_count = 0
        self.max_steps = 10
        self.done = False
        self.reset()

    def reset(self, seed: Optional[int] = None, episode_id: Optional[str] = None, task_idx: int = 0, **kwargs: Any) -> Observation:
        """Start a new episode for ``TASKS[task_idx]``.

        Args:
            seed: Accepted for interface compatibility; not used here.
            episode_id: Stored in the episode state and reported by ``state``.
            task_idx: Index into ``TASKS`` selecting the ticket to simulate.
            **kwargs: Ignored.

        Returns:
            The initial observation, with reward 0.0 and empty feedback.
        """
        self.current_task_idx = task_idx
        task = TASKS[self.current_task_idx]
        self.step_count = 0
        self.done = False

        self.state_data = {
            "ticket_id": f"TKT-{1000 + task_idx}",
            "customer_message": task.initial_msg,
            "history": [f"Customer: {task.initial_msg}"],
            "missing_info": task.required_info.copy(),
            "collected_info": [],
            "route": None,
            "refund_processed": False,
            "status": "OPEN",
            "episode_id": episode_id,
        }
        return self._get_obs(reward=0.0, feedback="")

    @property
    def state(self) -> State:
        """Return a snapshot of the episode state (lists are deep-copied)."""
        return State(
            ticket_id=self.state_data.get("ticket_id", ""),
            customer_message=self.state_data.get("customer_message", ""),
            history=copy.deepcopy(self.state_data.get("history", [])),
            missing_info=copy.deepcopy(self.state_data.get("missing_info", [])),
            status=self.state_data.get("status", "OPEN"),
            refund_processed=self.state_data.get("refund_processed", False),
            episode_id=self.state_data.get("episode_id", None),
            step_count=self.step_count,
        )

    def _get_obs(self, reward: float = 0.0, feedback: str = "") -> Observation:
        """Build the observation for the current state.

        The state is deep-copied first so that neither the observation fields
        nor ``metadata["state"]`` alias the environment's internal lists and
        dict; this mirrors what the ``state`` property does.
        """
        snapshot = copy.deepcopy(self.state_data)
        return Observation(
            ticket_id=snapshot["ticket_id"],
            customer_message=snapshot["customer_message"],
            history=snapshot["history"],
            missing_info=snapshot["missing_info"],
            status=snapshot["status"],
            refund_processed=snapshot["refund_processed"],
            done=self.done,
            reward=reward,
            metadata={"feedback": feedback, "state": snapshot},
        )

    def _ask_info(self, task, action):
        """Handle ASK_INFO; return ``(reward, feedback)``.

        A still-missing field counts as asked for when its name appears,
        case-insensitively, inside the action argument. Only the first such
        field is collected per action.
        """
        asked = action.argument.lower()
        pending = self.state_data["missing_info"]
        req = next((item for item in pending if item.lower() in asked), None)
        if req is None:
            return -0.1, "Asked for unnecessary information."

        pending.remove(req)
        self.state_data["collected_info"].append(req)
        reply = f"Here is my {req}: [MOCK_DATA]"
        self.state_data["history"].extend([f"Agent: {action.argument}", f"Customer: {reply}"])
        self.state_data["customer_message"] = reply
        return 0.2, f"Successfully collected {req}"

    def _refund(self, task, action):
        """Handle REFUND; return ``(reward, feedback)``.

        The refund reward is granted only once per episode; repeating REFUND
        after it succeeded is penalised so the reward cannot be farmed.
        """
        if not task.needs_refund or "order_id" not in self.state_data["collected_info"]:
            return -0.5, "Cannot process refund without order ID or refund not required."
        if self.state_data["refund_processed"]:
            return -0.1, "Refund already processed."
        self.state_data["refund_processed"] = True
        return 0.3, "Refund processed successfully."

    def _route(self, task, action):
        """Handle ROUTE; return ``(reward, feedback)``.

        The requested queue is always recorded. The episode only ends, and is
        graded, when no required information is still missing.
        """
        self.state_data["route"] = action.argument
        if self.state_data["missing_info"]:
            return -0.5, "Routed prematurely without gathering required info."
        self.done = True
        final_score = grader(task, self.state_data)
        return float(final_score), f"Ticket routed. Final Score: {final_score}"

    def _close(self, task, action):
        """Handle CLOSE: mark the ticket closed, end the episode and grade it."""
        self.done = True
        self.state_data["status"] = "CLOSED"
        final_score = grader(task, self.state_data)
        return float(final_score), f"Ticket closed. Final Score: {final_score}"

    def step(self, action: Action, timeout_s: Optional[float] = None, **kwargs: Any) -> Observation:
        """Apply one agent action and return the resulting observation.

        Args:
            action: The action to apply; ``action_type`` selects the handler
                and ``argument`` carries the queue name or the question text.
            timeout_s: Accepted for interface compatibility; not used here.
            **kwargs: Ignored.

        Returns:
            The observation after the action, carrying the step reward and a
            feedback string in ``metadata["feedback"]``. If the episode was
            already finished, the state is unchanged and the reward is 0.0.
        """
        if self.done:
            return self._get_obs(reward=0.0, feedback="Episode already done")

        self.step_count += 1
        task = TASKS[self.current_task_idx]

        handlers = {
            "ASK_INFO": self._ask_info,
            "REFUND": self._refund,
            "ROUTE": self._route,
            "CLOSE": self._close,
        }
        handler = handlers.get(action.action_type)
        if handler is None:
            # Unrecognised action types are a no-op with neutral reward.
            reward_val, feedback = 0.0, ""
        else:
            reward_val, feedback = handler(task, action)

        # Enforce the step budget only after the action has been applied, so a
        # finishing action taken on the last allowed step is still graded
        # instead of being discarded and penalised.
        if not self.done and self.step_count >= self.max_steps:
            self.done = True
            reward_val, feedback = -0.5, "Max steps reached"

        return self._get_obs(reward=reward_val, feedback=feedback)
full_validate.sh ADDED
@@ -0,0 +1,185 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env bash
2
+ #
3
+ # validate-submission.sh — OpenEnv Submission Validator
4
+ #
5
+ # Checks that your HF Space is live, Docker image builds, and openenv validate passes.
6
+ #
7
+ # Prerequisites:
8
+ # - Docker: https://docs.docker.com/get-docker/
9
+ # - openenv-core: pip install openenv-core
10
+ # - curl (usually pre-installed)
11
+ #
12
+ # Run:
13
+ # curl -fsSL https://raw.githubusercontent.com/<owner>/<repo>/main/scripts/validate-submission.sh | bash -s -- <ping_url> [repo_dir]
14
+ #
15
+ # Or download and run locally:
16
+ # chmod +x validate-submission.sh
17
+ # ./validate-submission.sh <ping_url> [repo_dir]
18
+ #
19
+ # Arguments:
20
+ # ping_url Your HuggingFace Space URL (e.g. https://your-space.hf.space)
21
+ # repo_dir Path to your repo (default: current directory)
22
+ #
23
+ # Examples:
24
+ # ./validate-submission.sh https://my-team.hf.space
25
+ # ./validate-submission.sh https://my-team.hf.space ./my-repo
26
+ #
27
+
28
+ set -uo pipefail
29
+
30
+ DOCKER_BUILD_TIMEOUT=600
31
+ if [ -t 1 ]; then
32
+ RED='\033[0;31m'
33
+ GREEN='\033[0;32m'
34
+ YELLOW='\033[1;33m'
35
+ BOLD='\033[1m'
36
+ NC='\033[0m'
37
+ else
38
+ RED='' GREEN='' YELLOW='' BOLD='' NC=''
39
+ fi
40
+
41
+ run_with_timeout() {
42
+ local secs="$1"; shift
43
+ if command -v timeout &>/dev/null; then
44
+ timeout "$secs" "$@"
45
+ elif command -v gtimeout &>/dev/null; then
46
+ gtimeout "$secs" "$@"
47
+ else
48
+ "$@" &
49
+ local pid=$!
50
+ ( sleep "$secs" && kill "$pid" 2>/dev/null ) &
51
+ local watcher=$!
52
+ wait "$pid" 2>/dev/null
53
+ local rc=$?
54
+ kill "$watcher" 2>/dev/null
55
+ wait "$watcher" 2>/dev/null
56
+ return $rc
57
+ fi
58
+ }
59
+
60
+ portable_mktemp() {
61
+ local prefix="${1:-validate}"
62
+ mktemp "${TMPDIR:-/tmp}/${prefix}-XXXXXX" 2>/dev/null || mktemp
63
+ }
64
+
65
+ CLEANUP_FILES=()
66
+ cleanup() { rm -f "${CLEANUP_FILES[@]+"${CLEANUP_FILES[@]}"}"; }
67
+ trap cleanup EXIT
68
+
69
+ PING_URL="${1:-}"
70
+ REPO_DIR="${2:-.}"
71
+
72
+ if [ -z "$PING_URL" ]; then
73
+ printf "Usage: %s <ping_url> [repo_dir]\n" "$0"
74
+ printf "\n"
75
+ printf " ping_url Your HuggingFace Space URL (e.g. https://your-space.hf.space)\n"
76
+ printf " repo_dir Path to your repo (default: current directory)\n"
77
+ exit 1
78
+ fi
79
+
80
+ if ! REPO_DIR="$(cd "$REPO_DIR" 2>/dev/null && pwd)"; then
81
+ printf "Error: directory '%s' not found\n" "${2:-.}"
82
+ exit 1
83
+ fi
84
+ PING_URL="${PING_URL%/}"
85
+ export PING_URL
86
+ PASS=0
87
+
88
+ log() { printf "[%s] %b\n" "$(date -u +%H:%M:%S)" "$*"; }
89
+ pass() { log "${GREEN}PASSED${NC} -- $1"; PASS=$((PASS + 1)); }
90
+ fail() { log "${RED}FAILED${NC} -- $1"; }
91
+ hint() { printf " ${YELLOW}Hint:${NC} %b\n" "$1"; }
92
+ stop_at() {
93
+ printf "\n"
94
+ printf "${RED}${BOLD}Validation stopped at %s.${NC} Fix the above before continuing.\n" "$1"
95
+ exit 1
96
+ }
97
+
98
+ printf "\n"
99
+ printf "${BOLD}========================================${NC}\n"
100
+ printf "${BOLD} OpenEnv Submission Validator${NC}\n"
101
+ printf "${BOLD}========================================${NC}\n"
102
+ log "Repo: $REPO_DIR"
103
+ log "Ping URL: $PING_URL"
104
+ printf "\n"
105
+
106
+ log "${BOLD}Step 1/3: Pinging HF Space${NC} ($PING_URL/reset) ..."
107
+
108
+ CURL_OUTPUT=$(portable_mktemp "validate-curl")
109
+ CLEANUP_FILES+=("$CURL_OUTPUT")
110
# Probe POST $PING_URL/reset and keep only the HTTP status code.
# curl's -w "%{http_code}" already prints "000" when no response is received,
# so the fallback must assign rather than append: appending produced "000000",
# which never matched the "not reachable" branch below. stderr is discarded
# instead of being redirected onto the same file that -o writes the body to.
HTTP_CODE=$(curl -s -o "$CURL_OUTPUT" -w "%{http_code}" -X POST \
  -H "Content-Type: application/json" -d '{}' \
  "$PING_URL/reset" --max-time 30 2>/dev/null) || HTTP_CODE="000"
113
+
114
+ if [ "$HTTP_CODE" = "200" ]; then
115
+ pass "HF Space is live and responds to /reset"
116
+ elif [ "$HTTP_CODE" = "000" ]; then
117
+ fail "HF Space not reachable (connection failed or timed out)"
118
+ hint "Check your network connection and that the Space is running."
119
+ hint "Try: curl -s -o /dev/null -w '%%{http_code}' -X POST $PING_URL/reset"
120
+ stop_at "Step 1"
121
+ else
122
+ fail "HF Space /reset returned HTTP $HTTP_CODE (expected 200)"
123
+ hint "Make sure your Space is running and the URL is correct."
124
+ hint "Try opening $PING_URL in your browser first."
125
+ stop_at "Step 1"
126
+ fi
127
+
128
+ log "${BOLD}Step 2/3: Running docker build${NC} ..."
129
+
130
+ if ! command -v docker &>/dev/null; then
131
+ fail "docker command not found"
132
+ hint "Install Docker: https://docs.docker.com/get-docker/"
133
+ stop_at "Step 2"
134
+ fi
135
+
136
+ if [ -f "$REPO_DIR/Dockerfile" ]; then
137
+ DOCKER_CONTEXT="$REPO_DIR"
138
+ elif [ -f "$REPO_DIR/server/Dockerfile" ]; then
139
+ DOCKER_CONTEXT="$REPO_DIR/server"
140
+ else
141
+ fail "No Dockerfile found in repo root or server/ directory"
142
+ stop_at "Step 2"
143
+ fi
144
+
145
+ log " Found Dockerfile in $DOCKER_CONTEXT"
146
+
147
+ BUILD_OK=false
148
+ BUILD_OUTPUT=$(run_with_timeout "$DOCKER_BUILD_TIMEOUT" docker build "$DOCKER_CONTEXT" 2>&1) && BUILD_OK=true
149
+
150
+ if [ "$BUILD_OK" = true ]; then
151
+ pass "Docker build succeeded"
152
+ else
153
+ fail "Docker build failed (timeout=${DOCKER_BUILD_TIMEOUT}s)"
154
+ printf "%s\n" "$BUILD_OUTPUT" | tail -20
155
+ stop_at "Step 2"
156
+ fi
157
+
158
+ log "${BOLD}Step 3/3: Running openenv validate${NC} ..."
159
+
160
+ if ! command -v openenv &>/dev/null; then
161
+ fail "openenv command not found"
162
+ hint "Install it: pip install openenv-core"
163
+ stop_at "Step 3"
164
+ fi
165
+
166
+ VALIDATE_OK=false
167
+ VALIDATE_OUTPUT=$(cd "$REPO_DIR" && openenv validate 2>&1) && VALIDATE_OK=true
168
+
169
+ if [ "$VALIDATE_OK" = true ]; then
170
+ pass "openenv validate passed"
171
+ [ -n "$VALIDATE_OUTPUT" ] && log " $VALIDATE_OUTPUT"
172
+ else
173
+ fail "openenv validate failed"
174
+ printf "%s\n" "$VALIDATE_OUTPUT"
175
+ stop_at "Step 3"
176
+ fi
177
+
178
+ printf "\n"
179
+ printf "${BOLD}========================================${NC}\n"
180
+ printf "${GREEN}${BOLD} All 3/3 checks passed!${NC}\n"
181
+ printf "${GREEN}${BOLD} Your submission is ready to submit.${NC}\n"
182
+ printf "${BOLD}========================================${NC}\n"
183
+ printf "\n"
184
+
185
+ exit 0
inference.py ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ from openai import OpenAI
4
+ from env import CustomerSupportEnv
5
+ from models import Action
6
+ from tasks import TASKS
7
+
8
+ try:
9
+ from dotenv import load_dotenv
10
+ load_dotenv()
11
+ except ImportError:
12
+ pass
13
+
14
+
15
+ # 1. Required Environment Variables
16
+ API_BASE_URL = os.getenv("API_BASE_URL", "https://api.openai.com/v1")
17
+ MODEL_NAME = os.getenv("MODEL_NAME", "gpt-4.1-mini")
18
+ HF_TOKEN = os.getenv("HF_TOKEN")
19
+
20
+ if HF_TOKEN is None:
21
+ raise ValueError("HF_TOKEN environment variable is required")
22
+
23
+ # 2. Open AI Client Only
24
+ client = OpenAI(
25
+ base_url=API_BASE_URL,
26
+ api_key=HF_TOKEN
27
+ )
28
+
29
def _strip_code_fence(text: str) -> str:
    """Return *text* without a surrounding Markdown code fence, if present.

    Handles an opening fence with or without a ``json`` language tag and only
    removes a closing fence when one is actually there, so unfenced or
    half-fenced JSON is never truncated.
    """
    text = text.strip()
    if not text.startswith("```"):
        return text
    text = text[3:]
    if text[:4].lower() == "json":
        text = text[4:]
    if text.endswith("```"):
        text = text[:-3]
    return text.strip()


def run_inference():
    """Run the baseline LLM agent once over every task in ``TASKS``.

    For each task the environment is reset, then the model is queried for one
    JSON action per step until the environment reports ``done`` or a step
    fails. Progress is printed as ``[START]``, ``[STEP]`` and ``[END]`` lines.
    """
    env = CustomerSupportEnv()

    for idx, task in enumerate(TASKS):
        obs = env.reset(task_idx=idx)
        done = False
        step_idx = 0
        rewards_history = []

        # [START] FORMAT
        print(f"[START] task={task.name} env=customer_support model={MODEL_NAME}")

        while not done:
            step_idx += 1
            error_msg = "null"
            reward_val = 0.00
            action_str = ""

            # The prompt text is part of the agent's behavior; keep it verbatim.
            prompt = (
                "System: You are an automated customer support AI. You MUST respond strictly in JSON format matching this schema: "
                "{\"action_type\": \"ROUTE\"|\"ASK_INFO\"|\"REFUND\"|\"CLOSE\", \"argument\": \"string\"}\n\n"
                "CRITICAL RULES:\n"
                "1. If 'missing_info' in the observation is empty ([]), DO NOT use ASK_INFO. You must take action (ROUTE or REFUND).\n"
                "2. If 'missing_info' contains items, you MUST use ASK_INFO. The 'argument' MUST contain the EXACT string from 'missing_info' (e.g., 'serial_number', 'order_id', 'photo_evidence'). Ask for ONLY ONE missing item at a time\n"
                "3. When using ROUTE, the 'argument' MUST be exactly one of these three codes: 'IT_SUPPORT', 'HARDWARE_SUPPORT', or 'BILLING'. Do not output full sentences.\n"
                "4. If the user wants a refund, and you have collected 'order_id', you MUST first use the REFUND action. Then, in the next step, use ROUTE with 'BILLING'.\n\n"
                f"Observation: {obs.model_dump_json()}"
            )

            try:
                response = client.chat.completions.create(
                    model=MODEL_NAME,
                    messages=[{"role": "user", "content": prompt}],
                    response_format={"type": "json_object"}
                )

                content = response.choices[0].message.content
                if content is None:
                    raise ValueError("Model returned an empty response")

                # Some models wrap the JSON in Markdown ticks despite json mode.
                action_data = json.loads(_strip_code_fence(content))

                # Pydantic validation
                action = Action(**action_data)
                action_str = f"{action.action_type}('{action.argument}')"

                # Env Step
                obs = env.step(action)
                done = obs.done
                reward_val = float(obs.reward) if obs.reward is not None else 0.0
                rewards_history.append(reward_val)

            except Exception as e:
                # Deliberate catch-all at the step boundary: any API, parsing or
                # validation failure ends this task and is reported in the
                # [STEP] line rather than aborting the remaining tasks.
                error_msg = str(e).replace('\n', ' ')
                action_str = "ERROR"
                done = True
                rewards_history.append(0.00)

            # [STEP] FORMAT
            print(f"[STEP] step={step_idx} action={action_str} reward={reward_val:.2f} done={str(done).lower()} error={error_msg}")

        # [END] FORMAT
        # The reported score is the sum of all step rewards for the task.
        final_score = sum(rewards_history) if rewards_history else 0.0
        success = final_score > 0.8
        rewards_str = ",".join([f"{r:.2f}" for r in rewards_history])
        print(f"[END] success={str(success).lower()} steps={step_idx} score={final_score:.2f} rewards={rewards_str}")
100
+
101
+ if __name__ == "__main__":
102
+ run_inference()
local_validate.sh ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env bash
2
+ set -uo pipefail
3
+
4
+ PING_URL="http://localhost:7860"
5
+ REPO_DIR="."
6
+
7
+ echo "Step 1/3: Pinging HF Space ($PING_URL/reset) ..."
8
# curl's -w already prints "000" when the request fails, so assign the fallback
# instead of appending it inside the substitution (which yielded "000000").
HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" -X POST -H "Content-Type: application/json" -d '{}' "$PING_URL/reset" --max-time 30 2>/dev/null) || HTTP_CODE="000"
9
+ echo "HTTP_CODE: $HTTP_CODE"
10
+
11
+ echo "Step 2/3: Running docker build ..."
12
+ docker build "$REPO_DIR"
13
+
14
+ echo "Step 3/3: Running openenv validate ..."
15
+ openenv validate
models.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pydantic import BaseModel, ConfigDict, Field
2
+ from typing import List, Literal, Optional
3
+ import openenv.core.env_server.types as openenv_types
4
+
5
+ class Observation(openenv_types.Observation):
6
+ ticket_id: str = Field(default="", description="Unique ID of the ticket")
7
+ customer_message: str = Field(default="", description="The current message from the customer")
8
+ history: List[str] = Field(default_factory=list, description="Conversation history")
9
+ missing_info: List[str] = Field(default_factory=list, description="Fields required before routing")
10
+ status: str = Field(default="OPEN", description="Ticket status")
11
+ refund_processed: bool = Field(default=False, description="True if a refund was already executed")
12
+
13
+ class Action(openenv_types.Action):
14
+ action_type: Literal["ROUTE", "ASK_INFO", "REFUND", "CLOSE"] = Field(..., description="Type of action to take")
15
+ argument: str = Field(..., description="The category to route to, the question to ask, or the order ID to refund")
16
+
17
+ class State(openenv_types.State):
18
+ ticket_id: str = Field(default="", description="Unique ID of the ticket")
19
+ customer_message: str = Field(default="", description="The current message from the customer")
20
+ history: List[str] = Field(default_factory=list, description="Conversation history")
21
+ missing_info: List[str] = Field(default_factory=list, description="Fields required before routing")
22
+ status: str = Field(default="OPEN", description="Ticket status")
23
+ refund_processed: bool = Field(default=False, description="True if a refund was already executed")
24
+
25
+ class Reward(BaseModel):
26
+ value: float = Field(..., description="Numerical reward between -1.0 and 1.0")
27
+ feedback: str = Field(..., description="Incremental feedback for the agent")
openenv.yaml ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ spec_version: 1
2
+ name: "customer-support-triage"
3
+ version: "1.0.0"
4
+ type: "space"
5
+ runtime: "fastapi"
6
+ app: "server.app:app"
7
+ port: 7860
8
+ description: "A real-world customer support simulation environment requiring information extraction, system actions, and routing."
9
+ tags: ["openenv", "text", "customer-support"]
10
+ tasks:
11
+ - easy_password_reset
12
+ - medium_hardware_issue
13
+ - hard_refund_processing
pyproject.toml ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [project]
2
+ name = "metapy"
3
+ version = "1.0.0"
4
+ description = "A real-world customer support simulation environment"
5
+ dependencies = [
6
+ "openenv-core",
7
+ "fastapi",
8
+ "uvicorn",
9
+ "openai>=1.12.0",
10
+ "pydantic>=2.0.0",
11
+ "python-dotenv>=1.0.0"
12
+ ]
13
+
14
+ [build-system]
15
+ requires = ["setuptools>=42", "wheel"]
16
+ build-backend = "setuptools.build_meta"
17
+
18
+ [tool.setuptools.packages.find]
19
+ where = ["."]
20
+
21
+ [project.scripts]
22
+ server = "server.app:main"
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ openai>=1.12.0
2
+ pydantic>=2.0.0
3
+ python-dotenv>=1.0.0
4
+ openenv-core>=0.1.0
5
+ fastapi>=0.100.0
6
+ uvicorn>=0.23.0
server/app.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from dotenv import load_dotenv
2
+ load_dotenv()
3
+
4
+ import uvicorn
5
+ from openenv.core.env_server.http_server import create_app
6
+ from env import CustomerSupportEnv
7
+ from models import Action, Observation
8
+
9
+ # Use the environment class and Pydantic types to create the app
10
+ app = create_app(
11
+ CustomerSupportEnv,
12
+ Action,
13
+ Observation,
14
+ env_name="customer_support_env"
15
+ )
16
+
17
def main():
    """Serve ``app`` with uvicorn on all interfaces at port 7860."""
    bind_host = "0.0.0.0"
    bind_port = 7860
    uvicorn.run(app, host=bind_host, port=bind_port)
19
+
20
+ if __name__ == "__main__":
21
+ main()
tasks.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
class TaskDef:
    """Static definition of one support-ticket task.

    Attributes:
        name: Task identifier.
        initial_msg: The customer's opening message.
        expected_route: Queue the ticket should finally be routed to.
        required_info: Names of the fields that must be collected.
        needs_refund: Whether a refund has to be processed for this task.
    """

    def __init__(self, name: str, initial_msg: str, expected_route: str, required_info: list = None, needs_refund: bool = False):
        self.name = name
        self.initial_msg = initial_msg
        self.expected_route = expected_route
        # Copy the list so that later mutation of the caller's list cannot
        # silently change this task definition.
        self.required_info = list(required_info) if required_info else []
        self.needs_refund = needs_refund

    def __repr__(self) -> str:
        return (
            f"{type(self).__name__}(name={self.name!r}, "
            f"expected_route={self.expected_route!r}, "
            f"required_info={self.required_info!r}, "
            f"needs_refund={self.needs_refund!r})"
        )
8
+
9
+ # Define 3 strict tasks (Easy, Medium, Hard)
10
+ TASKS =[
11
+ TaskDef(
12
+ name="easy_password_reset",
13
+ initial_msg="I forgot my password and cannot log in.",
14
+ expected_route="IT_SUPPORT"
15
+ ),
16
+ TaskDef(
17
+ name="medium_hardware_issue",
18
+ initial_msg="My laptop won't turn on.",
19
+ expected_route="HARDWARE_SUPPORT",
20
+ required_info=["serial_number"]
21
+ ),
22
+ TaskDef(
23
+ name="hard_refund_processing",
24
+ initial_msg="I want a refund for my recent purchase, it arrived broken.",
25
+ expected_route="BILLING",
26
+ required_info=["order_id", "photo_evidence"],
27
+ needs_refund=True
28
+ )
29
+ ]
30
+
31
def grader(task: TaskDef, final_state: dict) -> float:
    """Score a finished episode as the fraction of satisfied checks.

    One check is the final route matching ``task.expected_route``, one check
    is counted per entry of ``task.required_info`` found in the state's
    ``collected_info``, and one more applies when the task needs a refund and
    ``refund_processed`` is set. The result lies between 0.0 and 1.0.
    """
    refund_check = 1 if task.needs_refund else 0
    total_checks = 1 + len(task.required_info) + refund_check

    earned = 0.0
    if final_state.get("route") == task.expected_route:
        earned += 1.0

    # One point for every required field the agent actually collected.
    gathered = final_state.get("collected_info", [])
    earned += sum(1.0 for field in task.required_info if field in gathered)

    if task.needs_refund and final_state.get("refund_processed", False):
        earned += 1.0

    return earned / total_checks
uv.lock ADDED
The diff for this file is too large to render. See raw diff