mathi3046 commited on
Commit
fb78c46
·
0 Parent(s):

Initial commit

Browse files
.env ADDED
File without changes
.gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
Dockerfile ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Base image. Python 3.11 satisfies the project's `requires-python = ">=3.10"`
# and the slim variant tracks a currently supported Debian release.
FROM python:3.11-slim

# Set working directory
WORKDIR /app

# Install dependencies before copying the sources so this layer stays cached
# until requirements.txt itself changes.
COPY requirements.txt .
RUN pip install --no-cache-dir --upgrade pip \
    && pip install --no-cache-dir -r requirements.txt

# Copy the application code
COPY . .

# Port the FastAPI server listens on (see server.py / README app_port)
EXPOSE 7860

# Run start script
CMD ["bash", "run.sh"]
README.md ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: SupportDeskEnv
3
+ emoji: 🤖
4
+ colorFrom: blue
5
+ colorTo: green
6
+ sdk: docker
7
+ app_port: 7860
8
+ pinned: false
9
+ ---
10
+
11
+ # 🧠 SupportDeskEnv — OpenEnv for Real-World Customer Support AI
12
+
13
+ SupportDeskEnv is a production-grade OpenEnv environment that simulates real-world customer support workflows, enabling evaluation of AI agents on decision-making, empathy, and multi-step reasoning.
14
+
15
+ ---
16
+
17
+ ## 🚀 Overview
18
+
19
+ Modern AI agents must handle more than just tasks — they must:
20
+ - Understand user intent
21
+ - Respond with empathy
22
+ - Take correct actions
23
+ - Resolve issues efficiently
24
+
25
+ SupportDeskEnv models this challenge by simulating realistic support tickets and evaluating agent behavior using structured rewards.
26
+
27
+ ---
28
+
29
+ ## 🌍 Real-World Use Case
30
+
31
+ This environment replicates real customer support scenarios such as:
32
+ - Login failures
33
+ - Payment and billing issues
34
+ - General account queries
35
+
36
+ It captures:
37
+ - Emotional context (e.g., frustrated users)
38
+ - Multi-step interactions
39
+ - Resolution workflows
40
+
41
+ ---
42
+
43
+ ## 🧩 OpenEnv Specification
44
+
45
+ Fully compliant with OpenEnv:
46
+
47
+ - ✅ Typed `Observation`, `Action`, `Reward` models (Pydantic)
48
+ - ✅ `reset()`, `step()`, `state()` APIs
49
+ - ✅ `openenv.yaml` included
50
+ - ✅ Validated via `openenv validate`
51
+
52
+ ---
53
+
54
+ ## ⚙️ Action Space
55
+
56
+ ```json
57
+ {
58
+ "category": "billing | tech | general",
59
+ "response": "string",
60
+ "escalate": "boolean",
61
+ "resolve": "boolean"
62
+ }
63
+ ```
__pycache__/server.cpython-314.pyc ADDED
Binary file (2.51 kB). View file
 
app.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from server import app
app/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # Init for app
app/__pycache__/__init__.cpython-314.pyc ADDED
Binary file (162 Bytes). View file
 
app/__pycache__/env.cpython-314.pyc ADDED
Binary file (4.68 kB). View file
 
app/__pycache__/graders.cpython-314.pyc ADDED
Binary file (3.45 kB). View file
 
app/__pycache__/models.cpython-314.pyc ADDED
Binary file (2.99 kB). View file
 
app/__pycache__/reward.cpython-314.pyc ADDED
Binary file (4.19 kB). View file
 
app/__pycache__/tasks.cpython-314.pyc ADDED
Binary file (1.47 kB). View file
 
app/__pycache__/utils.cpython-314.pyc ADDED
Binary file (1.53 kB). View file
 
app/env.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Dict, Any
2
+ from .models import Observation, Action, Reward
3
+ from .utils import generate_ticket
4
+ from .tasks import get_task
5
+ from .reward import calculate_reward
6
+ from .graders import grade_easy, grade_medium, grade_hard
7
+
8
class SupportDeskEnv:
    """Single-session customer-support simulation with reset/step/state calls.

    The environment holds one ticket at a time: ``reset`` draws a new ticket,
    ``step`` scores one agent action against it, and ``state`` returns the
    current observation.
    """

    def __init__(self):
        # All three are populated by reset() and stay None until then.
        self.state_data = None
        self.expected_category = None
        self.task_config = None

    async def reset(self, level: str = "medium") -> Observation:
        """Start a new episode and return its initial observation.

        Args:
            level: Difficulty name, resolved to a task config by ``get_task``.

        Returns:
            The fresh observation, which is also stored as the current state.
        """
        self.task_config = get_task(level)
        ticket = generate_ticket(self.task_config.level)
        self.expected_category = ticket["category"]

        self.state_data = Observation(
            ticket_id=ticket["id"],
            user_message=ticket["message"],
            sentiment=ticket["sentiment"],
            history=[{"role": "user", "content": ticket["message"]}],
            step_count=0,
            task_level=self.task_config.level
        )
        return self.state_data

    async def step(self, action: Action) -> Dict[str, Any]:
        """Apply one agent action and return observation, reward, done and info.

        An episode is started implicitly if ``reset`` has not been called yet.
        The episode is done when the action resolves or escalates the ticket,
        or when the task's step budget is used up.

        Args:
            action: The agent's action for this step.

        Returns:
            Dict with keys ``observation``, ``reward``, ``done`` and ``info``.
        """
        if self.state_data is None:
            await self.reset()

        self.state_data.step_count += 1

        # Score the action BEFORE logging it in the history: calculate_reward
        # treats agent messages already in the history as past responses, so
        # appending first would make every action look like a repeat of itself.
        # max_steps is passed for the efficiency bonus.
        reward = calculate_reward(self.state_data, action, self.expected_category, self.task_config.max_steps)
        self.state_data.history.append({"role": "agent", "content": action.response or ""})

        done = action.resolve or action.escalate or self.state_data.step_count >= self.task_config.max_steps

        # Grading based on level
        if self.task_config.level == "easy":
            task_score = grade_easy(action, self.expected_category)
        elif self.task_config.level == "medium":
            task_score = grade_medium(action, self.expected_category)
        else:
            task_score = grade_hard(action, self.state_data, self.expected_category)

        reward.metrics["grader_score"] = task_score

        if not done:
            # Scripted customer follow-up that keeps the conversation going.
            self.state_data.user_message = "Can you explain more?"
            self.state_data.history.append({"role": "user", "content": self.state_data.user_message})

        return {
            "observation": self.state_data.dict(),
            "reward": reward.dict(),
            "done": done,
            "info": {"expected_category": self.expected_category, "task_score": task_score}
        }

    async def state(self) -> Observation:
        """Return the current observation, starting an episode if none exists."""
        if self.state_data is None:
            await self.reset()
        return self.state_data
app/graders.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Any
2
+
3
+ def _safe_lower(val: Any) -> str:
4
+ return (val or "").lower()
5
+
6
def grade_easy(action: Any, expected_category: str) -> float:
    """Grade an easy task: full marks for the right category, otherwise zero.

    The comparison is case-insensitive; a missing ``category`` attribute is
    treated as an empty string.
    """
    predicted = _safe_lower(getattr(action, "category", ""))
    is_match = predicted == _safe_lower(expected_category)
    return 1.0 if is_match else 0.0
14
+
15
def grade_medium(action: Any, expected_category: str) -> float:
    """Grade a medium task on category accuracy and a substantive resolution.

    Half the score is for a case-insensitive category match; the other half
    is for marking the ticket resolved with a response longer than 20
    characters.
    """
    predicted = _safe_lower(getattr(action, "category", ""))
    reply = _safe_lower(getattr(action, "response", ""))

    points = 0.0
    if predicted == _safe_lower(expected_category):
        points += 0.5

    resolved = getattr(action, "resolve", False)
    if resolved and len(reply) > 20:
        points += 0.5

    # Clamp to the valid score range.
    return float(min(1.0, max(0.0, points)))
27
+
28
def grade_hard(action: Any, state_data: Any, expected_category: str) -> float:
    """Grade a hard task on category, resolution quality and tone.

    Scoring:
        * 0.3 for a case-insensitive category match.
        * 0.3 for resolving the ticket with a response longer than 30 chars.
        * 0.4 for tone: granted automatically unless the customer is angry,
          in which case the response must contain an apology/empathy keyword.
    """
    apology_markers = ("sorry", "apologize", "understand", "frustrating")

    predicted = _safe_lower(getattr(action, "category", ""))
    reply = _safe_lower(getattr(action, "response", ""))
    mood = _safe_lower(getattr(state_data, "sentiment", ""))

    total = 0.0
    if predicted == _safe_lower(expected_category):
        total += 0.3

    if getattr(action, "resolve", False) and len(reply) > 30:
        total += 0.3

    # Strict rule for angry customers: only they have to earn the tone points.
    if mood != "angry" or any(marker in reply for marker in apology_markers):
        total += 0.4

    return float(min(1.0, max(0.0, total)))
app/models.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pydantic import BaseModel, Field
2
+ from typing import List, Optional
3
+
4
# What the agent sees at each step: the ticket, the customer's latest message
# and sentiment, the conversation so far, and episode bookkeeping.
class Observation(BaseModel):
    # Required ticket fields
    ticket_id: str = Field(..., description="Unique ID for the support ticket")
    user_message: str = Field(..., description="The latest message from the customer")
    sentiment: str = Field(..., description="Customer sentiment (angry, frustrated, neutral, polite)")
    # Conversation bookkeeping (defaults describe a fresh episode)
    history: List[dict] = Field(description="Conversation history", default_factory=list)
    step_count: int = Field(0, description="Current step in the conversation")
    # Required difficulty label
    task_level: str = Field(..., description="Task difficulty level (easy, medium, hard)")
11
+
12
# What the agent submits for one step; every field is optional on the wire.
class Action(BaseModel):
    # Text outputs
    category: Optional[str] = Field(None, description="Classified issue category (billing, tech, general)")
    response: Optional[str] = Field("", description="Agent's response to the customer")
    # Episode-ending flags
    escalate: bool = Field(False, description="Whether to escalate the ticket to a human")
    resolve: bool = Field(False, description="Whether the issue is considered resolved")
17
+
18
# Result of scoring one action: a score validated to lie in [0, 1] plus a
# per-component breakdown.
class Reward(BaseModel):
    score: float = Field(description="Overall reward score", ge=0.0, le=1.0)
    metrics: dict = Field(description="Detailed metrics breakdown", default_factory=dict)
app/reward.py ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from .models import Action, Observation, Reward
2
+
3
def calculate_reward(obs: Observation, action: Action, expected_category: str, max_steps: int) -> Reward:
    """Score a single agent action with deterministic keyword heuristics.

    Each component that fires is recorded in ``metrics`` under its own key
    (``category_correct``, ``empathy``, ``angry_penalty``, ``generic_penalty``,
    ``helpfulness``, ``repetition_penalty``, ``escalation_penalty``,
    ``resolution_bonus``, ``efficiency_bonus``).

    Args:
        obs: Current observation; ``sentiment``, ``history`` and ``step_count``
            are read. ``step_count`` must already include the step being
            scored.
        action: The agent's action for this step.
        expected_category: Ground-truth category of the ticket.
        max_steps: Step budget of the task, used for the efficiency bonus.

    Returns:
        A ``Reward`` whose ``score`` is the component sum clamped to
        [0.0, 1.0] and whose ``metrics`` holds the per-component breakdown.
    """
    score = 0.0
    metrics = {}

    # Category must match exactly (ignoring case and surrounding whitespace).
    # A substring test would also reward catch-all answers such as
    # "billing | tech | general".
    cat = (action.category or "").strip().lower()
    if cat and cat == (expected_category or "").strip().lower():
        score += 0.3
        metrics["category_correct"] = 0.3

    # Helpful & empathetic check (deterministic heuristic)
    resp = (action.response or "").lower()
    if any(word in resp for word in ("sorry", "apologize", "understand", "help")):
        # Empathy is worth more when the customer is upset.
        empathy_score = 0.2 if obs.sentiment in ["angry", "frustrated"] else 0.1
        score += empathy_score
        metrics["empathy"] = empathy_score

    # Angry customer strict rule: no acknowledgement at all is penalised.
    if obs.sentiment == "angry" and not any(w in resp for w in ("sorry", "apologize", "understand")):
        score -= 0.25
        metrics["angry_penalty"] = -0.25

    # Anti-generic response penalty: a stock phrase with little else around it.
    generic_phrases = ("i will help you", "let me help", "i understand your issue")
    if any(phrase in resp for phrase in generic_phrases) and len(action.response or "") < 60:
        score -= 0.1
        metrics["generic_penalty"] = -0.1

    if any(word in resp for word in ("step", "fix", "update", "here is", "resolved")):
        score += 0.3
        metrics["helpfulness"] = 0.3

    # Repetition penalty. Only agent turns from EARLIER steps count as past
    # responses: the caller may or may not have logged the current response
    # already, and step_count includes the current step, so the first
    # (step_count - 1) agent turns are exactly the earlier ones.
    agent_turns = [msg["content"].lower() for msg in obs.history if msg["role"] == "agent"]
    past_responses = agent_turns[: max(obs.step_count - 1, 0)]
    if resp in past_responses:
        score -= 0.2
        metrics["repetition_penalty"] = -0.2

    # Escalation costs a little; resolving without escalating is rewarded.
    if action.escalate:
        score -= 0.1
        metrics["escalation_penalty"] = -0.1

    if action.resolve and not action.escalate:
        score += 0.2
        metrics["resolution_bonus"] = 0.2

    # Efficiency bonus: 0.1 per step of budget still unused.
    if obs.step_count < max_steps:
        efficiency_bonus = 0.1 * (max_steps - obs.step_count)
        score += efficiency_bonus
        metrics["efficiency_bonus"] = efficiency_bonus

    final_score = float(max(0.0, min(1.0, score)))
    return Reward(score=final_score, metrics=metrics)
app/tasks.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
class TaskConfig:
    """Static settings for one difficulty level.

    Attributes are plain instance fields:
        level: Difficulty name ("easy", "medium" or "hard").
        max_steps: Maximum number of agent steps in an episode.
    """

    def __init__(self, level: str, max_steps: int):
        self.level = level
        self.max_steps = max_steps

    def __repr__(self) -> str:
        return f"TaskConfig(level={self.level!r}, max_steps={self.max_steps!r})"


EASY_TASK = TaskConfig(level="easy", max_steps=1)
MEDIUM_TASK = TaskConfig(level="medium", max_steps=1)
HARD_TASK = TaskConfig(level="hard", max_steps=5)


def get_task(level: str) -> TaskConfig:
    """Return the task config for *level*, falling back to ``EASY_TASK``.

    The lookup ignores case and surrounding whitespace. Unknown names and
    non-string values (e.g. ``None`` from a JSON ``null``) yield ``EASY_TASK``
    instead of raising.
    """
    tasks = {"easy": EASY_TASK, "medium": MEDIUM_TASK, "hard": HARD_TASK}
    key = level.strip().lower() if isinstance(level, str) else ""
    return tasks.get(key, EASY_TASK)
app/utils.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import random
2
+ import uuid
3
+
4
# Canned customer messages, keyed by the category they belong to.
ISSUE_TEMPLATES = {
    "billing": [
        "I was overcharged on my last invoice.",
        "How do I update my credit card?",
        "Cancel my subscription.",
    ],
    "tech": [
        "The app keeps crashing on startup.",
        "I can't log in to my account.",
        "API is returning 500 errors.",
    ],
    "general": [
        "What are your business hours?",
        "Where can I find the documentation?",
        "Do you offer enterprise plans?",
    ],
}
# Sentiment labels a ticket can carry.
SENTIMENTS = ["angry", "frustrated", "neutral", "polite"]


def generate_ticket(level: str):
    """Build a random support ticket.

    Category, message and sentiment are each drawn with ``random.choice``;
    the id is "TKT-" plus eight upper-case hex characters from a UUID4.
    *level* is copied into the ticket unchanged and does not influence the
    draw.

    Returns:
        Dict with keys ``id``, ``category``, ``message``, ``sentiment`` and
        ``level``.
    """
    # Draw order (category, message, sentiment) is kept stable so seeded
    # runs stay reproducible.
    chosen_category = random.choice(list(ISSUE_TEMPLATES))
    chosen_message = random.choice(ISSUE_TEMPLATES[chosen_category])
    chosen_sentiment = random.choice(SENTIMENTS)

    ticket = {"id": "TKT-" + uuid.uuid4().hex[:8].upper()}
    ticket["category"] = chosen_category
    ticket["message"] = chosen_message
    ticket["sentiment"] = chosen_sentiment
    ticket["level"] = level
    return ticket
app/validate.ps1 ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Pre-submission validation: runs `openenv validate`, then builds the Docker
# image. Exits with status 1 as soon as either check fails.
Write-Host "Running validation..."

# Step 1: OpenEnv validation
# Resolve the CLI from PATH so the script is not tied to one user's machine.
$openenv = Get-Command openenv -ErrorAction SilentlyContinue
if (-not $openenv) {
    Write-Host "❌ openenv CLI not found on PATH"
    exit 1
}
& $openenv.Source validate
if ($LASTEXITCODE -ne 0) {
    Write-Host "❌ OpenEnv validation failed"
    exit 1
}

# Step 2: Docker build
docker build -t supportdesk_env .
if ($LASTEXITCODE -ne 0) {
    Write-Host "❌ Docker build failed"
    exit 1
}

# ✅ Final Success Banner (two checks are run above)
Write-Host "========================================"
Write-Host " All 2/2 checks passed!"
Write-Host " Your submission is ready to submit."
Write-Host "========================================"
inference.py ADDED
@@ -0,0 +1,131 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import requests
3
+ import json
4
+ from openai import OpenAI
5
+
6
+ # =============================
7
+ # ENV VARIABLES (STRICT)
8
+ # =============================
9
+ API_BASE_URL = os.environ.get("API_BASE_URL")
10
+ API_KEY = os.environ.get("API_KEY")
11
+ MODEL_NAME = os.environ.get("MODEL_NAME", "gpt-4o-mini")
12
+
13
+ ENV_URL = "http://localhost:7860"
14
+
15
+ # =============================
16
+ # 🔥 GLOBAL LLM CLIENT
17
+ # =============================
18
+ try:
19
+ client = OpenAI(
20
+ base_url=API_BASE_URL,
21
+ api_key=API_KEY
22
+ )
23
+ print("✅ LLM CLIENT INITIALIZED", flush=True)
24
+ except Exception as e:
25
+ print(f"❌ CLIENT INIT FAILED: {e}", flush=True)
26
+ client = None
27
+
28
+
29
+ # =============================
30
+ # LLM ACTION (RESPONSES API)
31
+ # =============================
32
def get_llm_action(issue):
    """Ask the LLM to answer *issue* and wrap the reply in an action payload.

    Only the response text comes from the model (truncated to 120
    characters); the category is fixed to "tech" and the ticket is always
    marked resolved. Failures while calling the model are logged and replaced
    by a canned fallback action.

    Args:
        issue: The customer message to send as the model input.

    Returns:
        Dict with keys ``category``, ``response``, ``escalate``, ``resolve``.
    """
    def build_action(text):
        return {
            "category": "tech",
            "response": text,
            "escalate": False,
            "resolve": True
        }

    try:
        if client is None:
            raise RuntimeError("Client not initialized")

        print("🚀 CALLING LLM...", flush=True)

        response = client.responses.create(
            model=MODEL_NAME,
            input=issue
        )

        print("✅ LLM RESPONSE RECEIVED", flush=True)

        # Prefer the SDK's aggregated text: the first output item is not
        # guaranteed to be the message. Fall back to the first content part
        # for response objects that lack the convenience attribute.
        output_text = getattr(response, "output_text", None)
        if not output_text:
            try:
                output_text = response.output[0].content[0].text
            except (AttributeError, IndexError, TypeError):
                output_text = None

        # Missing, empty or non-text output gets a neutral stock reply.
        if not isinstance(output_text, str) or not output_text:
            output_text = "I'll help you with this."

        return build_action(output_text[:120])

    except Exception as e:
        # Boundary handler: inference must keep going even if the LLM call
        # fails, so log the error and answer with a canned action.
        print(f"❌ LLM ERROR: {e}", flush=True)
        return build_action("Temporary issue. Please try again.")
68
+
69
+
70
+ # =============================
71
+ # MAIN INFERENCE
72
+ # =============================
73
def run_inference(level="easy"):
    """Run one episode against the environment server and print its log.

    Resets the environment at *level*, then repeatedly asks the LLM for an
    action and posts it to ``/step`` until the environment reports ``done``
    or five steps have been taken. Progress is printed as ``[START]``,
    ``[STEP]`` and ``[END]`` lines; any error ends the episode with an
    ``[END] success=False`` line instead of raising.

    Args:
        level: Difficulty name sent to the ``/reset`` endpoint.
    """
    print(f"[START] task=supportdesk_{level} env=SupportDeskEnv model={MODEL_NAME}", flush=True)

    total_reward = 0.0
    steps_taken = 0
    done = False

    try:
        res = requests.post(f"{ENV_URL}/reset", json={"level": level}, timeout=5)
        # Surface HTTP errors instead of parsing an error body as a result.
        res.raise_for_status()
        obs = res.json().get("observation", {})

        while not done and steps_taken < 5:
            steps_taken += 1

            issue = obs.get("user_message", "Help needed")

            # 🔥 ALWAYS CALL LLM
            action = get_llm_action(issue)

            step_http = requests.post(
                f"{ENV_URL}/step",
                json=action,
                timeout=5
            )
            step_http.raise_for_status()
            step_res = step_http.json()

            reward = step_res.get("reward", {}).get("score", 0.0)
            done = step_res.get("done", False)
            obs = step_res.get("observation", {})

            total_reward += reward

            print(
                f"[STEP] step={steps_taken} action={json.dumps(action)} reward={reward} done={done}",
                flush=True
            )

        # Episode score is the mean step reward, clamped to [0, 1].
        score = total_reward / max(steps_taken, 1)
        score = min(max(score, 0.0), 1.0)
        success = score >= 0.6

        print(
            f"[END] success={success} steps={steps_taken} score={score}",
            flush=True
        )

    except Exception as e:
        # Boundary handler: report the failure in the log format and move on
        # to the next task.
        print(
            f"[END] success=False steps={steps_taken} score=0.0 error={e}",
            flush=True
        )
123
+
124
+
125
+ # =============================
126
+ # ENTRY POINT
127
+ # =============================
128
+ if __name__ == "__main__":
129
+ run_inference("easy")
130
+ run_inference("medium")
131
+ run_inference("hard")
openenv.yaml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ name: "SupportDeskEnv"
2
+ version: "1.0.0"
3
+ description: "AI Customer Support Simulation Environment"
4
+ entrypoint: "server.py"
5
+ interface: "fastapi"
6
+ port: 7860
pyproject.toml ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [project]
2
+ name = "supportdesk-env"
3
+ version = "0.1.0"
4
+ description = "AI Customer Support Simulation Environment using OpenEnv"
5
+ requires-python = ">=3.10"
6
+ authors = [
7
+ { name = "Gobinath" }
8
+ ]
9
+ dependencies = [
10
+ "fastapi",
11
+ "uvicorn",
12
+ "pydantic",
13
+ "openai",
14
+ "openenv-core>=0.2.0"
15
+ ]
16
+
17
+ [project.scripts]
18
+ server = "server.app:main"
19
+
20
+ [tool.openenv]
21
+ entrypoint = "server.app:main"
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ requests==2.31.0
2
+ openai>=1.40.0
3
+ fastapi
4
+ uvicorn
run.sh ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
#!/bin/bash
# Start the FastAPI server in the background
python server.py &

# Wait until the server answers on its root endpoint (up to ~30 s) instead of
# sleeping a fixed time, so inference does not start before the server is up.
for _ in $(seq 1 30); do
    if python -c "import urllib.request; urllib.request.urlopen('http://localhost:7860/', timeout=1)" 2>/dev/null; then
        break
    fi
    sleep 1
done

# Run the inference tasks
python inference.py

# Keep the container running by waiting for background processes
wait
server.py ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI
2
+ from app.env import SupportDeskEnv
3
+ from app.models import Action
4
+
5
app = FastAPI(title="SupportDeskEnv")
# One module-level environment: every request operates on the same episode.
env = SupportDeskEnv()


# -----------------------------
# ROOT CHECK
# -----------------------------
@app.get("/")
def root():
    """Liveness check."""
    return {"message": "SupportDeskEnv is running 🚀"}


# -----------------------------
# RESET (BODY OPTIONAL)
# -----------------------------
@app.post("/reset")
async def reset(req: dict = {}):
    """Start a new episode; the optional JSON body may carry a ``level``.

    A missing, null or empty ``level`` falls back to "medium". Other values
    are passed on as strings so the task lookup never receives a non-string.
    """
    # `req` is only read here, so the shared default dict is never mutated.
    level = (req or {}).get("level") or "medium"
    obs = await env.reset(str(level))
    return {"observation": obs.dict()}


# Optional GET (for browser testing)
@app.get("/reset")
async def reset_get():
    """Start a new "medium" episode."""
    obs = await env.reset("medium")
    return {"observation": obs.dict()}


# -----------------------------
# STEP
# -----------------------------
@app.post("/step")
async def step(action: Action):
    """Apply one agent action and return the environment's step result."""
    result = await env.step(action)
    return result


# -----------------------------
# STATE
# -----------------------------
@app.get("/state")
async def state():
    """Return the current observation."""
    obs = await env.state()
    return {"observation": obs.dict()}


# -----------------------------
# RUN SERVER
# -----------------------------
if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)
server/app.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ from server import app
2
+ import uvicorn
3
+
4
def main():
    """Serve the FastAPI app with uvicorn on all interfaces, port 7860."""
    bind_host, bind_port = "0.0.0.0", 7860
    uvicorn.run(app, host=bind_host, port=bind_port)
6
+
7
+ if __name__ == '__main__':
8
+ main()
test_dir/test_env/README.md ADDED
@@ -0,0 +1,255 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Test Env Environment Server
3
+ emoji: 📺
4
+ colorFrom: red
5
+ colorTo: yellow
6
+ sdk: docker
7
+ pinned: false
8
+ app_port: 8000
9
+ base_path: /web
10
+ tags:
11
+ - openenv
12
+ ---
13
+
14
+ # Test Env Environment
15
+
16
+ A simple test environment that echoes back messages. Perfect for testing the env APIs as well as demonstrating environment usage patterns.
17
+
18
+ ## Quick Start
19
+
20
+ The simplest way to use the Test Env environment is through the `TestEnv` class:
21
+
22
+ ```python
23
+ from test_env import TestAction, TestEnv
24
+
25
+ try:
26
+ # Create environment from Docker image
27
+ test_envenv = TestEnv.from_docker_image("test_env-env:latest")
28
+
29
+ # Reset
30
+ result = test_envenv.reset()
31
+ print(f"Reset: {result.observation.echoed_message}")
32
+
33
+ # Send multiple messages
34
+ messages = ["Hello, World!", "Testing echo", "Final message"]
35
+
36
+ for msg in messages:
37
+ result = test_envenv.step(TestAction(message=msg))
38
+ print(f"Sent: '{msg}'")
39
+ print(f" → Echoed: '{result.observation.echoed_message}'")
40
+ print(f" → Length: {result.observation.message_length}")
41
+ print(f" → Reward: {result.reward}")
42
+
43
+ finally:
44
+ # Always clean up
45
+ test_envenv.close()
46
+ ```
47
+
48
+ That's it! The `TestEnv.from_docker_image()` method handles:
49
+ - Starting the Docker container
50
+ - Waiting for the server to be ready
51
+ - Connecting to the environment
52
+ - Container cleanup when you call `close()`
53
+
54
+ ## Building the Docker Image
55
+
56
+ Before using the environment, you need to build the Docker image:
57
+
58
+ ```bash
59
+ # From project root
60
+ docker build -t test_env-env:latest -f server/Dockerfile .
61
+ ```
62
+
63
+ ## Deploying to Hugging Face Spaces
64
+
65
+ You can easily deploy your OpenEnv environment to Hugging Face Spaces using the `openenv push` command:
66
+
67
+ ```bash
68
+ # From the environment directory (where openenv.yaml is located)
69
+ openenv push
70
+
71
+ # Or specify options
72
+ openenv push --namespace my-org --private
73
+ ```
74
+
75
+ The `openenv push` command will:
76
+ 1. Validate that the directory is an OpenEnv environment (checks for `openenv.yaml`)
77
+ 2. Prepare a custom build for Hugging Face Docker space (enables web interface)
78
+ 3. Upload to Hugging Face (ensuring you're logged in)
79
+
80
+ ### Prerequisites
81
+
82
+ - Authenticate with Hugging Face: The command will prompt for login if not already authenticated
83
+
84
+ ### Options
85
+
86
+ - `--directory`, `-d`: Directory containing the OpenEnv environment (defaults to current directory)
87
+ - `--repo-id`, `-r`: Repository ID in format 'username/repo-name' (defaults to 'username/env-name' from openenv.yaml)
88
+ - `--base-image`, `-b`: Base Docker image to use (overrides Dockerfile FROM)
89
+ - `--private`: Deploy the space as private (default: public)
90
+
91
+ ### Examples
92
+
93
+ ```bash
94
+ # Push to your personal namespace (defaults to username/env-name from openenv.yaml)
95
+ openenv push
96
+
97
+ # Push to a specific repository
98
+ openenv push --repo-id my-org/my-env
99
+
100
+ # Push with a custom base image
101
+ openenv push --base-image ghcr.io/meta-pytorch/openenv-base:latest
102
+
103
+ # Push as a private space
104
+ openenv push --private
105
+
106
+ # Combine options
107
+ openenv push --repo-id my-org/my-env --base-image custom-base:latest --private
108
+ ```
109
+
110
+ After deployment, your space will be available at:
111
+ `https://huggingface.co/spaces/<repo-id>`
112
+
113
+ The deployed space includes:
114
+ - **Web Interface** at `/web` - Interactive UI for exploring the environment
115
+ - **API Documentation** at `/docs` - Full OpenAPI/Swagger interface
116
+ - **Health Check** at `/health` - Container health monitoring
117
+ - **WebSocket** at `/ws` - Persistent session endpoint for low-latency interactions
118
+
119
+ ## Environment Details
120
+
121
+ ### Action
122
+ **TestAction**: Contains a single field
123
+ - `message` (str) - The message to echo back
124
+
125
+ ### Observation
126
+ **TestObservation**: Contains the echo response and metadata
127
+ - `echoed_message` (str) - The message echoed back
128
+ - `message_length` (int) - Length of the message
129
+ - `reward` (float) - Reward based on message length (length × 0.1)
130
+ - `done` (bool) - Always False for echo environment
131
+ - `metadata` (dict) - Additional info like step count
132
+
133
+ ### Reward
134
+ The reward is calculated as: `message_length × 0.1`
135
+ - "Hi" → reward: 0.2
136
+ - "Hello, World!" → reward: 1.3
137
+ - Empty message → reward: 0.0
138
+
139
+ ## Advanced Usage
140
+
141
+ ### Connecting to an Existing Server
142
+
143
+ If you already have a Test Env environment server running, you can connect directly:
144
+
145
+ ```python
146
+ from test_env import TestEnv
147
+
148
+ # Connect to existing server
149
+ test_envenv = TestEnv(base_url="<ENV_HTTP_URL_HERE>")
150
+
151
+ # Use as normal
152
+ result = test_envenv.reset()
153
+ result = test_envenv.step(TestAction(message="Hello!"))
154
+ ```
155
+
156
+ Note: When connecting to an existing server, `test_envenv.close()` will NOT stop the server.
157
+
158
+ ### Using the Context Manager
159
+
160
+ The client supports context manager usage for automatic connection management:
161
+
162
+ ```python
163
+ from test_env import TestAction, TestEnv
164
+
165
+ # Connect with context manager (auto-connects and closes)
166
+ with TestEnv(base_url="http://localhost:8000") as env:
167
+ result = env.reset()
168
+ print(f"Reset: {result.observation.echoed_message}")
169
+ # Multiple steps with low latency
170
+ for msg in ["Hello", "World", "!"]:
171
+ result = env.step(TestAction(message=msg))
172
+ print(f"Echoed: {result.observation.echoed_message}")
173
+ ```
174
+
175
+ The client uses WebSocket connections for:
176
+ - **Lower latency**: No HTTP connection overhead per request
177
+ - **Persistent session**: Server maintains your environment state
178
+ - **Efficient for episodes**: Better for many sequential steps
179
+
180
+ ### Concurrent WebSocket Sessions
181
+
182
+ The server supports multiple concurrent WebSocket connections. To enable this,
183
+ modify `server/app.py` to use factory mode:
184
+
185
+ ```python
186
+ # In server/app.py - use factory mode for concurrent sessions
187
+ app = create_app(
188
+ TestEnvironment, # Pass class, not instance
189
+ TestAction,
190
+ TestObservation,
191
+ max_concurrent_envs=4, # Allow 4 concurrent sessions
192
+ )
193
+ ```
194
+
195
+ Then multiple clients can connect simultaneously:
196
+
197
+ ```python
198
+ from test_env import TestAction, TestEnv
199
+ from concurrent.futures import ThreadPoolExecutor
200
+
201
+ def run_episode(client_id: int):
202
+ with TestEnv(base_url="http://localhost:8000") as env:
203
+ result = env.reset()
204
+ for i in range(10):
205
+ result = env.step(TestAction(message=f"Client {client_id}, step {i}"))
206
+ return client_id, result.observation.message_length
207
+
208
+ # Run 4 episodes concurrently
209
+ with ThreadPoolExecutor(max_workers=4) as executor:
210
+ results = list(executor.map(run_episode, range(4)))
211
+ ```
212
+
213
+ ## Development & Testing
214
+
215
+ ### Direct Environment Testing
216
+
217
+ Test the environment logic directly without starting the HTTP server:
218
+
219
+ ```bash
220
+ # From the server directory
221
+ python3 server/test_env_environment.py
222
+ ```
223
+
224
+ This verifies that:
225
+ - Environment resets correctly
226
+ - Step executes actions properly
227
+ - State tracking works
228
+ - Rewards are calculated correctly
229
+
230
+ ### Running Locally
231
+
232
+ Run the server locally for development:
233
+
234
+ ```bash
235
+ uvicorn server.app:app --reload
236
+ ```
237
+
238
+ ## Project Structure
239
+
240
+ ```
241
+ test_env/
242
+ ├── .dockerignore # Docker build exclusions
243
+ ├── __init__.py # Module exports
244
+ ├── README.md # This file
245
+ ├── openenv.yaml # OpenEnv manifest
246
+ ├── pyproject.toml # Project metadata and dependencies
247
+ ├── uv.lock # Locked dependencies (generated)
248
+ ├── client.py # TestEnv client
249
+ ├── models.py # Action and Observation models
250
+ └── server/
251
+ ├── __init__.py # Server module exports
252
+ ├── test_env_environment.py # Core environment logic
253
+ ├── app.py # FastAPI application (HTTP + WebSocket endpoints)
254
+ └── Dockerfile # Container image definition
255
+ ```
test_dir/test_env/__init__.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ """Test Env Environment."""
8
+
9
+ from .client import TestEnv
10
+ from .models import TestAction, TestObservation
11
+
12
+ __all__ = [
13
+ "TestAction",
14
+ "TestObservation",
15
+ "TestEnv",
16
+ ]
test_dir/test_env/client.py ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ """Test Env Environment Client."""
8
+
9
+ from typing import Dict
10
+
11
+ from openenv.core import EnvClient
12
+ from openenv.core.client_types import StepResult
13
+ from openenv.core.env_server.types import State
14
+
15
+ from .models import TestAction, TestObservation
16
+
17
+
18
+ class TestEnv(
19
+ EnvClient[TestAction, TestObservation, State]
20
+ ):
21
+ """
22
+ Client for the Test Env Environment.
23
+
24
+ This client maintains a persistent WebSocket connection to the environment server,
25
+ enabling efficient multi-step interactions with lower latency.
26
+ Each client instance has its own dedicated environment session on the server.
27
+
28
+ Example:
29
+ >>> # Connect to a running server
30
+ >>> with TestEnv(base_url="http://localhost:8000") as client:
31
+ ... result = client.reset()
32
+ ... print(result.observation.echoed_message)
33
+ ...
34
+ ... result = client.step(TestAction(message="Hello!"))
35
+ ... print(result.observation.echoed_message)
36
+
37
+ Example with Docker:
38
+ >>> # Automatically start container and connect
39
+ >>> client = TestEnv.from_docker_image("test_env-env:latest")
40
+ >>> try:
41
+ ... result = client.reset()
42
+ ... result = client.step(TestAction(message="Test"))
43
+ ... finally:
44
+ ... client.close()
45
+ """
46
+
47
+ def _step_payload(self, action: TestAction) -> Dict:
48
+ """
49
+ Convert TestAction to JSON payload for step message.
50
+
51
+ Args:
52
+ action: TestAction instance
53
+
54
+ Returns:
55
+ Dictionary representation suitable for JSON encoding
56
+ """
57
+ return {
58
+ "message": action.message,
59
+ }
60
+
61
+ def _parse_result(self, payload: Dict) -> StepResult[TestObservation]:
62
+ """
63
+ Parse server response into StepResult[TestObservation].
64
+
65
+ Args:
66
+ payload: JSON response data from server
67
+
68
+ Returns:
69
+ StepResult with TestObservation
70
+ """
71
+ obs_data = payload.get("observation", {})
72
+ observation = TestObservation(
73
+ echoed_message=obs_data.get("echoed_message", ""),
74
+ message_length=obs_data.get("message_length", 0),
75
+ done=payload.get("done", False),
76
+ reward=payload.get("reward"),
77
+ metadata=obs_data.get("metadata", {}),
78
+ )
79
+
80
+ return StepResult(
81
+ observation=observation,
82
+ reward=payload.get("reward"),
83
+ done=payload.get("done", False),
84
+ )
85
+
86
+ def _parse_state(self, payload: Dict) -> State:
87
+ """
88
+ Parse server response into State object.
89
+
90
+ Args:
91
+ payload: JSON response from state request
92
+
93
+ Returns:
94
+ State object with episode_id and step_count
95
+ """
96
+ return State(
97
+ episode_id=payload.get("episode_id"),
98
+ step_count=payload.get("step_count", 0),
99
+ )
test_dir/test_env/models.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ """
8
+ Data models for the Test Env Environment.
9
+
10
+ The test_env environment is a simple test environment that echoes back messages.
11
+ """
12
+
13
+ from openenv.core.env_server.types import Action, Observation
14
+ from pydantic import Field
15
+
16
+
17
+ class TestAction(Action):
18
+ """Action for the Test Env environment - just a message to echo."""
19
+
20
+ message: str = Field(..., description="Message to echo back")
21
+
22
+
23
+ class TestObservation(Observation):
24
+ """Observation from the Test Env environment - the echoed message."""
25
+
26
+ echoed_message: str = Field(default="", description="The echoed message")
27
+ message_length: int = Field(default=0, description="Length of the echoed message")
test_dir/test_env/openenv.yaml ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ spec_version: 1
2
+ name: test_env
3
+ type: space
4
+ runtime: fastapi
5
+ app: server.app:app
6
+ port: 8000
7
+
test_dir/test_env/pyproject.toml ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ [build-system]
8
+ requires = ["setuptools>=45", "wheel"]
9
+ build-backend = "setuptools.build_meta"
10
+
11
+ [project]
12
+ name = "openenv-test_env"
13
+ version = "0.1.0"
14
+ description = "Test Env environment for OpenEnv"
15
+ requires-python = ">=3.10"
16
+ dependencies = [
17
+ # Core OpenEnv runtime (provides FastAPI server + HTTP client types)
18
+ # install from github
19
+ # "openenv-core[core] @ git+https://github.com/meta-pytorch/OpenEnv.git",
20
+ "openenv-core[core]>=0.2.2",
21
+ # Environment-specific dependencies
22
+ # Add all dependencies needed for your environment here
23
+ # Examples:
24
+ # "numpy>=1.19.0",
25
+ # "torch>=2.0.0",
26
+ # "gymnasium>=0.29.0",
27
+ # "openspiel>=1.0.0",
28
+ # "smolagents>=1.22.0,<2",
29
+ ]
30
+
31
+ [project.optional-dependencies]
32
+ dev = [
33
+ "pytest>=8.0.0",
34
+ "pytest-cov>=4.0.0",
35
+ ]
36
+
37
+ [project.scripts]
38
+ # Server entry point - enables running via: uv run --project . server
39
+ # or: python -m test_env.server.app
40
+ server = "test_env.server.app:main"
41
+
42
+ [tool.setuptools]
43
+ include-package-data = true
44
+ packages = ["test_env", "test_env.server"]
45
+ package-dir = { "test_env" = ".", "test_env.server" = "server" }
test_dir/test_env/server/Dockerfile ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ # Multi-stage build using openenv-base
8
+ # This Dockerfile is flexible and works for both:
9
+ # - In-repo environments (with local OpenEnv sources)
10
+ # - Standalone environments (with openenv from PyPI/Git)
11
+ # The build script (openenv build) handles context detection and sets appropriate build args.
12
+
13
+ ARG BASE_IMAGE=ghcr.io/meta-pytorch/openenv-base:latest
14
+ FROM ${BASE_IMAGE} AS builder
15
+
16
+ WORKDIR /app
17
+
18
+ # Ensure git is available (required for installing dependencies from VCS)
19
+ RUN apt-get update && \
20
+ apt-get install -y --no-install-recommends git && \
21
+ rm -rf /var/lib/apt/lists/*
22
+
23
+ # Build argument to control whether we're building standalone or in-repo
24
+ ARG BUILD_MODE=in-repo
25
+ ARG ENV_NAME=test_env
26
+
27
+ # Copy environment code (always at root of build context)
28
+ COPY . /app/env
29
+
30
+ # For in-repo builds, openenv is already vendored in the build context
31
+ # For standalone builds, openenv will be installed via pyproject.toml
32
+ WORKDIR /app/env
33
+
34
+ # Ensure uv is available (for local builds where base image lacks it)
35
+ RUN if ! command -v uv >/dev/null 2>&1; then \
36
+ curl -LsSf https://astral.sh/uv/install.sh | sh && \
37
+ mv /root/.local/bin/uv /usr/local/bin/uv && \
38
+ mv /root/.local/bin/uvx /usr/local/bin/uvx; \
39
+ fi
40
+
41
+ # Install dependencies using uv sync
42
+ # If uv.lock exists, use it; otherwise resolve on the fly
43
+ RUN --mount=type=cache,target=/root/.cache/uv \
44
+ if [ -f uv.lock ]; then \
45
+ uv sync --frozen --no-install-project --no-editable; \
46
+ else \
47
+ uv sync --no-install-project --no-editable; \
48
+ fi
49
+
50
+ RUN --mount=type=cache,target=/root/.cache/uv \
51
+ if [ -f uv.lock ]; then \
52
+ uv sync --frozen --no-editable; \
53
+ else \
54
+ uv sync --no-editable; \
55
+ fi
56
+
57
+ # Final runtime stage
58
+ FROM ${BASE_IMAGE}
59
+
60
+ WORKDIR /app
61
+
62
+ # Copy the virtual environment from builder
63
+ COPY --from=builder /app/env/.venv /app/.venv
64
+
65
+ # Copy the environment code
66
+ COPY --from=builder /app/env /app/env
67
+
68
+ # Set PATH to use the virtual environment
69
+ ENV PATH="/app/.venv/bin:$PATH"
70
+
71
+ # Set PYTHONPATH so imports work correctly
72
+ ENV PYTHONPATH="/app/env:$PYTHONPATH"
73
+
74
+ # Health check
75
+ HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
76
+ CMD curl -f http://localhost:8000/health || exit 1
77
+
78
+ # Run the FastAPI server
79
+ # The module path is constructed to work with the /app/env structure
80
+ CMD ["sh", "-c", "cd /app/env && uvicorn server.app:app --host 0.0.0.0 --port 8000"]
test_dir/test_env/server/__init__.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ """Test Env environment server components."""
8
+
9
+ from .test_env_environment import TestEnvironment
10
+
11
+ __all__ = ["TestEnvironment"]
test_dir/test_env/server/app.py ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ """
8
+ FastAPI application for the Test Env Environment.
9
+
10
+ This module creates an HTTP server that exposes the TestEnvironment
11
+ over HTTP and WebSocket endpoints, compatible with EnvClient.
12
+
13
+ Endpoints:
14
+ - POST /reset: Reset the environment
15
+ - POST /step: Execute an action
16
+ - GET /state: Get current environment state
17
+ - GET /schema: Get action/observation schemas
18
+ - WS /ws: WebSocket endpoint for persistent sessions
19
+
20
+ Usage:
21
+ # Development (with auto-reload):
22
+ uvicorn server.app:app --reload --host 0.0.0.0 --port 8000
23
+
24
+ # Production:
25
+ uvicorn server.app:app --host 0.0.0.0 --port 8000 --workers 4
26
+
27
+ # Or run directly:
28
+ python -m server.app
29
+ """
30
+
31
+ try:
32
+ from openenv.core.env_server.http_server import create_app
33
+ except Exception as e: # pragma: no cover
34
+ raise ImportError(
35
+ "openenv is required for the web interface. Install dependencies with '\n uv sync\n'"
36
+ ) from e
37
+
38
+ try:
39
+ from ..models import TestAction, TestObservation
40
+ from .test_env_environment import TestEnvironment
41
+ except ModuleNotFoundError:
42
+ from models import TestAction, TestObservation
43
+ from server.test_env_environment import TestEnvironment
44
+
45
+
46
+ # Create the app with web interface and README integration
47
+ app = create_app(
48
+ TestEnvironment,
49
+ TestAction,
50
+ TestObservation,
51
+ env_name="test_env",
52
+ max_concurrent_envs=1, # increase this number to allow more concurrent WebSocket sessions
53
+ )
54
+
55
+
56
+ def main(host: str = "0.0.0.0", port: int = 8000):
57
+ """
58
+ Entry point for direct execution via uv run or python -m.
59
+
60
+ This function enables running the server without Docker:
61
+ uv run --project . server
62
+ uv run --project . server --port 8001
63
+ python -m test_env.server.app
64
+
65
+ Args:
66
+ host: Host address to bind to (default: "0.0.0.0")
67
+ port: Port number to listen on (default: 8000)
68
+
69
+ For production deployments, consider using uvicorn directly with
70
+ multiple workers:
71
+ uvicorn test_env.server.app:app --workers 4
72
+ """
73
+ import uvicorn
74
+
75
+ uvicorn.run(app, host=host, port=port)
76
+
77
+
78
+ if __name__ == "__main__":
79
+ import argparse
80
+
81
+ parser = argparse.ArgumentParser()
82
+ parser.add_argument("--port", type=int, default=8000)
83
+ args = parser.parse_args()
84
+ main(port=args.port)
test_dir/test_env/server/requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ openenv[core]>=0.2.0
2
+ fastapi>=0.115.0
3
+ uvicorn>=0.24.0
4
+
5
+
6
+
test_dir/test_env/server/test_env_environment.py ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ """
8
+ Test Env Environment Implementation.
9
+
10
+ A simple test environment that echoes back messages sent to it.
11
+ Perfect for testing HTTP server infrastructure.
12
+ """
13
+
14
+ from uuid import uuid4
15
+
16
+ from openenv.core.env_server.interfaces import Environment
17
+ from openenv.core.env_server.types import State
18
+
19
+ try:
20
+ from ..models import TestAction, TestObservation
21
+ except ImportError:
22
+ from models import TestAction, TestObservation
23
+
24
+
25
+ class TestEnvironment(Environment):
26
+ """
27
+ A simple echo environment that echoes back messages.
28
+
29
+ This environment is designed for testing the HTTP server infrastructure.
30
+ It maintains minimal state and simply echoes back whatever message it receives.
31
+
32
+ Example:
33
+ >>> env = TestEnvironment()
34
+ >>> obs = env.reset()
35
+ >>> print(obs.echoed_message) # "Test Env environment ready!"
36
+ >>>
37
+ >>> obs = env.step(TestAction(message="Hello"))
38
+ >>> print(obs.echoed_message) # "Hello"
39
+ >>> print(obs.message_length) # 5
40
+ """
41
+
42
+ # Enable concurrent WebSocket sessions.
43
+ # Set to True if your environment isolates state between instances.
44
+ # When True, multiple WebSocket clients can connect simultaneously, each
45
+ # getting their own environment instance (when using factory mode in app.py).
46
+ SUPPORTS_CONCURRENT_SESSIONS: bool = True
47
+
48
+ def __init__(self):
49
+ """Initialize the test_env environment."""
50
+ self._state = State(episode_id=str(uuid4()), step_count=0)
51
+ self._reset_count = 0
52
+
53
+ def reset(self) -> TestObservation:
54
+ """
55
+ Reset the environment.
56
+
57
+ Returns:
58
+ TestObservation with a ready message
59
+ """
60
+ self._state = State(episode_id=str(uuid4()), step_count=0)
61
+ self._reset_count += 1
62
+
63
+ return TestObservation(
64
+ echoed_message="Test Env environment ready!",
65
+ message_length=0,
66
+ done=False,
67
+ reward=0.0,
68
+ )
69
+
70
+ def step(self, action: TestAction) -> TestObservation: # type: ignore[override]
71
+ """
72
+ Execute a step in the environment by echoing the message.
73
+
74
+ Args:
75
+ action: TestAction containing the message to echo
76
+
77
+ Returns:
78
+ TestObservation with the echoed message and its length
79
+ """
80
+ self._state.step_count += 1
81
+
82
+ message = action.message
83
+ length = len(message)
84
+
85
+ # Simple reward: longer messages get higher rewards
86
+ reward = length * 0.1
87
+
88
+ return TestObservation(
89
+ echoed_message=message,
90
+ message_length=length,
91
+ done=False,
92
+ reward=reward,
93
+ metadata={"original_message": message, "step": self._state.step_count},
94
+ )
95
+
96
+ @property
97
+ def state(self) -> State:
98
+ """
99
+ Get the current environment state.
100
+
101
+ Returns:
102
+ Current State with episode_id and step_count
103
+ """
104
+ return self._state
uv.lock ADDED
The diff for this file is too large to render. See raw diff
 
validate.sh ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/bin/bash
2
+ set -e
3
+
4
+ BOLD="\033[1m"
5
+ GREEN="\033[32m"
6
+ NC="\033[0m"
7
+
8
+ echo "Running Check 1/3: openenv validate..."
9
+ python -m uv run openenv validate
10
+
11
+ echo "Running Check 2/3: docker build..."
12
+ docker build -t supportdesk_env .
13
+
14
+ echo "Running Check 3/3: Python syntax check..."
15
+ python -m uv run python -m py_compile server.py
16
+
17
+ printf "${BOLD}========================================${NC}\n"
18
+ printf "${GREEN}${BOLD} All 3/3 checks passed!${NC}\n"
19
+ printf "${GREEN}${BOLD} Your submission is ready to submit.${NC}\n"
20
+ printf "${BOLD}========================================${NC}\n"