Spaces:

mathi3046
/

openenv-meta

Sleeping

App Files Files Community

mathi3046 committed on Apr 8

Commit

fb78c46

0 Parent(s):

Initial commit

Browse files

Files changed (41) hide show

.env +0 -0
.gitattributes +35 -0
Dockerfile +17 -0
README.md +76 -0
__pycache__/server.cpython-314.pyc +0 -0
app.py +1 -0
app/__init__.py +1 -0
app/__pycache__/__init__.cpython-314.pyc +0 -0
app/__pycache__/env.cpython-314.pyc +0 -0
app/__pycache__/graders.cpython-314.pyc +0 -0
app/__pycache__/models.cpython-314.pyc +0 -0
app/__pycache__/reward.cpython-314.pyc +0 -0
app/__pycache__/tasks.cpython-314.pyc +0 -0
app/__pycache__/utils.cpython-314.pyc +0 -0
app/env.py +65 -0
app/graders.py +46 -0
app/models.py +20 -0
app/reward.py +58 -0
app/tasks.py +12 -0
app/utils.py +22 -0
app/validate.ps1 +21 -0
inference.py +131 -0
openenv.yaml +6 -0
pyproject.toml +21 -0
requirements.txt +4 -0
run.sh +9 -0
server.py +51 -0
server/app.py +8 -0
test_dir/test_env/README.md +255 -0
test_dir/test_env/__init__.py +16 -0
test_dir/test_env/client.py +99 -0
test_dir/test_env/models.py +27 -0
test_dir/test_env/openenv.yaml +7 -0
test_dir/test_env/pyproject.toml +45 -0
test_dir/test_env/server/Dockerfile +80 -0
test_dir/test_env/server/__init__.py +11 -0
test_dir/test_env/server/app.py +84 -0
test_dir/test_env/server/requirements.txt +6 -0
test_dir/test_env/server/test_env_environment.py +104 -0
uv.lock +0 -0
validate.sh +20 -0

.env ADDED Viewed

File without changes

.gitattributes ADDED Viewed

	@@ -0,0 +1,35 @@

+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text

Dockerfile ADDED Viewed

	@@ -0,0 +1,17 @@

+# Base image: Python 3.10 matches `requires-python = ">=3.10"` in pyproject.toml.
+# The Debian "buster" variants are end-of-life and no longer receive updates.
+FROM python:3.10-slim
+# Set working directory
+WORKDIR /app
+# Copy only the dependency manifest first so the install layer stays cached
+# until requirements.txt itself changes
+COPY requirements.txt .
+# Upgrade pip and install dependencies
+RUN pip install --no-cache-dir --upgrade pip \
+    && pip install --no-cache-dir -r requirements.txt
+# Copy the rest of the project
+COPY . .
+# Run start script
+CMD ["bash", "run.sh"]

README.md ADDED Viewed

	@@ -0,0 +1,76 @@

+---
+title: SupportDeskEnv
+emoji: 🤖
+colorFrom: blue
+colorTo: green
+sdk: docker
+app_port: 7860
+pinned: false
+---
+# 🧠 SupportDeskEnv — OpenEnv for Real-World Customer Support AI
+SupportDeskEnv is a production-grade OpenEnv environment that simulates real-world customer support workflows, enabling evaluation of AI agents on decision-making, empathy, and multi-step reasoning.
+---
+## 🚀 Overview
+Modern AI agents must handle more than just tasks — they must:
+- Understand user intent
+- Respond with empathy
+- Take correct actions
+- Resolve issues efficiently
+SupportDeskEnv models this challenge by simulating realistic support tickets and evaluating agent behavior using structured rewards.
+---
+## 🌍 Real-World Use Case
+This environment replicates real customer support scenarios such as:
+- Login failures
+- Payment and billing issues
+- General account queries
+It captures:
+- Emotional context (e.g., frustrated users)
+- Multi-step interactions
+- Resolution workflows
+---
+## 🧩 OpenEnv Specification
+Fully compliant with OpenEnv:
+- ✅ Typed `Observation`, `Action`, `Reward` models (Pydantic)
+- ✅ `reset()`, `step()`, `state()` APIs
+- ✅ `openenv.yaml` included
+- ✅ Validated via `openenv validate`
+---
+## ⚙️ Action Space
+```json
+{
+  "category": "billing | tech | general",
+  "response": "string",
+  "escalate": "boolean",
+  "resolve": "boolean"
+}
+```

__pycache__/server.cpython-314.pyc ADDED Viewed

Binary file (2.51 kB). View file

app.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ from server import app

app/__init__.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ # Init for app

app/__pycache__/__init__.cpython-314.pyc ADDED Viewed

Binary file (162 Bytes). View file

app/__pycache__/env.cpython-314.pyc ADDED Viewed

Binary file (4.68 kB). View file

app/__pycache__/graders.cpython-314.pyc ADDED Viewed

Binary file (3.45 kB). View file

app/__pycache__/models.cpython-314.pyc ADDED Viewed

Binary file (2.99 kB). View file

app/__pycache__/reward.cpython-314.pyc ADDED Viewed

Binary file (4.19 kB). View file

app/__pycache__/tasks.cpython-314.pyc ADDED Viewed

Binary file (1.47 kB). View file

app/__pycache__/utils.cpython-314.pyc ADDED Viewed

Binary file (1.53 kB). View file

app/env.py ADDED Viewed

	@@ -0,0 +1,65 @@

+from typing import Dict, Any
+from .models import Observation, Action, Reward
+from .utils import generate_ticket
+from .tasks import get_task
+from .reward import calculate_reward
+from .graders import grade_easy, grade_medium, grade_hard
+class SupportDeskEnv:
+    def __init__(self):
+        self.state_data = None
+        self.expected_category = None
+        self.task_config = None
+    async def reset(self, level: str = "medium") -> Observation:
+        self.task_config = get_task(level)
+        ticket = generate_ticket(self.task_config.level)
+        self.expected_category = ticket["category"]
+        self.state_data = Observation(
+            ticket_id=ticket["id"],
+            user_message=ticket["message"],
+            sentiment=ticket["sentiment"],
+            history=[{"role": "user", "content": ticket["message"]}],
+            step_count=0,
+            task_level=self.task_config.level
+        )
+        return self.state_data
+    async def step(self, action: Action) -> Dict[str, Any]:
+        if not self.state_data:
+            await self.reset()
+        self.state_data.step_count += 1
+        self.state_data.history.append({"role": "agent", "content": action.response or ""})
+        # Pass max_steps to calculate_reward for the efficiency bonus
+        reward = calculate_reward(self.state_data, action, self.expected_category, self.task_config.max_steps)
+        done = action.resolve or action.escalate or self.state_data.step_count >= self.task_config.max_steps
+        # Grading based on level
+        if self.task_config.level == "easy":
+            task_score = grade_easy(action, self.expected_category)
+        elif self.task_config.level == "medium":
+            task_score = grade_medium(action, self.expected_category)
+        else:
+            task_score = grade_hard(action, self.state_data, self.expected_category)
+        reward.metrics["grader_score"] = task_score
+        if not done:
+            self.state_data.user_message = "Can you explain more?"
+            self.state_data.history.append({"role": "user", "content": self.state_data.user_message})
+        return {
+            "observation": self.state_data.dict(),
+            "reward": reward.dict(),
+            "done": done,
+            "info": {"expected_category": self.expected_category, "task_score": task_score}
+        }
+    async def state(self) -> Observation:
+        if not self.state_data:
+            await self.reset()
+        return self.state_data

app/graders.py ADDED Viewed

	@@ -0,0 +1,46 @@

+from typing import Any
+def _safe_lower(val: Any) -> str:
+    return (val or "").lower()
+def grade_easy(action: Any, expected_category: str) -> float:
+    score = 0.0
+    category = _safe_lower(getattr(action, "category", ""))
+    if category == _safe_lower(expected_category):
+        score += 1.0
+    return float(max(0.0, min(1.0, score)))
+def grade_medium(action: Any, expected_category: str) -> float:
+    score = 0.0
+    category = _safe_lower(getattr(action, "category", ""))
+    response = _safe_lower(getattr(action, "response", ""))
+    if category == _safe_lower(expected_category):
+        score += 0.5
+    if len(response) > 20 and getattr(action, "resolve", False):
+        score += 0.5
+    return float(max(0.0, min(1.0, score)))
+def grade_hard(action: Any, state_data: Any, expected_category: str) -> float:
+    score = 0.0
+    category = _safe_lower(getattr(action, "category", ""))
+    response = _safe_lower(getattr(action, "response", ""))
+    sentiment = _safe_lower(getattr(state_data, "sentiment", ""))
+    if category == _safe_lower(expected_category):
+        score += 0.3
+    if getattr(action, "resolve", False) and len(response) > 30:
+        score += 0.3
+    # Strict rule for angry customers
+    if sentiment == "angry" and any(word in response for word in ["sorry", "apologize", "understand", "frustrating"]):
+        score += 0.4
+    elif sentiment != "angry":
+        score += 0.4
+    return float(max(0.0, min(1.0, score)))

app/models.py ADDED Viewed

	@@ -0,0 +1,20 @@

+from pydantic import BaseModel, Field
+from typing import List, Optional
+# What the agent sees: the ticket, the latest customer message and the conversation so far.
+class Observation(BaseModel):
+    ticket_id: str = Field(description="Unique ID for the support ticket")
+    user_message: str = Field(description="The latest message from the customer")
+    sentiment: str = Field(description="Customer sentiment (angry, frustrated, neutral, polite)")
+    history: List[dict] = Field(default_factory=list, description="Conversation history")
+    step_count: int = Field(default=0, description="Current step in the conversation")
+    task_level: str = Field(description="Task difficulty level (easy, medium, hard)")
+# Agent action payload; every field has a default, so an empty JSON object validates as an Action.
+class Action(BaseModel):
+    category: Optional[str] = Field(default=None, description="Classified issue category (billing, tech, general)")
+    response: Optional[str] = Field(default="", description="Agent's response to the customer")
+    escalate: bool = Field(default=False, description="Whether to escalate the ticket to a human")
+    resolve: bool = Field(default=False, description="Whether the issue is considered resolved")
+# Reward for one step; `score` is validated to lie within [0.0, 1.0].
+class Reward(BaseModel):
+    score: float = Field(..., ge=0.0, le=1.0, description="Overall reward score")
+    metrics: dict = Field(default_factory=dict, description="Detailed metrics breakdown")

app/reward.py ADDED Viewed

	@@ -0,0 +1,58 @@

+from .models import Action, Observation, Reward
+def calculate_reward(obs: Observation, action: Action, expected_category: str, max_steps: int) -> Reward:
+    score = 0.0
+    metrics = {}
+    # Correct category extraction
+    cat = (action.category or "").lower()
+    if cat and expected_category.lower() in cat:
+        score += 0.3
+        metrics["category_correct"] = 0.3
+    # Helpful & Empathetic check (Deterministic heuristic)
+    resp = (action.response or "").lower()
+    if resp:
+        if any(word in resp for word in ["sorry", "apologize", "understand", "help"]):
+            empathy_score = 0.2 if obs.sentiment in ["angry", "frustrated"] else 0.1
+            score += empathy_score
+            metrics["empathy"] = empathy_score
+        # Angry customer strict rule
+        if obs.sentiment == "angry" and not any(w in resp for w in ["sorry", "apologize", "understand"]):
+            score -= 0.25
+            metrics["angry_penalty"] = -0.25
+        # Anti-generic response penalty
+        generic_phrases = ["i will help you", "let me help", "i understand your issue"]
+        if any(phrase in resp for phrase in generic_phrases) and len(action.response or "") < 60:
+            score -= 0.1
+            metrics["generic_penalty"] = -0.1
+        if any(word in resp for word in ["step", "fix", "update", "here is", "resolved"]):
+            score += 0.3
+            metrics["helpfulness"] = 0.3
+        # Repetition penalty
+        past_responses = [msg["content"].lower() for msg in obs.history if msg["role"] == "agent"]
+        if resp in past_responses:
+            score -= 0.2
+            metrics["repetition_penalty"] = -0.2
+    # Penalties
+    if action.escalate:
+        score -= 0.1
+        metrics["escalation_penalty"] = -0.1
+    if action.resolve and not action.escalate:
+        score += 0.2
+        metrics["resolution_bonus"] = 0.2
+        # Efficiency bonus
+        if obs.step_count < max_steps:
+            efficiency_bonus = 0.1 * (max_steps - obs.step_count)
+            score += efficiency_bonus
+            metrics["efficiency_bonus"] = efficiency_bonus
+    final_score = float(max(0.0, min(1.0, score)))
+    return Reward(score=final_score, metrics=metrics)

app/tasks.py ADDED Viewed

	@@ -0,0 +1,12 @@

+class TaskConfig:
+    def __init__(self, level: str, max_steps: int):
+        self.level = level
+        self.max_steps = max_steps
+EASY_TASK = TaskConfig(level="easy", max_steps=1)
+MEDIUM_TASK = TaskConfig(level="medium", max_steps=1)
+HARD_TASK = TaskConfig(level="hard", max_steps=5)
+def get_task(level: str) -> TaskConfig:
+    tasks = {"easy": EASY_TASK, "medium": MEDIUM_TASK, "hard": HARD_TASK}
+    return tasks.get(level.lower(), EASY_TASK)

app/utils.py ADDED Viewed

	@@ -0,0 +1,22 @@

+import random
+import uuid
+# Example customer messages, keyed by issue category.
+ISSUE_TEMPLATES = {
+    "billing": ["I was overcharged on my last invoice.", "How do I update my credit card?", "Cancel my subscription."],
+    "tech": ["The app keeps crashing on startup.", "I can't log in to my account.", "API is returning 500 errors."],
+    "general": ["What are your business hours?", "Where can I find the documentation?", "Do you offer enterprise plans?"]
+}
+# Sentiment labels a generated ticket can carry.
+SENTIMENTS = ["angry", "frustrated", "neutral", "polite"]
+def generate_ticket(level: str):
+    category = random.choice(list(ISSUE_TEMPLATES.keys()))
+    message = random.choice(ISSUE_TEMPLATES[category])
+    sentiment = random.choice(SENTIMENTS)
+    return {
+        "id": f"TKT-{uuid.uuid4().hex[:8].upper()}",
+        "category": category,
+        "message": message,
+        "sentiment": sentiment,
+        "level": level
+    }

app/validate.ps1 ADDED Viewed

	@@ -0,0 +1,21 @@

+Write-Host "Running validation..."
+# Step 1: OpenEnv validation
+# Resolve the CLI from PATH instead of a user-specific absolute path so the
+# script works on any machine.
+$openenv = Get-Command openenv -ErrorAction SilentlyContinue
+if (-not $openenv) {
+    Write-Host "❌ openenv CLI not found on PATH"
+    exit 1
+}
+& $openenv.Source validate
+if ($LASTEXITCODE -ne 0) {
+    Write-Host "❌ OpenEnv validation failed"
+    exit 1
+}
+# Step 2: Docker build
+docker build -t supportdesk_env .
+if ($LASTEXITCODE -ne 0) {
+    Write-Host "❌ Docker build failed"
+    exit 1
+}
+# ✅ Final Success Banner (this script runs two checks)
+Write-Host "========================================"
+Write-Host "  All 2/2 checks passed!"
+Write-Host "  Your submission is ready to submit."
+Write-Host "========================================"

inference.py ADDED Viewed

	@@ -0,0 +1,131 @@

+import os
+import requests
+import json
+from openai import OpenAI
+# =============================
+# ENV VARIABLES (STRICT)
+# =============================
+API_BASE_URL = os.environ.get("API_BASE_URL")
+API_KEY = os.environ.get("API_KEY")
+MODEL_NAME = os.environ.get("MODEL_NAME", "gpt-4o-mini")
+ENV_URL = "http://localhost:7860"
+# =============================
+# 🔥 GLOBAL LLM CLIENT
+# =============================
+try:
+    client = OpenAI(
+        base_url=API_BASE_URL,
+        api_key=API_KEY
+    )
+    print("✅ LLM CLIENT INITIALIZED", flush=True)
+except Exception as e:
+    print(f"❌ CLIENT INIT FAILED: {e}", flush=True)
+    client = None
+# =============================
+# LLM ACTION (RESPONSES API)
+# =============================
+def get_llm_action(issue):
+    try:
+        if client is None:
+            raise Exception("Client not initialized")
+        print("🚀 CALLING LLM...", flush=True)
+        response = client.responses.create(
+            model=MODEL_NAME,
+            input=issue
+        )
+        print("✅ LLM RESPONSE RECEIVED", flush=True)
+        # Extract text safely
+        output_text = ""
+        try:
+            output_text = response.output[0].content[0].text
+        except:
+            output_text = "I'll help you with this."
+        return {
+            "category": "tech",
+            "response": output_text[:120],
+            "escalate": False,
+            "resolve": True
+        }
+    except Exception as e:
+        print(f"❌ LLM ERROR: {e}", flush=True)
+        return {
+            "category": "tech",
+            "response": "Temporary issue. Please try again.",
+            "escalate": False,
+            "resolve": True
+        }
+# =============================
+# MAIN INFERENCE
+# =============================
+def run_inference(level="easy"):
+    print(f"[START] task=supportdesk_{level} env=SupportDeskEnv model={MODEL_NAME}", flush=True)
+    total_reward = 0.0
+    steps_taken = 0
+    done = False
+    try:
+        res = requests.post(f"{ENV_URL}/reset", json={"level": level}, timeout=5)
+        obs = res.json().get("observation", {})
+        while not done and steps_taken < 5:
+            steps_taken += 1
+            issue = obs.get("user_message", "Help needed")
+            # 🔥 ALWAYS CALL LLM
+            action = get_llm_action(issue)
+            step_res = requests.post(
+                f"{ENV_URL}/step",
+                json=action,
+                timeout=5
+            ).json()
+            reward = step_res.get("reward", {}).get("score", 0.0)
+            done = step_res.get("done", False)
+            obs = step_res.get("observation", {})
+            total_reward += reward
+            print(
+                f"[STEP] step={steps_taken} action={json.dumps(action)} reward={reward} done={done}",
+                flush=True
+            )
+        score = total_reward / max(steps_taken, 1)
+        score = min(max(score, 0.0), 1.0)
+        success = score >= 0.6
+        print(
+            f"[END] success={success} steps={steps_taken} score={score}",
+            flush=True
+        )
+    except Exception as e:
+        print(
+            f"[END] success=False steps={steps_taken} score=0.0 error={e}",
+            flush=True
+        )
+# =============================
+# ENTRY POINT
+# =============================
+if __name__ == "__main__":
+    run_inference("easy")
+    run_inference("medium")
+    run_inference("hard")

openenv.yaml ADDED Viewed

	@@ -0,0 +1,6 @@

+name: "SupportDeskEnv"
+version: "1.0.0"
+description: "AI Customer Support Simulation Environment"
+entrypoint: "server.py"
+interface: "fastapi"
+port: 7860

pyproject.toml ADDED Viewed

	@@ -0,0 +1,21 @@

+[project]
+name = "supportdesk-env"
+version = "0.1.0"
+description = "AI Customer Support Simulation Environment using OpenEnv"
+requires-python = ">=3.10"
+authors = [
+    { name = "Gobinath" }
+]
+dependencies = [
+    "fastapi",
+    "uvicorn",
+    "pydantic",
+    "openai",
+    "openenv-core>=0.2.0"
+]
+[project.scripts]
+server = "server.app:main"
+[tool.openenv]
+entrypoint = "server.app:main"

requirements.txt ADDED Viewed

	@@ -0,0 +1,4 @@

+requests==2.31.0
+openai>=1.40.0
+fastapi
+uvicorn

run.sh ADDED Viewed

	@@ -0,0 +1,9 @@

+#!/bin/bash
+# Start the FastAPI server in the background
+python server.py &
+# Wait for the server to initialize
+sleep 3
+# Run the inference tasks
+python inference.py
+# Keep the container running by waiting for background processes
+wait

server.py ADDED Viewed

	@@ -0,0 +1,51 @@

+from fastapi import FastAPI
+from app.env import SupportDeskEnv
+from app.models import Action
+app = FastAPI(title="SupportDeskEnv")
+env = SupportDeskEnv()
+# -----------------------------
+# ROOT CHECK
+# -----------------------------
+# Liveness check: returns a static JSON message.
+@app.get("/")
+def root():
+    return {"message": "SupportDeskEnv is running 🚀"}
+# -----------------------------
+# RESET (FIXED - BODY OPTIONAL)
+# -----------------------------
+@app.post("/reset")
+async def reset(req: dict = {}):
+    level = req.get("level", "medium")
+    obs = await env.reset(level)
+    return {"observation": obs.dict()}
+# Optional GET (for browser testing)
+@app.get("/reset")
+async def reset_get():
+    obs = await env.reset("medium")
+    return {"observation": obs.dict()}
+# -----------------------------
+# STEP
+# -----------------------------
+@app.post("/step")
+async def step(action: Action):
+    result = await env.step(action)
+    return result
+# -----------------------------
+# STATE
+# -----------------------------
+@app.get("/state")
+async def state():
+    obs = await env.state()
+    return {"observation": obs.dict()}
+# -----------------------------
+# RUN SERVER
+# -----------------------------
+# Direct execution (`python server.py`) serves the app on all interfaces at port 7860.
+if __name__ == "__main__":
+    import uvicorn
+    uvicorn.run(app, host="0.0.0.0", port=7860)

server/app.py ADDED Viewed

	@@ -0,0 +1,8 @@

+from server import app
+import uvicorn
+# Entry point: serve the imported FastAPI `app` on all interfaces at port 7860.
+def main():
+    uvicorn.run(app, host="0.0.0.0", port=7860)
+if __name__ == '__main__':
+    main()

test_dir/test_env/README.md ADDED Viewed

	@@ -0,0 +1,255 @@

+---
+title: Test Env Environment Server
+emoji: 📺
+colorFrom: red
+colorTo: yellow
+sdk: docker
+pinned: false
+app_port: 8000
+base_path: /web
+tags:
+  - openenv
+---
+# Test Env Environment
+A simple test environment that echoes back messages. Perfect for testing the env APIs as well as demonstrating environment usage patterns.
+## Quick Start
+The simplest way to use the Test Env environment is through the `TestEnv` class:
+```python
+from test_env import TestAction, TestEnv
+try:
+    # Create environment from Docker image
+    test_envenv = TestEnv.from_docker_image("test_env-env:latest")
+    # Reset
+    result = test_envenv.reset()
+    print(f"Reset: {result.observation.echoed_message}")
+    # Send multiple messages
+    messages = ["Hello, World!", "Testing echo", "Final message"]
+    for msg in messages:
+        result = test_envenv.step(TestAction(message=msg))
+        print(f"Sent: '{msg}'")
+        print(f"  → Echoed: '{result.observation.echoed_message}'")
+        print(f"  → Length: {result.observation.message_length}")
+        print(f"  → Reward: {result.reward}")
+finally:
+    # Always clean up
+    test_envenv.close()
+```
+That's it! The `TestEnv.from_docker_image()` method handles:
+- Starting the Docker container
+- Waiting for the server to be ready
+- Connecting to the environment
+- Container cleanup when you call `close()`
+## Building the Docker Image
+Before using the environment, you need to build the Docker image:
+```bash
+# From project root
+docker build -t test_env-env:latest -f server/Dockerfile .
+```
+## Deploying to Hugging Face Spaces
+You can easily deploy your OpenEnv environment to Hugging Face Spaces using the `openenv push` command:
+```bash
+# From the environment directory (where openenv.yaml is located)
+openenv push
+# Or specify options
+openenv push --namespace my-org --private
+```
+The `openenv push` command will:
+1. Validate that the directory is an OpenEnv environment (checks for `openenv.yaml`)
+2. Prepare a custom build for Hugging Face Docker space (enables web interface)
+3. Upload to Hugging Face (ensuring you're logged in)
+### Prerequisites
+- Authenticate with Hugging Face: The command will prompt for login if not already authenticated
+### Options
+- `--directory`, `-d`: Directory containing the OpenEnv environment (defaults to current directory)
+- `--repo-id`, `-r`: Repository ID in format 'username/repo-name' (defaults to 'username/env-name' from openenv.yaml)
+- `--base-image`, `-b`: Base Docker image to use (overrides Dockerfile FROM)
+- `--private`: Deploy the space as private (default: public)
+### Examples
+```bash
+# Push to your personal namespace (defaults to username/env-name from openenv.yaml)
+openenv push
+# Push to a specific repository
+openenv push --repo-id my-org/my-env
+# Push with a custom base image
+openenv push --base-image ghcr.io/meta-pytorch/openenv-base:latest
+# Push as a private space
+openenv push --private
+# Combine options
+openenv push --repo-id my-org/my-env --base-image custom-base:latest --private
+```
+After deployment, your space will be available at:
+`https://huggingface.co/spaces/<repo-id>`
+The deployed space includes:
+- **Web Interface** at `/web` - Interactive UI for exploring the environment
+- **API Documentation** at `/docs` - Full OpenAPI/Swagger interface
+- **Health Check** at `/health` - Container health monitoring
+- **WebSocket** at `/ws` - Persistent session endpoint for low-latency interactions
+## Environment Details
+### Action
+**TestAction**: Contains a single field
+- `message` (str) - The message to echo back
+### Observation
+**TestObservation**: Contains the echo response and metadata
+- `echoed_message` (str) - The message echoed back
+- `message_length` (int) - Length of the message
+- `reward` (float) - Reward based on message length (length × 0.1)
+- `done` (bool) - Always False for echo environment
+- `metadata` (dict) - Additional info like step count
+### Reward
+The reward is calculated as: `message_length × 0.1`
+- "Hi" → reward: 0.2
+- "Hello, World!" → reward: 1.3
+- Empty message → reward: 0.0
+## Advanced Usage
+### Connecting to an Existing Server
+If you already have a Test Env environment server running, you can connect directly:
+```python
+from test_env import TestAction, TestEnv
+# Connect to existing server
+test_envenv = TestEnv(base_url="<ENV_HTTP_URL_HERE>")
+# Use as normal
+result = test_envenv.reset()
+result = test_envenv.step(TestAction(message="Hello!"))
+```
+Note: When connecting to an existing server, `test_envenv.close()` will NOT stop the server.
+### Using the Context Manager
+The client supports context manager usage for automatic connection management:
+```python
+from test_env import TestAction, TestEnv
+# Connect with context manager (auto-connects and closes)
+with TestEnv(base_url="http://localhost:8000") as env:
+    result = env.reset()
+    print(f"Reset: {result.observation.echoed_message}")
+    # Multiple steps with low latency
+    for msg in ["Hello", "World", "!"]:
+        result = env.step(TestAction(message=msg))
+        print(f"Echoed: {result.observation.echoed_message}")
+```
+The client uses WebSocket connections for:
+- **Lower latency**: No HTTP connection overhead per request
+- **Persistent session**: Server maintains your environment state
+- **Efficient for episodes**: Better for many sequential steps
+### Concurrent WebSocket Sessions
+The server supports multiple concurrent WebSocket connections. To enable this,
+modify `server/app.py` to use factory mode:
+```python
+# In server/app.py - use factory mode for concurrent sessions
+app = create_app(
+    TestEnvironment,  # Pass class, not instance
+    TestAction,
+    TestObservation,
+    max_concurrent_envs=4,  # Allow 4 concurrent sessions
+)
+```
+Then multiple clients can connect simultaneously:
+```python
+from test_env import TestAction, TestEnv
+from concurrent.futures import ThreadPoolExecutor
+def run_episode(client_id: int):
+    with TestEnv(base_url="http://localhost:8000") as env:
+        result = env.reset()
+        for i in range(10):
+            result = env.step(TestAction(message=f"Client {client_id}, step {i}"))
+        return client_id, result.observation.message_length
+# Run 4 episodes concurrently
+with ThreadPoolExecutor(max_workers=4) as executor:
+    results = list(executor.map(run_episode, range(4)))
+```
+## Development & Testing
+### Direct Environment Testing
+Test the environment logic directly without starting the HTTP server:
+```bash
+# From the environment root directory (the one that contains server/)
+python3 server/test_env_environment.py
+```
+This verifies that:
+- Environment resets correctly
+- Step executes actions properly
+- State tracking works
+- Rewards are calculated correctly
+### Running Locally
+Run the server locally for development:
+```bash
+uvicorn server.app:app --reload
+```
+## Project Structure
+```
+test_env/
+├── .dockerignore         # Docker build exclusions
+├── __init__.py            # Module exports
+├── README.md              # This file
+├── openenv.yaml           # OpenEnv manifest
+├── pyproject.toml         # Project metadata and dependencies
+├── uv.lock                # Locked dependencies (generated)
+├── client.py              # TestEnv client
+├── models.py              # Action and Observation models
+└── server/
+    ├── __init__.py        # Server module exports
+    ├── test_env_environment.py  # Core environment logic
+    ├── app.py             # FastAPI application (HTTP + WebSocket endpoints)
+    └── Dockerfile         # Container image definition
+```

test_dir/test_env/__init__.py ADDED Viewed

	@@ -0,0 +1,16 @@

+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+"""Test Env Environment."""
+from .client import TestEnv
+from .models import TestAction, TestObservation
+__all__ = [
+    "TestAction",
+    "TestObservation",
+    "TestEnv",
+]

test_dir/test_env/client.py ADDED Viewed

	@@ -0,0 +1,99 @@

+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+"""Test Env Environment Client."""
+from typing import Dict
+from openenv.core import EnvClient
+from openenv.core.client_types import StepResult
+from openenv.core.env_server.types import State
+from .models import TestAction, TestObservation
+class TestEnv(
+    EnvClient[TestAction, TestObservation, State]
+):
+    """
+    Client for the Test Env Environment.
+    This client maintains a persistent WebSocket connection to the environment server,
+    enabling efficient multi-step interactions with lower latency.
+    Each client instance has its own dedicated environment session on the server.
+    Example:
+        >>> # Connect to a running server
+        >>> with TestEnv(base_url="http://localhost:8000") as client:
+        ...     result = client.reset()
+        ...     print(result.observation.echoed_message)
+        ...
+        ...     result = client.step(TestAction(message="Hello!"))
+        ...     print(result.observation.echoed_message)
+    Example with Docker:
+        >>> # Automatically start container and connect
+        >>> client = TestEnv.from_docker_image("test_env-env:latest")
+        >>> try:
+        ...     result = client.reset()
+        ...     result = client.step(TestAction(message="Test"))
+        ... finally:
+        ...     client.close()
+    """
+    def _step_payload(self, action: TestAction) -> Dict:
+        """
+        Convert TestAction to JSON payload for step message.
+        Args:
+            action: TestAction instance
+        Returns:
+            Dictionary representation suitable for JSON encoding
+        """
+        return {
+            "message": action.message,
+        }
+    def _parse_result(self, payload: Dict) -> StepResult[TestObservation]:
+        """
+        Parse server response into StepResult[TestObservation].
+        Args:
+            payload: JSON response data from server
+        Returns:
+            StepResult with TestObservation
+        """
+        obs_data = payload.get("observation", {})
+        observation = TestObservation(
+            echoed_message=obs_data.get("echoed_message", ""),
+            message_length=obs_data.get("message_length", 0),
+            done=payload.get("done", False),
+            reward=payload.get("reward"),
+            metadata=obs_data.get("metadata", {}),
+        )
+        return StepResult(
+            observation=observation,
+            reward=payload.get("reward"),
+            done=payload.get("done", False),
+        )
+    def _parse_state(self, payload: Dict) -> State:
+        """
+        Parse server response into State object.
+        Args:
+            payload: JSON response from state request
+        Returns:
+            State object with episode_id and step_count
+        """
+        return State(
+            episode_id=payload.get("episode_id"),
+            step_count=payload.get("step_count", 0),
+        )

test_dir/test_env/models.py ADDED Viewed

	@@ -0,0 +1,27 @@

+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+"""
+Data models for the Test Env Environment.
+The test_env environment is a simple test environment that echoes back messages.
+"""
+from openenv.core.env_server.types import Action, Observation
+from pydantic import Field
+class TestAction(Action):
+    """Action for the Test Env environment - just a message to echo."""
+    message: str = Field(..., description="Message to echo back")
+class TestObservation(Observation):
+    """Observation from the Test Env environment - the echoed message."""
+    echoed_message: str = Field(default="", description="The echoed message")
+    message_length: int = Field(default=0, description="Length of the echoed message")

test_dir/test_env/openenv.yaml ADDED Viewed

	@@ -0,0 +1,7 @@

+spec_version: 1
+name: test_env
+type: space
+runtime: fastapi
+app: server.app:app
+port: 8000

test_dir/test_env/pyproject.toml ADDED Viewed

	@@ -0,0 +1,45 @@

+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+[build-system]
+requires = ["setuptools>=45", "wheel"]
+build-backend = "setuptools.build_meta"
+[project]
+name = "openenv-test_env"
+version = "0.1.0"
+description = "Test Env environment for OpenEnv"
+requires-python = ">=3.10"
+dependencies = [
+    # Core OpenEnv runtime (provides FastAPI server + HTTP client types)
+    # install from github
+    # "openenv-core[core] @ git+https://github.com/meta-pytorch/OpenEnv.git",
+    "openenv-core[core]>=0.2.2",
+    # Environment-specific dependencies
+    # Add all dependencies needed for your environment here
+    # Examples:
+    # "numpy>=1.19.0",
+    # "torch>=2.0.0",
+    # "gymnasium>=0.29.0",
+    # "openspiel>=1.0.0",
+    # "smolagents>=1.22.0,<2",
+]
+[project.optional-dependencies]
+dev = [
+    "pytest>=8.0.0",
+    "pytest-cov>=4.0.0",
+]
+[project.scripts]
+# Server entry point - enables running via: uv run --project . server
+# or: python -m test_env.server.app
+server = "test_env.server.app:main"
+[tool.setuptools]
+include-package-data = true
+packages = ["test_env", "test_env.server"]
+package-dir = { "test_env" = ".", "test_env.server" = "server" }

test_dir/test_env/server/Dockerfile ADDED Viewed

	@@ -0,0 +1,80 @@

+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+# Multi-stage build using openenv-base
+# This Dockerfile is flexible and works for both:
+# - In-repo environments (with local OpenEnv sources)
+# - Standalone environments (with openenv from PyPI/Git)
+# The build script (openenv build) handles context detection and sets appropriate build args.
+# Base image for both stages. Declared before the first FROM so that it can be
+# referenced by the FROM line of the builder stage and of the runtime stage.
+ARG BASE_IMAGE=ghcr.io/meta-pytorch/openenv-base:latest
+# ---- Builder stage: resolves and installs dependencies into /app/env/.venv ----
+FROM ${BASE_IMAGE} AS builder
+WORKDIR /app
+# Ensure git is available (required for installing dependencies from VCS).
+# The apt package lists are removed in the same layer to keep it small.
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends git && \
+    rm -rf /var/lib/apt/lists/*
+# Build arguments describing the build (standalone vs. in-repo, environment name).
+# NOTE(review): neither BUILD_MODE nor ENV_NAME is referenced by any later
+# instruction in this Dockerfile — confirm whether the build tooling needs them.
+ARG BUILD_MODE=in-repo
+ARG ENV_NAME=test_env
+# Copy environment code (always at root of build context)
+COPY . /app/env
+# For in-repo builds, openenv is already vendored in the build context
+# For standalone builds, openenv will be installed via pyproject.toml
+WORKDIR /app/env
+# Ensure uv is available (for local builds where base image lacks it).
+# The installer places the binaries under /root/.local/bin; they are moved to
+# /usr/local/bin so that they are on the default PATH.
+RUN if ! command -v uv >/dev/null 2>&1; then \
+        curl -LsSf https://astral.sh/uv/install.sh | sh && \
+        mv /root/.local/bin/uv /usr/local/bin/uv && \
+        mv /root/.local/bin/uvx /usr/local/bin/uvx; \
+    fi
+# Install dependencies using uv sync, in two passes.
+# Pass 1 (--no-install-project): third-party dependencies only.
+# If uv.lock exists, use it as-is (--frozen); otherwise resolve on the fly.
+RUN --mount=type=cache,target=/root/.cache/uv \
+    if [ -f uv.lock ]; then \
+        uv sync --frozen --no-install-project --no-editable; \
+    else \
+        uv sync --no-install-project --no-editable; \
+    fi
+# Pass 2: same command without --no-install-project, so the project itself is
+# installed (non-editable) on top of the dependencies from pass 1.
+RUN --mount=type=cache,target=/root/.cache/uv \
+    if [ -f uv.lock ]; then \
+        uv sync --frozen --no-editable; \
+    else \
+        uv sync --no-editable; \
+    fi
+# ---- Final runtime stage: starts again from the base image ----
+FROM ${BASE_IMAGE}
+WORKDIR /app
+# Copy the virtual environment from builder
+COPY --from=builder /app/env/.venv /app/.venv
+# Copy the environment code (this copies the whole /app/env tree of the builder)
+COPY --from=builder /app/env /app/env
+# Set PATH to use the virtual environment
+ENV PATH="/app/.venv/bin:$PATH"
+# Set PYTHONPATH so imports work correctly (modules under /app/env become importable)
+ENV PYTHONPATH="/app/env:$PYTHONPATH"
+# Health check: request /health on port 8000, the port the server below binds to.
+HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
+    CMD curl -f http://localhost:8000/health || exit 1
+# Run the FastAPI server
+# The server is started from /app/env, so the application is addressed as
+# server.app:app relative to that directory.
+CMD ["sh", "-c", "cd /app/env && uvicorn server.app:app --host 0.0.0.0 --port 8000"]

test_dir/test_env/server/__init__.py ADDED Viewed

	@@ -0,0 +1,11 @@

+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+"""Test Env environment server components."""
+from .test_env_environment import TestEnvironment
+__all__ = ["TestEnvironment"]

test_dir/test_env/server/app.py ADDED Viewed

	@@ -0,0 +1,84 @@

+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+"""
+FastAPI application for the Test Env Environment.
+This module creates an HTTP server that exposes the TestEnvironment
+over HTTP and WebSocket endpoints, compatible with EnvClient.
+Endpoints:
+    - POST /reset: Reset the environment
+    - POST /step: Execute an action
+    - GET /state: Get current environment state
+    - GET /schema: Get action/observation schemas
+    - WS /ws: WebSocket endpoint for persistent sessions
+Usage:
+    # Development (with auto-reload):
+    uvicorn server.app:app --reload --host 0.0.0.0 --port 8000
+    # Production:
+    uvicorn server.app:app --host 0.0.0.0 --port 8000 --workers 4
+    # Or run directly:
+    python -m server.app
+"""
+try:
+    from openenv.core.env_server.http_server import create_app
+except Exception as e:  # pragma: no cover
+    raise ImportError(
+        "openenv is required for the web interface. Install dependencies with '\n    uv sync\n'"
+    ) from e
+try:
+    from ..models import TestAction, TestObservation
+    from .test_env_environment import TestEnvironment
+except ModuleNotFoundError:
+    from models import TestAction, TestObservation
+    from server.test_env_environment import TestEnvironment
+# Create the app with web interface and README integration
+app = create_app(
+    TestEnvironment,
+    TestAction,
+    TestObservation,
+    env_name="test_env",
+    max_concurrent_envs=1,  # increase this number to allow more concurrent WebSocket sessions
+)
+def main(host: str = "0.0.0.0", port: int = 8000):
+    """
+    Entry point for direct execution via uv run or python -m.
+    This function enables running the server without Docker:
+        uv run --project . server
+        uv run --project . server --port 8001
+        python -m test_env.server.app
+    Args:
+        host: Host address to bind to (default: "0.0.0.0")
+        port: Port number to listen on (default: 8000)
+    For production deployments, consider using uvicorn directly with
+    multiple workers:
+        uvicorn test_env.server.app:app --workers 4
+    """
+    import uvicorn
+    uvicorn.run(app, host=host, port=port)
+if __name__ == "__main__":
+    import argparse
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--port", type=int, default=8000)
+    args = parser.parse_args()
+    main(port=args.port)

test_dir/test_env/server/requirements.txt ADDED Viewed

	@@ -0,0 +1,6 @@

+openenv[core]>=0.2.0
+fastapi>=0.115.0
+uvicorn>=0.24.0

test_dir/test_env/server/test_env_environment.py ADDED Viewed

	@@ -0,0 +1,104 @@

+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+"""
+Test Env Environment Implementation.
+A simple test environment that echoes back messages sent to it.
+Perfect for testing HTTP server infrastructure.
+"""
+from uuid import uuid4
+from openenv.core.env_server.interfaces import Environment
+from openenv.core.env_server.types import State
+try:
+    from ..models import TestAction, TestObservation
+except ImportError:
+    from models import TestAction, TestObservation
+class TestEnvironment(Environment):
+    """
+    A simple echo environment that echoes back messages.
+    This environment is designed for testing the HTTP server infrastructure.
+    It maintains minimal state and simply echoes back whatever message it receives.
+    Example:
+        >>> env = TestEnvironment()
+        >>> obs = env.reset()
+        >>> print(obs.echoed_message)  # "Test Env environment ready!"
+        >>>
+        >>> obs = env.step(TestAction(message="Hello"))
+        >>> print(obs.echoed_message)  # "Hello"
+        >>> print(obs.message_length)  # 5
+    """
+    # Enable concurrent WebSocket sessions.
+    # Set to True if your environment isolates state between instances.
+    # When True, multiple WebSocket clients can connect simultaneously, each
+    # getting their own environment instance (when using factory mode in app.py).
+    SUPPORTS_CONCURRENT_SESSIONS: bool = True
+    def __init__(self):
+        """Initialize the test_env environment."""
+        self._state = State(episode_id=str(uuid4()), step_count=0)
+        self._reset_count = 0
+    def reset(self) -> TestObservation:
+        """
+        Reset the environment.
+        Returns:
+            TestObservation with a ready message
+        """
+        self._state = State(episode_id=str(uuid4()), step_count=0)
+        self._reset_count += 1
+        return TestObservation(
+            echoed_message="Test Env environment ready!",
+            message_length=0,
+            done=False,
+            reward=0.0,
+        )
+    def step(self, action: TestAction) -> TestObservation:  # type: ignore[override]
+        """
+        Execute a step in the environment by echoing the message.
+        Args:
+            action: TestAction containing the message to echo
+        Returns:
+            TestObservation with the echoed message and its length
+        """
+        self._state.step_count += 1
+        message = action.message
+        length = len(message)
+        # Simple reward: longer messages get higher rewards
+        reward = length * 0.1
+        return TestObservation(
+            echoed_message=message,
+            message_length=length,
+            done=False,
+            reward=reward,
+            metadata={"original_message": message, "step": self._state.step_count},
+        )
+    @property
+    def state(self) -> State:
+        """
+        Get the current environment state.
+        Returns:
+            Current State with episode_id and step_count
+        """
+        return self._state

uv.lock ADDED Viewed

The diff for this file is too large to render. See raw diff

validate.sh ADDED Viewed

	@@ -0,0 +1,20 @@

+#!/bin/bash
+set -e
+BOLD="\033[1m"
+GREEN="\033[32m"
+NC="\033[0m"
+echo "Running Check 1/3: openenv validate..."
+python -m uv run openenv validate
+echo "Running Check 2/3: docker build..."
+docker build -t supportdesk_env .
+echo "Running Check 3/3: Python syntax check..."
+python -m uv run python -m py_compile server.py
+printf "${BOLD}========================================${NC}\n"
+printf "${GREEN}${BOLD}  All 3/3 checks passed!${NC}\n"
+printf "${GREEN}${BOLD}  Your submission is ready to submit.${NC}\n"
+printf "${BOLD}========================================${NC}\n"