Spaces:

mathi3046
/

customer-support-env

Sleeping

App Files Files Community

customer-support-env / server /app.py

mathi3046

fix(ide): suppress IDE false positive import warnings in server/app.py

11c71eb about 1 month ago

raw

history blame contribute delete

7.47 kB

	"""
	FastAPI application exposing the Customer Support Environment
	via HTTP endpoints compatible with OpenEnv specification.

	Endpoints:
	POST /reset — Reset environment, returns initial observation
	POST /step — Execute an action, returns (obs, reward, done, info)
	GET /state — Get current internal state
	GET /health — Health check
	GET /tasks — List available tasks
	GET / — Environment info
	"""

	import sys
	import os

	# Make the project root importable. The root is the parent of the
	# directory holding this file; it is prepended to sys.path only when it
	# is not already listed, so repeated imports do not add duplicates.
	_project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
	if _project_root not in sys.path:
	    sys.path.insert(0, _project_root)

	from typing import Any, Dict, Optional

	from fastapi import FastAPI, HTTPException
	from fastapi.middleware.cors import CORSMiddleware
	from pydantic import BaseModel, Field, field_validator

	from models import SupportAction, SupportObservation, SupportState, safe_score # type: ignore
	from server.environment import CustomerSupportEnvironment # type: ignore
	from tasks import TASK_IDS, TASKS # type: ignore


	# ──────────────────────────────────────────────────────────────────
	# Request / Response schemas
	# ──────────────────────────────────────────────────────────────────

	# Body schema for POST /reset. Both fields carry defaults, so an empty
	# JSON object is a valid payload.
	class ResetRequest(BaseModel):
	    # Which task to load; "easy_faq" is used when the field is omitted.
	    task_id: Optional[str] = Field(
	        default="easy_faq",
	        description="Task ID to load",
	    )
	    # Forwarded to env.reset(); its own description marks it as unused.
	    seed: Optional[int] = Field(
	        default=None,
	        description="Random seed (unused)",
	    )


	# Body schema for POST /step.
	class StepRequest(BaseModel):
	    # Required field (the `...` default means "no default"): the action
	    # that the /step handler forwards to env.step().
	    action: SupportAction = Field(
	        ...,
	        description="Agent action",
	    )


	class StepResponse(BaseModel):
	    """Response from the /step endpoint.

	    Uses an auto-clamping validator instead of gt/lt constraints.
	    This prevents Pydantic from raising ValidationError on boundary
	    values and ensures the evaluator NEVER receives 0.0 or 1.0.
	    """

	    # Observation produced by the environment for this step.
	    observation: SupportObservation
	    # Step reward; supplied values pass through _clamp_reward below.
	    reward: float = Field(
	        default=0.01,
	        description="Step reward in strict (0, 1)",
	    )
	    # Episode-finished flag as returned by env.step().
	    done: bool
	    # Free-form diagnostics as returned by env.step().
	    info: Dict[str, Any]

	    # "before" mode: the validator receives the raw input value, ahead of
	    # Pydantic's own float validation.
	    @field_validator("reward", mode="before")
	    @classmethod
	    def _clamp_reward(cls, value: Any) -> float:
	        """Auto-clamp reward to strict (0, 1)."""
	        clamped = safe_score(value)
	        return clamped


	# One entry of the GET /tasks listing. The notes below describe how the
	# /tasks handler in this module fills each field.
	class TaskInfo(BaseModel):
	    # Key of the task in TASKS.
	    task_id: str
	    # Subject line of the task's ticket.
	    name: str
	    # Upper-cased difficulty, an em dash, then the ticket subject.
	    description: str
	    # `.value` of the task's difficulty entry.
	    difficulty: str
	    # The task's "max_steps" entry.
	    max_steps: int


	# ──────────────────────────────────────────────────────────────────
	# App factory
	# ──────────────────────────────────────────────────────────────────

	# The FastAPI application object; every route in this module is
	# registered on it. The metadata below is what FastAPI publishes in the
	# generated API docs.
	app = FastAPI(
	    title="Customer Support Environment — OpenEnv",
	    version="1.0.0",
	    description=(
	        "AI-Powered Customer Support Ticket Resolution Environment. "
	        "Train agents to handle real customer issues using step/reset/state APIs."
	    ),
	)

	# Permissive CORS: any origin, method and header may call this API.
	#
	# Credentials are deliberately disabled. FastAPI's CORS documentation
	# states that allow_origins / allow_methods / allow_headers cannot be
	# ["*"] when allow_credentials is True; keeping both would let any
	# website issue credentialed cross-origin requests. None of the endpoints
	# in this module read cookies or Authorization headers, so nothing here
	# needs credentialed requests. If a trusted front-end ever does, list its
	# origin explicitly and re-enable the flag.
	app.add_middleware(
	    CORSMiddleware,
	    allow_origins=["*"],
	    allow_credentials=False,
	    allow_methods=["*"],
	    allow_headers=["*"],
	)

	# Global environment instance (single-agent mode for simplicity).
	# The /reset, /step and /state handlers below all operate on this one
	# object, so every HTTP client shares whatever state it holds.
	env = CustomerSupportEnvironment()


	# ──────────────────────────────────────────────────────────────────
	# Endpoints
	# ──────────────────────────────────────────────────────────────────

	@app.get("/", tags=["info"])
	def root():
	    """Environment info and available endpoints."""
	    # Human-readable index of the routes, keyed by "<METHOD> <path>".
	    endpoint_help = {
	        "POST /reset": "Reset environment with a task_id",
	        "POST /step": "Execute an action",
	        "GET /state": "Get current state",
	        "GET /health": "Health check",
	        "GET /tasks": "List available tasks",
	    }
	    # Static metadata plus the task ids imported from the tasks module.
	    summary = {
	        "name": "customer_support_env",
	        "version": "1.0.0",
	        "description": "AI-Powered Customer Support Ticket Resolution Environment",
	        "endpoints": endpoint_help,
	        "available_tasks": TASK_IDS,
	    }
	    return summary


	@app.get("/health", tags=["health"])
	def health():
	    """Health check endpoint."""
	    # Constant payload: the shared environment is not consulted, so this
	    # only confirms that the web process is answering requests.
	    return dict(status="healthy", environment="customer_support_env")


	@app.get("/tasks", response_model=list[TaskInfo], tags=["tasks"])
	def list_tasks():
	    """List all available tasks with metadata."""

	    def _as_info(task_key, spec):
	        # Convert one TASKS entry into its public TaskInfo summary.
	        subject = spec["ticket"]["subject"]
	        level = spec["difficulty"].value
	        return TaskInfo(
	            task_id=task_key,
	            name=subject,
	            description=f"{level.upper()} — {subject}",
	            difficulty=level,
	            max_steps=spec["max_steps"],
	        )

	    # One entry per task, in the iteration order of TASKS.
	    return [_as_info(task_key, spec) for task_key, spec in TASKS.items()]


	@app.post("/reset", response_model=SupportObservation, tags=["environment"])
	def reset(request: ResetRequest = ResetRequest()):
	    """Reset the environment and return the initial observation."""
	    # Parameters: `request` is the optional body; when the client sends
	    #   none, the ResetRequest defaults are used.
	    # Returns: the observation produced by env.reset(), serialised as
	    #   SupportObservation.
	    # Raises: HTTPException(400) when env.reset() raises ValueError
	    #   (presumably an unknown task_id — confirm in
	    #   CustomerSupportEnvironment.reset).
	    try:
	        # Only this call is guarded, so the handler maps exactly the
	        # environment's ValueError and nothing else.
	        obs = env.reset(task_id=request.task_id, seed=request.seed)
	    except ValueError as e:
	        # `from e` records the original error as the explicit cause, so
	        # server-side tracebacks show where the failure started.
	        raise HTTPException(status_code=400, detail=str(e)) from e
	    return obs


	@app.post("/step", response_model=StepResponse, tags=["environment"])
	def step(request: StepRequest):
	    """Execute an agent action and return the result."""
	    # Parameters: `request` carries the agent's action.
	    # Returns: a StepResponse with the observation, clamped reward, done
	    #   flag and info dict from env.step().
	    # Raises: HTTPException(400) when a RuntimeError is raised while
	    #   handling the step (presumably env.step() called in an invalid
	    #   state — confirm in CustomerSupportEnvironment.step).
	    try:
	        obs, reward, done, info = env.step(action=request.action)

	        # Defence in depth: clamp here even though StepResponse clamps
	        # again in its own validator, so the value handed to the response
	        # model has already been through safe_score.
	        clamped_reward = safe_score(reward)

	        # Apply the same clamping to the known score fields of the reward
	        # breakdown, when one is present. NOTE: this updates the dict
	        # returned by the environment in place.
	        breakdown = info.get("reward_breakdown")
	        if isinstance(breakdown, dict):
	            for key in ("correctness", "tone", "completeness", "efficiency", "total"):
	                if key in breakdown:
	                    breakdown[key] = safe_score(breakdown[key])

	        return StepResponse(
	            observation=obs,
	            reward=clamped_reward,
	            done=done,
	            info=info,
	        )
	    except RuntimeError as e:
	        # `from e` records the original error as the explicit cause, so
	        # server-side tracebacks show where the failure started.
	        raise HTTPException(status_code=400, detail=str(e)) from e


	@app.get("/state", response_model=SupportState, tags=["environment"])
	def get_state():
	    """Get the current internal state of the environment."""
	    # Delegates to the shared environment; FastAPI serialises the result
	    # according to response_model=SupportState.
	    snapshot = env.state()
	    return snapshot


	# ──────────────────────────────────────────────────────────────────
	# Entry point
	# ──────────────────────────────────────────────────────────────────

	def main():
	    """Serve `app` with uvicorn.

	    The bind address comes from the HOST and PORT environment variables,
	    falling back to 0.0.0.0 and 7860 when they are unset.
	    """
	    # Imported here rather than at module level, so importing this module
	    # does not itself import uvicorn.
	    import uvicorn

	    bind_host = os.environ.get("HOST", "0.0.0.0")
	    # Environment values are strings; int() accepts both a string and the
	    # integer fallback.
	    bind_port = int(os.environ.get("PORT", 7860))
	    uvicorn.run(app, host=bind_host, port=bind_port)


	# Start the server only when this file is executed as a script, not when
	# it is imported.
	if __name__ == "__main__":
	    main()