# ambiguity-env / server.py
# Yaser77
# feat: implement dynamic constraint system with stochastic generation and UI visualization
# e461841
"""
server.py
Minimal FastAPI server for Hugging Face Spaces deployment.
Endpoints:
  GET  /          → HTML landing page linking to the interactive demo
  GET  /health    → health check
  GET  /tasks     → list available tasks
  POST /reset     → reset environment, return initial observation
  POST /step      → send action, return observation + reward + done + info
  GET  /state     → return current internal state (for debugging)
  POST /run_task  → run a full episode with a named task
"""
import os
import sys
from typing import Any, Dict, Optional
from dotenv import load_dotenv
from fastapi import FastAPI, HTTPException
from fastapi.responses import JSONResponse, HTMLResponse
from pydantic import BaseModel
load_dotenv()
from env.env import AmbiguityEnv
from models.models import Action
from tasks.tasks import TASKS, get_task
# ── App setup ────────────────────────────────────────────────────────────────
app = FastAPI(
    title="Ambiguity Resolution Environment",
    description="OpenEnv-compliant environment for the Meta OpenEnv Hackathon.",
    version="1.0.0",
)
# One shared env instance (stateful per container).
# Every route handler in this module reads or mutates this single object, so
# all clients of the server share the same episode: a /reset or /run_task from
# one caller replaces the episode another caller may be stepping through.
env = AmbiguityEnv()
# ── Request / Response models ────────────────────────────────────────────────
class ResetRequest(BaseModel):
    """Request body for ``POST /reset``."""

    # Name of the task to load. The /reset handler also falls back to
    # "hard_ambiguous" when this is None or empty.
    task_name: Optional[str] = "hard_ambiguous"  # default to hardest task
    # When non-blank, replaces the selected task's "instruction" text.
    instruction: Optional[str] = None  # custom instruction override
class StepRequest(BaseModel):
    """Request body for ``POST /step``.

    The /step handler forwards every field, unchanged, to the ``Action``
    constructor; any validation beyond the types declared here happens there.
    """

    # Action kind. /run_task uses "ask" (with `question`) and "execute"
    # (with the proposed_* fields); NOTE(review): confirm the full set of
    # accepted values against models.Action.
    type: str
    question: Optional[str] = None
    proposed_time: Optional[str] = None
    # Element type is not constrained here (plain `list`).
    proposed_participants: Optional[list] = None
    proposed_location: Optional[str] = None
class RunTaskRequest(BaseModel):
    """Request body for ``POST /run_task``."""

    # Name of the task to run; the handler also falls back to
    # "hard_ambiguous" when this is None or empty.
    task_name: Optional[str] = "hard_ambiguous"
# ── Helpers ──────────────────────────────────────────────────────────────────
def _obs_to_dict(obs) -> Dict[str, Any]:
    """Convert an observation object into a plain dict for a JSON response.

    Reads ``instruction``, ``known_info``, ``constraints``,
    ``conversation_history`` and ``last_response`` from *obs*. Each history
    entry is reduced to its ``role`` and ``content`` attributes; the other
    values are passed through untouched.
    """
    payload: Dict[str, Any] = {
        "instruction": obs.instruction,
        "known_info": obs.known_info,
        "constraints": obs.constraints,
    }

    # Flatten message objects into {"role", "content"} pairs.
    history = []
    for message in obs.conversation_history:
        history.append({"role": message.role, "content": message.content})
    payload["conversation_history"] = history

    payload["last_response"] = obs.last_response
    return payload
def _task_list() -> list:
    """Build the task summaries served by ``GET /tasks``.

    For every entry in ``TASKS`` this exposes its ``name`` and ``ambiguity``
    values, and its ``missing_fields`` value under the shorter key
    ``missing``.
    """
    summaries = []
    for task in TASKS:
        summary = {
            "name": task["name"],
            "ambiguity": task["ambiguity"],
            "missing": task["missing_fields"],
        }
        summaries.append(summary)
    return summaries
# ── Routes ───────────────────────────────────────────────────────────────────
@app.get("/", response_class=HTMLResponse)
def root():
    """Serve the HTML landing page.

    The page is a static document: a title, a button that links (in a new
    tab) to the hosted interactive demo, and two info boxes summarising the
    execution flow and the main API endpoints.

    Returns:
        HTMLResponse wrapping the page markup.
    """
    # NOTE: the button label previously contained a mis-decoded rocket emoji
    # (rendered as garbage characters in the browser); it is now the real
    # UTF-8 character, matching the <meta charset="UTF-8"> declaration.
    html_content = """
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Ambiguity Resolution Environment</title>
<style>
body {
background-color: #0f172a;
color: #e2e8f0;
font-family: 'Inter', system-ui, sans-serif;
margin: 0;
padding: 0;
display: flex;
align-items: center;
justify-content: center;
min-height: 100vh;
}
.container {
max-width: 650px;
width: 100%;
text-align: center;
padding: 50px 40px;
background-color: #1e293b;
border-radius: 16px;
box-shadow: 0 20px 40px rgba(0,0,0,0.5);
border: 1px solid #334155;
}
.title {
font-size: 2.5em;
font-weight: 800;
margin-bottom: 10px;
color: #f8fafc;
letter-spacing: -0.5px;
}
.subtitle {
font-size: 1.15em;
color: #94a3b8;
margin-bottom: 40px;
}
.btn {
display: inline-block;
background: linear-gradient(135deg, #3b82f6 0%, #2563eb 100%);
color: white;
padding: 16px 36px;
font-size: 1.15em;
border-radius: 12px;
text-decoration: none;
font-weight: bold;
transition: transform 0.2s, box-shadow 0.2s;
margin-bottom: 45px;
box-shadow: 0 4px 15px rgba(59, 130, 246, 0.4);
}
.btn:hover {
transform: translateY(-2px);
box-shadow: 0 6px 20px rgba(59, 130, 246, 0.6);
}
.content-grid {
display: grid;
grid-template-columns: 1fr 1fr;
gap: 20px;
text-align: left;
}
.box {
background-color: #0f172a;
border: 1px solid #334155;
border-radius: 12px;
padding: 25px;
}
.box h3 {
font-size: 1.05em;
text-transform: uppercase;
letter-spacing: 0.5px;
margin-top: 0;
margin-bottom: 15px;
}
.box-feature h3 { color: #38bdf8; }
.box-api h3 { color: #c084fc; }
.box ul {
color: #cbd5e1;
margin: 0;
padding-left: 20px;
line-height: 1.6;
font-size: 0.95em;
}
.box code {
display: block;
background-color: #1e293b;
padding: 8px 12px;
border-radius: 6px;
margin-bottom: 8px;
color: #e2e8f0;
font-family: monospace;
border: 1px solid #334155;
}
.box code:last-child {
margin-bottom: 0;
}
</style>
</head>
<body>
<div class="container">
<div class="title">🧠 Ambiguity Resolution</div>
<div class="subtitle">Multi-step reasoning under ambiguity for Agentic Systems</div>
<a href="https://huggingface.co/spaces/Yaser77/ambiguity-env-demo" target="_blank" class="btn">
🚀 Launch Interactive Demo
</a>
<div class="content-grid">
<div class="box box-feature">
<h3>Execution Flow</h3>
<ul>
<li>Agent asks clarifying questions</li>
<li>Environment provides missing info</li>
<li>Agent completes task</li>
</ul>
</div>
<div class="box box-api">
<h3>API Endpoints</h3>
<code>POST /reset</code>
<code>POST /step</code>
<code>GET /state</code>
</div>
</div>
</div>
</body>
</html>
"""
    return HTMLResponse(content=html_content)
@app.get("/health")
def health():
    """Health check: always responds with ``{"status": "ok"}``."""
    payload = {"status": "ok"}
    return payload
@app.get("/tasks")
def list_tasks():
    """List the available tasks, wrapped under the ``tasks`` key."""
    available = _task_list()
    return {"tasks": available}
@app.post("/reset")
def reset(req: ResetRequest = None):
    """
    Start a new episode on the shared environment.

    The task named in the request (or "hard_ambiguous" when the body or the
    name is absent/empty) is looked up and copied. If the request carries a
    non-blank ``instruction``, it replaces the copy's instruction text while
    the rest of the task is left as defined.

    Returns the status, the resolved task name and the initial observation.
    Raises HTTP 404 when the task lookup fails with ValueError.
    """
    requested_name = req.task_name if req else None
    task_name = requested_name or "hard_ambiguous"

    try:
        # Copy the baseline task so the override below stays local to this episode.
        task = get_task(task_name).copy()
    except ValueError as e:
        raise HTTPException(status_code=404, detail=str(e))

    # Optional custom instruction; whitespace-only input is ignored.
    override = getattr(req, "instruction", None) if req else None
    if override:
        cleaned = override.strip()
        if cleaned:
            task["instruction"] = cleaned

    initial_obs = env.reset(task)
    response = {"status": "ok", "task": task_name}
    response["observation"] = _obs_to_dict(initial_obs)
    return response
@app.post("/step")
def step(req: StepRequest):
    """
    Apply one action to the shared environment.

    Returns the new observation, the reward, the done flag and an ``info``
    dict enriched with bookkeeping values read from the environment state.
    Raises HTTP 400 when no episode has been started, and HTTP 422 when the
    ``Action`` cannot be constructed from the request.
    """
    if env.state() is None:
        raise HTTPException(status_code=400, detail="Environment not initialised. Call /reset first.")

    try:
        action_fields = {
            "type": req.type,
            "question": req.question,
            "proposed_time": req.proposed_time,
            "proposed_participants": req.proposed_participants,
            "proposed_location": req.proposed_location,
        }
        action = Action(**action_fields)
    except Exception as e:
        raise HTTPException(status_code=422, detail=f"Invalid action: {e}")

    outcome = env.step(action)

    # Read the state only after stepping so the counters reflect this action.
    snapshot = env.state()
    env_info = outcome["info"]

    # raw_reward falls back to the returned reward when the env did not report one.
    enriched_info = {"raw_reward": env_info.get("raw_reward", outcome["reward"])}

    # Agent tracking values, with neutral defaults when no state is available.
    if snapshot:
        enriched_info["question_count"] = snapshot.question_count
        enriched_info["collected_fields"] = list(snapshot.collected_info.keys())
    else:
        enriched_info["question_count"] = 0
        enriched_info["collected_fields"] = []
    enriched_info["max_questions_allowed"] = env.MAX_QUESTIONS

    # Pass through every other flag the env reported; these are applied last,
    # so an env-provided key with the same name wins over the values above.
    for key, value in env_info.items():
        if key != "raw_reward":
            enriched_info[key] = value

    response = {"observation": _obs_to_dict(outcome["observation"])}
    response["reward"] = outcome["reward"]
    response["done"] = outcome["done"]
    response["info"] = enriched_info
    return response
@app.get("/state")
def state():
    """
    Expose the environment's current internal state (debugging/grading aid).

    Returns ``{"state": None}`` when the environment reports no state,
    otherwise a dict of selected state attributes under the ``state`` key.
    """
    current = env.state()
    if current is None:
        return {"state": None}

    # Attributes copied verbatim from the state object, in response order.
    exposed = (
        "instruction",
        "true_time",
        "true_participants",
        "constraints",
        "collected_info",
        "question_count",
        "done",
    )
    return {"state": {name: getattr(current, name) for name in exposed}}
@app.post("/run_task")
def run_task(req: RunTaskRequest = None):
    """
    Run a full episode using a fixed, scripted action sequence (no LLM).

    The script asks about the time, asks about the participants, then
    executes with the task's own ``true_time`` and ``true_participants``.
    It stops early as soon as the environment reports ``done``.
    Useful for automated validation.

    Note: this resets the shared environment, replacing any episode that is
    currently in progress.

    Returns the task name, the number of steps taken, the summed reward
    (rounded to 4 places), a success flag (summed reward > 0.5) and the
    per-step log.
    Raises HTTP 404 when the task lookup fails with ValueError.
    """
    task_name = (req.task_name if req else None) or "hard_ambiguous"
    try:
        # Work on a copy, as /reset does, so the environment never receives
        # (and therefore can never modify) the shared task definition itself.
        task = get_task(task_name).copy()
    except ValueError as e:
        raise HTTPException(status_code=404, detail=str(e)) from e

    # The initial observation is not needed by the scripted sequence.
    env.reset(task)

    log = []
    rewards = []
    # Scripted fallback sequence
    fallback_actions = [
        Action(type="ask", question="When should this happen?"),
        Action(type="ask", question="Who are the participants?"),
        Action(
            type="execute",
            proposed_time=task["true_time"],
            proposed_participants=task["true_participants"],
        ),
    ]
    for i, action in enumerate(fallback_actions, 1):
        result = env.step(action)
        rewards.append(result["reward"])
        log.append({
            "step": i,
            "action": action.model_dump(),
            "reward": result["reward"],
            "done": result["done"],
            "info": result["info"],
        })
        if result["done"]:
            break

    # Sum once; the success threshold is applied to the unrounded total.
    total_reward = sum(rewards)
    return {
        "task": task_name,
        "steps": len(log),
        "total_reward": round(total_reward, 4),
        "success": total_reward > 0.5,
        "log": log,
    }