"""
server.py

Minimal FastAPI server for Hugging Face Spaces deployment.

Endpoints:
    GET  /          → landing page
    GET  /health    → health check
    GET  /tasks     → list the available tasks
    POST /reset     → reset environment, return initial observation
    POST /step      → send action, return observation + reward + done
    GET  /state     → return current internal state (for debugging)
    POST /run_task  → run a full episode with a named task
"""

import os
import sys
from typing import Any, Dict, Optional

from dotenv import load_dotenv
from fastapi import FastAPI, HTTPException
from fastapi.responses import JSONResponse, HTMLResponse
from pydantic import BaseModel

# Deliberately executed before the project imports below; the original
# ordering is preserved (presumably so those modules can see values from
# .env at import time — TODO confirm).
load_dotenv()

from env.env import AmbiguityEnv
from models.models import Action
from tasks.tasks import TASKS, get_task


# ── App setup ────────────────────────────────────────────────────────────────

app = FastAPI(
    title="Ambiguity Resolution Environment",
    description="OpenEnv-compliant environment for the Meta OpenEnv Hackathon.",
    version="1.0.0",
)

# A single module-level environment: every request handled by this process
# reads and mutates the same instance.
env = AmbiguityEnv()


# ── Request / Response models ────────────────────────────────────────────────

class ResetRequest(BaseModel):
    """Body of POST /reset."""

    # Name of the task to load.
    task_name: Optional[str] = "hard_ambiguous"
    # Optional text that replaces the task's own instruction.
    instruction: Optional[str] = None


class StepRequest(BaseModel):
    """Body of POST /step; the fields are forwarded to ``Action``."""

    type: str
    question: Optional[str] = None
    proposed_time: Optional[str] = None
    proposed_participants: Optional[list] = None
    proposed_location: Optional[str] = None


class RunTaskRequest(BaseModel):
    """Body of POST /run_task."""

    # Name of the task to run.
    task_name: Optional[str] = "hard_ambiguous"


# ── Helpers ──────────────────────────────────────────────────────────────────

def _obs_to_dict(obs) -> Dict[str, Any]:
    """Convert an observation object into a JSON-friendly dictionary.

    Reads ``instruction``, ``known_info``, ``constraints``,
    ``conversation_history`` and ``last_response`` from *obs*; each history
    entry is flattened to a ``{"role", "content"}`` mapping.
    """
    payload: Dict[str, Any] = {
        "instruction": obs.instruction,
        "known_info": obs.known_info,
        "constraints": obs.constraints,
    }
    payload["conversation_history"] = [
        {"role": message.role, "content": message.content}
        for message in obs.conversation_history
    ]
    payload["last_response"] = obs.last_response
    return payload


def _task_list() -> list:
    """Return a summary (name, ambiguity, missing fields) of every task in TASKS."""
    summaries = []
    for task in TASKS:
        summaries.append(
            {
                "name": task["name"],
                "ambiguity": task["ambiguity"],
                "missing": task["missing_fields"],
            }
        )
    return summaries
# ── Routes ───────────────────────────────────────────────────────────────────

@app.get("/", response_class=HTMLResponse)
def root():
    """Serve the landing page of the Space.

    NOTE(review): the payload below contains no HTML tags, so it is served
    as plain text with an HTML content type. The markup (including the link
    behind "Launch Interactive Demo") looks like it was lost — TODO confirm
    and restore from the original template.
    """
    # Built from adjacent literals so the text does not depend on the
    # indentation of this function body.
    html_content = (
        " Ambiguity Resolution Environment\n"
        "🧠 Ambiguity Resolution\n"
        "Multi-step reasoning under ambiguity for Agentic Systems\n"
        "🚀 Launch Interactive Demo\n"
        "\n"
        "Execution Flow\n"
        "\n"
        "  • Agent asks clarifying questions\n"
        "  • Environment provides missing info\n"
        "  • Agent completes task\n"
        "\n"
        "API Endpoints\n"
        "\n"
        "POST /reset POST /step GET /state\n"
    )
    return HTMLResponse(content=html_content)


@app.get("/health")
def health():
    """Liveness probe; always reports ``{"status": "ok"}``."""
    return {"status": "ok"}


@app.get("/tasks")
def list_tasks():
    """Return all available tasks."""
    return {"tasks": _task_list()}


@app.post("/reset")
def reset(req: Optional[ResetRequest] = None):
    """Reset the environment with the specified task.

    Args:
        req: Optional body. ``task_name`` selects the task (falls back to
            ``"hard_ambiguous"`` when the body or the name is missing/empty);
            a non-blank ``instruction`` replaces the task's instruction.

    Returns:
        ``{"status", "task", "observation"}`` with the initial observation.

    Raises:
        HTTPException: 404 when ``get_task`` rejects the task name.
    """
    task_name = (req.task_name if req else None) or "hard_ambiguous"

    try:
        # Work on a copy so the instruction override below does not leak
        # into the object returned by get_task.
        task = get_task(task_name).copy()
    except ValueError as e:
        raise HTTPException(status_code=404, detail=str(e)) from e

    # Override the instruction only when a non-blank custom one was supplied.
    custom_inst = (req.instruction or "").strip() if req else ""
    if custom_inst:
        task["instruction"] = custom_inst

    obs = env.reset(task)
    return {
        "status": "ok",
        "task": task_name,
        "observation": _obs_to_dict(obs),
    }


@app.post("/step")
def step(req: StepRequest):
    """Send one action to the environment.

    Args:
        req: Action fields; forwarded unchanged to ``Action``.

    Returns:
        ``{"observation", "reward", "done", "info"}`` where ``info`` is the
        environment's info enriched with tracking fields.

    Raises:
        HTTPException: 400 when no episode has been started via /reset,
            422 when ``Action`` cannot be built from the request.
    """
    if env.state() is None:
        raise HTTPException(
            status_code=400,
            detail="Environment not initialised. Call /reset first.",
        )

    try:
        action = Action(
            type=req.type,
            question=req.question,
            proposed_time=req.proposed_time,
            proposed_participants=req.proposed_participants,
            proposed_location=req.proposed_location,
        )
    except Exception as e:
        # Boundary handler: any construction failure is reported to the
        # client as a 422, with the original error kept as the cause.
        raise HTTPException(status_code=422, detail=f"Invalid action: {e}") from e

    result = env.step(action)

    # ── Enrich info with debugging + evaluation details ──────────────────────
    # Read the state AFTER the step so the counters reflect this action.
    current_state = env.state()
    env_info = result["info"]

    enriched_info = {
        # Falls back to the returned reward when the env reports no raw_reward.
        "raw_reward": env_info.get("raw_reward", result["reward"]),
        # Agent tracking.
        "question_count": current_state.question_count if current_state else 0,
        "collected_fields": (
            list(current_state.collected_info.keys()) if current_state else []
        ),
        "max_questions_allowed": env.MAX_QUESTIONS,
        # Pass through every other env flag; spread last, so an env key with
        # the same name as a tracking field above takes precedence.
        **{k: v for k, v in env_info.items() if k != "raw_reward"},
    }

    return {
        "observation": _obs_to_dict(result["observation"]),
        # Returned exactly as the env produced it; the original note says it
        # is clamped to [0.0, 1.0] (not enforced here).
        "reward": result["reward"],
        "done": result["done"],
        "info": enriched_info,
    }


@app.get("/state")
def state():
    """Return current internal state (for debugging/grading).

    Returns ``{"state": None}`` until an episode has been started.
    """
    s = env.state()
    if s is None:
        return {"state": None}
    return {
        "state": {
            "instruction": s.instruction,
            "true_time": s.true_time,
            "true_participants": s.true_participants,
            "constraints": s.constraints,
            "collected_info": s.collected_info,
            "question_count": s.question_count,
            "done": s.done,
        }
    }


@app.post("/run_task")
def run_task(req: Optional[RunTaskRequest] = None):
    """Run a full episode using fallback actions (no LLM, deterministic).

    Useful for automated validation.

    Args:
        req: Optional body; ``task_name`` falls back to ``"hard_ambiguous"``.

    Returns:
        ``{"task", "steps", "total_reward", "success", "log"}`` — the
        complete episode log; ``success`` is true when the summed reward
        exceeds 0.5.

    Raises:
        HTTPException: 404 when ``get_task`` rejects the task name.
    """
    task_name = (req.task_name if req else None) or "hard_ambiguous"

    try:
        # Copy, as /reset does, so the env is never handed the very object
        # returned by get_task.
        task = get_task(task_name).copy()
    except ValueError as e:
        raise HTTPException(status_code=404, detail=str(e)) from e

    env.reset(task)

    # Deterministic fallback sequence: two questions, then execute with the
    # task's own ground truth.
    fallback_actions = [
        Action(type="ask", question="When should this happen?"),
        Action(type="ask", question="Who are the participants?"),
        Action(
            type="execute",
            proposed_time=task["true_time"],
            proposed_participants=task["true_participants"],
        ),
    ]

    log = []
    rewards = []
    for i, action in enumerate(fallback_actions, 1):
        result = env.step(action)
        rewards.append(result["reward"])
        log.append({
            "step": i,
            "action": action.model_dump(),
            "reward": result["reward"],
            "done": result["done"],
            "info": result["info"],
        })
        # Stop as soon as the env declares the episode finished.
        if result["done"]:
            break

    total_reward = sum(rewards)
    return {
        "task": task_name,
        "steps": len(log),
        "total_reward": round(total_reward, 4),
        "success": total_reward > 0.5,
        "log": log,
    }