Spaces:

Yaser77
/

ambiguity-env

Sleeping

File size: 12,088 Bytes

"""
server.py
Minimal FastAPI server for Hugging Face Spaces deployment.

Endpoints:
  GET  /          → HTML landing page linking to the interactive demo
  GET  /health    → health check
  GET  /tasks     → list available tasks
  POST /reset     → reset environment, return initial observation
  POST /step      → send action, return observation + reward + done + info
  GET  /state     → return current internal state (for debugging)
  POST /run_task  → run a full episode with a named task
"""

import os
import sys
from typing import Any, Dict, Optional

from dotenv import load_dotenv
from fastapi import FastAPI, HTTPException
from fastapi.responses import JSONResponse, HTMLResponse
from pydantic import BaseModel

load_dotenv()

from env.env    import AmbiguityEnv
from models.models import Action
from tasks.tasks   import TASKS, get_task

# ── App setup ────────────────────────────────────────────────────────────────
# FastAPI application object; the metadata below is passed to the FastAPI
# constructor as title/description/version.
app = FastAPI(
    title="Ambiguity Resolution Environment",
    description="OpenEnv-compliant environment for the Meta OpenEnv Hackathon.",
    version="1.0.0",
)

# One shared env instance (stateful per container)
# Every route handler in this module calls into this single object, so all
# clients of the process operate on the same episode.
env = AmbiguityEnv()


# ── Request / Response models ────────────────────────────────────────────────

# Request body for POST /reset. Both fields are optional; the /reset handler
# treats a missing or empty task_name as "hard_ambiguous".
class ResetRequest(BaseModel):
    task_name: Optional[str] = "hard_ambiguous"   # default to hardest task
    instruction: Optional[str] = None             # custom instruction override

# Request body for POST /step. The fields are forwarded one-to-one as keyword
# arguments to Action by the /step handler; only `type` is required.
class StepRequest(BaseModel):
    type: str                                     # e.g. "ask" or "execute" (values used by /run_task)
    question: Optional[str] = None
    proposed_time: Optional[str] = None
    proposed_participants: Optional[list] = None
    proposed_location: Optional[str] = None

# Request body for POST /run_task: names the task whose scripted episode
# should be run (the handler falls back to "hard_ambiguous").
class RunTaskRequest(BaseModel):
    task_name: Optional[str] = "hard_ambiguous"


# ── Helpers ──────────────────────────────────────────────────────────────────

def _obs_to_dict(obs) -> Dict[str, Any]:
    return {
        "instruction": obs.instruction,
        "known_info": obs.known_info,
        "constraints": obs.constraints,
        "conversation_history": [
            {"role": m.role, "content": m.content}
            for m in obs.conversation_history
        ],
        "last_response": obs.last_response,
    }

def _task_list() -> list:
    """Summarise every entry of TASKS as a name/ambiguity/missing dict."""
    summaries = []
    for task in TASKS:
        summaries.append(
            {
                "name": task["name"],
                "ambiguity": task["ambiguity"],
                "missing": task["missing_fields"],
            }
        )
    return summaries


# ── Routes ───────────────────────────────────────────────────────────────────

@app.get("/", response_class=HTMLResponse)
def root():
    """
    Serve the static HTML landing page.

    The page does not embed the demo itself: it shows a button that links
    to the separate interactive demo Space, a short description of the
    execution flow, and a list of the core API endpoints.
    """
    html_content = """
    <!DOCTYPE html>
    <html lang="en">
    <head>
        <meta charset="UTF-8">
        <meta name="viewport" content="width=device-width, initial-scale=1.0">
        <title>Ambiguity Resolution Environment</title>
        <style>
            body {
                background-color: #0f172a;
                color: #e2e8f0;
                font-family: 'Inter', system-ui, sans-serif;
                margin: 0;
                padding: 0;
                display: flex;
                align-items: center;
                justify-content: center;
                min-height: 100vh;
            }
            .container {
                max-width: 650px;
                width: 100%;
                text-align: center;
                padding: 50px 40px;
                background-color: #1e293b;
                border-radius: 16px;
                box-shadow: 0 20px 40px rgba(0,0,0,0.5);
                border: 1px solid #334155;
            }
            .title {
                font-size: 2.5em;
                font-weight: 800;
                margin-bottom: 10px;
                color: #f8fafc;
                letter-spacing: -0.5px;
            }
            .subtitle {
                font-size: 1.15em;
                color: #94a3b8;
                margin-bottom: 40px;
            }
            .btn {
                display: inline-block;
                background: linear-gradient(135deg, #3b82f6 0%, #2563eb 100%);
                color: white;
                padding: 16px 36px;
                font-size: 1.15em;
                border-radius: 12px;
                text-decoration: none;
                font-weight: bold;
                transition: transform 0.2s, box-shadow 0.2s;
                margin-bottom: 45px;
                box-shadow: 0 4px 15px rgba(59, 130, 246, 0.4);
            }
            .btn:hover {
                transform: translateY(-2px);
                box-shadow: 0 6px 20px rgba(59, 130, 246, 0.6);
            }
            .content-grid {
                display: grid;
                grid-template-columns: 1fr 1fr;
                gap: 20px;
                text-align: left;
            }
            .box {
                background-color: #0f172a;
                border: 1px solid #334155;
                border-radius: 12px;
                padding: 25px;
            }
            .box h3 {
                font-size: 1.05em;
                text-transform: uppercase;
                letter-spacing: 0.5px;
                margin-top: 0;
                margin-bottom: 15px;
            }
            .box-feature h3 { color: #38bdf8; }
            .box-api h3 { color: #c084fc; }
            .box ul {
                color: #cbd5e1;
                margin: 0;
                padding-left: 20px;
                line-height: 1.6;
                font-size: 0.95em;
            }
            .box code {
                display: block;
                background-color: #1e293b;
                padding: 8px 12px;
                border-radius: 6px;
                margin-bottom: 8px;
                color: #e2e8f0;
                font-family: monospace;
                border: 1px solid #334155;
            }
            .box code:last-child {
                margin-bottom: 0;
            }
        </style>
    </head>
    <body>
        <div class="container">
            <div class="title">🧠 Ambiguity Resolution</div>
            <div class="subtitle">Multi-step reasoning under ambiguity for Agentic Systems</div>

            <a href="https://huggingface.co/spaces/Yaser77/ambiguity-env-demo" target="_blank" class="btn">
                🚀 Launch Interactive Demo
            </a>

            <div class="content-grid">
                <div class="box box-feature">
                    <h3>Execution Flow</h3>
                    <ul>
                        <li>Agent asks clarifying questions</li>
                        <li>Environment provides missing info</li>
                        <li>Agent completes task</li>
                    </ul>
                </div>

                <div class="box box-api">
                    <h3>API Endpoints</h3>
                    <code>POST /reset</code>
                    <code>POST /step</code>
                    <code>GET /state</code>
                </div>
            </div>
        </div>
    </body>
    </html>
    """
    return HTMLResponse(content=html_content)


@app.get("/health")
def health():
    """Health check: always reports an ``ok`` status."""
    return dict(status="ok")


@app.get("/tasks")
def list_tasks():
    """List the available tasks under a single ``tasks`` key."""
    summaries = _task_list()
    return {"tasks": summaries}


@app.post("/reset")
def reset(req: ResetRequest = None):
    """
    Start a fresh episode on the shared environment.

    The task is looked up by name (falling back to "hard_ambiguous" when the
    body or the name is missing/empty); an unknown name yields HTTP 404.
    A non-blank ``instruction`` in the request replaces the task's own
    instruction. Returns the initial observation.
    """
    requested_name = req.task_name if req else None
    task_name = requested_name or "hard_ambiguous"

    try:
        # Work on a copy so the instruction override below does not touch
        # the dict returned by get_task.
        task = get_task(task_name).copy()
    except ValueError as e:
        raise HTTPException(status_code=404, detail=str(e))

    # A custom instruction only counts if something is left after stripping.
    custom_inst = (req.instruction or "").strip() if req else ""
    if custom_inst:
        task["instruction"] = custom_inst

    initial_obs = env.reset(task)
    response = {"status": "ok", "task": task_name}
    response["observation"] = _obs_to_dict(initial_obs)
    return response


@app.post("/step")
def step(req: StepRequest):
    """
    Apply a single agent action to the shared environment.

    Responds with the new observation, the reward, the done flag and an
    info dict enriched with debugging fields. Raises HTTP 400 when no
    episode has been started via /reset, and HTTP 422 when the request
    cannot be turned into an Action.
    """
    if env.state() is None:
        raise HTTPException(status_code=400, detail="Environment not initialised. Call /reset first.")

    try:
        action_fields = {
            "type": req.type,
            "question": req.question,
            "proposed_time": req.proposed_time,
            "proposed_participants": req.proposed_participants,
            "proposed_location": req.proposed_location,
        }
        action = Action(**action_fields)
    except Exception as e:
        raise HTTPException(status_code=422, detail=f"Invalid action: {e}")

    outcome = env.step(action)
    env_info = outcome["info"]

    # The state is read after the step so the counters reflect this action.
    snapshot = env.state()

    # Prefer the env-reported raw_reward; otherwise fall back to the
    # returned reward.
    raw_reward = env_info.get("raw_reward", outcome["reward"])
    if snapshot:
        asked = snapshot.question_count
        gathered = list(snapshot.collected_info.keys())
    else:
        asked = 0
        gathered = []

    enriched_info = {
        "raw_reward": raw_reward,
        "question_count": asked,
        "collected_fields": gathered,
        "max_questions_allowed": env.MAX_QUESTIONS,
    }
    # Remaining env flags are merged last, so on a key clash the env's own
    # value wins over the fields computed above.
    enriched_info.update(
        (key, value) for key, value in env_info.items() if key != "raw_reward"
    )

    response = {}
    response["observation"] = _obs_to_dict(outcome["observation"])
    response["reward"] = outcome["reward"]
    response["done"] = outcome["done"]
    response["info"] = enriched_info
    return response


@app.get("/state")
def state():
    """
    Expose the environment's internal state (for debugging/grading).

    Returns ``{"state": None}`` while the environment reports no state;
    otherwise the selected state attributes under the ``state`` key.
    """
    snapshot = env.state()
    exposed = (
        "instruction",
        "true_time",
        "true_participants",
        "constraints",
        "collected_info",
        "question_count",
        "done",
    )
    if snapshot is None:
        payload = None
    else:
        payload = {name: getattr(snapshot, name) for name in exposed}
    return {"state": payload}


@app.post("/run_task")
def run_task(req: RunTaskRequest = None):
    """
    Run a full episode using fallback actions (no LLM, deterministic).
    Useful for automated validation.

    The episode runs on the shared module-level ``env``, so it replaces
    whatever episode was previously in progress there.

    Args:
        req: Optional request body; a missing body or empty ``task_name``
            falls back to "hard_ambiguous".

    Returns:
        Complete episode log: the task name, the number of steps taken,
        the summed reward rounded to 4 places, a success flag (unrounded
        sum greater than 0.5) and the per-step log entries.

    Raises:
        HTTPException: 404 when ``get_task`` raises ``ValueError`` for the
            requested task name.
    """
    task_name = (req.task_name if req else None) or "hard_ambiguous"
    try:
        # Copy, as /reset does, in case get_task hands back a shared task
        # definition that the env should not hold a reference to.
        task = get_task(task_name).copy()
    except ValueError as e:
        raise HTTPException(status_code=404, detail=str(e)) from e

    # The initial observation is not needed: the action script is fixed.
    env.reset(task)

    # Deterministic fallback sequence: two clarifying questions, then an
    # execute action filled in from the task's own ground truth.
    fallback_actions = [
        Action(type="ask",     question="When should this happen?"),
        Action(type="ask",     question="Who are the participants?"),
        Action(type="execute", proposed_time=task["true_time"],
               proposed_participants=task["true_participants"]),
    ]

    log = []
    total_reward = 0
    for i, action in enumerate(fallback_actions, 1):
        result = env.step(action)
        total_reward += result["reward"]
        log.append({
            "step":    i,
            "action":  action.model_dump(),
            "reward":  result["reward"],
            "done":    result["done"],
            "info":    result["info"],
        })
        # Stop early if the env ends the episode before the script does.
        if result["done"]:
            break

    return {
        "task":         task_name,
        "steps":        len(log),
        "total_reward": round(total_reward, 4),
        "success":      total_reward > 0.5,
        "log":          log,
    }