""" inference.py Evaluation entry point for the Ambiguity Resolution Environment. Updated for LLM-driven evaluation via OpenEnv proxy. """ import os import sys import json import re from typing import Any, Tuple from dotenv import load_dotenv from openai import OpenAI # ── load .env ──────────────────────────────────────────────────────────────── load_dotenv() # OpenEnv Proxy / standard HF endpoints API_BASE_URL = os.getenv("API_BASE_URL") API_KEY = os.getenv("API_KEY") or os.getenv("HF_TOKEN") MODEL_NAME = os.getenv("MODEL_NAME", "Qwen/Qwen2.5-72B-Instruct") MAX_STEPS = 5 if not API_KEY: print("ERROR: API_KEY or HF_TOKEN not set. Add it to your .env file.", file=sys.stderr) sys.exit(1) # Initialize Client client = OpenAI( base_url=API_BASE_URL, api_key=API_KEY ) from tasks.tasks import TASKS from env.env import AmbiguityEnv from models.models import Action # ───────────────────────────────────────────────────────────────────────────── # LOGGING # ───────────────────────────────────────────────────────────────────────────── def log_start(task_name: str) -> None: print(f"[START] task={task_name} env=ambiguity_env model={MODEL_NAME}", flush=True) def log_step(step: int, action: str, reward: float, done: bool, error: str | None = None) -> None: error_val = error if error else "null" done_val = str(done).lower() print(f"[STEP] step={step} action={action} reward={reward:.2f} done={done_val} error={error_val}", flush=True) def log_end(success: bool, steps: int, score: float, rewards: list[float]) -> None: rewards_str = ",".join(f"{r:.2f}" for r in rewards) success_val = str(success).lower() print(f"[END] success={success_val} steps={steps} score={score:.2f} rewards={rewards_str}", flush=True) # ───────────────────────────────────────────────────────────────────────────── # LLM AGENT LOGIC # ───────────────────────────────────────────────────────────────────────────── def parse_llm_response(raw_text: str) -> Action: """ Parses the LLM response into an Action model. Expected formats: - ask: - execute: time=