"""
inference.py — Rust Coder OpenEnv Baseline Agent

Architecture
────────────
• Runs 3 tasks (easy / medium / hard) as independent episodes.
• Each task produces its own [START]…[STEP]…[END] log block.
• A fresh WebSocket env connection is opened per task to avoid
  HF-Space WebSocket timeouts during long LLM + compilation waits.
• Scores are clamped to [0.01, 0.99], so they always lie strictly inside (0, 1).
• If HF_TOKEN is missing, stub code is submitted instead of LLM output so
  the platform always receives 3 parseable task records.

Required env vars
─────────────────
    API_BASE_URL  — LLM router URL   (default: HF router)
    MODEL_NAME    — model identifier (default: Qwen 72B)
    HF_TOKEN      — HuggingFace / API key (falls back to API_KEY)
    ENV_URL       — environment URL  (default: http://localhost:8000)
"""

import asyncio
import logging
import math
import os
from typing import List, Optional

from dotenv import load_dotenv
from openai import OpenAI

load_dotenv()

# ── Configuration ─────────────────────────────────────────────────────────────
# Each setting can be overridden through the environment; an unset or empty
# variable falls back to the default on the right-hand side of `or`.
API_BASE_URL = os.environ.get("API_BASE_URL") or "https://router.huggingface.co/v1"
MODEL_NAME = os.environ.get("MODEL_NAME") or "Qwen/Qwen2.5-72B-Instruct"
# HF_TOKEN takes precedence over API_KEY; the result is falsy when neither is set.
HF_TOKEN = os.environ.get("HF_TOKEN") or os.environ.get("API_KEY")
ENV_URL = os.environ.get("ENV_URL") or "http://localhost:8000"

# A task counts as successful once its clamped score reaches this value.
SUCCESS_SCORE_THRESHOLD = 0.5
# Sampling parameters forwarded to the chat-completion request.
TEMPERATURE = 0.1
MAX_TOKENS = 1500

# Exactly 3 tasks: easy / medium / hard (maps to problems.json indices)
EVAL_TASKS = [
    {"task_id": task_id, "start_index": start_index, "difficulty": difficulty}
    for task_id, start_index, difficulty in (
        ("task_1", 0, "easy"),
        ("task_3", 2, "medium"),
        ("task_6", 5, "hard"),
    )
]

# ── Logging ───────────────────────────────────────────────────────────────────
# Verbosity is taken from LOG_LEVEL (case-insensitive). An unset or empty
# value, or a name the logging module does not define, falls back to INFO.
_LOG_LEVEL = str(os.environ.get("LOG_LEVEL") or "INFO").upper()
logging.basicConfig(
    format="%(asctime)s %(levelname)s %(name)s - %(message)s",
    level=getattr(logging, _LOG_LEVEL, logging.INFO),
)
# Module-wide logger used for diagnostics (separate from the stdout records).
logger = logging.getLogger("rust_coder.inference")

from client import RustCoderEnv
from models import RustCoderAction


# ── Strict stdout log helpers ─────────────────────────────────────────────────

def log_start(task: str, env: str, model: str) -> None:
    """Print the ``[START]`` record that opens a task's log block on stdout.

    Args:
        task: Identifier of the task being started.
        env: Name of the environment the task runs in.
        model: Identifier of the model producing the solution.
    """
    fields = (f"task={task}", f"env={env}", f"model={model}")
    print(" ".join(("[START]",) + fields), flush=True)


def _single_line(text: str, limit: int = 200) -> str:
    """Escape CR/LF so *text* stays on one line, then cut it to *limit* chars."""
    return text.replace("\r", "\\r").replace("\n", "\\n")[:limit]


def log_step(
    step: int,
    action: str,
    reward: float,
    done: bool,
    error: Optional[str] = None,
) -> None:
    """Print one ``[STEP]`` record on stdout.

    Both free-text fields (``action`` and ``error``) are sanitised the same
    way: carriage returns and newlines are escaped and the result is truncated
    to 200 characters, so a record never spans more than one output line.

    Args:
        step: Step number within the episode.
        action: Submitted action text; ``None`` or empty is printed as empty.
        reward: Reward for the step, printed with two decimals.
        done: Whether the episode finished; printed as ``true``/``false``.
        error: Error description, or ``None`` to print the literal ``null``.
    """
    action_str = _single_line(action or "")
    # Previously only "\n" was escaped here, so a raw "\r" in an error message
    # could still break the single-line record.
    err_field = "null" if error is None else _single_line(str(error))
    print(
        f"[STEP] step={step} action={action_str} reward={reward:.2f} "
        f"done={str(bool(done)).lower()} error={err_field}",
        flush=True,
    )


def log_end(success: bool, steps: int, score: float, rewards: List[float]) -> None:
    """Print the ``[END]`` record that closes a task's log block on stdout.

    Args:
        success: Outcome flag, printed in lower case.
        steps: Number of steps taken in the episode.
        score: Final score, printed with three decimals.
        rewards: Per-step rewards, printed comma-separated with two decimals.
    """
    formatted_rewards = [format(reward, ".2f") for reward in rewards]
    line = "[END] success={} steps={} score={:.3f} rewards={}".format(
        str(success).lower(),
        steps,
        score,
        ",".join(formatted_rewards),
    )
    print(line, flush=True)


# ── Score clamping ────────────────────────────────────────────────────────────

def clamp_score(raw: float) -> float:
    """
    Clamp a raw reward into [0.01, 0.99] and round it to three decimals.

    The result is therefore always strictly inside (0, 1) — never exactly
    0.0 or 1.0.

    Floor 0.01: even compilation failures yield a non-zero score.
    Ceiling 0.99: prevents a theoretically-perfect submission from
    returning 1.0.

    A NaN reward is mapped to the floor. Without this check
    ``min(0.99, nan)`` evaluates to 0.99, so an undefined reward would be
    reported as a near-perfect score.

    Args:
        raw: Reward value; anything ``float()`` accepts.

    Returns:
        The clamped, rounded score.
    """
    value = float(raw)
    if math.isnan(value):
        return 0.01
    return round(max(0.01, min(0.99, value)), 3)


# ── LLM call ─────────────────────────────────────────────────────────────────

def _strip_markdown_fence(text: str) -> str:
    """Return the contents of the first Markdown code fence in *text*.

    A fence opened with "```rust" is preferred; otherwise the first generic
    fence is used. Text without any fence is returned unchanged. A missing
    closing fence is tolerated: everything after the opening fence is kept.
    """
    if "```rust" in text:
        return text.split("```rust", 1)[1].split("```", 1)[0]
    if "```" not in text:
        return text
    body = text.split("```", 1)[1].split("```", 1)[0]
    # Anything alphanumeric left on the opening-fence line is a language tag
    # (e.g. "rs" or "Rust"), not code; keeping it would corrupt the source.
    tag, newline, rest = body.partition("\n")
    if newline and tag.strip().isalnum():
        body = rest
    return body


async def get_model_code(prompt: str, client: OpenAI) -> str:
    """Ask the model for a complete Rust solution; strip markdown if needed.

    ``client`` is called synchronously, so the request is run in the default
    thread-pool executor. This keeps the event loop free for the whole
    duration of the LLM call instead of blocking it.

    Args:
        prompt: User message describing the problem to solve.
        client: OpenAI client used to issue the chat-completion request.

    Returns:
        The model's answer with any Markdown fence removed. An empty answer
        yields ``"// empty response"``; any failure is logged and yields
        ``"// LLM error: <message>"``. This coroutine does not raise for
        ordinary exceptions.
    """

    def _request():
        return client.chat.completions.create(
            model=MODEL_NAME,
            messages=[
                {
                    "role": "system",
                    "content": (
                        "You are a senior Rust systems engineer. "
                        "Return ONLY the complete, corrected Rust source file. "
                        "No markdown fences. No commentary."
                    ),
                },
                {"role": "user", "content": prompt},
            ],
            temperature=TEMPERATURE,
            max_tokens=MAX_TOKENS,
        )

    try:
        loop = asyncio.get_running_loop()
        completion = await loop.run_in_executor(None, _request)
        text = (completion.choices[0].message.content or "").strip()
        text = _strip_markdown_fence(text).strip()
        return text or "// empty response"
    except Exception as exc:
        logger.exception("LLM call failed")
        return f"// LLM error: {exc}"


# ── Single-task episode ───────────────────────────────────────────────────────

async def run_task(task_info: dict, client: Optional[OpenAI]) -> None:
    """
    Run one task as a fully independent episode with its own env connection.

    Opens a fresh WebSocket connection so a slow LLM call on a previous
    task cannot cause a connection timeout here.

    Always emits exactly one [START]…[STEP]…[END] block. On both the success
    and the failure path there is a single [STEP] line numbered 1, and the
    [END] line reports ``steps=1`` with one reward, so the three records
    always agree with each other.

    Args:
        task_info: Mapping with at least ``task_id`` and ``start_index``.
        client: LLM client, or ``None`` to submit stub code instead.
    """
    task_id     = task_info["task_id"]
    start_index = task_info["start_index"]

    log_start(task=task_id, env="RustCoder-v1", model=MODEL_NAME)

    rewards: List[float] = []
    steps_taken = 0
    score  = 0.01
    success = False

    # Fresh connection per task — avoids WebSocket timeout across tasks.
    # Created inside the try so a constructor failure still yields the
    # [STEP]/[END] lines promised above.
    env = None
    try:
        env = RustCoderEnv(base_url=ENV_URL)

        # ── Reset to the target task ──────────────────────────────────
        reset_result = await env.reset(start_index=start_index)
        obs = reset_result.observation

        # ── Build prompt ──────────────────────────────────────────────
        prompt = obs.problem_description or ""
        header = getattr(obs, "header_section", "")
        if header:
            prompt += (
                "\n\nHeader section (must be included verbatim):"
                f"\n```rust\n{header}\n```"
            )

        # ── Get LLM code or skip if no token ─────────────────────────
        if client is not None:
            code = await get_model_code(prompt, client)
        else:
            code = "// no HF_TOKEN — using stub"

        # ── Evaluate in environment ───────────────────────────────────
        step_result = await env.step(RustCoderAction(code=code))
        # Explicit None check — 0.0 is falsy but valid
        raw_reward = float(step_result.reward if step_result.reward is not None else 0.0)
        score  = clamp_score(raw_reward)
        rewards.append(score)
        success = score >= SUCCESS_SCORE_THRESHOLD
        steps_taken = 1

        log_step(step=1, action=code, reward=score, done=True, error=None)

    except Exception as exc:
        logger.exception("Task %s failed", task_id)
        # Report the failure as the episode's single step at the floor score,
        # keeping the step number, step count and reward list consistent.
        score   = 0.01
        rewards = [0.01]
        success = False
        steps_taken = 1
        log_step(
            step=steps_taken,
            action="error",
            reward=0.01,
            done=True,
            error=str(exc),
        )
    finally:
        if env is not None:
            try:
                await env.close()
            except Exception:
                # Closing is best-effort; record the problem without failing the task.
                logger.debug(
                    "Ignoring error while closing env for task %s",
                    task_id,
                    exc_info=True,
                )

    log_end(success=success, steps=steps_taken, score=score, rewards=rewards)


# ── Main ──────────────────────────────────────────────────────────────────────

async def main() -> None:
    """Run every entry of EVAL_TASKS in order, one episode after another.

    An LLM client is created only when a token is configured; otherwise a
    warning is logged and each task is run with ``client=None``.
    """
    if not HF_TOKEN:
        logger.warning(
            "HF_TOKEN / API_KEY not set — LLM calls disabled. "
            "Stub code will be submitted; scores will be at floor (0.01)."
        )

    llm_client: Optional[OpenAI] = (
        OpenAI(base_url=API_BASE_URL, api_key=HF_TOKEN) if HF_TOKEN else None
    )

    # Tasks are awaited sequentially; each one opens its own env connection.
    for task_spec in EVAL_TASKS:
        await run_task(task_spec, llm_client)


if __name__ == "__main__":
    # Script entry point: run the async main() to completion when this file
    # is executed directly (not when it is imported).
    asyncio.run(main())