"""
Inference Script — VeriRL Verilog Hardware Design Environment
===================================
MANDATORY
- Before submitting, ensure the following variables are defined in your environment configuration:
    API_BASE_URL   The API endpoint for the LLM.
    MODEL_NAME     The model identifier to use for inference.
    HF_TOKEN       Your Hugging Face / API key (OPENAI_API_KEY takes precedence if set; API_KEY is the last fallback).
    ENV_BASE_URL   The base URL of the running VeriRL environment server.

- Defaults:
    API_BASE_URL = os.getenv("API_BASE_URL", "https://router.huggingface.co/v1")
    MODEL_NAME   = os.getenv("MODEL_NAME", "Qwen/Qwen2.5-72B-Instruct")
    ENV_BASE_URL = os.getenv("ENV_BASE_URL", "http://localhost:8000")

- The inference script must be named `inference.py` and placed in the root directory of the project
- Participants must use OpenAI Client for all LLM calls using above variables

STDOUT FORMAT
- The script must emit exactly three line types to stdout, in this order:

    [START] task=<task_name> env=<benchmark> model=<model_name>
    [STEP]  step=<n> action=<action_str> reward=<0.00> done=<true|false> error=<msg|null>
    [END]   success=<true|false> steps=<n> score=<score> rewards=<r1,r2,...,rn>

  Rules:
    - One [START] line at episode begin.
    - One [STEP] line per step, immediately after env.step() returns.
    - One [END] line after env.close(), always emitted (even on exception).
    - reward and rewards are formatted to 2 decimal places.
    - done and success are lowercase booleans: true or false.
    - error is the raw last_action_error string, or null if none.
    - All fields on a single line with no newlines within a line.
    - Each task's score must be strictly in (0, 1).

  Example:
    [START] task=mac_unit env=verirl model=Qwen2.5-72B-Instruct
    [STEP] step=1 action=write_file(312chars) reward=0.02 done=false error=null
    [STEP] step=2 action=run_compile reward=0.07 done=false error=null
    [STEP] step=3 action=submit reward=0.05 done=true error=null
    [END] success=false steps=3 score=0.47 rewards=0.02,0.07,0.05
"""

import asyncio
import json
import os
import sys
import textwrap
import time
from typing import List, Optional

from dotenv import load_dotenv
from openai import OpenAI

from verirl_env import VerirlAction, verirl_env

load_dotenv()


# --- Configuration ---
# Credential lookup order: OPENAI_API_KEY, then HF_TOKEN, then API_KEY.
API_KEY = (
    os.getenv("OPENAI_API_KEY")
    or os.getenv("HF_TOKEN")
    or os.getenv("API_KEY")
)
API_BASE_URL = os.getenv("API_BASE_URL", "https://router.huggingface.co/v1")
MODEL_NAME = os.getenv("MODEL_NAME", "Qwen/Qwen2.5-72B-Instruct")
ENV_BASE_URL = os.getenv("ENV_BASE_URL", "http://localhost:8000")
BENCHMARK = "verirl"

# Per-task wall-clock budgets, written in minutes for readability and
# converted to seconds below (TASK_BUDGETS is what the episode runner reads).
_BUDGET_MINUTES: dict[str, int] = {
    # easy
    "mac_unit": 4,
    "relu_clip": 3,
    "barrel_shifter": 3,
    # medium
    "axi_fifo": 6,
    "register_file": 5,
    "ring_buffer": 6,
    "dot_product": 5,
    "fir_filter": 6,
    # hard
    "systolic_array": 8,
    "fp16_adder": 10,
}
TASK_BUDGETS: dict[str, int] = {
    task: minutes * 60 for task, minutes in _BUDGET_MINUTES.items()
}

# An episode counts as a success when its final score reaches this value.
SUCCESS_SCORE_THRESHOLD = 0.5  # final_score in [0, 1]

# System message sent as the first chat message of every episode (see run_task).
# It fixes the tool workflow the model is expected to follow, lists the JSON
# action shapes the model may reply with (parse_action reads exactly one JSON
# object from each reply), and states the RTL design rules.
# textwrap.dedent removes the common source indentation and .strip() drops the
# leading/trailing blank lines, so the text starts at "You are an expert ...".
SYSTEM_PROMPT = textwrap.dedent(
    """
    You are an expert RTL hardware designer. Implement the given Verilog specification correctly.

    REQUIRED WORKFLOW — follow this sequence every episode:
      1. write_file   — write a complete, synthesizable Verilog module.
                        For multi-module designs use separate files:
                        {"action_type": "write_file", "filename": "pe.v", "verilog_src": "..."}
                        {"action_type": "write_file", "filename": "top.v", "verilog_src": "..."}
      2. run_compile  — check all files for syntax errors; fix with write_file if needed
      3. run_sim      — run the testbench; read every PASS/FAIL line; fix failures with write_file
      4. (optional) run_formal — check formal properties if available; fix any counterexamples
      5. (optional) run_synth  — check area against reference cell count
      6. submit       — only after attempting compile and sim

    NEVER submit without first running run_compile and run_sim.

    Available actions — respond with exactly one JSON object, no markdown:
      {"action_type": "write_file", "filename": "design.v", "verilog_src": "<full module>", "message": "..."}
      {"action_type": "run_compile", "message": "checking syntax"}
      {"action_type": "run_sim",     "message": "running testbench"}
      {"action_type": "run_synth",   "message": "checking area"}
      {"action_type": "run_formal",  "message": "checking formal properties"}
      {"action_type": "list_files",  "message": "show written files"}
      {"action_type": "submit",      "message": "final submission"}

    Design rules:
    - No `initial` blocks in the design module (testbench only)
    - Use always @(posedge clk) for sequential logic
    - Fully combinational modules: use assign or always @(*)
    - Pay close attention to pipeline depth, pipeline registers, and timing requirements
    - For tasks requiring multiple modules: use separate write_file calls with different filenames
    """
).strip()


# ---------------------------------------------------------------------------
# Logging helpers
# ---------------------------------------------------------------------------


def log_start(task: str, model: str) -> None:
    """Print the single ``[START]`` line that opens an episode.

    The line carries the task name, the benchmark name (module constant
    ``BENCHMARK``) and the model identifier, and is flushed immediately.
    """
    fields = ("[START]", f"task={task}", f"env={BENCHMARK}", f"model={model}")
    print(" ".join(fields), flush=True)


def log_step(
    step: int, action: str, reward: float, done: bool, error: Optional[str]
) -> None:
    """Print one ``[STEP]`` line describing a single environment step.

    Args:
        step: 1-based step index within the episode.
        action: Compact action label (already free of whitespace/newlines).
        reward: Step reward; rendered with two decimal places.
        done: Episode-finished flag; rendered lowercase (``true``/``false``).
        error: Error text for this step, or a falsy value to print ``null``.
    """
    done_field = str(done).lower()
    error_field = error if error else "null"
    fields = (
        "[STEP]",
        f"step={step}",
        f"action={action}",
        f"reward={reward:.2f}",
        f"done={done_field}",
        f"error={error_field}",
    )
    print(" ".join(fields), flush=True)


def log_end(success: bool, steps: int, rewards: List[float], final_score: float) -> None:
    """Print the closing ``[END]`` line for an episode.

    Args:
        success: Whether the episode met the success threshold (printed lowercase).
        steps: Number of steps taken in the episode.
        rewards: Per-step rewards; each is printed with two decimals, comma-separated.
        final_score: Episode score; printed with two decimals.
    """
    reward_fields = [format(value, ".2f") for value in rewards]
    fields = (
        "[END]",
        f"success={str(success).lower()}",
        f"steps={steps}",
        f"score={final_score:.2f}",
        "rewards=" + ",".join(reward_fields),
    )
    print(" ".join(fields), flush=True)


# ---------------------------------------------------------------------------
# Agent helpers
# ---------------------------------------------------------------------------


def safe_score(raw) -> float:
    """Clamp a score/reward into [0.01, 0.99], rounded to two decimals.

    The validator requires every score to lie strictly inside (0, 1), so the
    result is never 0 or 1.

    Args:
        raw: Anything ``float()`` accepts, or ``None``.

    Returns:
        The clamped, rounded value. ``None`` and NaN map to the lower bound
        (0.01): NaN would otherwise pass through ``min``/``max`` unchanged and
        escape the clamp, and ``None`` carries no score at all.

    Raises:
        ValueError / TypeError: if ``raw`` is neither ``None`` nor convertible
            by ``float()``.
    """
    import math  # local import: `math` is not among this module's top-level imports

    lower, upper = 0.01, 0.99
    if raw is None:
        return lower
    value = float(raw)
    if math.isnan(value):
        return lower
    return round(min(max(value, lower), upper), 2)


def sanitize_error(error_str: str, max_len: int = 80) -> Optional[str]:
    """Flatten an error message onto one line and cap its length.

    Every run of whitespace (including newlines) becomes a single space, then
    the result is cut to at most ``max_len`` characters.

    Returns:
        ``None`` when ``error_str`` is empty/falsy, otherwise the cleaned text.
    """
    if not error_str:
        return None
    single_line = " ".join(error_str.split())
    # Slicing is a no-op when the text is already short enough.
    return single_line[:max_len]


def format_observation(obs) -> str:
    """Render an observation as the text block shown to the LLM.

    Sections appear in this order, separated by blank lines, and empty ones
    are omitted: task specification, tool stdout, tool stderr, a summary of
    the files written so far (when the observation has ``current_files``),
    and a final one-line status (compile result, test counts, optional
    formal-property counts, turn counters).
    """
    labelled = (
        ("TASK SPECIFICATION", obs.task_spec),
        ("TOOL OUTPUT", obs.tool_stdout),
        ("ERRORS", obs.tool_stderr),
    )
    sections = [f"{heading}:\n{body}" for heading, body in labelled if body]

    # Multi-file projects: list each file with its size, sorted by name.
    files = getattr(obs, "current_files", None)
    if files:
        listing = ", ".join(
            f"{name}({len(source)}chars)" for name, source in sorted(files.items())
        )
        sections.append(f"Files on disk: {listing}")

    # The formal-verification counters are optional on the observation.
    proven = getattr(obs, "formal_properties_proven", None)
    total = getattr(obs, "formal_properties_total", None)
    formal_part = "" if proven is None else f" | formal={proven}/{total}"

    compile_state = "OK" if obs.compile_ok else "FAIL"
    sections.append(
        f"Status: compile={compile_state} | "
        f"tests={obs.tests_passed}/{obs.tests_total}"
        f"{formal_part} | "
        f"turn={obs.turn_number} | remaining={obs.turns_remaining}"
    )
    return "\n\n".join(sections)


def parse_action(response_text: str) -> tuple[VerirlAction, Optional[str]]:
    """Extract a JSON action from the LLM response, handling markdown fences.

    If the reply contains a fenced block (```json preferred, otherwise the
    first ``` block) only its content is searched. The first ``{`` is tried
    as the start of the action object. If that fails — e.g. the model wrote
    prose containing a brace before the real JSON — later ``{`` positions are
    tried as well, but only objects that carry an ``action_type`` key are
    accepted there, so stray nested or empty objects are never mistaken for
    an action. Keys that are not ``VerirlAction`` fields are dropped.

    Returns:
        ``(action, parse_error)``. ``parse_error`` is ``None`` on success.
        When nothing usable is found the action is a ``submit`` with message
        ``"parse error"`` and ``parse_error`` describes the first failure
        (or ``"parse_error: no JSON found"`` when there is no ``{`` at all).
    """
    text = response_text.strip()
    if "```json" in text:
        text = text.split("```json")[1].split("```")[0].strip()
    elif "```" in text:
        text = text.split("```")[1].split("```")[0].strip()

    decoder = json.JSONDecoder()
    first_error: Optional[str] = None
    first_candidate = True
    start = text.find("{")
    while start >= 0:
        try:
            data, _ = decoder.raw_decode(text, start)
            # Later candidates must look like an action; the first one is
            # handed to VerirlAction unconditionally, as before.
            if first_candidate or "action_type" in data:
                valid_fields = VerirlAction.model_fields
                return VerirlAction(**{k: v for k, v in data.items() if k in valid_fields}), None
        except Exception as exc:
            # JSON syntax errors and model validation errors both land here;
            # keep the first one because it points at the intended object.
            if first_error is None:
                first_error = sanitize_error(f"parse_error: {str(exc)}", max_len=60)
        first_candidate = False
        start = text.find("{", start + 1)

    fallback = VerirlAction(action_type="submit", message="parse error")
    return fallback, first_error or "parse_error: no JSON found"


def action_label(action: VerirlAction) -> str:
    """Return a compact, whitespace-free label for the ``[STEP]`` log line.

    A ``write_file`` action that carries source is labelled with the source
    length, e.g. ``write_file(312chars)``; every other action is labelled
    with its bare ``action_type``.
    """
    if action.action_type != "write_file":
        return action.action_type
    source = action.verilog_src
    if not source:
        return action.action_type
    return f"write_file({len(source)}chars)"


# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------


def _fallback_action(obs) -> VerirlAction:
    """Build the action used when the LLM produced no text: always a submit.

    Only the message differs, depending on whether the observation reports
    any Verilog (``obs.current_verilog``) to submit.
    """
    if obs.current_verilog:
        note = "llm error, submitting best code"
    else:
        note = "llm error"
    return VerirlAction(action_type="submit", message=note)


# ---------------------------------------------------------------------------
# Episode runner
# ---------------------------------------------------------------------------


def _checked_final_score(task_id: str, observation) -> float:
    """Read ``final_score`` from an observation and clamp it with ``safe_score``.

    The raw value is range-checked *before* clamping, so an out-of-range score
    coming from the environment is reported on stderr rather than being hidden
    by the clamp. A missing (falsy) score is treated as 0.01.
    """
    raw = observation.final_score
    if raw is not None and not (0.0 <= raw <= 1.0):
        print(
            f"[WARNING] Task {task_id}: final_score {raw} not in [0, 1]",
            file=sys.stderr,
            flush=True,
        )
    return safe_score(raw or 0.01)


async def run_task(task_id: str, llm: OpenAI) -> float:
    """Run one complete episode for ``task_id`` and return its final score.

    Emits one ``[START]`` line, one ``[STEP]`` line per environment step and
    exactly one ``[END]`` line (from the ``finally`` block, after the
    environment is closed) on stdout. Diagnostics go to stderr only.

    Args:
        task_id: Key of ``TASK_BUDGETS``; selects the task and its time budget.
        llm: OpenAI client used for chat completions.

    Returns:
        The episode's final score as produced by ``safe_score`` (so within
        [0.01, 0.99]); 0.01 if no score could be obtained.

    Raises:
        ValueError: if ``task_id`` has no entry in ``TASK_BUDGETS``.
        Exception: errors from ``env.reset`` or from the budget-exceeded
            submit propagate to the caller; ``[END]`` is still printed first.
    """
    start_time = time.time()
    final_score = 0.0
    rewards: List[float] = []
    steps_taken = 0

    log_start(task=task_id, model=MODEL_NAME)

    env = None
    obs = None
    try:
        budget = TASK_BUDGETS.get(task_id)
        if budget is None:
            raise ValueError(f"Unknown task_id '{task_id}'. Valid: {list(TASK_BUDGETS.keys())}")

        env = verirl_env(base_url=ENV_BASE_URL)
        result = await env.reset(task_id=task_id)
        obs = result.observation

        messages = [
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": format_observation(obs)},
        ]

        for step in range(1, 100):  # max guard; episodes end via done flag
            elapsed = time.time() - start_time
            if elapsed > budget:
                # Out of wall-clock budget: submit whatever is there and stop.
                action = VerirlAction(
                    action_type="submit", message="time budget exceeded"
                )
                result = await env.step(action)
                reward = safe_score(result.reward or 0.01)
                rewards.append(reward)
                steps_taken = step
                log_step(step, action_label(action), reward, True, None)
                final_score = _checked_final_score(task_id, result.observation)
                break

            # LLM call
            error: Optional[str] = None
            try:
                # The OpenAI client call is synchronous; running it in a worker
                # thread keeps the event loop free while the request is in flight.
                response = await asyncio.to_thread(
                    llm.chat.completions.create,
                    model=MODEL_NAME,
                    messages=messages,
                )
                assistant_text = response.choices[0].message.content or ""
            except Exception as exc:
                error = sanitize_error(str(exc))
                assistant_text = ""

            if assistant_text:
                action, parse_err = parse_action(assistant_text)
                if parse_err and error is None:
                    error = parse_err
            else:
                # No usable LLM output (call failed or empty reply): the
                # fallback submits, which ends the episode with the current code.
                action = _fallback_action(obs)

            # Environment step
            try:
                result = await env.step(action)
            except Exception as exc:
                error = sanitize_error(str(exc))
                # Log the failed step before breaking
                reward = 0.01  # fallback reward on error
                rewards.append(reward)
                steps_taken = step
                log_step(step, action_label(action), reward, True, error)
                break

            obs = result.observation
            done = result.done

            # Range-check the RAW reward; after safe_score() it is always
            # within [0.01, 0.99] and the check could never fire.
            raw_reward = result.reward or 0.0
            if not (-1.0 <= raw_reward <= 1.0):
                print(
                    f"[WARNING] Task {task_id}: reward {raw_reward} outside [-1.0, 1.0]",
                    file=sys.stderr,
                    flush=True,
                )
                # Keep an earlier LLM/parse error if there is one.
                if error is None:
                    error = sanitize_error(f"reward {raw_reward} outside [-1.0, 1.0]")
            reward = safe_score(result.reward or 0.01)

            rewards.append(reward)
            steps_taken = step
            log_step(step, action_label(action), reward, done, error)

            messages.append({"role": "assistant", "content": assistant_text})
            messages.append({"role": "user", "content": format_observation(obs)})

            if done:
                final_score = _checked_final_score(task_id, obs)
                break

    finally:
        # Safety net: if loop exited without a submit (e.g. connection drop),
        # attempt a final submit so the score is not lost.
        if final_score == 0.0 and obs is not None and obs.current_verilog is not None:
            try:
                result = await env.step(
                    VerirlAction(action_type="submit", message="safety submit")
                )
                final_score = _checked_final_score(task_id, result.observation)
                reward = safe_score(result.reward or 0.01)
                # Record the reward too, so [END] reports as many rewards as
                # there are logged [STEP] lines.
                rewards.append(reward)
                steps_taken += 1
                log_step(steps_taken, "submit", reward, True, None)
            except Exception as exc:
                # Best effort only; report on stderr so stdout stays spec-clean.
                print(
                    f"[WARNING] Task {task_id}: safety submit failed: "
                    f"{sanitize_error(repr(exc))}",
                    file=sys.stderr,
                    flush=True,
                )
        # Ensure final_score is never exactly 0.0 (validator requires strict (0, 1))
        if final_score == 0.0:
            final_score = 0.01
        success = final_score >= SUCCESS_SCORE_THRESHOLD
        # Ensure rewards list is never empty — [END] requires at least one reward value
        if not rewards:
            rewards = [0.01]
        if env is not None:
            try:
                await env.close()
            except Exception as exc:
                print(
                    f"[WARNING] Task {task_id}: env.close() failed: "
                    f"{sanitize_error(repr(exc))}",
                    file=sys.stderr,
                    flush=True,
                )
        log_end(success=success, steps=steps_taken, rewards=rewards, final_score=final_score)

    return final_score


# ---------------------------------------------------------------------------
# Task Enumeration & Validation
# ---------------------------------------------------------------------------


async def validate_environment(base_url: str) -> List[str]:
    """
    Probe the environment and return the task IDs that respond.

    For every known task the environment is reset and an empty submission is
    stepped; a task counts as valid when both calls succeed and the returned
    observation yields a usable final score. Runs silently (nothing is
    printed) to avoid interfering with the stdout format, and never raises:
    failing to create or close the client is treated like a failed validation.

    Args:
        base_url: Environment server URL

    Returns:
        List of task IDs that passed, in manifest order; empty list if none
        did or the client could not be created.
    """
    # Known tasks — these must be in the environment
    task_manifest = [
        {"id": "mac_unit",        "difficulty": "easy",   "max_turns": 8},
        {"id": "relu_clip",       "difficulty": "easy",   "max_turns": 6},
        {"id": "barrel_shifter",  "difficulty": "easy",   "max_turns": 6},
        {"id": "axi_fifo",        "difficulty": "medium", "max_turns": 10},
        {"id": "register_file",   "difficulty": "medium", "max_turns": 8},
        {"id": "ring_buffer",     "difficulty": "medium", "max_turns": 10},
        {"id": "dot_product",     "difficulty": "medium", "max_turns": 8},
        {"id": "fir_filter",      "difficulty": "medium", "max_turns": 10},
        {"id": "systolic_array",  "difficulty": "hard",   "max_turns": 12},
        {"id": "fp16_adder",      "difficulty": "hard",   "max_turns": 15},
    ]

    task_ids: List[str] = []
    try:
        env = verirl_env(base_url=base_url)
    except Exception:
        # No client, no validation — the caller falls back to its default list.
        return task_ids

    try:
        for task in task_manifest:
            task_id = task["id"]
            try:
                await env.reset(task_id=task_id)
                # Submit empty code (should score 0)
                result = await env.step(VerirlAction(action_type="submit"))
                # Reading the score is part of the probe: an observation
                # without a usable final_score fails the task.
                safe_score(result.observation.final_score or 0.01)
            except Exception:
                # Silently skip failed validation
                continue
            task_ids.append(task_id)
    finally:
        try:
            await env.close()
        except Exception:
            # A close failure must not discard the tasks already validated.
            pass
    return task_ids


# ---------------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------------


async def main() -> None:
    """Run every task once, sequentially, against the configured environment.

    Tasks come from ``validate_environment``; if that yields nothing (for
    example because of a network problem) every task in ``TASK_BUDGETS`` is
    attempted. A task that raises does not stop the remaining tasks: the
    error is reported on stderr and the process exits with status 1 once all
    tasks have been attempted.
    """
    # Try to enumerate tasks; if validation fails, use defaults
    task_ids = await validate_environment(ENV_BASE_URL)
    if not task_ids:
        # Fallback: every known task, in TASK_BUDGETS declaration order
        # (validation may have failed due to network)
        task_ids = list(TASK_BUDGETS)

    llm = OpenAI(base_url=API_BASE_URL, api_key=API_KEY)

    failed: List[str] = []
    for task_id in task_ids:
        try:
            await run_task(task_id, llm)
        except Exception as exc:
            # run_task has already printed its [END] line; keep going so one
            # broken task does not cost the scores of all later ones.
            failed.append(task_id)
            print(
                f"[WARNING] Task {task_id} aborted: {sanitize_error(repr(exc), max_len=200)}",
                file=sys.stderr,
                flush=True,
            )

    # No summary output — spec requires ONLY [START]/[STEP]/[END] lines to stdout
    if failed:
        # Preserve a non-zero exit status when any task raised.
        raise SystemExit(1)


if __name__ == "__main__":
    asyncio.run(main())