#!/usr/bin/env python3
"""
Inference script for Doc Quality Environment.

This script runs an LLM agent against the documentation quality assessment environment.
It demonstrates how an AI agent can evaluate and improve technical documentation.

Environment variables:
    API_BASE_URL: LLM API endpoint (default: https://router.huggingface.co/v1)
    MODEL_NAME: Model identifier (default: Qwen/Qwen2.5-7B-Instruct)
    HF_TOKEN: Hugging Face API token (required)
"""

import os
import json
import textwrap
from typing import Optional, List
from openai import OpenAI

from doc_quality_env.server.doc_quality_env_environment import DocQualityEnvironment
from doc_quality_env.models import DocQualityAction

# Runtime settings: each value can be overridden through the process environment.
API_BASE_URL = os.environ.get("API_BASE_URL", "https://router.huggingface.co/v1")
MODEL_NAME = os.environ.get("MODEL_NAME", "Qwen/Qwen2.5-7B-Instruct")
HF_TOKEN = os.environ.get("HF_TOKEN")

# Fail fast at import time: the token doubles as the API key for the client below.
if HF_TOKEN is None:
    raise ValueError("HF_TOKEN environment variable is required")

# OpenAI-compatible client pointed at the configured endpoint.
client = OpenAI(api_key=HF_TOKEN, base_url=API_BASE_URL)

# Episode settings: tasks to run and the per-task step budget.
TASKS = ["easy_api_doc", "medium_api_doc", "hard_guide_review"]
MAX_STEPS_PER_TASK = 10

# Sampling settings forwarded to every chat-completion request.
TEMPERATURE = 0.7
MAX_TOKENS = 200


def log_start(task: str, env: str, model: str) -> None:
    """Print the ``[START]`` line that opens an episode log.

    Args:
        task: Task identifier for the episode.
        env: Environment name to report. It was previously ignored in favour
            of a hard-coded ``doc_quality_env``; callers that already pass
            that value see identical output.
        model: Model identifier being evaluated.
    """
    print(f"[START] task={task} env={env} model={model}", flush=True)


def log_step(
    step: int, action: str, reward: float, done: bool, error: Optional[str]
) -> None:
    """Print one ``[STEP]`` line describing a single environment step.

    The reward is rendered with two decimals, ``done`` as a lowercase
    string, and a missing or empty ``error`` as the literal ``null``.
    """
    error_val = error or "null"
    done_val = str(done).lower()
    line = f"[STEP] step={step} action={action} reward={reward:.2f} done={done_val} error={error_val}"
    print(line, flush=True)


def log_end(success: bool, steps: int, score: float, rewards: List[float]) -> None:
    """Print the ``[END]`` line that closes an episode log.

    Rewards are written as a comma-separated list with two decimals each;
    an empty list yields an empty ``rewards=`` field.
    """
    success_val = str(success).lower()
    rewards_str = ",".join([f"{value:.2f}" for value in rewards])
    line = f"[END] success={success_val} steps={steps} score={score:.2f} rewards={rewards_str}"
    print(line, flush=True)


def call_llm(prompt: str) -> str:
    """Ask the configured chat model for the agent's next action.

    Args:
        prompt: User message describing the current environment state.

    Returns:
        The model's reply with surrounding whitespace removed.

    Raises:
        RuntimeError: If the request fails, the response has no usable
            choice, or the reply carries no text content. The underlying
            exception, when there is one, is attached as ``__cause__``.
    """
    try:
        response = client.chat.completions.create(
            model=MODEL_NAME,
            messages=[
                {
                    "role": "system",
                    "content": "You are an expert technical documentation reviewer. Provide clear, actionable feedback on documentation quality.",
                },
                {"role": "user", "content": prompt},
            ],
            temperature=TEMPERATURE,
            max_tokens=MAX_TOKENS,
        )
    except Exception as e:
        raise RuntimeError(f"LLM call failed: {e}") from e

    # Extract the text separately so a malformed response is reported as
    # such instead of as an attribute error on ``None``.
    try:
        content = response.choices[0].message.content
    except (AttributeError, IndexError, TypeError) as e:
        raise RuntimeError(f"LLM call failed: malformed response: {e}") from e

    if content is None:
        raise RuntimeError("LLM call failed: response contained no text content")
    return content.strip()


def parse_agent_response(response: str) -> tuple:
    """Parse the LLM response into ``(action_type, category, content)``.

    Expected format: ``"ACTION_TYPE|CATEGORY|CONTENT"``. Only the first two
    pipes are separators, so CONTENT may itself contain ``|``.

    When the response does not have three pipe-separated parts, the action
    type is guessed from keywords in the text (checked in the order
    identify, suggest, rate), the category defaults to ``"clarity"`` and
    the whole response is used as the content.

    Args:
        response: Raw text returned by the model.

    Returns:
        A 3-tuple ``(action_type, category, content)``. Non-string input
        yields ``("identify_issue", "clarity", response)`` unchanged.
    """
    # Explicit guard instead of a bare ``except``, which would also have
    # swallowed KeyboardInterrupt and SystemExit.
    if not isinstance(response, str):
        return "identify_issue", "clarity", response

    parts = response.split("|", 2)
    if len(parts) == 3:
        action_type, category, content = (part.strip() for part in parts)
        return action_type, category, content

    # Keyword fallback; the first group with a match wins.
    lowered = response.lower()
    keyword_groups = (
        ("identify_issue", ("identify", "issue")),
        ("suggest_improvement", ("suggest", "improve", "fix")),
        ("rate_quality", ("rate", "score", "quality")),
    )
    for action_type, keywords in keyword_groups:
        if any(keyword in lowered for keyword in keywords):
            return action_type, "clarity", response

    return "identify_issue", "clarity", response


def build_prompt(step: int, obs) -> str:
    """Build the user prompt for the LLM from the current observation.

    The prompt is assembled line by line rather than by dedenting an
    f-string. ``textwrap.dedent`` would run after interpolation, so any
    multi-line value (the document preview, two or more issues) would add
    lines with no indentation, cancel the dedent, and leave the template
    lines indented by four spaces.

    Args:
        step: Step number shown to the model as ``step/max_steps``.
        obs: Observation exposing ``current_doc``, ``issues_identified``,
            ``known_issues``, ``task_name``, ``task_difficulty``,
            ``max_steps`` and ``feedback``.

    Returns:
        The prompt text, with every template line starting at column 0.
    """
    preview_limit = 500  # characters of the document shown to the model
    recent_limit = 3  # most recent identified issues / first known issues listed

    doc = obs.current_doc
    doc_preview = doc[:preview_limit] + "..." if len(doc) > preview_limit else doc

    if obs.issues_identified:
        issues_str = "\n".join(
            f"- {issue}" for issue in obs.issues_identified[-recent_limit:]
        )
    else:
        issues_str = "None yet"
    known_str = "\n".join(f"- {issue}" for issue in obs.known_issues[:recent_limit])

    lines = [
        f"Task: {obs.task_name}",
        f"Difficulty: {obs.task_difficulty}",
        f"Step: {step}/{obs.max_steps}",
        "",
        "Documentation Preview:",
        doc_preview,
        "",
        "Issues You've Already Identified:",
        issues_str,
        "",
        "Hints (Sample Known Issues):",
        known_str,
        "",
        f"Last Feedback: {obs.feedback}",
        "",
        "Your Options:",
        "1. Identify another issue in the documentation (format: identify_issue|CATEGORY|DESCRIPTION)",
        "2. Suggest how to improve it (format: suggest_improvement|CATEGORY|SUGGESTION)",
        "3. Rate the overall quality (format: rate_quality|overall|SCORE_0_TO_1)",
        "",
        "Respond with ONE action in the format above. Be specific and actionable.",
    ]
    return "\n".join(lines)


def run_task_episode(env: DocQualityEnvironment, task_key: str) -> tuple:
    """Run a single episode and log it as [START] / [STEP]* / [END] lines.

    Each step builds a prompt from the latest observation, asks the LLM for
    an action, parses it, and applies it to the environment. The loop ends
    when the environment reports ``done`` or after ``MAX_STEPS_PER_TASK``
    steps.

    Args:
        env: Environment instance; it is reset at the start of the episode.
        task_key: Task label written to the ``[START]`` log line.
            NOTE(review): the key is not passed to ``env.reset()``, so which
            task actually runs depends on the environment - confirm.

    Returns:
        ``(final_score, success, all_rewards)``. On a normal finish the
        score is the fraction of known issues identified, capped at 1.0.
        If a step raises, the score is the mean reward of the completed
        steps (0.0 if none completed). ``success`` is True only when the
        environment reported ``done``.
    """
    obs = env.reset()
    step_count = 0
    total_reward = 0.0
    all_rewards: List[float] = []
    success = False

    log_start(task_key, "doc_quality_env", MODEL_NAME)

    try:
        for step in range(1, MAX_STEPS_PER_TASK + 1):
            # Ask the LLM for the next action and parse it into its parts.
            prompt = build_prompt(step, obs)
            llm_response = call_llm(prompt)
            action_type, category, content = parse_agent_response(llm_response)

            action = DocQualityAction(
                action_type=action_type, content=content, issue_category=category
            )

            obs = env.step(action)
            step_count += 1

            # Count a missing reward as 0.0 so the accumulation and the
            # ``:.2f`` formatting cannot fail on ``None``.
            reward = obs.reward if obs.reward is not None else 0.0
            all_rewards.append(reward)
            total_reward += reward

            # Flatten line breaks so each [STEP] entry stays on one line.
            flat_content = " ".join(content.splitlines())
            if len(flat_content) > 30:
                action_str = f"{action_type}('{flat_content[:30]}'...)"
            else:
                action_str = f"{action_type}('{flat_content}')"
            log_step(step, action_str, reward, obs.done, None)

            if obs.done:
                success = True
                break

        # Final score based on issues found
        final_score = min(
            1.0, len(obs.issues_identified) / max(len(obs.known_issues), 1)
        )

    except Exception as e:
        # Best effort: report the failure as a terminal step and score the
        # episode by the mean reward of the steps that did complete.
        final_score = total_reward / step_count if step_count else 0.0
        error_text = " ".join(str(e).splitlines())
        log_step(step_count + 1, "error", 0.0, True, error_text)

    log_end(success, step_count, final_score, all_rewards)
    return final_score, success, all_rewards


def main():
    """Run one episode per entry in ``TASKS`` and print the average score.

    A fresh environment is created for every task and always closed
    afterwards. A task that raises is reported with an ``[ERROR]`` line and
    counted as a score of 0.0.
    """
    banner = "=" * 60
    header_lines = (
        banner,
        "Doc Quality Environment - Inference Script",
        f"Model: {MODEL_NAME}",
        f"API: {API_BASE_URL}",
        banner,
        "",
    )
    for header_line in header_lines:
        print(header_line, flush=True)

    task_scores = []

    for task_key in TASKS:
        print(f"Running task: {task_key}", flush=True)

        env = DocQualityEnvironment()
        try:
            # NOTE(review): run_task_episode() resets the environment again
            # itself; this extra reset is kept to preserve behaviour -
            # confirm whether it is needed.
            env.reset()
            score, _success, _rewards = run_task_episode(env, task_key)
            task_scores.append(score)
        except Exception as e:
            print(f"[ERROR] Task {task_key} failed: {e}", flush=True)
            task_scores.append(0.0)
        finally:
            env.close()

        print("", flush=True)

    # Summary
    if task_scores:
        avg_score = sum(task_scores) / len(task_scores)
    else:
        avg_score = 0.0
    print(banner, flush=True)
    print(f"Summary: Average Score = {avg_score:.2f}", flush=True)
    print(banner, flush=True)


if __name__ == "__main__":
    main()