"""Gradio v6 chat UI backed by the Hugging Face Inference API.

Streams model responses through ``HFInferenceBackend`` using the Space's
``HF_TOKEN``. Model repo, system prompt, and sampling defaults are read from
environment variables so they can be overridden via Space Variables/Secrets.
"""

import os
from typing import Any, Dict, Iterator, List, Tuple

import gradio as gr

from backend_hf_api import HFInferenceBackend, is_hf_api_available

# Defaults, each overridable via environment variables.
SYSTEM_PROMPT_DEFAULT = os.getenv("SYSTEM_PROMPT", "You are a helpful assistant. Be concise and accurate.")
DEFAULT_MAX_NEW_TOKENS = int(os.getenv("MAX_NEW_TOKENS", "512"))
DEFAULT_TEMPERATURE = float(os.getenv("TEMPERATURE", "0.7"))
# Use a valid Nemotron repo by default; override via Space Variables if you want another.
DEFAULT_HF_API_MODEL = os.getenv("HF_API_MODEL", "NVIDIA/Nemotron-3-8B-Instruct")


def _msg_content_to_text(content: Any) -> str:
    """Coerce a chat-message ``content`` field to plain text.

    Gradio v6 message content may be a string or a dict with a ``"text"``
    key; anything else is stringified (``None`` becomes ``""``).
    """
    if isinstance(content, str):
        return content
    if isinstance(content, dict) and isinstance(content.get("text"), str):
        return content["text"]
    return "" if content is None else str(content)


def _history_to_pairs(history: Any) -> List[Tuple[str, str]]:
    """Gradio v6 messages or legacy (user, assistant) pairs → (user, assistant) pairs."""
    pairs: List[Tuple[str, str]] = []
    if not history:
        return pairs
    if isinstance(history[0], dict):
        # v6 "messages" format: fold alternating role dicts into pairs.
        # Roles other than user/assistant (e.g. system) are skipped.
        pending_user: str | None = None
        for m in history:
            role = m.get("role")
            text = _msg_content_to_text(m.get("content"))
            if role == "user":
                if pending_user is not None:
                    # Two user turns in a row: emit the first with an empty reply.
                    pairs.append((pending_user, ""))
                pending_user = text
            elif role == "assistant":
                if pending_user is None:
                    # Assistant turn with no preceding user turn.
                    pairs.append(("", text))
                else:
                    pairs.append((pending_user, text))
                pending_user = None
        if pending_user is not None:
            # Trailing user turn awaiting a reply.
            pairs.append((pending_user, ""))
        return pairs
    if isinstance(history[0], (list, tuple)) and len(history[0]) == 2:
        # Legacy tuple format; normalize None entries to "".
        return [(str(u or ""), str(a or "")) for (u, a) in history]
    # Unknown shape: treat the whole thing as a single user turn.
    return [(str(history), "")]


def chat_fn(
    message: str,
    history: List[Dict[str, Any]] | List[Tuple[str, str]],
    model_name: str,
    system_prompt: str,
    temperature: float,
    max_new_tokens: int,
) -> Iterator[str]:
    """Stream a chat completion for ``message`` given the prior ``history``.

    Yields partial text chunks from the backend. Errors (missing token,
    backend failures) are surfaced to the UI as ``[error] ...`` messages
    rather than raised, so the Space never crashes mid-chat.
    """
    if not is_hf_api_available():
        yield "[error] HF_TOKEN not set. Add it in Spaces → Settings → Secrets and restart."
        return
    try:
        backend = HFInferenceBackend(model_name or DEFAULT_HF_API_MODEL)
        pairs_history = _history_to_pairs(history)
        yield from backend.generate_stream(
            system_prompt=(system_prompt or SYSTEM_PROMPT_DEFAULT).strip(),
            history=pairs_history,
            user_msg=message,
            temperature=float(temperature),
            max_new_tokens=int(max_new_tokens),
        )
    except Exception as e:
        # Report any backend failure inline instead of crashing the stream.
        yield f"[error] {type(e).__name__}: {e}"


with gr.Blocks() as demo:
    gr.Markdown("# 🤖 HF Inference API Chatbot (Gradio v6)\nUses your **HF_TOKEN**. Preflight checks model to prevent crashes.")
    model_name = gr.Textbox(
        value=DEFAULT_HF_API_MODEL,
        label="HF model repo",
        placeholder="e.g., NVIDIA/Nemotron-3-8B-Instruct",
    )
    with gr.Accordion("Advanced", open=False) as adv:
        system_prompt = gr.Textbox(value=SYSTEM_PROMPT_DEFAULT, label="System prompt", lines=3)
        temperature = gr.Slider(0.0, 1.5, value=DEFAULT_TEMPERATURE, step=0.05, label="Temperature")
        max_new_tokens = gr.Slider(16, 4096, value=DEFAULT_MAX_NEW_TOKENS, step=16, label="Max new tokens")
    gr.ChatInterface(
        fn=chat_fn,
        title="Chat",
        # Each example row supplies the message plus all additional inputs, in order.
        examples=[
            ["Summarize why the sky is blue in 3 sentences.", DEFAULT_HF_API_MODEL, SYSTEM_PROMPT_DEFAULT, DEFAULT_TEMPERATURE, DEFAULT_MAX_NEW_TOKENS],
            ["Draft a friendly product blurb for a coffee mug.", DEFAULT_HF_API_MODEL, SYSTEM_PROMPT_DEFAULT, DEFAULT_TEMPERATURE, DEFAULT_MAX_NEW_TOKENS],
            ["Explain binary search with a tiny Python example.", DEFAULT_HF_API_MODEL, SYSTEM_PROMPT_DEFAULT, DEFAULT_TEMPERATURE, DEFAULT_MAX_NEW_TOKENS],
        ],
        cache_examples=False,
        additional_inputs=[model_name, system_prompt, temperature, max_new_tokens],
        additional_inputs_accordion=adv,
        save_history=True,
        editable=True,
        autoscroll=True,
    )

if __name__ == "__main__":
    demo.queue().launch()