from __future__ import annotations

import os
from typing import Any

import gradio as gr
import requests
from huggingface_hub import InferenceClient

# Hugging Face collections API endpoint for the official Qwen3.5 collection.
COLLECTION_API = "https://huggingface.co/api/collections/Qwen/qwen35"


def fetch_qwen35_models() -> list[dict[str, Any]]:
    """Fetch the Qwen3.5 model list from the HF collections API.

    Returns:
        A list of dicts, one per model, with keys:
            ``id``: model repo id (e.g. ``"Qwen/Qwen3.5-35B-A3B"``).
            ``live_providers``: sorted, de-duplicated names of inference
            providers whose provider status AND model status are both
            ``"live"``; may be empty.

    If the API request fails or returns invalid JSON, a static fallback
    list is returned so the app can still start during transient HF
    outages (each fallback entry reports ``["unknown"]`` providers).
    """
    try:
        response = requests.get(COLLECTION_API, timeout=30)
        response.raise_for_status()
        payload = response.json()
    except (requests.RequestException, ValueError):
        # Narrow catch: RequestException covers connection/HTTP errors
        # (including raise_for_status), ValueError covers JSON decode
        # failures. Minimal fallback for resilience if the HF collection
        # API is transiently unavailable.
        return [
            {"id": "Qwen/Qwen3.5-35B-A3B", "live_providers": ["unknown"]},
            {"id": "Qwen/Qwen3.5-27B", "live_providers": ["unknown"]},
            {"id": "Qwen/Qwen3.5-9B", "live_providers": ["unknown"]},
            {"id": "Qwen/Qwen3.5-4B", "live_providers": ["unknown"]},
            {"id": "Qwen/Qwen3.5-2B", "live_providers": ["unknown"]},
            {"id": "Qwen/Qwen3.5-0.8B", "live_providers": ["unknown"]},
        ]

    models: list[dict[str, Any]] = []
    for item in payload.get("items", []):
        # The collection can also contain datasets/papers; keep models only.
        if item.get("type") != "model":
            continue
        model_id = item.get("id")
        if not model_id:
            continue
        providers = []
        for provider in item.get("availableInferenceProviders", []) or []:
            if provider.get("providerStatus") == "live" and provider.get("modelStatus") == "live":
                name = provider.get("provider")
                # Skip entries with no provider name rather than
                # recording the literal string "None".
                if name:
                    providers.append(str(name))
        models.append(
            {
                "id": model_id,
                "live_providers": sorted(set(providers)),
            }
        )
    return models


# Fetched once at import time; the Space restarts to pick up new models.
MODEL_INFO = fetch_qwen35_models()
MODEL_IDS = [x["id"] for x in MODEL_INFO]
DEFAULT_MODEL = MODEL_IDS[0] if MODEL_IDS else "Qwen/Qwen3.5-35B-A3B"
PROVIDER_LOOKUP = {x["id"]: x.get("live_providers", []) for x in MODEL_INFO}


def provider_note(model_id: str) -> str:
    """Return a one-line Markdown note listing live providers for *model_id*."""
    providers = PROVIDER_LOOKUP.get(model_id, [])
    if providers:
        return f"Live inference providers: {', '.join(providers)}"
    return "No live provider listed by HF for this model right now. Try another model."
def generate_reply(
    message: str,
    history: list[tuple[str, str]],
    model_id: str,
    system_prompt: str,
    max_new_tokens: int,
    temperature: float,
    top_p: float,
) -> str:
    """Run one chat-completion turn against the selected Qwen3.5 model.

    Converts Gradio's ``(user, assistant)`` tuple history into the
    OpenAI-style message list expected by
    ``InferenceClient.chat_completion``, prepending the system prompt
    when it is non-empty.

    Returns:
        The assistant reply text, or a human-readable error message if
        the inference call fails (e.g. no live provider for the model).
    """
    # HF_TOKEN is expected to be configured as a Space secret; provider
    # routing happens server-side based on the model id.
    token = os.getenv("HF_TOKEN")
    client = InferenceClient(token=token, timeout=120)

    messages = []
    system = system_prompt.strip()  # hoisted: strip once, use twice
    if system:
        messages.append({"role": "system", "content": system})
    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})

    try:
        result = client.chat_completion(
            model=model_id,
            messages=messages,
            max_tokens=int(max_new_tokens),
            temperature=float(temperature),
            top_p=float(top_p),
        )
        reply = result.choices[0].message.content
        if isinstance(reply, str):
            return reply
        # `content` may legitimately be None (empty completion); show an
        # empty reply rather than the literal string "None".
        if reply is None:
            return ""
        return str(reply)
    except Exception as exc:
        # Broad catch is deliberate at this UI boundary: surface the
        # failure as chat text instead of crashing the Space.
        return (
            f"Model call failed for `{model_id}`.\n\n"
            f"Details: {exc}\n\n"
            "Try another model from the dropdown. Some models may not currently have a live provider."
        )


with gr.Blocks(title="Qwen3.5 Chat") as demo:
    gr.Markdown("# Qwen3.5 Chat")
    gr.Markdown(
        "Select a model from the official Qwen3.5 collection and chat. "
        "This Space uses Hugging Face Inference providers via `HF_TOKEN`."
    )

    model_dd = gr.Dropdown(
        choices=MODEL_IDS,
        value=DEFAULT_MODEL,
        label="Qwen3.5 Model",
        allow_custom_value=False,
    )
    # Shows which providers currently serve the selected model.
    provider_md = gr.Markdown(provider_note(DEFAULT_MODEL))

    with gr.Accordion("Generation Settings", open=False):
        system_prompt = gr.Textbox(
            label="System prompt",
            value="You are a helpful assistant.",
            lines=2,
        )
        max_new_tokens = gr.Slider(
            label="Max new tokens",
            minimum=64,
            maximum=4096,
            step=32,
            value=1024,
        )
        temperature = gr.Slider(
            label="Temperature",
            minimum=0.0,
            maximum=2.0,
            step=0.05,
            value=0.7,
        )
        top_p = gr.Slider(
            label="Top-p",
            minimum=0.1,
            maximum=1.0,
            step=0.05,
            value=0.9,
        )

    # Refresh the provider-availability note whenever the model changes.
    model_dd.change(fn=provider_note, inputs=model_dd, outputs=provider_md)

    gr.ChatInterface(
        fn=generate_reply,
        additional_inputs=[model_dd, system_prompt, max_new_tokens, temperature, top_p],
    )

demo.queue(max_size=32).launch()