import os
import gradio as gr
from huggingface_hub import InferenceClient

# ============================================================
# COUNT FROGULA'S AI INTEGRATION STACK
# Full InferenceClient + OpenAI-compatible HF Router
# Models: Top LLMs, VLMs, Coders, TTS/ASR, Embeddings
# Router: https://router.huggingface.co/v1
# ============================================================

# Maps the label shown in the model dropdown to the Hugging Face Hub repo id
# sent as the chat-completion model. Insertion order is the order in which
# the choices are listed in the dropdown. "GPT-OSS-20B" is both the dropdown's
# initial value and the id `respond` falls back to for an unknown label, so
# keep that entry in sync with those two places if it is renamed.
MODELS: dict[str, str] = {
    "Qwen3.5-397B-A17B (VLM)": "Qwen/Qwen3.5-397B-A17B",
    "Kimi-K2.5 (VLM)": "moonshotai/Kimi-K2.5",
    "GLM-5 (754B)": "zai-org/GLM-5",
    "Qwen3-Coder-Next": "Qwen/Qwen3-Coder-Next",
    "Llama-3.3-70B": "meta-llama/Llama-3.3-70B-Instruct",
    "Llama-3.1-8B": "meta-llama/Meta-Llama-3.1-8B-Instruct",
    "DeepSeek-R1": "deepseek-ai/DeepSeek-R1",
    "Qwen3-8B": "Qwen/Qwen3-8B",
    "GPT-OSS-120B": "openai/gpt-oss-120b",
    "GPT-OSS-20B": "openai/gpt-oss-20b",
}

# Default system prompt pre-filled in the "System Message" textbox of the chat
# UI. Written one logical line per element so individual lines are easy to
# diff; the joined value has no trailing newline.
SYSTEM_DEFAULT = "\n".join(
    (
        "You are COUNT FROGULA's AI Integration Assistant.",
        "You have access to the full HuggingFace AI stack:",
        "- Top LLMs & VLMs via InferenceClient",
        "- OpenAI-compatible endpoint at https://router.huggingface.co/v1",
        "- smolagents agentic framework",
        "- MCP tools integration",
        "Be helpful, precise, and maximize capability in every response.",
    )
)


def respond(
    message,
    history: list[dict],
    model_choice,
    system_message,
    max_tokens,
    temperature,
    top_p,
    hf_token: gr.OAuthToken | None,
):
    """Stream a chat completion for ``message`` from the selected model.

    Args:
        message: The new user message.
        history: Prior conversation turns as ``{"role": ..., "content": ...}``
            dicts (Gradio "messages" format).
        model_choice: A key of ``MODELS``; unknown labels fall back to
            ``"openai/gpt-oss-20b"``.
        system_message: System prompt; omitted from the request when blank.
        max_tokens: Maximum number of tokens to generate.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling probability mass.
        hf_token: OAuth token of the signed-in user, or ``None`` when nobody
            is logged in.

    Yields:
        The response text accumulated so far, once per streamed chunk that
        carries a choice, so the UI can render the reply incrementally.
    """
    # Prefer the signed-in user's token, then the HF_TOKEN environment
    # variable. Normalise "no token" to None: an empty string would be sent
    # as an empty bearer token instead of being treated as "not provided".
    token = (hf_token.token if hf_token else os.environ.get("HF_TOKEN")) or None
    model_id = MODELS.get(model_choice, "openai/gpt-oss-20b")

    # `model` and `base_url` are mutually exclusive on InferenceClient
    # (passing both raises ValueError), so the client only gets the router
    # URL and the model id is supplied per request below.
    client = InferenceClient(
        token=token,
        base_url="https://router.huggingface.co/v1",
    )

    messages = []
    if system_message and system_message.strip():
        messages.append({"role": "system", "content": system_message})
    # Forward only the chat-completion fields. NOTE(review): Gradio history
    # entries may carry extra UI keys (e.g. "metadata"); strict
    # OpenAI-compatible backends can reject unknown fields - confirm against
    # the Gradio version in use.
    messages.extend(
        {key: turn[key] for key in ("role", "content") if key in turn}
        for turn in history
    )
    messages.append({"role": "user", "content": message})

    response = ""
    for chunk in client.chat.completions.create(
        model=model_id,
        messages=messages,
        max_tokens=int(max_tokens),  # sliders may deliver a float
        temperature=temperature,
        top_p=top_p,
        stream=True,
    ):
        # Some stream chunks carry no choices; indexing [0] on them would
        # raise IndexError and abort the reply.
        if not chunk.choices:
            continue
        response += chunk.choices[0].delta.content or ""
        yield response


# Build the UI: a header, a model picker next to the Hugging Face login
# button, the chat interface wired to `respond`, and a footer.
with gr.Blocks(
    theme=gr.themes.Soft(primary_hue="purple", secondary_hue="green"),
    title="COUNT FROGULA's AI Integration Stack",
) as demo:
    gr.Markdown(
        """
    # COUNT FROGULA's AI Integration Stack
    **Full HF InferenceClient + OpenAI-compatible Router**
    > Models: Top LLMs, VLMs, Coders | Router: `https://router.huggingface.co/v1`
    """
    )

    with gr.Row():
        # Rendered here (not in the chat's additional-inputs area) so it sits
        # beside the login button; it is still passed to `respond` below.
        model_dropdown = gr.Dropdown(
            choices=list(MODELS.keys()),
            value="GPT-OSS-20B",
            label="Model",
            scale=2,
        )
        gr.LoginButton(scale=1)

    # The additional inputs map positionally onto respond's parameters after
    # (message, history): model_choice, system_message, max_tokens,
    # temperature, top_p. The OAuth token is NOT listed here: gr.OAuthToken is
    # a plain data class, not a component (instantiating it without arguments
    # raises TypeError), and Gradio injects it automatically based on the
    # `hf_token: gr.OAuthToken | None` annotation on `respond`.
    chatbot = gr.ChatInterface(
        respond,
        type="messages",
        additional_inputs=[
            model_dropdown,
            gr.Textbox(value=SYSTEM_DEFAULT, label="System Message", lines=4),
            gr.Slider(minimum=1, maximum=8192, value=2048, step=1, label="Max Tokens"),
            gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.05, label="Temperature"),
            gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p"),
        ],
        examples=[
            ["Explain the HuggingFace InferenceClient API and how to use the OpenAI-compatible router at router.huggingface.co/v1"],
            ["Write a Python script using smolagents with HuggingFace tools to build an autonomous coding agent"],
            ["Compare Qwen3.5-397B, Kimi-K2.5, and GLM-5 for enterprise AI integration tasks"],
            ["Generate a complete Next.js + Vercel deployment config for a HuggingFace Spaces integration"],
        ],
        # Examples call a remote model; do not pre-compute them at startup.
        cache_examples=False,
    )

    gr.Markdown(
        """
    ---
    **Integration Stack:** InferenceClient | smolagents | MCP Tools | OpenAI Router
    **Collection:** [COUNT FROGULA's AI Integration Stack](https://huggingface.co/collections/COUNTfrogula/count-frogulas-ai-integration-stack)
    """
    )

if __name__ == "__main__":
    demo.launch()