import gradio as gr
import torch
from threading import Thread
from transformers import (
    SmolVLMProcessor,
    AutoModelForImageTextToText,
    TextIteratorStreamer,
)

# ======================
# MODEL INIT
# ======================
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
MODEL_ID = "HuggingFaceTB/SmolVLM2-2.2B-Instruct"

processor = SmolVLMProcessor.from_pretrained(MODEL_ID)
model = AutoModelForImageTextToText.from_pretrained(
    MODEL_ID,
    # bf16 on GPU for speed/memory; fp32 on CPU where bf16 support is spotty.
    torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
).to(DEVICE).eval()


# ======================
# STREAMING INFERENCE
# ======================
def analyze_stream(text, image, max_tokens):
    """Stream a SmolVLM2 answer token-by-token for the Gradio UI.

    Args:
        text: User question/description (may be empty if an image is given).
        image: Filepath of the uploaded image, or None (gr.Image type="filepath").
        max_tokens: Maximum number of new tokens to generate.

    Yields:
        The progressively growing answer string (Gradio re-renders each yield).
    """
    # BUG FIX: this function is a generator, so `return "msg"` would never be
    # shown in the UI (the value only lands in StopIteration). Yield instead.
    if image is None and not text.strip():
        yield "❌ Veuillez fournir un texte ou une image."
        return

    content = []
    if image:
        content.append({"type": "image", "path": image})
    if text.strip():
        content.append({"type": "text", "text": text})

    messages = [{"role": "user", "content": content}]

    # BUG FIX: return_dict=True is required — without it apply_chat_template
    # (with tokenize=True) returns a bare input_ids tensor, and the `**inputs`
    # unpacking in generate() below would fail.
    inputs = processor.apply_chat_template(
        messages,
        add_generation_prompt=True,
        tokenize=True,
        return_dict=True,
        return_tensors="pt",
    ).to(DEVICE, dtype=model.dtype)
    # ^ BatchFeature.to casts only floating tensors (pixel_values) to the
    # model dtype — input_ids stay integer. Avoids a bf16/fp32 mismatch on GPU.

    streamer = TextIteratorStreamer(
        processor,
        skip_prompt=True,
        skip_special_tokens=True,
    )

    # Run generation in a background thread; the streamer feeds decoded text
    # back to this generator. daemon=True so the thread never blocks shutdown.
    # NOTE: temperature removed — it is ignored (and warns, or errors in some
    # transformers versions) when do_sample=False (greedy decoding).
    Thread(
        target=model.generate,
        kwargs=dict(
            **inputs,
            streamer=streamer,
            max_new_tokens=max_tokens,
            do_sample=False,
        ),
        daemon=True,
    ).start()

    output = ""
    for token in streamer:
        output += token
        yield output


# ======================
# GRADIO UI
# ======================
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("## ⚡ SmolVLM2 – Analyse Temps Réel")

    with gr.Row():
        with gr.Column():
            txt = gr.Textbox(
                label="Question / Description",
                lines=3,
            )
            img = gr.Image(type="filepath", label="Image")
            max_tokens = gr.Slider(
                50, 400, value=200, step=50, label="Max Tokens"
            )
            btn = gr.Button("🚀 Analyser", variant="primary")

        with gr.Column():
            out = gr.Textbox(
                label="Réponse en Temps Réel",
                lines=14,
            )

    # analyze_stream is a generator, so Gradio streams each yielded string.
    btn.click(
        fn=analyze_stream,
        inputs=[txt, img, max_tokens],
        outputs=out,
    )

demo.launch()