# app.py — SmolVLM2 streaming demo (Hugging Face Space, commit 0aa2638)
import gradio as gr
import torch
from threading import Thread
from transformers import (
SmolVLMProcessor,
AutoModelForImageTextToText,
TextIteratorStreamer,
)
# ======================
# MODEL INITIALIZATION
# ======================
# Use the GPU when present; bfloat16 there halves memory, float32 on CPU.
_HAS_CUDA = torch.cuda.is_available()
DEVICE = torch.device("cuda" if _HAS_CUDA else "cpu")
MODEL_ID = "HuggingFaceTB/SmolVLM2-2.2B-Instruct"

processor = SmolVLMProcessor.from_pretrained(MODEL_ID)
model = (
    AutoModelForImageTextToText.from_pretrained(
        MODEL_ID,
        torch_dtype=torch.bfloat16 if _HAS_CUDA else torch.float32,
    )
    .to(DEVICE)
    .eval()
)
# ======================
# STREAMING INFERENCE
# ======================
def analyze_stream(text, image, max_tokens):
    """Stream a SmolVLM2 answer for a question and/or an image.

    Args:
        text: User question (may be empty when an image is provided).
        image: Filepath to an image (Gradio ``type="filepath"``) or None.
        max_tokens: Maximum number of new tokens to generate.

    Yields:
        str: The partial answer so far, growing token by token so Gradio
        can render it live.
    """
    # BUG FIX: the original used `return "❌ …"` — inside a generator a
    # `return value` is swallowed (it only stops iteration), so the error
    # message never reached the UI. It must be yielded.
    if image is None and not text.strip():
        yield "❌ Veuillez fournir un texte ou une image."
        return

    content = []
    if image:
        content.append({"type": "image", "path": image})
    if text.strip():
        content.append({"type": "text", "text": text})
    messages = [{"role": "user", "content": content}]

    # BUG FIX: return_dict=True is required — without it apply_chat_template
    # returns only token ids and the `**inputs` unpacking below fails.
    # Casting to model.dtype keeps pixel values in bfloat16 on GPU
    # (integer ids are left untouched by BatchFeature.to).
    inputs = processor.apply_chat_template(
        messages,
        add_generation_prompt=True,
        tokenize=True,
        return_dict=True,
        return_tensors="pt",
    ).to(DEVICE, dtype=model.dtype)

    streamer = TextIteratorStreamer(
        processor,
        skip_prompt=True,
        skip_special_tokens=True,
    )

    # Generation runs in a background thread; the streamer hands tokens to
    # this generator as they are produced. daemon=True so a hung generation
    # cannot keep the process alive on shutdown.
    # NOTE: temperature was dropped — with do_sample=False (greedy decoding)
    # it is ignored, and temperature=0.0 trips transformers' validation.
    Thread(
        target=model.generate,
        kwargs=dict(
            **inputs,
            streamer=streamer,
            max_new_tokens=max_tokens,
            do_sample=False,
        ),
        daemon=True,
    ).start()

    output = ""
    for token in streamer:
        output += token
        yield output
# ======================
# GRADIO UI
# ======================
# Two-column layout: inputs (question, image, token budget) on the left,
# the streamed answer on the right.
with gr.Blocks(theme=gr.themes.Soft()) as app:
    gr.Markdown("## ⚡ SmolVLM2 – Analyse Temps Réel")
    with gr.Row():
        with gr.Column():
            question_input = gr.Textbox(label="Question / Description", lines=3)
            image_input = gr.Image(type="filepath", label="Image")
            token_slider = gr.Slider(
                minimum=50, maximum=400, value=200, step=50, label="Max Tokens"
            )
            run_button = gr.Button("🚀 Analyser", variant="primary")
        with gr.Column():
            answer_output = gr.Textbox(label="Réponse en Temps Réel", lines=14)

    # Streaming handler: analyze_stream is a generator, so the output
    # textbox updates live as tokens arrive.
    run_button.click(
        fn=analyze_stream,
        inputs=[question_input, image_input, token_slider],
        outputs=answer_output,
    )

app.launch()