# app.py — SmolVLM2 streaming demo (Hugging Face Space, commit 0aa2638)
import gradio as gr
import torch
from threading import Thread
from transformers import (
SmolVLMProcessor,
AutoModelForImageTextToText,
TextIteratorStreamer,
)
# ======================
# MODEL INITIALIZATION
# ======================
# Use the GPU when present; bfloat16 there halves memory, float32 on CPU.
_HAS_CUDA = torch.cuda.is_available()
DEVICE = torch.device("cuda" if _HAS_CUDA else "cpu")
MODEL_ID = "HuggingFaceTB/SmolVLM2-2.2B-Instruct"

processor = SmolVLMProcessor.from_pretrained(MODEL_ID)
model = (
    AutoModelForImageTextToText.from_pretrained(
        MODEL_ID,
        torch_dtype=torch.bfloat16 if _HAS_CUDA else torch.float32,
    )
    .to(DEVICE)
    .eval()
)
# ======================
# STREAMING INFERENCE
# ======================
def analyze_stream(text, image, max_tokens):
    """Stream a SmolVLM2 answer for a question and/or an image.

    Args:
        text: User question (may be empty when an image is provided).
        image: Filepath to an image (Gradio ``type="filepath"``) or None.
        max_tokens: Maximum number of new tokens to generate.

    Yields:
        str: The partial answer so far, growing token by token so Gradio
        can render it live.
    """
    # BUG FIX: the original used `return "❌ …"` — inside a generator a
    # `return value` is swallowed (it only stops iteration), so the error
    # message never reached the UI. It must be yielded.
    if image is None and not text.strip():
        yield "❌ Veuillez fournir un texte ou une image."
        return

    content = []
    if image:
        content.append({"type": "image", "path": image})
    if text.strip():
        content.append({"type": "text", "text": text})
    messages = [{"role": "user", "content": content}]

    # BUG FIX: return_dict=True is required — without it apply_chat_template
    # returns only token ids and the `**inputs` unpacking below fails.
    # Casting to model.dtype keeps pixel values in bfloat16 on GPU
    # (integer ids are left untouched by BatchFeature.to).
    inputs = processor.apply_chat_template(
        messages,
        add_generation_prompt=True,
        tokenize=True,
        return_dict=True,
        return_tensors="pt",
    ).to(DEVICE, dtype=model.dtype)

    streamer = TextIteratorStreamer(
        processor,
        skip_prompt=True,
        skip_special_tokens=True,
    )

    # Generation runs in a background thread; the streamer hands tokens to
    # this generator as they are produced. daemon=True so a hung generation
    # cannot keep the process alive on shutdown.
    # NOTE: temperature was dropped — with do_sample=False (greedy decoding)
    # it is ignored, and temperature=0.0 trips transformers' validation.
    Thread(
        target=model.generate,
        kwargs=dict(
            **inputs,
            streamer=streamer,
            max_new_tokens=max_tokens,
            do_sample=False,
        ),
        daemon=True,
    ).start()

    output = ""
    for token in streamer:
        output += token
        yield output
# ======================
# GRADIO UI
# ======================
# Two-column layout: inputs (question, image, token budget) on the left,
# the streamed answer on the right.
with gr.Blocks(theme=gr.themes.Soft()) as app:
    gr.Markdown("## ⚡ SmolVLM2 – Analyse Temps Réel")
    with gr.Row():
        with gr.Column():
            question_input = gr.Textbox(label="Question / Description", lines=3)
            image_input = gr.Image(type="filepath", label="Image")
            token_slider = gr.Slider(
                minimum=50, maximum=400, value=200, step=50, label="Max Tokens"
            )
            run_button = gr.Button("🚀 Analyser", variant="primary")
        with gr.Column():
            answer_output = gr.Textbox(label="Réponse en Temps Réel", lines=14)

    # Streaming handler: analyze_stream is a generator, so the output
    # textbox updates live as tokens arrive.
    run_button.click(
        fn=analyze_stream,
        inputs=[question_input, image_input, token_slider],
        outputs=answer_output,
    )

app.launch()