# app.py — Gradio demo for yuyan-chen/Lep-Description-Qwen3-VL-2B-Instruct
# (removed Hugging Face web-UI scrape residue — "yuyan-chen's picture /
#  Update app.py / 153e3b7 verified" — which is not valid Python)
import torch
import gradio as gr
from transformers import AutoProcessor, Qwen3VLForConditionalGeneration
import os
# Finetuned Qwen3-VL checkpoint to serve; gated repos need HF_TOKEN set.
MODEL_ID = "yuyan-chen/Lep-Description-Qwen3-VL-2B-Instruct"
hf_token = os.environ.get("HF_TOKEN")

# Prefer GPU when available; fp16 on GPU, fp32 on CPU (fp16 is slow/unsupported
# on most CPUs).
device = "cuda" if torch.cuda.is_available() else "cpu"
dtype = torch.float16 if device == "cuda" else torch.float32

# trust_remote_code lets the repo's custom processing/model code load.
processor = AutoProcessor.from_pretrained(
    MODEL_ID,
    trust_remote_code=True,
    token=hf_token,
)
model = Qwen3VLForConditionalGeneration.from_pretrained(
    MODEL_ID,
    torch_dtype=dtype,
    device_map="auto",
    trust_remote_code=True,
    token=hf_token,
)
def infer(image, text):
    """Answer a user prompt about an uploaded image with the finetuned model.

    Args:
        image: PIL image from the Gradio Image widget.
        text: User prompt string.

    Returns:
        The model's generated reply, decoded without special tokens.
    """
    messages = [
        {
            "role": "user",
            "content": [
                {"type": "image", "image": image},
                {"type": "text", "text": text},
            ],
        }
    ]
    # BUG FIX: the processor cannot consume the raw chat-message list directly;
    # the chat template must be applied to build the prompt text and image
    # features (tokenize=True + return_dict=True yields model-ready tensors).
    inputs = processor.apply_chat_template(
        messages,
        add_generation_prompt=True,
        tokenize=True,
        return_dict=True,
        return_tensors="pt",
    ).to(device)
    with torch.no_grad():
        output_ids = model.generate(
            **inputs,
            max_new_tokens=512,
            do_sample=True,
            temperature=0.7,
        )
    # BUG FIX: generate() returns prompt + completion tokens; slice off the
    # prompt so the templated input is not echoed back to the user.
    prompt_len = inputs["input_ids"].shape[1]
    return processor.decode(
        output_ids[0][prompt_len:],
        skip_special_tokens=True,
    )
# Wire the inference function into a simple two-input, one-output UI.
input_widgets = [
    gr.Image(type="pil", label="Image"),
    gr.Textbox(label="Prompt"),
]
output_widget = gr.Textbox(label="Response")

demo = gr.Interface(
    fn=infer,
    inputs=input_widgets,
    outputs=output_widget,
    title="Qwen3-VL-2B (Finetuned)",
    description="Upload an image and ask a question.",
)

# Start the Gradio server (blocking call).
demo.launch()