|
|
|
|
|
import gradio as gr |
|
|
from transformers import AutoProcessor, AutoModelForVision2Seq |
|
|
from PIL import Image |
|
|
import torch |
|
|
|
|
|
# Hugging Face model id for the vision-language model used by the app.
MODEL_ID = "Qwen/Qwen2-VL-7B-Instruct"

# Preferred device string; used for moving inputs at inference time.
device = "cuda" if torch.cuda.is_available() else "cpu"


print(f"Loading model: {MODEL_ID}")

# Processor bundles the tokenizer and the image preprocessor for this model.
processor = AutoProcessor.from_pretrained(MODEL_ID)

model = AutoModelForVision2Seq.from_pretrained(
    MODEL_ID,
    # fp16 on GPU to halve memory; fp32 on CPU where fp16 is slow/unsupported.
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
    # Let accelerate place the weights automatically (may shard across GPUs).
    device_map="auto"
)
|
|
|
|
|
def describe_ingredients(image):
    """Describe the food in *image* and list its likely ingredients.

    Parameters
    ----------
    image : PIL.Image.Image | None
        Image from the Gradio widget; ``None`` when nothing was uploaded.

    Returns
    -------
    str
        The model's generated description, or a hint to upload an image.
    """
    # Gradio passes None when the user submits without an image.
    if image is None:
        return "Please upload an image first."

    prompt = "Describe the food in this image and list its likely ingredients."

    # Qwen2-VL expects a chat-formatted prompt that includes an image
    # placeholder; a bare text string leaves the image tokens unaligned.
    messages = [
        {
            "role": "user",
            "content": [
                {"type": "image"},
                {"type": "text", "text": prompt},
            ],
        }
    ]
    chat_text = processor.apply_chat_template(messages, add_generation_prompt=True)

    # model.device is correct even under device_map="auto" placement.
    inputs = processor(images=image, text=chat_text, return_tensors="pt").to(model.device)

    # inference_mode: skip autograd bookkeeping during generation.
    with torch.inference_mode():
        output = model.generate(**inputs, max_new_tokens=128)

    # generate() returns prompt + completion; decode only the new tokens
    # so the instruction text is not echoed back to the user.
    generated = output[:, inputs["input_ids"].shape[1]:]
    result = processor.batch_decode(generated, skip_special_tokens=True)[0]
    return result.strip()
|
|
|
|
|
# Single image widget feeding the analyzer; PIL objects go straight to the
# processor without an intermediate conversion step.
image_input = gr.Image(type="pil")

# Minimal web UI: one image in, plain text out.
demo = gr.Interface(
    describe_ingredients,
    inputs=image_input,
    outputs="text",
    title="🍱 Food Ingredient Analyzer (Qwen2-VL)",
    description="Upload a food image — the model will describe it and list possible ingredients.",
)
|
|
|
|
|
if __name__ == "__main__":
    # Start the Gradio server only when run as a script (not on import).
    demo.launch()