# app.py import gradio as gr from transformers import AutoProcessor, AutoModelForVision2Seq from PIL import Image import torch MODEL_ID = "Qwen/Qwen2-VL-7B-Instruct" device = "cuda" if torch.cuda.is_available() else "cpu" print(f"Loading model: {MODEL_ID}") processor = AutoProcessor.from_pretrained(MODEL_ID) model = AutoModelForVision2Seq.from_pretrained( MODEL_ID, torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32, device_map="auto" ) def describe_ingredients(image): """Ask the model to identify ingredients in an image.""" prompt = "Describe the food in this image and list its likely ingredients." inputs = processor(images=image, text=prompt, return_tensors="pt").to(device) output = model.generate(**inputs, max_new_tokens=128) result = processor.batch_decode(output, skip_special_tokens=True)[0] return result.strip() demo = gr.Interface( fn=describe_ingredients, inputs=gr.Image(type="pil"), outputs="text", title="🍱 Food Ingredient Analyzer (Qwen2-VL)", description="Upload a food image — the model will describe it and list possible ingredients." ) if __name__ == "__main__": demo.launch()