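# NOTE: the following lines are residue from the Hugging Face file viewer,
# kept as comments so that the module remains valid Python.
# AlexHung29629's picture
# Update app.py
# c443b84 verified
# raw
# history blame
# 1.04 kB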
import torch
import spaces
import gradio as gr
from transformers import pipeline
from PIL import Image
from transformers import Pix2StructForConditionalGeneration, Pix2StructProcessor
# Load model and processor
# One checkpoint id is shared by the weights and the processor so the two
# can never drift apart.
CHECKPOINT = "google/pix2struct-screen2words-large"

# Weights are loaded in bfloat16, moved to the CUDA device, and put into
# evaluation mode before any request is served.
model = Pix2StructForConditionalGeneration.from_pretrained(
    CHECKPOINT,
    dtype=torch.bfloat16,
)
model = model.to("cuda")
model.eval()

# Processor that prepares the inputs for `model` and decodes its outputs.
processor = Pix2StructProcessor.from_pretrained(CHECKPOINT)
# Define the function
@spaces.GPU
def describe_ui(image):
    """Generate a text description of a UI screenshot.

    Args:
        image: The image to describe, as handed over by the Gradio image
            input. It is ``None`` when the user submits the form without
            uploading anything.

    Returns:
        The decoded text of the first generated sequence, with special
        tokens stripped.

    Raises:
        gr.Error: If no image was provided, so the UI shows an actionable
            message instead of an opaque processor failure.
    """
    # Guard against an empty submission before touching the processor.
    if image is None:
        raise gr.Error("Please upload an image before submitting.")

    # Build the model inputs and move them to the same device and floating
    # point dtype as the model (bfloat16 on CUDA).
    inputs = processor(
        images=image,
        text="describe this image: ",
        return_tensors="pt",
    ).to(dtype=torch.bfloat16, device="cuda")

    predictions = model.generate(**inputs)
    return processor.decode(predictions[0], skip_special_tokens=True)
# Launch the Gradio interface
# The image input hands the uploaded picture to `describe_ui` as a PIL image.
screenshot_input = gr.Image(type="pil")

demo = gr.Interface(
    fn=describe_ui,
    inputs=screenshot_input,
    outputs="text",
    title="UI Screen Describer (Pix2Struct)",
    description="Upload a screenshot or UI image and get an automatic description powered by Google’s Pix2Struct model.",
)

# Start serving the app.
demo.launch()