AlexHung29629's picture
Update app.py
04e2e3b verified
raw
history blame
990 Bytes
import torch
import spaces
import gradio as gr
from transformers import pipeline
from PIL import Image
from transformers import Pix2StructForConditionalGeneration, Pix2StructProcessor
# Single checkpoint id shared by the model weights and its processor.
_CHECKPOINT = "google/pix2struct-screen2words-large"

# Load the weights in bfloat16, then move the model onto the GPU.
model = Pix2StructForConditionalGeneration.from_pretrained(
    _CHECKPOINT,
    dtype=torch.bfloat16,
)
model = model.to("cuda")

# Matching processor for the same checkpoint.
processor = Pix2StructProcessor.from_pretrained(_CHECKPOINT)
# Define the function
@spaces.GPU
def describe_ui(image):
    """Generate a text description of a UI screenshot.

    Args:
        image: PIL image supplied by the Gradio ``Image`` input, or ``None``
            when the user submits without uploading anything.

    Returns:
        The decoded model output as a string, with special tokens stripped.

    Raises:
        gr.Error: If no image was provided.
    """
    if image is None:
        # Surface a readable message in the UI instead of an opaque
        # processor exception.
        raise gr.Error("Please upload an image first.")

    inputs = processor(images=image, text="describe this image: ", return_tensors="pt")
    # The model was moved to the GPU in bfloat16, while the processor returns
    # CPU float32 tensors. Move the batch to the model's device and dtype so
    # generate() does not fail on a device/dtype mismatch. BatchFeature.to
    # only casts floating-point tensors, so integer token ids keep their dtype.
    inputs = inputs.to(model.device, model.dtype)
    predictions = model.generate(**inputs)
    return processor.decode(predictions[0], skip_special_tokens=True)
# Build the Gradio interface: one PIL image in, plain text out.
demo = gr.Interface(
    fn=describe_ui,
    inputs=gr.Image(type="pil"),
    outputs="text",
    title="UI Screen Describer (Pix2Struct)",
    description="Upload a screenshot or UI image and get an automatic description powered by Google’s Pix2Struct model.",
)

# Start serving the app.
demo.launch()