AlexHung29629's picture
Update app.py
37f620a verified
raw
history blame
1.02 kB
import torch
import spaces
import gradio as gr
from transformers import pipeline
from PIL import Image
from transformers import Pix2StructForConditionalGeneration, Pix2StructProcessor
# Load the Pix2Struct checkpoint once at import time; the model weights and the
# processor both come from the same checkpoint id.
CHECKPOINT = "google/pix2struct-screen2words-large"
model = Pix2StructForConditionalGeneration.from_pretrained(
    CHECKPOINT,
    dtype=torch.bfloat16,
)
model = model.to("cuda")
model.eval()  # inference only: switch off training-mode layers such as dropout
processor = Pix2StructProcessor.from_pretrained(CHECKPOINT)
# Inference entry point used by the Gradio interface
@spaces.GPU
def describe_ui(image):
    """Generate a text description for a UI screenshot.

    Args:
        image: The image to describe; forwarded to the Pix2Struct processor
            as ``images``. May be ``None`` if the UI submits without an
            upload (Gradio passes ``None`` for an empty image input).

    Returns:
        str: The decoded text of the first generated sequence, with special
        tokens stripped.

    Raises:
        gr.Error: If ``image`` is ``None``.
    """
    if image is None:
        # Show a readable message in the UI instead of a processor traceback.
        raise gr.Error("Please upload an image first.")

    # Cast/move the processed inputs to match the model's dtype and device.
    inputs = processor(images=image, text="", return_tensors="pt").to(
        dtype=torch.bfloat16, device="cuda"
    )
    predictions = model.generate(**inputs)
    # Strip special tokens so only the description text reaches the user.
    return processor.decode(predictions[0], skip_special_tokens=True)
# Assemble the Gradio interface around describe_ui, then start serving it
image_input = gr.Image(type="pil")
demo = gr.Interface(
    fn=describe_ui,
    inputs=image_input,
    outputs="text",
    title="UI Screen Describer (Pix2Struct)",
    description="Upload a screenshot or UI image and get an automatic description powered by Google’s Pix2Struct model.",
)
demo.launch()