UI_Screen_Description_Generator_with_Pix2Struct

Sleeping

AlexHung29629 committed on Nov 3

Commit

b9f48bf

verified ·

1 Parent(s): c2763e8

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -2,14 +2,18 @@ import spaces
 import gradio as gr
 from transformers import pipeline
 from PIL import Image
 # Load model and processor
-pipe = pipeline("image-text-to-text", model="google/pix2struct-screen2words-large")
 # Define the function
 @spaces.GPU
 def describe_ui(image):
-    outputs = pipe(image, text="describe this image.")
-    return outputs[0]['generated_text']
 # Launch the Gradio interface
 gr.Interface(

 import gradio as gr
 from transformers import pipeline
 from PIL import Image
+from transformers import Pix2StructForConditionalGeneration, Pix2StructProcessor
 # Load model and processor
+model = Pix2StructForConditionalGeneration.from_pretrained("google/pix2struct-screen2words-large", torch_dtype=torch.bfloat16).to("cuda")
+processor = Pix2StructProcessor.from_pretrained("google/pix2struct-screen2words-large")
 # Define the function
 @spaces.GPU
 def describe_ui(image):
+    inputs = processor(images=image, text="describe this image: ", return_tensors="pt")
+    predictions = model.generate(**inputs)
+    return processor.decode(predictions[0], skip_special_tokens=True)
 # Launch the Gradio interface
 gr.Interface(