ui-refexp-click

Sleeping

App Files Files Community

root committed on Feb 28, 2023

Commit

7ba23b1

1 Parent(s): a8e6b52

fix: update params for API

Browse files

Signed-off-by: root <root@ip-172-31-73-185.ec2.internal>

Files changed (1) hide show

app.py +37 -28

app.py CHANGED Viewed

@@ -7,31 +7,37 @@ import html
 from transformers import DonutProcessor, VisionEncoderDecoderModel
-global model, processor, device
 def load_model(pretrained_revision: str = 'main'):
-    global model, processor, device
     pretrained_repo_name = 'ivelin/donut-refexp-click'
     # revision can be git commit hash, branch or tag
     # use 'main' for latest revision
-    print(f"Loading model checkpoint from repo: {pretrained_repo_name}, revision: {pretrained_revision}")
-    processor = DonutProcessor.from_pretrained(
         pretrained_repo_name, revision=pretrained_revision, use_auth_token="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")
-    processor.image_processor.do_align_long_axis = False
-    # do not manipulate image size and position
-    processor.image_processor.do_resize = False
-    processor.image_processor.do_thumbnail = False
-    processor.image_processor.do_pad = False
-    # processor.image_processor.do_rescale = False
-    processor.image_processor.do_normalize = True
-    print(f'processor image size: {processor.image_processor.size}')
-    model = VisionEncoderDecoderModel.from_pretrained(
         pretrained_repo_name, use_auth_token="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", revision=pretrained_revision)
-    device = "cuda" if torch.cuda.is_available() else "cpu"
-    model.to(device)
 def prepare_image_for_encoder(image=None, output_image_size=None):
     """
@@ -89,7 +95,7 @@ def translate_point_coords_from_out_to_in(point=None, input_image_size=None, out
         f"translated point={point}, resized_image_size: {resized_width, resized_height}")
-def process_refexp(image: Image, prompt: str, model_revision: str = 'main'):
     print(f"(image, prompt): {image}, {prompt}")
@@ -182,13 +188,16 @@ def process_refexp(image: Image, prompt: str, model_revision: str = 'main'):
     print(
         f"to image pixel values: x, y: {x, y}")
-    # draw center point circle
-    img1 = ImageDraw.Draw(image)
-    r = 30
-    shape = [(x-r, y-r), (x+r, y+r)]
-    img1.ellipse(shape, outline="green", width=20)
-    img1.ellipse(shape, outline="white", width=10)
     return image, center_point
@@ -221,7 +230,7 @@ examples = [["example_1.jpg", "select the setting icon from top right corner", "
             ]
 demo = gr.Interface(fn=process_refexp,
-                    inputs=[gr.Image(type="pil"), "text", "text"],
                     outputs=[gr.Image(type="pil"), "json"],
                     title=title,
                     description=description,
@@ -231,4 +240,4 @@ demo = gr.Interface(fn=process_refexp,
                     cache_examples=False
                     )
-demo.launch()  # share=True when running in a Jupyter Notebook

 from transformers import DonutProcessor, VisionEncoderDecoderModel
# Module-level cache for the most recently loaded checkpoint. load_model()
# fills these in and reuses them while the same revision keeps being requested.
model = None
previous_revision = None  # not read by load_model; kept in case other code references it
processor = None
device = None
loaded_revision = None


def load_model(pretrained_revision: str = 'main'):
    """Load the Donut processor and model for a revision, reusing the cache.

    The results are stored in the module-level ``processor``, ``model``,
    ``device`` and ``loaded_revision`` variables; nothing is returned.
    When the requested revision is already loaded the call is a no-op.

    The cache is only updated after every loading step has succeeded, so a
    failed load never leaves ``loaded_revision`` pointing at a revision
    whose processor/model were not actually loaded.

    Args:
        pretrained_revision: Git commit hash, branch or tag of the model
            repo. Use 'main' for the latest revision.
    """
    global model, loaded_revision, processor, device
    # Function-scope import so this block does not depend on the file's
    # top-level import list.
    import os

    pretrained_repo_name = 'ivelin/donut-refexp-click'

    already_loaded = (
        processor is not None
        and model is not None
        and loaded_revision is not None
        and loaded_revision == pretrained_revision
    )
    if already_loaded:
        return

    # Never embed access tokens in source code. Read the token from the
    # environment instead; None means no explicit token is passed.
    auth_token = (os.environ.get("HF_TOKEN")
                  or os.environ.get("HUGGING_FACE_HUB_TOKEN"))

    print(f"Loading model checkpoint from repo: {pretrained_repo_name}, revision: {pretrained_revision}")
    new_processor = DonutProcessor.from_pretrained(
        pretrained_repo_name, revision=pretrained_revision, use_auth_token=auth_token)
    new_processor.image_processor.do_align_long_axis = False
    # do not manipulate image size and position
    new_processor.image_processor.do_resize = False
    new_processor.image_processor.do_thumbnail = False
    new_processor.image_processor.do_pad = False
    # do_rescale is deliberately left at the processor's default
    new_processor.image_processor.do_normalize = True
    print(f'processor image size: {new_processor.image_processor.size}')

    new_model = VisionEncoderDecoderModel.from_pretrained(
        pretrained_repo_name, use_auth_token=auth_token, revision=pretrained_revision)
    print('model checkpoint loaded')
    new_device = "cuda" if torch.cuda.is_available() else "cpu"
    new_model.to(new_device)

    # Commit the new state only now that every step above has succeeded.
    processor = new_processor
    model = new_model
    device = new_device
    loaded_revision = pretrained_revision
 def prepare_image_for_encoder(image=None, output_image_size=None):
     """
         f"translated point={point}, resized_image_size: {resized_width, resized_height}")
+def process_refexp(image: Image, prompt: str, model_revision: str = 'main', return_annotated_image: bool = False):
     print(f"(image, prompt): {image}, {prompt}")
     print(
         f"to image pixel values: x, y: {x, y}")
+    if return_annotated_image:
+      # draw center point circle
+      img1 = ImageDraw.Draw(image)
+      r = 30
+      shape = [(x-r, y-r), (x+r, y+r)]
+      img1.ellipse(shape, outline="green", width=20)
+      img1.ellipse(shape, outline="white", width=10)
+    else:
+      # do not return image if its an API call to save bandwidth
+      image = None
     return image, center_point
             ]
 demo = gr.Interface(fn=process_refexp,
+                    inputs=[gr.Image(type="pil"), "text", "text", gr.Checkbox(value=True, label="Return Annotated Image", visible=False)],
                     outputs=[gr.Image(type="pil"), "json"],
                     title=title,
                     description=description,
                     cache_examples=False
                     )
+demo.launch(server_name="0.0.0.0")  # share=True when running in a Jupyter Notebook