Commit 2b09f60 · Parent(s): c3b4316
Update app.py

app.py CHANGED
@@ -93,28 +93,26 @@ def draw_mask(mask, image, random_color=True):
     return np.array(Image.alpha_composite(annotated_frame_pil, mask_image_pil))
 
 
-def run_grounding(input_image, grounding_caption, box_threshold, text_threshold):
+def run_grounding(input_image,choice, grounding_caption, box_threshold, text_threshold,do_segmentation):
     init_image = input_image.convert("RGB")
     original_size = init_image.size
 
     _, image_tensor = image_transform_grounding(init_image)
     image_pil: Image = image_transform_grounding_for_vis(init_image)
 
-
-    if task=='predict':
-        boxes, logits, phrases = predict(model, image_tensor, grounding_caption, box_threshold, text_threshold, device='cpu')
-        annotated_frame = annotate(image_source=np.asarray(image_pil), boxes=boxes, logits=logits, phrases=phrases)
-        image_with_box = Image.fromarray(cv2.cvtColor(annotated_frame, cv2.COLOR_BGR2RGB))
-
-        return image_with_box
-
-    elif task=='segment':
+    if choice == 'segment':
         boxes, logits, phrases = predict(model, image_tensor, grounding_caption, box_threshold, text_threshold, device='cpu')
         segmented_frame_masks = segment(image_tensor, model, boxes=boxes)
         annotated_frame_with_mask = draw_mask(segmented_frame_masks[0][0], annotated_frame)
-
-
-
+    else:
+        # run grounding
+        boxes, logits, phrases = predict(model, image_tensor, grounding_caption, box_threshold, text_threshold, device='cpu')
+        annotated_frame = annotate(image_source=np.asarray(image_pil), boxes=boxes, logits=logits, phrases=phrases)
+
+        image_with_box = Image.fromarray(cv2.cvtColor(annotated_frame, cv2.COLOR_BGR2RGB))
+
+        return image_with_box
+
 
 
 if __name__ == "__main__":
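The updated run_grounding branches on the new choice input: when choice is 'segment' it runs detection, segmentation, and mask drawing, otherwise it runs detection only and returns the boxed image. As committed, only the detection branch returns a value, the segmentation branch appears to use annotated_frame before it is assigned, and the extra do_segmentation parameter is never used. Below is a minimal sketch, not the committed code, of one way the function could be tightened up, assuming the predict, annotate, segment, draw_mask, model, and image-transform helpers defined earlier in app.py.

# Sketch only: drops the unused do_segmentation parameter and returns an image
# from the segmentation branch as well as the detection branch.
def run_grounding(input_image, choice, grounding_caption, box_threshold, text_threshold):
    init_image = input_image.convert("RGB")
    _, image_tensor = image_transform_grounding(init_image)
    image_pil: Image = image_transform_grounding_for_vis(init_image)

    # Detection runs in both branches.
    boxes, logits, phrases = predict(
        model, image_tensor, grounding_caption, box_threshold, text_threshold, device='cpu'
    )
    annotated_frame = annotate(
        image_source=np.asarray(image_pil), boxes=boxes, logits=logits, phrases=phrases
    )

    if choice == 'segment':
        # Overlay the first predicted mask on the annotated frame and return it.
        segmented_frame_masks = segment(image_tensor, model, boxes=boxes)
        annotated_frame_with_mask = draw_mask(segmented_frame_masks[0][0], annotated_frame)
        return Image.fromarray(annotated_frame_with_mask)

    # annotate() follows the OpenCV BGR convention, so convert before returning a PIL image.
    return Image.fromarray(cv2.cvtColor(annotated_frame, cv2.COLOR_BGR2RGB))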
@@ -136,9 +134,13 @@ if __name__ == "__main__":
         gr.Markdown("<h3><center>Open-World Detection with <a href='https://github.com/Arulkumar03/SOTA-Grounding-DINO.ipynb'>Grounding DINO</a><h3><center>")
         gr.Markdown("<h3><center>Note the model runs on CPU, so it may take a while to run the model.<h3><center>")
 
+
         with gr.Row():
             with gr.Column():
                 input_image = gr.Image(source='upload', type="pil")
+                choice = gr.Radio(
+                    ["segment", "classify"], default="segment", label="Choose Operation"
+                )
                 grounding_caption = gr.Textbox(label="Detection Prompt")
                 run_button = gr.Button(label="Run")
                 with gr.Accordion("Advanced options", open=False):
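The new gr.Radio control exposes the operation ("segment" or "classify") that run_grounding receives as choice. Below is a small sketch of the same column, assuming the Gradio 3.x Blocks API the app already uses; note that recent Gradio versions take the initial selection via value= rather than default=, so the committed default="segment" may be ignored or rejected depending on the installed version.

import gradio as gr

with gr.Blocks() as block:
    with gr.Row():
        with gr.Column():
            input_image = gr.Image(source='upload', type="pil")
            # Operation selector that is passed to run_grounding as `choice`.
            choice = gr.Radio(
                ["segment", "classify"],
                value="segment",          # the diff uses default=; newer Gradio expects value=
                label="Choose Operation",
            )
            grounding_caption = gr.Textbox(label="Detection Prompt")
            run_button = gr.Button("Run")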
@@ -154,18 +156,15 @@ if __name__ == "__main__":
                     type="pil",
                     # label="grounding results"
                 ).style(full_width=True, full_height=True)
-                # gallery = gr.Gallery(label="Generated images", show_label=False).style(
-                #     grid=[1], height="auto", container=True, full_width=True, full_height=True)
 
         run_button.click(fn=run_grounding, inputs=[
-                        input_image, grounding_caption, box_threshold, text_threshold], outputs=[gallery])
+                        input_image, choice, grounding_caption, box_threshold, text_threshold], outputs=[gallery])
         gr.Examples(
-
-
-
-
-
-
-
+            [["watermelon.jpg", "segment", "watermelon", 0.25, 0.25]],
+            inputs=[input_image, choice, grounding_caption, box_threshold, text_threshold],
+            outputs=[gallery],
+            fn=run_grounding,
+            cache_examples=True,
+            label='Try this example input!'
+        )
     block.launch(share=False, show_api=False, show_error=True)
-
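The click handler and the example row now pass choice through in the same order as the UI inputs. Two things are worth noting from the diff itself: run_grounding as committed declares six parameters (including do_segmentation) while only five inputs are wired, and cache_examples=True makes Gradio execute fn on the example at startup, so watermelon.jpg has to be present in the Space for caching to succeed. Below is a sketch of the wiring under the assumption that run_grounding takes exactly the five wired inputs.

# Sketch, assuming run_grounding(input_image, choice, grounding_caption,
# box_threshold, text_threshold) matches the five wired inputs.
run_button.click(
    fn=run_grounding,
    inputs=[input_image, choice, grounding_caption, box_threshold, text_threshold],
    outputs=[gallery],
)

gr.Examples(
    examples=[["watermelon.jpg", "segment", "watermelon", 0.25, 0.25]],
    inputs=[input_image, choice, grounding_caption, box_threshold, text_threshold],
    outputs=[gallery],
    fn=run_grounding,
    cache_examples=True,   # runs fn on this example at startup and caches the result
    label="Try this example input!",
)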