Spaces: Running on Zero
Commit f690e8c
Parent(s): 22d8cda

add dounseen demo

Files changed:
- app.py +37 -16
- models/dounseen/vit_b_16_epoch_199_augment.pth +3 -0
- {checkpoints → models/sam2}/sam2_hiera_base_plus.pt +0 -0
- {checkpoints → models/sam2}/sam2_hiera_large.pt +0 -0
- {checkpoints → models/sam2}/sam2_hiera_small.pt +0 -0
- {checkpoints → models/sam2}/sam2_hiera_tiny.pt +0 -0
- requirements.txt +2 -1
- utils/models.py +21 -7
app.py CHANGED
@@ -6,9 +6,11 @@ import numpy as np
 import supervision as sv
 import torch
 from PIL import Image
+import dounseen.utils as dounseen_utils
+import cv2
 
-from utils.models import load_models, CHECKPOINT_NAMES, MODE_NAMES, \
-    MASK_GENERATION_MODE, BOX_PROMPT_MODE
+from utils.models import load_sam2_models, CHECKPOINT_NAMES, MODE_NAMES, \
+    MASK_GENERATION_MODE, BOX_PROMPT_MODE, load_dounseen_model
 
 # TODO add presentation on YouTube and add link here
 MARKDOWN = """
@@ -23,14 +25,16 @@ DoUnseen is a python package for segmenting unseen objects.
 It can be used as an extension to Segment-Anything Model (SAM) or used as a standalone to identify unseen objects.
 """
 EXAMPLES = [
-    [
-        …
+    [
+        "tiny",
+        0.5,
+        "https://raw.githubusercontent.com/AnasIbrahim/image_agnostic_segmentation/master/demo/rgb_images/000000.png",
+        "https://raw.githubusercontent.com/AnasIbrahim/image_agnostic_segmentation/CASE_release/demo/objects_gallery/obj_000001/1.jpg",
+        "https://raw.githubusercontent.com/AnasIbrahim/image_agnostic_segmentation/CASE_release/demo/objects_gallery/obj_000001/2.jpg",
+        "https://raw.githubusercontent.com/AnasIbrahim/image_agnostic_segmentation/CASE_release/demo/objects_gallery/obj_000001/3.jpg",
+        "https://raw.githubusercontent.com/AnasIbrahim/image_agnostic_segmentation/CASE_release/demo/objects_gallery/obj_000001/4.jpg",
+        "https://raw.githubusercontent.com/AnasIbrahim/image_agnostic_segmentation/CASE_release/demo/objects_gallery/obj_000001/5.jpg",
+        "https://raw.githubusercontent.com/AnasIbrahim/image_agnostic_segmentation/CASE_release/demo/objects_gallery/obj_000001/6.jpg"
     ]
 ]
 
@@ -42,7 +46,8 @@ if torch.cuda.get_device_properties(0).major >= 8:
     torch.backends.cudnn.allow_tf32 = True
 
 MASK_ANNOTATOR = sv.MaskAnnotator(color_lookup=sv.ColorLookup.INDEX)
-MASK_GENERATORS = load_models(device=DEVICE)
+MASK_GENERATORS = load_sam2_models(device=DEVICE)
+DOUNSEEN_MODEL = load_dounseen_model(device=DEVICE)
 
 
 @spaces.GPU
@@ -56,10 +61,26 @@ def process(
 ) -> Optional[Image.Image]:
     model = MASK_GENERATORS[checkpoint_dropdown]
     image = np.array(image_input.convert("RGB"))
-    …
-    detections = sv.Detections.from_sam(…)
+    sam2_result = model.generate(image)
+    detections = sv.Detections.from_sam(sam2_result)
+
+    # prepare sam2 output for the format expected by DoUnseen
+    masks = [ann['segmentation'] for ann in sam2_result]
+    bboxes = [ann['bbox'] for ann in sam2_result]
+    bboxes = [[int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3])] for bbox in bboxes]
+    # change bboxes from xywh to xyxy
+    bboxes = [[bbox[0], bbox[1], bbox[0] + bbox[2], bbox[1] + bbox[3]] for bbox in bboxes]
+    segments = dounseen_utils.get_image_segments_from_binary_masks(image, masks, bboxes)
+
+    gallery_dict = {'obj_000001': [np.array(object_image.convert("RGB")) for object_image in [object_image1, object_image2, object_image3, object_image4, object_image5, object_image6]]}
+    DOUNSEEN_MODEL.update_gallery(gallery_dict)
+    matched_query, score = DOUNSEEN_MODEL.find_object(segments, obj_name="obj_000001", method="max")
+    matched_query_ann_image = dounseen_utils.draw_segmented_image(image, [masks[matched_query]], [bboxes[matched_query]], classes_predictions=[0], classes_names=["obj_000001"])
+    # convert to PIL image
+    matched_query_ann_image = Image.fromarray(matched_query_ann_image)
+
     # TODO return is duplicated until dounseen model is implemented
-    return MASK_ANNOTATOR.annotate(image_input, detections), …
+    return MASK_ANNOTATOR.annotate(image_input, detections), matched_query_ann_image
 
 
 with gr.Blocks() as demo:
@@ -78,8 +99,8 @@ with gr.Blocks() as demo:
             submit_button_component = gr.Button(value='Submit', variant='primary')
             object_images = [gr.Image(type="pil", label=f"Object Image {i + 1}", width=256, height=256) for i in range(6)]  # Set a smaller display size for the object images
         with gr.Column():
-            image_output_sam = gr.Image(type='pil', label='…')
-            image_output_dounseen = gr.Image(type='pil', label='…')
+            image_output_sam = gr.Image(type='pil', label='SAM2 Output')
+            image_output_dounseen = gr.Image(type='pil', label='DoUnseen Output')
     with gr.Row():
         gr.Examples(
             fn=process,
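Two details of the new process() body are worth calling out. First, SAM2's automatic mask generator reports boxes as [x, y, w, h], while DoUnseen's get_image_segments_from_binary_masks expects [x_min, y_min, x_max, y_max], hence the two list comprehensions over bboxes. A minimal self-contained sketch of that conversion (xywh_to_xyxy is our name for illustration, not a helper defined in the repo):

# SAM2 boxes are [x, y, w, h]; DoUnseen wants [x_min, y_min, x_max, y_max].
def xywh_to_xyxy(bbox):
    x, y, w, h = (int(v) for v in bbox)
    return [x, y, x + w, y + h]

# a 30x40 box anchored at (10, 20) ends at (40, 60)
assert xywh_to_xyxy([10.0, 20.0, 30.0, 40.0]) == [10, 20, 40, 60]

Second, the gallery passed to DOUNSEEN_MODEL.update_gallery is a plain dict mapping an object id to a list of RGB numpy arrays; find_object then returns the index of the best-matching segment together with its score, which is why matched_query can index straight into masks and bboxes when drawing the annotated output.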
models/dounseen/vit_b_16_epoch_199_augment.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:74d99be3fe23504da3d9784a0c3c4715abffb1efa59ee0c092a0280170861a61
+size 347498842

This is a Git LFS pointer: the DoUnseen ViT-B/16 classifier weights (~347 MB) live in LFS storage, so only the three-line pointer appears in the diff.
{checkpoints → models/sam2}/sam2_hiera_base_plus.pt RENAMED
File without changes
{checkpoints → models/sam2}/sam2_hiera_large.pt RENAMED
File without changes
{checkpoints → models/sam2}/sam2_hiera_small.pt RENAMED
File without changes
{checkpoints → models/sam2}/sam2_hiera_tiny.pt RENAMED
File without changes
requirements.txt CHANGED
@@ -4,4 +4,5 @@ gradio
 supervision
 gradio_image_prompter
 opencv-python
-pytest
+pytest
+git+https://github.com/AnasIbrahim/image_agnostic_segmentation.git@CASE_release

The new last line installs the DoUnseen package itself from the CASE_release branch of its GitHub repo. The pytest line shows as removed and re-added rather than unchanged, most likely because the old file lacked a trailing newline.
utils/models.py CHANGED
@@ -4,6 +4,7 @@ import torch
 from sam2.automatic_mask_generator import SAM2AutomaticMaskGenerator
 from sam2.build_sam import build_sam2
 from sam2.sam2_image_predictor import SAM2ImagePredictor
+from dounseen.core import UnseenClassifier
 
 BOX_PROMPT_MODE = "box prompt"
 MASK_GENERATION_MODE = "mask generation"
@@ -11,14 +12,14 @@ MODE_NAMES = [BOX_PROMPT_MODE, MASK_GENERATION_MODE]
 
 CHECKPOINT_NAMES = ["tiny", "small", "base_plus", "large"]
 CHECKPOINTS = {
-    "tiny": ["sam2_hiera_t.yaml", "checkpoints/sam2_hiera_tiny.pt"],
-    "small": ["sam2_hiera_s.yaml", "checkpoints/sam2_hiera_small.pt"],
-    "base_plus": ["sam2_hiera_b+.yaml", "checkpoints/sam2_hiera_base_plus.pt"],
-    "large": ["sam2_hiera_l.yaml", "checkpoints/sam2_hiera_large.pt"],
+    "tiny": ["sam2_hiera_t.yaml", "models/sam2/sam2_hiera_tiny.pt"],
+    "small": ["sam2_hiera_s.yaml", "models/sam2/sam2_hiera_small.pt"],
+    "base_plus": ["sam2_hiera_b+.yaml", "models/sam2/sam2_hiera_base_plus.pt"],
+    "large": ["sam2_hiera_l.yaml", "models/sam2/sam2_hiera_large.pt"],
 }
 
 
-def load_models(
+def load_sam2_models(
     device: torch.device
 ) -> Tuple[Dict[str, SAM2ImagePredictor], Dict[str, SAM2AutomaticMaskGenerator]]:
     mask_generators = {}
@@ -26,12 +27,25 @@ def load_models(
         model = build_sam2(config, checkpoint, device=device)
         mask_generators[key] = SAM2AutomaticMaskGenerator(
             model=model,
-            points_per_side=…,
+            points_per_side=16,
             points_per_batch=64,
             pred_iou_thresh=0.7,
             stability_score_thresh=0.92,
             stability_score_offset=0.7,
-            crop_n_layers=…,
+            crop_n_layers=0,
             box_nms_thresh=0.7,
         )
     return mask_generators
+
+
+def load_dounseen_model(
+    device: torch.device,
+) -> UnseenClassifier:
+    unseen_classifier = UnseenClassifier(
+        model_path="models/dounseen/vit_b_16_epoch_199_augment.pth",
+        gallery_images=None,
+        gallery_buffered_path=None,
+        augment_gallery=False,
+        batch_size=64,
+    )
+    return unseen_classifier
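For reference, a hedged sketch of how these two loaders are meant to be consumed, mirroring app.py (the device line is our assumption; app.py computes its own DEVICE):

# Usage sketch for the loaders above; the device selection here is an
# illustrative assumption, app.py defines its own DEVICE constant.
import torch

from utils.models import CHECKPOINT_NAMES, load_dounseen_model, load_sam2_models

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
mask_generators = load_sam2_models(device=device)  # dict keyed by "tiny", "small", ...
classifier = load_dounseen_model(device=device)    # a dounseen.core.UnseenClassifier
generator = mask_generators[CHECKPOINT_NAMES[0]]   # SAM2AutomaticMaskGenerator for "tiny"

Note that load_sam2_models still carries the old Tuple[...] return annotation while its body returns a single dict, so callers should treat the result as a dict keyed by checkpoint name.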