anas-gouda committed
Commit f690e8c · 1 Parent(s): 22d8cda

add dounseen demo

app.py CHANGED
@@ -6,9 +6,11 @@ import numpy as np
 import supervision as sv
 import torch
 from PIL import Image
+import dounseen.utils as dounseen_utils
+import cv2
 
-from utils.models import load_models, CHECKPOINT_NAMES, MODE_NAMES, \
-    MASK_GENERATION_MODE, BOX_PROMPT_MODE
+from utils.models import load_sam2_models, CHECKPOINT_NAMES, MODE_NAMES, \
+    MASK_GENERATION_MODE, BOX_PROMPT_MODE, load_dounseen_model
 
 # TODO add presentation on YouTube and add link here
 MARKDOWN = """
@@ -23,14 +25,16 @@ DoUnseen is a python package for segmenting unseen objects.
 It can be used as an extention to Segment-Anything Model (SAM) or used as a standalone to identify unseen objects.
 """
 EXAMPLES = [
-    ["tiny", 0.5,
-     "https://raw.githubusercontent.com/AnasIbrahim/image_agnostic_segmentation/master/demo/rgb_images/000000.png",
-     "https://raw.githubusercontent.com/AnasIbrahim/image_agnostic_segmentation/master/demo/objects_gallery/obj_000001/obj_000001_1.jpg",
-     "https://raw.githubusercontent.com/AnasIbrahim/image_agnostic_segmentation/master/demo/objects_gallery/obj_000001/obj_000001_2.jpg",
-     "https://raw.githubusercontent.com/AnasIbrahim/image_agnostic_segmentation/master/demo/objects_gallery/obj_000001/obj_000001_3.jpg",
-     "https://raw.githubusercontent.com/AnasIbrahim/image_agnostic_segmentation/master/demo/objects_gallery/obj_000001/obj_000001_4.jpg",
-     "https://raw.githubusercontent.com/AnasIbrahim/image_agnostic_segmentation/master/demo/objects_gallery/obj_000001/obj_000001_5.jpg",
-     "https://raw.githubusercontent.com/AnasIbrahim/image_agnostic_segmentation/master/demo/objects_gallery/obj_000001/obj_000001_6.jpg",
+    [
+        "tiny",
+        0.5,
+        "https://raw.githubusercontent.com/AnasIbrahim/image_agnostic_segmentation/master/demo/rgb_images/000000.png",
+        "https://raw.githubusercontent.com/AnasIbrahim/image_agnostic_segmentation/CASE_release/demo/objects_gallery/obj_000001/1.jpg",
+        "https://raw.githubusercontent.com/AnasIbrahim/image_agnostic_segmentation/CASE_release/demo/objects_gallery/obj_000001/2.jpg",
+        "https://raw.githubusercontent.com/AnasIbrahim/image_agnostic_segmentation/CASE_release/demo/objects_gallery/obj_000001/3.jpg",
+        "https://raw.githubusercontent.com/AnasIbrahim/image_agnostic_segmentation/CASE_release/demo/objects_gallery/obj_000001/4.jpg",
+        "https://raw.githubusercontent.com/AnasIbrahim/image_agnostic_segmentation/CASE_release/demo/objects_gallery/obj_000001/5.jpg",
+        "https://raw.githubusercontent.com/AnasIbrahim/image_agnostic_segmentation/CASE_release/demo/objects_gallery/obj_000001/6.jpg"
     ]
 ]
 
@@ -42,7 +46,8 @@ if torch.cuda.get_device_properties(0).major >= 8:
     torch.backends.cudnn.allow_tf32 = True
 
 MASK_ANNOTATOR = sv.MaskAnnotator(color_lookup=sv.ColorLookup.INDEX)
-MASK_GENERATORS = load_models(device=DEVICE)
+MASK_GENERATORS = load_sam2_models(device=DEVICE)
+DOUNSEEN_MODEL = load_dounseen_model(device=DEVICE)
 
 
 @spaces.GPU
@@ -56,10 +61,26 @@ def process(
 ) -> Optional[Image.Image]:
     model = MASK_GENERATORS[checkpoint_dropdown]
     image = np.array(image_input.convert("RGB"))
-    result = model.generate(image)
-    detections = sv.Detections.from_sam(result)
+    sam2_result = model.generate(image)
+    detections = sv.Detections.from_sam(sam2_result)
+
+    # prepare sam2 output for the format expected by DoUnseen
+    masks = [ann['segmentation'] for ann in sam2_result]
+    bboxes = [ann['bbox'] for ann in sam2_result]
+    bboxes = [[int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3])] for bbox in bboxes]
+    # change bboxed from xywh to xyxy
+    bboxes = [[bbox[0], bbox[1], bbox[0] + bbox[2], bbox[1] + bbox[3]] for bbox in bboxes]
+    segments = dounseen_utils.get_image_segments_from_binary_masks(image, masks, bboxes)
+
+    gallery_dict = {'obj_000001': [np.array(object_image.convert("RGB")) for object_image in [object_image1, object_image2, object_image3, object_image4, object_image5, object_image6]]}
+    DOUNSEEN_MODEL.update_gallery(gallery_dict)
+    matched_query, score = DOUNSEEN_MODEL.find_object(segments, obj_name="obj_000001", method="max")
+    matched_query_ann_image = dounseen_utils.draw_segmented_image(image, [masks[matched_query]], [bboxes[matched_query]], classes_predictions=[0], classes_names=["obj_000001"])
+    # convert to PIL image
+    matched_query_ann_image = Image.fromarray(matched_query_ann_image)
+
     # TODO return is duplicated untill dounsenn model is implemented
-    return MASK_ANNOTATOR.annotate(image_input, detections), MASK_ANNOTATOR.annotate(image_input, detections)
+    return MASK_ANNOTATOR.annotate(image_input, detections), matched_query_ann_image
 
 
 with gr.Blocks() as demo:
@@ -78,8 +99,8 @@ with gr.Blocks() as demo:
             submit_button_component = gr.Button(value='Submit', variant='primary')
             object_images = [gr.Image(type="pil", label=f"Object Image {i + 1}", width=256, height=256) for i in range(6)]  # Set a smaller display size for the object images
         with gr.Column():
-            image_output_sam = gr.Image(type='pil', label='Image Output')
-            image_output_dounseen = gr.Image(type='pil', label='Image Output')
+            image_output_sam = gr.Image(type='pil', label='SAM2 Output')
+            image_output_dounseen = gr.Image(type='pil', label='DoUnseen Output')
     with gr.Row():
         gr.Examples(
            fn=process,
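For orientation, the new process() hunk above runs SAM2's automatic mask generator, converts the resulting xywh boxes to xyxy, crops out each segment, and lets DoUnseen pick the segment that best matches the uploaded gallery images. Below is a minimal standalone sketch of that flow, assuming the dounseen API exactly as it appears in this diff (get_image_segments_from_binary_masks, update_gallery, find_object); the helper name match_gallery_object is hypothetical.

# Minimal sketch of the SAM2 -> DoUnseen flow wired up in process() above.
# Assumes the dounseen API as used in this commit; match_gallery_object is a hypothetical helper.
import numpy as np
from PIL import Image
import dounseen.utils as dounseen_utils


def match_gallery_object(mask_generator, dounseen_model, scene, gallery_images):
    image = np.array(scene.convert("RGB"))
    sam2_result = mask_generator.generate(image)  # class-agnostic SAM2 masks

    # SAM2 reports boxes as xywh; build xyxy boxes and crop each segment for DoUnseen
    masks = [ann['segmentation'] for ann in sam2_result]
    bboxes = [[int(x), int(y), int(x) + int(w), int(y) + int(h)]
              for x, y, w, h in (ann['bbox'] for ann in sam2_result)]
    segments = dounseen_utils.get_image_segments_from_binary_masks(image, masks, bboxes)

    # register the gallery under a single object id, then pick the best-matching segment
    dounseen_model.update_gallery({'obj_000001': [np.array(img.convert("RGB")) for img in gallery_images]})
    matched_idx, score = dounseen_model.find_object(segments, obj_name="obj_000001", method="max")
    return masks[matched_idx], bboxes[matched_idx], score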
models/dounseen/vit_b_16_epoch_199_augment.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:74d99be3fe23504da3d9784a0c3c4715abffb1efa59ee0c092a0280170861a61
+size 347498842
{checkpoints → models/sam2}/sam2_hiera_base_plus.pt RENAMED
File without changes
{checkpoints → models/sam2}/sam2_hiera_large.pt RENAMED
File without changes
{checkpoints → models/sam2}/sam2_hiera_small.pt RENAMED
File without changes
{checkpoints → models/sam2}/sam2_hiera_tiny.pt RENAMED
File without changes
requirements.txt CHANGED
@@ -4,4 +4,5 @@ gradio
 supervision
 gradio_image_prompter
 opencv-python
-pytest
+pytest
+git+https://github.com/AnasIbrahim/image_agnostic_segmentation.git@CASE_release
utils/models.py CHANGED
@@ -4,6 +4,7 @@ import torch
 from sam2.automatic_mask_generator import SAM2AutomaticMaskGenerator
 from sam2.build_sam import build_sam2
 from sam2.sam2_image_predictor import SAM2ImagePredictor
+from dounseen.core import UnseenClassifier
 
 BOX_PROMPT_MODE = "box prompt"
 MASK_GENERATION_MODE = "mask generation"
@@ -11,14 +12,14 @@ MODE_NAMES = [BOX_PROMPT_MODE, MASK_GENERATION_MODE]
 
 CHECKPOINT_NAMES = ["tiny", "small", "base_plus", "large"]
 CHECKPOINTS = {
-    "tiny": ["sam2_hiera_t.yaml", "checkpoints/sam2_hiera_tiny.pt"],
-    "small": ["sam2_hiera_s.yaml", "checkpoints/sam2_hiera_small.pt"],
-    "base_plus": ["sam2_hiera_b+.yaml", "checkpoints/sam2_hiera_base_plus.pt"],
-    "large": ["sam2_hiera_l.yaml", "checkpoints/sam2_hiera_large.pt"],
+    "tiny": ["sam2_hiera_t.yaml", "models/sam2/sam2_hiera_tiny.pt"],
+    "small": ["sam2_hiera_s.yaml", "models/sam2/sam2_hiera_small.pt"],
+    "base_plus": ["sam2_hiera_b+.yaml", "models/sam2/sam2_hiera_base_plus.pt"],
+    "large": ["sam2_hiera_l.yaml", "models/sam2/sam2_hiera_large.pt"],
 }
 
 
-def load_models(
+def load_sam2_models(
     device: torch.device
 ) -> Tuple[Dict[str, SAM2ImagePredictor], Dict[str, SAM2AutomaticMaskGenerator]]:
     mask_generators = {}
@@ -26,12 +27,25 @@ def load_models(
     model = build_sam2(config, checkpoint, device=device)
     mask_generators[key] = SAM2AutomaticMaskGenerator(
         model=model,
-        points_per_side=32,
+        points_per_side=16,
         points_per_batch=64,
         pred_iou_thresh=0.7,
         stability_score_thresh=0.92,
         stability_score_offset=0.7,
-        crop_n_layers=1,
+        crop_n_layers=0,
         box_nms_thresh=0.7,
     )
     return mask_generators
+
+
+def load_dounseen_model(
+    device: torch.device,
+) -> UnseenClassifier:
+    unseen_classifier = UnseenClassifier(
+        model_path="models/dounseen/vit_b_16_epoch_199_augment.pth",
+        gallery_images=None,
+        gallery_buffered_path=None,
+        augment_gallery=False,
+        batch_size=64,
+    )
+    return unseen_classifier
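For reference, a hedged usage sketch of the two loaders defined above, mirroring how app.py calls them in this commit; the dict returned by load_sam2_models is keyed by CHECKPOINT_NAMES, and the paths assume the models/sam2 and models/dounseen layout introduced here.

# Usage sketch of the loaders above, as wired up in app.py in this commit.
import torch
from utils.models import load_sam2_models, load_dounseen_model

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
mask_generators = load_sam2_models(device=device)       # keys: "tiny", "small", "base_plus", "large"
unseen_classifier = load_dounseen_model(device=device)   # UnseenClassifier; gallery is set later via update_gallery

# the example in app.py uses the lightweight SAM2 variant
tiny_generator = mask_generators["tiny"]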