Spaces: Running on Zero
Commit f690e8c
Parent(s): 22d8cda

add dounseen demo

Files changed:
- app.py +37 -16
- models/dounseen/vit_b_16_epoch_199_augment.pth +3 -0
- {checkpoints → models/sam2}/sam2_hiera_base_plus.pt +0 -0
- {checkpoints → models/sam2}/sam2_hiera_large.pt +0 -0
- {checkpoints → models/sam2}/sam2_hiera_small.pt +0 -0
- {checkpoints → models/sam2}/sam2_hiera_tiny.pt +0 -0
- requirements.txt +2 -1
- utils/models.py +21 -7
app.py CHANGED
@@ -6,9 +6,11 @@ import numpy as np
 import supervision as sv
 import torch
 from PIL import Image
+import dounseen.utils as dounseen_utils
+import cv2
 
-from utils.models import load_models, CHECKPOINT_NAMES, MODE_NAMES, \
-    MASK_GENERATION_MODE, BOX_PROMPT_MODE
+from utils.models import load_sam2_models, CHECKPOINT_NAMES, MODE_NAMES, \
+    MASK_GENERATION_MODE, BOX_PROMPT_MODE, load_dounseen_model
 
 # TODO add presentation on YouTube and add link here
 MARKDOWN = """
@@ -23,14 +25,16 @@ DoUnseen is a python package for segmenting unseen objects.
 It can be used as an extension to Segment-Anything Model (SAM) or used as a standalone to identify unseen objects.
 """
 EXAMPLES = [
-    [
-        …
+    [
+        "tiny",
+        0.5,
+        "https://raw.githubusercontent.com/AnasIbrahim/image_agnostic_segmentation/master/demo/rgb_images/000000.png",
+        "https://raw.githubusercontent.com/AnasIbrahim/image_agnostic_segmentation/CASE_release/demo/objects_gallery/obj_000001/1.jpg",
+        "https://raw.githubusercontent.com/AnasIbrahim/image_agnostic_segmentation/CASE_release/demo/objects_gallery/obj_000001/2.jpg",
+        "https://raw.githubusercontent.com/AnasIbrahim/image_agnostic_segmentation/CASE_release/demo/objects_gallery/obj_000001/3.jpg",
+        "https://raw.githubusercontent.com/AnasIbrahim/image_agnostic_segmentation/CASE_release/demo/objects_gallery/obj_000001/4.jpg",
+        "https://raw.githubusercontent.com/AnasIbrahim/image_agnostic_segmentation/CASE_release/demo/objects_gallery/obj_000001/5.jpg",
+        "https://raw.githubusercontent.com/AnasIbrahim/image_agnostic_segmentation/CASE_release/demo/objects_gallery/obj_000001/6.jpg"
     ]
 ]
 
@@ -42,7 +46,8 @@ if torch.cuda.get_device_properties(0).major >= 8:
     torch.backends.cudnn.allow_tf32 = True
 
 MASK_ANNOTATOR = sv.MaskAnnotator(color_lookup=sv.ColorLookup.INDEX)
-MASK_GENERATORS = load_models(device=DEVICE)
+MASK_GENERATORS = load_sam2_models(device=DEVICE)
+DOUNSEEN_MODEL = load_dounseen_model(device=DEVICE)
 
 
 @spaces.GPU
@@ -56,10 +61,26 @@ def process(
 ) -> Optional[Image.Image]:
     model = MASK_GENERATORS[checkpoint_dropdown]
     image = np.array(image_input.convert("RGB"))
-    …
-    detections = sv.Detections.from_sam(…)
+    sam2_result = model.generate(image)
+    detections = sv.Detections.from_sam(sam2_result)
+
+    # prepare sam2 output for the format expected by DoUnseen
+    masks = [ann['segmentation'] for ann in sam2_result]
+    bboxes = [ann['bbox'] for ann in sam2_result]
+    bboxes = [[int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3])] for bbox in bboxes]
+    # change bboxes from xywh to xyxy
+    bboxes = [[bbox[0], bbox[1], bbox[0] + bbox[2], bbox[1] + bbox[3]] for bbox in bboxes]
+    segments = dounseen_utils.get_image_segments_from_binary_masks(image, masks, bboxes)
+
+    gallery_dict = {'obj_000001': [np.array(object_image.convert("RGB")) for object_image in [object_image1, object_image2, object_image3, object_image4, object_image5, object_image6]]}
+    DOUNSEEN_MODEL.update_gallery(gallery_dict)
+    matched_query, score = DOUNSEEN_MODEL.find_object(segments, obj_name="obj_000001", method="max")
+    matched_query_ann_image = dounseen_utils.draw_segmented_image(image, [masks[matched_query]], [bboxes[matched_query]], classes_predictions=[0], classes_names=["obj_000001"])
+    # convert to PIL image
+    matched_query_ann_image = Image.fromarray(matched_query_ann_image)
+
     # TODO return is duplicated until dounseen model is implemented
-    return MASK_ANNOTATOR.annotate(image_input, detections), …
+    return MASK_ANNOTATOR.annotate(image_input, detections), matched_query_ann_image
 
 
 with gr.Blocks() as demo:
@@ -78,8 +99,8 @@ with gr.Blocks() as demo:
             submit_button_component = gr.Button(value='Submit', variant='primary')
             object_images = [gr.Image(type="pil", label=f"Object Image {i + 1}", width=256, height=256) for i in range(6)]  # Set a smaller display size for the object images
         with gr.Column():
-            image_output_sam = gr.Image(type='pil', label='…')
-            image_output_dounseen = gr.Image(type='pil', label='…')
+            image_output_sam = gr.Image(type='pil', label='SAM2 Output')
+            image_output_dounseen = gr.Image(type='pil', label='DoUnseen Output')
     with gr.Row():
         gr.Examples(
             fn=process,
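Two details of the new process() body are worth calling out. First, SAM2's automatic mask generator reports boxes as [x, y, w, h], while DoUnseen's get_image_segments_from_binary_masks expects [x_min, y_min, x_max, y_max], hence the two list comprehensions over bboxes. A minimal self-contained sketch of that conversion (xywh_to_xyxy is our name for illustration, not a helper defined in the repo):

# SAM2 boxes are [x, y, w, h]; DoUnseen wants [x_min, y_min, x_max, y_max].
def xywh_to_xyxy(bbox):
    x, y, w, h = (int(v) for v in bbox)
    return [x, y, x + w, y + h]

# a 30x40 box anchored at (10, 20) ends at (40, 60)
assert xywh_to_xyxy([10.0, 20.0, 30.0, 40.0]) == [10, 20, 40, 60]

Second, the gallery passed to DOUNSEEN_MODEL.update_gallery is a plain dict mapping an object id to a list of RGB numpy arrays; find_object then returns the index of the best-matching segment together with its score, which is why matched_query can index straight into masks and bboxes when drawing the annotated output.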
models/dounseen/vit_b_16_epoch_199_augment.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:74d99be3fe23504da3d9784a0c3c4715abffb1efa59ee0c092a0280170861a61
+size 347498842

This is a Git LFS pointer: the DoUnseen ViT-B/16 classifier weights (~347 MB) live in LFS storage, so only the three-line pointer appears in the diff.
{checkpoints → models/sam2}/sam2_hiera_base_plus.pt RENAMED
File without changes
{checkpoints → models/sam2}/sam2_hiera_large.pt RENAMED
File without changes
{checkpoints → models/sam2}/sam2_hiera_small.pt RENAMED
File without changes
{checkpoints → models/sam2}/sam2_hiera_tiny.pt RENAMED
File without changes
requirements.txt CHANGED
@@ -4,4 +4,5 @@ gradio
 supervision
 gradio_image_prompter
 opencv-python
-pytest
+pytest
+git+https://github.com/AnasIbrahim/image_agnostic_segmentation.git@CASE_release

The new last line installs the DoUnseen package itself from the CASE_release branch of its GitHub repo. The pytest line shows as removed and re-added rather than unchanged, most likely because the old file lacked a trailing newline.
utils/models.py CHANGED
@@ -4,6 +4,7 @@ import torch
 from sam2.automatic_mask_generator import SAM2AutomaticMaskGenerator
 from sam2.build_sam import build_sam2
 from sam2.sam2_image_predictor import SAM2ImagePredictor
+from dounseen.core import UnseenClassifier
 
 BOX_PROMPT_MODE = "box prompt"
 MASK_GENERATION_MODE = "mask generation"
@@ -11,14 +12,14 @@ MODE_NAMES = [BOX_PROMPT_MODE, MASK_GENERATION_MODE]
 
 CHECKPOINT_NAMES = ["tiny", "small", "base_plus", "large"]
 CHECKPOINTS = {
-    "tiny": ["sam2_hiera_t.yaml", "checkpoints/sam2_hiera_tiny.pt"],
-    "small": ["sam2_hiera_s.yaml", "checkpoints/sam2_hiera_small.pt"],
-    "base_plus": ["sam2_hiera_b+.yaml", "checkpoints/sam2_hiera_base_plus.pt"],
-    "large": ["sam2_hiera_l.yaml", "checkpoints/sam2_hiera_large.pt"],
+    "tiny": ["sam2_hiera_t.yaml", "models/sam2/sam2_hiera_tiny.pt"],
+    "small": ["sam2_hiera_s.yaml", "models/sam2/sam2_hiera_small.pt"],
+    "base_plus": ["sam2_hiera_b+.yaml", "models/sam2/sam2_hiera_base_plus.pt"],
+    "large": ["sam2_hiera_l.yaml", "models/sam2/sam2_hiera_large.pt"],
 }
 
 
-def load_models(
+def load_sam2_models(
     device: torch.device
 ) -> Tuple[Dict[str, SAM2ImagePredictor], Dict[str, SAM2AutomaticMaskGenerator]]:
     mask_generators = {}
@@ -26,12 +27,25 @@ def load_models(
         model = build_sam2(config, checkpoint, device=device)
         mask_generators[key] = SAM2AutomaticMaskGenerator(
             model=model,
-            points_per_side=…,
+            points_per_side=16,
             points_per_batch=64,
             pred_iou_thresh=0.7,
             stability_score_thresh=0.92,
             stability_score_offset=0.7,
-            crop_n_layers=…,
+            crop_n_layers=0,
             box_nms_thresh=0.7,
         )
     return mask_generators
+
+
+def load_dounseen_model(
+    device: torch.device,
+) -> UnseenClassifier:
+    unseen_classifier = UnseenClassifier(
+        model_path="models/dounseen/vit_b_16_epoch_199_augment.pth",
+        gallery_images=None,
+        gallery_buffered_path=None,
+        augment_gallery=False,
+        batch_size=64,
+    )
+    return unseen_classifier
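For reference, a hedged sketch of how these two loaders are meant to be consumed, mirroring app.py (the device line is our assumption; app.py computes its own DEVICE):

# Usage sketch for the loaders above; the device selection here is an
# illustrative assumption, app.py defines its own DEVICE constant.
import torch

from utils.models import CHECKPOINT_NAMES, load_dounseen_model, load_sam2_models

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
mask_generators = load_sam2_models(device=device)  # dict keyed by "tiny", "small", ...
classifier = load_dounseen_model(device=device)    # a dounseen.core.UnseenClassifier
generator = mask_generators[CHECKPOINT_NAMES[0]]   # SAM2AutomaticMaskGenerator for "tiny"

Note that load_sam2_models still carries the old Tuple[...] return annotation while its body returns a single dict, so callers should treat the result as a dict keyed by checkpoint name.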