mart9992 committed on Jan 5, 2024

Commit

062a1ef

1 Parent(s): b93bdbf

m

Browse files

Files changed (23) hide show

grounded_sam_demo.py +51 -159
segment_anything/segment_anything.egg-info/PKG-INFO +15 -0
segment_anything/segment_anything.egg-info/SOURCES.txt +26 -0
segment_anything/segment_anything.egg-info/dependency_links.txt +1 -0
segment_anything/segment_anything.egg-info/requires.txt +13 -0
segment_anything/segment_anything.egg-info/top_level.txt +1 -0
segment_anything/segment_anything/__pycache__/__init__.cpython-310.pyc +0 -0
segment_anything/segment_anything/__pycache__/automatic_mask_generator.cpython-310.pyc +0 -0
segment_anything/segment_anything/__pycache__/build_sam.cpython-310.pyc +0 -0
segment_anything/segment_anything/__pycache__/build_sam_hq.cpython-310.pyc +0 -0
segment_anything/segment_anything/__pycache__/predictor.cpython-310.pyc +0 -0
segment_anything/segment_anything/modeling/__pycache__/__init__.cpython-310.pyc +0 -0
segment_anything/segment_anything/modeling/__pycache__/common.cpython-310.pyc +0 -0
segment_anything/segment_anything/modeling/__pycache__/image_encoder.cpython-310.pyc +0 -0
segment_anything/segment_anything/modeling/__pycache__/mask_decoder.cpython-310.pyc +0 -0
segment_anything/segment_anything/modeling/__pycache__/mask_decoder_hq.cpython-310.pyc +0 -0
segment_anything/segment_anything/modeling/__pycache__/prompt_encoder.cpython-310.pyc +0 -0
segment_anything/segment_anything/modeling/__pycache__/sam.cpython-310.pyc +0 -0
segment_anything/segment_anything/modeling/__pycache__/transformer.cpython-310.pyc +0 -0
segment_anything/segment_anything/utils/__pycache__/__init__.cpython-310.pyc +0 -0
segment_anything/segment_anything/utils/__pycache__/amg.cpython-310.pyc +0 -0
segment_anything/segment_anything/utils/__pycache__/transforms.cpython-310.pyc +0 -0
test.py +16 -12

grounded_sam_demo.py CHANGED Viewed

@@ -1,4 +1,5 @@
-import argparse
 import os
 import copy
@@ -16,8 +17,8 @@ from GroundingDINO.groundingdino.util.utils import clean_state_dict, get_phrases
 # segment anything
 from segment_anything import (
-    sam_model_registry,
-    sam_hq_model_registry,
     SamPredictor
 )
 import cv2
@@ -25,27 +26,13 @@ import numpy as np
 import matplotlib.pyplot as plt
-def load_image(image_path):
-    # load image
-    image_pil = Image.open(image_path).convert("RGB")  # load image
-    transform = T.Compose(
-        [
-            T.RandomResize([800], max_size=1333),
-            T.ToTensor(),
-            T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
-        ]
-    )
-    image, _ = transform(image_pil, None)  # 3, h, w
-    return image_pil, image
 def load_model(model_config_path, model_checkpoint_path, device):
     args = SLConfig.fromfile(model_config_path)
     args.device = device
     model = build_model(args)
     checkpoint = torch.load(model_checkpoint_path, map_location="cpu")
-    load_res = model.load_state_dict(clean_state_dict(checkpoint["model"]), strict=False)
     print(load_res)
     _ = model.eval()
     return model
@@ -72,136 +59,38 @@ def get_grounding_output(model, image, caption, box_threshold, text_threshold, w
     boxes_filt = boxes_filt[filt_mask]  # num_filt, 4
     logits_filt.shape[0]
-    # get phrase
-    tokenlizer = model.tokenizer
-    tokenized = tokenlizer(caption)
-    # build pred
-    pred_phrases = []
-    for logit, box in zip(logits_filt, boxes_filt):
-        pred_phrase = get_phrases_from_posmap(logit > text_threshold, tokenized, tokenlizer)
-        if with_logits:
-            pred_phrases.append(pred_phrase + f"({str(logit.max().item())[:4]})")
-        else:
-            pred_phrases.append(pred_phrase)
-    return boxes_filt, pred_phrases
-def show_mask(mask, ax, random_color=False):
-    if random_color:
-        color = np.concatenate([np.random.random(3), np.array([0.6])], axis=0)
-    else:
-        color = np.array([30/255, 144/255, 255/255, 0.6])
-    h, w = mask.shape[-2:]
-    mask_image = mask.reshape(h, w, 1) * color.reshape(1, 1, -1)
-    ax.imshow(mask_image)
-def show_box(box, ax, label):
-    x0, y0 = box[0], box[1]
-    w, h = box[2] - box[0], box[3] - box[1]
-    ax.add_patch(plt.Rectangle((x0, y0), w, h, edgecolor='green', facecolor=(0,0,0,0), lw=2))
-    ax.text(x0, y0, label)
-def save_mask_data(output_dir, mask_list, box_list, label_list):
-    value = 0  # 0 for background
-    mask_img = torch.zeros(mask_list.shape[-2:])
-    for idx, mask in enumerate(mask_list):
-        mask_img[mask.cpu().numpy()[0] == True] = value + idx + 1
-    plt.figure(figsize=(10, 10))
-    plt.imshow(mask_img.numpy())
-    plt.axis('off')
-    plt.savefig(os.path.join(output_dir, 'mask.jpg'), bbox_inches="tight", dpi=300, pad_inches=0.0)
-    json_data = [{
-        'value': value,
-        'label': 'background'
-    }]
-    for label, box in zip(label_list, box_list):
-        value += 1
-        name, logit = label.split('(')
-        logit = logit[:-1] # the last is ')'
-        json_data.append({
-            'value': value,
-            'label': name,
-            'logit': float(logit),
-            'box': box.numpy().tolist(),
-        })
-    with open(os.path.join(output_dir, 'mask.json'), 'w') as f:
-        json.dump(json_data, f)
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser("Grounded-Segment-Anything Demo", add_help=True)
-    parser.add_argument("--config", type=str, required=True, help="path to config file")
-    parser.add_argument(
-        "--grounded_checkpoint", type=str, required=True, help="path to checkpoint file"
-    )
-    parser.add_argument(
-        "--sam_version", type=str, default="vit_h", required=False, help="SAM ViT version: vit_b / vit_l / vit_h"
-    )
-    parser.add_argument(
-        "--sam_checkpoint", type=str, required=False, help="path to sam checkpoint file"
-    )
-    parser.add_argument(
-        "--sam_hq_checkpoint", type=str, default=None, help="path to sam-hq checkpoint file"
-    )
-    parser.add_argument(
-        "--use_sam_hq", action="store_true", help="using sam-hq for prediction"
-    )
-    parser.add_argument("--input_image", type=str, required=True, help="path to image file")
-    parser.add_argument("--text_prompt", type=str, required=True, help="text prompt")
-    parser.add_argument(
-        "--output_dir", "-o", type=str, default="outputs", required=True, help="output directory"
-    )
-    parser.add_argument("--box_threshold", type=float, default=0.3, help="box threshold")
-    parser.add_argument("--text_threshold", type=float, default=0.25, help="text threshold")
-    parser.add_argument("--device", type=str, default="cpu", help="running on cpu only!, default=False")
-    args = parser.parse_args()
-    # cfg
-    config_file = args.config  # change the path of the model config file
-    grounded_checkpoint = args.grounded_checkpoint  # change the path of the model
-    sam_version = args.sam_version
-    sam_checkpoint = args.sam_checkpoint
-    sam_hq_checkpoint = args.sam_hq_checkpoint
-    use_sam_hq = args.use_sam_hq
-    image_path = args.input_image
-    text_prompt = args.text_prompt
-    output_dir = args.output_dir
-    box_threshold = args.box_threshold
-    text_threshold = args.text_threshold
-    device = args.device
-    # make dir
-    os.makedirs(output_dir, exist_ok=True)
-    # load image
-    image_pil, image = load_image(image_path)
-    # load model
-    model = load_model(config_file, grounded_checkpoint, device=device)
-    # visualize raw image
-    image_pil.save(os.path.join(output_dir, "raw_image.jpg"))
-    # run grounding dino model
-    boxes_filt, pred_phrases = get_grounding_output(
-        model, image, text_prompt, box_threshold, text_threshold, device=device
-    )
-    # initialize SAM
-    if use_sam_hq:
-        predictor = SamPredictor(sam_hq_model_registry[sam_version](checkpoint=sam_hq_checkpoint).to(device))
-    else:
-        predictor = SamPredictor(sam_model_registry[sam_version](checkpoint=sam_checkpoint).to(device))
-    image = cv2.imread(image_path)
-    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
     predictor.set_image(image)
-    size = image_pil.size
     H, W = size[1], size[0]
     for i in range(boxes_filt.size(0)):
         boxes_filt[i] = boxes_filt[i] * torch.Tensor([W, H, W, H])
@@ -209,27 +98,30 @@ if __name__ == "__main__":
         boxes_filt[i][2:] += boxes_filt[i][:2]
     boxes_filt = boxes_filt.cpu()
-    transformed_boxes = predictor.transform.apply_boxes_torch(boxes_filt, image.shape[:2]).to(device)
     masks, _, _ = predictor.predict_torch(
-        point_coords = None,
-        point_labels = None,
-        boxes = transformed_boxes.to(device),
-        multimask_output = False,
     )
-    # draw output image
-    plt.figure(figsize=(10, 10))
-    plt.imshow(image)
-    for mask in masks:
-        show_mask(mask.cpu().numpy(), plt.gca(), random_color=True)
-    for box, label in zip(boxes_filt, pred_phrases):
-        show_box(box.numpy(), plt.gca(), label)
     plt.axis('off')
-    plt.savefig(
-        os.path.join(output_dir, "grounded_sam_output.jpg"),
-        bbox_inches="tight", dpi=300, pad_inches=0.0
-    )
-    save_mask_data(output_dir, masks, boxes_filt, pred_phrases)

+from GroundingDINO.groundingdino.datasets.transforms import Compose, RandomResize, ToTensor, Normalize
+from io import BytesIO
 import os
 import copy
 # segment anything
 from segment_anything import (
+    build_sam,
+    build_sam_hq,
     SamPredictor
 )
 import cv2
 import matplotlib.pyplot as plt
 def load_model(model_config_path, model_checkpoint_path, device):
     args = SLConfig.fromfile(model_config_path)
     args.device = device
     model = build_model(args)
     checkpoint = torch.load(model_checkpoint_path, map_location="cpu")
+    load_res = model.load_state_dict(
+        clean_state_dict(checkpoint["model"]), strict=False)
     print(load_res)
     _ = model.eval()
     return model
     boxes_filt = boxes_filt[filt_mask]  # num_filt, 4
     logits_filt.shape[0]
+    return boxes_filt
+def grounded_sam_demo(input_pil, config_file, grounded_checkpoint, sam_checkpoint,
+                      text_prompt, box_threshold=0.3, text_threshold=0.25,
+                      device="cuda"):
+    # Convert PIL image to tensor with normalization
+    transform = Compose([
+        RandomResize([800], max_size=1333),
+        ToTensor(),
+        Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
+    ])
+    if input_pil.mode != "RGB":
+        input_pil = input_pil.convert("RGB")
+    image, _ = transform(input_pil, None)
+    # Load model
+    model = load_model(config_file, grounded_checkpoint, device=device)
+    # Get grounding dino model output
+    boxes_filt = get_grounding_output(
+        model, image, text_prompt, box_threshold, text_threshold, device=device)
+    # Initialize SAM
+    predictor = SamPredictor(build_sam(checkpoint=sam_checkpoint).to(device))
+    image = cv2.cvtColor(np.array(input_pil), cv2.COLOR_RGB2BGR)
     predictor.set_image(image)
+    size = input_pil.size
     H, W = size[1], size[0]
     for i in range(boxes_filt.size(0)):
         boxes_filt[i] = boxes_filt[i] * torch.Tensor([W, H, W, H])
         boxes_filt[i][2:] += boxes_filt[i][:2]
     boxes_filt = boxes_filt.cpu()
+    transformed_boxes = predictor.transform.apply_boxes_torch(
+        boxes_filt, image.shape[:2]).to(device)
     masks, _, _ = predictor.predict_torch(
+        point_coords=None,
+        point_labels=None,
+        boxes=transformed_boxes.to(device),
+        multimask_output=False,
     )
+    # Create mask image
+    value = 0  # 0 for background
+    mask_img = torch.zeros(masks.shape[-2:])
+    for idx, mask in enumerate(masks):
+        mask_img[mask.cpu().numpy()[0] == True] = value + idx + 1
+    fig = plt.figure(figsize=(10, 10))
+    plt.imshow(mask_img.numpy())
     plt.axis('off')
+    buf = BytesIO()
+    plt.savefig(buf, format='png', bbox_inches="tight",
+                dpi=300, pad_inches=0.0)
+    buf.seek(0)
+    out_pil = Image.open(buf)
+    return out_pil

segment_anything/segment_anything.egg-info/PKG-INFO ADDED Viewed

	@@ -0,0 +1,15 @@

+Metadata-Version: 2.1
+Name: segment-anything
+Version: 1.0
+License-File: LICENSE
+Provides-Extra: all
+Requires-Dist: matplotlib; extra == "all"
+Requires-Dist: pycocotools; extra == "all"
+Requires-Dist: opencv-python; extra == "all"
+Requires-Dist: onnx; extra == "all"
+Requires-Dist: onnxruntime; extra == "all"
+Provides-Extra: dev
+Requires-Dist: flake8; extra == "dev"
+Requires-Dist: isort; extra == "dev"
+Requires-Dist: black; extra == "dev"
+Requires-Dist: mypy; extra == "dev"

segment_anything/segment_anything.egg-info/SOURCES.txt ADDED Viewed

	@@ -0,0 +1,26 @@

+LICENSE
+README.md
+setup.cfg
+setup.py
+segment_anything/__init__.py
+segment_anything/automatic_mask_generator.py
+segment_anything/build_sam.py
+segment_anything/build_sam_hq.py
+segment_anything/predictor.py
+segment_anything.egg-info/PKG-INFO
+segment_anything.egg-info/SOURCES.txt
+segment_anything.egg-info/dependency_links.txt
+segment_anything.egg-info/requires.txt
+segment_anything.egg-info/top_level.txt
+segment_anything/modeling/__init__.py
+segment_anything/modeling/common.py
+segment_anything/modeling/image_encoder.py
+segment_anything/modeling/mask_decoder.py
+segment_anything/modeling/mask_decoder_hq.py
+segment_anything/modeling/prompt_encoder.py
+segment_anything/modeling/sam.py
+segment_anything/modeling/transformer.py
+segment_anything/utils/__init__.py
+segment_anything/utils/amg.py
+segment_anything/utils/onnx.py
+segment_anything/utils/transforms.py

segment_anything/segment_anything.egg-info/dependency_links.txt ADDED Viewed

	@@ -0,0 +1 @@


+

segment_anything/segment_anything.egg-info/requires.txt ADDED Viewed

	@@ -0,0 +1,13 @@

+[all]
+matplotlib
+pycocotools
+opencv-python
+onnx
+onnxruntime
+[dev]
+flake8
+isort
+black
+mypy

segment_anything/segment_anything.egg-info/top_level.txt ADDED Viewed

	@@ -0,0 +1 @@


+segment_anything

segment_anything/segment_anything/__pycache__/__init__.cpython-310.pyc ADDED Viewed

Binary file (572 Bytes). View file

segment_anything/segment_anything/__pycache__/automatic_mask_generator.cpython-310.pyc ADDED Viewed

Binary file (11.4 kB). View file

segment_anything/segment_anything/__pycache__/build_sam.cpython-310.pyc ADDED Viewed

Binary file (2.16 kB). View file

segment_anything/segment_anything/__pycache__/build_sam_hq.cpython-310.pyc ADDED Viewed

Binary file (2.41 kB). View file

segment_anything/segment_anything/__pycache__/predictor.cpython-310.pyc ADDED Viewed

Binary file (10.1 kB). View file

segment_anything/segment_anything/modeling/__pycache__/__init__.cpython-310.pyc ADDED Viewed

Binary file (458 Bytes). View file

segment_anything/segment_anything/modeling/__pycache__/common.cpython-310.pyc ADDED Viewed

Binary file (1.76 kB). View file

segment_anything/segment_anything/modeling/__pycache__/image_encoder.cpython-310.pyc ADDED Viewed

Binary file (12.7 kB). View file

segment_anything/segment_anything/modeling/__pycache__/mask_decoder.cpython-310.pyc ADDED Viewed

Binary file (5.54 kB). View file

segment_anything/segment_anything/modeling/__pycache__/mask_decoder_hq.cpython-310.pyc ADDED Viewed

Binary file (6.62 kB). View file

segment_anything/segment_anything/modeling/__pycache__/prompt_encoder.cpython-310.pyc ADDED Viewed

Binary file (7.69 kB). View file

segment_anything/segment_anything/modeling/__pycache__/sam.cpython-310.pyc ADDED Viewed

Binary file (6.67 kB). View file

segment_anything/segment_anything/modeling/__pycache__/transformer.cpython-310.pyc ADDED Viewed

Binary file (6.61 kB). View file

segment_anything/segment_anything/utils/__pycache__/__init__.cpython-310.pyc ADDED Viewed

Binary file (165 Bytes). View file

segment_anything/segment_anything/utils/__pycache__/amg.cpython-310.pyc ADDED Viewed

Binary file (12.1 kB). View file

segment_anything/segment_anything/utils/__pycache__/transforms.cpython-310.pyc ADDED Viewed

Binary file (3.94 kB). View file

test.py CHANGED Viewed

@@ -4,33 +4,37 @@ import torch
 import requests
 from PIL import Image
 from io import BytesIO
-is_production = True
 os.chdir("/repository" if is_production else ".")
 os.environ['AM_I_DOCKER'] = 'False'
 os.environ['BUILD_WITH_CUDA'] = 'True'
 os.environ['CUDA_HOME'] = '/usr/local/cuda-11.7/' if is_production else '/usr/local/cuda-12.1/'
-# Install Segment Anything
-subprocess.run(["python", "-m", "pip", "install", "-e", "segment_anything"])
-# Install Grounding DINO
-subprocess.run(["python", "-m", "pip", "install", "-e", "GroundingDINO"])
-subprocess.run("wget https://huggingface.co/Uminosachi/sam-hq/resolve/main/sam_hq_vit_h.pth -O ./sam_hq_vit_h.pth", shell=True)
-# Install diffusers
-subprocess.run(["pip", "install", "--upgrade", "diffusers[torch]"])
-# Install osx
 subprocess.run(["git", "submodule", "update", "--init", "--recursive"])
 subprocess.run(["bash", "grounded-sam-osx/install.sh"], cwd="grounded-sam-osx")
-# Install RAM & Tag2Text
 subprocess.run(["git", "clone", "https://github.com/xinyu1205/recognize-anything.git"])
-subprocess.run(["pip", "install", "-r", "./recognize-anything/requirements.txt"])
-subprocess.run(["pip", "install", "-e", "./recognize-anything/"])
 from grounded_sam_demo import grounded_sam_demo
 import numpy as np

 import requests
 from PIL import Image
 from io import BytesIO
+import subprocess
+import sys
+def pip_command(command):
+    subprocess.check_call([sys.executable, "-m", "pip"] + command.split())
+is_production = False
 os.chdir("/repository" if is_production else ".")
 os.environ['AM_I_DOCKER'] = 'False'
 os.environ['BUILD_WITH_CUDA'] = 'True'
 os.environ['CUDA_HOME'] = '/usr/local/cuda-11.7/' if is_production else '/usr/local/cuda-12.1/'
+pip_command("install -e segment_anything")
+pip_command("install -e GroundingDINO")
+response = requests.get("https://huggingface.co/Uminosachi/sam-hq/resolve/main/sam_hq_vit_h.pth")
+with open('./sam_hq_vit_h.pth', 'wb') as file:
+    file.write(response.content)
+pip_command("install --upgrade diffusers[torch]")
 subprocess.run(["git", "submodule", "update", "--init", "--recursive"])
 subprocess.run(["bash", "grounded-sam-osx/install.sh"], cwd="grounded-sam-osx")
 subprocess.run(["git", "clone", "https://github.com/xinyu1205/recognize-anything.git"])
+pip_command("install -r ./recognize-anything/requirements.txt")
+pip_command("install -e ./recognize-anything/")
 from grounded_sam_demo import grounded_sam_demo
 import numpy as np