Change to exporting via a wrapper module, but scripting still fails
Browse files- .gitignore +1 -0
- Makefile +8 -0
- load_model.py +45 -0
- requirements.txt +3 -0
- wrapper.py +41 -0
.gitignore
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
__pycache__
|
Makefile
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Build automation for exporting the SAM ViT-H image encoder to TorchScript.

PYTHON=3.9
BASENAME=sam-vit-h-encoder-torchscript

# Neither recipe produces a file named after its target, so declare them
# phony: otherwise a stray file called "env" or "setup" would silently
# make the target "up to date" and skip the recipe.
.PHONY: env setup

env:
	conda create -n $(BASENAME) python=$(PYTHON) -y

setup:
	pip install -r requirements.txt
load_model.py
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import urllib
|
| 3 |
+
|
| 4 |
+
import torch
|
| 5 |
+
from segment_anything import sam_model_registry
|
| 6 |
+
from segment_anything.modeling import Sam
|
| 7 |
+
|
| 8 |
+
from wrapper import ImageEncoderViTWrapper
|
# ---- Checkpoint / model configuration --------------------------------------
# SAM ViT-H weights are cached under ~/.cache/SAM and downloaded on first use.
CHECKPOINT_PATH = os.path.join(os.path.expanduser("~"), ".cache", "SAM")
CHECKPOINT_NAME = "sam_vit_h_4b8939.pth"
CHECKPOINT_URL = "https://dl.fbaipublicfiles.com/segment_anything/sam_vit_h_4b8939.pth"
# Registry key for sam_model_registry; "default" selects the ViT-H variant.
MODEL_TYPE = "default"

# Run on the GPU when one is visible to torch, otherwise fall back to CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
def load_model(
    checkpoint_path: str = CHECKPOINT_PATH,
    checkpoint_name: str = CHECKPOINT_NAME,
    checkpoint_url: str = CHECKPOINT_URL,
    model_type: str = MODEL_TYPE,
) -> Sam:
    """Load a SAM model, downloading the checkpoint on first use.

    Args:
        checkpoint_path: Directory the checkpoint file is cached in.
        checkpoint_name: File name of the checkpoint inside that directory.
        checkpoint_url: URL the checkpoint is fetched from when missing.
        model_type: Key into ``sam_model_registry`` ("default" is ViT-H).

    Returns:
        The instantiated ``Sam`` model with the checkpoint weights loaded.
    """
    # Bug fix: a bare module-level ``import urllib`` does not import the
    # ``urllib.request`` submodule, so ``urllib.request.urlretrieve`` below
    # may raise AttributeError. Import the submodule explicitly here.
    import urllib.request

    # exist_ok avoids the check-then-create race of the original
    # ``if not os.path.exists(...): os.makedirs(...)`` pattern.
    os.makedirs(checkpoint_path, exist_ok=True)
    checkpoint = os.path.join(checkpoint_path, checkpoint_name)
    if not os.path.exists(checkpoint):
        print("Downloading the model weights...")
        urllib.request.urlretrieve(checkpoint_url, checkpoint)
        print(f"The model weights saved as {checkpoint}")
    print(f"Load the model weights from {checkpoint}")
    return sam_model_registry[model_type](checkpoint=checkpoint)
if __name__ == "__main__":
    # Pull the ViT image encoder out of the full SAM model.
    image_encoder = load_model().image_encoder
    print(type(image_encoder))

    # Wrap it and swap every transformer block for its scriptable wrapper.
    image_encoder_wrapper = ImageEncoderViTWrapper(image_encoder).eval().to(device)
    image_encoder_wrapper.change_block()

    print(type(image_encoder_wrapper.image_encoder.blocks[0]))

    # Script the wrapped encoder and persist it for deployment.
    with torch.jit.optimized_execution(True):
        script_model = torch.jit.script(image_encoder_wrapper)
        script_model.save("model.pt")
requirements.txt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
torch == 2.0.0
|
| 2 |
+
torchvision == 0.15.1
|
| 3 |
+
git+https://github.com/facebookresearch/segment-anything.git
|
wrapper.py
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
import torch.nn as nn
|
| 3 |
+
|
| 4 |
+
from segment_anything.modeling import ImageEncoderViT
|
| 5 |
+
from segment_anything.modeling.image_encoder import Block, window_partition, window_unpartition
|
class BlockWrapper(nn.Module):
    """TorchScript-friendly re-expression of a SAM ViT ``Block``.

    Reproduces the wrapped block's forward pass, but keeps the
    window-partition bookkeeping (``pad_hw``, ``H``, ``W``) strictly inside
    the windowed branch so every variable is defined on all paths —
    a requirement for ``torch.jit.script``.
    """

    def __init__(self, block: Block):
        super().__init__()
        self.block = block

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        residual = x
        normed = self.block.norm1(x)

        if self.block.window_size > 0:
            # Windowed attention: partition, attend per window, then stitch
            # the windows back together at the original spatial size.
            h, w = normed.shape[1], normed.shape[2]
            windows, pad_hw = window_partition(normed, self.block.window_size)
            attended = window_unpartition(
                self.block.attn(windows), self.block.window_size, pad_hw, (h, w)
            )
        else:
            # Global attention over the whole feature map.
            attended = self.block.attn(normed)

        out = residual + attended
        return out + self.block.mlp(self.block.norm2(out))
class ImageEncoderViTWrapper(nn.Module):
    """Wraps a SAM ``ImageEncoderViT`` to make it ``torch.jit.script``-able.

    Call :meth:`change_block` before scripting to replace each transformer
    ``Block`` with a scriptable :class:`BlockWrapper`.
    """

    def __init__(self, image_encoder: ImageEncoderViT):
        super().__init__()
        self.image_encoder = image_encoder

    def change_block(self):
        """Swap every transformer block for a TorchScript-friendly wrapper."""
        block_wrappers = nn.ModuleList()
        for block in self.image_encoder.blocks:
            block_wrappers.append(BlockWrapper(block))
        self.image_encoder.blocks = block_wrappers

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run the wrapped encoder on an input image batch.

        Bug fix: the original wrapper defined no ``forward`` at all, so
        ``torch.jit.script`` had nothing to compile into the module's entry
        point and calling the module raised ``nn.Module``'s default
        NotImplementedError — the likely cause of the "but fail" result.
        """
        return self.image_encoder(x)