wli1995 committed on
Commit
c183d33
·
verified ·
1 Parent(s): 7e1afbf

Upload folder using huggingface_hub

Browse files
Files changed (41) hide show
  1. .gitattributes +12 -0
  2. README.md +46 -3
  3. ax_model/mobile_sam_decoder_620E.axmodel +3 -0
  4. ax_model/mobile_sam_decoder_650.axmodel +3 -0
  5. ax_model/mobile_sam_encoder_620E.axmodel +3 -0
  6. ax_model/mobile_sam_encoder_650.axmodel +3 -0
  7. config.json +0 -0
  8. images/box_mask_box_0.jpg +0 -0
  9. images/box_mask_box_1.jpg +0 -0
  10. images/box_mask_box_2.jpg +0 -0
  11. images/box_mask_box_3.jpg +0 -0
  12. images/box_mask_ovlap_box_0.jpg +3 -0
  13. images/box_mask_ovlap_box_1.jpg +3 -0
  14. images/box_mask_ovlap_box_2.jpg +3 -0
  15. images/box_mask_ovlap_box_3.jpg +3 -0
  16. images/car.jpg +3 -0
  17. images/point_mask_ovlap_point_0.jpg +3 -0
  18. images/point_mask_ovlap_point_1.jpg +3 -0
  19. images/point_mask_ovlap_point_2.jpg +3 -0
  20. images/point_mask_point_0.jpg +0 -0
  21. images/point_mask_point_1.jpg +0 -0
  22. images/point_mask_point_2.jpg +0 -0
  23. images/test.jpg +3 -0
  24. images/truck.jpg +3 -0
  25. onnx/mobile_sam_decoder_slim.onnx +3 -0
  26. onnx/mobile_sam_encoder.onnx +3 -0
  27. python_ax/__pycache__/sam_decoder.cpython-312.pyc +0 -0
  28. python_ax/__pycache__/sam_decoder.cpython-313.pyc +0 -0
  29. python_ax/__pycache__/sam_encoder.cpython-312.pyc +0 -0
  30. python_ax/__pycache__/sam_encoder.cpython-313.pyc +0 -0
  31. python_ax/main.py +99 -0
  32. python_ax/sam_decoder.py +40 -0
  33. python_ax/sam_encoder.py +67 -0
  34. python_onnx/__pycache__/sam_decoder.cpython-312.pyc +0 -0
  35. python_onnx/__pycache__/sam_decoder.cpython-313.pyc +0 -0
  36. python_onnx/__pycache__/sam_encoder.cpython-312.pyc +0 -0
  37. python_onnx/__pycache__/sam_encoder.cpython-313.pyc +0 -0
  38. python_onnx/main.py +70 -0
  39. python_onnx/sam_decoder.py +38 -0
  40. python_onnx/sam_encoder.py +62 -0
  41. requirements.txt +2 -0
.gitattributes CHANGED
@@ -35,3 +35,15 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  ax_model/mobile_sam_decoder_620E.axmodel filter=lfs diff=lfs merge=lfs -text
37
  ax_model/mobile_sam_decoder_650.axmodel filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  ax_model/mobile_sam_decoder_620E.axmodel filter=lfs diff=lfs merge=lfs -text
37
  ax_model/mobile_sam_decoder_650.axmodel filter=lfs diff=lfs merge=lfs -text
38
+ ax_model/mobile_sam_encoder_620E.axmodel filter=lfs diff=lfs merge=lfs -text
39
+ ax_model/mobile_sam_encoder_650.axmodel filter=lfs diff=lfs merge=lfs -text
40
+ images/box_mask_ovlap_box_0.jpg filter=lfs diff=lfs merge=lfs -text
41
+ images/box_mask_ovlap_box_1.jpg filter=lfs diff=lfs merge=lfs -text
42
+ images/box_mask_ovlap_box_2.jpg filter=lfs diff=lfs merge=lfs -text
43
+ images/box_mask_ovlap_box_3.jpg filter=lfs diff=lfs merge=lfs -text
44
+ images/car.jpg filter=lfs diff=lfs merge=lfs -text
45
+ images/point_mask_ovlap_point_0.jpg filter=lfs diff=lfs merge=lfs -text
46
+ images/point_mask_ovlap_point_1.jpg filter=lfs diff=lfs merge=lfs -text
47
+ images/point_mask_ovlap_point_2.jpg filter=lfs diff=lfs merge=lfs -text
48
+ images/test.jpg filter=lfs diff=lfs merge=lfs -text
49
+ images/truck.jpg filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,3 +1,46 @@
1
- ---
2
- license: afl-3.0
3
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # MobileSAM
2
+ 基于MobileSAM的图像分割Pipeline,支持多种输入提示(框、点、掩码),支持650N/620E系列平台的模型推理。
3
+
4
+ 支持芯片:
5
+ - AX650N
6
+ - AX620E
7
+ - AX630C
8
+
9
+
10
+ 支持硬件:
11
+
12
+ - [M4N-Dock(爱芯派Pro)](https://wiki.sipeed.com/hardware/zh/maixIV/m4ndock/m4ndock.html)
13
+ - [M.2 Accelerator card](https://docs.m5stack.com/zh_CN/ai_hardware/LLM-8850_Card)
14
+
15
+ 原始模型请参考
16
+ - [MobileSAM Github](https://github.com/ChaoningZhang/MobileSAM)
17
+
18
+ ## 性能对比
19
+
20
+ - 输入图片大小 1024x1024
21
+
22
+ |Chip| Models | Latency (ms) | CMM Usage (MiB) |
23
+ |----| --------------------- | ---------------------- | -------------- |
24
+ |650N| mobile_sam_encoder |49.495 | 48.334 |
25
+ |620E| mobile_sam_encoder |520.044 | 63.231 |
26
+ |650N| mobile_sam_decoder |9.930 | 16.703 |
27
+ |620E| mobile_sam_decoder |36.382 | 14.970 |
28
+
29
+ ## 模型转换
30
+ - 模型转换工具链[Pulsar2](https://huggingface.co/AXERA-TECH/Pulsar2)
31
+ - 转换文档[Model Convert](https://github.com/AXERA-TECH/MobileSAM.axera/tree/master/convert)
32
+
33
+ ## 环境准备
34
+ - NPU Python API: [pyaxengine](https://github.com/AXERA-TECH/pyaxengine)
35
+
36
+ 安装需要的python库
37
+ ```pip install -r requirements.txt```
38
+
39
+ ## 运行
40
+ ```shell
41
+ cd python_ax
42
+ python3 main.py -i ../images/test.jpg -c 650
43
+ ```
44
+ output:
45
+ ![point](./images/point_mask_ovlap_point_1.jpg)
46
+ ![box](./images/box_mask_ovlap_box_1.jpg)
ax_model/mobile_sam_decoder_620E.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8f968ca9f3cb313de45cc8519b62dcae2c834613f58abce85a14f8d7cd8cc7fd
3
+ size 7049493
ax_model/mobile_sam_decoder_650.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bfa87a19cfa3a95518d789cc1e5684b1d8d3c8d19b4e1e8155de5b5f5627020e
3
+ size 7031379
ax_model/mobile_sam_encoder_620E.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e941235ea497f63d91176cb6dfe856e52ccfcf6ec26157d75bcbd43aa31af851
3
+ size 25503415
ax_model/mobile_sam_encoder_650.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:88ad4b13478d7adfb6db44661933e5273207dfb15c1a74588fda860944501f9f
3
+ size 16467039
config.json ADDED
File without changes
images/box_mask_box_0.jpg ADDED
images/box_mask_box_1.jpg ADDED
images/box_mask_box_2.jpg ADDED
images/box_mask_box_3.jpg ADDED
images/box_mask_ovlap_box_0.jpg ADDED

Git LFS Details

  • SHA256: 197f10e18d13510b96fe6da74b1a1d2197027701e0099b24c0dd8c1340a848f1
  • Pointer size: 131 Bytes
  • Size of remote file: 585 kB
images/box_mask_ovlap_box_1.jpg ADDED

Git LFS Details

  • SHA256: be1b8f90e503f2e955a88f3a48a2d68b064c3fd3a5dd0bf23f746c186b8f80ac
  • Pointer size: 131 Bytes
  • Size of remote file: 579 kB
images/box_mask_ovlap_box_2.jpg ADDED

Git LFS Details

  • SHA256: de277674b1b4e00b9796a0003b6efede6bd87bae13425dcd6a234e8cfea05c9d
  • Pointer size: 131 Bytes
  • Size of remote file: 583 kB
images/box_mask_ovlap_box_3.jpg ADDED

Git LFS Details

  • SHA256: 516eb00ad16bd4f89dd3441da2f794795adce94d3a957b8c6ee3b99671b032e5
  • Pointer size: 131 Bytes
  • Size of remote file: 581 kB
images/car.jpg ADDED

Git LFS Details

  • SHA256: 7073dfecb5a3ecafb6152124113163a0ea1c1c70f92999ec892b519eca63e3d3
  • Pointer size: 131 Bytes
  • Size of remote file: 168 kB
images/point_mask_ovlap_point_0.jpg ADDED

Git LFS Details

  • SHA256: f35318a3746d83e2a66ec33c30f96ec704f8ba4b7f78bdf094b64913551bc9df
  • Pointer size: 131 Bytes
  • Size of remote file: 579 kB
images/point_mask_ovlap_point_1.jpg ADDED

Git LFS Details

  • SHA256: 62b75267e082fc50c207ea66c8001f48f083010eb5f3c8190c73e29272d8c252
  • Pointer size: 131 Bytes
  • Size of remote file: 579 kB
images/point_mask_ovlap_point_2.jpg ADDED

Git LFS Details

  • SHA256: 28d8fb43f9de17cdfc71456daffa9a4a1ed134cfb5c0022bf1beedfd4fb15566
  • Pointer size: 131 Bytes
  • Size of remote file: 572 kB
images/point_mask_point_0.jpg ADDED
images/point_mask_point_1.jpg ADDED
images/point_mask_point_2.jpg ADDED
images/test.jpg ADDED

Git LFS Details

  • SHA256: e7c4b752ef447bfec409888cea8709be15c01d0f6bf91bd16b7762deb90950dc
  • Pointer size: 131 Bytes
  • Size of remote file: 325 kB
images/truck.jpg ADDED

Git LFS Details

  • SHA256: 941715e721c8864324a1425b445ea4dde0498b995c45ddce0141a58971c6ff99
  • Pointer size: 131 Bytes
  • Size of remote file: 271 kB
onnx/mobile_sam_decoder_slim.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c7587d54e1df69e90573ad8b5672686e7ae8a295761fd3fd5d7ea45710ff92bb
3
+ size 20593937
onnx/mobile_sam_encoder.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8e35f2d687beafe0e327d4197ba015bca22a73142c65899572278d04fbecb149
3
+ size 27996273
python_ax/__pycache__/sam_decoder.cpython-312.pyc ADDED
Binary file (3.37 kB). View file
 
python_ax/__pycache__/sam_decoder.cpython-313.pyc ADDED
Binary file (3.49 kB). View file
 
python_ax/__pycache__/sam_encoder.cpython-312.pyc ADDED
Binary file (3.32 kB). View file
 
python_ax/__pycache__/sam_encoder.cpython-313.pyc ADDED
Binary file (3.27 kB). View file
 
python_ax/main.py ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from sam_encoder import SAMEncoder
2
+ from sam_decoder import SAMDecoder
3
+ import cv2
4
+ import numpy as np
5
+ import argparse
6
+ import os
7
+
8
def _save_mask(mask_logits, image_draw, w, h, mask_path, overlay_path):
    """Binarize one mask channel, undo the letterbox, and save mask + overlay.

    :param mask_logits: (H, W) mask logits for the selected mask channel
    :param image_draw: copy of the original image with the prompt drawn on it
    :param w: original image width in pixels
    :param h: original image height in pixels
    :param mask_path: output path for the binary mask image
    :param overlay_path: output path for the green overlay image
    """
    mask_mat = np.zeros((mask_logits.shape[0], mask_logits.shape[1]), dtype=np.uint8)
    mask_mat[mask_logits > 0] = 255
    # The model works on a square letterboxed canvas (padding on the
    # right/bottom): resize to the square side, then crop to original size.
    side = max(w, h)
    mask_mat = cv2.resize(mask_mat, (side, side), interpolation=cv2.INTER_LINEAR)
    mask_mat = mask_mat[:h, :w]
    cv2.imwrite(mask_path, mask_mat)
    mask_ovlap = np.zeros((mask_mat.shape[0], mask_mat.shape[1], 3), dtype=np.uint8)
    mask_ovlap[mask_mat > 0] = [0, 255, 0]
    image_ovlap = cv2.addWeighted(image_draw, 1, mask_ovlap, 0.5, 0)
    cv2.imwrite(overlay_path, image_ovlap)


if __name__ == "__main__":

    parser = argparse.ArgumentParser()
    parser.add_argument("--img_path", "-i", type=str, default="../images/test.jpg", help="input image path")
    parser.add_argument("--output_dir", "-o", type=str, default="./output", help="result path")
    parser.add_argument("--chip", "-c", type=str, default="650", help="650 or 620E")
    args = parser.parse_args()

    encoder = SAMEncoder(f"../ax_model/mobile_sam_encoder_{args.chip}.axmodel")
    decoder = SAMDecoder(f"../ax_model/mobile_sam_decoder_{args.chip}.axmodel")

    image = cv2.imread(args.img_path)
    h, w, _ = image.shape
    # Encode once; the same embedding is reused for every prompt below.
    image_embedding, scale = encoder.encode(image)

    os.makedirs(args.output_dir, exist_ok=True)

    # Point prompts: pixel coordinates in test.jpg.
    points = [(910, 641), (1488, 607), (579, 704)]
    for i, point in enumerate(points):
        image_draw = image.copy()

        output = decoder.decode(image_embedding[0], point=point, scale=scale)
        # output[0] holds per-mask quality scores: keep the best-scoring mask.
        idx = output[0].argmax()

        image_draw = cv2.circle(image_draw, (int(point[0]), int(point[1])), 10, (0, 255, 0), -1)
        _save_mask(
            output[1][:, idx, :, :][0], image_draw, w, h,
            f"{args.output_dir}/point_mask_point_{i}.jpg",
            f"{args.output_dir}/point_mask_ovlap_point_{i}.jpg",
        )

    # Box prompts: (top-left x, top-left y, width, height) in test.jpg.
    boxes = [
        (910 - 160, 641 - 430, 380, 940),
        (479, 482, 191, 518),
        (1345, 333, 289, 701),
        (1, 357, 311, 751),
    ]
    for i, box in enumerate(boxes):
        image_draw = image.copy()
        output = decoder.decode(image_embedding[0], box=box, scale=scale)
        idx = output[0].argmax()

        image_draw = cv2.rectangle(
            image_draw,
            (int(box[0]), int(box[1])),
            (int(box[0] + box[2]), int(box[1] + box[3])),
            (0, 255, 0), 2,
        )
        _save_mask(
            output[1][:, idx, :, :][0], image_draw, w, h,
            f"{args.output_dir}/box_mask_box_{i}.jpg",
            f"{args.output_dir}/box_mask_ovlap_box_{i}.jpg",
        )
python_ax/sam_decoder.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import axengine as axe
3
+
4
+
5
class SAMDecoder:
    """Prompt decoder for MobileSAM running on an AXEngine ``.axmodel``.

    Feeds an image embedding plus a point or box prompt to the decoder
    network and returns its raw outputs (mask quality scores and mask
    logits).
    """

    def __init__(self, model_path):
        """Load the decoder model and log its I/O tensor layout.

        :param model_path: path to the decoder ``.axmodel`` file
        """
        self.sess = axe.InferenceSession(model_path)
        # Print tensor names/shapes once at load time for quick debugging.
        # (Renamed loop vars: the originals shadowed the builtins
        # `input`/`output`.)
        for tensor in self.sess.get_inputs():
            print(tensor.name, tensor.shape)
        for tensor in self.sess.get_outputs():
            print(tensor.name, tensor.shape)

        # Dummy low-res mask input; has_mask=0 tells the model to ignore it.
        self.mask = np.zeros((1, 1, 256, 256), np.float32)
        self.has_mask = np.array([0], np.float32)

    def decode(self, image_embedding, point=None, box=None, scale=None):
        """Run the decoder with a single point or a single box prompt.

        :param image_embedding: encoder output for the letterboxed image
        :param point: (x, y) prompt in original-image pixels, or None
        :param box: (x, y, w, h) prompt in original-image pixels, or None
        :param scale: letterbox scale from the encoder; None means 1.0
            (i.e. the prompt is already in model-input coordinates)
        :return: list of model outputs
        :raises ValueError: if neither ``point`` nor ``box`` is given
        """
        if scale is None:
            # Previously a missing scale crashed with a TypeError; treat it
            # as "no rescaling" instead.
            scale = 1.0
        pad = (0, 0)  # padding slots are disabled via label 0 below
        if point is not None:
            point = np.array(point).astype(np.float32) * scale
            point_coords = np.array([point, pad, pad, pad, pad]).astype(np.float32).reshape((1, -1, 2))
            # Label 1 = foreground point, 0 = padding.
            point_labels = np.array([1, 0, 0, 0, 0], np.float32).reshape((1, -1))
        elif box is not None:
            box = np.array(box).astype(np.float32) * scale
            x, y, w, h = box
            center = np.array([x + w / 2, y + h / 2], np.float32)
            topleft = np.array([x, y], np.float32)
            bottomright = np.array([x + w, y + h], np.float32)
            point_coords = np.array([center, topleft, bottomright, pad, pad]).astype(np.float32).reshape((1, -1, 2))
            # Labels 2/3 mark box corners; the extra center point uses label 1.
            point_labels = np.array([1, 2, 3, 0, 0], np.float32).reshape((1, -1))
        else:
            raise ValueError("Either point or box must be provided.")
        inputs = {
            "image_embeddings": image_embedding,
            "point_coords": point_coords,
            "point_labels": point_labels,
            "mask_input": self.mask,
            "has_mask_input": self.has_mask,
        }
        outputs = self.sess.run(None, inputs)
        return outputs
python_ax/sam_encoder.py ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import axengine
2
+ import cv2
3
+ import numpy as np
4
+
5
class SAMEncoder:
    """MobileSAM image encoder running on an AXEngine ``.axmodel``.

    Letterboxes a BGR image to the model resolution and produces the image
    embedding consumed by ``SAMDecoder``.
    """

    def __init__(self, model_path):
        """Load the encoder model and log its I/O tensor layout.

        :param model_path: path to the encoder ``.axmodel`` file
        """
        self.sess = axengine.InferenceSession(model_path)
        # Print tensor names/shapes once at load time for quick debugging.
        # (Renamed loop vars: the originals shadowed `input`/`output`.)
        for tensor in self.sess.get_inputs():
            print(tensor.name, tensor.shape)
        for tensor in self.sess.get_outputs():
            print(tensor.name, tensor.shape)
        # Model input resolution (width, height).
        self.input_shape = (1024, 1024)

    def letterbox(self, image, target_size, color=(114, 114, 114)):
        """Resize *image* into *target_size* preserving aspect ratio.

        The resized image is anchored at the top-left corner and the free
        right/bottom area is filled with *color*, so prompt coordinates only
        need the returned scale (no offset correction).

        :param image: input image (H, W, C)
        :param target_size: target canvas size (width, height)
        :param color: BGR fill color for the padded area
        :return: (padded image, scale, (right_pad, bottom_pad))
        """
        original_height, original_width = image.shape[:2]
        target_width, target_height = target_size

        # Uniform scale so the whole image fits inside the target canvas.
        scale = min(target_width / original_width, target_height / original_height)
        new_width = int(original_width * scale)
        new_height = int(original_height * scale)

        resized_image = cv2.resize(image, (new_width, new_height), interpolation=cv2.INTER_LINEAR)

        # Bug fix: the old code reported centered offsets ((target-new)//2)
        # while actually padding only the right/bottom edges. Report the
        # padding that is really applied.
        pad_width = target_width - new_width
        pad_height = target_height - new_height

        padded_image = cv2.copyMakeBorder(
            resized_image,
            0, pad_height,
            0, pad_width,
            cv2.BORDER_CONSTANT,
            value=color,
        )

        return padded_image, scale, (pad_width, pad_height)

    def preprocess(self, image):
        """Letterbox to model resolution, convert BGR->RGB, add batch dim.

        :param image: BGR image (H, W, 3)
        :return: (NHWC uint8 batch of one image, letterbox scale)
        """
        padded_image, scale, _ = self.letterbox(image, self.input_shape)

        padded_image = cv2.cvtColor(padded_image, cv2.COLOR_BGR2RGB)
        # No mean/std normalization here: presumably it is baked into the
        # .axmodel (the ONNX pipeline normalizes on the host) — TODO confirm.
        padded_image = np.expand_dims(padded_image, axis=0)
        return padded_image, scale

    def encode(self, image):
        """Run the encoder and return ``(outputs, scale)`` for the decoder.

        :param image: BGR image (H, W, 3)
        :return: (list of encoder outputs, letterbox scale)
        """
        padded_image, scale = self.preprocess(image)
        return self.sess.run(None, {self.sess.get_inputs()[0].name: padded_image}), scale
python_onnx/__pycache__/sam_decoder.cpython-312.pyc ADDED
Binary file (2.99 kB). View file
 
python_onnx/__pycache__/sam_decoder.cpython-313.pyc ADDED
Binary file (3.07 kB). View file
 
python_onnx/__pycache__/sam_encoder.cpython-312.pyc ADDED
Binary file (3.56 kB). View file
 
python_onnx/__pycache__/sam_encoder.cpython-313.pyc ADDED
Binary file (3.49 kB). View file
 
python_onnx/main.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from sam_encoder import SAMEncoder
2
+ from sam_decoder import SAMDecoder
3
+ import cv2
4
+ import numpy as np
5
+ import argparse
6
+ import os
7
+
8
def _save_mask(mask_logits, image_draw, w, h, mask_path, overlay_path):
    """Binarize one mask channel, undo the letterbox, and save mask + overlay.

    :param mask_logits: (H, W) mask logits for the selected mask channel
    :param image_draw: copy of the original image with the prompt drawn on it
    :param w: original image width in pixels
    :param h: original image height in pixels
    :param mask_path: output path for the binary mask image
    :param overlay_path: output path for the green overlay image
    """
    mask_mat = np.zeros((mask_logits.shape[0], mask_logits.shape[1]), dtype=np.uint8)
    mask_mat[mask_logits > 0] = 255
    # The model works on a square letterboxed canvas (padding on the
    # right/bottom): resize to the square side, then crop to original size.
    side = max(w, h)
    mask_mat = cv2.resize(mask_mat, (side, side), interpolation=cv2.INTER_LINEAR)
    mask_mat = mask_mat[:h, :w]
    cv2.imwrite(mask_path, mask_mat)
    mask_ovlap = np.zeros((mask_mat.shape[0], mask_mat.shape[1], 3), dtype=np.uint8)
    mask_ovlap[mask_mat > 0] = [0, 255, 0]
    image_ovlap = cv2.addWeighted(image_draw, 1, mask_ovlap, 0.5, 0)
    cv2.imwrite(overlay_path, image_ovlap)


if __name__ == "__main__":

    parser = argparse.ArgumentParser()
    parser.add_argument("--img_path", "-i", type=str, default="../images/test.jpg", help="input image path")
    parser.add_argument("--output_dir", "-o", type=str, default="./output", help="result path")
    args = parser.parse_args()

    # Constant paths: no f-string needed.
    encoder = SAMEncoder("../onnx/mobile_sam_encoder.onnx")
    decoder = SAMDecoder("../onnx/mobile_sam_decoder_slim.onnx")

    image = cv2.imread(args.img_path)
    h, w, _ = image.shape
    # Encode once; the same embedding is reused for every prompt below.
    image_embedding, scale = encoder.encode(image)
    print("Scale:", scale)

    os.makedirs(args.output_dir, exist_ok=True)

    # Point prompts: pixel coordinates in test.jpg.
    points = [(910, 641), (1488, 607), (579, 704)]
    for i, point in enumerate(points):
        image_draw = image.copy()

        output = decoder.decode(image_embedding[0], point=point, scale=scale)
        # output[0] holds per-mask quality scores: keep the best-scoring mask.
        idx = output[0].argmax()

        image_draw = cv2.circle(image_draw, (int(point[0]), int(point[1])), 10, (0, 255, 0), -1)
        _save_mask(
            output[1][:, idx, :, :][0], image_draw, w, h,
            f"{args.output_dir}/point_mask_point_{i}.jpg",
            f"{args.output_dir}/point_mask_ovlap_point_{i}.jpg",
        )

    # Box prompts: (top-left x, top-left y, width, height) in test.jpg.
    boxes = [
        (910 - 160, 641 - 430, 380, 940),
        (479, 482, 191, 518),
        (1345, 333, 289, 701),
        (1, 357, 311, 751),
    ]
    for i, box in enumerate(boxes):
        image_draw = image.copy()
        output = decoder.decode(image_embedding[0], box=box, scale=scale)
        idx = output[0].argmax()

        image_draw = cv2.rectangle(
            image_draw,
            (int(box[0]), int(box[1])),
            (int(box[0] + box[2]), int(box[1] + box[3])),
            (0, 255, 0), 2,
        )
        _save_mask(
            output[1][:, idx, :, :][0], image_draw, w, h,
            f"{args.output_dir}/box_mask_box_{i}.jpg",
            f"{args.output_dir}/box_mask_ovlap_box_{i}.jpg",
        )
python_onnx/sam_decoder.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import onnxruntime
2
+ import cv2
3
+ import numpy as np
4
+
5
+
6
class SAMDecoder:
    """MobileSAM prompt decoder backed by an ONNX Runtime session."""

    def __init__(self, model_path):
        """Load the decoder network from *model_path*; reused for every prompt."""
        self.sess = onnxruntime.InferenceSession(model_path)

        # Placeholder low-res mask input; has_mask=0 tells the model to
        # ignore its contents.
        self.mask = np.zeros((1, 1, 256, 256), np.float32)
        self.has_mask = np.array([0], np.float32)

    def decode(self, image_embedding, point = None, box = None, scale = None):
        """Decode masks for one point or one box prompt.

        Prompt coordinates are given in original-image pixels and mapped to
        model-input coordinates via *scale*. Returns the raw session outputs.
        Raises ValueError if neither prompt is supplied.
        """
        # Guard clause first: exactly one kind of prompt is required.
        if point is None and box is None:
            raise ValueError("Either point or box must be provided.")

        if point is not None:
            scaled_point = np.array(point).astype(np.float32) * scale
            prompt_points = [scaled_point, (0, 0), (0, 0), (0, 0), (0, 0)]
            prompt_labels = [1, 0, 0, 0, 0]  # 1 = foreground, 0 = padding
        else:
            x, y, w, h = np.array(box).astype(np.float32) * scale
            prompt_points = [
                np.array([x + w / 2, y + h / 2], np.float32),  # box center
                np.array([x, y], np.float32),                  # top-left corner
                np.array([x + w, y + h], np.float32),          # bottom-right corner
                (0, 0),
                (0, 0),
            ]
            prompt_labels = [1, 2, 3, 0, 0]  # 2/3 = box corners

        point_coords = np.array(prompt_points).astype(np.float32).reshape((1, -1, 2))
        point_labels = np.array(prompt_labels, np.float32).reshape((1, -1))
        feeds = {
            "image_embeddings": image_embedding,
            "point_coords": point_coords,
            "point_labels": point_labels,
            "mask_input": self.mask,
            "has_mask_input": self.has_mask,
        }
        return self.sess.run(None, feeds)
python_onnx/sam_encoder.py ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import onnxruntime
2
+ import cv2
3
+ import numpy as np
4
+
5
class SAMEncoder:
    """MobileSAM image encoder backed by an ONNX Runtime session.

    Letterboxes a BGR image to the model resolution, normalizes it, and
    produces the image embedding consumed by ``SAMDecoder``.
    """

    def __init__(self, model_path):
        """Load the encoder network.

        :param model_path: path to the encoder ``.onnx`` file
        """
        self.sess = onnxruntime.InferenceSession(model_path)
        # Model input resolution (width, height).
        self.input_shape = (1024, 1024)

    def letterbox(self, image, target_size, color=(114, 114, 114)):
        """Resize *image* into *target_size* preserving aspect ratio.

        The resized image is anchored at the top-left corner and the free
        right/bottom area is filled with *color*, so prompt coordinates only
        need the returned scale (no offset correction).

        :param image: input image (H, W, C)
        :param target_size: target canvas size (width, height)
        :param color: BGR fill color for the padded area
        :return: (padded image, scale, (right_pad, bottom_pad))
        """
        original_height, original_width = image.shape[:2]
        target_width, target_height = target_size

        # Uniform scale so the whole image fits inside the target canvas.
        scale = min(target_width / original_width, target_height / original_height)
        new_width = int(original_width * scale)
        new_height = int(original_height * scale)

        resized_image = cv2.resize(image, (new_width, new_height), interpolation=cv2.INTER_LINEAR)

        # Bug fix: the old code reported centered offsets ((target-new)//2)
        # while actually padding only the right/bottom edges. Report the
        # padding that is really applied.
        pad_width = target_width - new_width
        pad_height = target_height - new_height

        padded_image = cv2.copyMakeBorder(
            resized_image,
            0, pad_height,
            0, pad_width,
            cv2.BORDER_CONSTANT,
            value=color,
        )

        return padded_image, scale, (pad_width, pad_height)

    def preprocess(self, image):
        """Letterbox to model resolution and convert BGR->RGB.

        :param image: BGR image (H, W, 3)
        :return: (RGB letterboxed image, letterbox scale)
        """
        padded_image, scale, _ = self.letterbox(image, self.input_shape)

        padded_image = cv2.cvtColor(padded_image, cv2.COLOR_BGR2RGB)
        return padded_image, scale

    def encode(self, image):
        """Normalize, transpose to NCHW, and run the encoder.

        :param image: BGR image (H, W, 3)
        :return: (list of encoder outputs, letterbox scale)
        """
        padded_image, scale = self.preprocess(image)

        # ImageNet mean/std normalization expected by the ONNX encoder.
        mean = np.array([0.485, 0.456, 0.406], dtype=np.float32).reshape((1, 1, 3))
        std = np.array([0.229, 0.224, 0.225], dtype=np.float32).reshape((1, 1, 3))
        padded_image = (padded_image.astype(np.float32) / 255 - mean) / std

        # HWC -> NCHW, then add the batch dimension.
        padded_image = np.transpose(padded_image, (2, 0, 1))
        padded_image = np.expand_dims(padded_image, axis=0)

        return self.sess.run(None, {self.sess.get_inputs()[0].name: padded_image}), scale
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ numpy
2
+ opencv-python