lwanming commited on
Commit
dc04921
·
verified ·
1 Parent(s): 325ddf3

Upload 3 files

Browse files
optimize_sam_decoder.py ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import onnx
2
+ from onnx import helper, TensorProto
3
+ import numpy as np
4
+
5
+ # Optimize the SAM decoder model to accept dynamic original image size input
6
+ # to enable constant folding with freeDimensionOverride option in onnxruntime.
7
+ def optimize_sa_model(model_path: str, out_path: str, is_fp16: bool = False):
8
+ model = onnx.load(model_path)
9
+
10
+ graph = model.graph
11
+
12
+ old_input_name="orig_im_size"
13
+ new_input_name="orig_im_size_shape"
14
+
15
+ # 1) Find old input
16
+ old_inputs = {vi.name: vi for vi in graph.input}
17
+ assert old_input_name in old_inputs, f"Input {old_input_name} not found"
18
+ old_vi = old_inputs[old_input_name]
19
+
20
+ # 2) Remove old input and add new input
21
+ graph.input.remove(old_vi)
22
+ # new input with shape: [height, width]
23
+ new_input_vi = helper.make_tensor_value_info(new_input_name, TensorProto.FLOAT, ["height", "width"])
24
+ graph.input.extend([new_input_vi])
25
+
26
+ # Check if new_input_name exists in the graph inputs
27
+ if new_input_name not in [input.name for input in graph.input]:
28
+ raise ValueError(f"Input '{new_input_name}' does not exist in the graph inputs.")
29
+
30
+ # 3) Insert Shape node: Shape(X) -> shape_X(INT64 1D tensor [H, W])
31
+ shape_output_name = old_input_name # keep the same name as old input
32
+ shape_node = helper.make_node(
33
+ "Shape",
34
+ inputs=[new_input_name],
35
+ outputs=[shape_output_name],
36
+ name="shape_of_orig_im_size_shape"
37
+ )
38
+ # Insert the Shape node
39
+ graph.node.insert(0, shape_node)
40
+
41
+ # 4) The origin input dtype is not INT64, need to add Cast node
42
+ # But the original model has already a Cast node after the input, ignore it
43
+
44
+ if is_fp16:
45
+ # For fp16 model, since CPU kernel doesn't support constant folding
46
+ # for fp16 data type, we need to convert some fp16 constants and input/output info to fp32
47
+ fp16_constants = ["/Constant_85", "/Constant_86"]
48
+
49
+ # Convert fp16 constants in fp16_constants to fp32
50
+ for node in graph.node:
51
+ if node.op_type == "Constant" and node.name in fp16_constants:
52
+ print(node.name)
53
+ # Locate the "value" attribute of the Constant node
54
+ for attr in node.attribute:
55
+ if attr.name == "value":
56
+ # Extract the tensor value
57
+ tensor = onnx.numpy_helper.to_array(attr.t)
58
+
59
+ # Convert the tensor to the target data type
60
+ new_tensor = tensor.astype(np.float32)
61
+
62
+ # Create a new ONNX tensor with the updated data type
63
+ attr.t.CopyFrom(onnx.numpy_helper.from_array(new_tensor))
64
+ break
65
+ else:
66
+ raise ValueError(f"Constant node '{node.name}' does not have a 'value' attribute.")
67
+
68
+ fp16_nodes = ["/ReduceMax", "/Reciprocal", "/Mul_19", "/Mul_20", "/Add_11", "/Floor"]
69
+ # Change fp16 nodes in fp16_nodes to fp32
70
+ for node in graph.node:
71
+ if node.name in fp16_nodes:
72
+ print(f"Processing node: {node.name}")
73
+ for input_name in node.input:
74
+ for value_info in graph.value_info:
75
+ if value_info.name == input_name:
76
+ value_info.type.tensor_type.elem_type = TensorProto.FLOAT
77
+ print(f" - Change input: {input_name} to fp32")
78
+
79
+ for output_name in node.output:
80
+ for value_info in graph.value_info:
81
+ if value_info.name == output_name:
82
+ value_info.type.tensor_type.elem_type = TensorProto.FLOAT
83
+ print(f" - Change output: {output_name} to fp32")
84
+
85
+ # Change /Cast_9 to fp32
86
+ for node in graph.node:
87
+ if node.name == "/Cast_9":
88
+ node.attribute[0].i = TensorProto.FLOAT
89
+ print(f"Changed /Cast_9 to fp32")
90
+ break
91
+ onnx.checker.check_model(model)
92
+ onnx.save(model, out_path)
93
+ print(f"Saved to {out_path}")
94
+
95
+ # the original int8 decoder model: https://huggingface.co/schmuell/sam-b-fp16/blob/main/sam_vit_b-decoder-int8.onnx
96
+ # optimize_sa_model("sam_vit_b-decoder-int8.onnx", "sam_vit_b-decoder-int8-orig-img-size-dynamic.onnx", False)
97
+ # the original fp32 decoder model: https://huggingface.co/schmuell/sam-b-fp16/blob/main/sam_vit_b_01ec64.decoder.onnx
98
+ optimize_sa_model("sam_vit_b_01ec64.decoder-fp16.onnx", "sam_vit_b_01ec64.decoder-orig-img-size-dynamic-fp16.onnx", True)
sam_vit_b-decoder-orig-img-size-dynamic-int8.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ff8fa33f888797c28ffe856fd451de4ee65bdb6dcd76b5d2543c3ee7a9572ad3
3
+ size 4741477
sam_vit_b_01ec64.decoder-orig-img-size-dynamic-fp16.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:39e57fec1224053507918232b6d687724f633c6a447072b71bea6d7c9168fc5e
3
+ size 8437283