LosReturn commited on
Commit
4185256
·
verified ·
1 Parent(s): 624bf12

Upload 6 files

Browse files
Files changed (7) hide show
  1. .gitattributes +1 -0
  2. LICENSE +28 -0
  3. README.md +18 -0
  4. assets/bus.jpg +3 -0
  5. config/config.json +60 -0
  6. data/images.tar.gz +3 -0
  7. src/inference.py +112 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ assets/bus.jpg filter=lfs diff=lfs merge=lfs -text
LICENSE ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ BSD 3-Clause License
2
+
3
+ Copyright (c) 2026, AXERA
4
+
5
+ Redistribution and use in source and binary forms, with or without
6
+ modification, are permitted provided that the following conditions are met:
7
+
8
+ 1. Redistributions of source code must retain the above copyright notice, this
9
+ list of conditions and the following disclaimer.
10
+
11
+ 2. Redistributions in binary form must reproduce the above copyright notice,
12
+ this list of conditions and the following disclaimer in the documentation
13
+ and/or other materials provided with the distribution.
14
+
15
+ 3. Neither the name of the copyright holder nor the names of its
16
+ contributors may be used to endorse or promote products derived from
17
+ this software without specific prior written permission.
18
+
19
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
23
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
README.md ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Deformable-Detr.axera
2
+ Deformable-Detr DEMO on Axera NPU.
3
+
4
+ ### 1. 工程下载
5
+ ```
6
+ git clone https://github.com/AXERA-TECH/deformable-detr.axera.git
7
+ ```
8
+
9
+ ### 2. 模型转换
10
+ ```
11
+ pulsar2 build --config ./config/config.json
12
+ ```
13
+ ### 3. 板端运行
14
+ ```
15
+ python src/inference.py --model detr.axmodel --img ./assets/bus.jpg --output out.jpg --thresh 0.6
16
+ ```
17
+ ### 4. 结果展示
18
+ ![result](./output/out.jpg)
assets/bus.jpg ADDED

Git LFS Details

  • SHA256: 33b198a1d2839bb9ac4c65d61f9e852196793cae9a0781360859425f6022b69c
  • Pointer size: 131 Bytes
  • Size of remote file: 487 kB
config/config.json ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "input": "./onnx_model/deformable_detr_ax.onnx",
3
+ "output_dir": "./output",
4
+ "output_name": "detr.axmodel",
5
+ "target_hardware": "AX650",
6
+ "model_type": "ONNX",
7
+ "npuMode": "NPU3",
8
+ "onnx_opt": {
9
+ "disable_onnx_optimization": false,
10
+ "enable_onnxsim": false,
11
+ "model_check": false,
12
+ "disable_transformation_check": false
13
+ },
14
+ "quant": {
15
+ "precision_analysis": true,
16
+ "calibration_method": "MinMax",
17
+ "precision_analysis_method": "EndToEnd",
18
+ "precision_analysis_mode": "NPUBackend",
19
+ "input_configs": [
20
+ {
21
+ "tensor_name": "DEFAULT",
22
+ "calibration_dataset": "./data/images.tar.gz",
23
+ "calibration_mean": [123.675, 116.28, 103.53],
24
+ "calibration_std": [58.395, 57.12, 57.375]
25
+ }
26
+ ],
27
+ "layer_configs": [
28
+ {
29
+ "start_tensor_names": [
30
+ "DEFAULT"
31
+ ],
32
+ "end_tensor_names": [
33
+ "DEFAULT"
34
+ ],
35
+ "data_type": "U16"
36
+ }
37
+ ],
38
+ "conv_bias_data_type": "S32",
39
+ "transformer_opt_level": 0,
40
+ "enable_smooth_quant": false
41
+ },
42
+ "input_processors": [
43
+ {
44
+ "tensor_name": "input",
45
+ "src_format": "RGB",
46
+ "src_dtype": "FP32",
47
+ "src_layout": "NHWC",
48
+ "tensor_format": "RGB"
49
+ }
50
+ ],
51
+ "output_processors": [
52
+ {
53
+ "tensor_name": "DEFAULT",
54
+ "tensor_layout": "NCHW"
55
+ }
56
+ ],
57
+ "compiler": {
58
+ "check": 0
59
+ }
60
+ }
data/images.tar.gz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aad908e27b753be3daabc7db0cbeab657887b4a9b41d3232d87f4f98e4e4a3eb
3
+ size 1094082
src/inference.py ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import argparse
import numpy as np
import sys
import os

# Prefer the AXera NPU runtime when present; onnxruntime is the drop-in
# fallback for development machines (only the InferenceSession API is used,
# which both libraries expose under the same name here).
try:
    import axengine as ort
    print("Running on AXera NPU (axengine)...")
except ImportError:
    import onnxruntime as ort
    print("Running on CPU/GPU (onnxruntime)...")

from PIL import Image, ImageDraw, ImageFont

# When False, raw 0-255 RGB values are fed to the model.
# NOTE(review): presumably the compiled .axmodel applies mean/std itself —
# config/config.json in this repo lists the same calibration_mean/std values;
# confirm before enabling host-side normalization.
NORMALIZATION_ENABLED = False
MEAN = np.array([123.675, 116.28, 103.53], dtype=np.float32)
STD = np.array([58.395, 57.12, 57.375], dtype=np.float32)

# The 80 COCO class names (underscored); index must match the model's label ids.
CLASSES = [
    "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic_light",
    "fire_hydrant", "stop_sign", "parking_meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow",
    "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
    "skis", "snowboard", "sports_ball", "kite", "baseball_bat", "baseball_glove", "skateboard", "surfboard",
    "tennis_racket", "bottle", "wine_glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple",
    "sandwich", "orange", "broccoli", "carrot", "hot_dog", "pizza", "donut", "cake", "chair", "couch",
    "potted_plant", "bed", "dining_table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard",
    "cell_phone", "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase",
    "scissors", "teddy_bear", "hair_drier", "toothbrush"
]
def preprocess_normalized(image_path, input_h, input_w, layout="NCHW"):
    """Load *image_path* and letterbox it onto a black (input_w, input_h) canvas.

    The aspect ratio is preserved and the resized image is pasted at the
    top-left corner; the remainder of the canvas stays black.

    Returns a tuple ``(tensor, original_image, meta)`` where ``tensor`` is a
    batched float32 array (NCHW or NHWC per *layout*) and ``meta`` carries the
    original size and the resize scale so detections can be mapped back.
    """
    source = Image.open(image_path).convert("RGB")
    src_w, src_h = source.size

    # Single uniform scale that fits the whole image inside the model input.
    ratio = min(input_w / src_w, input_h / src_h)
    scaled = source.resize((int(src_w * ratio), int(src_h * ratio)), Image.BILINEAR)

    board = Image.new("RGB", (input_w, input_h), (0, 0, 0))
    board.paste(scaled, (0, 0))
    tensor = np.array(board, dtype=np.float32)

    # Host-side mean/std normalization is opt-in (module-level flag).
    if NORMALIZATION_ENABLED:
        tensor = (tensor - MEAN) / STD

    # Channels-first models need HWC -> CHW.
    if layout == "NCHW":
        tensor = tensor.transpose(2, 0, 1)

    return (
        np.expand_dims(tensor, 0),
        source,
        {"original_size": (src_w, src_h), "scale": ratio},
    )
51
+
def main():
    """Run Deformable-DETR inference on one image and save an annotated copy.

    Command line arguments:
        --model   path to the .axmodel / .onnx file (required)
        --img     input image path (required)
        --output  annotated image path (default: result.jpg)
        --thresh  score threshold for keeping detections (default: 0.3)
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--model", type=str, required=True)
    parser.add_argument("--img", type=str, required=True)
    parser.add_argument("--output", type=str, default="result.jpg")
    parser.add_argument("--thresh", type=float, default=0.3)
    opt = parser.parse_args()

    session = ort.InferenceSession(opt.model)
    input_meta = session.get_inputs()[0]

    # Infer layout from the input shape: channels-first (N, 3, H, W) vs
    # channels-last (N, H, W, 3).
    if input_meta.shape[1] == 3:
        layout, h, w = "NCHW", input_meta.shape[2], input_meta.shape[3]
    else:
        layout, h, w = "NHWC", input_meta.shape[1], input_meta.shape[2]

    img_tensor, raw_img, meta = preprocess_normalized(opt.img, h, w, layout)
    outputs = session.run(None, {input_meta.name: img_tensor})

    # outputs[0]: (1, N, 5) boxes as [x1, y1, x2, y2, score] in model-input
    # coordinates; outputs[1]: (1, N) label ids.
    # NOTE(review): inferred from the indexing below — confirm against the
    # exported ONNX model's output spec.
    dets = outputs[0][0]
    labels = outputs[1][0]
    keep = dets[:, 4] >= opt.thresh

    v_dets = dets[keep]
    v_labels = labels[keep]

    orig_w, orig_h = meta["original_size"]
    scale = meta["scale"]

    print(f"Detected {len(v_dets)} objects.")

    if len(v_dets) > 0:
        draw = ImageDraw.Draw(raw_img)
        try:
            font = ImageFont.truetype("DejaVuSans.ttf", 18)
        except OSError:
            # Font file not installed. Was a bare `except:`, which also
            # swallowed SystemExit/KeyboardInterrupt; ImageFont.truetype
            # raises OSError when the font resource cannot be loaded.
            font = ImageFont.load_default()

        for i in range(len(v_dets)):
            # Map the box back to original-image coordinates and clip to bounds.
            box = v_dets[i, :4] / scale
            score = v_dets[i, 4]
            label_id = int(v_labels[i])

            x1, y1, x2, y2 = box
            x1, x2 = np.clip([x1, x2], 0, orig_w)
            y1, y2 = np.clip([y1, y2], 0, orig_h)

            draw.rectangle([x1, y1, x2, y2], outline="lime", width=3)

            # Guard both ends: a negative id would silently wrap around and
            # pick a class name from the end of the list.
            name = CLASSES[label_id] if 0 <= label_id < len(CLASSES) else f"obj_{label_id}"
            text = f"{name} {score:.2f}"

            # Keep the label strip inside the image when the box touches the
            # top edge (y1 - 20 could be negative).
            ty = max(y1 - 20, 0)
            draw.rectangle([x1, ty, x1 + 100, ty + 20], fill="lime")
            draw.text((x1 + 2, ty), text, fill="black", font=font)

    # Save unconditionally so an output file exists even with zero detections.
    raw_img.save(opt.output)
    print(f"Result saved to {opt.output}")


if __name__ == "__main__":
    main()