# Source: QRCode_det/python/QRCode_axmodel_infer_Nanodet.py
# Author (per page header): wzf19947
# Commit 45b2cae: add NanoDet; update the models for every platform
import os
import glob
import time
import cv2
import numpy as np
import pyzbar.pyzbar as pyzbar
import axengine as axe
import math
# Detection class labels; NanoDetONNXInfer derives its class count from
# len(names), and post-processing keys its results by index into this list.
names = ["QRCode"]
def sigmoid(x):
    """Apply the logistic function ``1 / (1 + exp(-x))`` element-wise to ``x``."""
    denominator = 1 + np.exp(-x)
    return 1 / denominator
def model_load(model):
    """Create an axengine inference session for ``model``.

    Args:
        model: Value handed to ``axe.InferenceSession`` (this script passes
            the path of an ``.axmodel`` file).

    Returns:
        tuple: ``(session, output_names)`` where ``output_names`` lists the
        ``name`` of every entry of ``session.get_outputs()``, in order.
    """
    session = axe.InferenceSession(model)
    # The input name was previously looked up here and then discarded;
    # callers query it from the session when they need it.
    output_names = [output.name for output in session.get_outputs()]
    return session, output_names
def data_process_cv2(frame, input_shape):
    """Read an image file and resize it to the network input size.

    Args:
        frame: Path of the image file, read with ``cv2.imread``.
        input_shape: Two-element target size forwarded to ``cv2.resize`` as
            ``dsize``.
            NOTE(review): OpenCV interprets ``dsize`` as (width, height),
            while other code in this file indexes the same value as
            (height, width); the two agree only for square inputs - confirm
            before using a non-square size.

    Returns:
        tuple: ``(img, im0, org_data)`` where ``im0`` is the image as loaded,
        ``org_data`` is a copy of the resized image and ``img`` is the
        resized image with a leading batch axis added.

    Raises:
        OSError: If ``cv2.imread`` could not load ``frame``.
    """
    im0 = cv2.imread(frame)
    if im0 is None:
        # cv2.imread signals failure by returning None instead of raising;
        # fail here with a readable message rather than inside cv2.resize.
        raise OSError(f"failed to read image: {frame}")
    img = cv2.resize(im0, tuple(input_shape), interpolation=cv2.INTER_AREA)
    org_data = img.copy()
    img = np.expand_dims(np.ascontiguousarray(img), 0)
    return img, im0, org_data
def multiclass_nms(
    multi_bboxes, multi_scores, score_thr, nms_cfg, max_num=-1, score_factors=None
):
    """Threshold scores and run non-maximum suppression separately per class.

    Args:
        multi_bboxes (np.ndarray): Boxes of shape (N, 4), shared by all
            classes, or (N, 4 * C) with one box per class.
        multi_scores (np.ndarray): Scores of shape (N, C + 1); the last
            column is the background class and is ignored.
        score_thr (float): Only scores strictly greater than this are kept.
        nms_cfg (dict): NMS options. ``iou_threshold`` (default 0.5) is
            forwarded to ``nms_numpy``; ``class_agnostic`` (default False)
            makes boxes of different classes suppress each other.
        max_num (int): When positive, keep at most this many detections.
        score_factors (np.ndarray | None): Optional per-box multipliers of
            shape (N,), applied to the scores before thresholding.

    Returns:
        tuple: ``(dets, labels)``; ``dets`` has shape (K, 5) holding
        ``x1, y1, x2, y2, score`` sorted by descending score, and ``labels``
        has shape (K,) with the int64 class index of each row.
    """
    num_boxes = multi_scores.shape[0]
    num_classes = multi_scores.shape[1] - 1  # exclude background

    if multi_bboxes.shape[1] > 4:
        # (N, 4*C) -> (N, C, 4)
        bboxes = multi_bboxes.reshape(num_boxes, -1, 4)
    else:
        # (N, 4) -> (N, C, 4): every class shares the same box
        bboxes = np.tile(multi_bboxes[:, None, :], (1, num_classes, 1))

    scores = multi_scores[:, :-1]  # (N, C)
    if score_factors is not None:
        scores = scores * score_factors[:, None]

    box_idx, labels = np.nonzero(scores > score_thr)
    if box_idx.size == 0:
        return np.zeros((0, 5), dtype=np.float32), np.zeros((0,), dtype=np.int64)

    labels = labels.astype(np.int64)
    # (K, 5) candidates: box coordinates followed by the score
    candidates = np.concatenate(
        [bboxes[box_idx, labels], scores[box_idx, labels][:, None]], axis=1
    )

    iou_threshold = nms_cfg.get('iou_threshold', 0.5)
    if nms_cfg.get('class_agnostic', False):
        groups = [np.arange(labels.shape[0])]
    else:
        # Suppress only within a class so that overlapping boxes of
        # different classes both survive.
        groups = [np.flatnonzero(labels == cls) for cls in np.unique(labels)]

    kept_per_group = []
    for members in groups:
        local_keep = nms_numpy(candidates[members], iou_threshold=iou_threshold)
        kept_per_group.append(members[np.asarray(local_keep, dtype=np.int64)])
    keep = np.concatenate(kept_per_group)
    # Merge the per-class results back into one descending-score ranking.
    keep = keep[np.argsort(-candidates[keep, 4], kind="stable")]

    if max_num > 0:
        keep = keep[:max_num]
    return candidates[keep], labels[keep]
def nms_numpy(dets, iou_threshold=0.5):
    """Greedy non-maximum suppression on ``(N, 5)`` detections.

    Each row of ``dets`` is ``x1, y1, x2, y2, score``. Widths and heights use
    the inclusive-pixel convention (``x2 - x1 + 1``). Starting from the
    highest score, a box is kept and every remaining box whose IoU with it
    exceeds ``iou_threshold`` is dropped.

    Returns:
        list: Row indices of the kept boxes, highest score first (an empty
        list when ``dets`` has no elements).
    """
    if dets.size == 0:
        return []

    left, top, right, bottom, conf = (dets[:, col] for col in range(5))
    box_area = (right - left + 1) * (bottom - top + 1)

    remaining = conf.argsort()[::-1]  # best score first
    selected = []
    while remaining.size > 0:
        best, rest = remaining[0], remaining[1:]
        selected.append(best)

        # Intersection of the best box with every box still in play.
        overlap_w = np.maximum(
            0.0,
            np.minimum(right[best], right[rest]) - np.maximum(left[best], left[rest]) + 1,
        )
        overlap_h = np.maximum(
            0.0,
            np.minimum(bottom[best], bottom[rest]) - np.maximum(top[best], top[rest]) + 1,
        )
        overlap = overlap_w * overlap_h
        ratio = overlap / (box_area[best] + box_area[rest] - overlap)

        remaining = rest[ratio <= iou_threshold]
    return selected
def batched_nms(boxes, scores, idxs, nms_cfg, class_agnostic=False):
    """Run NMS independently for each group of boxes identified by ``idxs``.

    Unless class-agnostic, every box is shifted by an offset that depends
    only on its group id so that boxes of different groups never overlap,
    which lets a single ``nms_numpy`` call handle all groups at once.

    Args:
        boxes (np.ndarray): Boxes of shape (N, 4) as ``x1, y1, x2, y2``.
        scores (np.ndarray): Scores of shape (N,).
        idxs (np.ndarray): Group (class) id of each box, shape (N,).
        nms_cfg (dict): Options; it is copied, never modified. Recognised
            keys: ``class_agnostic`` (overrides the argument), ``type``
            (accepted and ignored), ``split_thr`` (default 10000; at or above
            this many boxes NMS runs group by group) and ``iou_threshold``
            (default 0.5). Other keys are ignored because ``nms_numpy``
            accepts no further options.
        class_agnostic (bool): If True, all boxes compete with each other.

    Returns:
        tuple: ``(dets, keep)``; ``dets`` has shape (K, 5) holding the kept
        boxes with their score, sorted by descending score, and ``keep`` has
        the int64 indices of those rows in the input.
    """
    cfg = nms_cfg.copy()
    class_agnostic = cfg.pop("class_agnostic", class_agnostic)
    cfg.pop("type", None)  # only plain NMS exists in this NumPy port
    split_thr = cfg.pop("split_thr", 10000)
    iou_threshold = cfg.pop("iou_threshold", 0.5)

    if len(boxes) == 0:
        # boxes.max() below is undefined for an empty array.
        dets = np.concatenate([boxes.reshape(0, 4), scores.reshape(0, 1)], axis=-1)
        return dets, np.zeros((0,), dtype=np.int64)

    if class_agnostic:
        boxes_for_nms = boxes
    else:
        offsets = idxs.astype(boxes.dtype) * (boxes.max() + 1)
        boxes_for_nms = boxes + offsets[:, None]

    def run_nms(sel_boxes, sel_scores):
        # nms_numpy expects one (M, 5) array with the score in column 4.
        packed = np.concatenate([sel_boxes, sel_scores[:, None]], axis=1)
        kept = nms_numpy(packed, iou_threshold=iou_threshold)
        return np.asarray(kept, dtype=np.int64)

    if len(boxes_for_nms) < split_thr:
        keep = run_nms(boxes_for_nms, scores)  # already sorted by score
    else:
        # Large input: process one group at a time, then merge.
        kept_mask = np.zeros(scores.shape, dtype=bool)
        for group_id in np.unique(idxs):
            members = np.flatnonzero(idxs == group_id)
            kept_mask[members[run_nms(boxes_for_nms[members], scores[members])]] = True
        keep = np.flatnonzero(kept_mask)
        keep = keep[np.argsort(-scores[keep])]

    dets = np.concatenate([boxes[keep], scores[keep][:, None]], axis=-1)
    return dets, keep
def scale_boxes_no_letter(img1_shape, boxes, img0_shape):
    """Map xyxy boxes from a plainly resized image back to the source image.

    ``img1_shape`` is the (height, width) the boxes currently refer to and
    ``img0_shape`` the (height, width, ...) of the target image. Each axis is
    rescaled independently (no letterbox padding is assumed), then the boxes
    are clipped to the target image. ``boxes`` is modified in place and also
    returned.
    """
    gain_h = img1_shape[0] / img0_shape[0]
    gain_w = img1_shape[1] / img0_shape[1]
    boxes[..., [0, 2]] /= gain_w  # x coordinates
    boxes[..., [1, 3]] /= gain_h  # y coordinates
    clip_boxes(boxes, img0_shape)
    return boxes
def clip_boxes(boxes, shape):
    """Clamp xyxy ``boxes`` in place to an image of ``shape`` (height, width, ...)."""
    height, width = shape[0], shape[1]
    boxes[..., [0, 2]] = np.clip(boxes[..., [0, 2]], 0, width)
    boxes[..., [1, 3]] = np.clip(boxes[..., [1, 3]], 0, height)
# Palette of 80 three-component colours with values in [0, 1], stored as a
# float32 array of shape (80, 3).
# NOTE(review): presumably one row per class label for drawing - confirm.
_COLORS = np.array(
    [
        [0.000, 0.447, 0.741],
        [0.850, 0.325, 0.098],
        [0.929, 0.694, 0.125],
        [0.494, 0.184, 0.556],
        [0.466, 0.674, 0.188],
        [0.301, 0.745, 0.933],
        [0.635, 0.078, 0.184],
        [0.300, 0.300, 0.300],
        [0.600, 0.600, 0.600],
        [1.000, 0.000, 0.000],
        [1.000, 0.500, 0.000],
        [0.749, 0.749, 0.000],
        [0.000, 1.000, 0.000],
        [0.000, 0.000, 1.000],
        [0.667, 0.000, 1.000],
        [0.333, 0.333, 0.000],
        [0.333, 0.667, 0.000],
        [0.333, 1.000, 0.000],
        [0.667, 0.333, 0.000],
        [0.667, 0.667, 0.000],
        [0.667, 1.000, 0.000],
        [1.000, 0.333, 0.000],
        [1.000, 0.667, 0.000],
        [1.000, 1.000, 0.000],
        [0.000, 0.333, 0.500],
        [0.000, 0.667, 0.500],
        [0.000, 1.000, 0.500],
        [0.333, 0.000, 0.500],
        [0.333, 0.333, 0.500],
        [0.333, 0.667, 0.500],
        [0.333, 1.000, 0.500],
        [0.667, 0.000, 0.500],
        [0.667, 0.333, 0.500],
        [0.667, 0.667, 0.500],
        [0.667, 1.000, 0.500],
        [1.000, 0.000, 0.500],
        [1.000, 0.333, 0.500],
        [1.000, 0.667, 0.500],
        [1.000, 1.000, 0.500],
        [0.000, 0.333, 1.000],
        [0.000, 0.667, 1.000],
        [0.000, 1.000, 1.000],
        [0.333, 0.000, 1.000],
        [0.333, 0.333, 1.000],
        [0.333, 0.667, 1.000],
        [0.333, 1.000, 1.000],
        [0.667, 0.000, 1.000],
        [0.667, 0.333, 1.000],
        [0.667, 0.667, 1.000],
        [0.667, 1.000, 1.000],
        [1.000, 0.000, 1.000],
        [1.000, 0.333, 1.000],
        [1.000, 0.667, 1.000],
        [0.333, 0.000, 0.000],
        [0.500, 0.000, 0.000],
        [0.667, 0.000, 0.000],
        [0.833, 0.000, 0.000],
        [1.000, 0.000, 0.000],
        [0.000, 0.167, 0.000],
        [0.000, 0.333, 0.000],
        [0.000, 0.500, 0.000],
        [0.000, 0.667, 0.000],
        [0.000, 0.833, 0.000],
        [0.000, 1.000, 0.000],
        [0.000, 0.000, 0.167],
        [0.000, 0.000, 0.333],
        [0.000, 0.000, 0.500],
        [0.000, 0.000, 0.667],
        [0.000, 0.000, 0.833],
        [0.000, 0.000, 1.000],
        [0.000, 0.000, 0.000],
        [0.143, 0.143, 0.143],
        [0.286, 0.286, 0.286],
        [0.429, 0.429, 0.429],
        [0.571, 0.571, 0.571],
        [0.714, 0.714, 0.714],
        [0.857, 0.857, 0.857],
        [0.000, 0.447, 0.741],
        [0.314, 0.717, 0.741],
        [0.500, 0.500, 0.000],
    ],
    dtype=np.float32,
)
def distance2bbox(points, distance, max_shape=None):
    """Turn centre points plus side distances into ``x1, y1, x2, y2`` boxes.

    Args:
        points (np.ndarray): Array whose last axis starts with ``x, y``.
        distance (np.ndarray): Array whose last axis holds the distances to
            the left, top, right and bottom edges, in that order.
        max_shape (sequence | None): Optional ``(height, width)``; when
            given, x values are clipped to ``[0, width]`` and y values to
            ``[0, height]``.

    Returns:
        np.ndarray: Boxes stacked along a new last axis of length 4.
    """
    centre_x, centre_y = points[..., 0], points[..., 1]
    corners = [
        centre_x - distance[..., 0],
        centre_y - distance[..., 1],
        centre_x + distance[..., 2],
        centre_y + distance[..., 3],
    ]
    if max_shape is not None:
        upper_bounds = (max_shape[1], max_shape[0], max_shape[1], max_shape[0])
        corners = [
            np.clip(coord, a_min=0, a_max=bound)
            for coord, bound in zip(corners, upper_bounds)
        ]
    return np.stack(corners, axis=-1)
def integral_numpy(x, reg_max=16):
    """Expected value of a discretised distance distribution (NanoDet Integral).

    The last axis of ``x`` is split into 4 groups of ``reg_max + 1`` logits.
    Each group is passed through a softmax and reduced to the expectation
    over the bin values ``0, 1, ..., reg_max``.

    Args:
        x (np.ndarray): Logits of shape (..., 4 * (reg_max + 1)).
        reg_max (int): Largest bin value. Default: 16.

    Returns:
        np.ndarray: Expected distances of shape (..., 4).
    """
    bins = reg_max + 1
    channels = x.shape[-1]
    assert channels == 4 * bins, \
        f"Last dimension must be 4*(reg_max+1)={4*(reg_max+1)}, but got {channels}"

    # (..., 4 * bins) -> (..., 4, bins)
    logits = x.reshape(*x.shape[:-1], 4, bins)

    # Softmax over the bins; subtracting the maximum keeps exp() from overflowing.
    shifted = logits - logits.max(axis=-1, keepdims=True)
    weights = np.exp(shifted)
    probabilities = weights / weights.sum(axis=-1, keepdims=True)

    bin_values = np.arange(bins, dtype=logits.dtype)
    return (probabilities * bin_values).sum(axis=-1)
def overlay_bbox_cv(img, dets, class_names, score_thresh):
    """Collect the detections whose score exceeds ``score_thresh``.

    Args:
        img: Image, returned untouched (nothing is drawn on it here).
        dets (dict): Maps a class label to a list of boxes, each
            ``[x0, y0, x1, y1, ..., score]`` with the score last.
        class_names: Unused; part of the signature only.
        score_thresh (float): Boxes must score strictly above this value.

    Returns:
        tuple: ``(img, all_box)`` where ``all_box`` is a list of
        ``[label, x0, y0, x1, y1, score]`` with integer coordinates, sorted
        by ascending score.
    """
    all_box = []
    for label, boxes in dets.items():
        for box in boxes:
            confidence = box[-1]
            if not (confidence > score_thresh):
                continue
            left, top, right, bottom = map(int, box[:4])
            all_box.append([label, left, top, right, bottom, confidence])
    all_box.sort(key=lambda entry: entry[5])
    # Drawing of rectangles and text labels on ``img`` is intentionally
    # disabled in this script; only the filtered boxes are reported.
    return img, all_box
class NanoDetONNXInfer:
    """NanoDet detector: axengine inference plus NumPy box decoding and NMS."""

    def __init__(self, model_path, imgsz=(416, 416)):
        """Load the model and set the fixed decoding parameters.

        Args:
            model_path: Model file passed to ``model_load``.
            imgsz: Two-element network input size; stored as a tuple so the
                default value is never shared and mutated between instances.
        """
        self.model_path = model_path
        self.session, self.output_names = model_load(self.model_path)
        self.imgsz = tuple(imgsz)
        self.reg_max = 7
        self.reg_max1 = self.reg_max + 1  # number of bins per box side
        self.distribution_project = np.arange(self.reg_max + 1)
        self.nc = len(names)
        self.no = self.nc + self.reg_max1 * 4  # channels per prediction
        self.stride = [8, 16, 32, 64]

    def _featmap_sizes(self):
        """Return the integer (height, width) of the feature map per stride."""
        img_h, img_w = self.imgsz[0], self.imgsz[1]
        # ceil must wrap the whole division; otherwise the size stays a float.
        return [
            (math.ceil(img_h / stride), math.ceil(img_w / stride))
            for stride in self.stride
        ]

    def get_bboxes(self, cls_preds, reg_preds):
        """Decode raw head outputs into per-image detections.

        Args:
            cls_preds (np.ndarray): Class logits of shape
                (num_imgs, num_points, num_classes).
            reg_preds (np.ndarray): Distance logits of shape
                (num_imgs, num_points, 4 * (reg_max + 1)).

        Returns:
            list[tuple]: One ``(dets, labels)`` pair per image, as returned
            by ``multiclass_nms`` (score threshold 0.05, IoU threshold 0.6,
            at most 100 detections).
        """
        batch = cls_preds.shape[0]
        # Grid-cell origins (x, y) and strides of all levels, concatenated
        # along the point axis.
        center_priors = np.concatenate(
            [
                self.get_single_level_center_priors(
                    batch, featmap_size, stride, dtype=np.float32
                )
                for featmap_size, stride in zip(self._featmap_sizes(), self.stride)
            ],
            axis=1,
        )
        # Expected distances are expressed in units of the level's stride.
        distances = (
            integral_numpy(reg_preds, reg_max=self.reg_max) * center_priors[..., 2:3]
        )
        bboxes = distance2bbox(center_priors[..., :2], distances, max_shape=self.imgsz)
        scores = sigmoid(cls_preds)

        result_list = []
        for image_scores, image_bboxes in zip(scores, bboxes):
            # multiclass_nms expects a trailing background column.
            background = np.zeros((image_scores.shape[0], 1), dtype=image_scores.dtype)
            padded_scores = np.concatenate([image_scores, background], axis=1)
            result_list.append(
                multiclass_nms(
                    image_bboxes,
                    padded_scores,
                    score_thr=0.05,
                    nms_cfg=dict(type="nms", iou_threshold=0.6),
                    max_num=100,
                )
            )
        return result_list

    def get_single_level_center_priors(self, batch_size, featmap_size, stride, dtype):
        """Build the priors ``[x, y, stride, stride]`` of one feature level.

        Args:
            batch_size (int): Number of copies along the leading axis.
            featmap_size (tuple): ``(height, width)`` of the feature map.
            stride (int): Pixel stride of the level.
            dtype: NumPy dtype of the result.

        Returns:
            np.ndarray: Array of shape (batch_size, height * width, 4) listed
            row by row; x and y are the cell indices multiplied by ``stride``.
        """
        h, w = featmap_size
        x_range = np.arange(w, dtype=dtype) * stride
        y_range = np.arange(h, dtype=dtype) * stride
        y, x = np.meshgrid(y_range, x_range, indexing='ij')
        y = y.flatten()
        x = x.flatten()
        strides = np.full((x.shape[0],), stride, dtype=dtype)
        priors = np.stack([x, y, strides, strides], axis=-1)
        return np.tile(priors[None, :, :], (batch_size, 1, 1))

    def detect_objects(self, image, save_path):
        """Run detection on one image file.

        Args:
            image: Path of the image file.
            save_path: Forwarded to ``post_process``, which does not use it.

        Returns:
            tuple: ``(bbox_res, result_img)`` from ``overlay_bbox_cv`` with a
            score threshold of 0.35; boxes are ``[label, x0, y0, x1, y1,
            score]`` in source-image pixels and ``result_img`` is the image
            as loaded.
        """
        im, im0, org_data = data_process_cv2(image, self.imgsz)
        img_name = os.path.basename(image).split('.')[0]

        input_name = self.session.get_inputs()[0].name
        infer_start_time = time.time()
        raw_outputs = self.session.run(None, {input_name: im})
        infer_end_time = time.time()
        print(f"infer time: {infer_end_time - infer_start_time:.4f}s")

        # NOTE(review): assumes one channel-last (N, H, W, C) output per
        # stride, ordered like self.stride, with the 4 * (reg_max + 1)
        # distance channels first and the class channels after them -
        # confirm against the exported model.
        reg_channels = self.reg_max1 * 4
        cls_parts, reg_parts = [], []
        for level_out in raw_outputs[:len(self.stride)]:
            # Flattening the spatial axes of a channel-last tensor yields
            # (N, H * W, C) directly, without an intermediate transpose.
            flat = level_out.reshape(level_out.shape[0], -1, level_out.shape[-1])
            reg_parts.append(flat[..., :reg_channels])
            cls_parts.append(flat[..., reg_channels:])
        cls_scores = np.concatenate(cls_parts, axis=1)
        bbox_preds = np.concatenate(reg_parts, axis=1)

        pred = self.get_bboxes(cls_scores, bbox_preds)[0]
        res = self.post_process(pred, org_data, im0, save_path, img_name)
        result_img, bbox_res = overlay_bbox_cv(im0, res, names, score_thresh=0.35)
        return bbox_res, result_img

    def post_process(self, result, im, im0, save_path, img_name):
        """Rescale detections to the source image and group them by class.

        Args:
            result (tuple): ``(det_bboxes, det_labels)`` for one image;
                ``det_bboxes`` is (K, 5) and is rescaled in place.
            im (np.ndarray): Resized network input image.
            im0 (np.ndarray): Source image.
            save_path: Unused.
            img_name: Unused.

        Returns:
            dict: Maps each class index to a list of
            ``[x1, y1, x2, y2, score]`` with rounded coordinates.
        """
        det_bboxes, det_labels = result
        det_bboxes[:, :4] = scale_boxes_no_letter(
            im.shape[:2], det_bboxes[:, :4], im0.shape
        ).round()

        det_result = {}
        for class_id in range(self.nc):
            selected = det_labels == class_id
            det_result[class_id] = (
                det_bboxes[selected, :5].astype(np.float32).tolist()
            )
        return det_result
class QRCodeDecoder:
    """Crop detected QR-code regions out of an image and decode them."""

    def crop_qr_regions(self, image, regions, padding=15):
        """Crop every detected region, enlarged by ``padding`` pixels.

        Args:
            image (np.ndarray): Image to crop from; its first two axes are
                (height, width).
            regions: Iterable of ``(label, x1, y1, x2, y2, score)`` entries.
            padding (int): Margin added on every side to compensate for
                detections that cut off the code's border; tune it to the
                detector's behaviour. Default: 15.

        Returns:
            list[dict]: One ``{'image': crop, 'bbox': region}`` entry per
            non-empty crop; ``bbox`` is the unmodified input region.
        """
        img_h, img_w = image.shape[:2]
        cropped_images = []
        for region in regions:
            _label, x1, y1, x2, y2, _score = region
            # Clamp to the image: a negative start index would otherwise wrap
            # around and produce an empty or wrong crop near the border.
            left = max(int(x1) - padding, 0)
            top = max(int(y1) - padding, 0)
            right = min(int(x2) + padding, img_w)
            bottom = min(int(y2) + padding, img_h)

            cropped = image[top:bottom, left:right]
            if cropped.size > 0:
                cropped_images.append({
                    'image': cropped,
                    'bbox': region,
                })
        return cropped_images

    def decode_qrcode_pyzbar(self, cropped_image):
        """Decode the codes found in ``cropped_image`` with pyzbar.

        Args:
            cropped_image (np.ndarray): Image crop; 3-D input is converted
                from BGR to grayscale first.

        Returns:
            list[dict]: One ``{'data', 'type', 'points'}`` entry per decoded
            symbol whose payload is valid UTF-8. An empty list is returned
            when decoding fails; the error is printed, not raised.
        """
        try:
            if len(cropped_image.shape) == 3:
                gray = cv2.cvtColor(cropped_image, cv2.COLOR_BGR2GRAY)
            else:
                gray = cropped_image

            results = []
            for obj in pyzbar.decode(gray):
                try:
                    data = obj.data.decode('utf-8')
                except UnicodeDecodeError:
                    # Skip payloads that are not valid UTF-8 text.
                    continue
                results.append({
                    'data': data,
                    'type': obj.type,
                    'points': obj.polygon
                })
            return results
        except Exception as e:
            # Best effort: one bad crop must not abort the whole run.
            print(f"decode error: {e}")
            return []
if __name__ == '__main__':
    # Batch test: detect QR codes in every JPEG of img_path, decode the
    # crops with pyzbar and report the success rate and timings.
    detector = NanoDetONNXInfer(model_path='./nanodet-plus-m_416_QR.axmodel', imgsz=[416, 416])
    decoder = QRCodeDecoder()
    img_path = './qrcode_test'
    det_path = './det_res'
    crop_path = './crop_res'
    os.makedirs(det_path, exist_ok=True)
    os.makedirs(crop_path, exist_ok=True)

    # Sorted so that runs process the files in a reproducible order.
    imgs = sorted(glob.glob(f"{img_path}/*.jpg"))
    total = len(imgs)
    success = 0
    fail = 0
    start_time = time.time()
    for img in imgs:
        pic_name = os.path.basename(img).split('.')[0]
        loop_start_time = time.time()
        det_result, res_img = detector.detect_objects(img, det_path)
        # Debug: dump the detection image.
        # cv2.imwrite(os.path.join(det_path, pic_name+'.jpg'), res_img)

        # Crop the detected QR codes and decode each crop with pyzbar.
        cropped_images = decoder.crop_qr_regions(res_img, det_result)
        # Debug: dump every crop.
        # for i, cropped in enumerate(cropped_images):
        #     cv2.imwrite(os.path.join(crop_path, f'{pic_name}_crop_{i}.jpg'), cropped['image'])
        all_decoded_results = []
        for cropped_data in cropped_images:
            all_decoded_results.extend(
                decoder.decode_qrcode_pyzbar(cropped_data['image'])
            )

        # An image counts as a success when at least one crop decodes.
        if all_decoded_results:
            success += 1
            print(f"{pic_name} 识别成功!")
        else:
            fail += 1
            print(f"{pic_name} 识别失败!")
        loop_end_time = time.time()
        print(f"图片 {img} 处理耗时: {loop_end_time - loop_start_time:.4f} 秒")

    end_time = time.time()
    total_time = end_time - start_time
    # Guard the ratios: an empty test folder must not raise ZeroDivisionError.
    success_rate = success / total * 100 if total else 0.0
    avg_time = total_time / total if total else 0.0
    print(f"总共测试图片数量: {total}")
    print(f"识别成功数量: {success}")
    print(f"识别失败数量: {fail}")
    print(f"识别成功率: {success_rate:.2f}%")
    print(f"整体处理耗时: {total_time:.4f} 秒")
    print(f"平均每张图片处理耗时: {avg_time:.4f} 秒")