import glob
import math
import os
import time

import cv2
import numpy as np
import pyzbar.pyzbar as pyzbar

import axengine as axe

# Detector class names (single-class QR-code model).
names = ["QRCode"]


def sigmoid(x):
    """Elementwise logistic sigmoid: 1 / (1 + e^-x)."""
    return 1 / (1 + np.exp(-x))


def model_load(model):
    """Create an axengine inference session for *model* (path to a .axmodel).

    Returns:
        (session, output_names): the session plus the names of all outputs.
    """
    session = axe.InferenceSession(model)
    output_names = [x.name for x in session.get_outputs()]
    return session, output_names


def data_process_cv2(frame, input_shape):
    """Load the image at path *frame* and resize it to the network input size.

    No normalisation or channel transpose is done here: the model consumes
    uint8 NHWC (BGR) data directly.

    Returns:
        img: (1, H, W, 3) uint8 batch for inference.
        im0: the original, full-resolution image.
        org_data: a copy of the resized image (its shape is used later to
            rescale boxes back to im0 coordinates).
    """
    im0 = cv2.imread(frame)
    # cv2.resize expects dsize as (width, height); imgsz is square here.
    img = cv2.resize(im0, input_shape, interpolation=cv2.INTER_AREA)
    org_data = img.copy()
    img = np.ascontiguousarray(img)
    img = np.expand_dims(img, 0)
    return img, im0, org_data


def multiclass_nms(multi_bboxes, multi_scores, score_thr, nms_cfg, max_num=-1,
                   score_factors=None):
    """NMS over multi-class box predictions (NumPy port of mmdet's helper).

    Args:
        multi_bboxes: (N, 4) shared boxes or (N, 4*C) per-class boxes.
        multi_scores: (N, C+1) scores; the last column is the background
            class and is dropped.
        score_thr: minimum score for a (box, class) pair to survive.
        nms_cfg: dict; only 'iou_threshold' is consulted here.
        max_num: keep at most this many detections (<= 0 means unlimited).
        score_factors: optional (N,) multiplier applied to all class scores.

    Returns:
        dets: (K, 5) array of [x1, y1, x2, y2, score].
        labels: (K,) int64 class indices.
    """
    num_classes = multi_scores.shape[1] - 1  # exclude background column

    # Broadcast boxes to one set per class: (N, C, 4).
    if multi_bboxes.shape[1] > 4:
        bboxes = multi_bboxes.reshape(multi_scores.shape[0], -1, 4)
    else:
        bboxes = np.tile(multi_bboxes[:, None, :], (1, num_classes, 1))

    scores = multi_scores[:, :-1].copy()  # (N, C)
    if score_factors is not None:
        scores = scores * score_factors[:, None]

    # Keep only (box, class) pairs above the score threshold.
    valid_mask = scores > score_thr
    valid_indices = np.where(valid_mask)
    if len(valid_indices[0]) == 0:
        return np.zeros((0, 5), dtype=np.float32), np.zeros((0,), dtype=np.int64)

    bbox_indices, class_indices = valid_indices
    bboxes_valid = bboxes[bbox_indices, class_indices]  # (K, 4)
    scores_valid = scores[valid_indices]                # (K,)
    labels_valid = class_indices.astype(np.int64)       # (K,)

    # NMS input layout: [x1, y1, x2, y2, score].
    dets_input = np.concatenate([bboxes_valid, scores_valid[:, None]], axis=1)
    keep = nms_numpy(dets_input, iou_threshold=nms_cfg.get('iou_threshold', 0.5))

    dets = dets_input[keep]
    labels = labels_valid[keep]
    # keep is already in descending score order, so truncation keeps the best.
    if max_num > 0 and len(keep) > max_num:
        dets = dets[:max_num]
        labels = labels[:max_num]
    return dets, labels


def nms_numpy(dets, iou_threshold=0.5):
    """Greedy non-maximum suppression.

    Args:
        dets: (K, 5) array [x1, y1, x2, y2, score].
        iou_threshold: boxes whose IoU with a kept box exceeds this are dropped.

    Returns:
        List of kept row indices, in descending score order.
    """
    if dets.size == 0:
        return []
    x1, y1 = dets[:, 0], dets[:, 1]
    x2, y2 = dets[:, 2], dets[:, 3]
    scores = dets[:, 4]
    # +1 follows the classic integer-pixel area convention.
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]  # descending by score
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        # Intersection of the current top box with every remaining box.
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        iou = inter / (areas[i] + areas[order[1:]] - inter)
        # Survivors: low-overlap boxes; +1 compensates for the [1:] slice.
        inds = np.where(iou <= iou_threshold)[0]
        order = order[inds + 1]
    return keep


def batched_nms(boxes, scores, idxs, nms_cfg, class_agnostic=False):
    """Class-aware NMS: boxes of different classes never suppress each other.

    Uses the coordinate-offset trick: each class's boxes are shifted into a
    disjoint region so a single NMS pass is effectively per-class.

    Args:
        boxes: (N, 4) boxes.
        scores: (N,) scores.
        idxs: (N,) integer class/group ids.
        nms_cfg: dict with optional 'type', 'split_thr', 'class_agnostic' and
            the kwargs forwarded to nms_numpy (e.g. 'iou_threshold').
        class_agnostic: when True, suppress across classes.

    Returns:
        dets: (K, 5) kept [x1, y1, x2, y2, score], in descending score order.
        keep: (K,) indices into the input arrays.
    """
    nms_cfg_ = nms_cfg.copy()
    class_agnostic = nms_cfg_.pop("class_agnostic", class_agnostic)
    if class_agnostic:
        boxes_for_nms = boxes
    else:
        max_coordinate = boxes.max()
        offsets = idxs.astype(boxes.dtype) * (max_coordinate + 1)
        boxes_for_nms = boxes + offsets[:, None]

    nms_cfg_.pop("type", "nms")  # algorithm selector; only plain NMS here
    split_thr = nms_cfg_.pop("split_thr", 10000)

    if len(boxes_for_nms) < split_thr:
        # BUGFIX: nms_numpy expects a single (K, 5) dets array, not separate
        # boxes/scores arguments (the original call raised a TypeError).
        dets_for_nms = np.concatenate([boxes_for_nms, scores[:, None]], axis=1)
        keep = np.array(nms_numpy(dets_for_nms, **nms_cfg_), dtype=np.int64)
        boxes = boxes[keep]
        scores = scores[keep]
    else:
        # Too many boxes: run NMS per class/group to bound the cost.
        total_mask = np.zeros(scores.shape, dtype=bool)
        for id_val in np.unique(idxs):
            mask_indices = np.where(idxs == id_val)[0]
            if len(mask_indices) == 0:
                continue
            group_dets = np.concatenate(
                [boxes_for_nms[mask_indices], scores[mask_indices][:, None]],
                axis=1,
            )
            keep_in_group = np.array(nms_numpy(group_dets, **nms_cfg_),
                                     dtype=np.int64)
            total_mask[mask_indices[keep_in_group]] = True
        keep = np.where(total_mask)[0]
        # Order the surviving boxes by descending score.
        keep = keep[np.argsort(-scores[keep])]
        boxes = boxes[keep]
        scores = scores[keep]

    dets = np.concatenate([boxes, scores[:, None]], axis=-1)
    return dets, keep


def scale_boxes_no_letter(img1_shape, boxes, img0_shape):
    """Rescale boxes (in place) from the resized image back to the original.

    Assumes a plain resize (no letterbox padding), so x and y scale
    independently.
    """
    gain = (img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1])
    boxes[..., [0, 2]] /= gain[1]  # x coordinates
    boxes[..., [1, 3]] /= gain[0]  # y coordinates
    clip_boxes(boxes, img0_shape)
    return boxes


def clip_boxes(boxes, shape):
    """Clip boxes (in place) to image bounds; *shape* is (height, width, ...)."""
    boxes[..., [0, 2]] = boxes[..., [0, 2]].clip(0, shape[1])
    boxes[..., [1, 3]] = boxes[..., [1, 3]].clip(0, shape[0])


# 80-entry RGB palette (0..1 floats) for per-class visualisation.
_COLORS = (
    np.array(
        [
            0.000, 0.447, 0.741,
            0.850, 0.325, 0.098,
            0.929, 0.694, 0.125,
            0.494, 0.184, 0.556,
            0.466, 0.674, 0.188,
            0.301, 0.745, 0.933,
            0.635, 0.078, 0.184,
            0.300, 0.300, 0.300,
            0.600, 0.600, 0.600,
            1.000, 0.000, 0.000,
            1.000, 0.500, 0.000,
            0.749, 0.749, 0.000,
            0.000, 1.000, 0.000,
            0.000, 0.000, 1.000,
            0.667, 0.000, 1.000,
            0.333, 0.333, 0.000,
            0.333, 0.667, 0.000,
            0.333, 1.000, 0.000,
            0.667, 0.333, 0.000,
            0.667, 0.667, 0.000,
            0.667, 1.000, 0.000,
            1.000, 0.333, 0.000,
            1.000, 0.667, 0.000,
            1.000, 1.000, 0.000,
            0.000, 0.333, 0.500,
            0.000, 0.667, 0.500,
            0.000, 1.000, 0.500,
            0.333, 0.000, 0.500,
            0.333, 0.333, 0.500,
            0.333, 0.667, 0.500,
            0.333, 1.000, 0.500,
            0.667, 0.000, 0.500,
            0.667, 0.333, 0.500,
            0.667, 0.667, 0.500,
            0.667, 1.000, 0.500,
            1.000, 0.000, 0.500,
            1.000, 0.333, 0.500,
            1.000, 0.667, 0.500,
            1.000, 1.000, 0.500,
            0.000, 0.333, 1.000,
            0.000, 0.667, 1.000,
            0.000, 1.000, 1.000,
            0.333, 0.000, 1.000,
            0.333, 0.333, 1.000,
            0.333, 0.667, 1.000,
            0.333, 1.000, 1.000,
            0.667, 0.000, 1.000,
            0.667, 0.333, 1.000,
            0.667, 0.667, 1.000,
            0.667, 1.000, 1.000,
            1.000, 0.000, 1.000,
            1.000, 0.333, 1.000,
            1.000, 0.667, 1.000,
            0.333, 0.000, 0.000,
            0.500, 0.000, 0.000,
            0.667, 0.000, 0.000,
            0.833, 0.000, 0.000,
            1.000, 0.000, 0.000,
            0.000, 0.167, 0.000,
            0.000, 0.333, 0.000,
            0.000, 0.500, 0.000,
            0.000, 0.667, 0.000,
            0.000, 0.833, 0.000,
            0.000, 1.000, 0.000,
            0.000, 0.000, 0.167,
            0.000, 0.000, 0.333,
            0.000, 0.000, 0.500,
            0.000, 0.000, 0.667,
            0.000, 0.000, 0.833,
            0.000, 0.000, 1.000,
            0.000, 0.000, 0.000,
            0.143, 0.143, 0.143,
            0.286, 0.286, 0.286,
            0.429, 0.429, 0.429,
            0.571, 0.571, 0.571,
            0.714, 0.714, 0.714,
            0.857, 0.857, 0.857,
            0.000, 0.447, 0.741,
            0.314, 0.717, 0.741,
            0.50, 0.5, 0,
        ]
    )
    .astype(np.float32)
    .reshape(-1, 3)
)


def distance2bbox(points, distance, max_shape=None):
    """Convert (left, top, right, bottom) distances from *points* to boxes.

    Args:
        points: (..., 2) anchor centers [x, y].
        distance: (..., 4) distances [l, t, r, b].
        max_shape: optional (h, w) used to clip the resulting boxes.

    Returns:
        (..., 4) boxes [x1, y1, x2, y2].
    """
    x1 = points[..., 0] - distance[..., 0]
    y1 = points[..., 1] - distance[..., 1]
    x2 = points[..., 0] + distance[..., 2]
    y2 = points[..., 1] + distance[..., 3]
    if max_shape is not None:
        x1 = np.clip(x1, a_min=0, a_max=max_shape[1])
        y1 = np.clip(y1, a_min=0, a_max=max_shape[0])
        x2 = np.clip(x2, a_min=0, a_max=max_shape[1])
        y2 = np.clip(y2, a_min=0, a_max=max_shape[0])
    return np.stack([x1, y1, x2, y2], axis=-1)


def integral_numpy(x, reg_max=16):
    """NumPy equivalent of NanoDet's Integral (DFL expectation) layer.

    Computes sum(softmax(logits) * [0, 1, ..., reg_max]) for each of the
    four box directions.

    Args:
        x: array of shape (..., 4 * (reg_max + 1)) of distribution logits.
        reg_max: maximum value of the discrete set. Default: 16.

    Returns:
        Integral result of shape (..., 4).
    """
    leading_shape = x.shape[:-1]
    total_channels = x.shape[-1]
    assert total_channels == 4 * (reg_max + 1), \
        f"Last dimension must be 4*(reg_max+1)={4*(reg_max+1)}, but got {total_channels}"

    # (..., 4, reg_max + 1): one distribution per direction.
    x = x.reshape(*leading_shape, 4, reg_max + 1)

    # Numerically-stable softmax along the bin axis.
    x_max = np.max(x, axis=-1, keepdims=True)
    exp_x = np.exp(x - x_max)
    softmax_x = exp_x / np.sum(exp_x, axis=-1, keepdims=True)

    # Expectation over bin values [0, 1, ..., reg_max].
    project = np.arange(reg_max + 1, dtype=x.dtype)
    return np.sum(softmax_x * project, axis=-1)  # (..., 4)


def overlay_bbox_cv(img, dets, class_names, score_thresh):
    """Collect detections above *score_thresh* as [label, x0, y0, x1, y1, score].

    Drawing on the image is intentionally disabled; *img* is returned
    untouched. The returned box list is sorted by ascending score.
    """
    all_box = []
    for label in dets:
        for bbox in dets[label]:
            score = bbox[-1]
            if score > score_thresh:
                x0, y0, x1, y1 = [int(i) for i in bbox[:4]]
                all_box.append([label, x0, y0, x1, y1, score])
    all_box.sort(key=lambda v: v[5])
    return img, all_box


class NanoDetONNXInfer:
    """NanoDet-Plus inference wrapper around an axengine (.axmodel) session."""

    def __init__(self, model_path, imgsz=[416, 416]):
        self.model_path = model_path
        self.session, self.output_names = model_load(self.model_path)
        self.imgsz = imgsz
        self.reg_max = 7  # DFL bins - 1 (NanoDet-Plus head uses reg_max=7)
        self.reg_max1 = self.reg_max + 1
        self.distribution_project = np.arange(self.reg_max + 1)
        self.nc = len(names)
        self.no = self.nc + self.reg_max1 * 4  # channels per prediction point
        self.stride = [8, 16, 32, 64]

    def get_bboxes(self, cls_preds, reg_preds):
        """Decode raw head outputs into per-image (dets, labels).

        Args:
            cls_preds: (num_imgs, num_points, num_classes) logits.
            reg_preds: (num_imgs, num_points, 4 * (reg_max + 1)) DFL logits.

        Returns:
            List with one (dets, labels) tuple per image.
        """
        b = cls_preds.shape[0]
        # BUGFIX: the original computed math.ceil(imgsz[1]) / stride, yielding
        # a fractional width; ceil must wrap the whole division.
        featmap_sizes = [
            (math.ceil(self.imgsz[0] / stride), math.ceil(self.imgsz[1] / stride))
            for stride in self.stride
        ]
        # Center priors (x, y, stride, stride) for every level, concatenated
        # along the points axis in the same order as the model outputs.
        mlvl_center_priors = [
            self.get_single_level_center_priors(b, featmap_sizes[i], stride,
                                                dtype=np.float32)
            for i, stride in enumerate(self.stride)
        ]
        center_priors = np.concatenate(mlvl_center_priors, axis=1)

        # DFL expectation gives distances in feature cells; scale by stride.
        integral_result = integral_numpy(reg_preds, reg_max=self.reg_max)
        scale = center_priors[..., 2][..., None]
        dis_preds = integral_result * scale
        bboxes = distance2bbox(center_priors[..., :2], dis_preds,
                               max_shape=self.imgsz)
        scores = sigmoid(cls_preds)

        result_list = []
        for i in range(b):
            # Append a dummy background column (mmdetection 2.0 convention).
            score, bbox = scores[i], bboxes[i]
            padding = np.zeros((score.shape[0], 1), dtype=score.dtype)
            score = np.concatenate([score, padding], axis=1)
            results = multiclass_nms(
                bbox,
                score,
                score_thr=0.05,
                nms_cfg=dict(type="nms", iou_threshold=0.6),
                max_num=100,
            )
            result_list.append(results)
        return result_list

    def get_single_level_center_priors(self, batch_size, featmap_size, stride, dtype):
        """Build (batch, h*w, 4) priors of [x, y, stride, stride] for one level."""
        h, w = featmap_size
        x_range = np.arange(w, dtype=dtype) * stride
        y_range = np.arange(h, dtype=dtype) * stride
        y, x = np.meshgrid(y_range, x_range, indexing='ij')
        y = y.flatten()
        x = x.flatten()
        strides = np.full((x.shape[0],), stride, dtype=dtype)
        priors = np.stack([x, y, strides, strides], axis=-1)
        return np.tile(priors[None, :, :], (batch_size, 1, 1))

    def detect_objects(self, image, save_path):
        """Run detection on the image at path *image*.

        Returns:
            bbox_res: [[label, x0, y0, x1, y1, score], ...] in im0 coordinates.
            result_img: the original image (drawing is disabled).
        """
        outputs = []
        im, im0, org_data = data_process_cv2(image, self.imgsz)
        img_name = os.path.basename(image).split('.')[0]

        infer_start_time = time.time()
        x = self.session.run(None, {self.session.get_inputs()[0].name: im})
        infer_end_time = time.time()
        print(f"infer time: {infer_end_time - infer_start_time:.4f}s")

        # Model emits NHWC; convert each of the 4 level outputs to NCHW.
        x = [np.transpose(x[i], (0, 3, 1, 2)) for i in range(4)]
        for i in range(len(x)):
            # Per-level channel layout: [reg (4*(reg_max+1)) | cls (nc)];
            # reorder to [cls | reg] and flatten the spatial dims.
            reg_pred = x[i][:, :self.reg_max1 * 4, :, :]
            cls_pred = x[i][:, self.reg_max1 * 4:, :, :]
            out = np.concatenate([cls_pred, reg_pred], axis=1)
            outputs.append(out.reshape(out.shape[0], out.shape[1], -1))
        preds = np.concatenate(outputs, axis=2).transpose(0, 2, 1)

        cls_scores = preds[:, :, :self.nc]
        bbox_preds = preds[:, :, self.nc:]
        pred = self.get_bboxes(cls_scores, bbox_preds)[0]
        res = self.post_process(pred, org_data, im0, save_path, img_name)
        result_img, bbox_res = overlay_bbox_cv(im0, res, names, score_thresh=0.35)
        return bbox_res, result_img

    def post_process(self, result, im, im0, save_path, img_name):
        """Map detections back to original-image coordinates, grouped by class.

        save_path / img_name are accepted for interface compatibility but are
        unused here.

        Returns:
            {class_id: [[x1, y1, x2, y2, score], ...]}
        """
        det_result = {}
        det_bboxes, det_labels = result
        det_bboxes[:, :4] = scale_boxes_no_letter(
            im.shape[:2], det_bboxes[:, :4], im0.shape
        ).round()
        classes = det_labels
        for i in range(self.nc):
            inds = classes == i
            det_result[i] = np.concatenate(
                [
                    det_bboxes[inds, :4].astype(np.float32),
                    det_bboxes[inds, 4:5].astype(np.float32),
                ],
                axis=1,
            ).tolist()
        return det_result


class QRCodeDecoder:
    """Crop detected QR-code regions and decode them with pyzbar."""

    def crop_qr_regions(self, image, regions):
        """Crop each detected bounding box out of *image*.

        Boxes are expanded by 15 px on every side to compensate for tight
        detections that would otherwise break decoding.
        """
        cropped_images = []
        for region in regions:
            label, x1, y1, x2, y2, score = region
            # BUGFIX: clamp the expanded top-left at 0 -- a negative slice
            # index would wrap around and crop the wrong area. Overshooting
            # the bottom-right is harmless (NumPy clips slices).
            x1 = max(x1 - 15, 0)
            y1 = max(y1 - 15, 0)
            x2 += 15
            y2 += 15
            cropped = image[y1:y2, x1:x2]
            if cropped.size > 0:
                cropped_images.append({
                    'image': cropped,
                    'bbox': region,
                })
        return cropped_images

    def decode_qrcode_pyzbar(self, cropped_image):
        """Decode QR codes in *cropped_image* using pyzbar.

        Returns a list of {'data', 'type', 'points'} dicts; empty on failure.
        """
        try:
            # pyzbar works best on single-channel input.
            if len(cropped_image.shape) == 3:
                gray = cv2.cvtColor(cropped_image, cv2.COLOR_BGR2GRAY)
            else:
                gray = cropped_image
            decoded_objects = pyzbar.decode(gray)
            results = []
            for obj in decoded_objects:
                try:
                    data = obj.data.decode('utf-8')
                except UnicodeDecodeError:
                    continue  # skip payloads that are not valid UTF-8
                results.append({
                    'data': data,
                    'type': obj.type,
                    'points': obj.polygon,
                })
            return results
        except Exception as e:
            print(f"decode error: {e}")
            return []


if __name__ == '__main__':
    detector = NanoDetONNXInfer(model_path='./nanodet-plus-m_416_QR.axmodel',
                                imgsz=[416, 416])
    decoder = QRCodeDecoder()
    img_path = './qrcode_test'
    det_path = './det_res'
    crop_path = './crop_res'
    os.makedirs(det_path, exist_ok=True)
    os.makedirs(crop_path, exist_ok=True)

    imgs = glob.glob(f"{img_path}/*.jpg")
    total = len(imgs)  # fixed typo: was 'totoal'
    success = 0
    fail = 0

    start_time = time.time()
    for img in imgs:
        pic_name = os.path.basename(img).split('.')[0]
        loop_start_time = time.time()

        # Detect QR-code regions, crop them, then decode each crop.
        det_result, res_img = detector.detect_objects(img, det_path)
        cropped_images = decoder.crop_qr_regions(res_img, det_result)

        all_decoded_results = []
        for cropped_data in cropped_images:
            all_decoded_results.extend(
                decoder.decode_qrcode_pyzbar(cropped_data['image'])
            )

        if all_decoded_results:
            success += 1
            print(f"{pic_name} 识别成功!")
        else:
            fail += 1
            print(f"{pic_name} 识别失败!")

        loop_end_time = time.time()
        print(f"图片 {img} 处理耗时: {loop_end_time - loop_start_time:.4f} 秒")

    end_time = time.time()
    total_time = end_time - start_time
    print(f"总共测试图片数量: {total}")
    print(f"识别成功数量: {success}")
    print(f"识别失败数量: {fail}")
    if total:  # BUGFIX: avoid ZeroDivisionError when the glob finds no images
        print(f"识别成功率: {success/total*100:.2f}%")
    print(f"整体处理耗时: {total_time:.4f} 秒")
    if total:
        print(f"平均每张图片处理耗时: {total_time/total:.4f} 秒")