QRCode_det / python /QRCode_axmodel_infer_Nanodet.py

wzf19947

增加nanodet、更新各平台模型

45b2cae 20 days ago

21.8 kB

	import os
	import glob
	import time
	import cv2
	import numpy as np
	import pyzbar.pyzbar as pyzbar
	import axengine as axe
	import math

	# Class names for the detector; NanoDetONNXInfer derives its class count from len(names).
	names = ["QRCode"]

	def sigmoid(x):
	    """Return the element-wise logistic function ``1 / (1 + exp(-x))`` of ``x``."""
	    decay = np.exp(-x)
	    return 1 / (1 + decay)
	def model_load(model):
	    """Create an axengine inference session for ``model``.

	    Args:
	        model: Model path/handle forwarded unchanged to ``axe.InferenceSession``.

	    Returns:
	        tuple: ``(session, output_names)`` where ``output_names`` holds the
	        ``name`` of every entry returned by ``session.get_outputs()``.
	    """
	    session = axe.InferenceSession(model)
	    output_names = [output.name for output in session.get_outputs()]
	    return session, output_names

	def data_process_cv2(frame, input_shape):
	    """Load an image from disk and resize it to the network input size.

	    Args:
	        frame: Path of the image file, passed to ``cv2.imread``.
	        input_shape: Two-element target size handed to ``cv2.resize`` as
	            ``dsize`` (OpenCV reads ``dsize`` as ``(width, height)``).

	    Returns:
	        tuple: ``(img, im0, org_data)`` where ``img`` is the resized image with
	        a leading batch axis, ``im0`` is the image as loaded from disk and
	        ``org_data`` is a copy of the resized image without the batch axis.

	    Raises:
	        OSError: If ``cv2.imread`` cannot read ``frame``.
	    """
	    im0 = cv2.imread(frame)
	    if im0 is None:
	        # cv2.imread reports failure by returning None instead of raising,
	        # which would otherwise surface as an opaque error inside cv2.resize.
	        raise OSError(f"cv2.imread could not read image: {frame!r}")

	    # dsize must be a tuple-like of two ints; callers pass a list.
	    resized = cv2.resize(im0, tuple(input_shape), interpolation=cv2.INTER_AREA)
	    org_data = resized.copy()
	    img = np.expand_dims(np.ascontiguousarray(resized), 0)
	    return img, im0, org_data


	def multiclass_nms(
	    multi_bboxes, multi_scores, score_thr, nms_cfg, max_num=-1, score_factors=None
	):
	    """Threshold per-class scores and run per-class NMS on the surviving boxes.

	    Args:
	        multi_bboxes (np.ndarray): Boxes of shape (N, 4) shared by all classes,
	            or (N, 4 * C) with one box per class.
	        multi_scores (np.ndarray): Scores of shape (N, C + 1); the last column
	            is treated as background and dropped.
	        score_thr (float): Only scores strictly greater than this are kept.
	        nms_cfg (dict): NMS settings; only ``iou_threshold`` (default 0.5) is read.
	        max_num (int): If positive, keep at most this many detections.
	        score_factors (np.ndarray | None): Optional (N,) multipliers applied to
	            the scores before thresholding.

	    Returns:
	        tuple: ``(dets, labels)`` with ``dets`` of shape (K, 5) holding
	        ``[x1, y1, x2, y2, score]`` and ``labels`` of shape (K,) int64, in the
	        order returned by ``nms_numpy`` (highest score first).
	    """
	    num_classes = multi_scores.shape[1] - 1  # exclude background

	    if multi_bboxes.shape[1] > 4:
	        # (N, 4*C) -> (N, C, 4)
	        bboxes = multi_bboxes.reshape(multi_scores.shape[0], -1, 4)
	    else:
	        # (N, 4) -> (N, C, 4): every class shares the same box
	        bboxes = np.tile(multi_bboxes[:, None, :], (1, num_classes, 1))

	    scores = multi_scores[:, :-1].copy()  # (N, C)
	    if score_factors is not None:
	        scores = scores * score_factors[:, None]

	    bbox_indices, class_indices = np.nonzero(scores > score_thr)
	    if bbox_indices.size == 0:
	        return np.zeros((0, 5), dtype=np.float32), np.zeros((0,), dtype=np.int64)

	    bboxes_valid = bboxes[bbox_indices, class_indices]  # (K, 4)
	    scores_valid = scores[bbox_indices, class_indices]  # (K,)
	    labels_valid = class_indices.astype(np.int64)  # (K,)

	    dets_input = np.concatenate([bboxes_valid, scores_valid[:, None]], axis=1)

	    # Boxes of different classes must not suppress each other. Shift each
	    # class into its own disjoint coordinate range for the NMS pass only;
	    # the returned detections keep their original coordinates.
	    nms_input = dets_input
	    if num_classes > 1:
	        # +2 leaves a gap even under nms_numpy's "+1 pixel" width convention
	        span = bboxes_valid.max() - bboxes_valid.min() + 2
	        offsets = labels_valid.astype(dets_input.dtype) * span
	        nms_input = dets_input.copy()
	        nms_input[:, :4] += offsets[:, None]

	    keep = nms_numpy(nms_input, iou_threshold=nms_cfg.get('iou_threshold', 0.5))

	    dets = dets_input[keep]
	    labels = labels_valid[keep]

	    if max_num > 0 and len(keep) > max_num:
	        dets = dets[:max_num]
	        labels = labels[:max_num]

	    return dets, labels
	def nms_numpy(dets, iou_threshold=0.5):
	    """Greedy non-maximum suppression over ``[x1, y1, x2, y2, score]`` rows.

	    Boxes are visited from highest to lowest score; a box is dropped when its
	    IoU with an already selected box exceeds ``iou_threshold``. Widths and
	    heights are measured with the inclusive "+1" pixel convention.

	    Args:
	        dets (np.ndarray): Array of shape (K, 5).
	        iou_threshold (float): Boxes with IoU <= this value survive.

	    Returns:
	        list: Indices into ``dets`` of the kept boxes, highest score first
	        (an empty list when ``dets`` is empty).
	    """
	    if dets.size == 0:
	        return []

	    left, top, right, bottom, conf = (dets[:, col] for col in range(5))
	    box_area = (right - left + 1) * (bottom - top + 1)
	    remaining = conf.argsort()[::-1]  # best score first

	    selected = []
	    while remaining.size > 0:
	        best, rest = remaining[0], remaining[1:]
	        selected.append(best)

	        # intersection of the best box with every other remaining box
	        overlap_w = np.maximum(
	            0.0,
	            np.minimum(right[best], right[rest]) - np.maximum(left[best], left[rest]) + 1,
	        )
	        overlap_h = np.maximum(
	            0.0,
	            np.minimum(bottom[best], bottom[rest]) - np.maximum(top[best], top[rest]) + 1,
	        )
	        overlap = overlap_w * overlap_h

	        ratio = overlap / (box_area[best] + box_area[rest] - overlap)
	        remaining = rest[ratio <= iou_threshold]

	    return selected
	def batched_nms(boxes, scores, idxs, nms_cfg, class_agnostic=False):
	    """Run NMS independently per group id using a coordinate-offset trick.

	    Unless ``class_agnostic`` is set, each box is shifted by
	    ``idx * (max_coordinate + 1)`` so boxes from different groups never
	    overlap, and a single NMS pass then behaves like per-group NMS.

	    Args:
	        boxes (np.ndarray): Boxes of shape (K, 4) as ``[x1, y1, x2, y2]``.
	        scores (np.ndarray): Scores of shape (K,).
	        idxs (np.ndarray): Group/class id of every box, shape (K,).
	        nms_cfg (dict): Settings. ``class_agnostic``, ``type`` and
	            ``split_thr`` (default 10000) are consumed here;
	            ``iou_threshold`` (default 0.5) is forwarded to ``nms_numpy``.
	            The dict itself is not modified.
	        class_agnostic (bool): Default used when ``nms_cfg`` has no
	            ``class_agnostic`` entry.

	    Returns:
	        tuple: ``(dets, keep)`` where ``dets`` has shape (M, 5) holding the
	        kept boxes with their score appended and ``keep`` are the int64
	        indices of those boxes in the input, highest score first.
	    """
	    nms_cfg_ = nms_cfg.copy()
	    class_agnostic = nms_cfg_.pop("class_agnostic", class_agnostic)
	    nms_cfg_.pop("type", "nms")  # accepted for config compatibility; only plain NMS exists here
	    split_thr = nms_cfg_.pop("split_thr", 10000)
	    iou_threshold = nms_cfg_.get("iou_threshold", 0.5)

	    if len(boxes) == 0:
	        # boxes.max() below is undefined for an empty array
	        return np.zeros((0, 5), dtype=boxes.dtype), np.zeros((0,), dtype=np.int64)

	    if class_agnostic:
	        boxes_for_nms = boxes
	    else:
	        max_coordinate = boxes.max()
	        offsets = idxs.astype(boxes.dtype) * (max_coordinate + 1)
	        boxes_for_nms = boxes + offsets[:, None]

	    # nms_numpy takes a single (K, 5) array of [x1, y1, x2, y2, score] rows
	    dets_for_nms = np.concatenate([boxes_for_nms, scores[:, None]], axis=-1)

	    if len(boxes_for_nms) < split_thr:
	        keep = np.array(
	            nms_numpy(dets_for_nms, iou_threshold=iou_threshold), dtype=np.int64
	        )
	    else:
	        # Large input: run NMS group by group to bound the working set
	        total_mask = np.zeros(scores.shape, dtype=bool)
	        for id_val in np.unique(idxs):
	            mask_indices = np.where(idxs == id_val)[0]
	            keep_in_group = np.array(
	                nms_numpy(dets_for_nms[mask_indices], iou_threshold=iou_threshold),
	                dtype=np.int64,
	            )
	            total_mask[mask_indices[keep_in_group]] = True

	        keep = np.where(total_mask)[0]
	        # restore the highest-score-first ordering across groups
	        keep = keep[np.argsort(-scores[keep])]

	    boxes = boxes[keep]
	    scores = scores[keep]

	    # Concatenate boxes and scores -> (M, 5)
	    dets = np.concatenate([boxes, scores[:, None]], axis=-1)
	    return dets, keep

	def scale_boxes_no_letter(img1_shape, boxes, img0_shape):
	    """Rescale boxes from the resized image to the original image, in place.

	    The resize is a plain stretch (no letterbox padding), so x and y use
	    independent ratios. ``boxes`` is modified in place, clipped to the
	    original image bounds via ``clip_boxes`` and also returned.

	    Args:
	        img1_shape: ``(height, width, ...)`` of the resized image.
	        boxes (np.ndarray): ``[x1, y1, x2, y2]`` along the last axis.
	        img0_shape: ``(height, width, ...)`` of the original image.

	    Returns:
	        np.ndarray: The same ``boxes`` array.
	    """
	    resized_h, resized_w = img1_shape[0], img1_shape[1]
	    source_h, source_w = img0_shape[0], img0_shape[1]
	    ratio_h = resized_h / source_h
	    ratio_w = resized_w / source_w

	    boxes[..., [0, 2]] /= ratio_w  # x1, x2
	    boxes[..., [1, 3]] /= ratio_h  # y1, y2
	    clip_boxes(boxes, img0_shape)
	    return boxes

	def clip_boxes(boxes, shape):
	    """Clamp ``[x1, y1, x2, y2]`` boxes to an image of ``shape`` in place.

	    x values are limited to ``[0, shape[1]]`` and y values to
	    ``[0, shape[0]]``. Nothing is returned.
	    """
	    height, width = shape[0], shape[1]
	    x_cols, y_cols = [0, 2], [1, 3]
	    boxes[..., x_cols] = np.clip(boxes[..., x_cols], 0, width)
	    boxes[..., y_cols] = np.clip(boxes[..., y_cols], 0, height)

	# Color lookup table: a flat list of values in [0, 1] that is cast to float32
	# and reshaped to (N, 3), i.e. one 3-component color per row.
	# Only the commented-out drawing code in overlay_bbox_cv indexes it (by label).
	_COLORS = (
	np.array(
	[
	0.000,
	0.447,
	0.741,
	0.850,
	0.325,
	0.098,
	0.929,
	0.694,
	0.125,
	0.494,
	0.184,
	0.556,
	0.466,
	0.674,
	0.188,
	0.301,
	0.745,
	0.933,
	0.635,
	0.078,
	0.184,
	0.300,
	0.300,
	0.300,
	0.600,
	0.600,
	0.600,
	1.000,
	0.000,
	0.000,
	1.000,
	0.500,
	0.000,
	0.749,
	0.749,
	0.000,
	0.000,
	1.000,
	0.000,
	0.000,
	0.000,
	1.000,
	0.667,
	0.000,
	1.000,
	0.333,
	0.333,
	0.000,
	0.333,
	0.667,
	0.000,
	0.333,
	1.000,
	0.000,
	0.667,
	0.333,
	0.000,
	0.667,
	0.667,
	0.000,
	0.667,
	1.000,
	0.000,
	1.000,
	0.333,
	0.000,
	1.000,
	0.667,
	0.000,
	1.000,
	1.000,
	0.000,
	0.000,
	0.333,
	0.500,
	0.000,
	0.667,
	0.500,
	0.000,
	1.000,
	0.500,
	0.333,
	0.000,
	0.500,
	0.333,
	0.333,
	0.500,
	0.333,
	0.667,
	0.500,
	0.333,
	1.000,
	0.500,
	0.667,
	0.000,
	0.500,
	0.667,
	0.333,
	0.500,
	0.667,
	0.667,
	0.500,
	0.667,
	1.000,
	0.500,
	1.000,
	0.000,
	0.500,
	1.000,
	0.333,
	0.500,
	1.000,
	0.667,
	0.500,
	1.000,
	1.000,
	0.500,
	0.000,
	0.333,
	1.000,
	0.000,
	0.667,
	1.000,
	0.000,
	1.000,
	1.000,
	0.333,
	0.000,
	1.000,
	0.333,
	0.333,
	1.000,
	0.333,
	0.667,
	1.000,
	0.333,
	1.000,
	1.000,
	0.667,
	0.000,
	1.000,
	0.667,
	0.333,
	1.000,
	0.667,
	0.667,
	1.000,
	0.667,
	1.000,
	1.000,
	1.000,
	0.000,
	1.000,
	1.000,
	0.333,
	1.000,
	1.000,
	0.667,
	1.000,
	0.333,
	0.000,
	0.000,
	0.500,
	0.000,
	0.000,
	0.667,
	0.000,
	0.000,
	0.833,
	0.000,
	0.000,
	1.000,
	0.000,
	0.000,
	0.000,
	0.167,
	0.000,
	0.000,
	0.333,
	0.000,
	0.000,
	0.500,
	0.000,
	0.000,
	0.667,
	0.000,
	0.000,
	0.833,
	0.000,
	0.000,
	1.000,
	0.000,
	0.000,
	0.000,
	0.167,
	0.000,
	0.000,
	0.333,
	0.000,
	0.000,
	0.500,
	0.000,
	0.000,
	0.667,
	0.000,
	0.000,
	0.833,
	0.000,
	0.000,
	1.000,
	0.000,
	0.000,
	0.000,
	0.143,
	0.143,
	0.143,
	0.286,
	0.286,
	0.286,
	0.429,
	0.429,
	0.429,
	0.571,
	0.571,
	0.571,
	0.714,
	0.714,
	0.714,
	0.857,
	0.857,
	0.857,
	0.000,
	0.447,
	0.741,
	0.314,
	0.717,
	0.741,
	0.50,
	0.5,
	0,
	]
	)
	.astype(np.float32)
	.reshape(-1, 3)
	)

	def distance2bbox(points, distance, max_shape=None):
	    """Turn center points plus side distances into corner-format boxes.

	    Args:
	        points (np.ndarray): ``[x, y]`` in the first two slots of the last axis.
	        distance (np.ndarray): ``[left, top, right, bottom]`` distances along
	            the last axis.
	        max_shape: Optional ``(height, width)``; when given, x is clamped to
	            ``[0, width]`` and y to ``[0, height]``.

	    Returns:
	        np.ndarray: ``[x1, y1, x2, y2]`` stacked along a new last axis.
	    """
	    center_x, center_y = points[..., 0], points[..., 1]
	    edges = [
	        center_x - distance[..., 0],
	        center_y - distance[..., 1],
	        center_x + distance[..., 2],
	        center_y + distance[..., 3],
	    ]
	    if max_shape is not None:
	        # x edges are bounded by the width, y edges by the height
	        upper = (max_shape[1], max_shape[0], max_shape[1], max_shape[0])
	        edges = [np.clip(edge, 0, limit) for edge, limit in zip(edges, upper)]
	    return np.stack(edges, axis=-1)

	def integral_numpy(x, reg_max=16):
	    """NumPy equivalent of the Integral layer in NanoDet.

	    Computes ``sum(softmax(logits) * [0, 1, ..., reg_max])`` for each of the
	    four box sides, i.e. the expected value of the discrete distance
	    distribution.

	    Args:
	        x (np.ndarray): Input array of shape (..., 4 * (reg_max + 1)).
	        reg_max (int): Maximum value of the discrete set. Default: 16.

	    Returns:
	        np.ndarray: Integral result of shape (..., 4).

	    Raises:
	        ValueError: If the last dimension of ``x`` is not ``4 * (reg_max + 1)``.
	    """
	    bins = reg_max + 1
	    expected_channels = 4 * bins
	    total_channels = x.shape[-1]
	    if total_channels != expected_channels:
	        # explicit raise: the check must survive ``python -O``
	        raise ValueError(
	            f"Last dimension must be 4*(reg_max+1)={expected_channels}, "
	            f"but got {total_channels}"
	        )

	    # (..., 4 * bins) -> (..., 4, bins): one distribution per box side
	    logits = x.reshape(*x.shape[:-1], 4, bins)

	    # Softmax over the bins; subtracting the max keeps exp() from overflowing
	    shifted = logits - np.max(logits, axis=-1, keepdims=True)
	    exp_logits = np.exp(shifted)
	    probs = exp_logits / np.sum(exp_logits, axis=-1, keepdims=True)

	    # Expected bin index: probabilities weighted by [0, 1, ..., reg_max]
	    project = np.arange(bins, dtype=logits.dtype)
	    return np.sum(probs * project, axis=-1)

	def overlay_bbox_cv(img, dets, class_names, score_thresh):
	    """Collect the detections whose score exceeds ``score_thresh``.

	    Drawing onto the image is currently disabled, so ``img`` is handed back
	    untouched and ``class_names`` is not consulted.

	    Args:
	        img: Image passed through unchanged.
	        dets: Mapping ``label -> iterable of boxes``; each box is
	            ``[x0, y0, x1, y1, ..., score]`` with the score in the last slot.
	        class_names: Unused while drawing is disabled.
	        score_thresh (float): Boxes with a score not above this are dropped.

	    Returns:
	        tuple: ``(img, boxes)`` where ``boxes`` is a list of
	        ``[label, x0, y0, x1, y1, score]`` with integer-truncated coordinates,
	        sorted by ascending score.
	    """
	    kept = []
	    for label in dets:
	        for bbox in dets[label]:
	            confidence = bbox[-1]
	            if not confidence > score_thresh:
	                continue
	            left, top, right, bottom = map(int, bbox[:4])
	            kept.append([label, left, top, right, bottom, confidence])
	    return img, sorted(kept, key=lambda entry: entry[5])

	class NanoDetONNXInfer:
	    """NanoDet detector running on an axengine session with NumPy post-processing.

	    The model is loaded through ``model_load``; ``detect_objects`` runs one
	    image through the network and returns the thresholded detections.
	    """

	    def __init__(self, model_path, imgsz=(416, 416)):
	        """Load the model and set the decoding constants.

	        Args:
	            model_path: Model file handed to ``model_load``.
	            imgsz: Two-element network input size. Default: (416, 416).
	                NOTE(review): ``get_bboxes`` reads it as (height, width) while
	                ``data_process_cv2`` forwards it to ``cv2.resize`` as ``dsize``;
	                both agree only for square inputs - confirm before using a
	                non-square size.
	        """
	        self.model_path = model_path
	        self.session, self.output_names = model_load(self.model_path)
	        # stored as a tuple so a caller's list (or the default) is never shared/mutated
	        self.imgsz = tuple(imgsz)
	        self.reg_max = 7
	        self.reg_max1 = self.reg_max + 1  # number of bins per box side
	        self.distribution_project = np.arange(self.reg_max + 1)
	        self.nc = len(names)
	        self.no = self.nc + self.reg_max1 * 4  # channels per prediction point
	        self.stride = [8, 16, 32, 64]

	    def _featmap_sizes(self):
	        """Return the integer (height, width) grid size of every stride level."""
	        return [
	            (math.ceil(self.imgsz[0] / stride), math.ceil(self.imgsz[1] / stride))
	            for stride in self.stride
	        ]

	    def get_bboxes(self, cls_preds, reg_preds):
	        """Decode the raw head outputs into per-image detections.

	        Args:
	            cls_preds (np.ndarray): Class logits of shape
	                (num_imgs, num_points, num_classes).
	            reg_preds (np.ndarray): Box distribution logits of shape
	                (num_imgs, num_points, 4 * (reg_max + 1)).

	        Returns:
	            list[tuple]: One ``(dets, labels)`` pair per image, as returned by
	            ``multiclass_nms``.
	        """
	        batch = cls_preds.shape[0]

	        # grid cells of every stride level, concatenated along the point axis
	        center_priors = np.concatenate(
	            [
	                self.get_single_level_center_priors(
	                    batch, featmap_size, stride, dtype=np.float32
	                )
	                for featmap_size, stride in zip(self._featmap_sizes(), self.stride)
	            ],
	            axis=1,
	        )

	        # distances come out in units of the cell stride (prior column 2)
	        scale = center_priors[..., 2][..., None]
	        dis_preds = integral_numpy(reg_preds, reg_max=self.reg_max) * scale
	        bboxes = distance2bbox(center_priors[..., :2], dis_preds, max_shape=self.imgsz)
	        scores = 1.0 / (1.0 + np.exp(-cls_preds))  # sigmoid

	        result_list = []
	        for score, bbox in zip(scores, bboxes):
	            # multiclass_nms expects a trailing background column
	            padding = np.zeros((score.shape[0], 1), dtype=score.dtype)
	            score = np.concatenate([score, padding], axis=1)
	            results = multiclass_nms(
	                bbox,
	                score,
	                score_thr=0.05,
	                nms_cfg=dict(type="nms", iou_threshold=0.6),
	                max_num=100,
	            )
	            result_list.append(results)
	        return result_list

	    def get_single_level_center_priors(self, batch_size, featmap_size, stride, dtype):
	        """Build the ``[x, y, stride, stride]`` priors of one feature level.

	        Args:
	            batch_size (int): Number of copies along the leading axis.
	            featmap_size: ``(height, width)`` of the feature map in cells.
	            stride: Pixel distance between neighbouring cells.
	            dtype: NumPy dtype of the generated coordinates.

	        Returns:
	            np.ndarray: Array of shape (batch_size, height * width, 4), cells
	            enumerated row by row.
	        """
	        h, w = featmap_size
	        x_range = np.arange(w, dtype=dtype) * stride
	        y_range = np.arange(h, dtype=dtype) * stride
	        y, x = np.meshgrid(y_range, x_range, indexing='ij')
	        y = y.flatten()
	        x = x.flatten()
	        strides = np.full((x.shape[0],), stride, dtype=dtype)
	        priors = np.stack([x, y, strides, strides], axis=-1)
	        return np.tile(priors[None, :, :], (batch_size, 1, 1))

	    def detect_objects(self, image, save_path):
	        """Run detection on one image file.

	        Args:
	            image: Path of the image to process.
	            save_path: Forwarded to ``post_process`` (not used there).

	        Returns:
	            tuple: ``(bbox_res, result_img)`` - the list of
	            ``[label, x0, y0, x1, y1, score]`` detections with score above 0.35
	            and the image returned by ``overlay_bbox_cv``.
	        """
	        im, im0, org_data = data_process_cv2(image, self.imgsz)
	        img_name = os.path.basename(image).split('.')[0]
	        input_name = self.session.get_inputs()[0].name

	        infer_start_time = time.time()
	        raw_outputs = self.session.run(None, {input_name: im})
	        infer_end_time = time.time()
	        print(f"infer time: {infer_end_time - infer_start_time:.4f}s")

	        reg_channels = self.reg_max1 * 4
	        outputs = []
	        # one output tensor per stride level
	        for level in range(len(self.stride)):
	            # assumes channel-last outputs; moved to (N, C, H, W)
	            feat = np.transpose(raw_outputs[level], (0, 3, 1, 2))
	            reg_pred = feat[:, :reg_channels, :, :]
	            cls_pred = feat[:, reg_channels:, :, :]
	            # class channels first, so they can be sliced off by self.nc below
	            out = np.concatenate([cls_pred, reg_pred], axis=1)
	            outputs.append(out.reshape(out.shape[0], out.shape[1], -1))
	        preds = np.concatenate(outputs, axis=2).transpose(0, 2, 1)

	        cls_scores = preds[:, :, :self.nc]
	        bbox_preds = preds[:, :, self.nc:]
	        pred = self.get_bboxes(cls_scores, bbox_preds)[0]
	        res = self.post_process(pred, org_data, im0, save_path, img_name)
	        result_img, bbox_res = overlay_bbox_cv(im0, res, names, score_thresh=0.35)
	        return bbox_res, result_img

	    def post_process(self, result, im, im0, save_path, img_name):
	        """Rescale detections to the original image and group them by class.

	        Args:
	            result: ``(det_bboxes, det_labels)`` pair from ``get_bboxes``;
	                ``det_bboxes`` is updated in place.
	            im: Resized network input image (its shape gives the source scale).
	            im0: Original image (its shape gives the target scale).
	            save_path: Unused.
	            img_name: Unused.

	        Returns:
	            dict: ``class index -> list of [x1, y1, x2, y2, score]`` with
	            rounded coordinates in original-image pixels.
	        """
	        det_bboxes, det_labels = result
	        det_bboxes[:, :4] = scale_boxes_no_letter(
	            im.shape[:2], det_bboxes[:, :4], im0.shape
	        ).round()

	        det_result = {}
	        for class_id in range(self.nc):
	            inds = det_labels == class_id
	            det_result[class_id] = det_bboxes[inds, :5].astype(np.float32).tolist()
	        return det_result

	class QRCodeDecoder:
	    """Crop detected QR-code regions out of an image and decode them with pyzbar."""

	    def crop_qr_regions(self, image, regions, margin=15):
	        """Crop every detected bounding box, enlarged by ``margin`` pixels.

	        The enlargement mitigates decode failures caused by detections that
	        cut off part of the code; tune it to the detector's behaviour. The
	        enlarged box is clamped to the image so that boxes near a border are
	        not turned into wrap-around (negative) slice indices.

	        Args:
	            image (np.ndarray): Image with shape (height, width, ...).
	            regions: Iterable of ``[label, x1, y1, x2, y2, score]`` with
	                integer pixel coordinates.
	            margin (int): Pixels added on every side. Default: 15.

	        Returns:
	            list[dict]: One ``{'image': crop, 'bbox': region}`` entry per
	            region whose crop is non-empty.
	        """
	        img_h, img_w = image.shape[:2]
	        cropped_images = []
	        for region in regions:
	            label, x1, y1, x2, y2, score = region
	            left = min(max(x1 - margin, 0), img_w)
	            top = min(max(y1 - margin, 0), img_h)
	            right = min(max(x2 + margin, 0), img_w)
	            bottom = min(max(y2 + margin, 0), img_h)

	            cropped = image[top:bottom, left:right]
	            if cropped.size > 0:
	                cropped_images.append({
	                    'image': cropped,
	                    'bbox': region,
	                })
	        return cropped_images

	    def decode_qrcode_pyzbar(self, cropped_image):
	        """Decode the codes found in ``cropped_image`` with pyzbar.

	        Args:
	            cropped_image (np.ndarray): 3-dimensional images are converted
	                with ``cv2.COLOR_BGR2GRAY`` first; anything else is passed to
	                pyzbar as is.

	        Returns:
	            list[dict]: One ``{'data', 'type', 'points'}`` entry per decoded
	            symbol whose payload is valid UTF-8. On any other failure the
	            error is printed and an empty list is returned.
	        """
	        try:
	            # pyzbar is fed a grayscale image
	            if len(cropped_image.shape) == 3:
	                gray = cv2.cvtColor(cropped_image, cv2.COLOR_BGR2GRAY)
	            else:
	                gray = cropped_image

	            results = []
	            for obj in pyzbar.decode(gray):
	                try:
	                    data = obj.data.decode('utf-8')
	                except UnicodeDecodeError:
	                    # payload is not UTF-8 text: skip this symbol, keep the rest
	                    continue
	                results.append({
	                    'data': data,
	                    'type': obj.type,
	                    'points': obj.polygon
	                })
	            return results
	        except Exception as e:
	            print(f"decode error: {e}")
	            return []

	if __name__ == '__main__':
	    # Batch evaluation: detect QR codes in every JPEG of img_path, decode the
	    # crops with pyzbar and report the per-image and overall success rate.
	    detector = NanoDetONNXInfer(model_path='./nanodet-plus-m_416_QR.axmodel', imgsz=[416, 416])
	    decoder = QRCodeDecoder()
	    img_path = './qrcode_test'
	    det_path = './det_res'
	    crop_path = './crop_res'
	    os.makedirs(det_path, exist_ok=True)
	    os.makedirs(crop_path, exist_ok=True)

	    imgs = glob.glob(f"{img_path}/*.jpg")
	    total = len(imgs)
	    success = 0
	    fail = 0
	    start_time = time.time()
	    for img in imgs:
	        pic_name = os.path.basename(img).split('.')[0]
	        loop_start_time = time.time()
	        det_result, res_img = detector.detect_objects(img, det_path)
	        # Debug aid - save the detection image:
	        # cv2.imwrite(os.path.join(det_path, pic_name+'.jpg'), res_img)

	        # Crop the detected QR codes and decode each crop with pyzbar
	        cropped_images = decoder.crop_qr_regions(res_img, det_result)
	        # Debug aid - save every crop:
	        # for i, cropped in enumerate(cropped_images):
	        #     cv2.imwrite(os.path.join(crop_path, f'{pic_name}_crop_{i}.jpg'), cropped['image'])

	        all_decoded_results = []
	        for cropped_data in cropped_images:
	            all_decoded_results.extend(decoder.decode_qrcode_pyzbar(cropped_data['image']))

	        # An image counts as a success when at least one crop was decoded
	        if all_decoded_results:
	            success += 1
	            print(f"{pic_name} 识别成功！")
	        else:
	            fail += 1
	            print(f"{pic_name} 识别失败！")
	        loop_end_time = time.time()
	        print(f"图片 {img} 处理耗时: {loop_end_time - loop_start_time:.4f} 秒")

	    end_time = time.time()  # overall end time
	    total_time = end_time - start_time  # overall elapsed time

	    # Guard the ratios: the glob may match no image at all
	    success_rate = success / total * 100 if total else 0.0
	    avg_time = total_time / total if total else 0.0

	    print(f"总共测试图片数量: {total}")
	    print(f"识别成功数量: {success}")
	    print(f"识别失败数量: {fail}")
	    print(f"识别成功率: {success_rate:.2f}%")
	    print(f"整体处理耗时: {total_time:.4f} 秒")
	    print(f"平均每张图片处理耗时: {avg_time:.4f} 秒")