diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 0000000000000000000000000000000000000000..01e2bca4a26a52b8fa6ade219b8fb5950e00ab34
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,2 @@
+*.axmodel filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
diff --git a/README.md b/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..736c31eba86dba3090b99b9863987aba34487279
--- /dev/null
+++ b/README.md
@@ -0,0 +1,100 @@
+---
+license: mit
+pipeline_tag: object-detection
+---
+
+# QRCode Detection
+
+The QRCode detection models in this repository (YOLOv5n, YOLOv8n, and DEIMv2-femto) have been converted to run on the Axera NPU using **w8a16** quantization.
+
+Compatible with Pulsar2 version: 4.2
+
+## Convert tools links:
+
+For those who are interested in model conversion, you can try to export an axmodel through
+
+- [The repo of AXera Platform](https://github.com/wzf19947/QRCode_det), where you can find a detailed guide
+
+- [Pulsar2 Link, How to Convert ONNX to axmodel](https://pulsar2-docs.readthedocs.io/en/latest/pulsar2/introduction.html)
+
+
+## Support Platform
+
+- AX650
+  - [M4N-Dock(爱芯派Pro)](https://wiki.sipeed.com/hardware/zh/maixIV/m4ndock/m4ndock.html)
+  - [M.2 Accelerator card](https://axcl-docs.readthedocs.io/zh-cn/latest/doc_guide_hardware.html)
+
+|Chips|Model|Inference time|
+|--|--|--|
+|AX650|yolov5n|0.73 ms|
+|AX650|yolov8n|1.31 ms|
+|AX650|yolov9t|1.89 ms|
+|AX650|yolov10n|1.44 ms|
+|AX650|yolo11n|1.39 ms|
+|AX650|yolo11n|2.49 ms|
+|AX650|DEIMv2_femto(u16)|1.79 ms|
+
+## How to use
+
+Download all files from this repository to the device.
+
+```
+root@ax650:~/QRCode_det# tree
+.
+|-- model
+|   |-- deimv2_hgnetv2_femto_coco_npu3.axmodel
+|   |-- yolov5n_npu3.axmodel
+|   `-- yolov8n_npu3.axmodel
+`-- python
+    |-- QRCode_axmodel_infer_DEIMv2.py
+    |-- QRCode_axmodel_infer_v5.py
+    |-- QRCode_axmodel_infer_v8.py
+    |-- QRCode_onnx_infer_DEIMv2.py
+    |-- QRCode_onnx_infer_v5.py
+    |-- QRCode_onnx_infer_v8.py
+    `-- requirements.txt
+```
+
+### Inference
+
+Input Data:
+
+```
+|-- images
+|   |-- qrcode_01.jpg
+|   |-- qrcode_02.jpg
+|   |-- qrcode_03.jpg
+|   `-- qrcode_05.jpg ...
+```
+
+#### Inference with AX650 Host, such as M4N-Dock(爱芯派Pro)
+
+```
+root@ax650:~/QRCode# python3 QRCode_axmodel_infer_DEIMv2.py
+[INFO] Available providers: ['AxEngineExecutionProvider']
+[INFO] Using provider: AxEngineExecutionProvider
+[INFO] Chip type: ChipType.MC50
+[INFO] VNPU type: VNPUType.DISABLED
+[INFO] Engine version: 2.12.0s
+[INFO] Model type: 2 (triple core)
+[INFO] Compiler version: 4.2 b98901c3
+Decoded successfully!
+Image ./qrcode_test/qrcode_01.jpg processed in 0.2165 s
+Decoded successfully!
+Image ./qrcode_test/qrcode_02.jpg processed in 0.1540 s
+Decoded successfully!
+Image ./qrcode_test/qrcode_03.jpg processed in 0.1456 s
+Decoded successfully!
+Image ./qrcode_test/qrcode_05.jpg processed in 0.1449 s
+
+```
+
+Output:
+![alt text](result.png)
\ No newline at end of file
diff --git a/images/qrcode_01.jpg b/images/qrcode_01.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..3c910e037bd614770ab2da424465f41e000365ef
Binary files /dev/null and b/images/qrcode_01.jpg differ
diff --git a/images/qrcode_02.jpg b/images/qrcode_02.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..68d23a977eb0dd6ba8899469256cb2b163070641
Binary files /dev/null and b/images/qrcode_02.jpg differ
diff --git a/images/qrcode_03.jpg b/images/qrcode_03.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..6fc8cdcf25c5e963ad9a0047e740ab2d3ddd08a3
Binary files /dev/null and b/images/qrcode_03.jpg differ
diff --git a/images/qrcode_05.jpg b/images/qrcode_05.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..5fd93f27777fe5b412fbcfcf240cf84882eaded1
Binary files /dev/null and b/images/qrcode_05.jpg differ
diff --git a/images/qrcode_06.jpg b/images/qrcode_06.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..e40c02ba5a1e22757375f67733d8f8d365fa926f
Binary files /dev/null and b/images/qrcode_06.jpg differ
diff --git a/images/qrcode_08.jpg b/images/qrcode_08.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..c14003d90c4b641a9d021bc3f0b3bfaa3aeee616
Binary files /dev/null and b/images/qrcode_08.jpg differ
diff --git a/images/qrcode_09.jpg b/images/qrcode_09.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..2b771ff0c36117e1381575463fee4a49f59de431
Binary files /dev/null and b/images/qrcode_09.jpg differ
diff --git a/images/qrcode_11.jpg b/images/qrcode_11.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..b9f1bf5d4fd5e33780768213c034ee4af3742b42
Binary files /dev/null and b/images/qrcode_11.jpg differ
diff --git a/images/qrcode_12.jpg b/images/qrcode_12.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..e206101368edafac9a9a05d5de77ebad00f12053
Binary files /dev/null and b/images/qrcode_12.jpg differ
diff --git a/images/qrcode_13.jpg b/images/qrcode_13.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..a3a46e7928610a468e0cffffb4ec9a8e16ca664b
Binary files /dev/null and b/images/qrcode_13.jpg differ
diff --git a/images/qrcode_14.jpg b/images/qrcode_14.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..116b25b6db5300c5b12f49f44e5e1881c692057b
Binary files /dev/null and b/images/qrcode_14.jpg differ
diff --git a/images/qrcode_15.jpg b/images/qrcode_15.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..29a3e605b450911e23b6885cdfc41902b94d34ff
Binary files /dev/null and b/images/qrcode_15.jpg differ
diff --git a/images/qrcode_16.jpg b/images/qrcode_16.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..e3ae507a12d18387e4512d031d8993802a91ed14
Binary files /dev/null and b/images/qrcode_16.jpg differ
diff --git a/images/qrcode_17.jpg b/images/qrcode_17.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..a2aafb82b2501e017e8297cb3db1299b8f924c6b
Binary files /dev/null and b/images/qrcode_17.jpg differ
diff --git a/images/qrcode_18.jpg b/images/qrcode_18.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..51b7d7ad347fcfd81ae59d366cae8687117f7de8
Binary files /dev/null and b/images/qrcode_18.jpg differ
diff --git a/images/qrcode_19.jpg b/images/qrcode_19.jpg
new file mode
100644 index 0000000000000000000000000000000000000000..e6fa2217350737abcd1b4d7ebee1fb76dea713d8 Binary files /dev/null and b/images/qrcode_19.jpg differ diff --git a/images/qrcode_20.jpg b/images/qrcode_20.jpg new file mode 100644 index 0000000000000000000000000000000000000000..b2a6c09e678dfdde0429008d8c1e18adbeed308f Binary files /dev/null and b/images/qrcode_20.jpg differ diff --git a/images/qrcode_21.jpg b/images/qrcode_21.jpg new file mode 100644 index 0000000000000000000000000000000000000000..bb763dbca4ddac3886051904dd448bad13fcb5db Binary files /dev/null and b/images/qrcode_21.jpg differ diff --git a/images/qrcode_22.jpg b/images/qrcode_22.jpg new file mode 100644 index 0000000000000000000000000000000000000000..0732df6bcdf25e77c67db39f2069a726022d2867 Binary files /dev/null and b/images/qrcode_22.jpg differ diff --git a/images/qrcode_23.jpg b/images/qrcode_23.jpg new file mode 100644 index 0000000000000000000000000000000000000000..7e6f14a49c96de3c52f16261e32e910d9fef2fb2 Binary files /dev/null and b/images/qrcode_23.jpg differ diff --git a/images/qrcode_24.jpg b/images/qrcode_24.jpg new file mode 100644 index 0000000000000000000000000000000000000000..fec6662eb46b7b1cb97cffc19e7a60b11e717de2 Binary files /dev/null and b/images/qrcode_24.jpg differ diff --git a/images/qrcode_25.jpg b/images/qrcode_25.jpg new file mode 100644 index 0000000000000000000000000000000000000000..b4b5e2dee3dce6fcdf19e5e34e77494d39006eb1 Binary files /dev/null and b/images/qrcode_25.jpg differ diff --git a/images/qrcode_26.jpg b/images/qrcode_26.jpg new file mode 100644 index 0000000000000000000000000000000000000000..8ef77fed20e6edfb34ac94c6238f4ce97a4d4a36 Binary files /dev/null and b/images/qrcode_26.jpg differ diff --git a/images/qrcode_27.jpg b/images/qrcode_27.jpg new file mode 100644 index 0000000000000000000000000000000000000000..9c1cd125e03c5df7b4569423756028858d248286 Binary files /dev/null and b/images/qrcode_27.jpg differ diff --git a/images/qrcode_28.jpg b/images/qrcode_28.jpg new file mode 100644 index 0000000000000000000000000000000000000000..0cf169a50a10430f031159169230b84529ac16d3 Binary files /dev/null and b/images/qrcode_28.jpg differ diff --git a/images/qrcode_29.jpg b/images/qrcode_29.jpg new file mode 100644 index 0000000000000000000000000000000000000000..2e376c1cf863135135a7253deb76c6393d1fe4a7 Binary files /dev/null and b/images/qrcode_29.jpg differ diff --git a/images/qrcode_30.jpg b/images/qrcode_30.jpg new file mode 100644 index 0000000000000000000000000000000000000000..11ee10ea654a67f4dfd140b94787a345b798ea87 Binary files /dev/null and b/images/qrcode_30.jpg differ diff --git a/images/qrcode_31.jpg b/images/qrcode_31.jpg new file mode 100644 index 0000000000000000000000000000000000000000..0aab1386cea54d50ba2fcde37ae4b7f224e7f1c7 Binary files /dev/null and b/images/qrcode_31.jpg differ diff --git a/images/qrcode_33.jpg b/images/qrcode_33.jpg new file mode 100644 index 0000000000000000000000000000000000000000..9b0702ccfabad5a6461c55d6f147eb0068c17fe4 Binary files /dev/null and b/images/qrcode_33.jpg differ diff --git a/images/qrcode_34.jpg b/images/qrcode_34.jpg new file mode 100644 index 0000000000000000000000000000000000000000..b3ce801b93446255b5dddc3e99f2f769ee1e8d44 Binary files /dev/null and b/images/qrcode_34.jpg differ diff --git a/images/qrcode_35.jpg b/images/qrcode_35.jpg new file mode 100644 index 0000000000000000000000000000000000000000..7f508c5be777610abd23264323c1d1e498e9f56e Binary files /dev/null and b/images/qrcode_35.jpg differ diff --git a/images/qrcode_36.jpg 
b/images/qrcode_36.jpg new file mode 100644 index 0000000000000000000000000000000000000000..b5825b6290e492c2a06a6ee199ef5bf30df13ba4 Binary files /dev/null and b/images/qrcode_36.jpg differ diff --git a/images/qrcode_37.jpg b/images/qrcode_37.jpg new file mode 100644 index 0000000000000000000000000000000000000000..515fd4e609f1ccefebb1ca079f88de5604f681d6 Binary files /dev/null and b/images/qrcode_37.jpg differ diff --git a/images/qrcode_38.jpg b/images/qrcode_38.jpg new file mode 100644 index 0000000000000000000000000000000000000000..9d145f284a3e98b815a1d13963238ed2ad07bfaf Binary files /dev/null and b/images/qrcode_38.jpg differ diff --git a/images/qrcode_39.jpg b/images/qrcode_39.jpg new file mode 100644 index 0000000000000000000000000000000000000000..fc42008f8101a371da743287ac55958644af6abb Binary files /dev/null and b/images/qrcode_39.jpg differ diff --git a/images/qrcode_41.jpg b/images/qrcode_41.jpg new file mode 100644 index 0000000000000000000000000000000000000000..948079b4e38015d03bee0aa095b32e524908dc8d Binary files /dev/null and b/images/qrcode_41.jpg differ diff --git a/images/qrcode_42.jpg b/images/qrcode_42.jpg new file mode 100644 index 0000000000000000000000000000000000000000..91d70c2c6ea2bd20a2c939f3f4081ef8bc44c4ef Binary files /dev/null and b/images/qrcode_42.jpg differ diff --git a/images/qrcode_43.jpg b/images/qrcode_43.jpg new file mode 100644 index 0000000000000000000000000000000000000000..903cfd48d6bd367bc4f6e4ba69ace7c01c583bf2 Binary files /dev/null and b/images/qrcode_43.jpg differ diff --git a/images/qrcode_44.jpg b/images/qrcode_44.jpg new file mode 100644 index 0000000000000000000000000000000000000000..b52a927716ffea38af0f44ee1e49279811607b39 Binary files /dev/null and b/images/qrcode_44.jpg differ diff --git a/images/qrcode_45.jpg b/images/qrcode_45.jpg new file mode 100644 index 0000000000000000000000000000000000000000..a7945d48cd87d7653a7a695c383568ff4dee5a15 Binary files /dev/null and b/images/qrcode_45.jpg differ diff --git a/images/qrcode_46.jpg b/images/qrcode_46.jpg new file mode 100644 index 0000000000000000000000000000000000000000..6101cbefebbb01ea7941b53ee488cde717f5f60e Binary files /dev/null and b/images/qrcode_46.jpg differ diff --git a/images/qrcode_47.jpg b/images/qrcode_47.jpg new file mode 100644 index 0000000000000000000000000000000000000000..cfe4afc071a1ebd3db559199e7573f9fc274715c Binary files /dev/null and b/images/qrcode_47.jpg differ diff --git a/images/qrcode_48.jpg b/images/qrcode_48.jpg new file mode 100644 index 0000000000000000000000000000000000000000..39770bbaa0af23949fc3cd471f61b0d630670531 Binary files /dev/null and b/images/qrcode_48.jpg differ diff --git a/images/qrcode_49.jpg b/images/qrcode_49.jpg new file mode 100644 index 0000000000000000000000000000000000000000..7aeb7fc097ab73c76635ab1f16e095909f122251 Binary files /dev/null and b/images/qrcode_49.jpg differ diff --git a/images/qrcode_50.jpg b/images/qrcode_50.jpg new file mode 100644 index 0000000000000000000000000000000000000000..1353fc491ce28da544240e2ea44f05595cc390c0 Binary files /dev/null and b/images/qrcode_50.jpg differ diff --git a/images/qrcode_51.jpg b/images/qrcode_51.jpg new file mode 100644 index 0000000000000000000000000000000000000000..3b65de68bf459bb2738139efa039a91312feadca Binary files /dev/null and b/images/qrcode_51.jpg differ diff --git a/images/qrcode_52.jpg b/images/qrcode_52.jpg new file mode 100644 index 0000000000000000000000000000000000000000..f94e9662eb008343ef05d34741e43de748f287ab Binary files /dev/null and b/images/qrcode_52.jpg differ diff 
--git a/images/qrcode_53.jpg b/images/qrcode_53.jpg new file mode 100644 index 0000000000000000000000000000000000000000..0df34718a608f419c897d6c72ef6420c36164820 Binary files /dev/null and b/images/qrcode_53.jpg differ diff --git a/images/qrcode_54.jpg b/images/qrcode_54.jpg new file mode 100644 index 0000000000000000000000000000000000000000..96b5909a41691cad46ad8eb861276c450dedde7e Binary files /dev/null and b/images/qrcode_54.jpg differ diff --git a/images/qrcode_55.jpg b/images/qrcode_55.jpg new file mode 100644 index 0000000000000000000000000000000000000000..126458a8f2d4d51154b2584e6ac4c9217d506bc5 Binary files /dev/null and b/images/qrcode_55.jpg differ diff --git a/model/deimv2_hgnetv2_femto_coco_npu3.axmodel b/model/deimv2_hgnetv2_femto_coco_npu3.axmodel new file mode 100644 index 0000000000000000000000000000000000000000..74be41a14bcc39f17a743bc0cad62f9d3600eade --- /dev/null +++ b/model/deimv2_hgnetv2_femto_coco_npu3.axmodel @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87f89ba2fee66b2f2b959a8f837fe61bad1a9c6ef49636e3d9d43e4541d51155 +size 2165133 diff --git a/model/yolov5n_npu3.axmodel b/model/yolov5n_npu3.axmodel new file mode 100644 index 0000000000000000000000000000000000000000..f77d5fa0720cd8ced7d9cd80b918c7a9145ec8c7 --- /dev/null +++ b/model/yolov5n_npu3.axmodel @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:34ea4a3e74bd9dde388086cb60cd51f9d53c7734bf52f2e21de9a6d71de0b194 +size 2003219 diff --git a/model/yolov8n_npu3.axmodel b/model/yolov8n_npu3.axmodel new file mode 100644 index 0000000000000000000000000000000000000000..ee197f2f22247b6d8d2face1c86f1c973e25a048 --- /dev/null +++ b/model/yolov8n_npu3.axmodel @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d05be5bad116511e552849e16793b8b3ea19238a8fa1dc00935a18fb58207e81 +size 3488708 diff --git a/python/QRCode_axmodel_infer_DEIMv2.py b/python/QRCode_axmodel_infer_DEIMv2.py new file mode 100644 index 0000000000000000000000000000000000000000..96795a21569167880d03ed7382d24a5f929ff482 --- /dev/null +++ b/python/QRCode_axmodel_infer_DEIMv2.py @@ -0,0 +1,370 @@ +""" +DEIMv2: Real-Time Object Detection Meets DINOv3 +Copyright (c) 2025 The DEIMv2 Authors. All Rights Reserved. +--------------------------------------------------------------------------------- +Modified from D-FINE (https://github.com/Peterande/D-FINE) +Copyright (c) 2024 The D-FINE Authors. All Rights Reserved. 
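+
+This script detects QR codes with the DEIMv2 axmodel on the Axera NPU (via axengine)
+and decodes the cropped regions with pyzbar.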
+""" +import os +import sys +import cv2 +import time +import numpy as np +import axengine as axe +import torch +import torch.nn as nn +import torch.nn.functional as F +import torchvision.transforms as T +from PIL import Image, ImageDraw +import pyzbar.pyzbar as pyzbar +import torchvision +import glob + +def mod(a, b): + out = a - a // b * b + return out + +mscoco_category2name = { + 1: 'person', + 2: 'bicycle', + 3: 'car', + 4: 'motorcycle', + 5: 'airplane', + 6: 'bus', + 7: 'train', + 8: 'truck', + 9: 'boat', + 10: 'traffic light', + 11: 'fire hydrant', + 13: 'stop sign', + 14: 'parking meter', + 15: 'bench', + 16: 'bird', + 17: 'cat', + 18: 'dog', + 19: 'horse', + 20: 'sheep', + 21: 'cow', + 22: 'elephant', + 23: 'bear', + 24: 'zebra', + 25: 'giraffe', + 27: 'backpack', + 28: 'umbrella', + 31: 'handbag', + 32: 'tie', + 33: 'suitcase', + 34: 'frisbee', + 35: 'skis', + 36: 'snowboard', + 37: 'sports ball', + 38: 'kite', + 39: 'baseball bat', + 40: 'baseball glove', + 41: 'skateboard', + 42: 'surfboard', + 43: 'tennis racket', + 44: 'bottle', + 46: 'wine glass', + 47: 'cup', + 48: 'fork', + 49: 'knife', + 50: 'spoon', + 51: 'bowl', + 52: 'banana', + 53: 'apple', + 54: 'sandwich', + 55: 'orange', + 56: 'broccoli', + 57: 'carrot', + 58: 'hot dog', + 59: 'pizza', + 60: 'donut', + 61: 'cake', + 62: 'chair', + 63: 'couch', + 64: 'potted plant', + 65: 'bed', + 67: 'dining table', + 70: 'toilet', + 72: 'tv', + 73: 'laptop', + 74: 'mouse', + 75: 'remote', + 76: 'keyboard', + 77: 'cell phone', + 78: 'microwave', + 79: 'oven', + 80: 'toaster', + 81: 'sink', + 82: 'refrigerator', + 84: 'book', + 85: 'clock', + 86: 'vase', + 87: 'scissors', + 88: 'teddy bear', + 89: 'hair drier', + 90: 'toothbrush' +} + +mscoco_category2label = {k: i for i, k in enumerate(mscoco_category2name.keys())} +mscoco_label2category = {v: k for k, v in mscoco_category2label.items()} + +class PostProcessor(nn.Module): + __share__ = [ + 'num_classes', + 'use_focal_loss', + 'num_top_queries', + 'remap_mscoco_category' + ] + + def __init__( + self, + num_classes=80, + use_focal_loss=True, + num_top_queries=300, + remap_mscoco_category=False + ) -> None: + super().__init__() + self.use_focal_loss = use_focal_loss + self.num_top_queries = num_top_queries + self.num_classes = int(num_classes) + self.remap_mscoco_category = remap_mscoco_category + self.deploy_mode = False + + def extra_repr(self) -> str: + return f'use_focal_loss={self.use_focal_loss}, num_classes={self.num_classes}, num_top_queries={self.num_top_queries}' + + # def forward(self, outputs, orig_target_sizes): + def forward(self, outputs, orig_target_sizes: torch.Tensor): + logits, boxes = outputs['pred_logits'], outputs['pred_boxes'] + # orig_target_sizes = torch.stack([t["orig_size"] for t in targets], dim=0) + + bbox_pred = torchvision.ops.box_convert(boxes, in_fmt='cxcywh', out_fmt='xyxy') + bbox_pred *= orig_target_sizes.repeat(1, 2).unsqueeze(1) + + if self.use_focal_loss: + scores = F.sigmoid(logits) + + scores, index = torch.topk(scores.flatten(1), self.num_top_queries, dim=-1) + # labels = index % self.num_classes + labels = mod(index, self.num_classes) + index = index // self.num_classes + boxes = bbox_pred.gather(dim=1, index=index.unsqueeze(-1).repeat(1, 1, bbox_pred.shape[-1])) + + else: + scores = F.softmax(logits)[:, :, :-1] + scores, labels = scores.max(dim=-1) + if scores.shape[1] > self.num_top_queries: + scores, index = torch.topk(scores, self.num_top_queries, dim=-1) + labels = torch.gather(labels, dim=1, index=index) + boxes = torch.gather(boxes, 
dim=1, index=index.unsqueeze(-1).tile(1, 1, boxes.shape[-1]))
+
+        if self.deploy_mode:
+            return labels, boxes, scores
+
+        if self.remap_mscoco_category:
+            labels = torch.tensor([mscoco_label2category[int(x.item())] for x in labels.flatten()])\
+                .to(boxes.device).reshape(labels.shape)
+
+        results = []
+        for lab, box, sco in zip(labels, boxes, scores):
+            result = dict(labels=lab, boxes=box, scores=sco)
+            results.append(result)
+
+        return results
+
+    def deploy(self, ):
+        self.eval()
+        self.deploy_mode = True
+        return self
+def resize_with_aspect_ratio(image, size, interpolation=Image.BILINEAR):
+    """Resizes an image while maintaining aspect ratio and pads it."""
+    original_width, original_height = image.size
+    ratio = min(size / original_width, size / original_height)
+    new_width = int(original_width * ratio)
+    new_height = int(original_height * ratio)
+    image = image.resize((new_width, new_height), interpolation)
+
+    # Create a new image with the desired size and paste the resized image onto it
+    new_image = Image.new("RGB", (size, size))
+    new_image.paste(image, ((size - new_width) // 2, (size - new_height) // 2))
+    return new_image, ratio, (size - new_width) // 2, (size - new_height) // 2
+
+
+def draw(images, labels, boxes, scores, ratios, paddings, thrh=0.25):
+    result_images = []
+    detections=[]
+    for i, im in enumerate(images):
+        draw = ImageDraw.Draw(im)
+        scr = scores[i]
+        lab = labels[i][scr > thrh]
+        box = boxes[i][scr > thrh]
+        scr = scr[scr > thrh]
+
+        ratio = ratios[i]
+        pad_w, pad_h = paddings[i]
+
+        for lbl, bb in zip(lab, box):
+            # Adjust bounding boxes according to the resizing and padding
+            bb = [
+                (bb[0] - pad_w) / ratio,
+                (bb[1] - pad_h) / ratio,
+                (bb[2] - pad_w) / ratio,
+                (bb[3] - pad_h) / ratio,
+            ]
+            draw.rectangle(bb, outline='red')
+            draw.text((bb[0], bb[1]), text=str(lbl), fill='blue')
+            detection = [int(v) for v in bb]  # do not reuse the outer loop index here
+            detections.append(detection)
+        result_images.append(im)
+    return result_images, detections
+
+
+def process_image(sess, im_pil, post_processor, size=640, model_size='s'):
+
+    # Resize image while preserving aspect ratio
+    resized_im_pil, ratio, pad_w, pad_h = resize_with_aspect_ratio(im_pil, size)
+    orig_size = torch.tensor([[resized_im_pil.size[1], resized_im_pil.size[0]]])
+
+    transforms = T.Compose([
+        T.ToTensor(),
+        T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
+        if model_size not in ['atto', 'femto', 'pico', 'n']
+        else T.Lambda(lambda x: x)
+    ])
+
+    im_data = transforms(resized_im_pil).unsqueeze(0)
+
+    output = sess.run(
+        output_names=None,
+        input_feed={'images': im_data.numpy()}
+    )
+
+    output = {"pred_logits": torch.from_numpy(output[0]), "pred_boxes": torch.from_numpy(output[1])}
+    output=post_processor(output,orig_size)
+    labels, boxes, scores = output
+
+    result_images, detections = draw(
+        [im_pil], labels, boxes, scores,
+        [ratio], [(pad_w, pad_h)]
+    )
+
+    return detections, result_images
+
+
+class QRCodeDecoder:
+    def crop_qr_regions(self, image, regions):
+        """
+        Crop QR-code regions from the image according to the detected bounding boxes.
+        """
+        cropped_images = []
+        for idx, region in enumerate(regions):
+            x1, y1, x2, y2 = region
+            # Optionally expand the box to compensate for truncated detections; tune as needed
+            # x1-=15
+            # y1-=15
+            # x2+=15
+            # y2+=15
+            # Crop the region
+            cropped = image[y1:y2, x1:x2]
+            if cropped.size > 0:
+                cropped_images.append({
+                    'image': cropped,
+                    'bbox': region,
+                })
+            # cv2.imwrite(f'cropped_qr_{idx}.jpg', cropped)
+        return cropped_images
+
+    def decode_qrcode_pyzbar(self, cropped_image):
+        """
+        Decode QR codes with pyzbar.
+        """
+        try:
+            # Convert to a grayscale image
+            if len(cropped_image.shape) == 3:
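+                # pyzbar decodes single-channel images; collapse the color channels first.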
+                gray = cv2.cvtColor(cropped_image, cv2.COLOR_BGR2GRAY)
+            else:
+                gray = cropped_image
+            # cv2.imwrite('cropped_gray.jpg',gray)
+            # Decode with pyzbar
+            decoded_objects = pyzbar.decode(gray)
+            results = []
+            for obj in decoded_objects:
+                try:
+                    data = obj.data.decode('utf-8')
+                    results.append({
+                        'data': data,
+                        'type': obj.type,
+                        'points': obj.polygon
+                    })
+                except UnicodeDecodeError:
+                    continue
+
+            return results
+        except Exception as e:
+            print(f"decode error: {e}")
+            return []
+
+if __name__ == '__main__':
+
+    # Load the compiled axmodel
+    sess = axe.InferenceSession('deimv2_hgnetv2_femto_coco_npu3.axmodel')
+    size = sess.get_inputs()[0].shape[2]
+
+    # QRCode decoder
+    decoder = QRCodeDecoder()
+    img_path = './images'
+    det_path='./DEIMv2_det_res'
+    crop_path='./DEIMv2_crop_res'
+
+    os.makedirs(det_path, exist_ok=True)
+    os.makedirs(crop_path, exist_ok=True)
+    # get postprocess settings from your trained model config
+    post_processor = PostProcessor(use_focal_loss=True, num_classes=1, num_top_queries=100)
+    post_processor.deploy()
+    # print('post_processor:',post_processor)
+    imgs = glob.glob(f"{img_path}/*.jpg")
+    total = len(imgs)
+    success = 0
+    fail = 0
+    start_time = time.time()
+    for idx,img in enumerate(imgs):
+        pic_name=os.path.basename(img).split('.')[0]
+        loop_start_time = time.time()
+        # detect the image
+        im_pil = Image.open(img).convert('RGB')
+        img_cv2 = np.array(im_pil)
+        img_cv2 = cv2.cvtColor(img_cv2, cv2.COLOR_RGB2BGR)
+        det_result, res_img = process_image(sess, im_pil, post_processor, size, 'femto')
+        # res_img[0].save(os.path.join(det_path, pic_name+'.jpg'))
+        # print('det result:',det_result)
+        # Crop detected QRCodes & decode them with pyzbar
+        cropped_images = decoder.crop_qr_regions(img_cv2, det_result)
+        # for i,cropped in enumerate(cropped_images):
+        #     cv2.imwrite(os.path.join(crop_path, f'{pic_name}_crop_{i}.jpg'), cropped['image'])
+
+        all_decoded_results = []
+        for i, cropped_data in enumerate(cropped_images):
+            decoded_results = decoder.decode_qrcode_pyzbar(cropped_data['image'])
+            all_decoded_results.extend(decoded_results)
+            # for result in decoded_results:
+            #     print(f"decode result: {result['data']} (type: {result['type']})")
+        if all_decoded_results:
+            success += 1
+            print("Decoded successfully!")
+        else:
+            fail += 1
+            print("Decode failed!")
+        loop_end_time = time.time()
+        print(f"Image {img} processed in {loop_end_time - loop_start_time:.4f} s")
+
+    end_time = time.time()  # record the overall end time
+    total_time = end_time - start_time  # total elapsed time
+
+    print(f"Total images tested: {total}")
+    print(f"Successfully decoded: {success}")
+    print(f"Failed to decode: {fail}")
+    print(f"Success rate: {success/total*100:.2f}%")
+    print(f"Total processing time: {total_time:.4f} s")
+    print(f"Average time per image: {total_time/total:.4f} s")
diff --git a/python/QRCode_axmodel_infer_v5.py b/python/QRCode_axmodel_infer_v5.py
new file mode 100644
index 0000000000000000000000000000000000000000..655101aa96a257763d44c88d94c529fcc7492d5f
--- /dev/null
+++ b/python/QRCode_axmodel_infer_v5.py
@@ -0,0 +1,458 @@
+import numpy as np
+import cv2
+import os
+import torch
+import time
+import torchvision
+import matplotlib
+import pyzbar.pyzbar as pyzbar
+import axengine as axe
+
+class Colors:
+    # Ultralytics color palette https://ultralytics.com/
+    def __init__(self):
+        self.palette = [self.hex2rgb(c) for c in matplotlib.colors.TABLEAU_COLORS.values()]
+        self.n = len(self.palette)
+
+    def __call__(self, i, bgr=False):
+        c = self.palette[int(i) % self.n]
+        return (c[2], c[1], c[0]) if bgr else c
+
+    @staticmethod
+    def hex2rgb(h):  # rgb order (PIL)
+        return tuple(int(h[1 + i:1 + i + 2], 16) for i in (0, 2, 4))
+
+colors = Colors()
+
+def xywh2xyxy(x):
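+    # Example: [[10., 10., 4., 6.]] (cx, cy, w, h) -> [[8., 7., 12., 13.]] (x1, y1, x2, y2).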
+    # Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right
+    y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
+    y[:, 0] = x[:, 0] - x[:, 2] / 2  # top left x
+    y[:, 1] = x[:, 1] - x[:, 3] / 2  # top left y
+    y[:, 2] = x[:, 0] + x[:, 2] / 2  # bottom right x
+    y[:, 3] = x[:, 1] + x[:, 3] / 2  # bottom right y
+    return y
+
+def box_iou(box1, box2):
+    # https://github.com/pytorch/vision/blob/master/torchvision/ops/boxes.py
+    """
+    Return intersection-over-union (Jaccard index) of boxes.
+    Both sets of boxes are expected to be in (x1, y1, x2, y2) format.
+    Arguments:
+        box1 (Tensor[N, 4])
+        box2 (Tensor[M, 4])
+    Returns:
+        iou (Tensor[N, M]): the NxM matrix containing the pairwise
+            IoU values for every element in boxes1 and boxes2
+    """
+
+    def box_area(box):
+        # box = 4xn
+        return (box[2] - box[0]) * (box[3] - box[1])
+
+    area1 = box_area(box1.T)
+    area2 = box_area(box2.T)
+
+    # inter(N,M) = (rb(N,M,2) - lt(N,M,2)).clamp(0).prod(2)
+    inter = (torch.min(box1[:, None, 2:], box2[:, 2:]) - torch.max(box1[:, None, :2], box2[:, :2])).clamp(0).prod(2)
+    return inter / (area1[:, None] + area2 - inter)  # iou = inter / (area1 + area2 - inter)
+
+def non_max_suppression(
+        prediction,
+        conf_thres=0.25,
+        iou_thres=0.45,
+        classes=None,
+        agnostic=False,
+        multi_label=False,
+        labels=(),
+        max_det=300,
+        nm=0,  # number of masks
+):
+    """Non-Maximum Suppression (NMS) on inference results to reject overlapping detections
+
+    Returns:
+         list of detections, on (n,6) tensor per image [xyxy, conf, cls]
+    """
+
+    if isinstance(prediction, (list, tuple)):  # YOLOv5 model in validation mode, output = (inference_out, loss_out)
+        prediction = prediction[0]  # select only inference output
+
+    device = prediction.device
+    mps = 'mps' in device.type  # Apple MPS
+    if mps:  # MPS not fully supported yet, convert tensors to CPU before NMS
+        prediction = prediction.cpu()
+    bs = prediction.shape[0]  # batch size
+    nc = prediction.shape[2] - nm - 5  # number of classes
+    xc = prediction[..., 4] > conf_thres  # candidates
+
+    # Checks
+    assert 0 <= conf_thres <= 1, f'Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0'
+    assert 0 <= iou_thres <= 1, f'Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0'
+
+    # Settings
+    # min_wh = 2  # (pixels) minimum box width and height
+    max_wh = 7680  # (pixels) maximum box width and height
+    max_nms = 30000  # maximum number of boxes into torchvision.ops.nms()
+    time_limit = 0.5 + 0.05 * bs  # seconds to quit after
+    redundant = True  # require redundant detections
+    multi_label &= nc > 1  # multiple labels per box (adds 0.5ms/img)
+    merge = False  # use merge-NMS
+
+    t = time.time()
+    mi = 5 + nc  # mask start index
+    output = [torch.zeros((0, 6 + nm), device=prediction.device)] * bs
+    for xi, x in enumerate(prediction):  # image index, image inference
+        # Apply constraints
+        # x[((x[..., 2:4] < min_wh) | (x[..., 2:4] > max_wh)).any(1), 4] = 0  # width-height
+        x = x[xc[xi]]  # confidence
+
+        # Cat apriori labels if autolabelling
+        if labels and len(labels[xi]):
+            lb = labels[xi]
+            v = torch.zeros((len(lb), nc + nm + 5), device=x.device)
+            v[:, :4] = lb[:, 1:5]  # box
+            v[:, 4] = 1.0  # conf
+            v[range(len(lb)), lb[:, 0].long() + 5] = 1.0  # cls
+            x = torch.cat((x, v), 0)
+
+        # If none remain process next image
+        if not x.shape[0]:
+            continue
+
+        # Compute conf
+        x[:, 5:] *= x[:, 4:5]  # conf = obj_conf * cls_conf
+
+        # Box/Mask
+        box = xywh2xyxy(x[:, :4])  # center_x, center_y, width, height) to (x1, y1, x2, y2)
+        mask = x[:, mi:]  # zero columns if no masks
+
+        # Detections matrix nx6 (xyxy, conf, cls)
+        if multi_label:
+            i, j = (x[:, 5:mi] > conf_thres).nonzero(as_tuple=False).T
+            x = torch.cat((box[i], x[i, 5 + j, None], j[:, None].float(), mask[i]), 1)
+        else:  # best class only
+            conf, j = x[:, 5:mi].max(1, keepdim=True)
+            x = torch.cat((box, conf, j.float(), mask), 1)[conf.view(-1) > conf_thres]
+
+        # Filter by class
+        if classes is not None:
+            x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)]
+
+        # Apply finite constraint
+        # if not torch.isfinite(x).all():
+        #     x = x[torch.isfinite(x).all(1)]
+
+        # Check shape
+        n = x.shape[0]  # number of boxes
+        if not n:  # no boxes
+            continue
+        elif n > max_nms:  # excess boxes
+            x = x[x[:, 4].argsort(descending=True)[:max_nms]]  # sort by confidence
+        else:
+            x = x[x[:, 4].argsort(descending=True)]  # sort by confidence
+
+        # Batched NMS
+        c = x[:, 5:6] * (0 if agnostic else max_wh)  # classes
+        boxes, scores = x[:, :4] + c, x[:, 4]  # boxes (offset by class), scores
+        i = torchvision.ops.nms(boxes, scores, iou_thres)  # NMS
+        if i.shape[0] > max_det:  # limit detections
+            i = i[:max_det]
+        if merge and (1 < n < 3E3):  # Merge NMS (boxes merged using weighted mean)
+            # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4)
+            iou = box_iou(boxes[i], boxes) > iou_thres  # iou matrix
+            weights = iou * scores[None]  # box weights
+            x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True)  # merged boxes
+            if redundant:
+                i = i[iou.sum(1) > 1]  # require redundancy
+
+        output[xi] = x[i]
+        if mps:
+            output[xi] = output[xi].to(device)
+        if (time.time() - t) > time_limit:
+            print(f'WARNING ⚠️ NMS time limit {time_limit:.3f}s exceeded')  # no logger is configured in this script
+            break  # time limit exceeded
+
+    return output
+
+def scale_coords(img1_shape, coords, img0_shape, ratio_pad=None, kpt_label=False, step=2):
+    # Rescale coords (xyxy) from img1_shape to img0_shape
+    if ratio_pad is None:  # calculate from img0_shape
+        gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1])  # gain = old / new
+        pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2  # wh padding
+    else:
+        gain = ratio_pad[0]
+        pad = ratio_pad[1]
+    if isinstance(gain, (list, tuple)):
+        gain = gain[0]
+    if not kpt_label:
+        coords[:, [0, 2]] -= pad[0]  # x padding
+        coords[:, [1, 3]] -= pad[1]  # y padding
+        coords[:, [0, 2]] /= gain
+        coords[:, [1, 3]] /= gain
+        clip_coords(coords, img0_shape)  # clip all rows, not just the first four
+        #coords[:, 0:4] = coords[:, 0:4].round()
+    else:
+        coords[:, 0::step] -= pad[0]  # x padding
+        coords[:, 1::step] -= pad[1]  # y padding
+        coords[:, 0::step] /= gain
+        coords[:, 1::step] /= gain
+        clip_coords(coords, img0_shape, step=step)
+        #coords = coords.round()
+    return coords
+
+
+def clip_coords(boxes, img_shape, step=2):
+    # Clip bounding xyxy bounding boxes to image shape (height, width)
+    boxes[:, 0::step].clamp_(0, img_shape[1])  # x1
+    boxes[:, 1::step].clamp_(0, img_shape[0])  # y1
+
+
+
+def plot_one_box(x, im, color=None, label=None, line_thickness=3, steps=2, orig_shape=None):
+    # Plots one bounding box on image 'im' using OpenCV
+    assert im.data.contiguous, 'Image not contiguous. Apply np.ascontiguousarray(im) to plot_on_box() input image.'
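+    # Thickness and font size are derived from the image dimensions; the label is drawn in a filled rectangle above the box.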
+ tl = line_thickness or round(0.002 * (im.shape[0] + im.shape[1]) / 2) + 1 # line/font thickness + c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3])) + cv2.rectangle(im, c1, c2, color, thickness=tl*1//3, lineType=cv2.LINE_AA) + if label: + if len(label.split(' ')) > 1: + # label = label.split(' ')[-1] + tf = max(tl - 1, 1) # font thickness + t_size = cv2.getTextSize(label, 0, fontScale=tl / 6, thickness=tf)[0] + c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3 + cv2.rectangle(im, c1, c2, color, -1, cv2.LINE_AA) + cv2.putText(im, label, (c1[0], c1[1] - 2), 0, tl / 6, [225, 255, 255], thickness=tf//2, lineType=cv2.LINE_AA) + + +def letterbox(img, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32): + # Resize and pad image while meeting stride-multiple constraints + shape = img.shape[:2] # current shape [height, width] + if isinstance(new_shape, int): + new_shape = (new_shape, new_shape) + + # Scale ratio (new / old) + r = min(new_shape[0] / shape[0], new_shape[1] / shape[1]) + if not scaleup: # only scale down, do not scale up (for better test mAP) + r = min(r, 1.0) + + # Compute padding + ratio = r, r # width, height ratios + new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r)) + dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh padding + if auto: # minimum rectangle + dw, dh = np.mod(dw, stride), np.mod(dh, stride) # wh padding + elif scaleFill: # stretch + dw, dh = 0.0, 0.0 + new_unpad = (new_shape[1], new_shape[0]) + ratio = new_shape[1] / shape[1], new_shape[0] / shape[0] # width, height ratios + + dw /= 2 # divide padding into 2 sides + dh /= 2 + + if shape[::-1] != new_unpad: # resize + img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR) + top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1)) + left, right = int(round(dw - 0.1)), int(round(dw + 0.1)) + + img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # add border + return img, ratio, (dw, dh) + +def sigmoid(x): + return 1 / (1 + np.exp(-x)) + + + +class Yolov5QRcodeDetector: + def __init__(self, model_path): + # self.model = onnxruntime.InferenceSession(model_path) + self.model = axe.InferenceSession(model_path) + self.input_name = self.model.get_inputs()[0].name + self.output_name = self.model.get_outputs()[0].name + self.classes=['QRCode'] + self.nc=len(self.classes) + self.no = self.nc + 5 + self.na =3 + self.nl =3 + self.anchors=torch.tensor([[10,13, 16,30, 33,23],[30,61, 62,45, 59,119],[116,90, 156,198, 373,326]]) + self.anchors=self.anchors.view(3,3,2) + self.stride=torch.tensor([8,16,32]) + self.anchors = self.anchors/(self.stride.view(-1, 1, 1)) + + def preprocess_image(self, img, img_size=(640, 640)): + img, _, _ = letterbox(img, img_size, auto=False, stride=32) + img = np.ascontiguousarray(img[:, :, ::-1].transpose(2, 0, 1)) + # img = np.asarray(img, dtype=np.float32) + img = np.asarray(img, dtype=np.uint8) + img = np.expand_dims(img, 0) + # img /= 255.0 + return img + + def model_inference(self, input=None): + output = self.model.run(None, {self.input_name: input}) + return output + + def _make_grid(self, nx=20, ny=20, i=0): + na = 3 + shape = 1, na, ny, nx, 2 # grid shape + y, x = torch.arange(ny, dtype=torch.float32), torch.arange(nx, dtype=torch.float32) + # yv, xv = torch.meshgrid(y, x) # torch>=0.7 compatibility + yv, xv = torch.meshgrid(y, x, indexing='ij') + grid = torch.stack((xv, yv), 2).expand(shape) - 0.5 # add grid offset, i.e. 
y = 2.0 * x - 0.5
+        anchor_grid = (self.anchors[i] * self.stride[i]).view((1, na, 1, 1, 2)).expand(shape)
+        return grid, anchor_grid
+    def postprocess(self, preds, img_shape, im0):
+        z = []  # inference output
+        for i,pred in enumerate(preds):
+            pred=torch.from_numpy(pred)  # numpy to tensor
+            pred=pred.permute(0,3,1,2)  # NHWC to NCHW
+            bs, _, ny, nx = pred.shape
+            pred = pred.view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous()
+            grid, anchor_grid = self._make_grid(nx, ny, i)
+
+            xy, wh, conf = sigmoid(pred).split((2, 2, self.nc + 1), 4)
+            xy = (xy * 2 + grid) * self.stride[i]  # xy
+            wh = (wh * 2) ** 2 * anchor_grid  # wh
+            y = torch.cat((xy, wh, conf), 4)
+            z.append(y.view(bs, self.na * nx * ny, self.no))
+
+        preds=torch.cat(z, 1)
+        detections = []
+        preds = non_max_suppression(preds, 0.3, 0.45)
+        for i, det in enumerate(preds):  # detections per image
+
+            if len(det):
+                # Rescale boxes from img_size to im0 size
+                scale_coords(img_shape[2:], det[:, :4], im0.shape, kpt_label=False)
+
+                # Print results
+                for c in det[:, 5].unique():
+                    n = (det[:, 5] == c).sum()  # detections per class
+
+                # Write results
+                for det_index, (*xyxy, conf, cls) in enumerate(reversed(det[:, :6])):
+                    # print('det:',xyxy, conf, cls)
+                    int_coords = [int(tensor.item()) for tensor in xyxy]
+                    # print(int_coords)
+                    detections.append(int_coords)
+                    # c = int(cls)  # integer class
+                    # label = f'{self.classes[c]} {conf:.2f}'
+                    # plot_one_box(xyxy, im0, label=label, color=colors(c, True), line_thickness=2,steps=3, orig_shape=im0.shape[:2])
+
+        return detections, im0
+
+class QRCodeDecoder:
+    def crop_qr_regions(self, image, regions):
+        """
+        Crop QR-code regions from the image according to the detected bounding boxes.
+        """
+        cropped_images = []
+        for idx, region in enumerate(regions):
+            x1, y1, x2, y2 = region
+            # Expand the box to compensate for truncated detections (tune as needed);
+            # clamp to the image bounds so the slice below never goes negative
+            x1 = max(0, x1 - 15)
+            y1 = max(0, y1 - 15)
+            x2 = min(image.shape[1], x2 + 15)
+            y2 = min(image.shape[0], y2 + 15)
+            # Crop the region
+            cropped = image[y1:y2, x1:x2]
+            if cropped.size > 0:
+                cropped_images.append({
+                    'image': cropped,
+                    'bbox': region,
+                })
+            # cv2.imwrite(f'cropped_qr_{idx}.jpg', cropped)
+        return cropped_images
+
+    def decode_qrcode_pyzbar(self, cropped_image):
+        """
+        Decode QR codes with pyzbar.
+        """
+        try:
+            # Convert to a grayscale image
+            if len(cropped_image.shape) == 3:
+                gray = cv2.cvtColor(cropped_image, cv2.COLOR_BGR2GRAY)
+            else:
+                gray = cropped_image
+            # cv2.imwrite('cropped_gray.jpg',gray)
+            # Decode with pyzbar
+            decoded_objects = pyzbar.decode(gray)
+            results = []
+            for obj in decoded_objects:
+                try:
+                    data = obj.data.decode('utf-8')
+                    results.append({
+                        'data': data,
+                        'type': obj.type,
+                        'points': obj.polygon
+                    })
+                except UnicodeDecodeError:
+                    continue
+
+            return results
+        except Exception as e:
+            print(f"decode error: {e}")
+            return []
+
+
+if __name__ == '__main__':
+    import time
+
+    model = './yolov5n_npu3.axmodel'
+    input_size = [640,640]
+    detector = Yolov5QRcodeDetector(model)
+    # Crop detected QRCodes & decode them with pyzbar
+    decoder = QRCodeDecoder()
+    pic_path = './images/'
+    det_path='./v5_det_res'
+    crop_path='./v5_crop_res'
+    os.makedirs(det_path, exist_ok=True)
+    os.makedirs(crop_path, exist_ok=True)
+    pics = os.listdir(pic_path)
+    total = len(pics)
+    success = 0
+    fail = 0
+    start_time = time.time()  # record the overall start time
+    for idx, pic in enumerate(pics):
+        loop_start_time = time.time()  # per-image start time
+        org_img = os.path.join(pic_path, pic)
+        pic_name=pic.split('.')[0]
+        im0 = cv2.imread(org_img)
+
+        # do QRCode detection
+        img = detector.preprocess_image(im0, img_size=input_size)
+        infer_start_time = time.time()
+        preds = detector.model_inference(img)
+        infer_end_time = time.time()
+        print(f"infer time: {infer_end_time - infer_start_time:.4f}s")
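+        # Decode the three NPU feature maps (sigmoid, grid and anchor decode, NMS), then rescale boxes to the original image.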
+        det_result, res_img = detector.postprocess(preds, img.shape, im0)
+        # cv2.imwrite(os.path.join(det_path, pic), res_img)
+
+        cropped_images = decoder.crop_qr_regions(im0, det_result)
+        for i,cropped in enumerate(cropped_images):
+            cv2.imwrite(os.path.join(crop_path, f'{pic_name}_crop_{i}.jpg'), cropped['image'])
+
+        all_decoded_results = []
+        for i, cropped_data in enumerate(cropped_images):
+            decoded_results = decoder.decode_qrcode_pyzbar(cropped_data['image'])
+            all_decoded_results.extend(decoded_results)
+
+            # for result in decoded_results:
+            #     print(f"decode result: {result['data']} (type: {result['type']})")
+        if all_decoded_results:
+            success += 1
+            # print("Decoded successfully!")
+        else:
+            fail += 1
+            # print("Decode failed!")
+        loop_end_time = time.time()  # per-image end time
+        print(f"Image {pic} processed in {loop_end_time - loop_start_time:.4f} s")
+
+    end_time = time.time()  # record the overall end time
+    total_time = end_time - start_time  # total elapsed time
+
+    print(f"Total images tested: {total}")
+    print(f"Successfully decoded: {success}")
+    print(f"Failed to decode: {fail}")
+    print(f"Success rate: {success/total*100:.2f}%")
+    print(f"Total processing time: {total_time:.4f} s")
+    print(f"Average time per image: {total_time/total:.4f} s")
diff --git a/python/QRCode_axmodel_infer_v8.py b/python/QRCode_axmodel_infer_v8.py
new file mode 100644
index 0000000000000000000000000000000000000000..16445ffda529ba62a95c411c64fd1f0a39965c96
--- /dev/null
+++ b/python/QRCode_axmodel_infer_v8.py
@@ -0,0 +1,556 @@
+import cv2
+import numpy as np
+import time
+import yaml
+import glob
+import os
+import pyzbar.pyzbar as pyzbar
+import axengine as axe
+
+names=['QRCode']
+
+def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32):
+
+    shape = im.shape[:2]
+    if isinstance(new_shape, int):
+        new_shape = (new_shape, new_shape)
+
+    r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
+    if not scaleup:
+        r = min(r, 1.0)
+
+    ratio = r, r
+    new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
+    dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]
+    if auto:
+        dw, dh = np.mod(dw, stride), np.mod(dh, stride)
+    elif scaleFill:
+        dw, dh = 0.0, 0.0
+        new_unpad = (new_shape[1], new_shape[0])
+        ratio = new_shape[1] / shape[1], new_shape[0] / shape[0]
+
+    dw /= 2
+    dh /= 2
+
+    if shape[::-1] != new_unpad:
+        im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)
+    top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
+    left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
+    im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)
+    return im, ratio, (dw, dh)
+
+def data_process_cv2(frame, input_shape):
+    '''
+    Read an image from disk and preprocess it into the model input layout.
+    :param frame: path of the input image
+    :param input_shape: model input size (h, w)
+    :return: NCHW uint8 tensor, original image, letterboxed image
+    '''
+    im0 = cv2.imread(frame)
+    img = letterbox(im0, input_shape, auto=False, stride=32)[0]
+    org_data = img.copy()
+    img = np.ascontiguousarray(img[:, :, ::-1].transpose(2, 0, 1))
+    img = np.asarray(img, dtype=np.uint8)
+    img = np.expand_dims(img, 0)
+    # img /= 255.0
+    return img, im0, org_data
+
+def non_max_suppression(prediction,
+                        conf_thres=0.25,
+                        iou_thres=0.45,
+                        classes=None,
+                        agnostic=False,
+                        multi_label=False,
+                        labels=(),
+                        max_det=300,
+                        nm=0  # number of masks
+                        ):
+    """
+    Perform Non-Maximum Suppression (NMS) on the boxes to filter out overlapping boxes.
+
+    Parameters:
+    prediction (ndarray): Predictions from the model.
+    conf_thres (float): Confidence threshold to filter boxes.
+    iou_thres (float): Intersection over Union (IoU) threshold for NMS.
+    classes (list): Filter boxes by classes.
+ agnostic (bool): If True, perform class-agnostic NMS. + multi_label (bool): If True, perform multi-label NMS. + labels (list): Labels for auto-labelling. + max_det (int): Maximum number of detections. + nm (int): Number of masks. + + Returns: + list: A list of filtered boxes. + """ + bs = prediction.shape[0] # batch size + nc = prediction.shape[2] - nm - 5 # number of classes + xc = prediction[..., 4] > conf_thres # candidates + + max_wh = 7680 # (pixels) maximum box width and height + max_nms = 30000 # maximum number of boxes into torchvision.ops.nms() + time_limit = 0.5 + 0.05 * bs # seconds to quit after + # redundant = True # require redundant detections + multi_label &= nc > 1 # multiple labels per box (adds 0.5ms/img) + # merge = False # use merge-NMS + + t = time.time() + mi = 5 + nc # mask start index + output = [np.zeros((0, 6 + nm))] * bs + for xi, x in enumerate(prediction): # image index, image inference + # Apply constraints + x = x[xc[xi]] # confidence + + # Cat apriori labels if autolabelling + if labels and len(labels[xi]): + lb = labels[xi] + v = np.zeros((len(lb), nc + nm + 5)) + v[:, :4] = lb[:, 1:5] # box + v[:, 4] = 1.0 # conf + v[np.arange(len(lb)), lb[:, 0].astype(int) + 5] = 1.0 # cls + x = np.concatenate((x, v), 0) + + # If none remain process next image + if not x.shape[0]: + continue + + # Compute conf + x[:, 5:] *= x[:, 4:5] # conf = obj_conf * cls_conf + + # Box/Mask + box = xywh2xyxy(x[:, :4]) # center_x, center_y, width, height) to (x1, y1, x2, y2) + mask = x[:, mi:] # zero columns if no masks + + # Detections matrix nx6 (xyxy, conf, cls) + if multi_label: + i, j = np.nonzero(x[:, 5:mi] > conf_thres) + x = np.concatenate((box[i], x[i, 5 + j][:, None], j[:, None].astype(float), mask[i]), 1) + else: # best class only + # conf = x[:, 5:mi].max(1, keepdims=True) + # j = x[:, 5:mi].argmax(1,keepdims=True) + conf = np.max(x[:, 5:mi], 1).reshape(box.shape[:1][0], 1) + j = np.argmax(x[:, 5:mi], 1).reshape(box.shape[:1][0], 1) + x = np.concatenate((box, conf, j.astype(float), mask), 1)[conf[:, 0] > conf_thres] + # Filter by class + if classes is not None: + x = x[(x[:, 5:6] == np.array(classes)[:, None]).any(1)] + # Check shape + n = x.shape[0] # number of boxes + if not n: # no boxes + continue + sorted_indices = np.argsort(x[:, 4])[::-1] + x = x[sorted_indices][:max_nms] # sort by confidence and remove excess boxes + + # Batched NMS + c = x[:, 5:6] * (0 if agnostic else max_wh) # classes + boxes, scores = x[:, :4] + c, x[:, 4] # boxes (offset by class), scores + i = nms(boxes, scores, iou_thres) # NMS + + i = i[:max_det] # limit detections + + output[xi] = x[i] + # if mps: + # output[xi] = output[xi].to(device) + if (time.time() - t) > time_limit: + # LOGGER.warning(f'WARNING NMS time limit {time_limit:.3f}s exceeded') + break # time limit exceeded + return output + + +# Define the function for NMS using numpy +def nms(boxes, scores, iou_threshold): + """ + Perform Non-Maximum Suppression (NMS) on the given boxes with scores using numpy. + + Parameters: + boxes (ndarray): The bounding boxes, shaped (N, 4). + scores (ndarray): The confidence scores for each box, shaped (N,). + iou_threshold (float): The IoU threshold for suppressing overlapping boxes. + + Returns: + ndarray: The indices of the selected boxes after NMS. 
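+
+    Example (the boxes overlap with IoU 0.64, so the lower-scoring one is suppressed):
+        >>> nms(np.array([[0, 0, 10, 10], [1, 1, 9, 9]]), np.array([0.9, 0.8]), 0.5)
+        array([0])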
+ """ + if len(boxes) == 0: + return [] + + # Sort boxes by their scores + indices = np.argsort(scores)[::-1] + + selected_indices = [] + while len(indices) > 0: + # Select the box with the highest score + current_index = indices[0] + selected_indices.append(current_index) + + # Compute IoU between the current box and all other boxes + current_box = boxes[current_index] + other_boxes = boxes[indices[1:]] + iou = calculate_iou(current_box, other_boxes) + + # Remove boxes with IoU higher than the threshold + indices = indices[1:][iou <= iou_threshold] + + return np.array(selected_indices) + + +def calculate_iou(box, boxes): + """ + Calculate the Intersection over Union (IoU) between a given box and a set of boxes. + + Parameters: + box (ndarray): The coordinates of the first box, shaped (4,). + boxes (ndarray): The coordinates of the other boxes, shaped (N, 4). + + Returns: + ndarray: The IoU between the given box and each box in the set, shaped (N,). + """ + # Calculate intersection coordinates + x1 = np.maximum(box[0], boxes[:, 0]) + y1 = np.maximum(box[1], boxes[:, 1]) + x2 = np.minimum(box[2], boxes[:, 2]) + y2 = np.minimum(box[3], boxes[:, 3]) + + # Calculate intersection area + intersection_area = np.maximum(x2 - x1, 0) * np.maximum(y2 - y1, 0) + + # Calculate areas of both bounding boxes + box_area = (box[2] - box[0]) * (box[3] - box[1]) + boxes_area = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1]) + + # Calculate IoU + iou = intersection_area / (box_area + boxes_area - intersection_area) + + return iou + +# Define xywh2xyxy function for converting bounding box format +def xywh2xyxy(x): + """ + Convert bounding boxes from (center_x, center_y, width, height) to (x1, y1, x2, y2) format. + + Parameters: + x (ndarray): Bounding boxes in (center_x, center_y, width, height) format, shaped (N, 4). + + Returns: + ndarray: Bounding boxes in (x1, y1, x2, y2) format, shaped (N, 4). 
+ """ + y = x.copy() + y[:, 0] = x[:, 0] - x[:, 2] / 2 + y[:, 1] = x[:, 1] - x[:, 3] / 2 + y[:, 2] = x[:, 0] + x[:, 2] / 2 + y[:, 3] = x[:, 1] + x[:, 3] / 2 + return y + +def xyxy2xywh(x): + # Convert nx4 boxes from [x1, y1, x2, y2] to [x, y, w, h] where xy1=top-left, xy2=bottom-right + y = np.copy(x) + y[:, 0] = (x[:, 0] + x[:, 2]) / 2 # x center + y[:, 1] = (x[:, 1] + x[:, 3]) / 2 # y center + y[:, 2] = x[:, 2] - x[:, 0] # width + y[:, 3] = x[:, 3] - x[:, 1] # height + return y + +def post_process_yolo(det, im, im0, gn, save_path, img_name): + detections = [] + if len(det): + det[:, :4] = scale_boxes(im.shape[:2], det[:, :4], im0.shape).round() + colors = Colors() + for *xyxy, conf, cls in reversed(det): + # print("class:",int(cls), "left:%.0f" % xyxy[0],"top:%.0f" % xyxy[1],"right:%.0f" % xyxy[2],"bottom:%.0f" % xyxy[3], "conf:",'{:.0f}%'.format(float(conf)*100)) + int_coords = [int(tensor.item()) for tensor in xyxy] + # print(int_coords) + detections.append(int_coords) + # c = int(cls) + # label = names[c] + # res_img = plot_one_box(xyxy, im0, label=f'{label}:{conf:.2f}', color=colors(c, True), line_thickness=4) + # cv2.imwrite(f'{save_path}/{img_name}.jpg',res_img) + # xywh = (xyxy2xywh(np.array(xyxy,dtype=np.float32).reshape(1, 4)) / gn).reshape(-1).tolist() # normalized xywh + # line = (cls, *xywh) # label format + # with open(f'{save_path}/{img_name}.txt', 'a') as f: + # f.write(('%g ' * len(line)).rstrip() % line + '\n') + return detections + +def scale_boxes(img1_shape, boxes, img0_shape, ratio_pad=None): + if ratio_pad is None: + gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1]) + pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2 + else: + gain = ratio_pad[0][0] + pad = ratio_pad[1] + + boxes[..., [0, 2]] -= pad[0] + boxes[..., [1, 3]] -= pad[1] + boxes[..., :4] /= gain + clip_boxes(boxes, img0_shape) + return boxes + +def clip_boxes(boxes, shape): + boxes[..., [0, 2]] = boxes[..., [0, 2]].clip(0, shape[1]) + boxes[..., [1, 3]] = boxes[..., [1, 3]].clip(0, shape[0]) + + +def yaml_load(file='coco128.yaml'): + with open(file, errors='ignore') as f: + return yaml.safe_load(f) + + +class Colors: + # Ultralytics color palette https://ultralytics.com/ + def __init__(self): + """ + Initializes the Colors class with a palette derived from Ultralytics color scheme, converting hex codes to RGB. + Colors derived from `hex = matplotlib.colors.TABLEAU_COLORS.values()`. + """ + hexs = ( + "FF3838", + "FF9D97", + "FF701F", + "FFB21D", + "CFD231", + "48F90A", + "92CC17", + "3DDB86", + "1A9334", + "00D4BB", + "2C99A8", + "00C2FF", + "344593", + "6473FF", + "0018EC", + "8438FF", + "520085", + "CB38FF", + "FF95C8", + "FF37C7", + ) + self.palette = [self.hex2rgb(f"#{c}") for c in hexs] + self.n = len(self.palette) + + def __call__(self, i, bgr=False): + """Returns color from palette by index `i`, in BGR format if `bgr=True`, else RGB; `i` is an integer index.""" + c = self.palette[int(i) % self.n] + return (c[2], c[1], c[0]) if bgr else c + + @staticmethod + def hex2rgb(h): + """Converts hex color codes to RGB values (i.e. default PIL order).""" + return tuple(int(h[1 + i: 1 + i + 2], 16) for i in (0, 2, 4)) + +def plot_one_box(x, im, color=None, label=None, line_thickness=3, steps=2, orig_shape=None): + assert im.data.contiguous, 'Image not contiguous. Apply np.ascontiguousarray(im) to plot_on_box() input image.' 
+    tl = line_thickness or round(0.002 * (im.shape[0] + im.shape[1]) / 2) + 1
+    c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3]))
+    cv2.rectangle(im, c1, c2, color, thickness=tl*1//3, lineType=cv2.LINE_AA)
+    if label:
+        if len(label.split(':')) > 1:
+            tf = max(tl - 1, 1)
+            t_size = cv2.getTextSize(label, 0, fontScale=tl / 6, thickness=tf)[0]
+            c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3
+            cv2.rectangle(im, c1, c2, color, -1, cv2.LINE_AA)
+            cv2.putText(im, label, (c1[0], c1[1] - 2), 0, tl / 6, [225, 255, 255], thickness=tf//2, lineType=cv2.LINE_AA)
+    return im
+
+def model_load(model):
+    providers = ['CPUExecutionProvider']
+    # session = ort.InferenceSession(model, providers=providers)
+    session = axe.InferenceSession(model)
+    input_name = session.get_inputs()[0].name
+    output_names = [ x.name for x in session.get_outputs()]
+    return session, output_names
+
+def make_anchors(feats, strides, grid_cell_offset=0.5):
+    """Generate anchors from features."""
+    anchor_points, stride_tensor = [], []
+    assert feats is not None
+    dtype = feats[0].dtype
+    for i, stride in enumerate(strides):
+        _, _, h, w = feats[i].shape
+        sx = np.arange(w, dtype=dtype) + grid_cell_offset  # shift x
+        sy = np.arange(h, dtype=dtype) + grid_cell_offset  # shift y
+        sy, sx = np.meshgrid(sy, sx, indexing='ij')
+        anchor_points.append(np.stack((sx, sy), axis=-1).reshape(-1, 2))
+        stride_tensor.append(np.full((h * w, 1), stride, dtype=dtype))
+    return np.concatenate(anchor_points), np.concatenate(stride_tensor)
+
+def dist2bbox(distance, anchor_points, xywh=True, dim=-1):
+    """Transform distance(ltrb) to box(xywh or xyxy)."""
+    lt, rb = np.split(distance, 2, axis=dim)
+    x1y1 = anchor_points - lt
+    x2y2 = anchor_points + rb
+    if xywh:
+        c_xy = (x1y1 + x2y2) / 2
+        wh = x2y2 - x1y1
+        return np.concatenate((c_xy, wh), axis=dim)  # xywh bbox
+    return np.concatenate((x1y1, x2y2), axis=dim)  # xyxy bbox
+
+class DFL:
+    """
+    NumPy implementation of the Distribution Focal Loss (DFL) integral module.
+    Original paper: Generalized Focal Loss (IEEE TPAMI 2023)
+    """
+
+    def __init__(self, c1=16):
+        """Initialize with given number of distribution channels"""
+        self.c1 = c1
+        # Fixed weights 0..c1-1, equivalent to the frozen conv weights of the original module
+        self.weights = np.arange(c1, dtype=np.float32).reshape(1, c1, 1, 1)
+
+
+    def __call__(self, x):
+        """
+        Forward pass.
+        Args:
+            x: input tensor of shape (batch, channels, anchors)
+        Returns:
+            decoded tensor of shape (batch, 4, anchors)
+        """
+        b, c, a = x.shape
+
+        # Equivalent to the original view -> transpose -> softmax
+        x_reshaped = x.reshape(b, 4, self.c1, a)
+        x_transposed = np.transpose(x_reshaped, (0, 2, 1, 3))
+        x_softmax = np.exp(x_transposed) / np.sum(np.exp(x_transposed), axis=1, keepdims=True)
+
+        # Equivalent to the 1x1 conv: the expectation over the bin distribution, via a weighted sum
+        conv_result = np.sum(self.weights * x_softmax, axis=1)
+
+        return conv_result.reshape(b, 4, a)
+
+class YOLOV8Detector:
+    def __init__(self, model_path, imgsz=[640,640]):
+        self.model_path = model_path
+        self.session, self.output_names = model_load(self.model_path)
+        self.imgsz = imgsz
+        self.stride = [8.,16.,32.]
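+        # Each box side is regressed as a distribution over reg_max bins and decoded to a scalar by DFL.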
+        self.reg_max = 16
+        self.nc = 1
+        self.no = self.nc + self.reg_max * 4
+        self.dfl = DFL(self.reg_max)
+
+    def detect_objects(self, image, save_path):
+        im, im0, org_data = data_process_cv2(image, self.imgsz)
+        img_name = os.path.basename(image).split('.')[0]
+        infer_start_time = time.time()
+        x = self.session.run(self.output_names, {self.session.get_inputs()[0].name: im})
+        infer_end_time = time.time()
+        print(f"infer time: {infer_end_time - infer_start_time:.4f}s")
+        x = [np.transpose(x[i],(0,3,1,2)) for i in range(3)]  # to NCHW
+
+        anchors,strides = (np.transpose(x,(1, 0)) for x in make_anchors(x, self.stride, 0.5))
+        x_cat = np.concatenate([xi.reshape(1, self.no, -1) for xi in x], axis=2)
+        box = x_cat[:, :self.reg_max * 4,:]
+        cls = x_cat[:, self.reg_max * 4:,:]
+        dbox = dist2bbox(self.dfl(box), np.expand_dims(anchors, axis=0), xywh=True, dim=1) * strides
+        y = np.concatenate((dbox, 1/(1 + np.exp(-cls))), axis=1)
+        pred = y.transpose([0, 2, 1])
+        pred_class = pred[..., 4:]
+        pred_conf = np.max(pred_class, axis=-1)
+        pred = np.insert(pred, 4, pred_conf, axis=-1)
+
+        pred = non_max_suppression(pred, conf_thres=0.25, iou_thres=0.45, max_det=1000)
+        gn = np.array(org_data.shape)[[1, 0, 1, 0]].astype(np.float32)
+        res = post_process_yolo(pred[0], org_data, im0, gn, save_path, img_name)
+        return res, im0
+
+class QRCodeDecoder:
+    def crop_qr_regions(self, image, regions):
+        """
+        Crop QR-code regions from the image according to the detected bounding boxes.
+        """
+        cropped_images = []
+        for idx, region in enumerate(regions):
+            x1, y1, x2, y2 = region
+            # Expand by 15 px to avoid decode failures caused by truncated detections
+            # (tune as needed); clamp so the slice below never goes out of bounds
+            x1 = max(0, x1 - 15)
+            y1 = max(0, y1 - 15)
+            x2 = min(image.shape[1], x2 + 15)
+            y2 = min(image.shape[0], y2 + 15)
+            # Crop the region
+            cropped = image[y1:y2, x1:x2]
+            if cropped.size > 0:
+                cropped_images.append({
+                    'image': cropped,
+                    'bbox': region,
+                })
+            # cv2.imwrite(f'cropped_qr_{idx}.jpg', cropped)
+        return cropped_images
+
+    def decode_qrcode_pyzbar(self, cropped_image):
+        """
+        Decode QR codes with pyzbar.
+        """
+        try:
+            # Convert to a grayscale image
+            if len(cropped_image.shape) == 3:
+                gray = cv2.cvtColor(cropped_image, cv2.COLOR_BGR2GRAY)
+            else:
+                gray = cropped_image
+            # cv2.imwrite('cropped_gray.jpg',gray)
+            # Decode with pyzbar
+            decoded_objects = pyzbar.decode(gray)
+            results = []
+            for obj in decoded_objects:
+                try:
+                    data = obj.data.decode('utf-8')
+                    results.append({
+                        'data': data,
+                        'type': obj.type,
+                        'points': obj.polygon
+                    })
+                except UnicodeDecodeError:
+                    continue
+
+            return results
+        except Exception as e:
+            print(f"decode error: {e}")
+            return []
+
+if __name__ == '__main__':
+    import time
+
+    detector = YOLOV8Detector(model_path='./yolov8n_npu3.axmodel',imgsz=[640,640])
+    decoder = QRCodeDecoder()
+    img_path = './images'
+    det_path='./v8_det_res'
+    crop_path='./v8_crop_res'
+    os.makedirs(det_path, exist_ok=True)
+    os.makedirs(crop_path, exist_ok=True)
+    imgs = glob.glob(f"{img_path}/*.jpg")
+    total = len(imgs)
+    success = 0
+    fail = 0
+    start_time = time.time()
+    for idx,img in enumerate(imgs):
+        pic_name=os.path.basename(img).split('.')[0]
+        loop_start_time = time.time()
+        det_result, res_img = detector.detect_objects(img,det_path)
+        # cv2.imwrite(os.path.join(det_path, pic_name+'.jpg'), res_img)
+
+        # Crop detected QRCodes & decode them with pyzbar
+        cropped_images = decoder.crop_qr_regions(res_img, det_result)
+        # for i,cropped in enumerate(cropped_images):
+        #     cv2.imwrite(os.path.join(crop_path, f'{pic_name}_crop_{i}.jpg'), cropped['image'])
+
+        all_decoded_results = []
+        for i, cropped_data in enumerate(cropped_images):
+            decoded_results = decoder.decode_qrcode_pyzbar(cropped_data['image'])
+            all_decoded_results.extend(decoded_results)
+
+            # for result in decoded_results:
print(f"decode result: {result['data']} (type: {result['type']})") + if all_decoded_results: + success += 1 + print("识别成功!") + else: + fail += 1 + print("识别失败!") + loop_end_time = time.time() + print(f"图片 {img} 处理耗时: {loop_end_time - loop_start_time:.4f} 秒") + + end_time = time.time() # 记录总结束时间 + total_time = end_time - start_time # 记录总耗时 + + print(f"总共测试图片数量: {totoal}") + print(f"识别成功数量: {success}") + print(f"识别失败数量: {fail}") + print(f"识别成功率: {success/totoal*100:.2f}%") + print(f"整体处理耗时: {total_time:.4f} 秒") + print(f"平均每张图片处理耗时: {total_time/totoal:.4f} 秒") \ No newline at end of file diff --git a/python/QRCode_onnx_infer_DEIMv2.py b/python/QRCode_onnx_infer_DEIMv2.py new file mode 100644 index 0000000000000000000000000000000000000000..87623c883d1f28a2f6acda23be3e02b8ab1f0dc3 --- /dev/null +++ b/python/QRCode_onnx_infer_DEIMv2.py @@ -0,0 +1,370 @@ +""" +DEIMv2: Real-Time Object Detection Meets DINOv3 +Copyright (c) 2025 The DEIMv2 Authors. All Rights Reserved. +--------------------------------------------------------------------------------- +Modified from D-FINE (https://github.com/Peterande/D-FINE) +Copyright (c) 2024 The D-FINE Authors. All Rights Reserved. +""" +import os +import sys +import cv2 +import time +import numpy as np +import onnxruntime as ort +import torch +import torch.nn as nn +import torch.nn.functional as F +import torchvision.transforms as T +from PIL import Image, ImageDraw +import pyzbar.pyzbar as pyzbar +import torchvision +import glob + +def mod(a, b): + out = a - a // b * b + return out + +mscoco_category2name = { + 1: 'person', + 2: 'bicycle', + 3: 'car', + 4: 'motorcycle', + 5: 'airplane', + 6: 'bus', + 7: 'train', + 8: 'truck', + 9: 'boat', + 10: 'traffic light', + 11: 'fire hydrant', + 13: 'stop sign', + 14: 'parking meter', + 15: 'bench', + 16: 'bird', + 17: 'cat', + 18: 'dog', + 19: 'horse', + 20: 'sheep', + 21: 'cow', + 22: 'elephant', + 23: 'bear', + 24: 'zebra', + 25: 'giraffe', + 27: 'backpack', + 28: 'umbrella', + 31: 'handbag', + 32: 'tie', + 33: 'suitcase', + 34: 'frisbee', + 35: 'skis', + 36: 'snowboard', + 37: 'sports ball', + 38: 'kite', + 39: 'baseball bat', + 40: 'baseball glove', + 41: 'skateboard', + 42: 'surfboard', + 43: 'tennis racket', + 44: 'bottle', + 46: 'wine glass', + 47: 'cup', + 48: 'fork', + 49: 'knife', + 50: 'spoon', + 51: 'bowl', + 52: 'banana', + 53: 'apple', + 54: 'sandwich', + 55: 'orange', + 56: 'broccoli', + 57: 'carrot', + 58: 'hot dog', + 59: 'pizza', + 60: 'donut', + 61: 'cake', + 62: 'chair', + 63: 'couch', + 64: 'potted plant', + 65: 'bed', + 67: 'dining table', + 70: 'toilet', + 72: 'tv', + 73: 'laptop', + 74: 'mouse', + 75: 'remote', + 76: 'keyboard', + 77: 'cell phone', + 78: 'microwave', + 79: 'oven', + 80: 'toaster', + 81: 'sink', + 82: 'refrigerator', + 84: 'book', + 85: 'clock', + 86: 'vase', + 87: 'scissors', + 88: 'teddy bear', + 89: 'hair drier', + 90: 'toothbrush' +} + +mscoco_category2label = {k: i for i, k in enumerate(mscoco_category2name.keys())} +mscoco_label2category = {v: k for k, v in mscoco_category2label.items()} + +class PostProcessor(nn.Module): + __share__ = [ + 'num_classes', + 'use_focal_loss', + 'num_top_queries', + 'remap_mscoco_category' + ] + + def __init__( + self, + num_classes=80, + use_focal_loss=True, + num_top_queries=300, + remap_mscoco_category=False + ) -> None: + super().__init__() + self.use_focal_loss = use_focal_loss + self.num_top_queries = num_top_queries + self.num_classes = int(num_classes) + self.remap_mscoco_category = remap_mscoco_category + self.deploy_mode = False + + 
+
+mscoco_category2name = {
+    1: 'person',
+    2: 'bicycle',
+    3: 'car',
+    4: 'motorcycle',
+    5: 'airplane',
+    6: 'bus',
+    7: 'train',
+    8: 'truck',
+    9: 'boat',
+    10: 'traffic light',
+    11: 'fire hydrant',
+    13: 'stop sign',
+    14: 'parking meter',
+    15: 'bench',
+    16: 'bird',
+    17: 'cat',
+    18: 'dog',
+    19: 'horse',
+    20: 'sheep',
+    21: 'cow',
+    22: 'elephant',
+    23: 'bear',
+    24: 'zebra',
+    25: 'giraffe',
+    27: 'backpack',
+    28: 'umbrella',
+    31: 'handbag',
+    32: 'tie',
+    33: 'suitcase',
+    34: 'frisbee',
+    35: 'skis',
+    36: 'snowboard',
+    37: 'sports ball',
+    38: 'kite',
+    39: 'baseball bat',
+    40: 'baseball glove',
+    41: 'skateboard',
+    42: 'surfboard',
+    43: 'tennis racket',
+    44: 'bottle',
+    46: 'wine glass',
+    47: 'cup',
+    48: 'fork',
+    49: 'knife',
+    50: 'spoon',
+    51: 'bowl',
+    52: 'banana',
+    53: 'apple',
+    54: 'sandwich',
+    55: 'orange',
+    56: 'broccoli',
+    57: 'carrot',
+    58: 'hot dog',
+    59: 'pizza',
+    60: 'donut',
+    61: 'cake',
+    62: 'chair',
+    63: 'couch',
+    64: 'potted plant',
+    65: 'bed',
+    67: 'dining table',
+    70: 'toilet',
+    72: 'tv',
+    73: 'laptop',
+    74: 'mouse',
+    75: 'remote',
+    76: 'keyboard',
+    77: 'cell phone',
+    78: 'microwave',
+    79: 'oven',
+    80: 'toaster',
+    81: 'sink',
+    82: 'refrigerator',
+    84: 'book',
+    85: 'clock',
+    86: 'vase',
+    87: 'scissors',
+    88: 'teddy bear',
+    89: 'hair drier',
+    90: 'toothbrush'
+}
+
+mscoco_category2label = {k: i for i, k in enumerate(mscoco_category2name.keys())}
+mscoco_label2category = {v: k for k, v in mscoco_category2label.items()}
+
+class PostProcessor(nn.Module):
+    __share__ = [
+        'num_classes',
+        'use_focal_loss',
+        'num_top_queries',
+        'remap_mscoco_category'
+    ]
+
+    def __init__(
+        self,
+        num_classes=80,
+        use_focal_loss=True,
+        num_top_queries=300,
+        remap_mscoco_category=False
+    ) -> None:
+        super().__init__()
+        self.use_focal_loss = use_focal_loss
+        self.num_top_queries = num_top_queries
+        self.num_classes = int(num_classes)
+        self.remap_mscoco_category = remap_mscoco_category
+        self.deploy_mode = False
+
+    def extra_repr(self) -> str:
+        return f'use_focal_loss={self.use_focal_loss}, num_classes={self.num_classes}, num_top_queries={self.num_top_queries}'
+
+    def forward(self, outputs, orig_target_sizes: torch.Tensor):
+        logits, boxes = outputs['pred_logits'], outputs['pred_boxes']
+        # orig_target_sizes = torch.stack([t["orig_size"] for t in targets], dim=0)
+
+        bbox_pred = torchvision.ops.box_convert(boxes, in_fmt='cxcywh', out_fmt='xyxy')
+        bbox_pred *= orig_target_sizes.repeat(1, 2).unsqueeze(1)
+
+        if self.use_focal_loss:
+            scores = F.sigmoid(logits)
+            # Top-k runs over the flattened (queries x classes) score matrix; the
+            # query index and the class label are recovered from the flat index.
+            scores, index = torch.topk(scores.flatten(1), self.num_top_queries, dim=-1)
+            # labels = index % self.num_classes
+            labels = mod(index, self.num_classes)
+            index = index // self.num_classes
+            boxes = bbox_pred.gather(dim=1, index=index.unsqueeze(-1).repeat(1, 1, bbox_pred.shape[-1]))
+        else:
+            scores = F.softmax(logits, dim=-1)[:, :, :-1]
+            scores, labels = scores.max(dim=-1)
+            if scores.shape[1] > self.num_top_queries:
+                scores, index = torch.topk(scores, self.num_top_queries, dim=-1)
+                labels = torch.gather(labels, dim=1, index=index)
+                boxes = torch.gather(boxes, dim=1, index=index.unsqueeze(-1).tile(1, 1, boxes.shape[-1]))
+
+        if self.deploy_mode:
+            return labels, boxes, scores
+
+        if self.remap_mscoco_category:
+            labels = torch.tensor([mscoco_label2category[int(x.item())] for x in labels.flatten()])\
+                .to(boxes.device).reshape(labels.shape)
+
+        results = []
+        for lab, box, sco in zip(labels, boxes, scores):
+            results.append(dict(labels=lab, boxes=box, scores=sco))
+        return results
+
+    def deploy(self):
+        self.eval()
+        self.deploy_mode = True
+        return self
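+
+# Sketch of the focal-loss decoding path above, on made-up numbers: with the
+# scores flattened to (queries x classes), `index // num_classes` recovers the
+# query and `mod(index, num_classes)` the class label.
+def _topk_decode_demo():
+    num_classes = 3
+    scores = torch.tensor([[[0.1, 0.9, 0.2],    # query 0
+                            [0.8, 0.3, 0.7]]])  # query 1
+    top, index = torch.topk(scores.flatten(1), 2, dim=-1)
+    labels = mod(index, num_classes)            # -> tensor([[1, 0]])
+    queries = index // num_classes              # -> tensor([[0, 1]])
+    print(top, labels, queries)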
+
+def resize_with_aspect_ratio(image, size, interpolation=Image.BILINEAR):
+    """Resizes an image while maintaining aspect ratio and pads it."""
+    original_width, original_height = image.size
+    ratio = min(size / original_width, size / original_height)
+    new_width = int(original_width * ratio)
+    new_height = int(original_height * ratio)
+    image = image.resize((new_width, new_height), interpolation)
+
+    # Create a new image with the desired size and paste the resized image centered onto it
+    new_image = Image.new("RGB", (size, size))
+    new_image.paste(image, ((size - new_width) // 2, (size - new_height) // 2))
+    return new_image, ratio, (size - new_width) // 2, (size - new_height) // 2
+
+def draw(images, labels, boxes, scores, ratios, paddings, thrh=0.25):
+    result_images = []
+    detections = []
+    for i, im in enumerate(images):
+        drawer = ImageDraw.Draw(im)  # renamed so it does not shadow this function
+        scr = scores[i]
+        lab = labels[i][scr > thrh]
+        box = boxes[i][scr > thrh]
+        scr = scr[scr > thrh]
+
+        ratio = ratios[i]
+        pad_w, pad_h = paddings[i]
+
+        for lbl, bb in zip(lab, box):
+            # Undo the resize-and-pad so boxes land on the original image
+            bb = [
+                (bb[0] - pad_w) / ratio,
+                (bb[1] - pad_h) / ratio,
+                (bb[2] - pad_w) / ratio,
+                (bb[3] - pad_h) / ratio,
+            ]
+            drawer.rectangle(bb, outline='red')
+            drawer.text((bb[0], bb[1]), text=str(lbl), fill='blue')
+            detections.append([int(v) for v in bb])
+        result_images.append(im)
+    return result_images, detections
+
+def process_image(sess, im_pil, post_processor, size=640, model_size='s'):
+    # Resize image while preserving aspect ratio
+    resized_im_pil, ratio, pad_w, pad_h = resize_with_aspect_ratio(im_pil, size)
+    orig_size = torch.tensor([[resized_im_pil.size[1], resized_im_pil.size[0]]])
+
+    # Per the condition below, the smallest variants skip ImageNet normalization
+    transforms = T.Compose([
+        T.ToTensor(),
+        T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
+        if model_size not in ['atto', 'femto', 'pico', 'n']
+        else T.Lambda(lambda x: x)
+    ])
+    im_data = transforms(resized_im_pil).unsqueeze(0)
+
+    output = sess.run(
+        output_names=None,
+        input_feed={'images': im_data.numpy()}
+    )
+
+    output = {"pred_logits": torch.from_numpy(output[0]), "pred_boxes": torch.from_numpy(output[1])}
+    labels, boxes, scores = post_processor(output, orig_size)
+
+    result_images, detections = draw(
+        [im_pil], labels, boxes, scores,
+        [ratio], [(pad_w, pad_h)]
+    )
+
+    return detections, result_images
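+
+# Worked example (made-up numbers) of the box mapping in draw(): DEIMv2
+# preprocessing pads the resized image symmetrically, so a corner at (x, y) in
+# 640x640 model space maps back as ((x - pad_w) / ratio, (y - pad_h) / ratio).
+def _unpad_demo():
+    # a 1280x720 source resized into 640x640: ratio = 0.5, pad_w = 0, pad_h = (640 - 360) // 2
+    ratio, pad_w, pad_h = 0.5, 0, 140
+    x, y = 320.0, 300.0                               # box corner in model space
+    print((x - pad_w) / ratio, (y - pad_h) / ratio)   # -> 640.0 320.0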
+
+class QRCodeDecoder:
+    def crop_qr_regions(self, image, regions):
+        """Crop QR-code regions out of the image from the detected bounding boxes."""
+        cropped_images = []
+        for idx, region in enumerate(regions):
+            x1, y1, x2, y2 = region
+            # Expand the box to offset tight detections (tune to your detector);
+            # clamp at 0 so negative indices don't wrap around.
+            x1 = max(x1 - 15, 0)
+            y1 = max(y1 - 15, 0)
+            x2 += 15
+            y2 += 15
+            cropped = image[y1:y2, x1:x2]
+            if cropped.size > 0:
+                cropped_images.append({
+                    'image': cropped,
+                    'bbox': region,
+                })
+            # cv2.imwrite(f'cropped_qr_{idx}.jpg', cropped)
+        return cropped_images
+
+    def decode_qrcode_pyzbar(self, cropped_image):
+        """Decode QR codes in the crop with pyzbar."""
+        try:
+            # Convert to grayscale first
+            if len(cropped_image.shape) == 3:
+                gray = cv2.cvtColor(cropped_image, cv2.COLOR_BGR2GRAY)
+            else:
+                gray = cropped_image
+            # cv2.imwrite('cropped_gray.jpg', gray)
+            decoded_objects = pyzbar.decode(gray)
+            results = []
+            for obj in decoded_objects:
+                try:
+                    data = obj.data.decode('utf-8')
+                    results.append({
+                        'data': data,
+                        'type': obj.type,
+                        'points': obj.polygon
+                    })
+                except UnicodeDecodeError:
+                    continue
+            return results
+        except Exception as e:
+            print(f"decode error: {e}")
+            return []
+
+if __name__ == '__main__':
+
+    # Load the ONNX model
+    sess = ort.InferenceSession('deimv2_hgnetv2_femto_coco.onnx')
+    size = sess.get_inputs()[0].shape[2]
+    print(f"Using device: {ort.get_device()}")
+
+    # QR-code decoder
+    decoder = QRCodeDecoder()
+    img_path = './images'
+    det_path = './DEIMv2_det_res'
+    crop_path = './DEIMv2_crop_res'
+
+    os.makedirs(det_path, exist_ok=True)
+    os.makedirs(crop_path, exist_ok=True)
+    # Take the post-processing settings from your trained model config
+    post_processor = PostProcessor(use_focal_loss=True, num_classes=1, num_top_queries=100)
+    post_processor.deploy()
+    # print('post_processor:', post_processor)
+    imgs = glob.glob(f"{img_path}/*.jpg")
+    total = len(imgs)
+    success = 0
+    fail = 0
+    start_time = time.time()
+    for idx, img in enumerate(imgs):
+        pic_name = os.path.basename(img).split('.')[0]
+        loop_start_time = time.time()
+        # Detect QR codes
+        im_pil = Image.open(img).convert('RGB')
+        img_cv2 = np.array(im_pil)
+        img_cv2 = cv2.cvtColor(img_cv2, cv2.COLOR_RGB2BGR)
+        det_result, res_img = process_image(sess, im_pil, post_processor, size, 'femto')
+        res_img[0].save(os.path.join(det_path, pic_name + '.jpg'))
+
+        # Crop detected QR codes and decode them with pyzbar
+        cropped_images = decoder.crop_qr_regions(img_cv2, det_result)
+        for i, cropped in enumerate(cropped_images):
+            cv2.imwrite(os.path.join(crop_path, f'{pic_name}_crop_{i}.jpg'), cropped['image'])
+
+        all_decoded_results = []
+        for i, cropped_data in enumerate(cropped_images):
+            decoded_results = decoder.decode_qrcode_pyzbar(cropped_data['image'])
+            all_decoded_results.extend(decoded_results)
+        # for result in decoded_results:
+        #     print(f"decode result: {result['data']} (type: {result['type']})")
+        if all_decoded_results:
+            success += 1
+            print("Decode succeeded!")
+        else:
+            fail += 1
+            print("Decode failed!")
+        loop_end_time = time.time()
+        print(f"Image {img} processed in {loop_end_time - loop_start_time:.4f}s")
+
+    end_time = time.time()
+    total_time = end_time - start_time
+
+    print(f"Total images tested: {total}")
+    print(f"Decoded successfully: {success}")
+    print(f"Failed to decode: {fail}")
+    print(f"Decode success rate: {success/total*100:.2f}%")
+    print(f"Total elapsed time: {total_time:.4f}s")
+    print(f"Average time per image: {total_time/total:.4f}s")
diff --git a/python/QRCode_onnx_infer_v5.py b/python/QRCode_onnx_infer_v5.py
new file mode 100644
index 0000000000000000000000000000000000000000..51e802ff5d4e8fe87331e44341f7668c1e9bc313
--- /dev/null
+++ b/python/QRCode_onnx_infer_v5.py
@@ -0,0 +1,456 @@
+import numpy as np
+import cv2
+import os
+import onnxruntime
+import torch
+import time
+import torchvision
+import matplotlib
+import pyzbar.pyzbar as pyzbar
+
+class Colors:
+    # Ultralytics color palette https://ultralytics.com/
+    def __init__(self):
+        self.palette = [self.hex2rgb(c) for c in matplotlib.colors.TABLEAU_COLORS.values()]
+        self.n = len(self.palette)
+
+    def __call__(self, i, bgr=False):
+        c = self.palette[int(i) % self.n]
+        return (c[2], c[1], c[0]) if bgr else c
+
+    @staticmethod
+    def hex2rgb(h):  # rgb order (PIL)
+        return tuple(int(h[1 + i:1 + i + 2], 16) for i in (0, 2, 4))
+
+colors = Colors()
+
+def xywh2xyxy(x):
+    # Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right
+    y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
+    y[:, 0] = x[:, 0] - x[:, 2] / 2  # top left x
+    y[:, 1] = x[:, 1] - x[:, 3] / 2  # top left y
+    y[:, 2] = x[:, 0] + x[:, 2] / 2  # bottom right x
+    y[:, 3] = x[:, 1] + x[:, 3] / 2  # bottom right y
+    return y
+
+def box_iou(box1, box2):
+    # https://github.com/pytorch/vision/blob/master/torchvision/ops/boxes.py
+    """
+    Return intersection-over-union (Jaccard index) of boxes.
+    Both sets of boxes are expected to be in (x1, y1, x2, y2) format.
+    Arguments:
+        box1 (Tensor[N, 4])
+        box2 (Tensor[M, 4])
+    Returns:
+        iou (Tensor[N, M]): the NxM matrix containing the pairwise
+            IoU values for every element in boxes1 and boxes2
+    """
+
+    def box_area(box):
+        # box = 4xn
+        return (box[2] - box[0]) * (box[3] - box[1])
+
+    area1 = box_area(box1.T)
+    area2 = box_area(box2.T)
+
+    # inter(N,M) = (rb(N,M,2) - lt(N,M,2)).clamp(0).prod(2)
+    inter = (torch.min(box1[:, None, 2:], box2[:, 2:]) - torch.max(box1[:, None, :2], box2[:, :2])).clamp(0).prod(2)
+    return inter / (area1[:, None] + area2 - inter)  # iou = inter / (area1 + area2 - inter)
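+
+# Tiny check (made-up boxes) for box_iou: two unit squares overlapping by half
+# share IoU = 0.5 / (1 + 1 - 0.5) = 1/3.
+def _iou_demo():
+    a = torch.tensor([[0.0, 0.0, 1.0, 1.0]])
+    b = torch.tensor([[0.5, 0.0, 1.5, 1.0]])
+    print(box_iou(a, b))  # -> tensor([[0.3333]])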
+
+def non_max_suppression(
+        prediction,
+        conf_thres=0.25,
+        iou_thres=0.45,
+        classes=None,
+        agnostic=False,
+        multi_label=False,
+        labels=(),
+        max_det=300,
+        nm=0,  # number of masks
+):
+    """Non-Maximum Suppression (NMS) on inference results to reject overlapping detections
+
+    Returns:
+        list of detections, one (n,6) tensor per image [xyxy, conf, cls]
+    """
+
+    if isinstance(prediction, (list, tuple)):  # YOLOv5 model in validation mode, output = (inference_out, loss_out)
+        prediction = prediction[0]  # select only inference output
+
+    device = prediction.device
+    mps = 'mps' in device.type  # Apple MPS
+    if mps:  # MPS not fully supported yet, convert tensors to CPU before NMS
+        prediction = prediction.cpu()
+    bs = prediction.shape[0]  # batch size
+    nc = prediction.shape[2] - nm - 5  # number of classes
+    xc = prediction[..., 4] > conf_thres  # candidates
+
+    # Checks
+    assert 0 <= conf_thres <= 1, f'Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0'
+    assert 0 <= iou_thres <= 1, f'Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0'
+
+    # Settings
+    # min_wh = 2  # (pixels) minimum box width and height
+    max_wh = 7680  # (pixels) maximum box width and height
+    max_nms = 30000  # maximum number of boxes into torchvision.ops.nms()
+    time_limit = 0.5 + 0.05 * bs  # seconds to quit after
+    redundant = True  # require redundant detections
+    multi_label &= nc > 1  # multiple labels per box (adds 0.5ms/img)
+    merge = False  # use merge-NMS
+
+    t = time.time()
+    mi = 5 + nc  # mask start index
+    output = [torch.zeros((0, 6 + nm), device=prediction.device)] * bs
+    for xi, x in enumerate(prediction):  # image index, image inference
+        # Apply constraints
+        # x[((x[..., 2:4] < min_wh) | (x[..., 2:4] > max_wh)).any(1), 4] = 0  # width-height
+        x = x[xc[xi]]  # confidence
+
+        # Cat apriori labels if autolabelling
+        if labels and len(labels[xi]):
+            lb = labels[xi]
+            v = torch.zeros((len(lb), nc + nm + 5), device=x.device)
+            v[:, :4] = lb[:, 1:5]  # box
+            v[:, 4] = 1.0  # conf
+            v[range(len(lb)), lb[:, 0].long() + 5] = 1.0  # cls
+            x = torch.cat((x, v), 0)
+
+        # If none remain, process the next image
+        if not x.shape[0]:
+            continue
+
+        # Compute conf
+        x[:, 5:] *= x[:, 4:5]  # conf = obj_conf * cls_conf
+
+        # Box/Mask
+        box = xywh2xyxy(x[:, :4])  # (center_x, center_y, width, height) to (x1, y1, x2, y2)
+        mask = x[:, mi:]  # zero columns if no masks
+
+        # Detections matrix nx6 (xyxy, conf, cls)
+        if multi_label:
+            i, j = (x[:, 5:mi] > conf_thres).nonzero(as_tuple=False).T
+            x = torch.cat((box[i], x[i, 5 + j, None], j[:, None].float(), mask[i]), 1)
+        else:  # best class only
+            conf, j = x[:, 5:mi].max(1, keepdim=True)
+            x = torch.cat((box, conf, j.float(), mask), 1)[conf.view(-1) > conf_thres]
+
+        # Filter by class
+        if classes is not None:
+            x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)]
+
+        # Apply finite constraint
+        # if not torch.isfinite(x).all():
+        #     x = x[torch.isfinite(x).all(1)]
+
+        # Check shape
+        n = x.shape[0]  # number of boxes
+        if not n:  # no boxes
+            continue
+        elif n > max_nms:  # excess boxes
+            x = x[x[:, 4].argsort(descending=True)[:max_nms]]  # sort by confidence
+        else:
+            x = x[x[:, 4].argsort(descending=True)]  # sort by confidence
+
+        # Batched NMS
+        c = x[:, 5:6] * (0 if agnostic else max_wh)  # classes
+        boxes, scores = x[:, :4] + c, x[:, 4]  # boxes (offset by class), scores
+        i = torchvision.ops.nms(boxes, scores, iou_thres)  # NMS
+        if i.shape[0] > max_det:  # limit detections
+            i = i[:max_det]
+        if merge and (1 < n < 3E3):  # Merge NMS (boxes merged using weighted mean)
+            # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4)
+            iou = box_iou(boxes[i], boxes) > iou_thres  # iou matrix
+            weights = iou * scores[None]  # box weights
+            x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True)  # merged boxes
+            if redundant:
+                i = i[iou.sum(1) > 1]  # require redundancy
+
+        output[xi] = x[i]
+        if mps:
+            output[xi] = output[xi].to(device)
+        if (time.time() - t) > time_limit:
+            print(f'WARNING: NMS time limit {time_limit:.3f}s exceeded')  # no LOGGER is defined in this script
+            break  # time limit exceeded
+
+    return output
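+
+# Smoke test (fabricated predictions) for non_max_suppression with nc=1: rows are
+# (cx, cy, w, h, obj, cls0). The two overlapping boxes collapse to one detection,
+# and the third is dropped by the confidence threshold.
+def _nms_demo():
+    pred = torch.tensor([[[100., 100., 50., 50., 0.9, 0.9],
+                          [102., 102., 50., 50., 0.8, 0.9],
+                          [300., 300., 40., 40., 0.1, 0.9]]])
+    out = non_max_suppression(pred, conf_thres=0.25, iou_thres=0.45)
+    print(out[0].shape)  # -> torch.Size([1, 6])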
+
+def scale_coords(img1_shape, coords, img0_shape, ratio_pad=None, kpt_label=False, step=2):
+    # Rescale coords (xyxy) from img1_shape to img0_shape
+    if ratio_pad is None:  # calculate from img0_shape
+        gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1])  # gain = old / new
+        pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2  # wh padding
+    else:
+        gain = ratio_pad[0]
+        pad = ratio_pad[1]
+    if isinstance(gain, (list, tuple)):
+        gain = gain[0]
+    if not kpt_label:
+        coords[:, [0, 2]] -= pad[0]  # x padding
+        coords[:, [1, 3]] -= pad[1]  # y padding
+        coords[:, [0, 2]] /= gain
+        coords[:, [1, 3]] /= gain
+        clip_coords(coords, img0_shape)  # clip all boxes, not just the first four rows
+        # coords[:, 0:4] = coords[:, 0:4].round()
+    else:
+        coords[:, 0::step] -= pad[0]  # x padding
+        coords[:, 1::step] -= pad[1]  # y padding
+        coords[:, 0::step] /= gain
+        coords[:, 1::step] /= gain
+        clip_coords(coords, img0_shape, step=step)
+        # coords = coords.round()
+    return coords
+
+def clip_coords(boxes, img_shape, step=2):
+    # Clip xyxy bounding boxes to image shape (height, width)
+    boxes[:, 0::step].clamp_(0, img_shape[1])  # x
+    boxes[:, 1::step].clamp_(0, img_shape[0])  # y
+
+def plot_one_box(x, im, color=None, label=None, line_thickness=3, steps=2, orig_shape=None):
+    # Plots one bounding box on image 'im' using OpenCV (in place)
+    assert im.data.contiguous, 'Image not contiguous. Apply np.ascontiguousarray(im) to plot_on_box() input image.'
+    tl = line_thickness or round(0.002 * (im.shape[0] + im.shape[1]) / 2) + 1  # line/font thickness
+    c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3]))
+    cv2.rectangle(im, c1, c2, color, thickness=max(tl // 3, 1), lineType=cv2.LINE_AA)
+    if label:
+        if len(label.split(' ')) > 1:
+            # label = label.split(' ')[-1]
+            tf = max(tl - 1, 1)  # font thickness
+            t_size = cv2.getTextSize(label, 0, fontScale=tl / 6, thickness=tf)[0]
+            c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3
+            cv2.rectangle(im, c1, c2, color, -1, cv2.LINE_AA)  # filled label background
+            cv2.putText(im, label, (c1[0], c1[1] - 2), 0, tl / 6, [225, 255, 255],
+                        thickness=max(tf // 2, 1), lineType=cv2.LINE_AA)
+
+def letterbox(img, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32):
+    # Resize and pad image while meeting stride-multiple constraints
+    shape = img.shape[:2]  # current shape [height, width]
+    if isinstance(new_shape, int):
+        new_shape = (new_shape, new_shape)
+
+    # Scale ratio (new / old)
+    r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
+    if not scaleup:  # only scale down, do not scale up (for better test mAP)
+        r = min(r, 1.0)
+
+    # Compute padding
+    ratio = r, r  # width, height ratios
+    new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
+    dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]  # wh padding
+    if auto:  # minimum rectangle
+        dw, dh = np.mod(dw, stride), np.mod(dh, stride)  # wh padding
+    elif scaleFill:  # stretch
+        dw, dh = 0.0, 0.0
+        new_unpad = (new_shape[1], new_shape[0])
+        ratio = new_shape[1] / shape[1], new_shape[0] / shape[0]  # width, height ratios
+
+    dw /= 2  # divide padding into 2 sides
+    dh /= 2
+
+    if shape[::-1] != new_unpad:  # resize
+        img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR)
+    top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
+    left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
+
+    img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)  # add border
+    return img, ratio, (dw, dh)
+
+def sigmoid(x):
+    return 1 / (1 + np.exp(-x))
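+
+# Worked example (made-up shape) for letterbox with auto=False: a 1280x720 frame
+# is scaled by r = 0.5 to 640x360, then padded with 140 px of gray top and bottom.
+def _letterbox_demo():
+    frame = np.zeros((720, 1280, 3), dtype=np.uint8)
+    img, ratio, (dw, dh) = letterbox(frame, (640, 640), auto=False)
+    print(img.shape, ratio, (dw, dh))  # -> (640, 640, 3) (0.5, 0.5) (0.0, 140.0)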
+
+class Yolov5QRcodeDetector:
+    def __init__(self, model_path):
+        self.model = onnxruntime.InferenceSession(model_path)
+        self.input_name = self.model.get_inputs()[0].name
+        self.output_name = self.model.get_outputs()[0].name
+        self.classes = ['QRCode']
+        self.nc = len(self.classes)
+        self.no = self.nc + 5
+        self.na = 3
+        self.nl = 3
+        self.anchors = torch.tensor([[10, 13, 16, 30, 33, 23], [30, 61, 62, 45, 59, 119], [116, 90, 156, 198, 373, 326]])
+        self.anchors = self.anchors.view(3, 3, 2)
+        self.stride = torch.tensor([8, 16, 32])
+        self.anchors = self.anchors / (self.stride.view(-1, 1, 1))
+
+    def preprocess_image(self, img, img_size=(640, 640)):
+        img, _, _ = letterbox(img, img_size, auto=False, stride=32)
+        img = np.ascontiguousarray(img[:, :, ::-1].transpose(2, 0, 1))  # BGR to RGB, HWC to CHW
+        img = np.asarray(img, dtype=np.float32)
+        img = np.expand_dims(img, 0)
+        img /= 255.0
+        return img
+
+    def model_inference(self, input=None):
+        output = self.model.run([], {self.input_name: input})
+        return output
+
+    def _make_grid(self, nx=20, ny=20, i=0):
+        na = 3
+        shape = 1, na, ny, nx, 2  # grid shape
+        y, x = torch.arange(ny, dtype=torch.float32), torch.arange(nx, dtype=torch.float32)
+        yv, xv = torch.meshgrid(y, x, indexing='ij')  # indexing='ij' requires torch>=1.10
+        grid = torch.stack((xv, yv), 2).expand(shape) - 0.5  # add grid offset, i.e. y = 2.0 * x - 0.5
+        anchor_grid = (self.anchors[i] * self.stride[i]).view((1, na, 1, 1, 2)).expand(shape)
+        return grid, anchor_grid
+
+    def postprocess(self, preds, img_shape, im0):
+        z = []  # inference output
+        for i, pred in enumerate(preds):
+            pred = torch.from_numpy(pred)    # numpy to tensor
+            pred = pred.permute(0, 3, 1, 2)  # NHWC to NCHW
+            bs, _, ny, nx = pred.shape
+            pred = pred.view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous()
+            grid, anchor_grid = self._make_grid(nx, ny, i)
+
+            xy, wh, conf = sigmoid(pred).split((2, 2, self.nc + 1), 4)
+            xy = (xy * 2 + grid) * self.stride[i]  # xy
+            wh = (wh * 2) ** 2 * anchor_grid  # wh
+            y = torch.cat((xy, wh, conf), 4)
+            z.append(y.view(bs, self.na * nx * ny, self.no))
+
+        preds = torch.cat(z, 1)
+        detections = []
+        preds = non_max_suppression(preds, 0.3, 0.45)
+        for i, det in enumerate(preds):  # detections per image
+            if len(det):
+                # Rescale boxes from img_size to im0 size
+                scale_coords(img_shape[2:], det[:, :4], im0.shape, kpt_label=False)
+
+                # Per-class counts (unused; kept from the original template)
+                # for c in det[:, 5].unique():
+                #     n = (det[:, 5] == c).sum()  # detections per class
+
+                # Write results
+                for det_index, (*xyxy, conf, cls) in enumerate(reversed(det[:, :6])):
+                    # print('det:', xyxy, conf, cls)
+                    int_coords = [int(t.item()) for t in xyxy]
+                    detections.append(int_coords)
+                    # c = int(cls)  # integer class
+                    # label = f'{self.classes[c]} {conf:.2f}'
+                    # plot_one_box(xyxy, im0, label=label, color=colors(c, True), line_thickness=2, steps=3, orig_shape=im0.shape[:2])
+
+        return detections, im0
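+
+# Hand check (made-up activations) of the YOLOv5 head decode in postprocess():
+# xy = (sigmoid(t) * 2 + grid) * stride and wh = (sigmoid(t) * 2) ** 2 * anchor.
+# With sigmoid(t) = 0.5 at grid cell (9, 9) on the stride-8 map (grid offset -0.5),
+# the center lands in the middle of that cell and wh equals the anchor prior.
+def _v5_decode_demo():
+    s, grid, stride, anchor = 0.5, 9 - 0.5, 8.0, torch.tensor([10., 13.])
+    xy = (s * 2 + grid) * stride   # -> 76.0 per axis (cell spans pixels 72..80)
+    wh = (s * 2) ** 2 * anchor     # -> tensor([10., 13.])
+    print(xy, wh)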
+
+class QRCodeDecoder:
+    def crop_qr_regions(self, image, regions):
+        """Crop QR-code regions out of the image from the detected bounding boxes."""
+        cropped_images = []
+        for idx, region in enumerate(regions):
+            x1, y1, x2, y2 = region
+            # Expand the box to offset tight detections (tune to your detector);
+            # clamp at 0 so negative indices don't wrap around.
+            x1 = max(x1 - 15, 0)
+            y1 = max(y1 - 15, 0)
+            x2 += 15
+            y2 += 15
+            cropped = image[y1:y2, x1:x2]
+            if cropped.size > 0:
+                cropped_images.append({
+                    'image': cropped,
+                    'bbox': region,
+                })
+            # cv2.imwrite(f'cropped_qr_{idx}.jpg', cropped)
+        return cropped_images
+
+    def decode_qrcode_pyzbar(self, cropped_image):
+        """Decode QR codes in the crop with pyzbar."""
+        try:
+            # Convert to grayscale first
+            if len(cropped_image.shape) == 3:
+                gray = cv2.cvtColor(cropped_image, cv2.COLOR_BGR2GRAY)
+            else:
+                gray = cropped_image
+            # cv2.imwrite('cropped_gray.jpg', gray)
+            decoded_objects = pyzbar.decode(gray)
+            results = []
+            for obj in decoded_objects:
+                try:
+                    data = obj.data.decode('utf-8')
+                    results.append({
+                        'data': data,
+                        'type': obj.type,
+                        'points': obj.polygon
+                    })
+                except UnicodeDecodeError:
+                    continue
+            return results
+        except Exception as e:
+            print(f"decode error: {e}")
+            return []
+
+if __name__ == '__main__':
+    import time
+
+    model = './yolov5n.onnx'
+    input_size = [640, 640]
+    detector = Yolov5QRcodeDetector(model)
+    # Crop detected QR codes and decode them with pyzbar
+    decoder = QRCodeDecoder()
+    pic_path = './images/'
+    det_path = './v5_det_res'
+    crop_path = './v5_crop_res'
+    os.makedirs(det_path, exist_ok=True)
+    os.makedirs(crop_path, exist_ok=True)
+    pics = os.listdir(pic_path)
+    total = len(pics)
+    success = 0
+    fail = 0
+    start_time = time.time()  # overall start time
+    for idx, pic in enumerate(pics):
+        loop_start_time = time.time()  # per-image start time
+        org_img = os.path.join(pic_path, pic)
+        pic_name = pic.split('.')[0]
+        im0 = cv2.imread(org_img)
+
+        # Detect QR codes
+        img = detector.preprocess_image(im0, img_size=input_size)
+        infer_start_time = time.time()
+        preds = detector.model_inference(img)
+        infer_end_time = time.time()
+        print(f"infer time: {infer_end_time - infer_start_time:.4f}s")
+        det_result, res_img = detector.postprocess(preds, img.shape, im0)
+        # cv2.imwrite(os.path.join(det_path, pic), res_img)
+
+        cropped_images = decoder.crop_qr_regions(im0, det_result)
+        for i, cropped in enumerate(cropped_images):
+            cv2.imwrite(os.path.join(crop_path, f'{pic_name}_crop_{i}.jpg'), cropped['image'])
+
+        all_decoded_results = []
+        for i, cropped_data in enumerate(cropped_images):
+            decoded_results = decoder.decode_qrcode_pyzbar(cropped_data['image'])
+            all_decoded_results.extend(decoded_results)
+        # for result in decoded_results:
+        #     print(f"decode result: {result['data']} (type: {result['type']})")
+        if all_decoded_results:
+            success += 1
+            # print("Decode succeeded!")
+        else:
+            fail += 1
+            # print("Decode failed!")
+        loop_end_time = time.time()  # per-image end time
+        print(f"Image {pic} processed in {loop_end_time - loop_start_time:.4f}s")
+
+    end_time = time.time()
+    total_time = end_time - start_time
+
+    print(f"Total images tested: {total}")
+    print(f"Decoded successfully: {success}")
+    print(f"Failed to decode: {fail}")
+    print(f"Decode success rate: {success/total*100:.2f}%")
+    print(f"Total elapsed time: {total_time:.4f}s")
+    print(f"Average time per image: {total_time/total:.4f}s")
diff --git a/python/QRCode_onnx_infer_v8.py b/python/QRCode_onnx_infer_v8.py
new file mode 100644
index 0000000000000000000000000000000000000000..1e0334fc31da4cb455e5b6c962d83794f5ff2f5f
--- /dev/null
+++ b/python/QRCode_onnx_infer_v8.py
@@ -0,0 +1,555 @@
+import onnxruntime as ort
+import cv2
+import numpy as np
+import time
+import yaml
+import glob
+import os
+import pyzbar.pyzbar as pyzbar
+
+names = ['QRCode']
+
+def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32):
+    # Resize and pad image while meeting stride-multiple constraints
+    shape = im.shape[:2]
+    if isinstance(new_shape, int):
+        new_shape = (new_shape, new_shape)
+
+    r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
+    if not scaleup:
+        r = min(r, 1.0)
+
+    ratio = r, r
+    new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
+    dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]
+    if auto:
+        dw, dh = np.mod(dw, stride), np.mod(dh, stride)
+    elif scaleFill:
+        dw, dh = 0.0, 0.0
+        new_unpad = (new_shape[1], new_shape[0])
+        ratio = new_shape[1] / shape[1], new_shape[0] / shape[0]
+
+    dw /= 2
+    dh /= 2
+
+    if shape[::-1] != new_unpad:
+        im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)
+    top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
+    left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
+    im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)
+    return im, ratio, (dw, dh)
+
+def data_process_cv2(frame, input_shape):
+    '''
+    Preprocess the input image for inference.
+    :param frame: path of the image to read
+    :param input_shape: model input size (h, w)
+    :return: model input tensor, original image, letterboxed image
+    '''
+    im0 = cv2.imread(frame)
+    img = letterbox(im0, input_shape, auto=False, stride=32)[0]
+    org_data = img.copy()
+    img = np.ascontiguousarray(img[:, :, ::-1].transpose(2, 0, 1))
+    img =
np.asarray(img, dtype=np.float32) + img = np.expand_dims(img, 0) + img /= 255.0 + return img, im0, org_data + +def non_max_suppression(prediction, + conf_thres=0.25, + iou_thres=0.45, + classes=None, + agnostic=False, + multi_label=False, + labels=(), + max_det=300, + nm=0 # number of masks + ): + """ + Perform Non-Maximum Suppression (NMS) on the boxes to filter out overlapping boxes. + + Parameters: + prediction (ndarray): Predictions from the model. + conf_thres (float): Confidence threshold to filter boxes. + iou_thres (float): Intersection over Union (IoU) threshold for NMS. + classes (list): Filter boxes by classes. + agnostic (bool): If True, perform class-agnostic NMS. + multi_label (bool): If True, perform multi-label NMS. + labels (list): Labels for auto-labelling. + max_det (int): Maximum number of detections. + nm (int): Number of masks. + + Returns: + list: A list of filtered boxes. + """ + bs = prediction.shape[0] # batch size + nc = prediction.shape[2] - nm - 5 # number of classes + xc = prediction[..., 4] > conf_thres # candidates + + max_wh = 7680 # (pixels) maximum box width and height + max_nms = 30000 # maximum number of boxes into torchvision.ops.nms() + time_limit = 0.5 + 0.05 * bs # seconds to quit after + # redundant = True # require redundant detections + multi_label &= nc > 1 # multiple labels per box (adds 0.5ms/img) + # merge = False # use merge-NMS + + t = time.time() + mi = 5 + nc # mask start index + output = [np.zeros((0, 6 + nm))] * bs + for xi, x in enumerate(prediction): # image index, image inference + # Apply constraints + x = x[xc[xi]] # confidence + + # Cat apriori labels if autolabelling + if labels and len(labels[xi]): + lb = labels[xi] + v = np.zeros((len(lb), nc + nm + 5)) + v[:, :4] = lb[:, 1:5] # box + v[:, 4] = 1.0 # conf + v[np.arange(len(lb)), lb[:, 0].astype(int) + 5] = 1.0 # cls + x = np.concatenate((x, v), 0) + + # If none remain process next image + if not x.shape[0]: + continue + + # Compute conf + x[:, 5:] *= x[:, 4:5] # conf = obj_conf * cls_conf + + # Box/Mask + box = xywh2xyxy(x[:, :4]) # center_x, center_y, width, height) to (x1, y1, x2, y2) + mask = x[:, mi:] # zero columns if no masks + + # Detections matrix nx6 (xyxy, conf, cls) + if multi_label: + i, j = np.nonzero(x[:, 5:mi] > conf_thres) + x = np.concatenate((box[i], x[i, 5 + j][:, None], j[:, None].astype(float), mask[i]), 1) + else: # best class only + # conf = x[:, 5:mi].max(1, keepdims=True) + # j = x[:, 5:mi].argmax(1,keepdims=True) + conf = np.max(x[:, 5:mi], 1).reshape(box.shape[:1][0], 1) + j = np.argmax(x[:, 5:mi], 1).reshape(box.shape[:1][0], 1) + x = np.concatenate((box, conf, j.astype(float), mask), 1)[conf[:, 0] > conf_thres] + # Filter by class + if classes is not None: + x = x[(x[:, 5:6] == np.array(classes)[:, None]).any(1)] + # Check shape + n = x.shape[0] # number of boxes + if not n: # no boxes + continue + sorted_indices = np.argsort(x[:, 4])[::-1] + x = x[sorted_indices][:max_nms] # sort by confidence and remove excess boxes + + # Batched NMS + c = x[:, 5:6] * (0 if agnostic else max_wh) # classes + boxes, scores = x[:, :4] + c, x[:, 4] # boxes (offset by class), scores + i = nms(boxes, scores, iou_thres) # NMS + + i = i[:max_det] # limit detections + + output[xi] = x[i] + # if mps: + # output[xi] = output[xi].to(device) + if (time.time() - t) > time_limit: + # LOGGER.warning(f'WARNING NMS time limit {time_limit:.3f}s exceeded') + break # time limit exceeded + return output + + +# Define the function for NMS using numpy +def nms(boxes, scores, 
iou_threshold): + """ + Perform Non-Maximum Suppression (NMS) on the given boxes with scores using numpy. + + Parameters: + boxes (ndarray): The bounding boxes, shaped (N, 4). + scores (ndarray): The confidence scores for each box, shaped (N,). + iou_threshold (float): The IoU threshold for suppressing overlapping boxes. + + Returns: + ndarray: The indices of the selected boxes after NMS. + """ + if len(boxes) == 0: + return [] + + # Sort boxes by their scores + indices = np.argsort(scores)[::-1] + + selected_indices = [] + while len(indices) > 0: + # Select the box with the highest score + current_index = indices[0] + selected_indices.append(current_index) + + # Compute IoU between the current box and all other boxes + current_box = boxes[current_index] + other_boxes = boxes[indices[1:]] + iou = calculate_iou(current_box, other_boxes) + + # Remove boxes with IoU higher than the threshold + indices = indices[1:][iou <= iou_threshold] + + return np.array(selected_indices) + + +def calculate_iou(box, boxes): + """ + Calculate the Intersection over Union (IoU) between a given box and a set of boxes. + + Parameters: + box (ndarray): The coordinates of the first box, shaped (4,). + boxes (ndarray): The coordinates of the other boxes, shaped (N, 4). + + Returns: + ndarray: The IoU between the given box and each box in the set, shaped (N,). + """ + # Calculate intersection coordinates + x1 = np.maximum(box[0], boxes[:, 0]) + y1 = np.maximum(box[1], boxes[:, 1]) + x2 = np.minimum(box[2], boxes[:, 2]) + y2 = np.minimum(box[3], boxes[:, 3]) + + # Calculate intersection area + intersection_area = np.maximum(x2 - x1, 0) * np.maximum(y2 - y1, 0) + + # Calculate areas of both bounding boxes + box_area = (box[2] - box[0]) * (box[3] - box[1]) + boxes_area = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1]) + + # Calculate IoU + iou = intersection_area / (box_area + boxes_area - intersection_area) + + return iou + +# Define xywh2xyxy function for converting bounding box format +def xywh2xyxy(x): + """ + Convert bounding boxes from (center_x, center_y, width, height) to (x1, y1, x2, y2) format. + + Parameters: + x (ndarray): Bounding boxes in (center_x, center_y, width, height) format, shaped (N, 4). + + Returns: + ndarray: Bounding boxes in (x1, y1, x2, y2) format, shaped (N, 4). 
+ """ + y = x.copy() + y[:, 0] = x[:, 0] - x[:, 2] / 2 + y[:, 1] = x[:, 1] - x[:, 3] / 2 + y[:, 2] = x[:, 0] + x[:, 2] / 2 + y[:, 3] = x[:, 1] + x[:, 3] / 2 + return y + +def xyxy2xywh(x): + # Convert nx4 boxes from [x1, y1, x2, y2] to [x, y, w, h] where xy1=top-left, xy2=bottom-right + y = np.copy(x) + y[:, 0] = (x[:, 0] + x[:, 2]) / 2 # x center + y[:, 1] = (x[:, 1] + x[:, 3]) / 2 # y center + y[:, 2] = x[:, 2] - x[:, 0] # width + y[:, 3] = x[:, 3] - x[:, 1] # height + return y + +def post_process_yolo(det, im, im0, gn, save_path, img_name): + detections = [] + if len(det): + det[:, :4] = scale_boxes(im.shape[:2], det[:, :4], im0.shape).round() + colors = Colors() + for *xyxy, conf, cls in reversed(det): + # print("class:",int(cls), "left:%.0f" % xyxy[0],"top:%.0f" % xyxy[1],"right:%.0f" % xyxy[2],"bottom:%.0f" % xyxy[3], "conf:",'{:.0f}%'.format(float(conf)*100)) + int_coords = [int(tensor.item()) for tensor in xyxy] + # print(int_coords) + detections.append(int_coords) + # c = int(cls) + # label = names[c] + # res_img = plot_one_box(xyxy, im0, label=f'{label}:{conf:.2f}', color=colors(c, True), line_thickness=4) + # cv2.imwrite(f'{save_path}/{img_name}.jpg',res_img) + # xywh = (xyxy2xywh(np.array(xyxy,dtype=np.float32).reshape(1, 4)) / gn).reshape(-1).tolist() # normalized xywh + # line = (cls, *xywh) # label format + # with open(f'{save_path}/{img_name}.txt', 'a') as f: + # f.write(('%g ' * len(line)).rstrip() % line + '\n') + return detections + +def scale_boxes(img1_shape, boxes, img0_shape, ratio_pad=None): + if ratio_pad is None: + gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1]) + pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2 + else: + gain = ratio_pad[0][0] + pad = ratio_pad[1] + + boxes[..., [0, 2]] -= pad[0] + boxes[..., [1, 3]] -= pad[1] + boxes[..., :4] /= gain + clip_boxes(boxes, img0_shape) + return boxes + +def clip_boxes(boxes, shape): + boxes[..., [0, 2]] = boxes[..., [0, 2]].clip(0, shape[1]) + boxes[..., [1, 3]] = boxes[..., [1, 3]].clip(0, shape[0]) + + +def yaml_load(file='coco128.yaml'): + with open(file, errors='ignore') as f: + return yaml.safe_load(f) + + +class Colors: + # Ultralytics color palette https://ultralytics.com/ + def __init__(self): + """ + Initializes the Colors class with a palette derived from Ultralytics color scheme, converting hex codes to RGB. + Colors derived from `hex = matplotlib.colors.TABLEAU_COLORS.values()`. + """ + hexs = ( + "FF3838", + "FF9D97", + "FF701F", + "FFB21D", + "CFD231", + "48F90A", + "92CC17", + "3DDB86", + "1A9334", + "00D4BB", + "2C99A8", + "00C2FF", + "344593", + "6473FF", + "0018EC", + "8438FF", + "520085", + "CB38FF", + "FF95C8", + "FF37C7", + ) + self.palette = [self.hex2rgb(f"#{c}") for c in hexs] + self.n = len(self.palette) + + def __call__(self, i, bgr=False): + """Returns color from palette by index `i`, in BGR format if `bgr=True`, else RGB; `i` is an integer index.""" + c = self.palette[int(i) % self.n] + return (c[2], c[1], c[0]) if bgr else c + + @staticmethod + def hex2rgb(h): + """Converts hex color codes to RGB values (i.e. default PIL order).""" + return tuple(int(h[1 + i: 1 + i + 2], 16) for i in (0, 2, 4)) + +def plot_one_box(x, im, color=None, label=None, line_thickness=3, steps=2, orig_shape=None): + assert im.data.contiguous, 'Image not contiguous. Apply np.ascontiguousarray(im) to plot_on_box() input image.' 
+    tl = line_thickness or round(0.002 * (im.shape[0] + im.shape[1]) / 2) + 1  # line/font thickness
+    c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3]))
+    cv2.rectangle(im, c1, c2, color, thickness=max(tl // 3, 1), lineType=cv2.LINE_AA)
+    if label:
+        if len(label.split(':')) > 1:
+            tf = max(tl - 1, 1)  # font thickness
+            t_size = cv2.getTextSize(label, 0, fontScale=tl / 6, thickness=tf)[0]
+            c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3
+            cv2.rectangle(im, c1, c2, color, -1, cv2.LINE_AA)  # filled label background
+            cv2.putText(im, label, (c1[0], c1[1] - 2), 0, tl / 6, [225, 255, 255],
+                        thickness=max(tf // 2, 1), lineType=cv2.LINE_AA)
+    return im
+
+def model_load(model):
+    providers = ['CPUExecutionProvider']
+    session = ort.InferenceSession(model, providers=providers)
+    output_names = [x.name for x in session.get_outputs()]
+    return session, output_names
+
+def make_anchors(feats, strides, grid_cell_offset=0.5):
+    """Generate anchor points and stride tensors from feature maps."""
+    anchor_points, stride_tensor = [], []
+    assert feats is not None
+    dtype = feats[0].dtype
+    for i, stride in enumerate(strides):
+        _, _, h, w = feats[i].shape
+        sx = np.arange(w, dtype=dtype) + grid_cell_offset  # shift x
+        sy = np.arange(h, dtype=dtype) + grid_cell_offset  # shift y
+        sy, sx = np.meshgrid(sy, sx, indexing='ij')
+        anchor_points.append(np.stack((sx, sy), axis=-1).reshape(-1, 2))
+        stride_tensor.append(np.full((h * w, 1), stride, dtype=dtype))
+    return np.concatenate(anchor_points), np.concatenate(stride_tensor)
+
+def dist2bbox(distance, anchor_points, xywh=True, dim=-1):
+    """Transform distances (ltrb) to boxes (xywh or xyxy)."""
+    lt, rb = np.split(distance, 2, axis=dim)
+    x1y1 = anchor_points - lt
+    x2y2 = anchor_points + rb
+    if xywh:
+        c_xy = (x1y1 + x2y2) / 2
+        wh = x2y2 - x1y1
+        return np.concatenate((c_xy, wh), axis=dim)  # xywh bbox
+    return np.concatenate((x1y1, x2y2), axis=dim)  # xyxy bbox
+
+class DFL:
+    """
+    NumPy implementation of the Distribution Focal Loss (DFL) integral module.
+    Original paper: Generalized Focal Loss (IEEE TPAMI 2023).
+    """
+
+    def __init__(self, c1=16):
+        """Initialize with the given number of distribution bins per box side."""
+        self.c1 = c1
+        # Fixed integration weights 0..c1-1 (equivalent to the frozen conv weights).
+        self.weights = np.arange(c1, dtype=np.float32).reshape(1, c1, 1, 1)
+
+    def __call__(self, x):
+        """
+        Decode box distributions.
+        Args:
+            x: array of shape (batch, 4 * c1, anchors)
+        Returns:
+            array of shape (batch, 4, anchors)
+        """
+        b, c, a = x.shape
+        # Equivalent to the original view -> transpose -> softmax sequence,
+        # with the max subtracted for numerical stability.
+        x_reshaped = x.reshape(b, 4, self.c1, a)
+        x_transposed = np.transpose(x_reshaped, (0, 2, 1, 3))
+        e = np.exp(x_transposed - x_transposed.max(axis=1, keepdims=True))
+        x_softmax = e / np.sum(e, axis=1, keepdims=True)
+        # Equivalent convolution: expectation over the bin indices.
+        conv_result = np.sum(self.weights * x_softmax, axis=1)
+        return conv_result.reshape(b, 4, a)
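+
+# Sketch (toy feature map, never called) of the anchor-free decode used below:
+# make_anchors lays a grid of cell centers, and dist2bbox turns per-anchor
+# (l, t, r, b) distances into a box around that center.
+def _anchor_decode_demo():
+    feat = np.zeros((1, 1, 2, 2), dtype=np.float32)        # one 2x2 stride-8 map
+    points, strides = make_anchors([feat], [8.0])
+    # points -> [[0.5 0.5] [1.5 0.5] [0.5 1.5] [1.5 1.5]] (grid units), strides -> 8
+    dist = np.array([[1., 1., 1., 1.]], dtype=np.float32)  # one cell in every direction
+    box = dist2bbox(dist, points[:1], xywh=True) * strides[:1]
+    print(box)                                             # -> [[4. 4. 16. 16.]] (cx, cy, w, h in px)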
+
+class YOLOV8Detector:
+    def __init__(self, model_path, imgsz=[640, 640]):
+        self.model_path = model_path
+        self.session, self.output_names = model_load(self.model_path)
+        self.imgsz = imgsz
+        self.stride = [8., 16., 32.]
+        self.reg_max = 16
+        self.nc = 1
+        self.no = self.nc + self.reg_max * 4
+        self.dfl = DFL(self.reg_max)
+
+    def detect_objects(self, image, save_path):
+        im, im0, org_data = data_process_cv2(image, self.imgsz)
+        img_name = os.path.basename(image).split('.')[0]
+        infer_start_time = time.time()
+        x = self.session.run(self.output_names, {self.session.get_inputs()[0].name: im})
+        infer_end_time = time.time()
+        print(f"infer time: {infer_end_time - infer_start_time:.4f}s")
+        x = [np.transpose(x[i], (0, 3, 1, 2)) for i in range(3)]  # NHWC to NCHW
+
+        anchors, strides = (np.transpose(t, (1, 0)) for t in make_anchors(x, self.stride, 0.5))
+        x_cat = np.concatenate([xi.reshape(1, self.no, -1) for xi in x], axis=2)
+        box = x_cat[:, :self.reg_max * 4, :]
+        cls = x_cat[:, self.reg_max * 4:, :]
+        dbox = dist2bbox(self.dfl(box), np.expand_dims(anchors, axis=0), xywh=True, dim=1) * strides
+        y = np.concatenate((dbox, 1 / (1 + np.exp(-cls))), axis=1)
+        pred = y.transpose([0, 2, 1])
+        pred_class = pred[..., 4:]
+        pred_conf = np.max(pred_class, axis=-1)
+        pred = np.insert(pred, 4, pred_conf, axis=-1)
+
+        pred = non_max_suppression(pred, conf_thres=0.25, iou_thres=0.45, max_det=1000)
+        gn = np.array(org_data.shape)[[1, 0, 1, 0]].astype(np.float32)
+        res = post_process_yolo(pred[0], org_data, im0, gn, save_path, img_name)
+        return res, im0
+
+class QRCodeDecoder:
+    def crop_qr_regions(self, image, regions):
+        """Crop QR-code regions out of the image from the detected bounding boxes."""
+        cropped_images = []
+        for idx, region in enumerate(regions):
+            x1, y1, x2, y2 = region
+            # Expand each box by 15 px to recover codes cut off by a tight detection
+            # (tune to your detector); clamp at 0 so negative indices don't wrap around.
+            x1 = max(x1 - 15, 0)
+            y1 = max(y1 - 15, 0)
+            x2 += 15
+            y2 += 15
+            cropped = image[y1:y2, x1:x2]
+            if cropped.size > 0:
+                cropped_images.append({
+                    'image': cropped,
+                    'bbox': region,
+                })
+            # cv2.imwrite(f'cropped_qr_{idx}.jpg', cropped)
+        return cropped_images
+
+    def decode_qrcode_pyzbar(self, cropped_image):
+        """Decode QR codes in the crop with pyzbar."""
+        try:
+            # Convert to grayscale first
+            if len(cropped_image.shape) == 3:
+                gray = cv2.cvtColor(cropped_image, cv2.COLOR_BGR2GRAY)
+            else:
+                gray = cropped_image
+            # cv2.imwrite('cropped_gray.jpg', gray)
+            decoded_objects = pyzbar.decode(gray)
+            results = []
+            for obj in decoded_objects:
+                try:
+                    data = obj.data.decode('utf-8')
+                    results.append({
+                        'data': data,
+                        'type': obj.type,
+                        'points': obj.polygon
+                    })
+                except UnicodeDecodeError:
+                    continue
+            return results
+        except Exception as e:
+            print(f"decode error: {e}")
+            return []
+
+if __name__ == '__main__':
+    import time
+
+    detector = YOLOV8Detector(model_path='./yolov8n.onnx', imgsz=[640, 640])
+    decoder = QRCodeDecoder()
+    img_path = './images'
+    det_path = './v8_det_res'
+    crop_path = './v8_crop_res'
+    os.makedirs(det_path, exist_ok=True)
+    os.makedirs(crop_path, exist_ok=True)
+    imgs = glob.glob(f"{img_path}/*.jpg")
+    total = len(imgs)
+    success = 0
+    fail = 0
+    start_time = time.time()
+    for idx, img in enumerate(imgs):
+        pic_name = os.path.basename(img).split('.')[0]
+        loop_start_time = time.time()
+        det_result, res_img = detector.detect_objects(img, det_path)
+        # cv2.imwrite(os.path.join(det_path, pic_name + '.jpg'), res_img)
+
+        # Crop detected QR codes and decode them with pyzbar
+        cropped_images = decoder.crop_qr_regions(res_img, det_result)
+        # for i, cropped in enumerate(cropped_images):
+        #     cv2.imwrite(os.path.join(crop_path, f'{pic_name}_crop_{i}.jpg'), cropped['image'])
+
+        all_decoded_results = []
+        for i, cropped_data in enumerate(cropped_images):
+            decoded_results = decoder.decode_qrcode_pyzbar(cropped_data['image'])
+            all_decoded_results.extend(decoded_results)
+        # for result in decoded_results:
+        #     print(f"decode result: {result['data']} (type: {result['type']})")
+        if all_decoded_results:
+            success += 1
+            print("Decode succeeded!")
+        else:
+            fail += 1
+            print("Decode failed!")
+        loop_end_time = time.time()
+        print(f"Image {img} processed in {loop_end_time - loop_start_time:.4f}s")
+
+    end_time = time.time()
+    total_time = end_time - start_time
+
+    print(f"Total images tested: {total}")
+    print(f"Decoded successfully: {success}")
+    print(f"Failed to decode: {fail}")
+    print(f"Decode success rate: {success/total*100:.2f}%")
+    print(f"Total elapsed time: {total_time:.4f}s")
+    print(f"Average time per image: {total_time/total:.4f}s")
\ No newline at end of file
diff --git a/python/requirements.txt b/python/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..8e2902926f7cc8ee16f174a86dac0e39ee7f82ff
--- /dev/null
+++ b/python/requirements.txt
@@ -0,0 +1,17 @@
+# for QR decode
+pyzbar
+
+# for YOLO inference (numpy/onnxruntime/matplotlib/Pillow are imported by the scripts)
+opencv-python-headless
+ultralytics
+numpy
+onnxruntime
+matplotlib
+Pillow
+
+# for DEIMv2
+torch==2.5.1
+torchvision==0.20.1
+faster-coco-eval>=1.6.7
+PyYAML
+tensorboard
+scipy
+calflops
+transformers
\ No newline at end of file
diff --git a/result.png b/result.png
new file mode 100644
index 0000000000000000000000000000000000000000..c13a92547caad04a04f8606212347396e21b4dea
Binary files /dev/null and b/result.png differ