yuvraj commited on
Commit ·
844ce1f
1
Parent(s): 52b2e72
Initial release: Awiros-ANPR-OCR model and inference script
Browse files- .gitattributes +1 -0
- LICENSE +189 -0
- README.md +124 -0
- TechnicalReport.pdf +3 -0
- en_dict.txt +62 -0
- images/10_14_68e6fcf21e55ac002f310971_awi_1.jpg +0 -0
- images/10_20_68f0b5ea88aefc002f543e60_awi_1.jpg +0 -0
- images/10_20_68fc5fb360b026003091b0eb_awi_1.jpg +0 -0
- images/10_20_69027403e20fe9002fceedaa_awi_1.jpg +0 -0
- images/10_21_68ecddf9925604002f70876a_awi_1.jpg +0 -0
- images/9_13_68f45a82b5e646002f924bbd_awi_1.jpg +0 -0
- images/9_16_68f325be3a50df002e43a509_awi_1.jpg +0 -0
- images/9_25_6901aa035e29cc002f516180_awi_1.jpg +0 -0
- images/sample_results.json +47 -0
- images/two_row-set-1_1_1420_.jpg +0 -0
- model.safetensors +3 -0
- requirements.txt +6 -0
- sample_results.json +47 -0
- test.py +266 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
*.pdf filter=lfs diff=lfs merge=lfs -text
|
LICENSE
ADDED
|
@@ -0,0 +1,189 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Apache License
|
| 2 |
+
Version 2.0, January 2004
|
| 3 |
+
http://www.apache.org/licenses/
|
| 4 |
+
|
| 5 |
+
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
| 6 |
+
|
| 7 |
+
1. Definitions.
|
| 8 |
+
|
| 9 |
+
"License" shall mean the terms and conditions for use, reproduction,
|
| 10 |
+
and distribution as defined by Sections 1 through 9 of this document.
|
| 11 |
+
|
| 12 |
+
"Licensor" shall mean the copyright owner or entity authorized by
|
| 13 |
+
the copyright owner that is granting the License.
|
| 14 |
+
|
| 15 |
+
"Legal Entity" shall mean the union of the acting entity and all
|
| 16 |
+
other entities that control, are controlled by, or are under common
|
| 17 |
+
control with that entity. For the purposes of this definition,
|
| 18 |
+
"control" means (i) the power, direct or indirect, to cause the
|
| 19 |
+
direction or management of such entity, whether by contract or
|
| 20 |
+
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
| 21 |
+
outstanding shares, or (iii) beneficial ownership of such entity.
|
| 22 |
+
|
| 23 |
+
"You" (or "Your") shall mean an individual or Legal Entity
|
| 24 |
+
exercising permissions granted by this License.
|
| 25 |
+
|
| 26 |
+
"Source" form shall mean the preferred form for making modifications,
|
| 27 |
+
including but not limited to software source code, documentation
|
| 28 |
+
source, and configuration files.
|
| 29 |
+
|
| 30 |
+
"Object" form shall mean any form resulting from mechanical
|
| 31 |
+
transformation or translation of a Source form, including but
|
| 32 |
+
not limited to compiled object code, generated documentation,
|
| 33 |
+
and conversions to other media types.
|
| 34 |
+
|
| 35 |
+
"Work" shall mean the work of authorship, whether in Source or
|
| 36 |
+
Object form, made available under the License, as indicated by a
|
| 37 |
+
copyright notice that is included in or attached to the work.
|
| 38 |
+
|
| 39 |
+
"Derivative Works" shall mean any work, whether in Source or Object
|
| 40 |
+
form, that is based on (or derived from) the Work and for which the
|
| 41 |
+
editorial revisions, annotations, elaborations, or other modifications
|
| 42 |
+
represent, as a whole, an original work of authorship. For the purposes
|
| 43 |
+
of this License, Derivative Works shall not include works that remain
|
| 44 |
+
separable from, or merely link (or bind by name) to the interfaces of,
|
| 45 |
+
the Work and Derivative Works thereof.
|
| 46 |
+
|
| 47 |
+
"Contribution" shall mean any work of authorship, including
|
| 48 |
+
the original version of the Work and any modifications or additions
|
| 49 |
+
to that Work or Derivative Works thereof, that is intentionally
|
| 50 |
+
submitted to the Licensor for inclusion in the Work by the copyright owner
|
| 51 |
+
or by an individual or Legal Entity authorized to submit on behalf of
|
| 52 |
+
the copyright owner. For the purposes of this definition, "submitted"
|
| 53 |
+
means any form of electronic, verbal, or written communication sent
|
| 54 |
+
to the Licensor or its representatives, including but not limited to
|
| 55 |
+
communication on electronic mailing lists, source code control systems,
|
| 56 |
+
and issue tracking systems that are managed by, or on behalf of, the
|
| 57 |
+
Licensor for the purpose of discussing and improving the Work, but
|
| 58 |
+
excluding communication that is conspicuously marked or otherwise
|
| 59 |
+
designated in writing by the copyright owner as "Not a Contribution."
|
| 60 |
+
|
| 61 |
+
"Contributor" shall mean Licensor and any individual or Legal Entity
|
| 62 |
+
on behalf of whom a Contribution has been received by the Licensor and
|
| 63 |
+
subsequently incorporated within the Work.
|
| 64 |
+
|
| 65 |
+
2. Grant of Copyright License. Subject to the terms and conditions of
|
| 66 |
+
this License, each Contributor hereby grants to You a perpetual,
|
| 67 |
+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
| 68 |
+
copyright license to reproduce, prepare Derivative Works of,
|
| 69 |
+
publicly display, publicly perform, sublicense, and distribute the
|
| 70 |
+
Work and such Derivative Works in Source or Object form.
|
| 71 |
+
|
| 72 |
+
3. Grant of Patent License. Subject to the terms and conditions of
|
| 73 |
+
this License, each Contributor hereby grants to You a perpetual,
|
| 74 |
+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
| 75 |
+
(except as stated in this section) patent license to make, have made,
|
| 76 |
+
use, offer to sell, sell, import, and otherwise transfer the Work,
|
| 77 |
+
where such license applies only to those patent claims licensable
|
| 78 |
+
by such Contributor that are necessarily infringed by their
|
| 79 |
+
Contribution(s) alone or by combination of their Contribution(s)
|
| 80 |
+
with the Work to which such Contribution(s) was submitted. If You
|
| 81 |
+
institute patent litigation against any entity (including a
|
| 82 |
+
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
| 83 |
+
or a Contribution incorporated within the Work constitutes direct
|
| 84 |
+
or contributory patent infringement, then any patent licenses
|
| 85 |
+
granted to You under this License for that Work shall terminate
|
| 86 |
+
as of the date such litigation is filed.
|
| 87 |
+
|
| 88 |
+
4. Redistribution. You may reproduce and distribute copies of the
|
| 89 |
+
Work or Derivative Works thereof in any medium, with or without
|
| 90 |
+
modifications, and in Source or Object form, provided that You
|
| 91 |
+
meet the following conditions:
|
| 92 |
+
|
| 93 |
+
(a) You must give any other recipients of the Work or
|
| 94 |
+
Derivative Works a copy of this License; and
|
| 95 |
+
|
| 96 |
+
(b) You must cause any modified files to carry prominent notices
|
| 97 |
+
stating that You changed the files; and
|
| 98 |
+
|
| 99 |
+
(c) You must retain, in the Source form of any Derivative Works
|
| 100 |
+
that You distribute, all copyright, patent, trademark, and
|
| 101 |
+
attribution notices from the Source form of the Work,
|
| 102 |
+
excluding those notices that do not pertain to any part of
|
| 103 |
+
the Derivative Works; and
|
| 104 |
+
|
| 105 |
+
(d) If the Work includes a "NOTICE" text file as part of its
|
| 106 |
+
distribution, then any Derivative Works that You distribute must
|
| 107 |
+
include a readable copy of the attribution notices contained
|
| 108 |
+
within such NOTICE file, excluding any notices that do not
|
| 109 |
+
pertain to any part of the Derivative Works, in at least one
|
| 110 |
+
of the following places: within a NOTICE text file distributed
|
| 111 |
+
as part of the Derivative Works; within the Source form or
|
| 112 |
+
documentation, if provided along with the Derivative Works; or,
|
| 113 |
+
within a display generated by the Derivative Works, if and
|
| 114 |
+
wherever such third-party notices normally appear. The contents
|
| 115 |
+
of the NOTICE file are for informational purposes only and
|
| 116 |
+
do not modify the License. You may add Your own attribution
|
| 117 |
+
notices within Derivative Works that You distribute, alongside
|
| 118 |
+
or as an addendum to the NOTICE text from the Work, provided
|
| 119 |
+
that such additional attribution notices cannot be construed
|
| 120 |
+
as modifying the License.
|
| 121 |
+
|
| 122 |
+
You may add Your own copyright statement to Your modifications and
|
| 123 |
+
may provide additional or different license terms and conditions
|
| 124 |
+
for use, reproduction, or distribution of Your modifications, or
|
| 125 |
+
for any such Derivative Works as a whole, provided Your use,
|
| 126 |
+
reproduction, and distribution of the Work otherwise complies with
|
| 127 |
+
the conditions stated in this License.
|
| 128 |
+
|
| 129 |
+
5. Submission of Contributions. Unless You explicitly state otherwise,
|
| 130 |
+
any Contribution intentionally submitted for inclusion in the Work
|
| 131 |
+
by You to the Licensor shall be under the terms and conditions of
|
| 132 |
+
this License, without any additional terms or conditions.
|
| 133 |
+
Notwithstanding the above, nothing herein shall supersede or modify
|
| 134 |
+
the terms of any separate license agreement you may have executed
|
| 135 |
+
with Licensor regarding such Contributions.
|
| 136 |
+
|
| 137 |
+
6. Trademarks. This License does not grant permission to use the trade
|
| 138 |
+
names, trademarks, service marks, or product names of the Licensor,
|
| 139 |
+
except as required for reasonable and customary use in describing the
|
| 140 |
+
origin of the Work and reproducing the content of the NOTICE file.
|
| 141 |
+
|
| 142 |
+
7. Disclaimer of Warranty. Unless required by applicable law or
|
| 143 |
+
agreed to in writing, Licensor provides the Work (and each
|
| 144 |
+
Contributor provides its Contributions) on an "AS IS" BASIS,
|
| 145 |
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
| 146 |
+
implied, including, without limitation, any warranties or conditions
|
| 147 |
+
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
| 148 |
+
PARTICULAR PURPOSE. You are solely responsible for determining the
|
| 149 |
+
appropriateness of using or redistributing the Work and assume any
|
| 150 |
+
risks associated with Your exercise of permissions under this License.
|
| 151 |
+
|
| 152 |
+
8. Limitation of Liability. In no event and under no legal theory,
|
| 153 |
+
whether in tort (including negligence), contract, or otherwise,
|
| 154 |
+
unless required by applicable law (such as deliberate and grossly
|
| 155 |
+
negligent acts) or agreed to in writing, shall any Contributor be
|
| 156 |
+
liable to You for damages, including any direct, indirect, special,
|
| 157 |
+
incidental, or consequential damages of any character arising as a
|
| 158 |
+
result of this License or out of the use or inability to use the
|
| 159 |
+
Work (including but not limited to damages for loss of goodwill,
|
| 160 |
+
work stoppage, computer failure or malfunction, or any and all
|
| 161 |
+
other commercial damages or losses), even if such Contributor
|
| 162 |
+
has been advised of the possibility of such damages.
|
| 163 |
+
|
| 164 |
+
9. Accepting Warranty or Additional Liability. While redistributing
|
| 165 |
+
the Work or Derivative Works thereof, You may choose to offer,
|
| 166 |
+
and charge a fee for, acceptance of support, warranty, indemnity,
|
| 167 |
+
or other liability obligations and/or rights consistent with this
|
| 168 |
+
License. However, in accepting such obligations, You may act only
|
| 169 |
+
on Your own behalf and on Your sole responsibility, not on behalf
|
| 170 |
+
of any other Contributor, and only if You agree to indemnify,
|
| 171 |
+
defend, and hold each Contributor harmless for any liability
|
| 172 |
+
incurred by, or claims asserted against, such Contributor by reason
|
| 173 |
+
of your accepting any such warranty or additional liability.
|
| 174 |
+
|
| 175 |
+
END OF TERMS AND CONDITIONS
|
| 176 |
+
|
| 177 |
+
Copyright 2026 Awiros
|
| 178 |
+
|
| 179 |
+
Licensed under the Apache License, Version 2.0 (the "License");
|
| 180 |
+
you may not use this file except in compliance with the License.
|
| 181 |
+
You may obtain a copy of the License at
|
| 182 |
+
|
| 183 |
+
http://www.apache.org/licenses/LICENSE-2.0
|
| 184 |
+
|
| 185 |
+
Unless required by applicable law or agreed to in writing, software
|
| 186 |
+
distributed under the License is distributed on an "AS IS" BASIS,
|
| 187 |
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 188 |
+
See the License for the specific language governing permissions and
|
| 189 |
+
limitations under the License.
|
README.md
CHANGED
|
@@ -1,3 +1,127 @@
|
|
| 1 |
---
|
| 2 |
license: apache-2.0
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
license: apache-2.0
|
| 3 |
+
language:
|
| 4 |
+
- en
|
| 5 |
+
library_name: paddlepaddle
|
| 6 |
+
tags: [anpr, ocr, license-plate, indian-plates, PP-OCRv5]
|
| 7 |
---
|
| 8 |
+
|
| 9 |
+
# Data-Intelligent ANPR: Scalable License Plate Recognition Under Real-World Data Constraints
|
| 10 |
+
|
| 11 |
+
## Abstract
|
| 12 |
+
|
| 13 |
+
This release provides Awiros-ANPR-OCR, a 37M-parameter specialist model for
|
| 14 |
+
Automatic Number Plate Recognition (ANPR) on Indian license plates. The model
|
| 15 |
+
is built on the PP-OCRv5 encoder-decoder backbone (SVTR_HGNet with PPHGNetV2_B4)
|
| 16 |
+
and fine-tuned on a curated 558,767-sample corpus spanning both standard
|
| 17 |
+
single-row and non-standard dual-row Indian plate formats.
|
| 18 |
+
|
| 19 |
+
Starting from only 6,839 publicly available labeled samples, the training
|
| 20 |
+
corpus was grown through a data engineering pipeline combining synthetic data
|
| 21 |
+
synthesis, consensus pseudo-labeling, distribution-aware curation, VLM-assisted
|
| 22 |
+
data cleanup, and state-balanced batch sampling. The resulting model achieves
|
| 23 |
+
**98.42% accuracy** with **sub-6ms on-device inference** on an NVIDIA RTX 3090
|
| 24 |
+
--- a 1,260x latency advantage over frontier multimodal models like Gemini.
|
| 25 |
+
|
| 26 |
+
For the full data curation and training methodology, refer to our technical
|
| 27 |
+
report: [Technical Report](TechnicalReport.pdf).
|
| 28 |
+
|
| 29 |
+
## Evaluation and Results
|
| 30 |
+
|
| 31 |
+
All systems were evaluated on a shared held-out validation set constructed
|
| 32 |
+
using a distribution-aware split covering all Indian state codes, including
|
| 33 |
+
both standard and non-standard plate formats.
|
| 34 |
+
|
| 35 |
+
| System | Params | Overall Acc. | 1-Row Acc. | 2-Row Acc. | Latency Avg (ms) | Throughput (img/s) |
|
| 36 |
+
| --- | --- | --- | --- | --- | --- | --- |
|
| 37 |
+
| **Awiros-ANPR-OCR (Ours)** | **37.3M** | **98.42%** | **98.83%** | **96.91%** | **5.09** | **196.5** |
|
| 38 |
+
| Gemini-3-flash-preview | ~5-10B | 93.89% | 94.70% | 91.20% | 6,430 | 0.2 |
|
| 39 |
+
| Gemini-2.5-flash-preview | ~5B | 87.23% | 89.66% | 78.38% | --- | --- |
|
| 40 |
+
| Tencent HunyuanOCR | 996M | 67.62% | 76.65% | 34.78% | 309.15 | 3.2 |
|
| 41 |
+
| PP-OCRv5 Pretrained | 53.6M | 57.96% | 73.55% | 0.24% | 5.25 | 190.6 |
|
| 42 |
+
|
| 43 |
+
Latency measured on a single NVIDIA RTX 3090 GPU (batch size 1). Gemini
|
| 44 |
+
latency is end-to-end API round-trip. PP-OCRv5 Pretrained shares the same
|
| 45 |
+
architecture but uses original pretrained weights without domain-specific
|
| 46 |
+
fine-tuning --- the 57.96% to 98.42% gap is entirely a data story.
|
| 47 |
+
|
| 48 |
+
## Qualitative Comparison
|
| 49 |
+
|
| 50 |
+
Representative samples where Awiros-ANPR-OCR correctly transcribes the plate
|
| 51 |
+
while all baselines produce errors. Common failure modes for baselines include
|
| 52 |
+
confusing visually similar characters (Q→0, V→Y, M→R, B→8) and truncating
|
| 53 |
+
dual-row plates.
|
| 54 |
+
|
| 55 |
+
[](images/sample_plates.jpg)
|
| 56 |
+
|
| 57 |
+
## Key Design Decisions
|
| 58 |
+
|
| 59 |
+
- **End-to-end architecture**: Eliminates brittle multi-stage pre-processing
|
| 60 |
+
pipelines (perspective normalization, row segmentation, per-region
|
| 61 |
+
recognition) that prior systems relied upon
|
| 62 |
+
- **Consensus pseudo-labeling**: Two independently trained models must agree on
|
| 63 |
+
a transcription before it is accepted as a label, substantially reducing
|
| 64 |
+
pseudo-label noise
|
| 65 |
+
- **Distribution-aware curation**: Non-linear bucket-wise train/val splits
|
| 66 |
+
ensure rare state codes are not lost to validation
|
| 67 |
+
- **State-balanced batch sampling**: Uniform state-code sampling within each
|
| 68 |
+
batch prevents training dynamics from being dominated by high-frequency states
|
| 69 |
+
- **Negative sample training**: Unreadable plates labeled with an abstention
|
| 70 |
+
token suppress hallucination on degraded inputs
|
| 71 |
+
|
| 72 |
+
## Model Inference
|
| 73 |
+
|
| 74 |
+
Use the official PaddleOCR repository to run single-image inference with this
|
| 75 |
+
release model.
|
| 76 |
+
|
| 77 |
+
1. Clone PaddleOCR and move into the repository root.
|
| 78 |
+
```bash
|
| 79 |
+
git clone https://github.com/PaddlePaddle/PaddleOCR.git
|
| 80 |
+
cd PaddleOCR
|
| 81 |
+
```
|
| 82 |
+
2. Install dependencies.
|
| 83 |
+
```bash
|
| 84 |
+
pip install paddlepaddle # or paddlepaddle-gpu
|
| 85 |
+
pip install safetensors pillow opencv-python pyyaml
|
| 86 |
+
```
|
| 87 |
+
3. Copy `test.py` and `en_dict.txt` from this release folder into the
|
| 88 |
+
PaddleOCR repository root.
|
| 89 |
+
4. Place `model.safetensors` in the PaddleOCR repository root (or specify the
|
| 90 |
+
path via `--weights`).
|
| 91 |
+
5. Run inference on a single image.
|
| 92 |
+
```bash
|
| 93 |
+
python test.py \
|
| 94 |
+
--image_path path/to/plate_crop.jpg \
|
| 95 |
+
--weights model.safetensors \
|
| 96 |
+
--device gpu
|
| 97 |
+
```
|
| 98 |
+
6. Run inference on a directory of images.
|
| 99 |
+
```bash
|
| 100 |
+
python test.py \
|
| 101 |
+
--image_path path/to/plate_crops/ \
|
| 102 |
+
--weights model.safetensors \
|
| 103 |
+
--device gpu \
|
| 104 |
+
--output_json results.json
|
| 105 |
+
```
|
| 106 |
+
|
| 107 |
+
## Architecture Details
|
| 108 |
+
|
| 109 |
+
| Component | Value |
|
| 110 |
+
| --- | --- |
|
| 111 |
+
| Framework | PaddlePaddle / PP-OCRv5 |
|
| 112 |
+
| Backbone | PPHGNetV2_B4 |
|
| 113 |
+
| Head | MultiHead (CTCHead + NRTRHead) |
|
| 114 |
+
| Input shape | 3 x 48 x 320 |
|
| 115 |
+
| Character set | 0-9, A-Z, a-z, space (63 classes) |
|
| 116 |
+
| Max text length | 25 |
|
| 117 |
+
| Parameters | 37.3M |
|
| 118 |
+
| Export format | SafeTensors (from PaddlePaddle params) |
|
| 119 |
+
|
| 120 |
+
## Summary
|
| 121 |
+
|
| 122 |
+
We present a practical, data-centric ANPR framework that achieves
|
| 123 |
+
production-grade accuracy on Indian license plates without reliance on large
|
| 124 |
+
manually annotated datasets or frontier model scale. The same PP-OCRv5
|
| 125 |
+
architecture scores 57.96% out-of-the-box and 98.42% after our data
|
| 126 |
+
engineering pipeline --- demonstrating that the data, not the model, is the
|
| 127 |
+
primary driver of performance in domain-specific OCR.
|
TechnicalReport.pdf
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5b4ac0c588ddd43f9e4a1fa81b9b2c2dde62c200ed78abdb90ecf05991b0e0fd
|
| 3 |
+
size 246863
|
en_dict.txt
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
0
|
| 2 |
+
1
|
| 3 |
+
2
|
| 4 |
+
3
|
| 5 |
+
4
|
| 6 |
+
5
|
| 7 |
+
6
|
| 8 |
+
7
|
| 9 |
+
8
|
| 10 |
+
9
|
| 11 |
+
A
|
| 12 |
+
B
|
| 13 |
+
C
|
| 14 |
+
D
|
| 15 |
+
E
|
| 16 |
+
F
|
| 17 |
+
G
|
| 18 |
+
H
|
| 19 |
+
I
|
| 20 |
+
J
|
| 21 |
+
K
|
| 22 |
+
L
|
| 23 |
+
M
|
| 24 |
+
N
|
| 25 |
+
O
|
| 26 |
+
P
|
| 27 |
+
Q
|
| 28 |
+
R
|
| 29 |
+
S
|
| 30 |
+
T
|
| 31 |
+
U
|
| 32 |
+
V
|
| 33 |
+
W
|
| 34 |
+
X
|
| 35 |
+
Y
|
| 36 |
+
Z
|
| 37 |
+
a
|
| 38 |
+
b
|
| 39 |
+
c
|
| 40 |
+
d
|
| 41 |
+
e
|
| 42 |
+
f
|
| 43 |
+
g
|
| 44 |
+
h
|
| 45 |
+
i
|
| 46 |
+
j
|
| 47 |
+
k
|
| 48 |
+
l
|
| 49 |
+
m
|
| 50 |
+
n
|
| 51 |
+
o
|
| 52 |
+
p
|
| 53 |
+
q
|
| 54 |
+
r
|
| 55 |
+
s
|
| 56 |
+
t
|
| 57 |
+
u
|
| 58 |
+
v
|
| 59 |
+
w
|
| 60 |
+
x
|
| 61 |
+
y
|
| 62 |
+
z
|
images/10_14_68e6fcf21e55ac002f310971_awi_1.jpg
ADDED
|
images/10_20_68f0b5ea88aefc002f543e60_awi_1.jpg
ADDED
|
images/10_20_68fc5fb360b026003091b0eb_awi_1.jpg
ADDED
|
images/10_20_69027403e20fe9002fceedaa_awi_1.jpg
ADDED
|
images/10_21_68ecddf9925604002f70876a_awi_1.jpg
ADDED
|
images/9_13_68f45a82b5e646002f924bbd_awi_1.jpg
ADDED
|
images/9_16_68f325be3a50df002e43a509_awi_1.jpg
ADDED
|
images/9_25_6901aa035e29cc002f516180_awi_1.jpg
ADDED
|
images/sample_results.json
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"image": "10_14_68e6fcf21e55ac002f310971_awi_1.jpg",
|
| 4 |
+
"prediction": "HR12AX8522",
|
| 5 |
+
"confidence": 0.9996901750564575
|
| 6 |
+
},
|
| 7 |
+
{
|
| 8 |
+
"image": "10_20_68f0b5ea88aefc002f543e60_awi_1.jpg",
|
| 9 |
+
"prediction": "HR35M2576",
|
| 10 |
+
"confidence": 0.9492533802986145
|
| 11 |
+
},
|
| 12 |
+
{
|
| 13 |
+
"image": "10_20_68fc5fb360b026003091b0eb_awi_1.jpg",
|
| 14 |
+
"prediction": "HR34M4007",
|
| 15 |
+
"confidence": 0.9999865293502808
|
| 16 |
+
},
|
| 17 |
+
{
|
| 18 |
+
"image": "10_20_69027403e20fe9002fceedaa_awi_1.jpg",
|
| 19 |
+
"prediction": "RJ29GB2097",
|
| 20 |
+
"confidence": 0.9950124621391296
|
| 21 |
+
},
|
| 22 |
+
{
|
| 23 |
+
"image": "10_21_68ecddf9925604002f70876a_awi_1.jpg",
|
| 24 |
+
"prediction": "HR38AB421",
|
| 25 |
+
"confidence": 0.9921688437461853
|
| 26 |
+
},
|
| 27 |
+
{
|
| 28 |
+
"image": "9_13_68f45a82b5e646002f924bbd_awi_1.jpg",
|
| 29 |
+
"prediction": "DL9CBH1669",
|
| 30 |
+
"confidence": 0.9998277425765991
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
"image": "9_16_68f325be3a50df002e43a509_awi_1.jpg",
|
| 34 |
+
"prediction": "HR51BV822",
|
| 35 |
+
"confidence": 0.9994223117828369
|
| 36 |
+
},
|
| 37 |
+
{
|
| 38 |
+
"image": "9_25_6901aa035e29cc002f516180_awi_1.jpg",
|
| 39 |
+
"prediction": "HR46E227",
|
| 40 |
+
"confidence": 0.9738591313362122
|
| 41 |
+
},
|
| 42 |
+
{
|
| 43 |
+
"image": "two_row-set-1_1_1420_.jpg",
|
| 44 |
+
"prediction": "UP14BQ208",
|
| 45 |
+
"confidence": 0.9955475330352783
|
| 46 |
+
}
|
| 47 |
+
]
|
images/two_row-set-1_1_1420_.jpg
ADDED
|
model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f9f1264e0c115a239ca6d6700a74ceffab17168b75dafc6c169c232012c68e47
|
| 3 |
+
size 149448448
|
requirements.txt
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
paddlepaddle-gpu>=2.6.0; platform_system != "Darwin"
|
| 2 |
+
paddlepaddle>=2.6.0; platform_system == "Darwin"
|
| 3 |
+
safetensors>=0.4.0
|
| 4 |
+
opencv-python>=4.8.0
|
| 5 |
+
numpy>=1.21.0
|
| 6 |
+
PyYAML>=6.0
|
sample_results.json
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"image": "10_14_68e6fcf21e55ac002f310971_awi_1.jpg",
|
| 4 |
+
"prediction": "HR12AX8522",
|
| 5 |
+
"confidence": 0.9997
|
| 6 |
+
},
|
| 7 |
+
{
|
| 8 |
+
"image": "10_20_68f0b5ea88aefc002f543e60_awi_1.jpg",
|
| 9 |
+
"prediction": "HR35M2576",
|
| 10 |
+
"confidence": 0.9493
|
| 11 |
+
},
|
| 12 |
+
{
|
| 13 |
+
"image": "10_20_68fc5fb360b026003091b0eb_awi_1.jpg",
|
| 14 |
+
"prediction": "HR34M4007",
|
| 15 |
+
"confidence": 1.0
|
| 16 |
+
},
|
| 17 |
+
{
|
| 18 |
+
"image": "10_20_69027403e20fe9002fceedaa_awi_1.jpg",
|
| 19 |
+
"prediction": "RJ29GB2097",
|
| 20 |
+
"confidence": 0.995
|
| 21 |
+
},
|
| 22 |
+
{
|
| 23 |
+
"image": "10_21_68ecddf9925604002f70876a_awi_1.jpg",
|
| 24 |
+
"prediction": "HR38AB421",
|
| 25 |
+
"confidence": 0.9922
|
| 26 |
+
},
|
| 27 |
+
{
|
| 28 |
+
"image": "9_13_68f45a82b5e646002f924bbd_awi_1.jpg",
|
| 29 |
+
"prediction": "DL9CBH1669",
|
| 30 |
+
"confidence": 0.9998
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
"image": "9_16_68f325be3a50df002e43a509_awi_1.jpg",
|
| 34 |
+
"prediction": "HR51BV822",
|
| 35 |
+
"confidence": 0.9994
|
| 36 |
+
},
|
| 37 |
+
{
|
| 38 |
+
"image": "9_25_6901aa035e29cc002f516180_awi_1.jpg",
|
| 39 |
+
"prediction": "HR46E227",
|
| 40 |
+
"confidence": 0.9739
|
| 41 |
+
},
|
| 42 |
+
{
|
| 43 |
+
"image": "two_row-set-1_1_1420_.jpg",
|
| 44 |
+
"prediction": "UP14BQ208",
|
| 45 |
+
"confidence": 0.9955
|
| 46 |
+
}
|
| 47 |
+
]
|
test.py
ADDED
|
@@ -0,0 +1,266 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Awiros-ANPR-OCR single-image / directory inference script.
|
| 3 |
+
|
| 4 |
+
Usage:
|
| 5 |
+
pip install -r requirements.txt
|
| 6 |
+
python test.py --image_path plate.jpg
|
| 7 |
+
python test.py --image_path plates_dir/ --output_json results.json
|
| 8 |
+
|
| 9 |
+
PaddleOCR repo is needed for model construction. On first run the script
|
| 10 |
+
auto-clones it into a PaddleOCR/ subfolder next to this file.
|
| 11 |
+
Pass --paddleocr_dir to point to an existing clone instead.
|
| 12 |
+
"""
|
| 13 |
+
|
| 14 |
+
import argparse
|
| 15 |
+
import copy
|
| 16 |
+
import json
|
| 17 |
+
import os
|
| 18 |
+
import subprocess
|
| 19 |
+
import sys
|
| 20 |
+
from pathlib import Path
|
| 21 |
+
|
| 22 |
+
import cv2
|
| 23 |
+
import numpy as np
|
| 24 |
+
|
| 25 |
+
_SCRIPT_DIR = Path(__file__).resolve().parent
|
| 26 |
+
|
| 27 |
+
# ---------------------------------------------------------------------------
|
| 28 |
+
# Model architecture config (PP-OCRv5 server rec, SVTR_HGNet)
|
| 29 |
+
# CTC head output: 64 classes (63 dict chars + blank)
|
| 30 |
+
# NRTR head output: 68 classes (64 + bos/eos/pad/unk)
|
| 31 |
+
# ---------------------------------------------------------------------------
|
| 32 |
+
CTC_NUM_CLASSES = 64
|
| 33 |
+
NRTR_NUM_CLASSES = 67 # NRTRHead internally adds +1, so 67 -> 68 to match weights
|
| 34 |
+
|
| 35 |
+
MODEL_CONFIG = {
|
| 36 |
+
"Architecture": {
|
| 37 |
+
"model_type": "rec",
|
| 38 |
+
"algorithm": "SVTR_HGNet",
|
| 39 |
+
"Transform": None,
|
| 40 |
+
"Backbone": {"name": "PPHGNetV2_B4", "text_rec": True},
|
| 41 |
+
"Head": {
|
| 42 |
+
"name": "MultiHead",
|
| 43 |
+
"out_channels_list": {
|
| 44 |
+
"CTCLabelDecode": CTC_NUM_CLASSES,
|
| 45 |
+
"NRTRLabelDecode": NRTR_NUM_CLASSES,
|
| 46 |
+
},
|
| 47 |
+
"head_list": [
|
| 48 |
+
{
|
| 49 |
+
"CTCHead": {
|
| 50 |
+
"Neck": {
|
| 51 |
+
"name": "svtr",
|
| 52 |
+
"dims": 120,
|
| 53 |
+
"depth": 2,
|
| 54 |
+
"hidden_dims": 120,
|
| 55 |
+
"kernel_size": [1, 3],
|
| 56 |
+
"use_guide": True,
|
| 57 |
+
},
|
| 58 |
+
"Head": {"fc_decay": 1e-05},
|
| 59 |
+
}
|
| 60 |
+
},
|
| 61 |
+
{"NRTRHead": {"nrtr_dim": 384, "max_text_length": 25}},
|
| 62 |
+
],
|
| 63 |
+
},
|
| 64 |
+
},
|
| 65 |
+
}
|
| 66 |
+
|
| 67 |
+
IMAGE_SHAPE = [3, 48, 320]
|
| 68 |
+
IMAGE_EXTENSIONS = {".jpg", ".jpeg", ".png", ".bmp", ".tiff", ".tif", ".webp"}
|
| 69 |
+
|
| 70 |
+
|
| 71 |
+
# ---------------------------------------------------------------------------
|
| 72 |
+
# PaddleOCR path setup
|
| 73 |
+
# ---------------------------------------------------------------------------
|
| 74 |
+
def _find_paddleocr(explicit_path=None):
    """Find a directory containing the ppocr package."""
    search_dirs = [Path(explicit_path)] if explicit_path else []
    search_dirs.extend([
        _SCRIPT_DIR / "PaddleOCR",
        _SCRIPT_DIR,
        Path.cwd(),
        Path.cwd() / "PaddleOCR",
    ])
    # First candidate that actually contains ppocr/__init__.py wins.
    for candidate in search_dirs:
        if (candidate / "ppocr" / "__init__.py").is_file():
            return candidate
    return None
|
| 89 |
+
|
| 90 |
+
|
| 91 |
+
def _ensure_paddleocr(explicit_path=None):
    """Make ppocr importable. Auto-clones PaddleOCR if not found."""
    repo_root = _find_paddleocr(explicit_path)
    if repo_root is None:
        # Shallow clone keeps the download small; only the ppocr package is needed.
        destination = _SCRIPT_DIR / "PaddleOCR"
        print(f"ppocr not found. Cloning PaddleOCR into {destination} ...")
        clone_cmd = [
            "git", "clone", "--depth", "1",
            "https://github.com/PaddlePaddle/PaddleOCR.git",
            str(destination),
        ]
        subprocess.check_call(clone_cmd)
        repo_root = destination
    # Prepend so `import ppocr` resolves to this checkout, not a pip install.
    path_entry = str(repo_root)
    if path_entry not in sys.path:
        sys.path.insert(0, path_entry)
    return repo_root
|
| 107 |
+
|
| 108 |
+
|
| 109 |
+
# ---------------------------------------------------------------------------
|
| 110 |
+
# Helpers
|
| 111 |
+
# ---------------------------------------------------------------------------
|
| 112 |
+
def parse_args():
    """Parse command-line options for the OCR inference script."""
    parser = argparse.ArgumentParser("Awiros-ANPR-OCR inference")
    add = parser.add_argument
    add("--image_path", required=True,
        help="Path to a single image or a directory of images.")
    add("--weights", default="",
        help="Path to model.safetensors (default: next to this script).")
    add("--dict_path", default="",
        help="Path to en_dict.txt (default: next to this script).")
    add("--device", default="gpu", choices=["gpu", "cpu"],
        help="Device for inference.")
    add("--output_json", default="",
        help="Optional output JSON path for results.")
    add("--paddleocr_dir", default="",
        help="Path to PaddleOCR repo root (auto-cloned if omitted).")
    return parser.parse_args()
|
| 127 |
+
|
| 128 |
+
|
| 129 |
+
def resolve_path(user_path: str, filename: str) -> str:
    """Resolve the path of a support file (weights, dictionary, ...).

    Uses *user_path* if it points at an existing file; otherwise falls back
    to *filename* located next to this script.

    Raises:
        FileNotFoundError: if neither location exists.
    """
    if user_path and os.path.exists(user_path):
        return user_path
    alt = _SCRIPT_DIR / filename
    if alt.exists():
        return str(alt)
    # Bug fix: the message previously read "Could not find (unknown)." and
    # never named the missing file; interpolate the actual filename.
    raise FileNotFoundError(
        f"Could not find {filename}. Place it next to this script or pass its path."
    )
|
| 139 |
+
|
| 140 |
+
|
| 141 |
+
def load_safetensors_to_paddle(paddle_mod, weight_path: str):
    """Load a .safetensors checkpoint and convert every array to a paddle tensor."""
    # Deferred import: safetensors is only needed when actually loading weights.
    from safetensors.numpy import load_file

    numpy_state = load_file(weight_path)
    paddle_state = {}
    for name, array in numpy_state.items():
        paddle_state[name] = paddle_mod.to_tensor(array)
    return paddle_state
|
| 145 |
+
|
| 146 |
+
|
| 147 |
+
def resize_for_rec(img_bgr, target_shape):
    """Resize a BGR crop to the recognizer input size, keeping aspect ratio.

    The image is scaled to the target height; the width scales proportionally,
    capped at the target width, and any remainder is zero-padded on the right.
    """
    _, h, w = target_shape
    img_h, img_w = img_bgr.shape[:2]
    ratio = h / img_h
    # Clamp to at least 1 px: cv2.resize rejects a zero-size destination,
    # which int(img_w * ratio) can produce for extremely thin crops.
    new_w = max(1, min(int(img_w * ratio), w))
    resized = cv2.resize(img_bgr, (new_w, h))
    if new_w < w:
        # Right-pad with black up to the full model width.
        padded = np.zeros((h, w, 3), dtype=np.uint8)
        padded[:, :new_w, :] = resized
        resized = padded
    return resized
|
| 158 |
+
|
| 159 |
+
|
| 160 |
+
def preprocess(img_bgr, target_shape):
    """Resize + normalize a BGR image into a CHW float32 array in [-1, 1]."""
    resized = resize_for_rec(img_bgr, target_shape)
    # Scale to [0, 1], then shift/scale to [-1, 1] (mean 0.5, std 0.5).
    normalized = (resized.astype(np.float32) / 255.0 - 0.5) / 0.5
    # HWC -> CHW, as expected by the paddle model.
    return normalized.transpose((2, 0, 1))
|
| 165 |
+
|
| 166 |
+
|
| 167 |
+
def collect_images(path: str):
    """Return [path] for a single file, or sorted image files for a directory."""
    root = Path(path)
    if root.is_file():
        return [root]
    if root.is_dir():
        images = [
            entry for entry in root.iterdir()
            if entry.is_file() and entry.suffix.lower() in IMAGE_EXTENSIONS
        ]
        return sorted(images)
    raise FileNotFoundError(f"Path not found: {path}")
|
| 175 |
+
|
| 176 |
+
|
| 177 |
+
# ---------------------------------------------------------------------------
|
| 178 |
+
# Main
|
| 179 |
+
# ---------------------------------------------------------------------------
|
| 180 |
+
def main():
    """Run ANPR OCR over one image or a directory and optionally dump JSON results."""
    args = parse_args()

    # 1. Ensure ppocr is importable, then import paddle + ppocr
    _ensure_paddleocr(args.paddleocr_dir or None)

    # Imports are deferred until after _ensure_paddleocr() has put the
    # PaddleOCR checkout on sys.path (ppocr is not assumed pip-installed).
    import paddle
    from ppocr.modeling.architectures import build_model as ppocr_build_model
    from ppocr.postprocess import build_post_process

    # 2. Device
    if args.device == "gpu" and not paddle.is_compiled_with_cuda():
        print("CUDA not available, falling back to CPU.")
        paddle.set_device("cpu")
    else:
        paddle.set_device(args.device)

    # 3. Resolve file paths
    weights_path = resolve_path(args.weights, "model.safetensors")
    dict_path = resolve_path(args.dict_path, "en_dict.txt")

    # 4. Build CTC post-processor
    # Only the CTC branch of the MultiHead is decoded at inference time.
    post_process = build_post_process({
        "name": "CTCLabelDecode",
        "character_dict_path": dict_path,
        "use_space_char": True,
    })

    # 5. Build model and load weights
    # Deep-copy so ppocr's build_model cannot mutate the module-level config.
    config = copy.deepcopy(MODEL_CONFIG)
    model = ppocr_build_model(config["Architecture"])
    model.eval()

    state_dict = load_safetensors_to_paddle(paddle, weights_path)
    model.set_state_dict(state_dict)
    print(f"Loaded weights from {weights_path}")

    # 6. Run inference
    image_paths = collect_images(args.image_path)
    print(f"Found {len(image_paths)} image(s)\n")

    results = []
    for img_path in image_paths:
        img_bgr = cv2.imread(str(img_path))
        if img_bgr is None:
            # Unreadable/corrupt file: warn and move on rather than abort the batch.
            print(f"WARNING: Could not read {img_path}, skipping.")
            continue

        # Add a leading batch dimension: the model expects NCHW input.
        tensor = paddle.to_tensor(
            np.expand_dims(preprocess(img_bgr, IMAGE_SHAPE), axis=0)
        )

        with paddle.no_grad():
            preds = model(tensor)

        # Output container varies by head/export: prefer the "ctc" entry of a
        # dict, else the first element of a list/tuple, else the raw tensor.
        if isinstance(preds, dict):
            pred_tensor = preds.get("ctc", next(iter(preds.values())))
        elif isinstance(preds, (list, tuple)):
            pred_tensor = preds[0]
        else:
            pred_tensor = preds

        post_result = post_process(pred_tensor.numpy())
        if isinstance(post_result, (list, tuple)) and len(post_result) > 0:
            # CTCLabelDecode yields (text, confidence) pairs; batch size is 1 here.
            text, confidence = post_result[0]
        else:
            text, confidence = "", 0.0

        # License-plate text is normalized to upper case.
        text = text.strip().upper()
        result = {
            "image": str(img_path.name),
            "prediction": text,
            "confidence": round(float(confidence), 4),
        }
        results.append(result)
        print(f" {img_path.name}: {text} (conf: {confidence:.4f})")

    # 7. Save JSON
    if args.output_json:
        out_path = Path(args.output_json)
        out_path.parent.mkdir(parents=True, exist_ok=True)
        out_path.write_text(json.dumps(results, indent=2))
        print(f"\nResults saved to {out_path}")
|
| 263 |
+
|
| 264 |
+
|
| 265 |
+
# Script entry point: run inference when executed directly.
if __name__ == "__main__":
    main()
|