yuvraj commited on
Commit ·
844ce1f
1
Parent(s): 52b2e72
Initial release: Awiros-ANPR-OCR model and inference script
Browse files- .gitattributes +1 -0
- LICENSE +189 -0
- README.md +124 -0
- TechnicalReport.pdf +3 -0
- en_dict.txt +62 -0
- images/10_14_68e6fcf21e55ac002f310971_awi_1.jpg +0 -0
- images/10_20_68f0b5ea88aefc002f543e60_awi_1.jpg +0 -0
- images/10_20_68fc5fb360b026003091b0eb_awi_1.jpg +0 -0
- images/10_20_69027403e20fe9002fceedaa_awi_1.jpg +0 -0
- images/10_21_68ecddf9925604002f70876a_awi_1.jpg +0 -0
- images/9_13_68f45a82b5e646002f924bbd_awi_1.jpg +0 -0
- images/9_16_68f325be3a50df002e43a509_awi_1.jpg +0 -0
- images/9_25_6901aa035e29cc002f516180_awi_1.jpg +0 -0
- images/sample_results.json +47 -0
- images/two_row-set-1_1_1420_.jpg +0 -0
- model.safetensors +3 -0
- requirements.txt +6 -0
- sample_results.json +47 -0
- test.py +266 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
*.pdf filter=lfs diff=lfs merge=lfs -text
|
LICENSE
ADDED
|
@@ -0,0 +1,189 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Apache License
|
| 2 |
+
Version 2.0, January 2004
|
| 3 |
+
http://www.apache.org/licenses/
|
| 4 |
+
|
| 5 |
+
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
| 6 |
+
|
| 7 |
+
1. Definitions.
|
| 8 |
+
|
| 9 |
+
"License" shall mean the terms and conditions for use, reproduction,
|
| 10 |
+
and distribution as defined by Sections 1 through 9 of this document.
|
| 11 |
+
|
| 12 |
+
"Licensor" shall mean the copyright owner or entity authorized by
|
| 13 |
+
the copyright owner that is granting the License.
|
| 14 |
+
|
| 15 |
+
"Legal Entity" shall mean the union of the acting entity and all
|
| 16 |
+
other entities that control, are controlled by, or are under common
|
| 17 |
+
control with that entity. For the purposes of this definition,
|
| 18 |
+
"control" means (i) the power, direct or indirect, to cause the
|
| 19 |
+
direction or management of such entity, whether by contract or
|
| 20 |
+
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
| 21 |
+
outstanding shares, or (iii) beneficial ownership of such entity.
|
| 22 |
+
|
| 23 |
+
"You" (or "Your") shall mean an individual or Legal Entity
|
| 24 |
+
exercising permissions granted by this License.
|
| 25 |
+
|
| 26 |
+
"Source" form shall mean the preferred form for making modifications,
|
| 27 |
+
including but not limited to software source code, documentation
|
| 28 |
+
source, and configuration files.
|
| 29 |
+
|
| 30 |
+
"Object" form shall mean any form resulting from mechanical
|
| 31 |
+
transformation or translation of a Source form, including but
|
| 32 |
+
not limited to compiled object code, generated documentation,
|
| 33 |
+
and conversions to other media types.
|
| 34 |
+
|
| 35 |
+
"Work" shall mean the work of authorship, whether in Source or
|
| 36 |
+
Object form, made available under the License, as indicated by a
|
| 37 |
+
copyright notice that is included in or attached to the work.
|
| 38 |
+
|
| 39 |
+
"Derivative Works" shall mean any work, whether in Source or Object
|
| 40 |
+
form, that is based on (or derived from) the Work and for which the
|
| 41 |
+
editorial revisions, annotations, elaborations, or other modifications
|
| 42 |
+
represent, as a whole, an original work of authorship. For the purposes
|
| 43 |
+
of this License, Derivative Works shall not include works that remain
|
| 44 |
+
separable from, or merely link (or bind by name) to the interfaces of,
|
| 45 |
+
the Work and Derivative Works thereof.
|
| 46 |
+
|
| 47 |
+
"Contribution" shall mean any work of authorship, including
|
| 48 |
+
the original version of the Work and any modifications or additions
|
| 49 |
+
to that Work or Derivative Works thereof, that is intentionally
|
| 50 |
+
submitted to the Licensor for inclusion in the Work by the copyright owner
|
| 51 |
+
or by an individual or Legal Entity authorized to submit on behalf of
|
| 52 |
+
the copyright owner. For the purposes of this definition, "submitted"
|
| 53 |
+
means any form of electronic, verbal, or written communication sent
|
| 54 |
+
to the Licensor or its representatives, including but not limited to
|
| 55 |
+
communication on electronic mailing lists, source code control systems,
|
| 56 |
+
and issue tracking systems that are managed by, or on behalf of, the
|
| 57 |
+
Licensor for the purpose of discussing and improving the Work, but
|
| 58 |
+
excluding communication that is conspicuously marked or otherwise
|
| 59 |
+
designated in writing by the copyright owner as "Not a Contribution."
|
| 60 |
+
|
| 61 |
+
"Contributor" shall mean Licensor and any individual or Legal Entity
|
| 62 |
+
on behalf of whom a Contribution has been received by the Licensor and
|
| 63 |
+
subsequently incorporated within the Work.
|
| 64 |
+
|
| 65 |
+
2. Grant of Copyright License. Subject to the terms and conditions of
|
| 66 |
+
this License, each Contributor hereby grants to You a perpetual,
|
| 67 |
+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
| 68 |
+
copyright license to reproduce, prepare Derivative Works of,
|
| 69 |
+
publicly display, publicly perform, sublicense, and distribute the
|
| 70 |
+
Work and such Derivative Works in Source or Object form.
|
| 71 |
+
|
| 72 |
+
3. Grant of Patent License. Subject to the terms and conditions of
|
| 73 |
+
this License, each Contributor hereby grants to You a perpetual,
|
| 74 |
+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
| 75 |
+
(except as stated in this section) patent license to make, have made,
|
| 76 |
+
use, offer to sell, sell, import, and otherwise transfer the Work,
|
| 77 |
+
where such license applies only to those patent claims licensable
|
| 78 |
+
by such Contributor that are necessarily infringed by their
|
| 79 |
+
Contribution(s) alone or by combination of their Contribution(s)
|
| 80 |
+
with the Work to which such Contribution(s) was submitted. If You
|
| 81 |
+
institute patent litigation against any entity (including a
|
| 82 |
+
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
| 83 |
+
or a Contribution incorporated within the Work constitutes direct
|
| 84 |
+
or contributory patent infringement, then any patent licenses
|
| 85 |
+
granted to You under this License for that Work shall terminate
|
| 86 |
+
as of the date such litigation is filed.
|
| 87 |
+
|
| 88 |
+
4. Redistribution. You may reproduce and distribute copies of the
|
| 89 |
+
Work or Derivative Works thereof in any medium, with or without
|
| 90 |
+
modifications, and in Source or Object form, provided that You
|
| 91 |
+
meet the following conditions:
|
| 92 |
+
|
| 93 |
+
(a) You must give any other recipients of the Work or
|
| 94 |
+
Derivative Works a copy of this License; and
|
| 95 |
+
|
| 96 |
+
(b) You must cause any modified files to carry prominent notices
|
| 97 |
+
stating that You changed the files; and
|
| 98 |
+
|
| 99 |
+
(c) You must retain, in the Source form of any Derivative Works
|
| 100 |
+
that You distribute, all copyright, patent, trademark, and
|
| 101 |
+
attribution notices from the Source form of the Work,
|
| 102 |
+
excluding those notices that do not pertain to any part of
|
| 103 |
+
the Derivative Works; and
|
| 104 |
+
|
| 105 |
+
(d) If the Work includes a "NOTICE" text file as part of its
|
| 106 |
+
distribution, then any Derivative Works that You distribute must
|
| 107 |
+
include a readable copy of the attribution notices contained
|
| 108 |
+
within such NOTICE file, excluding any notices that do not
|
| 109 |
+
pertain to any part of the Derivative Works, in at least one
|
| 110 |
+
of the following places: within a NOTICE text file distributed
|
| 111 |
+
as part of the Derivative Works; within the Source form or
|
| 112 |
+
documentation, if provided along with the Derivative Works; or,
|
| 113 |
+
within a display generated by the Derivative Works, if and
|
| 114 |
+
wherever such third-party notices normally appear. The contents
|
| 115 |
+
of the NOTICE file are for informational purposes only and
|
| 116 |
+
do not modify the License. You may add Your own attribution
|
| 117 |
+
notices within Derivative Works that You distribute, alongside
|
| 118 |
+
or as an addendum to the NOTICE text from the Work, provided
|
| 119 |
+
that such additional attribution notices cannot be construed
|
| 120 |
+
as modifying the License.
|
| 121 |
+
|
| 122 |
+
You may add Your own copyright statement to Your modifications and
|
| 123 |
+
may provide additional or different license terms and conditions
|
| 124 |
+
for use, reproduction, or distribution of Your modifications, or
|
| 125 |
+
for any such Derivative Works as a whole, provided Your use,
|
| 126 |
+
reproduction, and distribution of the Work otherwise complies with
|
| 127 |
+
the conditions stated in this License.
|
| 128 |
+
|
| 129 |
+
5. Submission of Contributions. Unless You explicitly state otherwise,
|
| 130 |
+
any Contribution intentionally submitted for inclusion in the Work
|
| 131 |
+
by You to the Licensor shall be under the terms and conditions of
|
| 132 |
+
this License, without any additional terms or conditions.
|
| 133 |
+
Notwithstanding the above, nothing herein shall supersede or modify
|
| 134 |
+
the terms of any separate license agreement you may have executed
|
| 135 |
+
with Licensor regarding such Contributions.
|
| 136 |
+
|
| 137 |
+
6. Trademarks. This License does not grant permission to use the trade
|
| 138 |
+
names, trademarks, service marks, or product names of the Licensor,
|
| 139 |
+
except as required for reasonable and customary use in describing the
|
| 140 |
+
origin of the Work and reproducing the content of the NOTICE file.
|
| 141 |
+
|
| 142 |
+
7. Disclaimer of Warranty. Unless required by applicable law or
|
| 143 |
+
agreed to in writing, Licensor provides the Work (and each
|
| 144 |
+
Contributor provides its Contributions) on an "AS IS" BASIS,
|
| 145 |
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
| 146 |
+
implied, including, without limitation, any warranties or conditions
|
| 147 |
+
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
| 148 |
+
PARTICULAR PURPOSE. You are solely responsible for determining the
|
| 149 |
+
appropriateness of using or redistributing the Work and assume any
|
| 150 |
+
risks associated with Your exercise of permissions under this License.
|
| 151 |
+
|
| 152 |
+
8. Limitation of Liability. In no event and under no legal theory,
|
| 153 |
+
whether in tort (including negligence), contract, or otherwise,
|
| 154 |
+
unless required by applicable law (such as deliberate and grossly
|
| 155 |
+
negligent acts) or agreed to in writing, shall any Contributor be
|
| 156 |
+
liable to You for damages, including any direct, indirect, special,
|
| 157 |
+
incidental, or consequential damages of any character arising as a
|
| 158 |
+
result of this License or out of the use or inability to use the
|
| 159 |
+
Work (including but not limited to damages for loss of goodwill,
|
| 160 |
+
work stoppage, computer failure or malfunction, or any and all
|
| 161 |
+
other commercial damages or losses), even if such Contributor
|
| 162 |
+
has been advised of the possibility of such damages.
|
| 163 |
+
|
| 164 |
+
9. Accepting Warranty or Additional Liability. While redistributing
|
| 165 |
+
the Work or Derivative Works thereof, You may choose to offer,
|
| 166 |
+
and charge a fee for, acceptance of support, warranty, indemnity,
|
| 167 |
+
or other liability obligations and/or rights consistent with this
|
| 168 |
+
License. However, in accepting such obligations, You may act only
|
| 169 |
+
on Your own behalf and on Your sole responsibility, not on behalf
|
| 170 |
+
of any other Contributor, and only if You agree to indemnify,
|
| 171 |
+
defend, and hold each Contributor harmless for any liability
|
| 172 |
+
incurred by, or claims asserted against, such Contributor by reason
|
| 173 |
+
of your accepting any such warranty or additional liability.
|
| 174 |
+
|
| 175 |
+
END OF TERMS AND CONDITIONS
|
| 176 |
+
|
| 177 |
+
Copyright 2026 Awiros
|
| 178 |
+
|
| 179 |
+
Licensed under the Apache License, Version 2.0 (the "License");
|
| 180 |
+
you may not use this file except in compliance with the License.
|
| 181 |
+
You may obtain a copy of the License at
|
| 182 |
+
|
| 183 |
+
http://www.apache.org/licenses/LICENSE-2.0
|
| 184 |
+
|
| 185 |
+
Unless required by applicable law or agreed to in writing, software
|
| 186 |
+
distributed under the License is distributed on an "AS IS" BASIS,
|
| 187 |
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 188 |
+
See the License for the specific language governing permissions and
|
| 189 |
+
limitations under the License.
|
README.md
CHANGED
|
@@ -1,3 +1,127 @@
|
|
| 1 |
---
|
| 2 |
license: apache-2.0
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
license: apache-2.0
|
| 3 |
+
language:
|
| 4 |
+
- en
|
| 5 |
+
library_name: paddlepaddle
|
| 6 |
+
tags: [anpr, ocr, license-plate, indian-plates, PP-OCRv5]
|
| 7 |
---
|
| 8 |
+
|
| 9 |
+
# Data-Intelligent ANPR: Scalable License Plate Recognition Under Real-World Data Constraints
|
| 10 |
+
|
| 11 |
+
## Abstract
|
| 12 |
+
|
| 13 |
+
This release provides Awiros-ANPR-OCR, a 37M-parameter specialist model for
|
| 14 |
+
Automatic Number Plate Recognition (ANPR) on Indian license plates. The model
|
| 15 |
+
is built on the PP-OCRv5 encoder-decoder backbone (SVTR_HGNet with PPHGNetV2_B4)
|
| 16 |
+
and fine-tuned on a curated 558,767-sample corpus spanning both standard
|
| 17 |
+
single-row and non-standard dual-row Indian plate formats.
|
| 18 |
+
|
| 19 |
+
Starting from only 6,839 publicly available labeled samples, the training
|
| 20 |
+
corpus was grown through a data engineering pipeline combining synthetic data
|
| 21 |
+
synthesis, consensus pseudo-labeling, distribution-aware curation, VLM-assisted
|
| 22 |
+
data cleanup, and state-balanced batch sampling. The resulting model achieves
|
| 23 |
+
**98.42% accuracy** with **sub-6ms on-device inference** on an NVIDIA RTX 3090
|
| 24 |
+
--- a 1,260x latency advantage over frontier multimodal models like Gemini.
|
| 25 |
+
|
| 26 |
+
For the full data curation and training methodology, refer to our technical
|
| 27 |
+
report: [Technical Report](TechnicalReport.pdf).
|
| 28 |
+
|
| 29 |
+
## Evaluation and Results
|
| 30 |
+
|
| 31 |
+
All systems were evaluated on a shared held-out validation set constructed
|
| 32 |
+
using a distribution-aware split covering all Indian state codes, including
|
| 33 |
+
both standard and non-standard plate formats.
|
| 34 |
+
|
| 35 |
+
| System | Params | Overall Acc. | 1-Row Acc. | 2-Row Acc. | Latency Avg (ms) | Throughput (img/s) |
|
| 36 |
+
| --- | --- | --- | --- | --- | --- | --- |
|
| 37 |
+
| **Awiros-ANPR-OCR (Ours)** | **37.3M** | **98.42%** | **98.83%** | **96.91%** | **5.09** | **196.5** |
|
| 38 |
+
| Gemini-3-flash-preview | ~5-10B | 93.89% | 94.70% | 91.20% | 6,430 | 0.2 |
|
| 39 |
+
| Gemini-2.5-flash-preview | ~5B | 87.23% | 89.66% | 78.38% | --- | --- |
|
| 40 |
+
| Tencent HunyuanOCR | 996M | 67.62% | 76.65% | 34.78% | 309.15 | 3.2 |
|
| 41 |
+
| PP-OCRv5 Pretrained | 53.6M | 57.96% | 73.55% | 0.24% | 5.25 | 190.6 |
|
| 42 |
+
|
| 43 |
+
Latency measured on a single NVIDIA RTX 3090 GPU (batch size 1). Gemini
|
| 44 |
+
latency is end-to-end API round-trip. PP-OCRv5 Pretrained shares the same
|
| 45 |
+
architecture but uses original pretrained weights without domain-specific
|
| 46 |
+
fine-tuning --- the 57.96% to 98.42% gap is entirely a data story.
|
| 47 |
+
|
| 48 |
+
## Qualitative Comparison
|
| 49 |
+
|
| 50 |
+
Representative samples where Awiros-ANPR-OCR correctly transcribes the plate
|
| 51 |
+
while all baselines produce errors. Common failure modes for baselines include
|
| 52 |
+
confusing visually similar characters (Q→0, V→Y, M→R, B→8) and truncating
|
| 53 |
+
dual-row plates.
|
| 54 |
+
|
| 55 |
+
[](images/sample_plates.jpg)
|
| 56 |
+
|
| 57 |
+
## Key Design Decisions
|
| 58 |
+
|
| 59 |
+
- **End-to-end architecture**: Eliminates brittle multi-stage pre-processing
|
| 60 |
+
pipelines (perspective normalization, row segmentation, per-region
|
| 61 |
+
recognition) that prior systems relied upon
|
| 62 |
+
- **Consensus pseudo-labeling**: Two independently trained models must agree on
|
| 63 |
+
a transcription before it is accepted as a label, substantially reducing
|
| 64 |
+
pseudo-label noise
|
| 65 |
+
- **Distribution-aware curation**: Non-linear bucket-wise train/val splits
|
| 66 |
+
ensure rare state codes are not lost to validation
|
| 67 |
+
- **State-balanced batch sampling**: Uniform state-code sampling within each
|
| 68 |
+
batch prevents training dynamics from being dominated by high-frequency states
|
| 69 |
+
- **Negative sample training**: Unreadable plates labeled with an abstention
|
| 70 |
+
token suppress hallucination on degraded inputs
|
| 71 |
+
|
| 72 |
+
## Model Inference
|
| 73 |
+
|
| 74 |
+
Use the official PaddleOCR repository to run single-image inference with this
|
| 75 |
+
release model.
|
| 76 |
+
|
| 77 |
+
1. Clone PaddleOCR and move into the repository root.
|
| 78 |
+
```bash
|
| 79 |
+
git clone https://github.com/PaddlePaddle/PaddleOCR.git
|
| 80 |
+
cd PaddleOCR
|
| 81 |
+
```
|
| 82 |
+
2. Install dependencies.
|
| 83 |
+
```bash
|
| 84 |
+
pip install paddlepaddle # or paddlepaddle-gpu
|
| 85 |
+
pip install safetensors pillow opencv-python pyyaml
|
| 86 |
+
```
|
| 87 |
+
3. Copy `test.py` and `en_dict.txt` from this release folder into the
|
| 88 |
+
PaddleOCR repository root.
|
| 89 |
+
4. Place `model.safetensors` in the PaddleOCR repository root (or specify the
|
| 90 |
+
path via `--weights`).
|
| 91 |
+
5. Run inference on a single image.
|
| 92 |
+
```bash
|
| 93 |
+
python test.py \
|
| 94 |
+
--image_path path/to/plate_crop.jpg \
|
| 95 |
+
--weights model.safetensors \
|
| 96 |
+
--device gpu
|
| 97 |
+
```
|
| 98 |
+
6. Run inference on a directory of images.
|
| 99 |
+
```bash
|
| 100 |
+
python test.py \
|
| 101 |
+
--image_path path/to/plate_crops/ \
|
| 102 |
+
--weights model.safetensors \
|
| 103 |
+
--device gpu \
|
| 104 |
+
--output_json results.json
|
| 105 |
+
```
|
| 106 |
+
|
| 107 |
+
## Architecture Details
|
| 108 |
+
|
| 109 |
+
| Component | Value |
|
| 110 |
+
| --- | --- |
|
| 111 |
+
| Framework | PaddlePaddle / PP-OCRv5 |
|
| 112 |
+
| Backbone | PPHGNetV2_B4 |
|
| 113 |
+
| Head | MultiHead (CTCHead + NRTRHead) |
|
| 114 |
+
| Input shape | 3 x 48 x 320 |
|
| 115 |
+
| Character set | 0-9, A-Z, a-z, space (63 classes) |
|
| 116 |
+
| Max text length | 25 |
|
| 117 |
+
| Parameters | 37.3M |
|
| 118 |
+
| Export format | SafeTensors (from PaddlePaddle params) |
|
| 119 |
+
|
| 120 |
+
## Summary
|
| 121 |
+
|
| 122 |
+
We present a practical, data-centric ANPR framework that achieves
|
| 123 |
+
production-grade accuracy on Indian license plates without reliance on large
|
| 124 |
+
manually annotated datasets or frontier model scale. The same PP-OCRv5
|
| 125 |
+
architecture scores 57.96% out-of-the-box and 98.42% after our data
|
| 126 |
+
engineering pipeline --- demonstrating that the data, not the model, is the
|
| 127 |
+
primary driver of performance in domain-specific OCR.
|
TechnicalReport.pdf
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5b4ac0c588ddd43f9e4a1fa81b9b2c2dde62c200ed78abdb90ecf05991b0e0fd
|
| 3 |
+
size 246863
|
en_dict.txt
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
0
|
| 2 |
+
1
|
| 3 |
+
2
|
| 4 |
+
3
|
| 5 |
+
4
|
| 6 |
+
5
|
| 7 |
+
6
|
| 8 |
+
7
|
| 9 |
+
8
|
| 10 |
+
9
|
| 11 |
+
A
|
| 12 |
+
B
|
| 13 |
+
C
|
| 14 |
+
D
|
| 15 |
+
E
|
| 16 |
+
F
|
| 17 |
+
G
|
| 18 |
+
H
|
| 19 |
+
I
|
| 20 |
+
J
|
| 21 |
+
K
|
| 22 |
+
L
|
| 23 |
+
M
|
| 24 |
+
N
|
| 25 |
+
O
|
| 26 |
+
P
|
| 27 |
+
Q
|
| 28 |
+
R
|
| 29 |
+
S
|
| 30 |
+
T
|
| 31 |
+
U
|
| 32 |
+
V
|
| 33 |
+
W
|
| 34 |
+
X
|
| 35 |
+
Y
|
| 36 |
+
Z
|
| 37 |
+
a
|
| 38 |
+
b
|
| 39 |
+
c
|
| 40 |
+
d
|
| 41 |
+
e
|
| 42 |
+
f
|
| 43 |
+
g
|
| 44 |
+
h
|
| 45 |
+
i
|
| 46 |
+
j
|
| 47 |
+
k
|
| 48 |
+
l
|
| 49 |
+
m
|
| 50 |
+
n
|
| 51 |
+
o
|
| 52 |
+
p
|
| 53 |
+
q
|
| 54 |
+
r
|
| 55 |
+
s
|
| 56 |
+
t
|
| 57 |
+
u
|
| 58 |
+
v
|
| 59 |
+
w
|
| 60 |
+
x
|
| 61 |
+
y
|
| 62 |
+
z
|
images/10_14_68e6fcf21e55ac002f310971_awi_1.jpg
ADDED
|
images/10_20_68f0b5ea88aefc002f543e60_awi_1.jpg
ADDED
|
images/10_20_68fc5fb360b026003091b0eb_awi_1.jpg
ADDED
|
images/10_20_69027403e20fe9002fceedaa_awi_1.jpg
ADDED
|
images/10_21_68ecddf9925604002f70876a_awi_1.jpg
ADDED
|
images/9_13_68f45a82b5e646002f924bbd_awi_1.jpg
ADDED
|
images/9_16_68f325be3a50df002e43a509_awi_1.jpg
ADDED
|
images/9_25_6901aa035e29cc002f516180_awi_1.jpg
ADDED
|
images/sample_results.json
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"image": "10_14_68e6fcf21e55ac002f310971_awi_1.jpg",
|
| 4 |
+
"prediction": "HR12AX8522",
|
| 5 |
+
"confidence": 0.9996901750564575
|
| 6 |
+
},
|
| 7 |
+
{
|
| 8 |
+
"image": "10_20_68f0b5ea88aefc002f543e60_awi_1.jpg",
|
| 9 |
+
"prediction": "HR35M2576",
|
| 10 |
+
"confidence": 0.9492533802986145
|
| 11 |
+
},
|
| 12 |
+
{
|
| 13 |
+
"image": "10_20_68fc5fb360b026003091b0eb_awi_1.jpg",
|
| 14 |
+
"prediction": "HR34M4007",
|
| 15 |
+
"confidence": 0.9999865293502808
|
| 16 |
+
},
|
| 17 |
+
{
|
| 18 |
+
"image": "10_20_69027403e20fe9002fceedaa_awi_1.jpg",
|
| 19 |
+
"prediction": "RJ29GB2097",
|
| 20 |
+
"confidence": 0.9950124621391296
|
| 21 |
+
},
|
| 22 |
+
{
|
| 23 |
+
"image": "10_21_68ecddf9925604002f70876a_awi_1.jpg",
|
| 24 |
+
"prediction": "HR38AB421",
|
| 25 |
+
"confidence": 0.9921688437461853
|
| 26 |
+
},
|
| 27 |
+
{
|
| 28 |
+
"image": "9_13_68f45a82b5e646002f924bbd_awi_1.jpg",
|
| 29 |
+
"prediction": "DL9CBH1669",
|
| 30 |
+
"confidence": 0.9998277425765991
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
"image": "9_16_68f325be3a50df002e43a509_awi_1.jpg",
|
| 34 |
+
"prediction": "HR51BV822",
|
| 35 |
+
"confidence": 0.9994223117828369
|
| 36 |
+
},
|
| 37 |
+
{
|
| 38 |
+
"image": "9_25_6901aa035e29cc002f516180_awi_1.jpg",
|
| 39 |
+
"prediction": "HR46E227",
|
| 40 |
+
"confidence": 0.9738591313362122
|
| 41 |
+
},
|
| 42 |
+
{
|
| 43 |
+
"image": "two_row-set-1_1_1420_.jpg",
|
| 44 |
+
"prediction": "UP14BQ208",
|
| 45 |
+
"confidence": 0.9955475330352783
|
| 46 |
+
}
|
| 47 |
+
]
|
images/two_row-set-1_1_1420_.jpg
ADDED
|
model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f9f1264e0c115a239ca6d6700a74ceffab17168b75dafc6c169c232012c68e47
|
| 3 |
+
size 149448448
|
requirements.txt
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
paddlepaddle-gpu>=2.6.0; platform_system != "Darwin"
|
| 2 |
+
paddlepaddle>=2.6.0; platform_system == "Darwin"
|
| 3 |
+
safetensors>=0.4.0
|
| 4 |
+
opencv-python>=4.8.0
|
| 5 |
+
numpy>=1.21.0
|
| 6 |
+
PyYAML>=6.0
|
sample_results.json
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"image": "10_14_68e6fcf21e55ac002f310971_awi_1.jpg",
|
| 4 |
+
"prediction": "HR12AX8522",
|
| 5 |
+
"confidence": 0.9997
|
| 6 |
+
},
|
| 7 |
+
{
|
| 8 |
+
"image": "10_20_68f0b5ea88aefc002f543e60_awi_1.jpg",
|
| 9 |
+
"prediction": "HR35M2576",
|
| 10 |
+
"confidence": 0.9493
|
| 11 |
+
},
|
| 12 |
+
{
|
| 13 |
+
"image": "10_20_68fc5fb360b026003091b0eb_awi_1.jpg",
|
| 14 |
+
"prediction": "HR34M4007",
|
| 15 |
+
"confidence": 1.0
|
| 16 |
+
},
|
| 17 |
+
{
|
| 18 |
+
"image": "10_20_69027403e20fe9002fceedaa_awi_1.jpg",
|
| 19 |
+
"prediction": "RJ29GB2097",
|
| 20 |
+
"confidence": 0.995
|
| 21 |
+
},
|
| 22 |
+
{
|
| 23 |
+
"image": "10_21_68ecddf9925604002f70876a_awi_1.jpg",
|
| 24 |
+
"prediction": "HR38AB421",
|
| 25 |
+
"confidence": 0.9922
|
| 26 |
+
},
|
| 27 |
+
{
|
| 28 |
+
"image": "9_13_68f45a82b5e646002f924bbd_awi_1.jpg",
|
| 29 |
+
"prediction": "DL9CBH1669",
|
| 30 |
+
"confidence": 0.9998
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
"image": "9_16_68f325be3a50df002e43a509_awi_1.jpg",
|
| 34 |
+
"prediction": "HR51BV822",
|
| 35 |
+
"confidence": 0.9994
|
| 36 |
+
},
|
| 37 |
+
{
|
| 38 |
+
"image": "9_25_6901aa035e29cc002f516180_awi_1.jpg",
|
| 39 |
+
"prediction": "HR46E227",
|
| 40 |
+
"confidence": 0.9739
|
| 41 |
+
},
|
| 42 |
+
{
|
| 43 |
+
"image": "two_row-set-1_1_1420_.jpg",
|
| 44 |
+
"prediction": "UP14BQ208",
|
| 45 |
+
"confidence": 0.9955
|
| 46 |
+
}
|
| 47 |
+
]
|
test.py
ADDED
|
@@ -0,0 +1,266 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Awiros-ANPR-OCR single-image / directory inference script.
|
| 3 |
+
|
| 4 |
+
Usage:
|
| 5 |
+
pip install -r requirements.txt
|
| 6 |
+
python test.py --image_path plate.jpg
|
| 7 |
+
python test.py --image_path plates_dir/ --output_json results.json
|
| 8 |
+
|
| 9 |
+
PaddleOCR repo is needed for model construction. On first run the script
|
| 10 |
+
auto-clones it into a PaddleOCR/ subfolder next to this file.
|
| 11 |
+
Pass --paddleocr_dir to point to an existing clone instead.
|
| 12 |
+
"""
|
| 13 |
+
|
| 14 |
+
import argparse
|
| 15 |
+
import copy
|
| 16 |
+
import json
|
| 17 |
+
import os
|
| 18 |
+
import subprocess
|
| 19 |
+
import sys
|
| 20 |
+
from pathlib import Path
|
| 21 |
+
|
| 22 |
+
import cv2
|
| 23 |
+
import numpy as np
|
| 24 |
+
|
| 25 |
+
_SCRIPT_DIR = Path(__file__).resolve().parent
|
| 26 |
+
|
| 27 |
+
# ---------------------------------------------------------------------------
|
| 28 |
+
# Model architecture config (PP-OCRv5 server rec, SVTR_HGNet)
|
| 29 |
+
# CTC head output: 64 classes (63 dict chars + blank)
|
| 30 |
+
# NRTR head output: 68 classes (64 + bos/eos/pad/unk)
|
| 31 |
+
# ---------------------------------------------------------------------------
|
| 32 |
+
CTC_NUM_CLASSES = 64
|
| 33 |
+
NRTR_NUM_CLASSES = 67 # NRTRHead internally adds +1, so 67 -> 68 to match weights
|
| 34 |
+
|
| 35 |
+
MODEL_CONFIG = {
|
| 36 |
+
"Architecture": {
|
| 37 |
+
"model_type": "rec",
|
| 38 |
+
"algorithm": "SVTR_HGNet",
|
| 39 |
+
"Transform": None,
|
| 40 |
+
"Backbone": {"name": "PPHGNetV2_B4", "text_rec": True},
|
| 41 |
+
"Head": {
|
| 42 |
+
"name": "MultiHead",
|
| 43 |
+
"out_channels_list": {
|
| 44 |
+
"CTCLabelDecode": CTC_NUM_CLASSES,
|
| 45 |
+
"NRTRLabelDecode": NRTR_NUM_CLASSES,
|
| 46 |
+
},
|
| 47 |
+
"head_list": [
|
| 48 |
+
{
|
| 49 |
+
"CTCHead": {
|
| 50 |
+
"Neck": {
|
| 51 |
+
"name": "svtr",
|
| 52 |
+
"dims": 120,
|
| 53 |
+
"depth": 2,
|
| 54 |
+
"hidden_dims": 120,
|
| 55 |
+
"kernel_size": [1, 3],
|
| 56 |
+
"use_guide": True,
|
| 57 |
+
},
|
| 58 |
+
"Head": {"fc_decay": 1e-05},
|
| 59 |
+
}
|
| 60 |
+
},
|
| 61 |
+
{"NRTRHead": {"nrtr_dim": 384, "max_text_length": 25}},
|
| 62 |
+
],
|
| 63 |
+
},
|
| 64 |
+
},
|
| 65 |
+
}
|
| 66 |
+
|
| 67 |
+
IMAGE_SHAPE = [3, 48, 320]
|
| 68 |
+
IMAGE_EXTENSIONS = {".jpg", ".jpeg", ".png", ".bmp", ".tiff", ".tif", ".webp"}
|
| 69 |
+
|
| 70 |
+
|
| 71 |
+
# ---------------------------------------------------------------------------
|
| 72 |
+
# PaddleOCR path setup
|
| 73 |
+
# ---------------------------------------------------------------------------
|
| 74 |
+
def _find_paddleocr(explicit_path=None):
    """Find a directory containing the ppocr package."""
    search_dirs = [Path(explicit_path)] if explicit_path else []
    search_dirs.extend([
        _SCRIPT_DIR / "PaddleOCR",
        _SCRIPT_DIR,
        Path.cwd(),
        Path.cwd() / "PaddleOCR",
    ])
    # First candidate that actually contains ppocr/__init__.py wins.
    for candidate in search_dirs:
        if (candidate / "ppocr" / "__init__.py").is_file():
            return candidate
    return None
|
| 89 |
+
|
| 90 |
+
|
| 91 |
+
def _ensure_paddleocr(explicit_path=None):
    """Make ppocr importable. Auto-clones PaddleOCR if not found."""
    repo_root = _find_paddleocr(explicit_path)
    if repo_root is None:
        # Shallow clone keeps the download small; only the ppocr package is needed.
        destination = _SCRIPT_DIR / "PaddleOCR"
        print(f"ppocr not found. Cloning PaddleOCR into {destination} ...")
        clone_cmd = [
            "git", "clone", "--depth", "1",
            "https://github.com/PaddlePaddle/PaddleOCR.git",
            str(destination),
        ]
        subprocess.check_call(clone_cmd)
        repo_root = destination
    # Prepend so `import ppocr` resolves to this checkout, not a pip install.
    path_entry = str(repo_root)
    if path_entry not in sys.path:
        sys.path.insert(0, path_entry)
    return repo_root
|
| 107 |
+
|
| 108 |
+
|
| 109 |
+
# ---------------------------------------------------------------------------
|
| 110 |
+
# Helpers
|
| 111 |
+
# ---------------------------------------------------------------------------
|
| 112 |
+
def parse_args():
    """Parse command-line options for the OCR inference script."""
    parser = argparse.ArgumentParser("Awiros-ANPR-OCR inference")
    add = parser.add_argument
    add("--image_path", required=True,
        help="Path to a single image or a directory of images.")
    add("--weights", default="",
        help="Path to model.safetensors (default: next to this script).")
    add("--dict_path", default="",
        help="Path to en_dict.txt (default: next to this script).")
    add("--device", default="gpu", choices=["gpu", "cpu"],
        help="Device for inference.")
    add("--output_json", default="",
        help="Optional output JSON path for results.")
    add("--paddleocr_dir", default="",
        help="Path to PaddleOCR repo root (auto-cloned if omitted).")
    return parser.parse_args()
|
| 127 |
+
|
| 128 |
+
|
| 129 |
+
def resolve_path(user_path: str, filename: str) -> str:
    """Resolve the path of a support file (weights, dictionary, ...).

    Uses *user_path* if it points at an existing file; otherwise falls back
    to *filename* located next to this script.

    Raises:
        FileNotFoundError: if neither location exists.
    """
    if user_path and os.path.exists(user_path):
        return user_path
    alt = _SCRIPT_DIR / filename
    if alt.exists():
        return str(alt)
    # Bug fix: the message previously read "Could not find (unknown)." and
    # never named the missing file; interpolate the actual filename.
    raise FileNotFoundError(
        f"Could not find {filename}. Place it next to this script or pass its path."
    )
|
| 139 |
+
|
| 140 |
+
|
| 141 |
+
def load_safetensors_to_paddle(paddle_mod, weight_path: str):
    """Load a .safetensors checkpoint and convert every array to a paddle tensor."""
    # Deferred import: safetensors is only needed when actually loading weights.
    from safetensors.numpy import load_file

    numpy_state = load_file(weight_path)
    paddle_state = {}
    for name, array in numpy_state.items():
        paddle_state[name] = paddle_mod.to_tensor(array)
    return paddle_state
|
| 145 |
+
|
| 146 |
+
|
| 147 |
+
def resize_for_rec(img_bgr, target_shape):
    """Resize a BGR crop to the recognizer input size, keeping aspect ratio.

    The image is scaled to the target height; the width scales proportionally,
    capped at the target width, and any remainder is zero-padded on the right.
    """
    _, h, w = target_shape
    img_h, img_w = img_bgr.shape[:2]
    ratio = h / img_h
    # Clamp to at least 1 px: cv2.resize rejects a zero-size destination,
    # which int(img_w * ratio) can produce for extremely thin crops.
    new_w = max(1, min(int(img_w * ratio), w))
    resized = cv2.resize(img_bgr, (new_w, h))
    if new_w < w:
        # Right-pad with black up to the full model width.
        padded = np.zeros((h, w, 3), dtype=np.uint8)
        padded[:, :new_w, :] = resized
        resized = padded
    return resized
|
| 158 |
+
|
| 159 |
+
|
| 160 |
+
def preprocess(img_bgr, target_shape):
    """Resize + normalize a BGR image into a CHW float32 array in [-1, 1]."""
    resized = resize_for_rec(img_bgr, target_shape)
    # Scale to [0, 1], then shift/scale to [-1, 1] (mean 0.5, std 0.5).
    normalized = (resized.astype(np.float32) / 255.0 - 0.5) / 0.5
    # HWC -> CHW, as expected by the paddle model.
    return normalized.transpose((2, 0, 1))
|
| 165 |
+
|
| 166 |
+
|
| 167 |
+
def collect_images(path: str):
    """Return [path] for a single file, or sorted image files for a directory."""
    root = Path(path)
    if root.is_file():
        return [root]
    if root.is_dir():
        images = [
            entry for entry in root.iterdir()
            if entry.is_file() and entry.suffix.lower() in IMAGE_EXTENSIONS
        ]
        return sorted(images)
    raise FileNotFoundError(f"Path not found: {path}")
|
| 175 |
+
|
| 176 |
+
|
| 177 |
+
# ---------------------------------------------------------------------------
|
| 178 |
+
# Main
|
| 179 |
+
# ---------------------------------------------------------------------------
|
| 180 |
+
def main():
    """Run ANPR OCR over one image or a directory and optionally dump JSON results."""
    args = parse_args()

    # 1. Ensure ppocr is importable, then import paddle + ppocr
    _ensure_paddleocr(args.paddleocr_dir or None)

    # Imports are deferred until after _ensure_paddleocr() has put the
    # PaddleOCR checkout on sys.path (ppocr is not assumed pip-installed).
    import paddle
    from ppocr.modeling.architectures import build_model as ppocr_build_model
    from ppocr.postprocess import build_post_process

    # 2. Device
    if args.device == "gpu" and not paddle.is_compiled_with_cuda():
        print("CUDA not available, falling back to CPU.")
        paddle.set_device("cpu")
    else:
        paddle.set_device(args.device)

    # 3. Resolve file paths
    weights_path = resolve_path(args.weights, "model.safetensors")
    dict_path = resolve_path(args.dict_path, "en_dict.txt")

    # 4. Build CTC post-processor
    # Only the CTC branch of the MultiHead is decoded at inference time.
    post_process = build_post_process({
        "name": "CTCLabelDecode",
        "character_dict_path": dict_path,
        "use_space_char": True,
    })

    # 5. Build model and load weights
    # Deep-copy so ppocr's build_model cannot mutate the module-level config.
    config = copy.deepcopy(MODEL_CONFIG)
    model = ppocr_build_model(config["Architecture"])
    model.eval()

    state_dict = load_safetensors_to_paddle(paddle, weights_path)
    model.set_state_dict(state_dict)
    print(f"Loaded weights from {weights_path}")

    # 6. Run inference
    image_paths = collect_images(args.image_path)
    print(f"Found {len(image_paths)} image(s)\n")

    results = []
    for img_path in image_paths:
        img_bgr = cv2.imread(str(img_path))
        if img_bgr is None:
            # Unreadable/corrupt file: warn and move on rather than abort the batch.
            print(f"WARNING: Could not read {img_path}, skipping.")
            continue

        # Add a leading batch dimension: the model expects NCHW input.
        tensor = paddle.to_tensor(
            np.expand_dims(preprocess(img_bgr, IMAGE_SHAPE), axis=0)
        )

        with paddle.no_grad():
            preds = model(tensor)

        # Output container varies by head/export: prefer the "ctc" entry of a
        # dict, else the first element of a list/tuple, else the raw tensor.
        if isinstance(preds, dict):
            pred_tensor = preds.get("ctc", next(iter(preds.values())))
        elif isinstance(preds, (list, tuple)):
            pred_tensor = preds[0]
        else:
            pred_tensor = preds

        post_result = post_process(pred_tensor.numpy())
        if isinstance(post_result, (list, tuple)) and len(post_result) > 0:
            # CTCLabelDecode yields (text, confidence) pairs; batch size is 1 here.
            text, confidence = post_result[0]
        else:
            text, confidence = "", 0.0

        # License-plate text is normalized to upper case.
        text = text.strip().upper()
        result = {
            "image": str(img_path.name),
            "prediction": text,
            "confidence": round(float(confidence), 4),
        }
        results.append(result)
        print(f" {img_path.name}: {text} (conf: {confidence:.4f})")

    # 7. Save JSON
    if args.output_json:
        out_path = Path(args.output_json)
        out_path.parent.mkdir(parents=True, exist_ok=True)
        out_path.write_text(json.dumps(results, indent=2))
        print(f"\nResults saved to {out_path}")
|
| 263 |
+
|
| 264 |
+
|
| 265 |
+
# Script entry point: run inference when executed directly.
if __name__ == "__main__":
    main()
|