JoyCN committed on
Commit
aae75f0
·
verified ·
1 Parent(s): a4d720a

Reorg: move v2/v3/v4 pth to legacy/, English README as default (+ README_zh.md), update config.json

Browse files

- moved 5 legacy pth files (v2/v3/v4) from repo root to legacy/ via in-repo LFS copy
- deleted two obsolete root yml files (superseded by configs/det/PP-OCRv5/*.yml and configs/rec/PP-OCRv5/*.yml)
- replaced README.md with English primary version (zh copy kept as README_zh.md)
- updated config.json to reflect the full 15-model v5 catalog + legacy map

No PP-OCRv5 safetensors / yml / dict URLs changed.

PP-OCRv5_server_det.yml DELETED
@@ -1,174 +0,0 @@
1
- Global:
2
- model_name: PP-OCRv5_server_det # To use static model for inference.
3
- debug: false
4
- use_gpu: true
5
- epoch_num: &epoch_num 500
6
- log_smooth_window: 20
7
- print_batch_step: 10
8
- save_model_dir: ./output/PP-OCRv5_server_det
9
- save_epoch_step: 10
10
- eval_batch_step:
11
- - 0
12
- - 1500
13
- cal_metric_during_train: false
14
- checkpoints:
15
- pretrained_model: https://paddle-model-ecology.bj.bcebos.com/paddlex/official_pretrained_model/PPHGNetV2_B4_ocr_det.pdparams
16
- save_inference_dir: null
17
- use_visualdl: false
18
- infer_img: doc/imgs_en/img_10.jpg
19
- save_res_path: ./checkpoints/det_db/predicts_db.txt
20
- distributed: true
21
-
22
- Architecture:
23
- model_type: det
24
- algorithm: DB
25
- Transform: null
26
- Backbone:
27
- name: PPHGNetV2_B4
28
- det: True
29
- Neck:
30
- name: LKPAN
31
- out_channels: 256
32
- intracl: true
33
- Head:
34
- name: PFHeadLocal
35
- k: 50
36
- mode: "large"
37
-
38
-
39
- Loss:
40
- name: DBLoss
41
- balance_loss: true
42
- main_loss_type: DiceLoss
43
- alpha: 5
44
- beta: 10
45
- ohem_ratio: 3
46
-
47
- Optimizer:
48
- name: Adam
49
- beta1: 0.9
50
- beta2: 0.999
51
- lr:
52
- name: Cosine
53
- learning_rate: 0.001 #(8*8c)
54
- warmup_epoch: 2
55
- regularizer:
56
- name: L2
57
- factor: 1e-6
58
-
59
- PostProcess:
60
- name: DBPostProcess
61
- thresh: 0.3
62
- box_thresh: 0.6
63
- max_candidates: 1000
64
- unclip_ratio: 1.5
65
-
66
- Metric:
67
- name: DetMetric
68
- main_indicator: hmean
69
-
70
- Train:
71
- dataset:
72
- name: SimpleDataSet
73
- data_dir: ./train_data/icdar2015/text_localization/
74
- label_file_list:
75
- - ./train_data/icdar2015/text_localization/train_icdar2015_label.txt
76
- ratio_list: [1.0]
77
- transforms:
78
- - DecodeImage:
79
- img_mode: BGR
80
- channel_first: false
81
- - DetLabelEncode: null
82
- - CopyPaste: null
83
- - IaaAugment:
84
- augmenter_args:
85
- - type: Fliplr
86
- args:
87
- p: 0.5
88
- - type: Affine
89
- args:
90
- rotate:
91
- - -10
92
- - 10
93
- - type: Resize
94
- args:
95
- size:
96
- - 0.5
97
- - 3
98
- - EastRandomCropData:
99
- size:
100
- - 640
101
- - 640
102
- max_tries: 50
103
- keep_ratio: true
104
- - MakeBorderMap:
105
- shrink_ratio: 0.4
106
- thresh_min: 0.3
107
- thresh_max: 0.7
108
- total_epoch: *epoch_num
109
- - MakeShrinkMap:
110
- shrink_ratio: 0.4
111
- min_text_size: 8
112
- total_epoch: *epoch_num
113
- - NormalizeImage:
114
- scale: 1./255.
115
- mean:
116
- - 0.485
117
- - 0.456
118
- - 0.406
119
- std:
120
- - 0.229
121
- - 0.224
122
- - 0.225
123
- order: hwc
124
- - ToCHWImage: null
125
- - KeepKeys:
126
- keep_keys:
127
- - image
128
- - threshold_map
129
- - threshold_mask
130
- - shrink_map
131
- - shrink_mask
132
- loader:
133
- shuffle: true
134
- drop_last: false
135
- batch_size_per_card: 8
136
- num_workers: 8
137
-
138
- Eval:
139
- dataset:
140
- name: SimpleDataSet
141
- data_dir: ./train_data/icdar2015/text_localization/
142
- label_file_list:
143
- - ./train_data/icdar2015/text_localization/test_icdar2015_label.txt
144
- transforms:
145
- transforms:
146
- - DecodeImage:
147
- img_mode: BGR
148
- channel_first: false
149
- - DetLabelEncode: null
150
- - DetResizeForTest:
151
- - NormalizeImage:
152
- scale: 1./255.
153
- mean:
154
- - 0.485
155
- - 0.456
156
- - 0.406
157
- std:
158
- - 0.229
159
- - 0.224
160
- - 0.225
161
- order: hwc
162
- - ToCHWImage: null
163
- - KeepKeys:
164
- keep_keys:
165
- - image
166
- - shape
167
- - polys
168
- - ignore_tags
169
- loader:
170
- shuffle: false
171
- drop_last: false
172
- batch_size_per_card: 1
173
- num_workers: 2
174
- profiler_options: null
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
PP-OCRv5_server_rec.yml DELETED
@@ -1,136 +0,0 @@
1
- Global:
2
- model_name: PP-OCRv5_server_rec # To use static model for inference.
3
- debug: false
4
- use_gpu: true
5
- epoch_num: 75
6
- log_smooth_window: 20
7
- print_batch_step: 10
8
- save_model_dir: ./output/PP-OCRv5_server_rec
9
- save_epoch_step: 1
10
- eval_batch_step: [0, 2000]
11
- cal_metric_during_train: true
12
- calc_epoch_interval: 1
13
- pretrained_model:
14
- checkpoints:
15
- save_inference_dir:
16
- use_visualdl: false
17
- infer_img: doc/imgs_words/ch/word_1.jpg
18
- character_dict_path: ./pytorchocr/utils/dict/ppocrv5_dict.txt
19
- max_text_length: &max_text_length 25
20
- infer_mode: false
21
- use_space_char: true
22
- distributed: true
23
- save_res_path: ./output/rec/predicts_ppocrv5.txt
24
- d2s_train_image_shape: [3, 48, 320]
25
-
26
-
27
- Optimizer:
28
- name: Adam
29
- beta1: 0.9
30
- beta2: 0.999
31
- lr:
32
- name: Cosine
33
- learning_rate: 0.0005
34
- warmup_epoch: 1
35
- regularizer:
36
- name: L2
37
- factor: 3.0e-05
38
-
39
-
40
- Architecture:
41
- model_type: rec
42
- algorithm: SVTR_HGNet
43
- Transform:
44
- Backbone:
45
- name: PPHGNetV2_B4
46
- text_rec: True
47
- Head:
48
- name: MultiHead
49
- head_list:
50
- - CTCHead:
51
- Neck:
52
- name: svtr
53
- dims: 120
54
- depth: 2
55
- hidden_dims: 120
56
- kernel_size: [1, 3]
57
- use_guide: True
58
- Head:
59
- fc_decay: 0.00001
60
- - NRTRHead:
61
- nrtr_dim: 384
62
- max_text_length: *max_text_length
63
-
64
- Loss:
65
- name: MultiLoss
66
- loss_config_list:
67
- - CTCLoss:
68
- - NRTRLoss:
69
-
70
- PostProcess:
71
- name: CTCLabelDecode
72
-
73
- Metric:
74
- name: RecMetric
75
- main_indicator: acc
76
-
77
- Train:
78
- dataset:
79
- name: MultiScaleDataSet
80
- ds_width: false
81
- data_dir: ./train_data/
82
- ext_op_transform_idx: 1
83
- label_file_list:
84
- - ./train_data/train_list.txt
85
- transforms:
86
- - DecodeImage:
87
- img_mode: BGR
88
- channel_first: false
89
- - RecAug:
90
- - MultiLabelEncode:
91
- gtc_encode: NRTRLabelEncode
92
- - KeepKeys:
93
- keep_keys:
94
- - image
95
- - label_ctc
96
- - label_gtc
97
- - length
98
- - valid_ratio
99
- sampler:
100
- name: MultiScaleSampler
101
- scales: [[320, 32], [320, 48], [320, 64]]
102
- first_bs: &bs 128
103
- fix_bs: false
104
- divided_factor: [8, 16] # w, h
105
- is_training: True
106
- loader:
107
- shuffle: true
108
- batch_size_per_card: *bs
109
- drop_last: true
110
- num_workers: 16
111
- Eval:
112
- dataset:
113
- name: SimpleDataSet
114
- data_dir: ./train_data
115
- label_file_list:
116
- - ./train_data/val_list.txt
117
- transforms:
118
- - DecodeImage:
119
- img_mode: BGR
120
- channel_first: false
121
- - MultiLabelEncode:
122
- gtc_encode: NRTRLabelEncode
123
- - RecResizeImg:
124
- image_shape: [3, 48, 320]
125
- - KeepKeys:
126
- keep_keys:
127
- - image
128
- - label_ctc
129
- - label_gtc
130
- - length
131
- - valid_ratio
132
- loader:
133
- shuffle: false
134
- drop_last: false
135
- batch_size_per_card: 128
136
- num_workers: 4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
README.md CHANGED
@@ -28,116 +28,128 @@ pipeline_tag: image-to-text
28
 
29
  # PP-OCRv5 PyTorch Model Zoo
30
 
31
- PP-OCRv5 全系列模型的 **PyTorch** 版本(safetensors 格式),从百度 PaddlePaddle 官方 `.pdparams` 动态图权重精确转换而来,**推理结果与 PaddleOCR 原版位精确一致**。
32
 
33
- - **文本检测**:2 个(mobile / server)
34
- - **文本识别(基础)**:2 个,覆盖 简中 / 繁中 / 英文 / 日文
35
- - **文本识别(多语言)**:11 个,覆盖 100+ 语种(韩 / 法 / 德 / 俄 / 阿拉伯 / 天城文 / 泰 / 希腊 / 泰米尔 / 泰卢固 / 纯英文等)
36
 
37
- > 本仓库**仅包含权重、配置和字典**,不包含推理代码。推理请配合 [PaddleOCR2Pytorch](https://github.com/frotms/PaddleOCR2Pytorch) 使用,或参考下文"自定义 Python 推理"章节自行集成。
 
 
38
 
39
  ---
40
 
41
- ## 仓库结构
42
 
43
  ```
44
  .
45
- ├── README.md
46
- ├── LICENSE # Apache 2.0
47
- ├── *.safetensors # 15 个 PP-OCRv5 权重(位于根目录)
 
 
 
 
48
  ├── configs/
49
  │ ├── det/PP-OCRv5/
50
- │ │ ├── PP-OCRv5_mobile_det.yml # 移动端检测
51
- │ │ └── PP-OCRv5_server_det.yml # 服务端检测
52
  │ └── rec/PP-OCRv5/
53
- │ ├── PP-OCRv5_mobile_rec.yml # 基础识别(中繁英日,移动端)
54
- │ ├── PP-OCRv5_server_rec.yml # 基础识别(中繁英日,服务端)
55
- │ └── multi_language/
56
- │ ├── en_PP-OCRv5_mobile_rec.yaml # 英文专用
57
- │ ├── korean_PP-OCRv5_mobile_rec.yml # 韩文 + 英文
58
- │ ├── latin_PP-OCRv5_mobile_rec.yml # 拉丁字母 40+ 语种(法/德/西/意/葡 等)
59
- │ ├── eslav_PP-OCRv5_mobile_rec.yml # 东斯拉夫(俄/白俄/乌克兰)
60
- │ ├── cyrillic_PP-OCRv5_mobile_rec.yaml # 西里尔字母 33 种
61
- │ ├── arabic_PP-OCRv5_mobile_rec.yaml # 阿拉伯 / 波斯 / 维吾尔 / 乌尔都 等
62
- │ ├── devanagari_PP-OCRv5_mobile_rec.yaml # 天城文系 14 种(印地/马拉地/尼泊尔/梵文 等)
63
- │ ├── th_PP-OCRv5_mobile_rec.yaml # 泰文
64
- │ ├── el_PP-OCRv5_mobile_rec.yaml # 希腊文
65
- │ ├── ta_PP-OCRv5_mobile_rec.yaml # 泰米尔文
66
- │ └── te_PP-OCRv5_mobile_rec.yaml # 泰卢固文
67
- └── dicts/ # 字符集字典(rec 推理必需)
68
- ├── ppocrv5_dict.txt # 基础(中繁英日)
69
- ├── ppocrv5_en_dict.txt
70
- ├── ppocrv5_korean_dict.txt
71
- └── ...(共 12 个)
 
 
 
 
 
 
72
  ```
73
 
74
- > 所有 rec yaml 的 `character_dict_path` 已改写为相对路径 `./dicts/...`,`git clone` 或 `snapshot_download` 下载后**无需修改路径**即可使用。
75
 
76
  ---
77
 
78
- ## 模型清单
79
 
80
- ### 文本检测
81
 
82
- | 权重文件 | 对应 yaml | 场景 | 文件大小 |
83
  |---|---|---|---|
84
- | `ptocr_v5_mobile_det.safetensors` | `configs/det/PP-OCRv5/PP-OCRv5_mobile_det.yml` | 移动端 / CPU 推荐 | ~14 MB |
85
- | `ptocr_v5_server_det.safetensors` | `configs/det/PP-OCRv5/PP-OCRv5_server_det.yml` | 服务端 / 高精度 | ~101 MB |
86
 
87
- ### 文本识别(基础)
88
 
89
- | 权重文件 | 对应 yaml | 支持语种 | 文件大小 |
90
  |---|---|---|---|
91
- | `ptocr_v5_mobile_rec.safetensors` | `configs/rec/PP-OCRv5/PP-OCRv5_mobile_rec.yml` | 简中 / 繁中 / 英文 / 日文 | ~31 MB |
92
- | `ptocr_v5_server_rec.safetensors` | `configs/rec/PP-OCRv5/PP-OCRv5_server_rec.yml` | 简中 / 繁中 / 英文 / 日文 | ~128 MB |
93
 
94
- ### 文本识别(多语言)
95
 
96
- 所有多语言识别模型共享相同网络(`SVTR_LCNet` + `PPLCNetV3`),仅字符集不同。文件大小 23–28 MB。
97
 
98
- | 权重文件 | 支持语种 |
99
  |---|---|
100
- | `ptocr_v5_en_mobile_rec.safetensors` | 英文专用(针对英文场景定向优化) |
101
- | `ptocr_v5_korean_mobile_rec.safetensors` | 韩文、英文 |
102
- | `ptocr_v5_latin_mobile_rec.safetensors` | 法文、德文、南非荷兰文、意大利文、西班牙文、葡萄牙文、捷克文、丹麦文、爱沙尼亚文、克罗地亚文、荷兰文、挪威文、波兰文、瑞典文、芬兰文、土耳其文、越南文 等 40+ 语种 |
103
- | `ptocr_v5_eslav_mobile_rec.safetensors` | 俄罗斯文、白俄罗斯文、乌克兰文、英文 |
104
- | `ptocr_v5_cyrillic_mobile_rec.safetensors` | 俄文、白俄文、乌克兰文、塞尔维亚(西里尔)、保加利亚、蒙古 等 33 种西里尔字母语言 |
105
- | `ptocr_v5_arabic_mobile_rec.safetensors` | 阿拉伯文、波斯文、维吾尔文、乌尔都文、普什图文、信德文 等 |
106
- | `ptocr_v5_devanagari_mobile_rec.safetensors` | 印地文、马拉地文、尼泊尔文、梵文 等 14 种天城文系语言 |
107
- | `ptocr_v5_th_mobile_rec.safetensors` | 泰文、英文 |
108
- | `ptocr_v5_el_mobile_rec.safetensors` | 希腊文、英文 |
109
- | `ptocr_v5_ta_mobile_rec.safetensors` | 泰米尔文、英文 |
110
- | `ptocr_v5_te_mobile_rec.safetensors` | 泰卢固文、英文 |
111
 
112
  ---
113
 
114
- ## 快速开始
115
 
116
- ### 下载权重
117
 
118
  ```python
119
  from huggingface_hub import snapshot_download, hf_hub_download
120
 
121
- # 方式 1:下载整个仓库(权重 + yml + 字典 + README)
122
  repo_dir = snapshot_download(repo_id="JoyCN/PaddleOCR-Pytorch")
123
- print("仓库下载到:", repo_dir)
124
 
125
- # 方式 2:只下载单个权重文件
126
  weight_path = hf_hub_download(
127
  repo_id="JoyCN/PaddleOCR-Pytorch",
128
- filename="ptocr_v5_korean_mobile_rec.safetensors"
129
  )
130
  ```
131
 
132
- ### 使用 PaddleOCR2Pytorch 项目做推理(推荐)
133
 
134
  ```bash
135
- # 1. clone 推理代码仓
136
  git clone https://github.com/frotms/PaddleOCR2Pytorch
137
  cd PaddleOCR2Pytorch
138
  pip install torch safetensors pyyaml shapely pyclipper opencv-python pillow scikit-image
139
 
140
- # 2. 用本仓库下载的权重 + yml(假设下载到 /path/to/hf_repo)
141
  python tools/infer/predict_rec.py \
142
  --image_dir doc/imgs_words/korean/1.jpg \
143
  --rec_algorithm SVTR_LCNet \
@@ -148,36 +160,35 @@ python tools/infer/predict_rec.py \
148
  --use_gpu False
149
  ```
150
 
151
- > PaddleOCR2Pytorch 的 `base_ocr_v20.py` 已原生支持 `.safetensors`(按后缀自动识别,向后兼容 `.pth`)。
152
 
153
- ### 自定义 Python 推理代码
154
 
155
- 如果你不想依赖 PaddleOCR2Pytorch 完整推理栈,下面是**一个最小 rec 推理代码片段**的骨架。它展示了如何加载权重并做前向推理——但你仍然需要 PaddleOCR2Pytorch 项目中的网络定义代码(`pytorchocr/modeling/`)。
156
 
157
  ```python
158
  import sys, numpy as np, cv2, torch, yaml
159
  from safetensors.torch import load_file
160
 
161
- # 以下 import 需要你先 clone https://github.com/frotms/PaddleOCR2Pytorch
162
- # 并把其根目录加入 PYTHONPATH
163
  sys.path.insert(0, "/path/to/PaddleOCR2Pytorch")
164
  from pytorchocr.modeling.architectures.base_model import BaseModel
165
  from pytorchocr.postprocess import build_post_process
166
 
167
- HF_REPO = "/path/to/hf_repo" # snapshot_download 得到的路径
168
  yml_path = f"{HF_REPO}/configs/rec/PP-OCRv5/multi_language/korean_PP-OCRv5_mobile_rec.yml"
169
  weight_path = f"{HF_REPO}/ptocr_v5_korean_mobile_rec.safetensors"
170
 
171
- # 1. 读配置 + 字符集
172
  with open(yml_path, encoding="utf-8") as f:
173
  cfg = yaml.safe_load(f)
174
- dict_path = cfg["Global"]["character_dict_path"] # './dicts/ppocrv5_korean_dict.txt'
175
  dict_abs = f"{HF_REPO}/{dict_path.lstrip('./')}"
176
  with open(dict_abs, encoding="utf-8") as f:
177
  chars = [l.strip("\n\r") for l in f]
178
- n_char = len(chars) + 2 # +1 blank, +1 space(依 use_space_char 而定)
179
 
180
- # 2. 构建网络 + 加载权重(safetensors 零代码执行、mmap 快速加载)
181
  cfg["Architecture"]["Head"]["out_channels_list"] = {
182
  "CTCLabelDecode": n_char,
183
  "SARLabelDecode": n_char + 2,
@@ -187,7 +198,7 @@ net = BaseModel(cfg["Architecture"], out_channels=n_char)
187
  net.load_state_dict(load_file(weight_path, device="cpu"))
188
  net.eval()
189
 
190
- # 3. 读图 + 预处理(resize [3, 48, 320],归一化到 [-1, 1]
191
  img = cv2.imread("input_word.jpg")
192
  h, w = img.shape[:2]
193
  ratio = w / h
@@ -199,56 +210,77 @@ x = canvas.astype(np.float32).transpose(2, 0, 1) / 255.0
199
  x = (x - 0.5) / 0.5
200
  x = torch.from_numpy(x).unsqueeze(0)
201
 
202
- # 4. 前向 + CTC 解码
203
  with torch.no_grad():
204
  logits = net(x)
205
- post_op = build_post_process({"name": "CTCLabelDecode",
206
- "character_dict_path": dict_abs,
207
- "use_space_char": True})
 
 
208
  result = post_op(logits)
209
- print("识别结果:", result) # e.g. [('바탕으로', 0.9998)]
210
  ```
211
 
212
- ### 推理所需依赖
213
 
214
  ```
215
- torch >= 1.13
216
- safetensors >= 0.4
217
  numpy, pillow, opencv-python
218
  pyyaml, shapely, pyclipper
219
- scikit-image # det 后处理需要
220
  ```
221
 
222
  ---
223
 
224
- ## 转换 & 验证来源
225
 
226
- - 源权重:PaddlePaddle 官方 `.pdparams`,来自 [paddle-model-ecology.bj.bcebos.com/paddlex/official_pretrained_model/](https://paddle-model-ecology.bj.bcebos.com/paddlex/official_pretrained_model/)
227
- - 转换工具:[PaddleOCR2Pytorch](https://github.com/frotms/PaddleOCR2Pytorch) 中的 `converter/ppocr_v5_det_converter.py` / `ppocr_v5_rec_converter.py`
228
- - 验证:在 macOS Apple Silicon (M 系列) CPU 环境下做过端到端推理,**多语言识别结果与 PaddleOCR 官方 `.pdparams` 位精确一致**(float32 小数点后 8 位完全相同)
 
229
 
230
- 样例推理输出(CPU<0.7 s / 张):
231
 
232
- | 样例 | 识别结果 | 置信度 |
233
  |---|---|---|
234
- | 中文 `word_1.jpg` | 韩国小馆 | 0.99797755 |
235
- | 韩文 `korean/1.jpg` | 바탕으로 | 0.99977183 |
236
- | 法文 `french/1.jpg` | de l'amendement, | 0.99656343 |
237
- | 阿拉伯 `arabic/ar_1.jpg` | الكيصياوي | 0.68281130 |
238
 
239
  ---
240
 
241
- ## 许可证 & 致谢
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
242
 
243
  - **License**: Apache License 2.0
244
- - 权重来源:[PaddleOCR](https://github.com/PaddlePaddle/PaddleOCR) by PaddlePaddle 团队,Apache 2.0
245
- - 转换工具:[PaddleOCR2Pytorch](https://github.com/frotms/PaddleOCR2Pytorch)Apache 2.0
246
 
247
- 如果本仓库对你有帮助,请同时给上述两个原始项目 star 致谢。
248
 
249
  ---
250
 
251
- ## 引用
252
 
253
  ```bibtex
254
  @misc{pp_ocrv5_pytorch_joycn_2025,
 
28
 
29
  # PP-OCRv5 PyTorch Model Zoo
30
 
31
+ PyTorch weights (**safetensors** format) for the full **PP-OCRv5** family, converted bit-exactly from the official PaddlePaddle `.pdparams` dynamic-graph weights — inference outputs are **identical to the original PaddleOCR** down to float32 precision.
32
 
33
+ - **Text Detection**: 2 models (mobile / server)
34
+ - **Text Recognition (base)**: 2 models covering Simplified Chinese / Traditional Chinese / English / Japanese
35
+ - **Text Recognition (multilingual)**: 11 models covering **100+ languages** (Korean, French, German, Russian, Arabic, Devanagari, Thai, Greek, Tamil, Telugu, etc.)
36
 
37
+ > This repo contains **weights + configs + dictionaries only**, not inference code. For inference, use [PaddleOCR2Pytorch](https://github.com/frotms/PaddleOCR2Pytorch), or follow the "Custom Python Inference" section below.
38
+ >
39
+ > Also available: [README_zh.md](./README_zh.md) (中文版).
40
 
41
  ---
42
 
43
+ ## Repository Layout
44
 
45
  ```
46
  .
47
+ ├── README.md / README_zh.md
48
+ ├── LICENSE # Apache 2.0
49
+ ├── config.json # Repo metadata + model index
50
+
51
+ ├── ptocr_v5_*.safetensors # 15 PP-OCRv5 weights at root (stable URLs)
52
+ ├── ptocr_v5_server_{det,rec}.pth # Legacy pth copies of V5 server (kept)
53
+
54
  ├── configs/
55
  │ ├── det/PP-OCRv5/
56
+ │ │ ├── PP-OCRv5_mobile_det.yml
57
+ │ │ └── PP-OCRv5_server_det.yml
58
  │ └── rec/PP-OCRv5/
59
+ │ ├── PP-OCRv5_mobile_rec.yml # zh / zh-Hant / en / ja
60
+ │ ├── PP-OCRv5_server_rec.yml
61
+ │ └── multi_language/ # 11 multilingual rec yamls
62
+ │ ├── en_PP-OCRv5_mobile_rec.yaml
63
+ │ ├── korean_PP-OCRv5_mobile_rec.yml
64
+ │ ├── latin_PP-OCRv5_mobile_rec.yml # French / German / Spanish / ... (40+ Latin-script)
65
+ │ ├── eslav_PP-OCRv5_mobile_rec.yml # Russian / Belarusian / Ukrainian
66
+ │ ├── cyrillic_PP-OCRv5_mobile_rec.yaml # 33 Cyrillic-script languages
67
+ │ ├── arabic_PP-OCRv5_mobile_rec.yaml # Arabic / Persian / Uyghur / Urdu / ...
68
+ │ ├── devanagari_PP-OCRv5_mobile_rec.yaml # Hindi / Marathi / Nepali / Sanskrit / ...
69
+ │ ├── th_PP-OCRv5_mobile_rec.yaml # Thai
70
+ │ ├── el_PP-OCRv5_mobile_rec.yaml # Greek
71
+ │ ├── ta_PP-OCRv5_mobile_rec.yaml # Tamil
72
+ │ └── te_PP-OCRv5_mobile_rec.yaml # Telugu
73
+
74
+ ├── dicts/ # Character set dictionaries (required for rec)
75
+ │ ├── ppocrv5_dict.txt # base (zh / zh-Hant / en / ja)
76
+ │ └── ppocrv5_<lang>_dict.txt # 11 multilingual dicts
77
+
78
+ └── legacy/ # Older PP-OCR v2/v3/v4 weights (kept for back-compat)
79
+ ├── ch_ptocr_mobile_v2.0_cls_infer.pth
80
+ ├── ch_ptocr_v4_det_infer.pth
81
+ ├── ch_ptocr_v4_rec_infer.pth
82
+ ├── en_ptocr_v3_det_infer.pth
83
+ └── en_ptocr_v4_rec_infer.pth
84
  ```
85
 
86
+ > All rec yamls use relative `character_dict_path: ./dicts/...`. After `git clone` or `snapshot_download`, paths resolve correctly with **no modification required**.
87
 
88
  ---
89
 
90
+ ## Model Catalog
91
 
92
+ ### Text Detection
93
 
94
+ | Weight | Config | Use case | Size |
95
  |---|---|---|---|
96
+ | `ptocr_v5_mobile_det.safetensors` | `configs/det/PP-OCRv5/PP-OCRv5_mobile_det.yml` | Mobile / CPU-friendly | ~14 MB |
97
+ | `ptocr_v5_server_det.safetensors` | `configs/det/PP-OCRv5/PP-OCRv5_server_det.yml` | Server / high-accuracy | ~101 MB |
98
 
99
+ ### Text Recognition (Base)
100
 
101
+ | Weight | Config | Languages | Size |
102
  |---|---|---|---|
103
+ | `ptocr_v5_mobile_rec.safetensors` | `configs/rec/PP-OCRv5/PP-OCRv5_mobile_rec.yml` | Simplified / Traditional Chinese, English, Japanese | ~31 MB |
104
+ | `ptocr_v5_server_rec.safetensors` | `configs/rec/PP-OCRv5/PP-OCRv5_server_rec.yml` | same as above, higher accuracy | ~128 MB |
105
 
106
+ ### Text Recognition (Multilingual)
107
 
108
+ All multilingual rec models share the same architecture (`SVTR_LCNet` + `PPLCNetV3`); they differ only by character dictionary. File size 23–28 MB each.
109
 
110
+ | Weight | Supported languages |
111
  |---|---|
112
+ | `ptocr_v5_en_mobile_rec.safetensors` | English (dedicated model optimized for English-only scenarios) |
113
+ | `ptocr_v5_korean_mobile_rec.safetensors` | Korean, English |
114
+ | `ptocr_v5_latin_mobile_rec.safetensors` | French, German, Spanish, Italian, Portuguese, Dutch, Swedish, Danish, Norwegian, Finnish, Polish, Czech, Turkish, Vietnamese, ... (40+ Latin-script) |
115
+ | `ptocr_v5_eslav_mobile_rec.safetensors` | Russian, Belarusian, Ukrainian, English |
116
+ | `ptocr_v5_cyrillic_mobile_rec.safetensors` | 33 Cyrillic-script languages (Russian, Serbian-Cyrillic, Bulgarian, Mongolian, Kazakh, ...) |
117
+ | `ptocr_v5_arabic_mobile_rec.safetensors` | Arabic, Persian, Uyghur, Urdu, Pashto, Sindhi, ... |
118
+ | `ptocr_v5_devanagari_mobile_rec.safetensors` | 14 Devanagari-script languages (Hindi, Marathi, Nepali, Sanskrit, ...) |
119
+ | `ptocr_v5_th_mobile_rec.safetensors` | Thai, English |
120
+ | `ptocr_v5_el_mobile_rec.safetensors` | Greek, English |
121
+ | `ptocr_v5_ta_mobile_rec.safetensors` | Tamil, English |
122
+ | `ptocr_v5_te_mobile_rec.safetensors` | Telugu, English |
123
 
124
  ---
125
 
126
+ ## Quick Start
127
 
128
+ ### Download Weights
129
 
130
  ```python
131
  from huggingface_hub import snapshot_download, hf_hub_download
132
 
133
+ # Option 1: download the whole repo (weights + configs + dicts + README)
134
  repo_dir = snapshot_download(repo_id="JoyCN/PaddleOCR-Pytorch")
135
+ print("downloaded to:", repo_dir)
136
 
137
+ # Option 2: fetch a single weight file
138
  weight_path = hf_hub_download(
139
  repo_id="JoyCN/PaddleOCR-Pytorch",
140
+ filename="ptocr_v5_korean_mobile_rec.safetensors",
141
  )
142
  ```
143
 
144
+ ### Inference via PaddleOCR2Pytorch (Recommended)
145
 
146
  ```bash
147
+ # 1. clone the inference code repo
148
  git clone https://github.com/frotms/PaddleOCR2Pytorch
149
  cd PaddleOCR2Pytorch
150
  pip install torch safetensors pyyaml shapely pyclipper opencv-python pillow scikit-image
151
 
152
+ # 2. Assume you ran snapshot_download above into /path/to/hf_repo
153
  python tools/infer/predict_rec.py \
154
  --image_dir doc/imgs_words/korean/1.jpg \
155
  --rec_algorithm SVTR_LCNet \
 
160
  --use_gpu False
161
  ```
162
 
163
+ > PaddleOCR2Pytorch `base_ocr_v20.py` auto-detects `.safetensors` vs `.pth` by extension (backward compatible).
164
 
165
+ ### Custom Python Inference
166
 
167
+ A minimal skeleton showing how to load the weights and run a forward pass. You still need the network definitions from the PaddleOCR2Pytorch `pytorchocr/modeling/` package.
168
 
169
  ```python
170
  import sys, numpy as np, cv2, torch, yaml
171
  from safetensors.torch import load_file
172
 
173
+ # Requires https://github.com/frotms/PaddleOCR2Pytorch on PYTHONPATH
 
174
  sys.path.insert(0, "/path/to/PaddleOCR2Pytorch")
175
  from pytorchocr.modeling.architectures.base_model import BaseModel
176
  from pytorchocr.postprocess import build_post_process
177
 
178
+ HF_REPO = "/path/to/hf_repo" # the path returned by snapshot_download
179
  yml_path = f"{HF_REPO}/configs/rec/PP-OCRv5/multi_language/korean_PP-OCRv5_mobile_rec.yml"
180
  weight_path = f"{HF_REPO}/ptocr_v5_korean_mobile_rec.safetensors"
181
 
182
+ # 1. load config + dictionary
183
  with open(yml_path, encoding="utf-8") as f:
184
  cfg = yaml.safe_load(f)
185
+ dict_path = cfg["Global"]["character_dict_path"] # './dicts/ppocrv5_korean_dict.txt'
186
  dict_abs = f"{HF_REPO}/{dict_path.lstrip('./')}"
187
  with open(dict_abs, encoding="utf-8") as f:
188
  chars = [l.strip("\n\r") for l in f]
189
+ n_char = len(chars) + 2 # +1 blank, +1 space (if use_space_char)
190
 
191
+ # 2. build network + load weights (safetensors = zero-code-exec, mmap-fast)
192
  cfg["Architecture"]["Head"]["out_channels_list"] = {
193
  "CTCLabelDecode": n_char,
194
  "SARLabelDecode": n_char + 2,
 
198
  net.load_state_dict(load_file(weight_path, device="cpu"))
199
  net.eval()
200
 
201
+ # 3. preprocess (resize to [3, 48, 320], normalize to [-1, 1])
202
  img = cv2.imread("input_word.jpg")
203
  h, w = img.shape[:2]
204
  ratio = w / h
 
210
  x = (x - 0.5) / 0.5
211
  x = torch.from_numpy(x).unsqueeze(0)
212
 
213
+ # 4. forward + CTC decode
214
  with torch.no_grad():
215
  logits = net(x)
216
+ post_op = build_post_process({
217
+ "name": "CTCLabelDecode",
218
+ "character_dict_path": dict_abs,
219
+ "use_space_char": True,
220
+ })
221
  result = post_op(logits)
222
+ print("prediction:", result) # e.g. [('바탕으로', 0.9998)]
223
  ```
224
 
225
+ ### Runtime Dependencies
226
 
227
  ```
228
+ torch >= 1.13
229
+ safetensors >= 0.4
230
  numpy, pillow, opencv-python
231
  pyyaml, shapely, pyclipper
232
+ scikit-image # required by det post-processing
233
  ```
234
 
235
  ---
236
 
237
+ ## Conversion & Verification
238
 
239
+ - **Source weights**: official PaddlePaddle `.pdparams` from
240
+ `https://paddle-model-ecology.bj.bcebos.com/paddlex/official_pretrained_model/`
241
+ - **Converter**: [PaddleOCR2Pytorch](https://github.com/frotms/PaddleOCR2Pytorch) scripts `converter/ppocr_v5_det_converter.py` / `converter/ppocr_v5_rec_converter.py`
242
+ - **Verification**: end-to-end inference was run on macOS Apple Silicon (M-series) CPU; multilingual rec outputs are **bit-exact** with the original PaddleOCR `.pdparams` (float32 values match to 8 decimal places).
243
 
244
+ Sample inference results (CPU, < 0.7 s / image):
245
 
246
+ | Sample | Prediction | Confidence |
247
  |---|---|---|
248
+ | Chinese `word_1.jpg` | 韩国小馆 | 0.99797755 |
249
+ | Korean `korean/1.jpg` | 바탕으로 | 0.99977183 |
250
+ | French `french/1.jpg` | de l'amendement, | 0.99656343 |
251
+ | Arabic `arabic/ar_1.jpg` | الكيصياوي | 0.68281130 |
252
 
253
  ---
254
 
255
+ ## Legacy Files (`legacy/`)
256
+
257
+ Older PP-OCR (v2 / v3 / v4) checkpoints previously at the repo root have been **moved into `legacy/`** for clarity. They are still present and continue to work — just add the `legacy/` prefix to your path.
258
+
259
+ If you were previously using any of these files at the repo root, update your URLs to the new `legacy/` paths:
260
+
261
+ ```
262
+ legacy/ch_ptocr_mobile_v2.0_cls_infer.pth
263
+ legacy/ch_ptocr_v4_det_infer.pth
264
+ legacy/ch_ptocr_v4_rec_infer.pth
265
+ legacy/en_ptocr_v3_det_infer.pth
266
+ legacy/en_ptocr_v4_rec_infer.pth
267
+ ```
268
+
269
+ The 15 PP-OCRv5 safetensors files **remain at the repo root** — their URLs did not change.
270
+
271
+ ---
272
+
273
+ ## License & Credits
274
 
275
  - **License**: Apache License 2.0
276
+ - Weights originate from [PaddleOCR](https://github.com/PaddlePaddle/PaddleOCR) by the PaddlePaddle team (Apache 2.0).
277
+ - Converted with [PaddleOCR2Pytorch](https://github.com/frotms/PaddleOCR2Pytorch) (Apache 2.0).
278
 
279
+ If this repo helps you, please also star both of those original projects.
280
 
281
  ---
282
 
283
+ ## Citation
284
 
285
  ```bibtex
286
  @misc{pp_ocrv5_pytorch_joycn_2025,
README_zh.md ADDED
@@ -0,0 +1,287 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ language:
4
+ - zh
5
+ - en
6
+ - ja
7
+ - ko
8
+ - fr
9
+ - de
10
+ - es
11
+ - ru
12
+ - ar
13
+ - hi
14
+ - th
15
+ - el
16
+ library_name: pytorch
17
+ tags:
18
+ - ocr
19
+ - text-detection
20
+ - text-recognition
21
+ - paddleocr
22
+ - pp-ocrv5
23
+ - multilingual
24
+ - svtr
25
+ - db
26
+ pipeline_tag: image-to-text
27
+ ---
28
+
29
+ # PP-OCRv5 PyTorch Model Zoo(中文版)
30
+
31
+ > 本仓库的主 README 为英文版 [README.md](./README.md)。本文件为中文对照版。
32
+
33
+ PP-OCRv5 全系列模型的 **PyTorch** 版本(safetensors 格式),从百度 PaddlePaddle 官方 `.pdparams` 动态图权重精确转换而来,**推理结果与 PaddleOCR 原版位精确一致**。
34
+
35
+ - **文本检测**:2 个(mobile / server)
36
+ - **文本识别(基础)**:2 个,覆盖 简中 / 繁中 / 英文 / 日文
37
+ - **文本识别(多语言)**:11 个,覆盖 100+ 语种(韩 / 法 / 德 / 俄 / 阿拉伯 / 天城文 / 泰 / 希腊 / 泰米尔 / 泰卢固 / 纯英文等)
38
+
39
+ > 本仓库**仅包含权重、配置和字典**,不包含推理代码。推理请配合 [PaddleOCR2Pytorch](https://github.com/frotms/PaddleOCR2Pytorch) 使用,或参考下文"自定义 Python 推理"章节自行集成。
40
+
41
+ ---
42
+
43
+ ## 仓库结构
44
+
45
+ ```
46
+ .
47
+ ├── README.md / README_zh.md
48
+ ├── LICENSE # Apache 2.0
49
+ ├── config.json # 仓库元数据 + 模型索引
50
+ ├── *.safetensors # 15 个 PP-OCRv5 权重(位于根目录,URL 稳定)
51
+ ├── ptocr_v5_server_{det,rec}.pth # V5 服务端的 pth 副本(向后兼容保留)
52
+ ├── configs/
53
+ │ ├── det/PP-OCRv5/
54
+ │ │ ├── PP-OCRv5_mobile_det.yml # 移动端检测
55
+ │ │ └── PP-OCRv5_server_det.yml # 服务端检测
56
+ │ └── rec/PP-OCRv5/
57
+ │ ├── PP-OCRv5_mobile_rec.yml # 基础识别(中繁英日,移动端)
58
+ │ ├── PP-OCRv5_server_rec.yml # 基础识别(中繁英日,服务端)
59
+ │ └── multi_language/
60
+ │ ├── en_PP-OCRv5_mobile_rec.yaml # 英文专用
61
+ │ ├── korean_PP-OCRv5_mobile_rec.yml # 韩文 + 英文
62
+ │ ├── latin_PP-OCRv5_mobile_rec.yml # 拉丁字母 40+ 语种(法/德/西/意/葡 等)
63
+ │ ├── eslav_PP-OCRv5_mobile_rec.yml # 东斯拉夫(俄/白俄/乌克兰)
64
+ │ ├── cyrillic_PP-OCRv5_mobile_rec.yaml # 西里尔字母 33 种
65
+ │ ├── arabic_PP-OCRv5_mobile_rec.yaml # 阿拉伯 / 波斯 / 维吾尔 / 乌尔都 等
66
+ │ ├── devanagari_PP-OCRv5_mobile_rec.yaml # 天城文系 14 种(印地/马拉地/尼泊尔/梵文 等)
67
+ │ ├── th_PP-OCRv5_mobile_rec.yaml # 泰文
68
+ │ ├── el_PP-OCRv5_mobile_rec.yaml # 希腊文
69
+ │ ├── ta_PP-OCRv5_mobile_rec.yaml # 泰米尔文
70
+ │ └── te_PP-OCRv5_mobile_rec.yaml # 泰卢固文
71
+ └── dicts/ # 字符集字典(rec 推理必需)
72
+ ├── ppocrv5_dict.txt # 基础(中繁英日)
73
+ ├── ppocrv5_en_dict.txt
74
+ ├── ppocrv5_korean_dict.txt
75
+ └── ...(共 12 个)
76
+
77
+ legacy/ # 旧版本(v2/v3/v4)pth 集中目录
78
+ ├── ch_ptocr_mobile_v2.0_cls_infer.pth
79
+ ├── ch_ptocr_v4_det_infer.pth
80
+ ├── ch_ptocr_v4_rec_infer.pth
81
+ ├── en_ptocr_v3_det_infer.pth
82
+ └── en_ptocr_v4_rec_infer.pth
83
+ ```
84
+
85
+ > 所有 rec yaml 的 `character_dict_path` 已改写为相对路径 `./dicts/...`,`git clone` 或 `snapshot_download` 下载后**无需修改路径**即可使用。
86
+
87
+ ---
88
+
89
+ ## 模型清单
90
+
91
+ ### 文本检测
92
+
93
+ | 权重文件 | 对应 yaml | 场景 | 文件大小 |
94
+ |---|---|---|---|
95
+ | `ptocr_v5_mobile_det.safetensors` | `configs/det/PP-OCRv5/PP-OCRv5_mobile_det.yml` | 移动端 / CPU 推荐 | ~14 MB |
96
+ | `ptocr_v5_server_det.safetensors` | `configs/det/PP-OCRv5/PP-OCRv5_server_det.yml` | 服务端 / 高精度 | ~101 MB |
97
+
98
+ ### 文本识别(基础)
99
+
100
+ | 权重文件 | 对应 yaml | 支持语种 | 文件大小 |
101
+ |---|---|---|---|
102
+ | `ptocr_v5_mobile_rec.safetensors` | `configs/rec/PP-OCRv5/PP-OCRv5_mobile_rec.yml` | 简中 / 繁中 / 英文 / 日文 | ~31 MB |
103
+ | `ptocr_v5_server_rec.safetensors` | `configs/rec/PP-OCRv5/PP-OCRv5_server_rec.yml` | 简中 / 繁中 / 英文 / 日文 | ~128 MB |
104
+
105
+ ### 文本识别(多语言)
106
+
107
+ 所有多语言识别模型共享相同网络(`SVTR_LCNet` + `PPLCNetV3`),仅字符集不同。文件大小 23–28 MB。
108
+
109
+ | 权重文件 | 支持语种 |
110
+ |---|---|
111
+ | `ptocr_v5_en_mobile_rec.safetensors` | 英文专用(针对英文场景定向优化) |
112
+ | `ptocr_v5_korean_mobile_rec.safetensors` | 韩文、英文 |
113
+ | `ptocr_v5_latin_mobile_rec.safetensors` | 法文、德文、南非荷兰文、意大利文、西班牙文、葡萄牙文、捷克文、丹麦文、爱沙尼亚文、克罗地亚文、荷兰文、挪威文、波兰文、瑞典文、芬兰文、土耳其文、越南文 等 40+ 语种 |
114
+ | `ptocr_v5_eslav_mobile_rec.safetensors` | 俄罗斯文、白俄罗斯文、乌克兰文、英文 |
115
+ | `ptocr_v5_cyrillic_mobile_rec.safetensors` | 俄文、白俄文、乌克兰文、塞尔维亚(西里尔)、保加利亚、蒙古 等 33 种西里尔字母语言 |
116
+ | `ptocr_v5_arabic_mobile_rec.safetensors` | 阿拉伯文、波斯文、维吾尔文、乌尔都文、普什图文、信德文 等 |
117
+ | `ptocr_v5_devanagari_mobile_rec.safetensors` | 印地文、马拉地文、尼泊尔文、梵文 等 14 种天城文系语言 |
118
+ | `ptocr_v5_th_mobile_rec.safetensors` | 泰文、英文 |
119
+ | `ptocr_v5_el_mobile_rec.safetensors` | 希腊文、英文 |
120
+ | `ptocr_v5_ta_mobile_rec.safetensors` | 泰米尔文、英文 |
121
+ | `ptocr_v5_te_mobile_rec.safetensors` | 泰卢固文、英文 |
122
+
123
+ ---
124
+
125
+ ## 快速开始
126
+
127
+ ### 下载权重
128
+
129
+ ```python
130
+ from huggingface_hub import snapshot_download, hf_hub_download
131
+
132
+ # 方式 1:下载整个仓库(权重 + yml + 字典 + README)
133
+ repo_dir = snapshot_download(repo_id="JoyCN/PaddleOCR-Pytorch")
134
+ print("仓库下载到:", repo_dir)
135
+
136
+ # 方式 2:只下载单个权重文件
137
+ weight_path = hf_hub_download(
138
+ repo_id="JoyCN/PaddleOCR-Pytorch",
139
+ filename="ptocr_v5_korean_mobile_rec.safetensors"
140
+ )
141
+ ```
142
+
143
+ ### 使用 PaddleOCR2Pytorch 项目做推理(推荐)
144
+
145
+ ```bash
146
+ # 1. clone 推理代码仓
147
+ git clone https://github.com/frotms/PaddleOCR2Pytorch
148
+ cd PaddleOCR2Pytorch
149
+ pip install torch safetensors pyyaml shapely pyclipper opencv-python pillow scikit-image
150
+
151
+ # 2. 用本仓库下载的权重 + yml(假设下载到 /path/to/hf_repo)
152
+ python tools/infer/predict_rec.py \
153
+ --image_dir doc/imgs_words/korean/1.jpg \
154
+ --rec_algorithm SVTR_LCNet \
155
+ --rec_model_path /path/to/hf_repo/ptocr_v5_korean_mobile_rec.safetensors \
156
+ --rec_yaml_path /path/to/hf_repo/configs/rec/PP-OCRv5/multi_language/korean_PP-OCRv5_mobile_rec.yml \
157
+ --rec_image_shape "3,48,320" \
158
+ --rec_char_dict_path /path/to/hf_repo/dicts/ppocrv5_korean_dict.txt \
159
+ --use_gpu False
160
+ ```
161
+
162
+ > PaddleOCR2Pytorch 的 `base_ocr_v20.py` 已原生支持 `.safetensors`(按后缀自动识别,向后兼容 `.pth`)。
163
+
164
+ ### 自定义 Python 推理代码
165
+
166
+ 如果你不想依赖 PaddleOCR2Pytorch 完整推理栈,下面是**一个最小 rec 推理代码片段**的骨架。它展示了如何加载权重并做前向推理——但你仍然需要 PaddleOCR2Pytorch 项目中的网络定义代码(`pytorchocr/modeling/`)。
167
+
168
+ ```python
169
+ import sys, numpy as np, cv2, torch, yaml
170
+ from safetensors.torch import load_file
171
+
172
+ # 以下 import 需要你先 clone https://github.com/frotms/PaddleOCR2Pytorch
173
+ # 并把其根目录加入 PYTHONPATH
174
+ sys.path.insert(0, "/path/to/PaddleOCR2Pytorch")
175
+ from pytorchocr.modeling.architectures.base_model import BaseModel
176
+ from pytorchocr.postprocess import build_post_process
177
+
178
+ HF_REPO = "/path/to/hf_repo" # snapshot_download 得到的路径
179
+ yml_path = f"{HF_REPO}/configs/rec/PP-OCRv5/multi_language/korean_PP-OCRv5_mobile_rec.yml"
180
+ weight_path = f"{HF_REPO}/ptocr_v5_korean_mobile_rec.safetensors"
181
+
182
+ # 1. 读配置 + 字符集
183
+ with open(yml_path, encoding="utf-8") as f:
184
+ cfg = yaml.safe_load(f)
185
+ dict_path = cfg["Global"]["character_dict_path"] # './dicts/ppocrv5_korean_dict.txt'
186
+ dict_abs = f"{HF_REPO}/{dict_path[2:] if dict_path.startswith('./') else dict_path}"
187
+ with open(dict_abs, encoding="utf-8") as f:
188
+ chars = [l.strip("\n\r") for l in f]
189
+ n_char = len(chars) + 2 # +1 blank, +1 space(依 use_space_char 而定)
190
+
191
+ # 2. 构建网络 + 加载权重(safetensors 零代码执行、mmap 快速加载)
192
+ cfg["Architecture"]["Head"]["out_channels_list"] = {
193
+ "CTCLabelDecode": n_char,
194
+ "SARLabelDecode": n_char + 2,
195
+ "NRTRLabelDecode": n_char + 3,
196
+ }
197
+ net = BaseModel(cfg["Architecture"], out_channels=n_char)
198
+ net.load_state_dict(load_file(weight_path, device="cpu"))
199
+ net.eval()
200
+
201
+ # 3. 读图 + 预处理(resize 到 [3, 48, 320],归一化到 [-1, 1])
202
+ img = cv2.imread("input_word.jpg")
203
+ h, w = img.shape[:2]
204
+ ratio = w / h
205
+ tw = min(int(48 * ratio), 320)
206
+ img = cv2.resize(img, (tw, 48))
207
+ canvas = np.zeros((48, 320, 3), dtype=np.uint8)
208
+ canvas[:, :tw] = img
209
+ x = canvas.astype(np.float32).transpose(2, 0, 1) / 255.0
210
+ x = (x - 0.5) / 0.5
211
+ x = torch.from_numpy(x).unsqueeze(0)
212
+
213
+ # 4. 前向 + CTC 解码
214
+ with torch.no_grad():
215
+ logits = net(x)
216
+ post_op = build_post_process({"name": "CTCLabelDecode",
217
+ "character_dict_path": dict_abs,
218
+ "use_space_char": True})
219
+ result = post_op(logits)
220
+ print("识别结果:", result) # e.g. [('바탕으로', 0.9998)]
221
+ ```
222
+
223
+ ### 推理所需依赖
224
+
225
+ ```
226
+ torch >= 1.13
227
+ safetensors >= 0.4
228
+ numpy, pillow, opencv-python
229
+ pyyaml, shapely, pyclipper
230
+ scikit-image # det 后处理需要
231
+ ```
232
+
233
+ ---
234
+
235
+ ## 转换 & 验证来源
236
+
237
+ - 源权重:PaddlePaddle 官方 `.pdparams`,来自 [paddle-model-ecology.bj.bcebos.com/paddlex/official_pretrained_model/](https://paddle-model-ecology.bj.bcebos.com/paddlex/official_pretrained_model/)
238
+ - 转换工具:[PaddleOCR2Pytorch](https://github.com/frotms/PaddleOCR2Pytorch) 中的 `converter/ppocr_v5_det_converter.py` / `ppocr_v5_rec_converter.py`
239
+ - 验证:在 macOS Apple Silicon (M 系列) CPU 环境下做过端到端推理,**多语言识别结果与使用 PaddleOCR 官方 `.pdparams` 权重推理的输出逐位一致(bit-exact)**(float32 置信度在小数点后 8 位完全相同)
240
+
241
+ 样例推理输出(CPU,<0.7 s / 张):
242
+
243
+ | 样例 | 识别结果 | 置信度 |
244
+ |---|---|---|
245
+ | 中文 `word_1.jpg` | 韩国小馆 | 0.99797755 |
246
+ | 韩文 `korean/1.jpg` | 바탕으로 | 0.99977183 |
247
+ | 法文 `french/1.jpg` | de l'amendement, | 0.99656343 |
248
+ | 阿拉伯 `arabic/ar_1.jpg` | الكيصياوي | 0.68281130 |
249
+
250
+ ---
251
+
252
+ ## Legacy 文件说明(`legacy/`)
253
+
254
+ 原本放在仓库根目录的 PP-OCR v2 / v3 / v4 老版本权重,现已**统一迁移到 `legacy/` 目录**以便整理。这些文件仍然存在且可正常使用,只需在原文件名(例如下载 URL 中的文件路径,或 `hf_hub_download` 的 `filename` 参数)前加上 `legacy/` 前缀即可:
255
+
256
+ ```
257
+ legacy/ch_ptocr_mobile_v2.0_cls_infer.pth
258
+ legacy/ch_ptocr_v4_det_infer.pth
259
+ legacy/ch_ptocr_v4_rec_infer.pth
260
+ legacy/en_ptocr_v3_det_infer.pth
261
+ legacy/en_ptocr_v4_rec_infer.pth
262
+ ```
263
+
264
+ **15 个 PP-OCRv5 safetensors 权重依然位于仓库根目录,URL 未变**。
265
+
266
+ ---
267
+
268
+ ## 许可证 & 致谢
269
+
270
+ - **License**: Apache License 2.0
271
+ - 权重来源:[PaddleOCR](https://github.com/PaddlePaddle/PaddleOCR) by PaddlePaddle 团队,Apache 2.0
272
+ - 转换工具:[PaddleOCR2Pytorch](https://github.com/frotms/PaddleOCR2Pytorch),Apache 2.0
273
+
274
+ 如果本仓库对你有帮助,请同时给上述两个原始项目 star 致谢。
275
+
276
+ ---
277
+
278
+ ## 引用
279
+
280
+ ```bibtex
281
+ @misc{pp_ocrv5_pytorch_joycn_2025,
282
+ title = {PP-OCRv5 PyTorch Model Zoo},
283
+ author = {JoyCN},
284
+ howpublished = {\url{https://huggingface.co/JoyCN/PaddleOCR-Pytorch}},
285
+ year = {2025}
286
+ }
287
+ ```
config.json CHANGED
@@ -1,27 +1,122 @@
1
  {
2
  "library_name": "pytorch",
3
- "format": "pth",
4
- "weights": {
5
- "det": {
6
- "safetensors": "ptocr_v5_server_det.safetensors",
7
- "pth": "ptocr_v5_server_det.pth"
8
- },
9
- "rec": {
10
- "safetensors": "ptocr_v5_server_rec.safetensors",
11
- "pth": "ptocr_v5_server_rec.pth"
12
- }
13
- },
14
- "homepage": "https://github.com/frotms/PaddleOCR2Pytorch",
15
  "created": "2025-09-16",
16
- "notes": "Presence of this file helps Hub track downloads (query file).",
17
- "formats": [
18
- "safetensors",
19
- "pth"
20
- ],
21
  "compat": {
22
  "project": "PaddleOCR2Pytorch",
23
  "repo": "https://github.com/frotms/PaddleOCR2Pytorch",
24
  "license": "Apache-2.0"
25
  },
26
- "updated": "2025-09-16"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  }
 
1
  {
2
  "library_name": "pytorch",
3
+ "format": "safetensors",
4
+ "formats": ["safetensors", "pth"],
5
+ "homepage": "https://huggingface.co/JoyCN/PaddleOCR-Pytorch",
6
+ "source_project": "https://github.com/frotms/PaddleOCR2Pytorch",
7
+ "upstream_project": "https://github.com/PaddlePaddle/PaddleOCR",
8
+ "license": "Apache-2.0",
 
 
 
 
 
 
9
  "created": "2025-09-16",
10
+ "updated": "2026-04-15",
11
+ "notes": "PP-OCRv5 full model zoo (2 det + 2 base rec + 11 multilingual rec), bit-exact conversion of official PaddlePaddle .pdparams to PyTorch. Older PP-OCR v2/v3/v4 checkpoints kept under legacy/.",
 
 
 
12
  "compat": {
13
  "project": "PaddleOCR2Pytorch",
14
  "repo": "https://github.com/frotms/PaddleOCR2Pytorch",
15
  "license": "Apache-2.0"
16
  },
17
+ "models": {
18
+ "ppocrv5": {
19
+ "det": {
20
+ "mobile": {
21
+ "safetensors": "ptocr_v5_mobile_det.safetensors",
22
+ "yaml": "configs/det/PP-OCRv5/PP-OCRv5_mobile_det.yml"
23
+ },
24
+ "server": {
25
+ "safetensors": "ptocr_v5_server_det.safetensors",
26
+ "pth": "ptocr_v5_server_det.pth",
27
+ "yaml": "configs/det/PP-OCRv5/PP-OCRv5_server_det.yml"
28
+ }
29
+ },
30
+ "rec_base": {
31
+ "mobile": {
32
+ "safetensors": "ptocr_v5_mobile_rec.safetensors",
33
+ "yaml": "configs/rec/PP-OCRv5/PP-OCRv5_mobile_rec.yml",
34
+ "dict": "dicts/ppocrv5_dict.txt",
35
+ "languages": ["zh-Hans", "zh-Hant", "en", "ja"]
36
+ },
37
+ "server": {
38
+ "safetensors": "ptocr_v5_server_rec.safetensors",
39
+ "pth": "ptocr_v5_server_rec.pth",
40
+ "yaml": "configs/rec/PP-OCRv5/PP-OCRv5_server_rec.yml",
41
+ "dict": "dicts/ppocrv5_dict.txt",
42
+ "languages": ["zh-Hans", "zh-Hant", "en", "ja"]
43
+ }
44
+ },
45
+ "rec_multilingual": {
46
+ "en": {
47
+ "safetensors": "ptocr_v5_en_mobile_rec.safetensors",
48
+ "yaml": "configs/rec/PP-OCRv5/multi_language/en_PP-OCRv5_mobile_rec.yaml",
49
+ "dict": "dicts/ppocrv5_en_dict.txt",
50
+ "languages": ["en"]
51
+ },
52
+ "korean": {
53
+ "safetensors": "ptocr_v5_korean_mobile_rec.safetensors",
54
+ "yaml": "configs/rec/PP-OCRv5/multi_language/korean_PP-OCRv5_mobile_rec.yml",
55
+ "dict": "dicts/ppocrv5_korean_dict.txt",
56
+ "languages": ["ko", "en"]
57
+ },
58
+ "latin": {
59
+ "safetensors": "ptocr_v5_latin_mobile_rec.safetensors",
60
+ "yaml": "configs/rec/PP-OCRv5/multi_language/latin_PP-OCRv5_mobile_rec.yml",
61
+ "dict": "dicts/ppocrv5_latin_dict.txt",
62
+ "languages": ["fr", "de", "es", "it", "pt", "nl", "sv", "da", "no", "fi", "pl", "cs", "tr", "vi", "af", "bs", "cy", "et", "ga", "hr", "uz", "hu", "id", "is", "lt", "mi", "ms", "sk", "sl", "sq", "sw", "tl", "la", "az", "ku", "lv", "mt", "pi", "ro", "eu", "gl", "lb", "rm", "ca", "qu", "rs_latin", "oc"]
63
+ },
64
+ "eslav": {
65
+ "safetensors": "ptocr_v5_eslav_mobile_rec.safetensors",
66
+ "yaml": "configs/rec/PP-OCRv5/multi_language/eslav_PP-OCRv5_mobile_rec.yml",
67
+ "dict": "dicts/ppocrv5_eslav_dict.txt",
68
+ "languages": ["ru", "be", "uk", "en"]
69
+ },
70
+ "cyrillic": {
71
+ "safetensors": "ptocr_v5_cyrillic_mobile_rec.safetensors",
72
+ "yaml": "configs/rec/PP-OCRv5/multi_language/cyrillic_PP-OCRv5_mobile_rec.yaml",
73
+ "dict": "dicts/ppocrv5_cyrillic_dict.txt",
74
+ "languages": ["ru", "be", "uk", "rs_cyrillic", "bg", "mn", "kk", "ky", "tg", "mk", "tt", "cv", "ba", "mhr", "mo", "udm", "kv", "os", "bua", "xal", "tyv", "sah", "kaa", "ab", "ady", "kbd", "av", "dar", "inh", "ce", "lki", "lez", "tab", "en"]
75
+ },
76
+ "arabic": {
77
+ "safetensors": "ptocr_v5_arabic_mobile_rec.safetensors",
78
+ "yaml": "configs/rec/PP-OCRv5/multi_language/arabic_PP-OCRv5_mobile_rec.yaml",
79
+ "dict": "dicts/ppocrv5_arabic_dict.txt",
80
+ "languages": ["ar", "fa", "ug", "ur", "ps", "ku", "sd", "bal", "en"]
81
+ },
82
+ "devanagari": {
83
+ "safetensors": "ptocr_v5_devanagari_mobile_rec.safetensors",
84
+ "yaml": "configs/rec/PP-OCRv5/multi_language/devanagari_PP-OCRv5_mobile_rec.yaml",
85
+ "dict": "dicts/ppocrv5_devanagari_dict.txt",
86
+ "languages": ["hi", "mr", "ne", "bh", "mai", "ang", "bho", "mah", "sck", "new", "gom", "sa", "bgc", "en"]
87
+ },
88
+ "th": {
89
+ "safetensors": "ptocr_v5_th_mobile_rec.safetensors",
90
+ "yaml": "configs/rec/PP-OCRv5/multi_language/th_PP-OCRv5_mobile_rec.yaml",
91
+ "dict": "dicts/ppocrv5_th_dict.txt",
92
+ "languages": ["th", "en"]
93
+ },
94
+ "el": {
95
+ "safetensors": "ptocr_v5_el_mobile_rec.safetensors",
96
+ "yaml": "configs/rec/PP-OCRv5/multi_language/el_PP-OCRv5_mobile_rec.yaml",
97
+ "dict": "dicts/ppocrv5_el_dict.txt",
98
+ "languages": ["el", "en"]
99
+ },
100
+ "ta": {
101
+ "safetensors": "ptocr_v5_ta_mobile_rec.safetensors",
102
+ "yaml": "configs/rec/PP-OCRv5/multi_language/ta_PP-OCRv5_mobile_rec.yaml",
103
+ "dict": "dicts/ppocrv5_ta_dict.txt",
104
+ "languages": ["ta", "en"]
105
+ },
106
+ "te": {
107
+ "safetensors": "ptocr_v5_te_mobile_rec.safetensors",
108
+ "yaml": "configs/rec/PP-OCRv5/multi_language/te_PP-OCRv5_mobile_rec.yaml",
109
+ "dict": "dicts/ppocrv5_te_dict.txt",
110
+ "languages": ["te", "en"]
111
+ }
112
+ }
113
+ }
114
+ },
115
+ "legacy": {
116
+ "ppocrv2_cls_ch": "legacy/ch_ptocr_mobile_v2.0_cls_infer.pth",
117
+ "ppocrv3_det_en": "legacy/en_ptocr_v3_det_infer.pth",
118
+ "ppocrv4_det_ch": "legacy/ch_ptocr_v4_det_infer.pth",
119
+ "ppocrv4_rec_ch": "legacy/ch_ptocr_v4_rec_infer.pth",
120
+ "ppocrv4_rec_en": "legacy/en_ptocr_v4_rec_infer.pth"
121
+ }
122
  }
ch_ptocr_mobile_v2.0_cls_infer.pth → legacy/ch_ptocr_mobile_v2.0_cls_infer.pth RENAMED
File without changes
ch_ptocr_v4_det_infer.pth → legacy/ch_ptocr_v4_det_infer.pth RENAMED
File without changes
ch_ptocr_v4_rec_infer.pth → legacy/ch_ptocr_v4_rec_infer.pth RENAMED
File without changes
en_ptocr_v3_det_infer.pth → legacy/en_ptocr_v3_det_infer.pth RENAMED
File without changes
en_ptocr_v4_rec_infer.pth → legacy/en_ptocr_v4_rec_infer.pth RENAMED
File without changes