Reorg: move v2/v3/v4 pth to legacy/, English README as default (+ README_zh.md), update config.json
Browse files
- moved 5 legacy pth files (v2/v3/v4) from repo root to legacy/ via in-repo LFS copy
- deleted two obsolete root yml (superseded by configs/det/PP-OCRv5/*.yml and configs/rec/PP-OCRv5/*.yml)
- replaced README.md with English primary version (zh copy kept as README_zh.md)
- updated config.json to reflect the full 15-model v5 catalog + legacy map
No PP-OCRv5 safetensors / configs/ yml / dict URLs changed; only the two duplicate root-level yml files were removed.
- PP-OCRv5_server_det.yml +0 -174
- PP-OCRv5_server_rec.yml +0 -136
- README.md +130 -98
- README_zh.md +287 -0
- config.json +113 -18
- ch_ptocr_mobile_v2.0_cls_infer.pth → legacy/ch_ptocr_mobile_v2.0_cls_infer.pth +0 -0
- ch_ptocr_v4_det_infer.pth → legacy/ch_ptocr_v4_det_infer.pth +0 -0
- ch_ptocr_v4_rec_infer.pth → legacy/ch_ptocr_v4_rec_infer.pth +0 -0
- en_ptocr_v3_det_infer.pth → legacy/en_ptocr_v3_det_infer.pth +0 -0
- en_ptocr_v4_rec_infer.pth → legacy/en_ptocr_v4_rec_infer.pth +0 -0
PP-OCRv5_server_det.yml
DELETED
|
@@ -1,174 +0,0 @@
|
|
| 1 |
-
Global:
|
| 2 |
-
model_name: PP-OCRv5_server_det # To use static model for inference.
|
| 3 |
-
debug: false
|
| 4 |
-
use_gpu: true
|
| 5 |
-
epoch_num: &epoch_num 500
|
| 6 |
-
log_smooth_window: 20
|
| 7 |
-
print_batch_step: 10
|
| 8 |
-
save_model_dir: ./output/PP-OCRv5_server_det
|
| 9 |
-
save_epoch_step: 10
|
| 10 |
-
eval_batch_step:
|
| 11 |
-
- 0
|
| 12 |
-
- 1500
|
| 13 |
-
cal_metric_during_train: false
|
| 14 |
-
checkpoints:
|
| 15 |
-
pretrained_model: https://paddle-model-ecology.bj.bcebos.com/paddlex/official_pretrained_model/PPHGNetV2_B4_ocr_det.pdparams
|
| 16 |
-
save_inference_dir: null
|
| 17 |
-
use_visualdl: false
|
| 18 |
-
infer_img: doc/imgs_en/img_10.jpg
|
| 19 |
-
save_res_path: ./checkpoints/det_db/predicts_db.txt
|
| 20 |
-
distributed: true
|
| 21 |
-
|
| 22 |
-
Architecture:
|
| 23 |
-
model_type: det
|
| 24 |
-
algorithm: DB
|
| 25 |
-
Transform: null
|
| 26 |
-
Backbone:
|
| 27 |
-
name: PPHGNetV2_B4
|
| 28 |
-
det: True
|
| 29 |
-
Neck:
|
| 30 |
-
name: LKPAN
|
| 31 |
-
out_channels: 256
|
| 32 |
-
intracl: true
|
| 33 |
-
Head:
|
| 34 |
-
name: PFHeadLocal
|
| 35 |
-
k: 50
|
| 36 |
-
mode: "large"
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
Loss:
|
| 40 |
-
name: DBLoss
|
| 41 |
-
balance_loss: true
|
| 42 |
-
main_loss_type: DiceLoss
|
| 43 |
-
alpha: 5
|
| 44 |
-
beta: 10
|
| 45 |
-
ohem_ratio: 3
|
| 46 |
-
|
| 47 |
-
Optimizer:
|
| 48 |
-
name: Adam
|
| 49 |
-
beta1: 0.9
|
| 50 |
-
beta2: 0.999
|
| 51 |
-
lr:
|
| 52 |
-
name: Cosine
|
| 53 |
-
learning_rate: 0.001 #(8*8c)
|
| 54 |
-
warmup_epoch: 2
|
| 55 |
-
regularizer:
|
| 56 |
-
name: L2
|
| 57 |
-
factor: 1e-6
|
| 58 |
-
|
| 59 |
-
PostProcess:
|
| 60 |
-
name: DBPostProcess
|
| 61 |
-
thresh: 0.3
|
| 62 |
-
box_thresh: 0.6
|
| 63 |
-
max_candidates: 1000
|
| 64 |
-
unclip_ratio: 1.5
|
| 65 |
-
|
| 66 |
-
Metric:
|
| 67 |
-
name: DetMetric
|
| 68 |
-
main_indicator: hmean
|
| 69 |
-
|
| 70 |
-
Train:
|
| 71 |
-
dataset:
|
| 72 |
-
name: SimpleDataSet
|
| 73 |
-
data_dir: ./train_data/icdar2015/text_localization/
|
| 74 |
-
label_file_list:
|
| 75 |
-
- ./train_data/icdar2015/text_localization/train_icdar2015_label.txt
|
| 76 |
-
ratio_list: [1.0]
|
| 77 |
-
transforms:
|
| 78 |
-
- DecodeImage:
|
| 79 |
-
img_mode: BGR
|
| 80 |
-
channel_first: false
|
| 81 |
-
- DetLabelEncode: null
|
| 82 |
-
- CopyPaste: null
|
| 83 |
-
- IaaAugment:
|
| 84 |
-
augmenter_args:
|
| 85 |
-
- type: Fliplr
|
| 86 |
-
args:
|
| 87 |
-
p: 0.5
|
| 88 |
-
- type: Affine
|
| 89 |
-
args:
|
| 90 |
-
rotate:
|
| 91 |
-
- -10
|
| 92 |
-
- 10
|
| 93 |
-
- type: Resize
|
| 94 |
-
args:
|
| 95 |
-
size:
|
| 96 |
-
- 0.5
|
| 97 |
-
- 3
|
| 98 |
-
- EastRandomCropData:
|
| 99 |
-
size:
|
| 100 |
-
- 640
|
| 101 |
-
- 640
|
| 102 |
-
max_tries: 50
|
| 103 |
-
keep_ratio: true
|
| 104 |
-
- MakeBorderMap:
|
| 105 |
-
shrink_ratio: 0.4
|
| 106 |
-
thresh_min: 0.3
|
| 107 |
-
thresh_max: 0.7
|
| 108 |
-
total_epoch: *epoch_num
|
| 109 |
-
- MakeShrinkMap:
|
| 110 |
-
shrink_ratio: 0.4
|
| 111 |
-
min_text_size: 8
|
| 112 |
-
total_epoch: *epoch_num
|
| 113 |
-
- NormalizeImage:
|
| 114 |
-
scale: 1./255.
|
| 115 |
-
mean:
|
| 116 |
-
- 0.485
|
| 117 |
-
- 0.456
|
| 118 |
-
- 0.406
|
| 119 |
-
std:
|
| 120 |
-
- 0.229
|
| 121 |
-
- 0.224
|
| 122 |
-
- 0.225
|
| 123 |
-
order: hwc
|
| 124 |
-
- ToCHWImage: null
|
| 125 |
-
- KeepKeys:
|
| 126 |
-
keep_keys:
|
| 127 |
-
- image
|
| 128 |
-
- threshold_map
|
| 129 |
-
- threshold_mask
|
| 130 |
-
- shrink_map
|
| 131 |
-
- shrink_mask
|
| 132 |
-
loader:
|
| 133 |
-
shuffle: true
|
| 134 |
-
drop_last: false
|
| 135 |
-
batch_size_per_card: 8
|
| 136 |
-
num_workers: 8
|
| 137 |
-
|
| 138 |
-
Eval:
|
| 139 |
-
dataset:
|
| 140 |
-
name: SimpleDataSet
|
| 141 |
-
data_dir: ./train_data/icdar2015/text_localization/
|
| 142 |
-
label_file_list:
|
| 143 |
-
- ./train_data/icdar2015/text_localization/test_icdar2015_label.txt
|
| 144 |
-
transforms:
|
| 145 |
-
transforms:
|
| 146 |
-
- DecodeImage:
|
| 147 |
-
img_mode: BGR
|
| 148 |
-
channel_first: false
|
| 149 |
-
- DetLabelEncode: null
|
| 150 |
-
- DetResizeForTest:
|
| 151 |
-
- NormalizeImage:
|
| 152 |
-
scale: 1./255.
|
| 153 |
-
mean:
|
| 154 |
-
- 0.485
|
| 155 |
-
- 0.456
|
| 156 |
-
- 0.406
|
| 157 |
-
std:
|
| 158 |
-
- 0.229
|
| 159 |
-
- 0.224
|
| 160 |
-
- 0.225
|
| 161 |
-
order: hwc
|
| 162 |
-
- ToCHWImage: null
|
| 163 |
-
- KeepKeys:
|
| 164 |
-
keep_keys:
|
| 165 |
-
- image
|
| 166 |
-
- shape
|
| 167 |
-
- polys
|
| 168 |
-
- ignore_tags
|
| 169 |
-
loader:
|
| 170 |
-
shuffle: false
|
| 171 |
-
drop_last: false
|
| 172 |
-
batch_size_per_card: 1
|
| 173 |
-
num_workers: 2
|
| 174 |
-
profiler_options: null
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
PP-OCRv5_server_rec.yml
DELETED
|
@@ -1,136 +0,0 @@
|
|
| 1 |
-
Global:
|
| 2 |
-
model_name: PP-OCRv5_server_rec # To use static model for inference.
|
| 3 |
-
debug: false
|
| 4 |
-
use_gpu: true
|
| 5 |
-
epoch_num: 75
|
| 6 |
-
log_smooth_window: 20
|
| 7 |
-
print_batch_step: 10
|
| 8 |
-
save_model_dir: ./output/PP-OCRv5_server_rec
|
| 9 |
-
save_epoch_step: 1
|
| 10 |
-
eval_batch_step: [0, 2000]
|
| 11 |
-
cal_metric_during_train: true
|
| 12 |
-
calc_epoch_interval: 1
|
| 13 |
-
pretrained_model:
|
| 14 |
-
checkpoints:
|
| 15 |
-
save_inference_dir:
|
| 16 |
-
use_visualdl: false
|
| 17 |
-
infer_img: doc/imgs_words/ch/word_1.jpg
|
| 18 |
-
character_dict_path: ./pytorchocr/utils/dict/ppocrv5_dict.txt
|
| 19 |
-
max_text_length: &max_text_length 25
|
| 20 |
-
infer_mode: false
|
| 21 |
-
use_space_char: true
|
| 22 |
-
distributed: true
|
| 23 |
-
save_res_path: ./output/rec/predicts_ppocrv5.txt
|
| 24 |
-
d2s_train_image_shape: [3, 48, 320]
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
Optimizer:
|
| 28 |
-
name: Adam
|
| 29 |
-
beta1: 0.9
|
| 30 |
-
beta2: 0.999
|
| 31 |
-
lr:
|
| 32 |
-
name: Cosine
|
| 33 |
-
learning_rate: 0.0005
|
| 34 |
-
warmup_epoch: 1
|
| 35 |
-
regularizer:
|
| 36 |
-
name: L2
|
| 37 |
-
factor: 3.0e-05
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
Architecture:
|
| 41 |
-
model_type: rec
|
| 42 |
-
algorithm: SVTR_HGNet
|
| 43 |
-
Transform:
|
| 44 |
-
Backbone:
|
| 45 |
-
name: PPHGNetV2_B4
|
| 46 |
-
text_rec: True
|
| 47 |
-
Head:
|
| 48 |
-
name: MultiHead
|
| 49 |
-
head_list:
|
| 50 |
-
- CTCHead:
|
| 51 |
-
Neck:
|
| 52 |
-
name: svtr
|
| 53 |
-
dims: 120
|
| 54 |
-
depth: 2
|
| 55 |
-
hidden_dims: 120
|
| 56 |
-
kernel_size: [1, 3]
|
| 57 |
-
use_guide: True
|
| 58 |
-
Head:
|
| 59 |
-
fc_decay: 0.00001
|
| 60 |
-
- NRTRHead:
|
| 61 |
-
nrtr_dim: 384
|
| 62 |
-
max_text_length: *max_text_length
|
| 63 |
-
|
| 64 |
-
Loss:
|
| 65 |
-
name: MultiLoss
|
| 66 |
-
loss_config_list:
|
| 67 |
-
- CTCLoss:
|
| 68 |
-
- NRTRLoss:
|
| 69 |
-
|
| 70 |
-
PostProcess:
|
| 71 |
-
name: CTCLabelDecode
|
| 72 |
-
|
| 73 |
-
Metric:
|
| 74 |
-
name: RecMetric
|
| 75 |
-
main_indicator: acc
|
| 76 |
-
|
| 77 |
-
Train:
|
| 78 |
-
dataset:
|
| 79 |
-
name: MultiScaleDataSet
|
| 80 |
-
ds_width: false
|
| 81 |
-
data_dir: ./train_data/
|
| 82 |
-
ext_op_transform_idx: 1
|
| 83 |
-
label_file_list:
|
| 84 |
-
- ./train_data/train_list.txt
|
| 85 |
-
transforms:
|
| 86 |
-
- DecodeImage:
|
| 87 |
-
img_mode: BGR
|
| 88 |
-
channel_first: false
|
| 89 |
-
- RecAug:
|
| 90 |
-
- MultiLabelEncode:
|
| 91 |
-
gtc_encode: NRTRLabelEncode
|
| 92 |
-
- KeepKeys:
|
| 93 |
-
keep_keys:
|
| 94 |
-
- image
|
| 95 |
-
- label_ctc
|
| 96 |
-
- label_gtc
|
| 97 |
-
- length
|
| 98 |
-
- valid_ratio
|
| 99 |
-
sampler:
|
| 100 |
-
name: MultiScaleSampler
|
| 101 |
-
scales: [[320, 32], [320, 48], [320, 64]]
|
| 102 |
-
first_bs: &bs 128
|
| 103 |
-
fix_bs: false
|
| 104 |
-
divided_factor: [8, 16] # w, h
|
| 105 |
-
is_training: True
|
| 106 |
-
loader:
|
| 107 |
-
shuffle: true
|
| 108 |
-
batch_size_per_card: *bs
|
| 109 |
-
drop_last: true
|
| 110 |
-
num_workers: 16
|
| 111 |
-
Eval:
|
| 112 |
-
dataset:
|
| 113 |
-
name: SimpleDataSet
|
| 114 |
-
data_dir: ./train_data
|
| 115 |
-
label_file_list:
|
| 116 |
-
- ./train_data/val_list.txt
|
| 117 |
-
transforms:
|
| 118 |
-
- DecodeImage:
|
| 119 |
-
img_mode: BGR
|
| 120 |
-
channel_first: false
|
| 121 |
-
- MultiLabelEncode:
|
| 122 |
-
gtc_encode: NRTRLabelEncode
|
| 123 |
-
- RecResizeImg:
|
| 124 |
-
image_shape: [3, 48, 320]
|
| 125 |
-
- KeepKeys:
|
| 126 |
-
keep_keys:
|
| 127 |
-
- image
|
| 128 |
-
- label_ctc
|
| 129 |
-
- label_gtc
|
| 130 |
-
- length
|
| 131 |
-
- valid_ratio
|
| 132 |
-
loader:
|
| 133 |
-
shuffle: false
|
| 134 |
-
drop_last: false
|
| 135 |
-
batch_size_per_card: 128
|
| 136 |
-
num_workers: 4
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
README.md
CHANGED
|
@@ -28,116 +28,128 @@ pipeline_tag: image-to-text
|
|
| 28 |
|
| 29 |
# PP-OCRv5 PyTorch Model Zoo
|
| 30 |
|
| 31 |
-
|
| 32 |
|
| 33 |
-
- **
|
| 34 |
-
- **
|
| 35 |
-
- **
|
| 36 |
|
| 37 |
-
>
|
|
|
|
|
|
|
| 38 |
|
| 39 |
---
|
| 40 |
|
| 41 |
-
##
|
| 42 |
|
| 43 |
```
|
| 44 |
.
|
| 45 |
-
├── README.md
|
| 46 |
-
├── LICENSE
|
| 47 |
-
├──
|
|
|
|
|
|
|
|
|
|
|
|
|
| 48 |
├── configs/
|
| 49 |
│ ├── det/PP-OCRv5/
|
| 50 |
-
│ │ ├── PP-OCRv5_mobile_det.yml
|
| 51 |
-
│ │ └── PP-OCRv5_server_det.yml
|
| 52 |
│ └── rec/PP-OCRv5/
|
| 53 |
-
│ ├── PP-OCRv5_mobile_rec.yml
|
| 54 |
-
│ ├── PP-OCRv5_server_rec.yml
|
| 55 |
-
│ └── multi_language/
|
| 56 |
-
│ ├── en_PP-OCRv5_mobile_rec.yaml
|
| 57 |
-
│ ├── korean_PP-OCRv5_mobile_rec.yml
|
| 58 |
-
│ ├── latin_PP-OCRv5_mobile_rec.yml
|
| 59 |
-
│ ├── eslav_PP-OCRv5_mobile_rec.yml
|
| 60 |
-
│ ├── cyrillic_PP-OCRv5_mobile_rec.yaml
|
| 61 |
-
│ ├── arabic_PP-OCRv5_mobile_rec.yaml
|
| 62 |
-
│ ├── devanagari_PP-OCRv5_mobile_rec.yaml
|
| 63 |
-
│ ├── th_PP-OCRv5_mobile_rec.yaml
|
| 64 |
-
│ ├── el_PP-OCRv5_mobile_rec.yaml
|
| 65 |
-
│ ├── ta_PP-OCRv5_mobile_rec.yaml
|
| 66 |
-
│ └── te_PP-OCRv5_mobile_rec.yaml
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 72 |
```
|
| 73 |
|
| 74 |
-
>
|
| 75 |
|
| 76 |
---
|
| 77 |
|
| 78 |
-
##
|
| 79 |
|
| 80 |
-
###
|
| 81 |
|
| 82 |
-
|
|
| 83 |
|---|---|---|---|
|
| 84 |
-
| `ptocr_v5_mobile_det.safetensors` | `configs/det/PP-OCRv5/PP-OCRv5_mobile_det.yml` |
|
| 85 |
-
| `ptocr_v5_server_det.safetensors` | `configs/det/PP-OCRv5/PP-OCRv5_server_det.yml` |
|
| 86 |
|
| 87 |
-
###
|
| 88 |
|
| 89 |
-
|
|
| 90 |
|---|---|---|---|
|
| 91 |
-
| `ptocr_v5_mobile_rec.safetensors` | `configs/rec/PP-OCRv5/PP-OCRv5_mobile_rec.yml` |
|
| 92 |
-
| `ptocr_v5_server_rec.safetensors` | `configs/rec/PP-OCRv5/PP-OCRv5_server_rec.yml` |
|
| 93 |
|
| 94 |
-
###
|
| 95 |
|
| 96 |
-
|
| 97 |
|
| 98 |
-
|
|
| 99 |
|---|---|
|
| 100 |
-
| `ptocr_v5_en_mobile_rec.safetensors` |
|
| 101 |
-
| `ptocr_v5_korean_mobile_rec.safetensors` |
|
| 102 |
-
| `ptocr_v5_latin_mobile_rec.safetensors` |
|
| 103 |
-
| `ptocr_v5_eslav_mobile_rec.safetensors` |
|
| 104 |
-
| `ptocr_v5_cyrillic_mobile_rec.safetensors` |
|
| 105 |
-
| `ptocr_v5_arabic_mobile_rec.safetensors` |
|
| 106 |
-
| `ptocr_v5_devanagari_mobile_rec.safetensors` |
|
| 107 |
-
| `ptocr_v5_th_mobile_rec.safetensors` |
|
| 108 |
-
| `ptocr_v5_el_mobile_rec.safetensors` |
|
| 109 |
-
| `ptocr_v5_ta_mobile_rec.safetensors` |
|
| 110 |
-
| `ptocr_v5_te_mobile_rec.safetensors` |
|
| 111 |
|
| 112 |
---
|
| 113 |
|
| 114 |
-
##
|
| 115 |
|
| 116 |
-
###
|
| 117 |
|
| 118 |
```python
|
| 119 |
from huggingface_hub import snapshot_download, hf_hub_download
|
| 120 |
|
| 121 |
-
#
|
| 122 |
repo_dir = snapshot_download(repo_id="JoyCN/PaddleOCR-Pytorch")
|
| 123 |
-
print("
|
| 124 |
|
| 125 |
-
#
|
| 126 |
weight_path = hf_hub_download(
|
| 127 |
repo_id="JoyCN/PaddleOCR-Pytorch",
|
| 128 |
-
filename="ptocr_v5_korean_mobile_rec.safetensors"
|
| 129 |
)
|
| 130 |
```
|
| 131 |
|
| 132 |
-
###
|
| 133 |
|
| 134 |
```bash
|
| 135 |
-
# 1. clone
|
| 136 |
git clone https://github.com/frotms/PaddleOCR2Pytorch
|
| 137 |
cd PaddleOCR2Pytorch
|
| 138 |
pip install torch safetensors pyyaml shapely pyclipper opencv-python pillow scikit-image
|
| 139 |
|
| 140 |
-
# 2.
|
| 141 |
python tools/infer/predict_rec.py \
|
| 142 |
--image_dir doc/imgs_words/korean/1.jpg \
|
| 143 |
--rec_algorithm SVTR_LCNet \
|
|
@@ -148,36 +160,35 @@ python tools/infer/predict_rec.py \
|
|
| 148 |
--use_gpu False
|
| 149 |
```
|
| 150 |
|
| 151 |
-
> PaddleOCR2Pytorch
|
| 152 |
|
| 153 |
-
###
|
| 154 |
|
| 155 |
-
|
| 156 |
|
| 157 |
```python
|
| 158 |
import sys, numpy as np, cv2, torch, yaml
|
| 159 |
from safetensors.torch import load_file
|
| 160 |
|
| 161 |
-
#
|
| 162 |
-
# 并把其根目录加入 PYTHONPATH
|
| 163 |
sys.path.insert(0, "/path/to/PaddleOCR2Pytorch")
|
| 164 |
from pytorchocr.modeling.architectures.base_model import BaseModel
|
| 165 |
from pytorchocr.postprocess import build_post_process
|
| 166 |
|
| 167 |
-
HF_REPO = "/path/to/hf_repo" #
|
| 168 |
yml_path = f"{HF_REPO}/configs/rec/PP-OCRv5/multi_language/korean_PP-OCRv5_mobile_rec.yml"
|
| 169 |
weight_path = f"{HF_REPO}/ptocr_v5_korean_mobile_rec.safetensors"
|
| 170 |
|
| 171 |
-
# 1.
|
| 172 |
with open(yml_path, encoding="utf-8") as f:
|
| 173 |
cfg = yaml.safe_load(f)
|
| 174 |
-
dict_path = cfg["Global"]["character_dict_path"]
|
| 175 |
dict_abs = f"{HF_REPO}/{dict_path.lstrip('./')}"
|
| 176 |
with open(dict_abs, encoding="utf-8") as f:
|
| 177 |
chars = [l.strip("\n\r") for l in f]
|
| 178 |
-
n_char = len(chars) + 2
|
| 179 |
|
| 180 |
-
# 2.
|
| 181 |
cfg["Architecture"]["Head"]["out_channels_list"] = {
|
| 182 |
"CTCLabelDecode": n_char,
|
| 183 |
"SARLabelDecode": n_char + 2,
|
|
@@ -187,7 +198,7 @@ net = BaseModel(cfg["Architecture"], out_channels=n_char)
|
|
| 187 |
net.load_state_dict(load_file(weight_path, device="cpu"))
|
| 188 |
net.eval()
|
| 189 |
|
| 190 |
-
# 3.
|
| 191 |
img = cv2.imread("input_word.jpg")
|
| 192 |
h, w = img.shape[:2]
|
| 193 |
ratio = w / h
|
|
@@ -199,56 +210,77 @@ x = canvas.astype(np.float32).transpose(2, 0, 1) / 255.0
|
|
| 199 |
x = (x - 0.5) / 0.5
|
| 200 |
x = torch.from_numpy(x).unsqueeze(0)
|
| 201 |
|
| 202 |
-
# 4.
|
| 203 |
with torch.no_grad():
|
| 204 |
logits = net(x)
|
| 205 |
-
post_op = build_post_process({
|
| 206 |
-
|
| 207 |
-
|
|
|
|
|
|
|
| 208 |
result = post_op(logits)
|
| 209 |
-
print("
|
| 210 |
```
|
| 211 |
|
| 212 |
-
###
|
| 213 |
|
| 214 |
```
|
| 215 |
-
torch
|
| 216 |
-
safetensors
|
| 217 |
numpy, pillow, opencv-python
|
| 218 |
pyyaml, shapely, pyclipper
|
| 219 |
-
scikit-image
|
| 220 |
```
|
| 221 |
|
| 222 |
---
|
| 223 |
|
| 224 |
-
##
|
| 225 |
|
| 226 |
-
-
|
| 227 |
-
|
| 228 |
-
-
|
|
|
|
| 229 |
|
| 230 |
-
|
| 231 |
|
| 232 |
-
|
|
| 233 |
|---|---|---|
|
| 234 |
-
|
|
| 235 |
-
|
|
| 236 |
-
|
|
| 237 |
-
|
|
| 238 |
|
| 239 |
---
|
| 240 |
|
| 241 |
-
##
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 242 |
|
| 243 |
- **License**: Apache License 2.0
|
| 244 |
-
-
|
| 245 |
-
-
|
| 246 |
|
| 247 |
-
|
| 248 |
|
| 249 |
---
|
| 250 |
|
| 251 |
-
##
|
| 252 |
|
| 253 |
```bibtex
|
| 254 |
@misc{pp_ocrv5_pytorch_joycn_2025,
|
|
|
|
| 28 |
|
| 29 |
# PP-OCRv5 PyTorch Model Zoo
|
| 30 |
|
| 31 |
+
PyTorch weights (**safetensors** format) for the full **PP-OCRv5** family, converted bit-exactly from the official PaddlePaddle `.pdparams` dynamic-graph weights — inference outputs are **identical to the original PaddleOCR** down to float32 precision.
|
| 32 |
|
| 33 |
+
- **Text Detection**: 2 models (mobile / server)
|
| 34 |
+
- **Text Recognition (base)**: 2 models covering Simplified Chinese / Traditional Chinese / English / Japanese
|
| 35 |
+
- **Text Recognition (multilingual)**: 11 models covering **100+ languages** (Korean, French, German, Russian, Arabic, Devanagari, Thai, Greek, Tamil, Telugu, etc.)
|
| 36 |
|
| 37 |
+
> This repo contains **weights + configs + dictionaries only**, not inference code. For inference, use [PaddleOCR2Pytorch](https://github.com/frotms/PaddleOCR2Pytorch), or follow the "Custom Python Inference" section below.
|
| 38 |
+
>
|
| 39 |
+
> Also available: [README_zh.md](./README_zh.md) (中文版).
|
| 40 |
|
| 41 |
---
|
| 42 |
|
| 43 |
+
## Repository Layout
|
| 44 |
|
| 45 |
```
|
| 46 |
.
|
| 47 |
+
├── README.md / README_zh.md
|
| 48 |
+
├── LICENSE # Apache 2.0
|
| 49 |
+
├── config.json # Repo metadata + model index
|
| 50 |
+
│
|
| 51 |
+
├── ptocr_v5_*.safetensors # 15 PP-OCRv5 weights at root (stable URLs)
|
| 52 |
+
├── ptocr_v5_server_{det,rec}.pth # Legacy pth copies of V5 server (kept)
|
| 53 |
+
│
|
| 54 |
├── configs/
|
| 55 |
│ ├── det/PP-OCRv5/
|
| 56 |
+
│ │ ├── PP-OCRv5_mobile_det.yml
|
| 57 |
+
│ │ └── PP-OCRv5_server_det.yml
|
| 58 |
│ └── rec/PP-OCRv5/
|
| 59 |
+
│ ├── PP-OCRv5_mobile_rec.yml # zh / zh-Hant / en / ja
|
| 60 |
+
│ ├── PP-OCRv5_server_rec.yml
|
| 61 |
+
│ └── multi_language/ # 11 multilingual rec yamls
|
| 62 |
+
│ ├── en_PP-OCRv5_mobile_rec.yaml
|
| 63 |
+
│ ├── korean_PP-OCRv5_mobile_rec.yml
|
| 64 |
+
│ ├── latin_PP-OCRv5_mobile_rec.yml # French / German / Spanish / ... (40+ Latin-script)
|
| 65 |
+
│ ├── eslav_PP-OCRv5_mobile_rec.yml # Russian / Belarusian / Ukrainian
|
| 66 |
+
│ ├── cyrillic_PP-OCRv5_mobile_rec.yaml # 33 Cyrillic-script languages
|
| 67 |
+
│ ├── arabic_PP-OCRv5_mobile_rec.yaml # Arabic / Persian / Uyghur / Urdu / ...
|
| 68 |
+
│ ├── devanagari_PP-OCRv5_mobile_rec.yaml # Hindi / Marathi / Nepali / Sanskrit / ...
|
| 69 |
+
│ ├── th_PP-OCRv5_mobile_rec.yaml # Thai
|
| 70 |
+
│ ├── el_PP-OCRv5_mobile_rec.yaml # Greek
|
| 71 |
+
│ ├── ta_PP-OCRv5_mobile_rec.yaml # Tamil
|
| 72 |
+
│ └── te_PP-OCRv5_mobile_rec.yaml # Telugu
|
| 73 |
+
│
|
| 74 |
+
├── dicts/ # Character set dictionaries (required for rec)
|
| 75 |
+
│ ├── ppocrv5_dict.txt # base (zh / zh-Hant / en / ja)
|
| 76 |
+
│ └── ppocrv5_<lang>_dict.txt # 11 multilingual dicts
|
| 77 |
+
│
|
| 78 |
+
└── legacy/ # Older PP-OCR v2/v3/v4 weights (kept for back-compat)
|
| 79 |
+
├── ch_ptocr_mobile_v2.0_cls_infer.pth
|
| 80 |
+
├── ch_ptocr_v4_det_infer.pth
|
| 81 |
+
├── ch_ptocr_v4_rec_infer.pth
|
| 82 |
+
├── en_ptocr_v3_det_infer.pth
|
| 83 |
+
└── en_ptocr_v4_rec_infer.pth
|
| 84 |
```
|
| 85 |
|
| 86 |
+
> All rec yamls use relative `character_dict_path: ./dicts/...`. After `git clone` or `snapshot_download`, paths resolve correctly with **no modification required**.
|
| 87 |
|
| 88 |
---
|
| 89 |
|
| 90 |
+
## Model Catalog
|
| 91 |
|
| 92 |
+
### Text Detection
|
| 93 |
|
| 94 |
+
| Weight | Config | Use case | Size |
|
| 95 |
|---|---|---|---|
|
| 96 |
+
| `ptocr_v5_mobile_det.safetensors` | `configs/det/PP-OCRv5/PP-OCRv5_mobile_det.yml` | Mobile / CPU-friendly | ~14 MB |
|
| 97 |
+
| `ptocr_v5_server_det.safetensors` | `configs/det/PP-OCRv5/PP-OCRv5_server_det.yml` | Server / high-accuracy | ~101 MB |
|
| 98 |
|
| 99 |
+
### Text Recognition (Base)
|
| 100 |
|
| 101 |
+
| Weight | Config | Languages | Size |
|
| 102 |
|---|---|---|---|
|
| 103 |
+
| `ptocr_v5_mobile_rec.safetensors` | `configs/rec/PP-OCRv5/PP-OCRv5_mobile_rec.yml` | Simplified / Traditional Chinese, English, Japanese | ~31 MB |
|
| 104 |
+
| `ptocr_v5_server_rec.safetensors` | `configs/rec/PP-OCRv5/PP-OCRv5_server_rec.yml` | same as above, higher accuracy | ~128 MB |
|
| 105 |
|
| 106 |
+
### Text Recognition (Multilingual)
|
| 107 |
|
| 108 |
+
All multilingual rec models share the same architecture (`SVTR_LCNet` + `PPLCNetV3`); they differ only by character dictionary. File size 23–28 MB each.
|
| 109 |
|
| 110 |
+
| Weight | Supported languages |
|
| 111 |
|---|---|
|
| 112 |
+
| `ptocr_v5_en_mobile_rec.safetensors` | English (dedicated model optimized for English-only scenarios) |
|
| 113 |
+
| `ptocr_v5_korean_mobile_rec.safetensors` | Korean, English |
|
| 114 |
+
| `ptocr_v5_latin_mobile_rec.safetensors` | French, German, Spanish, Italian, Portuguese, Dutch, Swedish, Danish, Norwegian, Finnish, Polish, Czech, Turkish, Vietnamese, ... (40+ Latin-script) |
|
| 115 |
+
| `ptocr_v5_eslav_mobile_rec.safetensors` | Russian, Belarusian, Ukrainian, English |
|
| 116 |
+
| `ptocr_v5_cyrillic_mobile_rec.safetensors` | 33 Cyrillic-script languages (Russian, Serbian-Cyrillic, Bulgarian, Mongolian, Kazakh, ...) |
|
| 117 |
+
| `ptocr_v5_arabic_mobile_rec.safetensors` | Arabic, Persian, Uyghur, Urdu, Pashto, Sindhi, ... |
|
| 118 |
+
| `ptocr_v5_devanagari_mobile_rec.safetensors` | 14 Devanagari-script languages (Hindi, Marathi, Nepali, Sanskrit, ...) |
|
| 119 |
+
| `ptocr_v5_th_mobile_rec.safetensors` | Thai, English |
|
| 120 |
+
| `ptocr_v5_el_mobile_rec.safetensors` | Greek, English |
|
| 121 |
+
| `ptocr_v5_ta_mobile_rec.safetensors` | Tamil, English |
|
| 122 |
+
| `ptocr_v5_te_mobile_rec.safetensors` | Telugu, English |
|
| 123 |
|
| 124 |
---
|
| 125 |
|
| 126 |
+
## Quick Start
|
| 127 |
|
| 128 |
+
### Download Weights
|
| 129 |
|
| 130 |
```python
|
| 131 |
from huggingface_hub import snapshot_download, hf_hub_download
|
| 132 |
|
| 133 |
+
# Option 1: download the whole repo (weights + configs + dicts + README)
|
| 134 |
repo_dir = snapshot_download(repo_id="JoyCN/PaddleOCR-Pytorch")
|
| 135 |
+
print("downloaded to:", repo_dir)
|
| 136 |
|
| 137 |
+
# Option 2: fetch a single weight file
|
| 138 |
weight_path = hf_hub_download(
|
| 139 |
repo_id="JoyCN/PaddleOCR-Pytorch",
|
| 140 |
+
filename="ptocr_v5_korean_mobile_rec.safetensors",
|
| 141 |
)
|
| 142 |
```
|
| 143 |
|
| 144 |
+
### Inference via PaddleOCR2Pytorch (Recommended)
|
| 145 |
|
| 146 |
```bash
|
| 147 |
+
# 1. clone the inference code repo
|
| 148 |
git clone https://github.com/frotms/PaddleOCR2Pytorch
|
| 149 |
cd PaddleOCR2Pytorch
|
| 150 |
pip install torch safetensors pyyaml shapely pyclipper opencv-python pillow scikit-image
|
| 151 |
|
| 152 |
+
# 2. run recognition (assumes the snapshot_download above placed the repo at /path/to/hf_repo)
|
| 153 |
python tools/infer/predict_rec.py \
|
| 154 |
--image_dir doc/imgs_words/korean/1.jpg \
|
| 155 |
--rec_algorithm SVTR_LCNet \
|
|
|
|
| 160 |
--use_gpu False
|
| 161 |
```
|
| 162 |
|
| 163 |
+
> PaddleOCR2Pytorch's `base_ocr_v20.py` auto-detects `.safetensors` vs `.pth` by file extension (backward compatible).
|
| 164 |
|
| 165 |
+
### Custom Python Inference
|
| 166 |
|
| 167 |
+
A minimal skeleton showing how to load the weights and run a forward pass. You still need the network definitions from the PaddleOCR2Pytorch `pytorchocr/modeling/` package.
|
| 168 |
|
| 169 |
```python
|
| 170 |
import sys, numpy as np, cv2, torch, yaml
|
| 171 |
from safetensors.torch import load_file
|
| 172 |
|
| 173 |
+
# Requires https://github.com/frotms/PaddleOCR2Pytorch on PYTHONPATH
|
|
|
|
| 174 |
sys.path.insert(0, "/path/to/PaddleOCR2Pytorch")
|
| 175 |
from pytorchocr.modeling.architectures.base_model import BaseModel
|
| 176 |
from pytorchocr.postprocess import build_post_process
|
| 177 |
|
| 178 |
+
HF_REPO = "/path/to/hf_repo" # the path returned by snapshot_download
|
| 179 |
yml_path = f"{HF_REPO}/configs/rec/PP-OCRv5/multi_language/korean_PP-OCRv5_mobile_rec.yml"
|
| 180 |
weight_path = f"{HF_REPO}/ptocr_v5_korean_mobile_rec.safetensors"
|
| 181 |
|
| 182 |
+
# 1. load config + dictionary
|
| 183 |
with open(yml_path, encoding="utf-8") as f:
|
| 184 |
cfg = yaml.safe_load(f)
|
| 185 |
+
dict_path = cfg["Global"]["character_dict_path"] # './dicts/ppocrv5_korean_dict.txt'
|
| 186 |
dict_abs = f"{HF_REPO}/{dict_path.lstrip('./')}"
|
| 187 |
with open(dict_abs, encoding="utf-8") as f:
|
| 188 |
chars = [l.strip("\n\r") for l in f]
|
| 189 |
+
n_char = len(chars) + 2 # +1 blank, +1 space (if use_space_char)
|
| 190 |
|
| 191 |
+
# 2. build network + load weights (safetensors = zero-code-exec, mmap-fast)
|
| 192 |
cfg["Architecture"]["Head"]["out_channels_list"] = {
|
| 193 |
"CTCLabelDecode": n_char,
|
| 194 |
"SARLabelDecode": n_char + 2,
|
|
|
|
| 198 |
net.load_state_dict(load_file(weight_path, device="cpu"))
|
| 199 |
net.eval()
|
| 200 |
|
| 201 |
+
# 3. preprocess (resize to [3, 48, 320], normalize to [-1, 1])
|
| 202 |
img = cv2.imread("input_word.jpg")
|
| 203 |
h, w = img.shape[:2]
|
| 204 |
ratio = w / h
|
|
|
|
| 210 |
x = (x - 0.5) / 0.5
|
| 211 |
x = torch.from_numpy(x).unsqueeze(0)
|
| 212 |
|
| 213 |
+
# 4. forward + CTC decode
|
| 214 |
with torch.no_grad():
|
| 215 |
logits = net(x)
|
| 216 |
+
post_op = build_post_process({
|
| 217 |
+
"name": "CTCLabelDecode",
|
| 218 |
+
"character_dict_path": dict_abs,
|
| 219 |
+
"use_space_char": True,
|
| 220 |
+
})
|
| 221 |
result = post_op(logits)
|
| 222 |
+
print("prediction:", result) # e.g. [('바탕으로', 0.9998)]
|
| 223 |
```
|
| 224 |
|
| 225 |
+
### Runtime Dependencies
|
| 226 |
|
| 227 |
```
|
| 228 |
+
torch >= 1.13
|
| 229 |
+
safetensors >= 0.4
|
| 230 |
numpy, pillow, opencv-python
|
| 231 |
pyyaml, shapely, pyclipper
|
| 232 |
+
scikit-image # required by det post-processing
|
| 233 |
```
|
| 234 |
|
| 235 |
---
|
| 236 |
|
| 237 |
+
## Conversion & Verification
|
| 238 |
|
| 239 |
+
- **Source weights**: official PaddlePaddle `.pdparams` from
|
| 240 |
+
`https://paddle-model-ecology.bj.bcebos.com/paddlex/official_pretrained_model/`
|
| 241 |
+
- **Converter**: [PaddleOCR2Pytorch](https://github.com/frotms/PaddleOCR2Pytorch) — scripts `converter/ppocr_v5_det_converter.py` / `converter/ppocr_v5_rec_converter.py`
|
| 242 |
+
- **Verification**: end-to-end inference was run on macOS Apple Silicon (M-series) CPU; multilingual rec outputs are **bit-exact** with the original PaddleOCR `.pdparams` (float32 values match to 8 decimal places).
|
| 243 |
|
| 244 |
+
Sample inference results (CPU, < 0.7 s / image):
|
| 245 |
|
| 246 |
+
| Sample | Prediction | Confidence |
|
| 247 |
|---|---|---|
|
| 248 |
+
| Chinese `word_1.jpg` | 韩国小馆 | 0.99797755 |
|
| 249 |
+
| Korean `korean/1.jpg` | 바탕으로 | 0.99977183 |
|
| 250 |
+
| French `french/1.jpg` | de l'amendement, | 0.99656343 |
|
| 251 |
+
| Arabic `arabic/ar_1.jpg` | الكيصياوي | 0.68281130 |
|
| 252 |
|
| 253 |
---
|
| 254 |
|
| 255 |
+
## Legacy Files (`legacy/`)
|
| 256 |
+
|
| 257 |
+
Older PP-OCR (v2 / v3 / v4) checkpoints previously at the repo root have been **moved into `legacy/`** for clarity. They are still present and continue to work — just add the `legacy/` prefix to your path.
|
| 258 |
+
|
| 259 |
+
If you were previously using any of these files via their root-level URLs, switch to the new paths:
|
| 260 |
+
|
| 261 |
+
```
|
| 262 |
+
legacy/ch_ptocr_mobile_v2.0_cls_infer.pth
|
| 263 |
+
legacy/ch_ptocr_v4_det_infer.pth
|
| 264 |
+
legacy/ch_ptocr_v4_rec_infer.pth
|
| 265 |
+
legacy/en_ptocr_v3_det_infer.pth
|
| 266 |
+
legacy/en_ptocr_v4_rec_infer.pth
|
| 267 |
+
```
|
| 268 |
+
|
| 269 |
+
The 15 PP-OCRv5 safetensors files **remain at the repo root** — their URLs did not change.
|
| 270 |
+
|
| 271 |
+
---
|
| 272 |
+
|
| 273 |
+
## License & Credits
|
| 274 |
|
| 275 |
- **License**: Apache License 2.0
|
| 276 |
+
- Weights originate from [PaddleOCR](https://github.com/PaddlePaddle/PaddleOCR) by the PaddlePaddle team (Apache 2.0).
|
| 277 |
+
- Converted with [PaddleOCR2Pytorch](https://github.com/frotms/PaddleOCR2Pytorch) (Apache 2.0).
|
| 278 |
|
| 279 |
+
If this repo helps you, please also star both of those original projects.
|
| 280 |
|
| 281 |
---
|
| 282 |
|
| 283 |
+
## Citation
|
| 284 |
|
| 285 |
```bibtex
|
| 286 |
@misc{pp_ocrv5_pytorch_joycn_2025,
|
README_zh.md
ADDED
|
@@ -0,0 +1,287 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
license: apache-2.0
|
| 3 |
+
language:
|
| 4 |
+
- zh
|
| 5 |
+
- en
|
| 6 |
+
- ja
|
| 7 |
+
- ko
|
| 8 |
+
- fr
|
| 9 |
+
- de
|
| 10 |
+
- es
|
| 11 |
+
- ru
|
| 12 |
+
- ar
|
| 13 |
+
- hi
|
| 14 |
+
- th
|
| 15 |
+
- el
|
| 16 |
+
library_name: pytorch
|
| 17 |
+
tags:
|
| 18 |
+
- ocr
|
| 19 |
+
- text-detection
|
| 20 |
+
- text-recognition
|
| 21 |
+
- paddleocr
|
| 22 |
+
- pp-ocrv5
|
| 23 |
+
- multilingual
|
| 24 |
+
- svtr
|
| 25 |
+
- db
|
| 26 |
+
pipeline_tag: image-to-text
|
| 27 |
+
---
|
| 28 |
+
|
| 29 |
+
# PP-OCRv5 PyTorch Model Zoo(中文版)
|
| 30 |
+
|
| 31 |
+
> 本仓库的主 README 为英文版 [README.md](./README.md)。本文件为中文对照版。
|
| 32 |
+
|
| 33 |
+
PP-OCRv5 全系列模型的 **PyTorch** 版本(safetensors 格式),从百度 PaddlePaddle 官方 `.pdparams` 动态图权重精确转换而来,**推理结果与 PaddleOCR 原版位精确一致**。
|
| 34 |
+
|
| 35 |
+
- **文本检测**:2 个(mobile / server)
|
| 36 |
+
- **文本识别(基础)**:2 个,覆盖 简中 / 繁中 / 英文 / 日文
|
| 37 |
+
- **文本识别(多语言)**:11 个,覆盖 100+ 语种(韩 / 法 / 德 / 俄 / 阿拉伯 / 天城文 / 泰 / 希腊 / 泰米尔 / 泰卢固 / 纯英文等)
|
| 38 |
+
|
| 39 |
+
> 本仓库**仅包含权重、配置和字典**,不包含推理代码。推理请配合 [PaddleOCR2Pytorch](https://github.com/frotms/PaddleOCR2Pytorch) 使用,或参考下文"自定义 Python 推理"章节自行集成。
|
| 40 |
+
|
| 41 |
+
---
|
| 42 |
+
|
| 43 |
+
## 仓库结构
|
| 44 |
+
|
| 45 |
+
```
|
| 46 |
+
.
|
| 47 |
+
├── README.md / README_zh.md
|
| 48 |
+
├── LICENSE # Apache 2.0
|
| 49 |
+
├── config.json # 仓库元数据 + 模型索引
|
| 50 |
+
├── *.safetensors # 15 个 PP-OCRv5 权重(位于根目录,URL 稳定)
|
| 51 |
+
├── ptocr_v5_server_{det,rec}.pth # V5 服务端的 pth 副本(向后兼容保留)
|
| 52 |
+
├── configs/
|
| 53 |
+
│ ├── det/PP-OCRv5/
|
| 54 |
+
│ │ ├── PP-OCRv5_mobile_det.yml # 移动端检测
|
| 55 |
+
│ │ └── PP-OCRv5_server_det.yml # 服务端检测
|
| 56 |
+
│ └── rec/PP-OCRv5/
|
| 57 |
+
│ ├── PP-OCRv5_mobile_rec.yml # 基础识别(中繁英日,移动端)
|
| 58 |
+
│ ├── PP-OCRv5_server_rec.yml # 基础识别(中繁英日,服务端)
|
| 59 |
+
│ └── multi_language/
|
| 60 |
+
│ ├── en_PP-OCRv5_mobile_rec.yaml # 英文专用
|
| 61 |
+
│ ├── korean_PP-OCRv5_mobile_rec.yml # 韩文 + 英文
|
| 62 |
+
│ ├── latin_PP-OCRv5_mobile_rec.yml # 拉丁字母 40+ 语种(法/德/西/意/葡 等)
|
| 63 |
+
│ ├── eslav_PP-OCRv5_mobile_rec.yml # 东斯拉夫(俄/白俄/乌克兰)
|
| 64 |
+
│ ├── cyrillic_PP-OCRv5_mobile_rec.yaml # 西里尔字母 33 种
|
| 65 |
+
│ ├── arabic_PP-OCRv5_mobile_rec.yaml # 阿拉伯 / 波斯 / 维吾尔 / 乌尔都 等
|
| 66 |
+
│ ├── devanagari_PP-OCRv5_mobile_rec.yaml # 天城文系 14 种(印地/马拉地/尼泊尔/梵文 等)
|
| 67 |
+
│ ├── th_PP-OCRv5_mobile_rec.yaml # 泰文
|
| 68 |
+
│ ├── el_PP-OCRv5_mobile_rec.yaml # 希腊文
|
| 69 |
+
│ ├── ta_PP-OCRv5_mobile_rec.yaml # 泰米尔文
|
| 70 |
+
│ └── te_PP-OCRv5_mobile_rec.yaml # 泰卢固文
|
| 71 |
+
└── dicts/ # 字符集字典(rec 推理必需)
|
| 72 |
+
├── ppocrv5_dict.txt # 基础(中繁英日)
|
| 73 |
+
├── ppocrv5_en_dict.txt
|
| 74 |
+
├── ppocrv5_korean_dict.txt
|
| 75 |
+
└── ...(共 12 个)
|
| 76 |
+
|
| 77 |
+
legacy/ # 旧版本(v2/v3/v4)pth 集中目录
|
| 78 |
+
├── ch_ptocr_mobile_v2.0_cls_infer.pth
|
| 79 |
+
├── ch_ptocr_v4_det_infer.pth
|
| 80 |
+
├── ch_ptocr_v4_rec_infer.pth
|
| 81 |
+
├── en_ptocr_v3_det_infer.pth
|
| 82 |
+
└── en_ptocr_v4_rec_infer.pth
|
| 83 |
+
```
|
| 84 |
+
|
| 85 |
+
> 所有 rec yaml 的 `character_dict_path` 已改写为相对路径 `./dicts/...`,`git clone` 或 `snapshot_download` 下载后**无需修改路径**即可使用。
|
| 86 |
+
|
| 87 |
+
---
|
| 88 |
+
|
| 89 |
+
## 模型清单
|
| 90 |
+
|
| 91 |
+
### 文本检测
|
| 92 |
+
|
| 93 |
+
| 权重文件 | 对应 yaml | 场景 | 文件大小 |
|
| 94 |
+
|---|---|---|---|
|
| 95 |
+
| `ptocr_v5_mobile_det.safetensors` | `configs/det/PP-OCRv5/PP-OCRv5_mobile_det.yml` | 移动端 / CPU 推荐 | ~14 MB |
|
| 96 |
+
| `ptocr_v5_server_det.safetensors` | `configs/det/PP-OCRv5/PP-OCRv5_server_det.yml` | 服务端 / 高精度 | ~101 MB |
|
| 97 |
+
|
| 98 |
+
### 文本识别(基础)
|
| 99 |
+
|
| 100 |
+
| 权重文件 | 对应 yaml | 支持语种 | 文件大小 |
|
| 101 |
+
|---|---|---|---|
|
| 102 |
+
| `ptocr_v5_mobile_rec.safetensors` | `configs/rec/PP-OCRv5/PP-OCRv5_mobile_rec.yml` | 简中 / 繁中 / 英文 / 日文 | ~31 MB |
|
| 103 |
+
| `ptocr_v5_server_rec.safetensors` | `configs/rec/PP-OCRv5/PP-OCRv5_server_rec.yml` | 简中 / 繁中 / 英文 / 日文 | ~128 MB |
|
| 104 |
+
|
| 105 |
+
### 文本识别(多语言)
|
| 106 |
+
|
| 107 |
+
所有多语言识别模型共享相同网络(`SVTR_LCNet` + `PPLCNetV3`),仅字符集不同。文件大小 23–28 MB。
|
| 108 |
+
|
| 109 |
+
| 权重文件 | 支持语种 |
|
| 110 |
+
|---|---|
|
| 111 |
+
| `ptocr_v5_en_mobile_rec.safetensors` | 英文专用(针对英文场景定向优化) |
|
| 112 |
+
| `ptocr_v5_korean_mobile_rec.safetensors` | 韩文、英文 |
|
| 113 |
+
| `ptocr_v5_latin_mobile_rec.safetensors` | 法文、德文、南非荷兰文、意大利文、西班牙文、葡萄牙文、捷克文、丹麦文、爱沙尼亚文、克罗地亚文、荷兰文、挪威文、波兰文、瑞典文、芬兰文、土耳其文、越南文 等 40+ 语种 |
|
| 114 |
+
| `ptocr_v5_eslav_mobile_rec.safetensors` | 俄罗斯文、白俄罗斯文、乌克兰文、英文 |
|
| 115 |
+
| `ptocr_v5_cyrillic_mobile_rec.safetensors` | 俄文、白俄文、乌克兰文、塞尔维亚(西里尔)、保加利亚、蒙古 等 33 种西里尔字母语言 |
|
| 116 |
+
| `ptocr_v5_arabic_mobile_rec.safetensors` | 阿拉伯文、波斯文、维吾尔文、乌尔都文、普什图文、信德文 等 |
|
| 117 |
+
| `ptocr_v5_devanagari_mobile_rec.safetensors` | 印地文、马拉地文、尼泊尔文、梵文 等 14 种天城文系语言 |
|
| 118 |
+
| `ptocr_v5_th_mobile_rec.safetensors` | 泰文、英文 |
|
| 119 |
+
| `ptocr_v5_el_mobile_rec.safetensors` | 希腊文、英文 |
|
| 120 |
+
| `ptocr_v5_ta_mobile_rec.safetensors` | 泰米尔文、英文 |
|
| 121 |
+
| `ptocr_v5_te_mobile_rec.safetensors` | 泰卢固文、英文 |
|
| 122 |
+
|
| 123 |
+
---
|
| 124 |
+
|
| 125 |
+
## 快速开始
|
| 126 |
+
|
| 127 |
+
### 下载权重
|
| 128 |
+
|
| 129 |
+
```python
|
| 130 |
+
from huggingface_hub import snapshot_download, hf_hub_download
|
| 131 |
+
|
| 132 |
+
# 方式 1:下载整个仓库(权重 + yml + 字典 + README)
|
| 133 |
+
repo_dir = snapshot_download(repo_id="JoyCN/PaddleOCR-Pytorch")
|
| 134 |
+
print("仓库下载到:", repo_dir)
|
| 135 |
+
|
| 136 |
+
# 方式 2:只下载单个权重文件
|
| 137 |
+
weight_path = hf_hub_download(
|
| 138 |
+
repo_id="JoyCN/PaddleOCR-Pytorch",
|
| 139 |
+
filename="ptocr_v5_korean_mobile_rec.safetensors"
|
| 140 |
+
)
|
| 141 |
+
```
|
| 142 |
+
|
| 143 |
+
### 使用 PaddleOCR2Pytorch 项目做推理(推荐)
|
| 144 |
+
|
| 145 |
+
```bash
|
| 146 |
+
# 1. clone 推理代码仓
|
| 147 |
+
git clone https://github.com/frotms/PaddleOCR2Pytorch
|
| 148 |
+
cd PaddleOCR2Pytorch
|
| 149 |
+
pip install torch safetensors pyyaml shapely pyclipper opencv-python pillow scikit-image
|
| 150 |
+
|
| 151 |
+
# 2. 用本仓库下载的权重 + yml(假设下载到 /path/to/hf_repo)
|
| 152 |
+
python tools/infer/predict_rec.py \
|
| 153 |
+
--image_dir doc/imgs_words/korean/1.jpg \
|
| 154 |
+
--rec_algorithm SVTR_LCNet \
|
| 155 |
+
--rec_model_path /path/to/hf_repo/ptocr_v5_korean_mobile_rec.safetensors \
|
| 156 |
+
--rec_yaml_path /path/to/hf_repo/configs/rec/PP-OCRv5/multi_language/korean_PP-OCRv5_mobile_rec.yml \
|
| 157 |
+
--rec_image_shape "3,48,320" \
|
| 158 |
+
--rec_char_dict_path /path/to/hf_repo/dicts/ppocrv5_korean_dict.txt \
|
| 159 |
+
--use_gpu False
|
| 160 |
+
```
|
| 161 |
+
|
| 162 |
+
> PaddleOCR2Pytorch 的 `base_ocr_v20.py` 已原生支持 `.safetensors`(按后缀自动识别,向后兼容 `.pth`)。
|
| 163 |
+
|
| 164 |
+
### 自定义 Python 推理代码
|
| 165 |
+
|
| 166 |
+
如果你不想依赖 PaddleOCR2Pytorch 完整推理栈,下面是**一个最小 rec 推理代码片段**的骨架。它展示了如何加载权重并做前向推理——但你仍然需要 PaddleOCR2Pytorch 项目中的网络定义代码(`pytorchocr/modeling/`)。
|
| 167 |
+
|
| 168 |
+
```python
|
| 169 |
+
import sys, numpy as np, cv2, torch, yaml
|
| 170 |
+
from safetensors.torch import load_file
|
| 171 |
+
|
| 172 |
+
# 以下 import 需要你先 clone https://github.com/frotms/PaddleOCR2Pytorch
|
| 173 |
+
# 并把其根目录加入 PYTHONPATH
|
| 174 |
+
sys.path.insert(0, "/path/to/PaddleOCR2Pytorch")
|
| 175 |
+
from pytorchocr.modeling.architectures.base_model import BaseModel
|
| 176 |
+
from pytorchocr.postprocess import build_post_process
|
| 177 |
+
|
| 178 |
+
HF_REPO = "/path/to/hf_repo" # snapshot_download 得到的路径
|
| 179 |
+
yml_path = f"{HF_REPO}/configs/rec/PP-OCRv5/multi_language/korean_PP-OCRv5_mobile_rec.yml"
|
| 180 |
+
weight_path = f"{HF_REPO}/ptocr_v5_korean_mobile_rec.safetensors"
|
| 181 |
+
|
| 182 |
+
# 1. 读配置 + 字符集
|
| 183 |
+
with open(yml_path, encoding="utf-8") as f:
|
| 184 |
+
cfg = yaml.safe_load(f)
|
| 185 |
+
dict_path = cfg["Global"]["character_dict_path"] # './dicts/ppocrv5_korean_dict.txt'
|
| 186 |
+
dict_abs = f"{HF_REPO}/{dict_path[2:] if dict_path.startswith('./') else dict_path}"
|
| 187 |
+
with open(dict_abs, encoding="utf-8") as f:
|
| 188 |
+
chars = [l.strip("\n\r") for l in f]
|
| 189 |
+
n_char = len(chars) + 2 # +1 blank, +1 space(依 use_space_char 而定)
|
| 190 |
+
|
| 191 |
+
# 2. 构建网络 + 加载权重(safetensors 零代码执行、mmap 快速加载)
|
| 192 |
+
cfg["Architecture"]["Head"]["out_channels_list"] = {
|
| 193 |
+
"CTCLabelDecode": n_char,
|
| 194 |
+
"SARLabelDecode": n_char + 2,
|
| 195 |
+
"NRTRLabelDecode": n_char + 3,
|
| 196 |
+
}
|
| 197 |
+
net = BaseModel(cfg["Architecture"], out_channels=n_char)
|
| 198 |
+
net.load_state_dict(load_file(weight_path, device="cpu"))
|
| 199 |
+
net.eval()
|
| 200 |
+
|
| 201 |
+
# 3. 读图 + 预处理(resize 到 [3, 48, 320],归一化到 [-1, 1])
|
| 202 |
+
img = cv2.imread("input_word.jpg")
|
| 203 |
+
h, w = img.shape[:2]
|
| 204 |
+
ratio = w / h
|
| 205 |
+
tw = max(1, min(int(48 * ratio), 320))  # 极窄图片时 int() 可能为 0,cv2.resize 会报错,故下限取 1
|
| 206 |
+
img = cv2.resize(img, (tw, 48))
|
| 207 |
+
canvas = np.zeros((48, 320, 3), dtype=np.uint8)
|
| 208 |
+
canvas[:, :tw] = img
|
| 209 |
+
x = canvas.astype(np.float32).transpose(2, 0, 1) / 255.0
|
| 210 |
+
x = (x - 0.5) / 0.5
|
| 211 |
+
x = torch.from_numpy(x).unsqueeze(0)
|
| 212 |
+
|
| 213 |
+
# 4. 前向 + CTC 解码
|
| 214 |
+
with torch.no_grad():
|
| 215 |
+
logits = net(x)
|
| 216 |
+
post_op = build_post_process({"name": "CTCLabelDecode",
|
| 217 |
+
"character_dict_path": dict_abs,
|
| 218 |
+
"use_space_char": True})
|
| 219 |
+
result = post_op(logits)
|
| 220 |
+
print("识别结果:", result) # e.g. [('바탕으로', 0.9998)]
|
| 221 |
+
```
|
| 222 |
+
|
| 223 |
+
### 推理所需依赖
|
| 224 |
+
|
| 225 |
+
```
|
| 226 |
+
torch >= 1.13
|
| 227 |
+
safetensors >= 0.4
|
| 228 |
+
numpy, pillow, opencv-python
|
| 229 |
+
pyyaml, shapely, pyclipper
|
| 230 |
+
scikit-image # det 后处理需要
|
| 231 |
+
```
|
| 232 |
+
|
| 233 |
+
---
|
| 234 |
+
|
| 235 |
+
## 转换 & 验证来源
|
| 236 |
+
|
| 237 |
+
- 源权重:PaddlePaddle 官方 `.pdparams`,来自 [paddle-model-ecology.bj.bcebos.com/paddlex/official_pretrained_model/](https://paddle-model-ecology.bj.bcebos.com/paddlex/official_pretrained_model/)
|
| 238 |
+
- 转换工具:[PaddleOCR2Pytorch](https://github.com/frotms/PaddleOCR2Pytorch) 中的 `converter/ppocr_v5_det_converter.py` / `ppocr_v5_rec_converter.py`
|
| 239 |
+
- 验证:在 macOS Apple Silicon (M 系列) CPU 环境下做过端到端推理,**多语言识别结果与 PaddleOCR 官方 `.pdparams` 位精确一致**(float32 小数点后 8 位完全相同)
|
| 240 |
+
|
| 241 |
+
样例推理输出(CPU,<0.7 s / 张):
|
| 242 |
+
|
| 243 |
+
| 样例 | 识别结果 | 置信度 |
|
| 244 |
+
|---|---|---|
|
| 245 |
+
| 中文 `word_1.jpg` | 韩国小馆 | 0.99797755 |
|
| 246 |
+
| 韩文 `korean/1.jpg` | 바탕으로 | 0.99977183 |
|
| 247 |
+
| 法文 `french/1.jpg` | de l'amendement, | 0.99656343 |
|
| 248 |
+
| 阿拉伯 `arabic/ar_1.jpg` | الكيصياوي | 0.68281130 |
|
| 249 |
+
|
| 250 |
+
---
|
| 251 |
+
|
| 252 |
+
## Legacy 文件说明(`legacy/`)
|
| 253 |
+
|
| 254 |
+
原本放在仓库根目录的 PP-OCR v2 / v3 / v4 老版本权重,现已**统一迁移到 `legacy/` 目录**以便整理。这些文件仍然存在且可正常使用,只需把原下载路径中的文件名改为 `legacy/<文件名>`(即在文件名前加上 `legacy/` 目录前缀)即可:
|
| 255 |
+
|
| 256 |
+
```
|
| 257 |
+
legacy/ch_ptocr_mobile_v2.0_cls_infer.pth
|
| 258 |
+
legacy/ch_ptocr_v4_det_infer.pth
|
| 259 |
+
legacy/ch_ptocr_v4_rec_infer.pth
|
| 260 |
+
legacy/en_ptocr_v3_det_infer.pth
|
| 261 |
+
legacy/en_ptocr_v4_rec_infer.pth
|
| 262 |
+
```
|
| 263 |
+
|
| 264 |
+
**15 个 PP-OCRv5 safetensors 权重依然位于仓库根目录,URL 未变**。
|
| 265 |
+
|
| 266 |
+
---
|
| 267 |
+
|
| 268 |
+
## 许可证 & 致谢
|
| 269 |
+
|
| 270 |
+
- **License**: Apache License 2.0
|
| 271 |
+
- 权重来源:[PaddleOCR](https://github.com/PaddlePaddle/PaddleOCR) by PaddlePaddle 团队,Apache 2.0
|
| 272 |
+
- 转换工具:[PaddleOCR2Pytorch](https://github.com/frotms/PaddleOCR2Pytorch),Apache 2.0
|
| 273 |
+
|
| 274 |
+
如果本仓库对你有帮助,请同时给上述两个原始项目 star 致谢。
|
| 275 |
+
|
| 276 |
+
---
|
| 277 |
+
|
| 278 |
+
## 引用
|
| 279 |
+
|
| 280 |
+
```bibtex
|
| 281 |
+
@misc{pp_ocrv5_pytorch_joycn_2025,
|
| 282 |
+
title = {PP-OCRv5 PyTorch Model Zoo},
|
| 283 |
+
author = {JoyCN},
|
| 284 |
+
howpublished = {\url{https://huggingface.co/JoyCN/PaddleOCR-Pytorch}},
|
| 285 |
+
year = {2025}
|
| 286 |
+
}
|
| 287 |
+
```
|
config.json
CHANGED
|
@@ -1,27 +1,122 @@
|
|
| 1 |
{
|
| 2 |
"library_name": "pytorch",
|
| 3 |
-
"format": "
|
| 4 |
-
"
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
"rec": {
|
| 10 |
-
"safetensors": "ptocr_v5_server_rec.safetensors",
|
| 11 |
-
"pth": "ptocr_v5_server_rec.pth"
|
| 12 |
-
}
|
| 13 |
-
},
|
| 14 |
-
"homepage": "https://github.com/frotms/PaddleOCR2Pytorch",
|
| 15 |
"created": "2025-09-16",
|
| 16 |
-
"
|
| 17 |
-
"
|
| 18 |
-
"safetensors",
|
| 19 |
-
"pth"
|
| 20 |
-
],
|
| 21 |
"compat": {
|
| 22 |
"project": "PaddleOCR2Pytorch",
|
| 23 |
"repo": "https://github.com/frotms/PaddleOCR2Pytorch",
|
| 24 |
"license": "Apache-2.0"
|
| 25 |
},
|
| 26 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"library_name": "pytorch",
|
| 3 |
+
"format": "safetensors",
|
| 4 |
+
"formats": ["safetensors", "pth"],
|
| 5 |
+
"homepage": "https://huggingface.co/JoyCN/PaddleOCR-Pytorch",
|
| 6 |
+
"source_project": "https://github.com/frotms/PaddleOCR2Pytorch",
|
| 7 |
+
"upstream_project": "https://github.com/PaddlePaddle/PaddleOCR",
|
| 8 |
+
"license": "Apache-2.0",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
"created": "2025-09-16",
|
| 10 |
+
"updated": "2026-04-15",
|
| 11 |
+
"notes": "PP-OCRv5 full model zoo (2 det + 2 base rec + 11 multilingual rec), bit-exact conversion of official PaddlePaddle .pdparams to PyTorch. Older PP-OCR v2/v3/v4 checkpoints kept under legacy/.",
|
|
|
|
|
|
|
|
|
|
| 12 |
"compat": {
|
| 13 |
"project": "PaddleOCR2Pytorch",
|
| 14 |
"repo": "https://github.com/frotms/PaddleOCR2Pytorch",
|
| 15 |
"license": "Apache-2.0"
|
| 16 |
},
|
| 17 |
+
"models": {
|
| 18 |
+
"ppocrv5": {
|
| 19 |
+
"det": {
|
| 20 |
+
"mobile": {
|
| 21 |
+
"safetensors": "ptocr_v5_mobile_det.safetensors",
|
| 22 |
+
"yaml": "configs/det/PP-OCRv5/PP-OCRv5_mobile_det.yml"
|
| 23 |
+
},
|
| 24 |
+
"server": {
|
| 25 |
+
"safetensors": "ptocr_v5_server_det.safetensors",
|
| 26 |
+
"pth": "ptocr_v5_server_det.pth",
|
| 27 |
+
"yaml": "configs/det/PP-OCRv5/PP-OCRv5_server_det.yml"
|
| 28 |
+
}
|
| 29 |
+
},
|
| 30 |
+
"rec_base": {
|
| 31 |
+
"mobile": {
|
| 32 |
+
"safetensors": "ptocr_v5_mobile_rec.safetensors",
|
| 33 |
+
"yaml": "configs/rec/PP-OCRv5/PP-OCRv5_mobile_rec.yml",
|
| 34 |
+
"dict": "dicts/ppocrv5_dict.txt",
|
| 35 |
+
"languages": ["zh-Hans", "zh-Hant", "en", "ja"]
|
| 36 |
+
},
|
| 37 |
+
"server": {
|
| 38 |
+
"safetensors": "ptocr_v5_server_rec.safetensors",
|
| 39 |
+
"pth": "ptocr_v5_server_rec.pth",
|
| 40 |
+
"yaml": "configs/rec/PP-OCRv5/PP-OCRv5_server_rec.yml",
|
| 41 |
+
"dict": "dicts/ppocrv5_dict.txt",
|
| 42 |
+
"languages": ["zh-Hans", "zh-Hant", "en", "ja"]
|
| 43 |
+
}
|
| 44 |
+
},
|
| 45 |
+
"rec_multilingual": {
|
| 46 |
+
"en": {
|
| 47 |
+
"safetensors": "ptocr_v5_en_mobile_rec.safetensors",
|
| 48 |
+
"yaml": "configs/rec/PP-OCRv5/multi_language/en_PP-OCRv5_mobile_rec.yaml",
|
| 49 |
+
"dict": "dicts/ppocrv5_en_dict.txt",
|
| 50 |
+
"languages": ["en"]
|
| 51 |
+
},
|
| 52 |
+
"korean": {
|
| 53 |
+
"safetensors": "ptocr_v5_korean_mobile_rec.safetensors",
|
| 54 |
+
"yaml": "configs/rec/PP-OCRv5/multi_language/korean_PP-OCRv5_mobile_rec.yml",
|
| 55 |
+
"dict": "dicts/ppocrv5_korean_dict.txt",
|
| 56 |
+
"languages": ["ko", "en"]
|
| 57 |
+
},
|
| 58 |
+
"latin": {
|
| 59 |
+
"safetensors": "ptocr_v5_latin_mobile_rec.safetensors",
|
| 60 |
+
"yaml": "configs/rec/PP-OCRv5/multi_language/latin_PP-OCRv5_mobile_rec.yml",
|
| 61 |
+
"dict": "dicts/ppocrv5_latin_dict.txt",
|
| 62 |
+
"languages": ["fr", "de", "es", "it", "pt", "nl", "sv", "da", "no", "fi", "pl", "cs", "tr", "vi", "af", "bs", "cy", "et", "ga", "hr", "uz", "hu", "id", "is", "lt", "mi", "ms", "sk", "sl", "sq", "sw", "tl", "la", "az", "ku", "lv", "mt", "pi", "ro", "eu", "gl", "lb", "rm", "ca", "qu", "rs_latin", "oc"]
|
| 63 |
+
},
|
| 64 |
+
"eslav": {
|
| 65 |
+
"safetensors": "ptocr_v5_eslav_mobile_rec.safetensors",
|
| 66 |
+
"yaml": "configs/rec/PP-OCRv5/multi_language/eslav_PP-OCRv5_mobile_rec.yml",
|
| 67 |
+
"dict": "dicts/ppocrv5_eslav_dict.txt",
|
| 68 |
+
"languages": ["ru", "be", "uk", "en"]
|
| 69 |
+
},
|
| 70 |
+
"cyrillic": {
|
| 71 |
+
"safetensors": "ptocr_v5_cyrillic_mobile_rec.safetensors",
|
| 72 |
+
"yaml": "configs/rec/PP-OCRv5/multi_language/cyrillic_PP-OCRv5_mobile_rec.yaml",
|
| 73 |
+
"dict": "dicts/ppocrv5_cyrillic_dict.txt",
|
| 74 |
+
"languages": ["ru", "be", "uk", "rs_cyrillic", "bg", "mn", "kk", "ky", "tg", "mk", "tt", "cv", "ba", "mhr", "mo", "udm", "kv", "os", "bua", "xal", "tyv", "sah", "kaa", "ab", "ady", "kbd", "av", "dar", "inh", "ce", "lki", "lez", "tab", "en"]
|
| 75 |
+
},
|
| 76 |
+
"arabic": {
|
| 77 |
+
"safetensors": "ptocr_v5_arabic_mobile_rec.safetensors",
|
| 78 |
+
"yaml": "configs/rec/PP-OCRv5/multi_language/arabic_PP-OCRv5_mobile_rec.yaml",
|
| 79 |
+
"dict": "dicts/ppocrv5_arabic_dict.txt",
|
| 80 |
+
"languages": ["ar", "fa", "ug", "ur", "ps", "ku", "sd", "bal", "en"]
|
| 81 |
+
},
|
| 82 |
+
"devanagari": {
|
| 83 |
+
"safetensors": "ptocr_v5_devanagari_mobile_rec.safetensors",
|
| 84 |
+
"yaml": "configs/rec/PP-OCRv5/multi_language/devanagari_PP-OCRv5_mobile_rec.yaml",
|
| 85 |
+
"dict": "dicts/ppocrv5_devanagari_dict.txt",
|
| 86 |
+
"languages": ["hi", "mr", "ne", "bh", "mai", "ang", "bho", "mah", "sck", "new", "gom", "sa", "bgc", "en"]
|
| 87 |
+
},
|
| 88 |
+
"th": {
|
| 89 |
+
"safetensors": "ptocr_v5_th_mobile_rec.safetensors",
|
| 90 |
+
"yaml": "configs/rec/PP-OCRv5/multi_language/th_PP-OCRv5_mobile_rec.yaml",
|
| 91 |
+
"dict": "dicts/ppocrv5_th_dict.txt",
|
| 92 |
+
"languages": ["th", "en"]
|
| 93 |
+
},
|
| 94 |
+
"el": {
|
| 95 |
+
"safetensors": "ptocr_v5_el_mobile_rec.safetensors",
|
| 96 |
+
"yaml": "configs/rec/PP-OCRv5/multi_language/el_PP-OCRv5_mobile_rec.yaml",
|
| 97 |
+
"dict": "dicts/ppocrv5_el_dict.txt",
|
| 98 |
+
"languages": ["el", "en"]
|
| 99 |
+
},
|
| 100 |
+
"ta": {
|
| 101 |
+
"safetensors": "ptocr_v5_ta_mobile_rec.safetensors",
|
| 102 |
+
"yaml": "configs/rec/PP-OCRv5/multi_language/ta_PP-OCRv5_mobile_rec.yaml",
|
| 103 |
+
"dict": "dicts/ppocrv5_ta_dict.txt",
|
| 104 |
+
"languages": ["ta", "en"]
|
| 105 |
+
},
|
| 106 |
+
"te": {
|
| 107 |
+
"safetensors": "ptocr_v5_te_mobile_rec.safetensors",
|
| 108 |
+
"yaml": "configs/rec/PP-OCRv5/multi_language/te_PP-OCRv5_mobile_rec.yaml",
|
| 109 |
+
"dict": "dicts/ppocrv5_te_dict.txt",
|
| 110 |
+
"languages": ["te", "en"]
|
| 111 |
+
}
|
| 112 |
+
}
|
| 113 |
+
}
|
| 114 |
+
},
|
| 115 |
+
"legacy": {
|
| 116 |
+
"ppocrv2_cls_ch": "legacy/ch_ptocr_mobile_v2.0_cls_infer.pth",
|
| 117 |
+
"ppocrv3_det_en": "legacy/en_ptocr_v3_det_infer.pth",
|
| 118 |
+
"ppocrv4_det_ch": "legacy/ch_ptocr_v4_det_infer.pth",
|
| 119 |
+
"ppocrv4_rec_ch": "legacy/ch_ptocr_v4_rec_infer.pth",
|
| 120 |
+
"ppocrv4_rec_en": "legacy/en_ptocr_v4_rec_infer.pth"
|
| 121 |
+
}
|
| 122 |
}
|
ch_ptocr_mobile_v2.0_cls_infer.pth → legacy/ch_ptocr_mobile_v2.0_cls_infer.pth
RENAMED
|
File without changes
|
ch_ptocr_v4_det_infer.pth → legacy/ch_ptocr_v4_det_infer.pth
RENAMED
|
File without changes
|
ch_ptocr_v4_rec_infer.pth → legacy/ch_ptocr_v4_rec_infer.pth
RENAMED
|
File without changes
|
en_ptocr_v3_det_infer.pth → legacy/en_ptocr_v3_det_infer.pth
RENAMED
|
File without changes
|
en_ptocr_v4_rec_infer.pth → legacy/en_ptocr_v4_rec_infer.pth
RENAMED
|
File without changes
|