longtc committed on
Commit
2b45a96
·
verified ·
1 Parent(s): 6d85abb

Upload 3 files

Browse files
PP-OCRv5_mobile_rec.yml ADDED
@@ -0,0 +1,140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Global:
2
+ model_name: PP-OCRv5_mobile_rec # To use static model for inference.
3
+ debug: false
4
+ use_gpu: true
5
+ epoch_num: 75
6
+ log_smooth_window: 20
7
+ print_batch_step: 10
8
+ save_model_dir: ./output/PP-OCRv5_mobile_rec
9
+ save_epoch_step: 10
10
+ eval_batch_step: [0, 2000]
11
+ cal_metric_during_train: true
12
+ pretrained_model: D:/MyCode/Python/Model/paddleocr/rec_mv3_none_bilstm_ctc_v2.0_train/best_accuracy.pdparams
13
+ checkpoints:
14
+ save_inference_dir:
15
+ use_visualdl: false
16
+ infer_img: doc/imgs_words/ch/word_1.jpg
17
+ character_dict_path: ./ppocr/utils/dict/latin_dict.txt
18
+ max_text_length: &max_text_length 25
19
+ infer_mode: false
20
+ use_space_char: true
21
+ distributed: true
22
+ save_res_path: ./output/rec/predicts_ppocrv5.txt
23
+ d2s_train_image_shape: [3, 48, 320]
24
+
25
+
26
+ Optimizer:
27
+ name: Adam
28
+ beta1: 0.9
29
+ beta2: 0.999
30
+ lr:
31
+ name: Cosine
32
+ learning_rate: 0.0005
33
+ warmup_epoch: 5
34
+ regularizer:
35
+ name: L2
36
+ factor: 3.0e-05
37
+
38
+
39
+ Architecture:
40
+ model_type: rec
41
+ algorithm: SVTR_LCNet
42
+ Transform:
43
+ Backbone:
44
+ name: PPLCNetV3
45
+ scale: 0.95
46
+ Head:
47
+ name: MultiHead
48
+ head_list:
49
+ - CTCHead:
50
+ Neck:
51
+ name: svtr
52
+ dims: 120
53
+ depth: 2
54
+ hidden_dims: 120
55
+ kernel_size: [1, 3]
56
+ use_guide: True
57
+ Head:
58
+ fc_decay: 0.00001
59
+ - NRTRHead:
60
+ nrtr_dim: 384
61
+ max_text_length: *max_text_length
62
+
63
+ Loss:
64
+ name: MultiLoss
65
+ loss_config_list:
66
+ - CTCLoss:
67
+ - NRTRLoss:
68
+
69
+ PostProcess:
70
+ name: CTCLabelDecode
71
+
72
+ Metric:
73
+ name: RecMetric
74
+ main_indicator: acc
75
+
76
+ Train:
77
+ dataset:
78
+ name: MultiScaleDataSet
79
+ ds_width: false
80
+ data_dir: D:/MyCode/Python/Model/paddleocr/total_text/train
81
+ ext_op_transform_idx: 1
82
+ label_file_list:
83
+ - D:/MyCode/Python/Model/paddleocr/total_text/train/train_rec.txt
84
+ transforms:
85
+ - DecodeImage:
86
+ img_mode: BGR
87
+ channel_first: false
88
+ - RecConAug:
89
+ prob: 0.5
90
+ ext_data_num: 2
91
+ image_shape: [48, 320, 3]
92
+ max_text_length: *max_text_length
93
+ - RecAug:
94
+ - MultiLabelEncode:
95
+ gtc_encode: NRTRLabelEncode
96
+ - KeepKeys:
97
+ keep_keys:
98
+ - image
99
+ - label_ctc
100
+ - label_gtc
101
+ - length
102
+ - valid_ratio
103
+ sampler:
104
+ name: MultiScaleSampler
105
+ scales: [[320, 32], [320, 48], [320, 64]]
106
+ first_bs: &bs 128
107
+ fix_bs: false
108
+ divided_factor: [8, 16] # w, h
109
+ is_training: True
110
+ loader:
111
+ shuffle: true
112
+ batch_size_per_card: *bs
113
+ drop_last: true
114
+ num_workers: 8
115
+ Eval:
116
+ dataset:
117
+ name: SimpleDataSet
118
+ data_dir: D:/MyCode/Python/Model/paddleocr/total_text/test
119
+ label_file_list:
120
+ - D:/MyCode/Python/Model/paddleocr/total_text/test/test_rec.txt
121
+ transforms:
122
+ - DecodeImage:
123
+ img_mode: BGR
124
+ channel_first: false
125
+ - MultiLabelEncode:
126
+ gtc_encode: NRTRLabelEncode
127
+ - RecResizeImg:
128
+ image_shape: [3, 48, 320]
129
+ - KeepKeys:
130
+ keep_keys:
131
+ - image
132
+ - label_ctc
133
+ - label_gtc
134
+ - length
135
+ - valid_ratio
136
+ loader:
137
+ shuffle: false
138
+ drop_last: false
139
+ batch_size_per_card: 128
140
+ num_workers: 4
convert_format.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+
3
# Detection-style label file to read: one "<image path>\t<JSON annotation list>"
# record per line.
# NOTE(review): this input lives under ...\test\ while the output is written
# under ...\train\ -- confirm that this is the intended source file.
INPUT_LABEL_FILE = r'D:\MyCode\Python\Model\paddleocr\total_text\test\train.txt'
# Recognition-style label file to write: one "<image path>\t<text>" per line.
OUTPUT_LABEL_FILE = r'D:\MyCode\Python\Model\paddleocr\total_text\train\train_rec.txt'

# Transcription value that marks a region to be ignored; the sibling cropping
# script filters the same marker.
IGNORE_TRANSCRIPTION = "###"


def extract_transcriptions(annotations):
    """Return the usable transcriptions found in one JSON annotation payload.

    Args:
        annotations: JSON text expected to decode to a list of objects, each
            carrying a "transcription" string.

    Returns:
        The stripped transcriptions, in order, skipping entries that are not
        objects, have a missing/non-string/blank transcription, or carry the
        "###" ignore marker.

    Raises:
        ValueError: if the text is not valid JSON (json.JSONDecodeError is a
            ValueError subclass) or does not decode to a list.
    """
    ann_list = json.loads(annotations)
    if not isinstance(ann_list, list):
        raise ValueError("annotation payload is not a JSON list")

    texts = []
    for ann in ann_list:
        if not isinstance(ann, dict):
            continue
        text = ann.get("transcription", "")
        if not isinstance(text, str):
            continue
        text = text.strip()
        if text and text != IGNORE_TRANSCRIPTION:
            texts.append(text)
    return texts


def convert_labels(input_path=INPUT_LABEL_FILE, output_path=OUTPUT_LABEL_FILE):
    """Flatten a detection label file into "<image path>\\t<text>" lines.

    Every usable transcription of every input record becomes one output line
    that reuses the record's image path unchanged.
    NOTE(review): the image path is the full image, not a per-word crop;
    confirm that the consumer of the output file expects that.

    Args:
        input_path: UTF-8 file with "<image path>\\t<JSON list>" records.
        output_path: UTF-8 file to create or overwrite.

    Returns:
        The number of lines written to ``output_path``.
    """
    written = 0
    with open(input_path, 'r', encoding='utf-8') as f, \
            open(output_path, 'w', encoding='utf-8') as out_f:
        for line in f:
            parts = line.strip().split('\t')
            if len(parts) != 2:
                continue  # skip malformed lines

            img_path, annotations = parts
            try:
                texts = extract_transcriptions(annotations)
            except ValueError:
                # Report the offending record and keep converting the rest.
                print(f"Lỗi JSON ở dòng: {line}")
                continue

            for text in texts:
                out_f.write(f"{img_path}\t{text}\n")
                written += 1
    return written


if __name__ == "__main__":
    convert_labels()
convert_format_test.py ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ import cv2
4
+ import numpy as np
5
+
6
input_label_file = "D:/MyCode/Python/Model/paddleocr/total_text/test/test.txt"
image_root = "D:/MyCode/Python/Model/paddleocr/total_text/test"
output_label_file = "D:/MyCode/Python/Model/paddleocr/total_text/test/test_rec.txt"
crop_output_dir = os.path.join(image_root, "rec_crop")


def parse_label_line(line):
    """Split one "<relative image path>\\t<JSON annotation list>" record.

    Args:
        line: one raw line of the detection label file.

    Returns:
        ``(img_path_rel, anns)`` where ``anns`` is the decoded list, or
        ``None`` when the line does not have exactly two tab-separated fields
        or its second field is not a JSON list.
    """
    parts = line.strip().split('\t')
    if len(parts) != 2:
        return None
    img_path_rel, raw_anns = parts
    try:
        anns = json.loads(raw_anns)
    except json.JSONDecodeError:
        return None
    if not isinstance(anns, list):
        return None
    return img_path_rel, anns


def clip_box(x, y, w, h, width, height):
    """Clip an (x, y, w, h) rectangle to a ``width`` x ``height`` image.

    Both corners are clipped, so a box that starts at a negative coordinate
    keeps its true right/bottom edge instead of being shifted.

    Returns:
        ``(x0, y0, x1, y1)`` with exclusive ``x1``/``y1``, or ``None`` when
        nothing of the box lies inside the image.
    """
    x0 = max(0, x)
    y0 = max(0, y)
    x1 = min(width, x + w)
    y1 = min(height, y + h)
    if x1 <= x0 or y1 <= y0:
        return None
    return x0, y0, x1, y1


def main():
    """Crop every labelled text region and write a recognition label file.

    Reads ``input_label_file``, saves one JPEG crop per usable annotation
    under ``crop_output_dir`` and writes "rec_crop/<name>\\t<text>" lines to
    ``output_label_file``.
    """
    os.makedirs(crop_output_dir, exist_ok=True)

    with open(input_label_file, "r", encoding="utf-8") as f:
        lines = f.readlines()

    out_lines = []
    crop_id = 0  # running counter shared by all images, keeps crop names unique

    for line in lines:
        if not line.strip():
            continue  # tolerate blank lines (e.g. a trailing newline)

        parsed = parse_label_line(line)
        if parsed is None:
            print(f"[WARNING] Dòng nhãn không hợp lệ, bỏ qua: {line.strip()}")
            continue
        img_path_rel, anns = parsed
        img_path = os.path.join(image_root, img_path_rel)

        if not os.path.exists(img_path):
            print(f"[WARNING] Không tìm thấy ảnh: {img_path}")
            continue

        img = cv2.imread(img_path)
        if img is None:
            print(f"[WARNING] Lỗi đọc ảnh: {img_path}")
            continue

        height, width = img.shape[:2]
        img_stem = os.path.splitext(os.path.basename(img_path))[0]

        for ann in anns:
            if not isinstance(ann, dict):
                continue
            text = ann.get('transcription')
            points = ann.get('points')
            if not isinstance(text, str) or not points:
                continue

            text = text.strip()
            # "###" marks a region to ignore; blank text is unusable too.
            if text.lower() == "###" or not text:
                continue

            # cv2.boundingRect needs int32 (or float32) points; a plain "int"
            # cast yields int64 on many platforms and is rejected.
            pts = np.array(points, dtype=np.float32).astype(np.int32)
            x, y, w, h = cv2.boundingRect(pts)

            # Keep the box inside the image by clipping it, not dropping it.
            box = clip_box(x, y, w, h, width, height)
            if box is None:
                print(f"[WARNING] Ảnh crop rỗng ({img_path}), bỏ qua.")
                continue
            x0, y0, x1, y1 = box
            if (x0, y0, x1, y1) != (x, y, x + w, y + h):
                print(f"[WARNING] Box vượt quá kích thước ảnh ({img_path}): x={x}, y={y}, w={w}, h={h}")

            cropped = img[y0:y1, x0:x1]

            crop_img_name = f"{img_stem}_crop_{crop_id}.jpg"
            crop_img_path = os.path.join(crop_output_dir, crop_img_name)
            # Only emit a label line when the crop was actually saved.
            if not cv2.imwrite(crop_img_path, cropped):
                print(f"[WARNING] Không ghi được ảnh crop: {crop_img_path}")
                continue

            out_lines.append(f"rec_crop/{crop_img_name}\t{text}")
            crop_id += 1

    with open(output_label_file, "w", encoding="utf-8") as f:
        f.write('\n'.join(out_lines))

    print(f"✅ Đã tạo {len(out_lines)} mẫu recognition tại: {output_label_file}")


if __name__ == "__main__":
    main()