Spaces:
Running
Running
Delete configs
Browse filesThis view is limited to 50 files because it contains too many changes. See raw diff
- configs/det/dbnet/repvit_db.yml +0 -173
- configs/rec/abinet/resnet45_trans_abinet_lang.yml +0 -94
- configs/rec/abinet/resnet45_trans_abinet_wo_lang.yml +0 -93
- configs/rec/abinet/svtrv2_abinet_lang.yml +0 -130
- configs/rec/abinet/svtrv2_abinet_wo_lang.yml +0 -128
- configs/rec/aster/resnet31_lstm_aster_tps_on.yml +0 -93
- configs/rec/aster/svtrv2_aster.yml +0 -127
- configs/rec/aster/svtrv2_aster_tps_on.yml +0 -102
- configs/rec/autostr/autostr_lstm_aster_tps_on.yml +0 -95
- configs/rec/busnet/svtrv2_busnet.yml +0 -135
- configs/rec/busnet/svtrv2_busnet_pretraining.yml +0 -134
- configs/rec/busnet/vit_busnet.yml +0 -104
- configs/rec/busnet/vit_busnet_pretraining.yml +0 -104
- configs/rec/cam/convnextv2_cam_tps_on.yml +0 -118
- configs/rec/cam/convnextv2_tiny_cam_tps_on.yml +0 -118
- configs/rec/cam/svtrv2_cam_tps_on.yml +0 -123
- configs/rec/cdistnet/resnet45_trans_cdistnet.yml +0 -93
- configs/rec/cdistnet/svtrv2_cdistnet.yml +0 -139
- configs/rec/cppd/svtr_base_cppd.yml +0 -123
- configs/rec/cppd/svtr_base_cppd_ch.yml +0 -126
- configs/rec/cppd/svtr_base_cppd_h8.yml +0 -123
- configs/rec/cppd/svtr_base_cppd_syn.yml +0 -124
- configs/rec/cppd/svtrv2_cppd.yml +0 -150
- configs/rec/dan/resnet45_fpn_dan.yml +0 -98
- configs/rec/dan/svtrv2_dan.yml +0 -130
- configs/rec/focalsvtr/focalsvtr_ctc.yml +0 -137
- configs/rec/gtc/svtrv2_lnconv_nrtr_gtc.yml +0 -168
- configs/rec/gtc/svtrv2_lnconv_smtr_gtc_long_infer.yml +0 -151
- configs/rec/gtc/svtrv2_lnconv_smtr_gtc_smtr_long.yml +0 -150
- configs/rec/gtc/svtrv2_lnconv_smtr_gtc_stream.yml +0 -152
- configs/rec/igtr/readme.md +0 -189
- configs/rec/igtr/svtr_base_ds_igtr.yml +0 -157
- configs/rec/lister/focalsvtr_lister_wo_fem_maxratio12.yml +0 -133
- configs/rec/lister/svtrv2_lister_wo_fem_maxratio12.yml +0 -138
- configs/rec/lpv/svtr_base_lpv.yml +0 -124
- configs/rec/lpv/svtr_base_lpv_wo_glrm.yml +0 -123
- configs/rec/lpv/svtrv2_lpv.yml +0 -147
- configs/rec/lpv/svtrv2_lpv_wo_glrm.yml +0 -146
- configs/rec/maerec/vit_nrtr.yml +0 -116
- configs/rec/matrn/resnet45_trans_matrn.yml +0 -95
- configs/rec/matrn/svtrv2_matrn.yml +0 -130
- configs/rec/mgpstr/svtrv2_mgpstr_only_char.yml +0 -140
- configs/rec/mgpstr/vit_base_mgpstr_only_char.yml +0 -111
- configs/rec/mgpstr/vit_large_mgpstr_only_char.yml +0 -110
- configs/rec/mgpstr/vit_mgpstr.yml +0 -110
- configs/rec/mgpstr/vit_mgpstr_only_char.yml +0 -110
- configs/rec/moran/resnet31_lstm_moran.yml +0 -92
- configs/rec/nrtr/focalsvtr_nrtr_maxraio12.yml +0 -145
- configs/rec/nrtr/nrtr.yml +0 -107
- configs/rec/nrtr/svtr_base_nrtr.yml +0 -118
configs/det/dbnet/repvit_db.yml
DELETED
|
@@ -1,173 +0,0 @@
|
|
| 1 |
-
Global:
|
| 2 |
-
device: gpu
|
| 3 |
-
epoch_num: &epoch_num 500
|
| 4 |
-
log_smooth_window: 20
|
| 5 |
-
print_batch_step: 100
|
| 6 |
-
save_model_dir: ./output/det_repsvtr_db
|
| 7 |
-
save_epoch_step: 10
|
| 8 |
-
eval_batch_step:
|
| 9 |
-
- 0
|
| 10 |
-
- 1000
|
| 11 |
-
cal_metric_during_train: false
|
| 12 |
-
checkpoints:
|
| 13 |
-
pretrained_model: openocr_det_repvit_ch.pth
|
| 14 |
-
save_inference_dir: null
|
| 15 |
-
use_visualdl: false
|
| 16 |
-
infer_img: ./testA
|
| 17 |
-
save_res_path: ./checkpoints/det_db/predicts_db.txt
|
| 18 |
-
distributed: true
|
| 19 |
-
model_type: det
|
| 20 |
-
|
| 21 |
-
Architecture:
|
| 22 |
-
algorithm: DB
|
| 23 |
-
Backbone:
|
| 24 |
-
name: RepSVTR_det
|
| 25 |
-
Neck:
|
| 26 |
-
name: RSEFPN
|
| 27 |
-
out_channels: 96
|
| 28 |
-
shortcut: True
|
| 29 |
-
Head:
|
| 30 |
-
name: DBHead
|
| 31 |
-
k: 50
|
| 32 |
-
|
| 33 |
-
# Loss:
|
| 34 |
-
# name: DBLoss
|
| 35 |
-
# balance_loss: true
|
| 36 |
-
# main_loss_type: DiceLoss
|
| 37 |
-
# alpha: 5
|
| 38 |
-
# beta: 10
|
| 39 |
-
# ohem_ratio: 3
|
| 40 |
-
|
| 41 |
-
# Optimizer:
|
| 42 |
-
# name: Adam
|
| 43 |
-
# beta1: 0.9
|
| 44 |
-
# beta2: 0.999
|
| 45 |
-
# lr:
|
| 46 |
-
# name: Cosine
|
| 47 |
-
# learning_rate: 0.001 #(8*8c)
|
| 48 |
-
# warmup_epoch: 2
|
| 49 |
-
# regularizer:
|
| 50 |
-
# name: L2
|
| 51 |
-
# factor: 5.0e-05
|
| 52 |
-
|
| 53 |
-
PostProcess:
|
| 54 |
-
name: DBPostProcess
|
| 55 |
-
thresh: 0.3
|
| 56 |
-
box_thresh: 0.6
|
| 57 |
-
max_candidates: 1000
|
| 58 |
-
unclip_ratio: 1.5
|
| 59 |
-
score_mode: 'slow'
|
| 60 |
-
|
| 61 |
-
# Metric:
|
| 62 |
-
# name: DetMetric
|
| 63 |
-
# main_indicator: hmean
|
| 64 |
-
|
| 65 |
-
# Train:
|
| 66 |
-
# dataset:
|
| 67 |
-
# name: SimpleDataSet
|
| 68 |
-
# data_dir: ./train_data/icdar2015/text_localization/
|
| 69 |
-
# label_file_list:
|
| 70 |
-
# - ./train_data/icdar2015/text_localization/train_icdar2015_label.txt
|
| 71 |
-
# ratio_list: [1.0]
|
| 72 |
-
# transforms:
|
| 73 |
-
# - DecodeImage:
|
| 74 |
-
# img_mode: BGR
|
| 75 |
-
# channel_first: false
|
| 76 |
-
# - DetLabelEncode: null
|
| 77 |
-
# - CopyPaste: null
|
| 78 |
-
# - IaaAugment:
|
| 79 |
-
# augmenter_args:
|
| 80 |
-
# - type: Fliplr
|
| 81 |
-
# args:
|
| 82 |
-
# p: 0.5
|
| 83 |
-
# - type: Affine
|
| 84 |
-
# args:
|
| 85 |
-
# rotate:
|
| 86 |
-
# - -10
|
| 87 |
-
# - 10
|
| 88 |
-
# - type: Resize
|
| 89 |
-
# args:
|
| 90 |
-
# size:
|
| 91 |
-
# - 0.5
|
| 92 |
-
# - 3
|
| 93 |
-
# - EastRandomCropData:
|
| 94 |
-
# size:
|
| 95 |
-
# - 640
|
| 96 |
-
# - 640
|
| 97 |
-
# max_tries: 50
|
| 98 |
-
# keep_ratio: true
|
| 99 |
-
# - MakeBorderMap:
|
| 100 |
-
# shrink_ratio: 0.4
|
| 101 |
-
# thresh_min: 0.3
|
| 102 |
-
# thresh_max: 0.7
|
| 103 |
-
# total_epoch: *epoch_num
|
| 104 |
-
# - MakeShrinkMap:
|
| 105 |
-
# shrink_ratio: 0.4
|
| 106 |
-
# min_text_size: 8
|
| 107 |
-
# total_epoch: *epoch_num
|
| 108 |
-
# - NormalizeImage:
|
| 109 |
-
# scale: 1./255.
|
| 110 |
-
# mean:
|
| 111 |
-
# - 0.485
|
| 112 |
-
# - 0.456
|
| 113 |
-
# - 0.406
|
| 114 |
-
# std:
|
| 115 |
-
# - 0.229
|
| 116 |
-
# - 0.224
|
| 117 |
-
# - 0.225
|
| 118 |
-
# order: hwc
|
| 119 |
-
# - ToCHWImage: null
|
| 120 |
-
# - KeepKeys:
|
| 121 |
-
# keep_keys:
|
| 122 |
-
# - image
|
| 123 |
-
# - threshold_map
|
| 124 |
-
# - threshold_mask
|
| 125 |
-
# - shrink_map
|
| 126 |
-
# - shrink_mask
|
| 127 |
-
# loader:
|
| 128 |
-
# shuffle: true
|
| 129 |
-
# drop_last: false
|
| 130 |
-
# batch_size_per_card: 8
|
| 131 |
-
# num_workers: 8
|
| 132 |
-
|
| 133 |
-
Eval:
|
| 134 |
-
dataset:
|
| 135 |
-
name: SimpleDataSet
|
| 136 |
-
data_dir: ./train_data/icdar2015/text_localization/
|
| 137 |
-
label_file_list:
|
| 138 |
-
- ./train_data/icdar2015/text_localization/test_icdar2015_label.txt
|
| 139 |
-
transforms:
|
| 140 |
-
- DecodeImage:
|
| 141 |
-
img_mode: BGR
|
| 142 |
-
channel_first: false
|
| 143 |
-
- DetLabelEncode: null
|
| 144 |
-
- DetResizeForTest:
|
| 145 |
-
# image_shape: [1280, 1280]
|
| 146 |
-
# keep_ratio: True
|
| 147 |
-
# padding: True
|
| 148 |
-
limit_side_len: 960
|
| 149 |
-
limit_type: max
|
| 150 |
-
- NormalizeImage:
|
| 151 |
-
scale: 1./255.
|
| 152 |
-
mean:
|
| 153 |
-
- 0.485
|
| 154 |
-
- 0.456
|
| 155 |
-
- 0.406
|
| 156 |
-
std:
|
| 157 |
-
- 0.229
|
| 158 |
-
- 0.224
|
| 159 |
-
- 0.225
|
| 160 |
-
order: hwc
|
| 161 |
-
- ToCHWImage: null
|
| 162 |
-
- KeepKeys:
|
| 163 |
-
keep_keys:
|
| 164 |
-
- image
|
| 165 |
-
- shape
|
| 166 |
-
- polys
|
| 167 |
-
- ignore_tags
|
| 168 |
-
loader:
|
| 169 |
-
shuffle: false
|
| 170 |
-
drop_last: false
|
| 171 |
-
batch_size_per_card: 1
|
| 172 |
-
num_workers: 2
|
| 173 |
-
profiler_options: null
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
configs/rec/abinet/resnet45_trans_abinet_lang.yml
DELETED
|
@@ -1,94 +0,0 @@
|
|
| 1 |
-
Global:
|
| 2 |
-
device: gpu
|
| 3 |
-
epoch_num: 20
|
| 4 |
-
log_smooth_window: 20
|
| 5 |
-
print_batch_step: 10
|
| 6 |
-
output_dir: ./output/rec/u14m_filter/resnet45_trans_abinet_lang/
|
| 7 |
-
eval_epoch_step: [0, 1]
|
| 8 |
-
eval_batch_step: [0, 500]
|
| 9 |
-
cal_metric_during_train: True
|
| 10 |
-
pretrained_model:
|
| 11 |
-
# ./openocr_nolang_abinet_lang.pth
|
| 12 |
-
checkpoints:
|
| 13 |
-
use_tensorboard: false
|
| 14 |
-
infer_img:
|
| 15 |
-
# for data or label process
|
| 16 |
-
character_dict_path: ./tools/utils/EN_symbol_dict.txt
|
| 17 |
-
max_text_length: 25
|
| 18 |
-
use_space_char: False
|
| 19 |
-
save_res_path: ./output/rec/u14m_filter/predicts_resnet45_trans_abinet_lang.txt
|
| 20 |
-
grad_clip_val: 20
|
| 21 |
-
use_amp: True
|
| 22 |
-
|
| 23 |
-
Optimizer:
|
| 24 |
-
name: Adam
|
| 25 |
-
lr: 0.000267
|
| 26 |
-
weight_decay: 0.0
|
| 27 |
-
filter_bias_and_bn: False
|
| 28 |
-
|
| 29 |
-
LRScheduler:
|
| 30 |
-
name: MultiStepLR
|
| 31 |
-
milestones: [12]
|
| 32 |
-
gamma: 0.1
|
| 33 |
-
|
| 34 |
-
Architecture:
|
| 35 |
-
model_type: rec
|
| 36 |
-
algorithm: ABINet
|
| 37 |
-
Transform:
|
| 38 |
-
Encoder:
|
| 39 |
-
name: ResNet45
|
| 40 |
-
in_channels: 3
|
| 41 |
-
strides: [2, 1, 2, 1, 1]
|
| 42 |
-
Decoder:
|
| 43 |
-
name: ABINetDecoder
|
| 44 |
-
iter_size: 3
|
| 45 |
-
|
| 46 |
-
Loss:
|
| 47 |
-
name: ABINetLoss
|
| 48 |
-
|
| 49 |
-
PostProcess:
|
| 50 |
-
name: ABINetLabelDecode
|
| 51 |
-
|
| 52 |
-
Metric:
|
| 53 |
-
name: RecMetric
|
| 54 |
-
main_indicator: acc
|
| 55 |
-
is_filter: True
|
| 56 |
-
|
| 57 |
-
Train:
|
| 58 |
-
dataset:
|
| 59 |
-
name: LMDBDataSet
|
| 60 |
-
data_dir: ../Union14M-L-LMDB-Filtered
|
| 61 |
-
transforms:
|
| 62 |
-
- DecodeImagePIL: # load image
|
| 63 |
-
img_mode: RGB
|
| 64 |
-
- PARSeqAugPIL:
|
| 65 |
-
- ABINetLabelEncode:
|
| 66 |
-
- RecTVResize:
|
| 67 |
-
image_shape: [32, 128]
|
| 68 |
-
padding: False
|
| 69 |
-
- KeepKeys:
|
| 70 |
-
keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
|
| 71 |
-
loader:
|
| 72 |
-
shuffle: True
|
| 73 |
-
batch_size_per_card: 256
|
| 74 |
-
drop_last: True
|
| 75 |
-
num_workers: 4
|
| 76 |
-
|
| 77 |
-
Eval:
|
| 78 |
-
dataset:
|
| 79 |
-
name: LMDBDataSet
|
| 80 |
-
data_dir: ../evaluation
|
| 81 |
-
transforms:
|
| 82 |
-
- DecodeImagePIL: # load image
|
| 83 |
-
img_mode: RGB
|
| 84 |
-
- ABINetLabelEncode:
|
| 85 |
-
- RecTVResize:
|
| 86 |
-
image_shape: [32, 128]
|
| 87 |
-
padding: False
|
| 88 |
-
- KeepKeys:
|
| 89 |
-
keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
|
| 90 |
-
loader:
|
| 91 |
-
shuffle: False
|
| 92 |
-
drop_last: False
|
| 93 |
-
batch_size_per_card: 256
|
| 94 |
-
num_workers: 2
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
configs/rec/abinet/resnet45_trans_abinet_wo_lang.yml
DELETED
|
@@ -1,93 +0,0 @@
|
|
| 1 |
-
Global:
|
| 2 |
-
device: gpu
|
| 3 |
-
epoch_num: 20
|
| 4 |
-
log_smooth_window: 20
|
| 5 |
-
print_batch_step: 10
|
| 6 |
-
output_dir: ./output/rec/u14m_filter/resnet45_trans_abinet_wo_lang/
|
| 7 |
-
eval_epoch_step: [0, 1]
|
| 8 |
-
eval_batch_step: [0, 500]
|
| 9 |
-
cal_metric_during_train: True
|
| 10 |
-
pretrained_model:
|
| 11 |
-
checkpoints:
|
| 12 |
-
use_tensorboard: false
|
| 13 |
-
infer_img:
|
| 14 |
-
# for data or label process
|
| 15 |
-
character_dict_path: ./tools/utils/EN_symbol_dict.txt
|
| 16 |
-
max_text_length: 25
|
| 17 |
-
use_space_char: False
|
| 18 |
-
save_res_path: ./output/rec/u14m_filter/predicts_resnet45_trans_abinet_wo_lang.txt
|
| 19 |
-
grad_clip_val: 20
|
| 20 |
-
use_amp: True
|
| 21 |
-
|
| 22 |
-
Optimizer:
|
| 23 |
-
name: Adam
|
| 24 |
-
lr: 0.000267
|
| 25 |
-
weight_decay: 0.0
|
| 26 |
-
filter_bias_and_bn: False
|
| 27 |
-
|
| 28 |
-
LRScheduler:
|
| 29 |
-
name: MultiStepLR
|
| 30 |
-
milestones: [12]
|
| 31 |
-
gamma: 0.1
|
| 32 |
-
|
| 33 |
-
Architecture:
|
| 34 |
-
model_type: rec
|
| 35 |
-
algorithm: ABINet
|
| 36 |
-
Transform:
|
| 37 |
-
Encoder:
|
| 38 |
-
name: ResNet45
|
| 39 |
-
in_channels: 3
|
| 40 |
-
strides: [2, 1, 2, 1, 1]
|
| 41 |
-
Decoder:
|
| 42 |
-
name: ABINetDecoder
|
| 43 |
-
iter_size: 0
|
| 44 |
-
|
| 45 |
-
Loss:
|
| 46 |
-
name: ABINetLoss
|
| 47 |
-
|
| 48 |
-
PostProcess:
|
| 49 |
-
name: ABINetLabelDecode
|
| 50 |
-
|
| 51 |
-
Metric:
|
| 52 |
-
name: RecMetric
|
| 53 |
-
main_indicator: acc
|
| 54 |
-
is_filter: True
|
| 55 |
-
|
| 56 |
-
Train:
|
| 57 |
-
dataset:
|
| 58 |
-
name: LMDBDataSet
|
| 59 |
-
data_dir: ../Union14M-L-LMDB-Filtered
|
| 60 |
-
transforms:
|
| 61 |
-
- DecodeImagePIL: # load image
|
| 62 |
-
img_mode: RGB
|
| 63 |
-
- PARSeqAugPIL:
|
| 64 |
-
- ABINetLabelEncode:
|
| 65 |
-
- RecTVResize:
|
| 66 |
-
image_shape: [32, 128]
|
| 67 |
-
padding: False
|
| 68 |
-
- KeepKeys:
|
| 69 |
-
keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
|
| 70 |
-
loader:
|
| 71 |
-
shuffle: True
|
| 72 |
-
batch_size_per_card: 256
|
| 73 |
-
drop_last: True
|
| 74 |
-
num_workers: 4
|
| 75 |
-
|
| 76 |
-
Eval:
|
| 77 |
-
dataset:
|
| 78 |
-
name: LMDBDataSet
|
| 79 |
-
data_dir: ../evaluation
|
| 80 |
-
transforms:
|
| 81 |
-
- DecodeImagePIL: # load image
|
| 82 |
-
img_mode: RGB
|
| 83 |
-
- ABINetLabelEncode:
|
| 84 |
-
- RecTVResize:
|
| 85 |
-
image_shape: [32, 128]
|
| 86 |
-
padding: False
|
| 87 |
-
- KeepKeys:
|
| 88 |
-
keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
|
| 89 |
-
loader:
|
| 90 |
-
shuffle: False
|
| 91 |
-
drop_last: False
|
| 92 |
-
batch_size_per_card: 256
|
| 93 |
-
num_workers: 2
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
configs/rec/abinet/svtrv2_abinet_lang.yml
DELETED
|
@@ -1,130 +0,0 @@
|
|
| 1 |
-
Global:
|
| 2 |
-
device: gpu
|
| 3 |
-
epoch_num: 20
|
| 4 |
-
log_smooth_window: 20
|
| 5 |
-
print_batch_step: 10
|
| 6 |
-
output_dir: ./output/rec/u14m_filter/svtrv2_abinet_lang/
|
| 7 |
-
eval_epoch_step: [0, 1]
|
| 8 |
-
eval_batch_step: [0, 500]
|
| 9 |
-
cal_metric_during_train: True
|
| 10 |
-
pretrained_model:
|
| 11 |
-
# ./openocr_svtrv2_nolang_abinet_lang.pth
|
| 12 |
-
checkpoints:
|
| 13 |
-
use_tensorboard: false
|
| 14 |
-
infer_img:
|
| 15 |
-
# for data or label process
|
| 16 |
-
character_dict_path: ./tools/utils/EN_symbol_dict.txt
|
| 17 |
-
max_text_length: 25
|
| 18 |
-
use_space_char: False
|
| 19 |
-
save_res_path: ./output/rec/u14m_filter/predicts_svtrv2_abinet_lang.txt
|
| 20 |
-
use_amp: True
|
| 21 |
-
grad_clip_val: 20
|
| 22 |
-
|
| 23 |
-
Optimizer:
|
| 24 |
-
name: AdamW
|
| 25 |
-
lr: 0.00065 # for 4gpus bs256/gpu
|
| 26 |
-
weight_decay: 0.05
|
| 27 |
-
filter_bias_and_bn: True
|
| 28 |
-
|
| 29 |
-
LRScheduler:
|
| 30 |
-
name: OneCycleLR
|
| 31 |
-
warmup_epoch: 1.5 # pct_start 0.075*20 = 1.5ep
|
| 32 |
-
cycle_momentum: False
|
| 33 |
-
|
| 34 |
-
Architecture:
|
| 35 |
-
model_type: rec
|
| 36 |
-
algorithm: ABINet
|
| 37 |
-
Transform:
|
| 38 |
-
Encoder:
|
| 39 |
-
name: SVTRv2LNConvTwo33
|
| 40 |
-
use_pos_embed: False
|
| 41 |
-
dims: [128, 256, 384]
|
| 42 |
-
depths: [6, 6, 6]
|
| 43 |
-
num_heads: [4, 8, 12]
|
| 44 |
-
mixer: [['Conv','Conv','Conv','Conv','Conv','Conv'],['Conv','Conv','FGlobal','Global','Global','Global'],['Global','Global','Global','Global','Global','Global']]
|
| 45 |
-
local_k: [[5, 5], [5, 5], [-1, -1]]
|
| 46 |
-
sub_k: [[1, 1], [2, 1], [-1, -1]]
|
| 47 |
-
last_stage: false
|
| 48 |
-
feat2d: True
|
| 49 |
-
Decoder:
|
| 50 |
-
name: ABINetDecoder
|
| 51 |
-
iter_size: 3
|
| 52 |
-
num_layers: 0
|
| 53 |
-
|
| 54 |
-
Loss:
|
| 55 |
-
name: ABINetLoss
|
| 56 |
-
|
| 57 |
-
PostProcess:
|
| 58 |
-
name: ABINetLabelDecode
|
| 59 |
-
|
| 60 |
-
Metric:
|
| 61 |
-
name: RecMetric
|
| 62 |
-
main_indicator: acc
|
| 63 |
-
is_filter: True
|
| 64 |
-
|
| 65 |
-
Train:
|
| 66 |
-
dataset:
|
| 67 |
-
name: RatioDataSetTVResize
|
| 68 |
-
ds_width: True
|
| 69 |
-
padding: false
|
| 70 |
-
data_dir_list: ['../Union14M-L-LMDB-Filtered/filter_train_challenging',
|
| 71 |
-
'../Union14M-L-LMDB-Filtered/filter_train_hard',
|
| 72 |
-
'../Union14M-L-LMDB-Filtered/filter_train_medium',
|
| 73 |
-
'../Union14M-L-LMDB-Filtered/filter_train_normal',
|
| 74 |
-
'../Union14M-L-LMDB-Filtered/filter_train_easy',
|
| 75 |
-
]
|
| 76 |
-
transforms:
|
| 77 |
-
- DecodeImagePIL: # load image
|
| 78 |
-
img_mode: RGB
|
| 79 |
-
- PARSeqAugPIL:
|
| 80 |
-
- ABINetLabelEncode:
|
| 81 |
-
- KeepKeys:
|
| 82 |
-
keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
|
| 83 |
-
sampler:
|
| 84 |
-
name: RatioSampler
|
| 85 |
-
scales: [[128, 32]] # w, h
|
| 86 |
-
# divide_factor: to ensure the width and height dimensions can be devided by downsampling multiple
|
| 87 |
-
first_bs: &bs 256
|
| 88 |
-
fix_bs: false
|
| 89 |
-
divided_factor: [4, 16] # w, h
|
| 90 |
-
is_training: True
|
| 91 |
-
loader:
|
| 92 |
-
shuffle: True
|
| 93 |
-
batch_size_per_card: *bs
|
| 94 |
-
drop_last: True
|
| 95 |
-
max_ratio: &max_ratio 4
|
| 96 |
-
num_workers: 4
|
| 97 |
-
|
| 98 |
-
Eval:
|
| 99 |
-
dataset:
|
| 100 |
-
name: RatioDataSetTVResize
|
| 101 |
-
ds_width: True
|
| 102 |
-
padding: False
|
| 103 |
-
data_dir_list: [
|
| 104 |
-
'../evaluation/CUTE80',
|
| 105 |
-
'../evaluation/IC13_857',
|
| 106 |
-
'../evaluation/IC15_1811',
|
| 107 |
-
'../evaluation/IIIT5k',
|
| 108 |
-
'../evaluation/SVT',
|
| 109 |
-
'../evaluation/SVTP',
|
| 110 |
-
]
|
| 111 |
-
transforms:
|
| 112 |
-
- DecodeImagePIL: # load image
|
| 113 |
-
img_mode: RGB
|
| 114 |
-
- ABINetLabelEncode:
|
| 115 |
-
- KeepKeys:
|
| 116 |
-
keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
|
| 117 |
-
sampler:
|
| 118 |
-
name: RatioSampler
|
| 119 |
-
scales: [[128, 32]] # w, h
|
| 120 |
-
# divide_factor: to ensure the width and height dimensions can be devided by downsampling multiple
|
| 121 |
-
first_bs: *bs
|
| 122 |
-
fix_bs: false
|
| 123 |
-
divided_factor: [4, 16] # w, h
|
| 124 |
-
is_training: False
|
| 125 |
-
loader:
|
| 126 |
-
shuffle: False
|
| 127 |
-
drop_last: False
|
| 128 |
-
batch_size_per_card: *bs
|
| 129 |
-
max_ratio: *max_ratio
|
| 130 |
-
num_workers: 4
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
configs/rec/abinet/svtrv2_abinet_wo_lang.yml
DELETED
|
@@ -1,128 +0,0 @@
|
|
| 1 |
-
Global:
|
| 2 |
-
device: gpu
|
| 3 |
-
epoch_num: 20
|
| 4 |
-
log_smooth_window: 20
|
| 5 |
-
print_batch_step: 10
|
| 6 |
-
output_dir: ./output/rec/u14m_filter/svtrv2_abinet_wo_lang/
|
| 7 |
-
eval_epoch_step: [0, 1]
|
| 8 |
-
eval_batch_step: [0, 500]
|
| 9 |
-
cal_metric_during_train: True
|
| 10 |
-
pretrained_model:
|
| 11 |
-
checkpoints:
|
| 12 |
-
use_tensorboard: false
|
| 13 |
-
infer_img:
|
| 14 |
-
# for data or label process
|
| 15 |
-
character_dict_path: ./tools/utils/EN_symbol_dict.txt
|
| 16 |
-
max_text_length: 25
|
| 17 |
-
use_space_char: False
|
| 18 |
-
save_res_path: ./output/rec/u14m_filter/predicts_svtrv2_abinet_wo_lang.txt
|
| 19 |
-
use_amp: True
|
| 20 |
-
grad_clip_val: 20
|
| 21 |
-
|
| 22 |
-
Optimizer:
|
| 23 |
-
name: AdamW
|
| 24 |
-
lr: 0.00065 # for 4gpus bs256/gpu
|
| 25 |
-
weight_decay: 0.05
|
| 26 |
-
filter_bias_and_bn: True
|
| 27 |
-
|
| 28 |
-
LRScheduler:
|
| 29 |
-
name: OneCycleLR
|
| 30 |
-
warmup_epoch: 1.5 # pct_start 0.075*20 = 1.5ep
|
| 31 |
-
cycle_momentum: False
|
| 32 |
-
|
| 33 |
-
Architecture:
|
| 34 |
-
model_type: rec
|
| 35 |
-
algorithm: ABINet
|
| 36 |
-
Transform:
|
| 37 |
-
Encoder:
|
| 38 |
-
name: SVTRv2LNConvTwo33
|
| 39 |
-
use_pos_embed: False
|
| 40 |
-
dims: [128, 256, 384]
|
| 41 |
-
depths: [6, 6, 6]
|
| 42 |
-
num_heads: [4, 8, 12]
|
| 43 |
-
mixer: [['Conv','Conv','Conv','Conv','Conv','Conv'],['Conv','Conv','FGlobal','Global','Global','Global'],['Global','Global','Global','Global','Global','Global']]
|
| 44 |
-
local_k: [[5, 5], [5, 5], [-1, -1]]
|
| 45 |
-
sub_k: [[1, 1], [2, 1], [-1, -1]]
|
| 46 |
-
last_stage: false
|
| 47 |
-
feat2d: True
|
| 48 |
-
Decoder:
|
| 49 |
-
name: ABINetDecoder
|
| 50 |
-
iter_size: 0
|
| 51 |
-
num_layers: 0
|
| 52 |
-
Loss:
|
| 53 |
-
name: ABINetLoss
|
| 54 |
-
|
| 55 |
-
PostProcess:
|
| 56 |
-
name: ABINetLabelDecode
|
| 57 |
-
|
| 58 |
-
Metric:
|
| 59 |
-
name: RecMetric
|
| 60 |
-
main_indicator: acc
|
| 61 |
-
is_filter: True
|
| 62 |
-
|
| 63 |
-
Train:
|
| 64 |
-
dataset:
|
| 65 |
-
name: RatioDataSetTVResize
|
| 66 |
-
ds_width: True
|
| 67 |
-
padding: false
|
| 68 |
-
data_dir_list: ['../Union14M-L-LMDB-Filtered/filter_train_challenging',
|
| 69 |
-
'../Union14M-L-LMDB-Filtered/filter_train_hard',
|
| 70 |
-
'../Union14M-L-LMDB-Filtered/filter_train_medium',
|
| 71 |
-
'../Union14M-L-LMDB-Filtered/filter_train_normal',
|
| 72 |
-
'../Union14M-L-LMDB-Filtered/filter_train_easy',
|
| 73 |
-
]
|
| 74 |
-
transforms:
|
| 75 |
-
- DecodeImagePIL: # load image
|
| 76 |
-
img_mode: RGB
|
| 77 |
-
- PARSeqAugPIL:
|
| 78 |
-
- ABINetLabelEncode:
|
| 79 |
-
- KeepKeys:
|
| 80 |
-
keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
|
| 81 |
-
sampler:
|
| 82 |
-
name: RatioSampler
|
| 83 |
-
scales: [[128, 32]] # w, h
|
| 84 |
-
# divide_factor: to ensure the width and height dimensions can be devided by downsampling multiple
|
| 85 |
-
first_bs: &bs 256
|
| 86 |
-
fix_bs: false
|
| 87 |
-
divided_factor: [4, 16] # w, h
|
| 88 |
-
is_training: True
|
| 89 |
-
loader:
|
| 90 |
-
shuffle: True
|
| 91 |
-
batch_size_per_card: *bs
|
| 92 |
-
drop_last: True
|
| 93 |
-
max_ratio: &max_ratio 4
|
| 94 |
-
num_workers: 4
|
| 95 |
-
|
| 96 |
-
Eval:
|
| 97 |
-
dataset:
|
| 98 |
-
name: RatioDataSetTVResize
|
| 99 |
-
ds_width: True
|
| 100 |
-
padding: False
|
| 101 |
-
data_dir_list: [
|
| 102 |
-
'../evaluation/CUTE80',
|
| 103 |
-
'../evaluation/IC13_857',
|
| 104 |
-
'../evaluation/IC15_1811',
|
| 105 |
-
'../evaluation/IIIT5k',
|
| 106 |
-
'../evaluation/SVT',
|
| 107 |
-
'../evaluation/SVTP',
|
| 108 |
-
]
|
| 109 |
-
transforms:
|
| 110 |
-
- DecodeImagePIL: # load image
|
| 111 |
-
img_mode: RGB
|
| 112 |
-
- ABINetLabelEncode:
|
| 113 |
-
- KeepKeys:
|
| 114 |
-
keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
|
| 115 |
-
sampler:
|
| 116 |
-
name: RatioSampler
|
| 117 |
-
scales: [[128, 32]] # w, h
|
| 118 |
-
# divide_factor: to ensure the width and height dimensions can be devided by downsampling multiple
|
| 119 |
-
first_bs: *bs
|
| 120 |
-
fix_bs: false
|
| 121 |
-
divided_factor: [4, 16] # w, h
|
| 122 |
-
is_training: False
|
| 123 |
-
loader:
|
| 124 |
-
shuffle: False
|
| 125 |
-
drop_last: False
|
| 126 |
-
batch_size_per_card: *bs
|
| 127 |
-
max_ratio: *max_ratio
|
| 128 |
-
num_workers: 4
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
configs/rec/aster/resnet31_lstm_aster_tps_on.yml
DELETED
|
@@ -1,93 +0,0 @@
|
|
| 1 |
-
Global:
|
| 2 |
-
device: gpu
|
| 3 |
-
epoch_num: 20
|
| 4 |
-
log_smooth_window: 20
|
| 5 |
-
print_batch_step: 10
|
| 6 |
-
output_dir: ./output/rec/u14m_filter/resnet31_lstm_aster_tps_on
|
| 7 |
-
eval_epoch_step: [0, 1]
|
| 8 |
-
eval_batch_step: [0, 500]
|
| 9 |
-
cal_metric_during_train: True
|
| 10 |
-
pretrained_model:
|
| 11 |
-
checkpoints:
|
| 12 |
-
use_tensorboard: false
|
| 13 |
-
infer_img:
|
| 14 |
-
# for data or label process
|
| 15 |
-
character_dict_path: ./tools/utils/EN_symbol_dict.txt
|
| 16 |
-
max_text_length: 25
|
| 17 |
-
use_space_char: False
|
| 18 |
-
save_res_path: ./output/rec/predicts_aster_tps.txt
|
| 19 |
-
use_amp: True
|
| 20 |
-
grad_clip_val: 1.0
|
| 21 |
-
|
| 22 |
-
Optimizer:
|
| 23 |
-
name: Adam
|
| 24 |
-
lr: 0.002 # for 1gpus bs1024/gpu
|
| 25 |
-
weight_decay: 0.0
|
| 26 |
-
filter_bias_and_bn: False
|
| 27 |
-
|
| 28 |
-
LRScheduler:
|
| 29 |
-
name: OneCycleLR
|
| 30 |
-
warmup_epoch: 1.5 # pct_start 0.075*20 = 1.5ep
|
| 31 |
-
cycle_momentum: False
|
| 32 |
-
|
| 33 |
-
Architecture:
|
| 34 |
-
model_type: rec
|
| 35 |
-
algorithm: aster
|
| 36 |
-
Transform:
|
| 37 |
-
name: Aster_TPS
|
| 38 |
-
tps_inputsize: [32, 64]
|
| 39 |
-
tps_outputsize: [32, 128]
|
| 40 |
-
Encoder:
|
| 41 |
-
name: ResNet_ASTER
|
| 42 |
-
Decoder:
|
| 43 |
-
name: ASTERDecoder
|
| 44 |
-
|
| 45 |
-
Loss:
|
| 46 |
-
name: ARLoss
|
| 47 |
-
|
| 48 |
-
Metric:
|
| 49 |
-
name: RecMetric
|
| 50 |
-
main_indicator: acc
|
| 51 |
-
is_filter: True
|
| 52 |
-
|
| 53 |
-
PostProcess:
|
| 54 |
-
name: ARLabelDecode
|
| 55 |
-
|
| 56 |
-
Train:
|
| 57 |
-
dataset:
|
| 58 |
-
name: LMDBDataSet
|
| 59 |
-
data_dir: ../Union14M-L-LMDB-Filtered
|
| 60 |
-
transforms:
|
| 61 |
-
- DecodeImagePIL: # load image
|
| 62 |
-
img_mode: RGB
|
| 63 |
-
- PARSeqAugPIL:
|
| 64 |
-
- ARLabelEncode: # Class handling label
|
| 65 |
-
- RecTVResize:
|
| 66 |
-
image_shape: [64, 256]
|
| 67 |
-
padding: False
|
| 68 |
-
- KeepKeys:
|
| 69 |
-
keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
|
| 70 |
-
loader:
|
| 71 |
-
shuffle: True
|
| 72 |
-
batch_size_per_card: 1024
|
| 73 |
-
drop_last: True
|
| 74 |
-
num_workers: 4
|
| 75 |
-
|
| 76 |
-
Eval:
|
| 77 |
-
dataset:
|
| 78 |
-
name: LMDBDataSet
|
| 79 |
-
data_dir: ../evaluation
|
| 80 |
-
transforms:
|
| 81 |
-
- DecodeImagePIL: # load image
|
| 82 |
-
img_mode: RGB
|
| 83 |
-
- ARLabelEncode: # Class handling label
|
| 84 |
-
- RecTVResize:
|
| 85 |
-
image_shape: [64, 256]
|
| 86 |
-
padding: False
|
| 87 |
-
- KeepKeys:
|
| 88 |
-
keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
|
| 89 |
-
loader:
|
| 90 |
-
shuffle: False
|
| 91 |
-
drop_last: False
|
| 92 |
-
batch_size_per_card: 256
|
| 93 |
-
num_workers: 2
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
configs/rec/aster/svtrv2_aster.yml
DELETED
|
@@ -1,127 +0,0 @@
|
|
| 1 |
-
Global:
|
| 2 |
-
device: gpu
|
| 3 |
-
epoch_num: 20
|
| 4 |
-
log_smooth_window: 20
|
| 5 |
-
print_batch_step: 10
|
| 6 |
-
output_dir: ./output/rec/u14m_filter/svtrv2_aster
|
| 7 |
-
eval_epoch_step: [0, 1]
|
| 8 |
-
eval_batch_step: [0, 500]
|
| 9 |
-
cal_metric_during_train: True
|
| 10 |
-
pretrained_model:
|
| 11 |
-
checkpoints:
|
| 12 |
-
use_tensorboard: false
|
| 13 |
-
infer_img:
|
| 14 |
-
# for data or label process
|
| 15 |
-
character_dict_path: ./tools/utils/EN_symbol_dict.txt
|
| 16 |
-
max_text_length: 25
|
| 17 |
-
use_space_char: False
|
| 18 |
-
save_res_path: ./output/rec/u14m_filter/predicts_svtrv2_aster.txt
|
| 19 |
-
use_amp: True
|
| 20 |
-
|
| 21 |
-
Optimizer:
|
| 22 |
-
name: AdamW
|
| 23 |
-
lr: 0.00065 # for 4gpus bs256/gpu
|
| 24 |
-
weight_decay: 0.05
|
| 25 |
-
filter_bias_and_bn: True
|
| 26 |
-
|
| 27 |
-
LRScheduler:
|
| 28 |
-
name: OneCycleLR
|
| 29 |
-
warmup_epoch: 1.5 # pct_start 0.075*20 = 1.5ep
|
| 30 |
-
cycle_momentum: False
|
| 31 |
-
|
| 32 |
-
Architecture:
|
| 33 |
-
model_type: rec
|
| 34 |
-
algorithm: aster
|
| 35 |
-
Transform:
|
| 36 |
-
Encoder:
|
| 37 |
-
name: SVTRv2LNConvTwo33
|
| 38 |
-
use_pos_embed: False
|
| 39 |
-
out_channels: 256
|
| 40 |
-
dims: [128, 256, 384]
|
| 41 |
-
depths: [6, 6, 6]
|
| 42 |
-
num_heads: [4, 8, 12]
|
| 43 |
-
mixer: [['Conv','Conv','Conv','Conv','Conv','Conv'],['Conv','Conv','FGlobal','Global','Global','Global'],['Global','Global','Global','Global','Global','Global']]
|
| 44 |
-
local_k: [[5, 5], [5, 5], [-1, -1]]
|
| 45 |
-
sub_k: [[1, 1], [2, 1], [-1, -1]]
|
| 46 |
-
last_stage: false
|
| 47 |
-
feat2d: False
|
| 48 |
-
Decoder:
|
| 49 |
-
name: ASTERDecoder
|
| 50 |
-
|
| 51 |
-
Loss:
|
| 52 |
-
name: ARLoss
|
| 53 |
-
|
| 54 |
-
Metric:
|
| 55 |
-
name: RecMetric
|
| 56 |
-
main_indicator: acc
|
| 57 |
-
is_filter: True
|
| 58 |
-
|
| 59 |
-
PostProcess:
|
| 60 |
-
name: ARLabelDecode
|
| 61 |
-
|
| 62 |
-
Train:
|
| 63 |
-
dataset:
|
| 64 |
-
name: RatioDataSetTVResize
|
| 65 |
-
ds_width: True
|
| 66 |
-
padding: false
|
| 67 |
-
data_dir_list: ['../Union14M-L-LMDB-Filtered/filter_train_challenging',
|
| 68 |
-
'../Union14M-L-LMDB-Filtered/filter_train_hard',
|
| 69 |
-
'../Union14M-L-LMDB-Filtered/filter_train_medium',
|
| 70 |
-
'../Union14M-L-LMDB-Filtered/filter_train_normal',
|
| 71 |
-
'../Union14M-L-LMDB-Filtered/filter_train_easy',
|
| 72 |
-
]
|
| 73 |
-
transforms:
|
| 74 |
-
- DecodeImagePIL: # load image
|
| 75 |
-
img_mode: RGB
|
| 76 |
-
- PARSeqAugPIL:
|
| 77 |
-
- ARLabelEncode: # Class handling label
|
| 78 |
-
- KeepKeys:
|
| 79 |
-
keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
|
| 80 |
-
sampler:
|
| 81 |
-
name: RatioSampler
|
| 82 |
-
scales: [[128, 32]] # w, h
|
| 83 |
-
# divided_factor: ensures the width and height dimensions are divisible by the downsampling multiple
|
| 84 |
-
first_bs: &bs 256
|
| 85 |
-
fix_bs: false
|
| 86 |
-
divided_factor: [4, 16] # w, h
|
| 87 |
-
is_training: True
|
| 88 |
-
loader:
|
| 89 |
-
shuffle: True
|
| 90 |
-
batch_size_per_card: *bs
|
| 91 |
-
drop_last: True
|
| 92 |
-
max_ratio: &max_ratio 4
|
| 93 |
-
num_workers: 4
|
| 94 |
-
|
| 95 |
-
Eval:
|
| 96 |
-
dataset:
|
| 97 |
-
name: RatioDataSetTVResize
|
| 98 |
-
ds_width: True
|
| 99 |
-
padding: False
|
| 100 |
-
data_dir_list: [
|
| 101 |
-
'../evaluation/CUTE80',
|
| 102 |
-
'../evaluation/IC13_857',
|
| 103 |
-
'../evaluation/IC15_1811',
|
| 104 |
-
'../evaluation/IIIT5k',
|
| 105 |
-
'../evaluation/SVT',
|
| 106 |
-
'../evaluation/SVTP',
|
| 107 |
-
]
|
| 108 |
-
transforms:
|
| 109 |
-
- DecodeImagePIL: # load image
|
| 110 |
-
img_mode: RGB
|
| 111 |
-
- ARLabelEncode: # Class handling label
|
| 112 |
-
- KeepKeys:
|
| 113 |
-
keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
|
| 114 |
-
sampler:
|
| 115 |
-
name: RatioSampler
|
| 116 |
-
scales: [[128, 32]] # w, h
|
| 117 |
-
# divided_factor: ensures the width and height dimensions are divisible by the downsampling multiple
|
| 118 |
-
first_bs: *bs
|
| 119 |
-
fix_bs: false
|
| 120 |
-
divided_factor: [4, 16] # w, h
|
| 121 |
-
is_training: False
|
| 122 |
-
loader:
|
| 123 |
-
shuffle: False
|
| 124 |
-
drop_last: False
|
| 125 |
-
batch_size_per_card: *bs
|
| 126 |
-
max_ratio: *max_ratio
|
| 127 |
-
num_workers: 4
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
configs/rec/aster/svtrv2_aster_tps_on.yml
DELETED
|
@@ -1,102 +0,0 @@
|
|
| 1 |
-
Global:
|
| 2 |
-
device: gpu
|
| 3 |
-
epoch_num: 20
|
| 4 |
-
log_smooth_window: 20
|
| 5 |
-
print_batch_step: 10
|
| 6 |
-
output_dir: ./output/rec/u14m_filter/svtrv2_aster_tps_on
|
| 7 |
-
eval_epoch_step: [0, 1]
|
| 8 |
-
eval_batch_step: [0, 500]
|
| 9 |
-
cal_metric_during_train: True
|
| 10 |
-
pretrained_model:
|
| 11 |
-
checkpoints:
|
| 12 |
-
use_tensorboard: false
|
| 13 |
-
infer_img:
|
| 14 |
-
# for data or label process
|
| 15 |
-
character_dict_path: ./tools/utils/EN_symbol_dict.txt
|
| 16 |
-
max_text_length: 25
|
| 17 |
-
use_space_char: False
|
| 18 |
-
save_res_path: ./output/rec/u14m_filter/predicts_svtrv2_aster_tps_on.txt
|
| 19 |
-
use_amp: True
|
| 20 |
-
|
| 21 |
-
Optimizer:
|
| 22 |
-
name: AdamW
|
| 23 |
-
lr: 0.00065 # for 4gpus bs256/gpu
|
| 24 |
-
weight_decay: 0.05
|
| 25 |
-
filter_bias_and_bn: True
|
| 26 |
-
|
| 27 |
-
LRScheduler:
|
| 28 |
-
name: OneCycleLR
|
| 29 |
-
warmup_epoch: 1.5 # pct_start 0.075*20 = 1.5ep
|
| 30 |
-
cycle_momentum: False
|
| 31 |
-
|
| 32 |
-
Architecture:
|
| 33 |
-
model_type: rec
|
| 34 |
-
algorithm: aster
|
| 35 |
-
Transform:
|
| 36 |
-
name: Aster_TPS
|
| 37 |
-
tps_inputsize: [32, 64]
|
| 38 |
-
tps_outputsize: [32, 128]
|
| 39 |
-
Encoder:
|
| 40 |
-
name: SVTRv2LNConvTwo33
|
| 41 |
-
use_pos_embed: False
|
| 42 |
-
out_channels: 256
|
| 43 |
-
dims: [128, 256, 384]
|
| 44 |
-
depths: [6, 6, 6]
|
| 45 |
-
num_heads: [4, 8, 12]
|
| 46 |
-
mixer: [['Conv','Conv','Conv','Conv','Conv','Conv'],['Conv','Conv','FGlobal','Global','Global','Global'],['Global','Global','Global','Global','Global','Global']]
|
| 47 |
-
local_k: [[5, 5], [5, 5], [-1, -1]]
|
| 48 |
-
sub_k: [[1, 1], [2, 1], [-1, -1]]
|
| 49 |
-
last_stage: false
|
| 50 |
-
feat2d: False
|
| 51 |
-
Decoder:
|
| 52 |
-
name: ASTERDecoder
|
| 53 |
-
|
| 54 |
-
Loss:
|
| 55 |
-
name: ARLoss
|
| 56 |
-
|
| 57 |
-
Metric:
|
| 58 |
-
name: RecMetric
|
| 59 |
-
main_indicator: acc
|
| 60 |
-
is_filter: True
|
| 61 |
-
|
| 62 |
-
PostProcess:
|
| 63 |
-
name: ARLabelDecode
|
| 64 |
-
|
| 65 |
-
Train:
|
| 66 |
-
dataset:
|
| 67 |
-
name: LMDBDataSet
|
| 68 |
-
data_dir: ../Union14M-L-LMDB-Filtered
|
| 69 |
-
transforms:
|
| 70 |
-
- DecodeImagePIL: # load image
|
| 71 |
-
img_mode: RGB
|
| 72 |
-
- PARSeqAugPIL:
|
| 73 |
-
- ARLabelEncode: # Class handling label
|
| 74 |
-
- RecTVResize:
|
| 75 |
-
image_shape: [64, 256]
|
| 76 |
-
padding: False
|
| 77 |
-
- KeepKeys:
|
| 78 |
-
keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
|
| 79 |
-
loader:
|
| 80 |
-
shuffle: True
|
| 81 |
-
batch_size_per_card: 256
|
| 82 |
-
drop_last: True
|
| 83 |
-
num_workers: 4
|
| 84 |
-
|
| 85 |
-
Eval:
|
| 86 |
-
dataset:
|
| 87 |
-
name: LMDBDataSet
|
| 88 |
-
data_dir: ../evaluation
|
| 89 |
-
transforms:
|
| 90 |
-
- DecodeImagePIL: # load image
|
| 91 |
-
img_mode: RGB
|
| 92 |
-
- ARLabelEncode: # Class handling label
|
| 93 |
-
- RecTVResize:
|
| 94 |
-
image_shape: [64, 256]
|
| 95 |
-
padding: False
|
| 96 |
-
- KeepKeys:
|
| 97 |
-
keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
|
| 98 |
-
loader:
|
| 99 |
-
shuffle: False
|
| 100 |
-
drop_last: False
|
| 101 |
-
batch_size_per_card: 256
|
| 102 |
-
num_workers: 2
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
configs/rec/autostr/autostr_lstm_aster_tps_on.yml
DELETED
|
@@ -1,95 +0,0 @@
|
|
| 1 |
-
Global:
|
| 2 |
-
device: gpu
|
| 3 |
-
epoch_num: 20
|
| 4 |
-
log_smooth_window: 20
|
| 5 |
-
print_batch_step: 10
|
| 6 |
-
output_dir: ./output/rec/u14m_filter/autostr_lstm_aster_tps_on
|
| 7 |
-
eval_epoch_step: [0, 1]
|
| 8 |
-
eval_batch_step: [0, 500]
|
| 9 |
-
cal_metric_during_train: True
|
| 10 |
-
pretrained_model:
|
| 11 |
-
checkpoints:
|
| 12 |
-
use_tensorboard: false
|
| 13 |
-
infer_img:
|
| 14 |
-
# for data or label process
|
| 15 |
-
character_dict_path: ./tools/utils/EN_symbol_dict.txt
|
| 16 |
-
max_text_length: 25
|
| 17 |
-
use_space_char: False
|
| 18 |
-
save_res_path: ./output/rec/u14m_filter/predicts_autostr_lstm_aster_tps_on.txt
|
| 19 |
-
use_amp: True
|
| 20 |
-
grad_clip_val: 1.0
|
| 21 |
-
|
| 22 |
-
Optimizer:
|
| 23 |
-
name: Adam
|
| 24 |
-
lr: 0.002 # for 4gpus bs256/gpu
|
| 25 |
-
weight_decay: 0.0
|
| 26 |
-
filter_bias_and_bn: False
|
| 27 |
-
|
| 28 |
-
LRScheduler:
|
| 29 |
-
name: OneCycleLR
|
| 30 |
-
warmup_epoch: 1.5 # pct_start 0.075*20 = 1.5ep
|
| 31 |
-
cycle_momentum: False
|
| 32 |
-
|
| 33 |
-
Architecture:
|
| 34 |
-
model_type: rec
|
| 35 |
-
algorithm: autostr
|
| 36 |
-
Transform:
|
| 37 |
-
name: Aster_TPS
|
| 38 |
-
tps_inputsize: [32, 64]
|
| 39 |
-
tps_outputsize: [32, 128]
|
| 40 |
-
Encoder:
|
| 41 |
-
name: AutoSTREncoder
|
| 42 |
-
stride_stages: '[(2, 2), (2, 1), (2, 2), (2, 1), (2, 1)]'
|
| 43 |
-
conv_op_ids: [2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 4, 1, 1, 6, 6]
|
| 44 |
-
Decoder:
|
| 45 |
-
name: ASTERDecoder
|
| 46 |
-
|
| 47 |
-
Loss:
|
| 48 |
-
name: ARLoss
|
| 49 |
-
|
| 50 |
-
Metric:
|
| 51 |
-
name: RecMetric
|
| 52 |
-
main_indicator: acc
|
| 53 |
-
is_filter: True
|
| 54 |
-
|
| 55 |
-
PostProcess:
|
| 56 |
-
name: ARLabelDecode
|
| 57 |
-
|
| 58 |
-
Train:
|
| 59 |
-
dataset:
|
| 60 |
-
name: LMDBDataSet
|
| 61 |
-
data_dir: ../Union14M-L-LMDB-Filtered
|
| 62 |
-
transforms:
|
| 63 |
-
- DecodeImagePIL: # load image
|
| 64 |
-
img_mode: RGB
|
| 65 |
-
- PARSeqAugPIL:
|
| 66 |
-
- ARLabelEncode: # Class handling label
|
| 67 |
-
- RecTVResize:
|
| 68 |
-
image_shape: [64, 256]
|
| 69 |
-
padding: False
|
| 70 |
-
- KeepKeys:
|
| 71 |
-
keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
|
| 72 |
-
loader:
|
| 73 |
-
shuffle: True
|
| 74 |
-
batch_size_per_card: 256
|
| 75 |
-
drop_last: True
|
| 76 |
-
num_workers: 4
|
| 77 |
-
|
| 78 |
-
Eval:
|
| 79 |
-
dataset:
|
| 80 |
-
name: LMDBDataSet
|
| 81 |
-
data_dir: ../evaluation
|
| 82 |
-
transforms:
|
| 83 |
-
- DecodeImagePIL: # load image
|
| 84 |
-
img_mode: RGB
|
| 85 |
-
- ARLabelEncode: # Class handling label
|
| 86 |
-
- RecTVResize:
|
| 87 |
-
image_shape: [64, 256]
|
| 88 |
-
padding: False
|
| 89 |
-
- KeepKeys:
|
| 90 |
-
keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
|
| 91 |
-
loader:
|
| 92 |
-
shuffle: False
|
| 93 |
-
drop_last: False
|
| 94 |
-
batch_size_per_card: 256
|
| 95 |
-
num_workers: 2
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
configs/rec/busnet/svtrv2_busnet.yml
DELETED
|
@@ -1,135 +0,0 @@
|
|
| 1 |
-
Global:
|
| 2 |
-
device: gpu
|
| 3 |
-
epoch_num: 10
|
| 4 |
-
log_smooth_window: 20
|
| 5 |
-
print_batch_step: 10
|
| 6 |
-
output_dir: ./output/rec/u14m_filter/svtrv2_busnet/
|
| 7 |
-
eval_epoch_step: [0, 1]
|
| 8 |
-
eval_batch_step: [0, 500]
|
| 9 |
-
cal_metric_during_train: True
|
| 10 |
-
pretrained_model:
|
| 11 |
-
# ./output/rec/u14m_filter/svtrv2_busnet_pretraining/best.pth
|
| 12 |
-
checkpoints:
|
| 13 |
-
use_tensorboard: false
|
| 14 |
-
infer_img:
|
| 15 |
-
# for data or label process
|
| 16 |
-
character_dict_path: ./tools/utils/EN_symbol_dict.txt
|
| 17 |
-
max_text_length: 25
|
| 18 |
-
use_space_char: False
|
| 19 |
-
save_res_path: ./output/rec/u14m_filter/predicts_svtrv2_busnet.txt
|
| 20 |
-
use_amp: True
|
| 21 |
-
|
| 22 |
-
Optimizer:
|
| 23 |
-
name: AdamW
|
| 24 |
-
lr: 0.00065 # 4gpus bs256/gpu
|
| 25 |
-
weight_decay: 0.05
|
| 26 |
-
filter_bias_and_bn: True
|
| 27 |
-
|
| 28 |
-
LRScheduler:
|
| 29 |
-
name: OneCycleLR
|
| 30 |
-
warmup_epoch: 1 # pct_start 0.1*10 = 1ep
|
| 31 |
-
cycle_momentum: False
|
| 32 |
-
|
| 33 |
-
Architecture:
|
| 34 |
-
model_type: rec
|
| 35 |
-
algorithm: BUSBet
|
| 36 |
-
Transform:
|
| 37 |
-
Encoder:
|
| 38 |
-
name: SVTRv2LNConvTwo33
|
| 39 |
-
use_pos_embed: False
|
| 40 |
-
dims: [128, 256, 384]
|
| 41 |
-
depths: [6, 6, 6]
|
| 42 |
-
num_heads: [4, 8, 12]
|
| 43 |
-
mixer: [['Conv','Conv','Conv','Conv','Conv','Conv'],['Conv','Conv','FGlobal','Global','Global','Global'],['Global','Global','Global','Global','Global','Global']]
|
| 44 |
-
local_k: [[5, 5], [5, 5], [-1, -1]]
|
| 45 |
-
sub_k: [[1, 1], [2, 1], [-1, -1]]
|
| 46 |
-
last_stage: false
|
| 47 |
-
feat2d: False
|
| 48 |
-
Decoder:
|
| 49 |
-
name: BUSDecoder
|
| 50 |
-
nhead: 6
|
| 51 |
-
num_layers: 6
|
| 52 |
-
dim_feedforward: 1536
|
| 53 |
-
ignore_index: &ignore_index 100
|
| 54 |
-
pretraining: False
|
| 55 |
-
# return_id: 2
|
| 56 |
-
Loss:
|
| 57 |
-
name: ABINetLoss
|
| 58 |
-
ignore_index: *ignore_index
|
| 59 |
-
|
| 60 |
-
PostProcess:
|
| 61 |
-
name: ABINetLabelDecode
|
| 62 |
-
|
| 63 |
-
Metric:
|
| 64 |
-
name: RecMetric
|
| 65 |
-
main_indicator: acc
|
| 66 |
-
is_filter: True
|
| 67 |
-
|
| 68 |
-
Train:
|
| 69 |
-
dataset:
|
| 70 |
-
name: RatioDataSetTVResize
|
| 71 |
-
ds_width: True
|
| 72 |
-
padding: false
|
| 73 |
-
data_dir_list: ['../Union14M-L-LMDB-Filtered/filter_train_challenging',
|
| 74 |
-
'../Union14M-L-LMDB-Filtered/filter_train_hard',
|
| 75 |
-
'../Union14M-L-LMDB-Filtered/filter_train_medium',
|
| 76 |
-
'../Union14M-L-LMDB-Filtered/filter_train_normal',
|
| 77 |
-
'../Union14M-L-LMDB-Filtered/filter_train_easy',
|
| 78 |
-
]
|
| 79 |
-
transforms:
|
| 80 |
-
- DecodeImagePIL: # load image
|
| 81 |
-
img_mode: RGB
|
| 82 |
-
- PARSeqAugPIL:
|
| 83 |
-
- ABINetLabelEncode:
|
| 84 |
-
ignore_index: *ignore_index
|
| 85 |
-
- KeepKeys:
|
| 86 |
-
keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
|
| 87 |
-
sampler:
|
| 88 |
-
name: RatioSampler
|
| 89 |
-
scales: [[128, 32]] # w, h
|
| 90 |
-
# divided_factor: ensures the width and height dimensions are divisible by the downsampling multiple
|
| 91 |
-
first_bs: &bs 256
|
| 92 |
-
fix_bs: false
|
| 93 |
-
divided_factor: [4, 16] # w, h
|
| 94 |
-
is_training: True
|
| 95 |
-
loader:
|
| 96 |
-
shuffle: True
|
| 97 |
-
batch_size_per_card: *bs
|
| 98 |
-
drop_last: True
|
| 99 |
-
max_ratio: &max_ratio 4
|
| 100 |
-
num_workers: 4
|
| 101 |
-
|
| 102 |
-
Eval:
|
| 103 |
-
dataset:
|
| 104 |
-
name: RatioDataSetTVResize
|
| 105 |
-
ds_width: True
|
| 106 |
-
padding: False
|
| 107 |
-
data_dir_list: [
|
| 108 |
-
'../evaluation/CUTE80',
|
| 109 |
-
'../evaluation/IC13_857',
|
| 110 |
-
'../evaluation/IC15_1811',
|
| 111 |
-
'../evaluation/IIIT5k',
|
| 112 |
-
'../evaluation/SVT',
|
| 113 |
-
'../evaluation/SVTP',
|
| 114 |
-
]
|
| 115 |
-
transforms:
|
| 116 |
-
- DecodeImagePIL: # load image
|
| 117 |
-
img_mode: RGB
|
| 118 |
-
- ABINetLabelEncode:
|
| 119 |
-
ignore_index: *ignore_index
|
| 120 |
-
- KeepKeys:
|
| 121 |
-
keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
|
| 122 |
-
sampler:
|
| 123 |
-
name: RatioSampler
|
| 124 |
-
scales: [[128, 32]] # w, h
|
| 125 |
-
# divided_factor: ensures the width and height dimensions are divisible by the downsampling multiple
|
| 126 |
-
first_bs: *bs
|
| 127 |
-
fix_bs: false
|
| 128 |
-
divided_factor: [4, 16] # w, h
|
| 129 |
-
is_training: False
|
| 130 |
-
loader:
|
| 131 |
-
shuffle: False
|
| 132 |
-
drop_last: False
|
| 133 |
-
batch_size_per_card: *bs
|
| 134 |
-
max_ratio: *max_ratio
|
| 135 |
-
num_workers: 4
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
configs/rec/busnet/svtrv2_busnet_pretraining.yml
DELETED
|
@@ -1,134 +0,0 @@
|
|
| 1 |
-
Global:
|
| 2 |
-
device: gpu
|
| 3 |
-
epoch_num: 10
|
| 4 |
-
log_smooth_window: 20
|
| 5 |
-
print_batch_step: 10
|
| 6 |
-
output_dir: ./output/rec/u14m_filter/svtrv2_busnet_pretraining/
|
| 7 |
-
eval_epoch_step: [0, 1]
|
| 8 |
-
eval_batch_step: [0, 500]
|
| 9 |
-
cal_metric_during_train: True
|
| 10 |
-
pretrained_model:
|
| 11 |
-
checkpoints:
|
| 12 |
-
use_tensorboard: false
|
| 13 |
-
infer_img:
|
| 14 |
-
# for data or label process
|
| 15 |
-
character_dict_path: ./tools/utils/EN_symbol_dict.txt
|
| 16 |
-
max_text_length: 25
|
| 17 |
-
use_space_char: False
|
| 18 |
-
save_res_path: ./output/rec/u14m_filter/predicts_svtrv2_busnet_pretraining.txt
|
| 19 |
-
use_amp: True
|
| 20 |
-
|
| 21 |
-
Optimizer:
|
| 22 |
-
name: AdamW
|
| 23 |
-
lr: 0.00065 # 4gpus bs256/gpu
|
| 24 |
-
weight_decay: 0.05
|
| 25 |
-
filter_bias_and_bn: True
|
| 26 |
-
|
| 27 |
-
LRScheduler:
|
| 28 |
-
name: OneCycleLR
|
| 29 |
-
warmup_epoch: 1 # pct_start 0.1*10 = 1ep
|
| 30 |
-
cycle_momentum: False
|
| 31 |
-
|
| 32 |
-
Architecture:
|
| 33 |
-
model_type: rec
|
| 34 |
-
algorithm: BUSBet
|
| 35 |
-
Transform:
|
| 36 |
-
Encoder:
|
| 37 |
-
name: SVTRv2LNConvTwo33
|
| 38 |
-
use_pos_embed: False
|
| 39 |
-
dims: [128, 256, 384]
|
| 40 |
-
depths: [6, 6, 6]
|
| 41 |
-
num_heads: [4, 8, 12]
|
| 42 |
-
mixer: [['Conv','Conv','Conv','Conv','Conv','Conv'],['Conv','Conv','FGlobal','Global','Global','Global'],['Global','Global','Global','Global','Global','Global']]
|
| 43 |
-
local_k: [[5, 5], [5, 5], [-1, -1]]
|
| 44 |
-
sub_k: [[1, 1], [2, 1], [-1, -1]]
|
| 45 |
-
last_stage: false
|
| 46 |
-
feat2d: False
|
| 47 |
-
Decoder:
|
| 48 |
-
name: BUSDecoder
|
| 49 |
-
nhead: 6
|
| 50 |
-
num_layers: 6
|
| 51 |
-
dim_feedforward: 1536
|
| 52 |
-
ignore_index: &ignore_index 100
|
| 53 |
-
pretraining: True
|
| 54 |
-
# return_id: 0
|
| 55 |
-
Loss:
|
| 56 |
-
name: ABINetLoss
|
| 57 |
-
ignore_index: *ignore_index
|
| 58 |
-
|
| 59 |
-
PostProcess:
|
| 60 |
-
name: ABINetLabelDecode
|
| 61 |
-
|
| 62 |
-
Metric:
|
| 63 |
-
name: RecMetric
|
| 64 |
-
main_indicator: acc
|
| 65 |
-
is_filter: True
|
| 66 |
-
|
| 67 |
-
Train:
|
| 68 |
-
dataset:
|
| 69 |
-
name: RatioDataSetTVResize
|
| 70 |
-
ds_width: True
|
| 71 |
-
padding: false
|
| 72 |
-
data_dir_list: ['../Union14M-L-LMDB-Filtered/filter_train_challenging',
|
| 73 |
-
'../Union14M-L-LMDB-Filtered/filter_train_hard',
|
| 74 |
-
'../Union14M-L-LMDB-Filtered/filter_train_medium',
|
| 75 |
-
'../Union14M-L-LMDB-Filtered/filter_train_normal',
|
| 76 |
-
'../Union14M-L-LMDB-Filtered/filter_train_easy',
|
| 77 |
-
]
|
| 78 |
-
transforms:
|
| 79 |
-
- DecodeImagePIL: # load image
|
| 80 |
-
img_mode: RGB
|
| 81 |
-
- PARSeqAugPIL:
|
| 82 |
-
- ABINetLabelEncode:
|
| 83 |
-
ignore_index: *ignore_index
|
| 84 |
-
- KeepKeys:
|
| 85 |
-
keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
|
| 86 |
-
sampler:
|
| 87 |
-
name: RatioSampler
|
| 88 |
-
scales: [[128, 32]] # w, h
|
| 89 |
-
# divided_factor: ensures the width and height dimensions are divisible by the downsampling multiple
|
| 90 |
-
first_bs: &bs 256
|
| 91 |
-
fix_bs: false
|
| 92 |
-
divided_factor: [4, 16] # w, h
|
| 93 |
-
is_training: True
|
| 94 |
-
loader:
|
| 95 |
-
shuffle: True
|
| 96 |
-
batch_size_per_card: *bs
|
| 97 |
-
drop_last: True
|
| 98 |
-
max_ratio: &max_ratio 4
|
| 99 |
-
num_workers: 4
|
| 100 |
-
|
| 101 |
-
Eval:
|
| 102 |
-
dataset:
|
| 103 |
-
name: RatioDataSetTVResize
|
| 104 |
-
ds_width: True
|
| 105 |
-
padding: False
|
| 106 |
-
data_dir_list: [
|
| 107 |
-
'../evaluation/CUTE80',
|
| 108 |
-
'../evaluation/IC13_857',
|
| 109 |
-
'../evaluation/IC15_1811',
|
| 110 |
-
'../evaluation/IIIT5k',
|
| 111 |
-
'../evaluation/SVT',
|
| 112 |
-
'../evaluation/SVTP',
|
| 113 |
-
]
|
| 114 |
-
transforms:
|
| 115 |
-
- DecodeImagePIL: # load image
|
| 116 |
-
img_mode: RGB
|
| 117 |
-
- ABINetLabelEncode:
|
| 118 |
-
ignore_index: *ignore_index
|
| 119 |
-
- KeepKeys:
|
| 120 |
-
keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
|
| 121 |
-
sampler:
|
| 122 |
-
name: RatioSampler
|
| 123 |
-
scales: [[128, 32]] # w, h
|
| 124 |
-
# divided_factor: ensures the width and height dimensions are divisible by the downsampling multiple
|
| 125 |
-
first_bs: *bs
|
| 126 |
-
fix_bs: false
|
| 127 |
-
divided_factor: [4, 16] # w, h
|
| 128 |
-
is_training: False
|
| 129 |
-
loader:
|
| 130 |
-
shuffle: False
|
| 131 |
-
drop_last: False
|
| 132 |
-
batch_size_per_card: *bs
|
| 133 |
-
max_ratio: *max_ratio
|
| 134 |
-
num_workers: 4
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
configs/rec/busnet/vit_busnet.yml
DELETED
|
@@ -1,104 +0,0 @@
|
|
| 1 |
-
Global:
|
| 2 |
-
device: gpu
|
| 3 |
-
epoch_num: 10
|
| 4 |
-
log_smooth_window: 20
|
| 5 |
-
print_batch_step: 10
|
| 6 |
-
output_dir: ./output/rec/u14m_filter/vit_busnet/
|
| 7 |
-
eval_epoch_step: [0, 1]
|
| 8 |
-
eval_batch_step: [0, 500]
|
| 9 |
-
cal_metric_during_train: True
|
| 10 |
-
pretrained_model:
|
| 11 |
-
checkpoints:
|
| 12 |
-
use_tensorboard: false
|
| 13 |
-
infer_img:
|
| 14 |
-
# for data or label process
|
| 15 |
-
character_dict_path: ./tools/utils/EN_symbol_dict.txt
|
| 16 |
-
max_text_length: 25
|
| 17 |
-
use_space_char: False
|
| 18 |
-
save_res_path: ./output/rec/u14m_filter/predicts_vit_busnet.txt
|
| 19 |
-
grad_clip_val: 20
|
| 20 |
-
use_amp: True
|
| 21 |
-
|
| 22 |
-
Optimizer:
|
| 23 |
-
name: Adam
|
| 24 |
-
lr: 0.00053 # 4gpus bs256/gpu
|
| 25 |
-
weight_decay: 0.0
|
| 26 |
-
filter_bias_and_bn: False
|
| 27 |
-
|
| 28 |
-
LRScheduler:
|
| 29 |
-
name: MultiStepLR
|
| 30 |
-
milestones: [6]
|
| 31 |
-
gamma: 0.1
|
| 32 |
-
|
| 33 |
-
Architecture:
|
| 34 |
-
model_type: rec
|
| 35 |
-
algorithm: BUSBet
|
| 36 |
-
Transform:
|
| 37 |
-
Encoder:
|
| 38 |
-
name: ViT
|
| 39 |
-
img_size: [32,128]
|
| 40 |
-
patch_size: [4, 8]
|
| 41 |
-
embed_dim: 384
|
| 42 |
-
depth: 12
|
| 43 |
-
num_heads: 6
|
| 44 |
-
mlp_ratio: 4
|
| 45 |
-
qkv_bias: True
|
| 46 |
-
Decoder:
|
| 47 |
-
name: BUSDecoder
|
| 48 |
-
nhead: 6
|
| 49 |
-
num_layers: 6
|
| 50 |
-
dim_feedforward: 1536
|
| 51 |
-
ignore_index: &ignore_index 100
|
| 52 |
-
pretraining: False
|
| 53 |
-
Loss:
|
| 54 |
-
name: ABINetLoss
|
| 55 |
-
ignore_index: *ignore_index
|
| 56 |
-
|
| 57 |
-
PostProcess:
|
| 58 |
-
name: ABINetLabelDecode
|
| 59 |
-
|
| 60 |
-
Metric:
|
| 61 |
-
name: RecMetric
|
| 62 |
-
main_indicator: acc
|
| 63 |
-
is_filter: True
|
| 64 |
-
|
| 65 |
-
Train:
|
| 66 |
-
dataset:
|
| 67 |
-
name: LMDBDataSet
|
| 68 |
-
data_dir: ../Union14M-L-LMDB-Filtered
|
| 69 |
-
transforms:
|
| 70 |
-
- DecodeImagePIL: # load image
|
| 71 |
-
img_mode: RGB
|
| 72 |
-
- PARSeqAugPIL:
|
| 73 |
-
- ABINetLabelEncode:
|
| 74 |
-
ignore_index: *ignore_index
|
| 75 |
-
- RecTVResize:
|
| 76 |
-
image_shape: [32, 128]
|
| 77 |
-
padding: False
|
| 78 |
-
- KeepKeys:
|
| 79 |
-
keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
|
| 80 |
-
loader:
|
| 81 |
-
shuffle: True
|
| 82 |
-
batch_size_per_card: 256
|
| 83 |
-
drop_last: True
|
| 84 |
-
num_workers: 4
|
| 85 |
-
|
| 86 |
-
Eval:
|
| 87 |
-
dataset:
|
| 88 |
-
name: LMDBDataSet
|
| 89 |
-
data_dir: ../evaluation
|
| 90 |
-
transforms:
|
| 91 |
-
- DecodeImagePIL: # load image
|
| 92 |
-
img_mode: RGB
|
| 93 |
-
- ABINetLabelEncode:
|
| 94 |
-
ignore_index: *ignore_index
|
| 95 |
-
- RecTVResize:
|
| 96 |
-
image_shape: [32, 128]
|
| 97 |
-
padding: False
|
| 98 |
-
- KeepKeys:
|
| 99 |
-
keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
|
| 100 |
-
loader:
|
| 101 |
-
shuffle: False
|
| 102 |
-
drop_last: False
|
| 103 |
-
batch_size_per_card: 256
|
| 104 |
-
num_workers: 2
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
configs/rec/busnet/vit_busnet_pretraining.yml
DELETED
|
@@ -1,104 +0,0 @@
|
|
| 1 |
-
Global:
|
| 2 |
-
device: gpu
|
| 3 |
-
epoch_num: 10
|
| 4 |
-
log_smooth_window: 20
|
| 5 |
-
print_batch_step: 10
|
| 6 |
-
output_dir: ./output/rec/u14m_filter/vit_busnet_pretraining/
|
| 7 |
-
eval_epoch_step: [0, 1]
|
| 8 |
-
eval_batch_step: [0, 500]
|
| 9 |
-
cal_metric_during_train: True
|
| 10 |
-
pretrained_model:
|
| 11 |
-
checkpoints:
|
| 12 |
-
use_tensorboard: false
|
| 13 |
-
infer_img:
|
| 14 |
-
# for data or label process
|
| 15 |
-
character_dict_path: ./tools/utils/EN_symbol_dict.txt
|
| 16 |
-
max_text_length: 25
|
| 17 |
-
use_space_char: False
|
| 18 |
-
save_res_path: ./output/rec/u14m_filter/predicts_vit_busnet_pretraining.txt
|
| 19 |
-
grad_clip_val: 20
|
| 20 |
-
use_amp: True
|
| 21 |
-
|
| 22 |
-
Optimizer:
|
| 23 |
-
name: Adam
|
| 24 |
-
lr: 0.00053 # 4gpus bs256/gpu
|
| 25 |
-
weight_decay: 0.0
|
| 26 |
-
filter_bias_and_bn: False
|
| 27 |
-
|
| 28 |
-
LRScheduler:
|
| 29 |
-
name: MultiStepLR
|
| 30 |
-
milestones: [6]
|
| 31 |
-
gamma: 0.1
|
| 32 |
-
|
| 33 |
-
Architecture:
|
| 34 |
-
model_type: rec
|
| 35 |
-
algorithm: BUSBet
|
| 36 |
-
Transform:
|
| 37 |
-
Encoder:
|
| 38 |
-
name: ViT
|
| 39 |
-
img_size: [32,128]
|
| 40 |
-
patch_size: [4, 8]
|
| 41 |
-
embed_dim: 384
|
| 42 |
-
depth: 12
|
| 43 |
-
num_heads: 6
|
| 44 |
-
mlp_ratio: 4
|
| 45 |
-
qkv_bias: True
|
| 46 |
-
Decoder:
|
| 47 |
-
name: BUSDecoder
|
| 48 |
-
nhead: 6
|
| 49 |
-
num_layers: 6
|
| 50 |
-
dim_feedforward: 1536
|
| 51 |
-
ignore_index: &ignore_index 100
|
| 52 |
-
pretraining: True
|
| 53 |
-
Loss:
|
| 54 |
-
name: ABINetLoss
|
| 55 |
-
ignore_index: *ignore_index
|
| 56 |
-
|
| 57 |
-
PostProcess:
|
| 58 |
-
name: ABINetLabelDecode
|
| 59 |
-
|
| 60 |
-
Metric:
|
| 61 |
-
name: RecMetric
|
| 62 |
-
main_indicator: acc
|
| 63 |
-
is_filter: True
|
| 64 |
-
|
| 65 |
-
Train:
|
| 66 |
-
dataset:
|
| 67 |
-
name: LMDBDataSet
|
| 68 |
-
data_dir: ../Union14M-L-LMDB-Filtered
|
| 69 |
-
transforms:
|
| 70 |
-
- DecodeImagePIL: # load image
|
| 71 |
-
img_mode: RGB
|
| 72 |
-
- PARSeqAugPIL:
|
| 73 |
-
- ABINetLabelEncode:
|
| 74 |
-
ignore_index: *ignore_index
|
| 75 |
-
- RecTVResize:
|
| 76 |
-
image_shape: [32, 128]
|
| 77 |
-
padding: False
|
| 78 |
-
- KeepKeys:
|
| 79 |
-
keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
|
| 80 |
-
loader:
|
| 81 |
-
shuffle: True
|
| 82 |
-
batch_size_per_card: 256
|
| 83 |
-
drop_last: True
|
| 84 |
-
num_workers: 4
|
| 85 |
-
|
| 86 |
-
Eval:
|
| 87 |
-
dataset:
|
| 88 |
-
name: LMDBDataSet
|
| 89 |
-
data_dir: ../evaluation
|
| 90 |
-
transforms:
|
| 91 |
-
- DecodeImagePIL: # load image
|
| 92 |
-
img_mode: RGB
|
| 93 |
-
- ABINetLabelEncode:
|
| 94 |
-
ignore_index: *ignore_index
|
| 95 |
-
- RecTVResize:
|
| 96 |
-
image_shape: [32, 128]
|
| 97 |
-
padding: False
|
| 98 |
-
- KeepKeys:
|
| 99 |
-
keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
|
| 100 |
-
loader:
|
| 101 |
-
shuffle: False
|
| 102 |
-
drop_last: False
|
| 103 |
-
batch_size_per_card: 256
|
| 104 |
-
num_workers: 2
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
configs/rec/cam/convnextv2_cam_tps_on.yml
DELETED
|
@@ -1,118 +0,0 @@
|
|
| 1 |
-
Global:
|
| 2 |
-
device: gpu
|
| 3 |
-
epoch_num: 20
|
| 4 |
-
log_smooth_window: 20
|
| 5 |
-
print_batch_step: 10
|
| 6 |
-
output_dir: ./output/rec/u14m_filter/convnextv2_cam_tps_on
|
| 7 |
-
eval_epoch_step: [0, 1]
|
| 8 |
-
eval_batch_step: [0, 500]
|
| 9 |
-
cal_metric_during_train: False
|
| 10 |
-
pretrained_model:
|
| 11 |
-
checkpoints:
|
| 12 |
-
use_tensorboard: false
|
| 13 |
-
infer_img:
|
| 14 |
-
# for data or label process
|
| 15 |
-
character_dict_path: ./tools/utils/EN_symbol_dict.txt
|
| 16 |
-
max_text_length: &max_text_length 25
|
| 17 |
-
use_space_char: False
|
| 18 |
-
save_res_path: ./output/rec/u14m_filter/predicts_convnextv2_cam_tps_on.txt
|
| 19 |
-
use_amp: True
|
| 20 |
-
|
| 21 |
-
Optimizer:
|
| 22 |
-
name: AdamW
|
| 23 |
-
lr: 0.0008 # for 4gpus bs256/gpu
|
| 24 |
-
weight_decay: 0.05
|
| 25 |
-
filter_bias_and_bn: True
|
| 26 |
-
eps: 1.e-8
|
| 27 |
-
|
| 28 |
-
LRScheduler:
|
| 29 |
-
name: OneCycleLR
|
| 30 |
-
warmup_epoch: 1.5 # pct_start 0.075*20 : 1.5ep
|
| 31 |
-
cycle_momentum: False
|
| 32 |
-
|
| 33 |
-
Architecture:
|
| 34 |
-
model_type: rec
|
| 35 |
-
algorithm: CAM
|
| 36 |
-
Transform:
|
| 37 |
-
name: Aster_TPS
|
| 38 |
-
tps_inputsize: [32, 64]
|
| 39 |
-
tps_outputsize: &img_shape [32, 128]
|
| 40 |
-
Encoder:
|
| 41 |
-
name: CAMEncoder
|
| 42 |
-
encoder_config:
|
| 43 |
-
name: ConvNeXtV2
|
| 44 |
-
depths: [2, 2, 8, 2]
|
| 45 |
-
dims: [80, 160, 320, 640]
|
| 46 |
-
strides: [[4,4], [2,1], [2,1], [1,1]]
|
| 47 |
-
drop_path_rate: 0.2
|
| 48 |
-
feat2d: True
|
| 49 |
-
nb_classes: 97
|
| 50 |
-
strides: [[4,4], [2,1], [2,1], [1,1]]
|
| 51 |
-
deform_stride: 2
|
| 52 |
-
stage_idx: 2
|
| 53 |
-
use_depthwise_unet: True
|
| 54 |
-
use_more_unet: False
|
| 55 |
-
binary_loss_type: BanlanceMultiClassCrossEntropyLoss
|
| 56 |
-
mid_size: True
|
| 57 |
-
d_embedding: 384
|
| 58 |
-
Decoder:
|
| 59 |
-
name: CAMDecoder
|
| 60 |
-
num_encoder_layers: -1
|
| 61 |
-
beam_size: 0
|
| 62 |
-
num_decoder_layers: 2
|
| 63 |
-
nhead: 8
|
| 64 |
-
max_len: *max_text_length
|
| 65 |
-
|
| 66 |
-
Loss:
|
| 67 |
-
name: CAMLoss
|
| 68 |
-
loss_weight_binary: 1.5
|
| 69 |
-
label_smoothing: 0.
|
| 70 |
-
|
| 71 |
-
Metric:
|
| 72 |
-
name: RecMetric
|
| 73 |
-
main_indicator: acc
|
| 74 |
-
is_filter: True
|
| 75 |
-
|
| 76 |
-
PostProcess:
|
| 77 |
-
name: ARLabelDecode
|
| 78 |
-
|
| 79 |
-
Train:
|
| 80 |
-
dataset:
|
| 81 |
-
name: LMDBDataSet
|
| 82 |
-
data_dir: ../Union14M-L-LMDB-Filtered
|
| 83 |
-
transforms:
|
| 84 |
-
- DecodeImagePIL: # load image
|
| 85 |
-
img_mode: RGB
|
| 86 |
-
- PARSeqAugPIL:
|
| 87 |
-
- CAMLabelEncode: # Class handling label
|
| 88 |
-
font_path: ./arial.ttf
|
| 89 |
-
image_shape: *img_shape
|
| 90 |
-
- RecTVResize:
|
| 91 |
-
image_shape: [64, 256]
|
| 92 |
-
padding: False
|
| 93 |
-
- KeepKeys:
|
| 94 |
-
keep_keys: ['image', 'label', 'length', 'binary_mask'] # dataloader will return list in this order
|
| 95 |
-
loader:
|
| 96 |
-
shuffle: True
|
| 97 |
-
batch_size_per_card: 256
|
| 98 |
-
drop_last: True
|
| 99 |
-
num_workers: 4
|
| 100 |
-
|
| 101 |
-
Eval:
|
| 102 |
-
dataset:
|
| 103 |
-
name: LMDBDataSet
|
| 104 |
-
data_dir: ../evaluation
|
| 105 |
-
transforms:
|
| 106 |
-
- DecodeImagePIL: # load image
|
| 107 |
-
img_mode: RGB
|
| 108 |
-
- ARLabelEncode: # Class handling label
|
| 109 |
-
- RecTVResize:
|
| 110 |
-
image_shape: [64, 256]
|
| 111 |
-
padding: False
|
| 112 |
-
- KeepKeys:
|
| 113 |
-
keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
|
| 114 |
-
loader:
|
| 115 |
-
shuffle: False
|
| 116 |
-
drop_last: False
|
| 117 |
-
batch_size_per_card: 256
|
| 118 |
-
num_workers: 2
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
configs/rec/cam/convnextv2_tiny_cam_tps_on.yml
DELETED
|
@@ -1,118 +0,0 @@
|
|
| 1 |
-
Global:
|
| 2 |
-
device: gpu
|
| 3 |
-
epoch_num: 20
|
| 4 |
-
log_smooth_window: 20
|
| 5 |
-
print_batch_step: 10
|
| 6 |
-
output_dir: ./output/rec/u14m_filter/convnextv2_tiny_cam_tps_on
|
| 7 |
-
eval_epoch_step: [0, 1]
|
| 8 |
-
eval_batch_step: [0, 500]
|
| 9 |
-
cal_metric_during_train: False
|
| 10 |
-
pretrained_model:
|
| 11 |
-
checkpoints:
|
| 12 |
-
use_tensorboard: false
|
| 13 |
-
infer_img:
|
| 14 |
-
# for data or label process
|
| 15 |
-
character_dict_path: ./tools/utils/EN_symbol_dict.txt
|
| 16 |
-
max_text_length: &max_text_length 25
|
| 17 |
-
use_space_char: False
|
| 18 |
-
save_res_path: ./output/rec/u14m_filter/predicts_convnextv2_cam_tps_on.txt
|
| 19 |
-
use_amp: True
|
| 20 |
-
|
| 21 |
-
Optimizer:
|
| 22 |
-
name: AdamW
|
| 23 |
-
lr: 0.0008 # for 4gpus bs256/gpu
|
| 24 |
-
weight_decay: 0.05
|
| 25 |
-
filter_bias_and_bn: True
|
| 26 |
-
eps: 1.e-8
|
| 27 |
-
|
| 28 |
-
LRScheduler:
|
| 29 |
-
name: OneCycleLR
|
| 30 |
-
warmup_epoch: 1.5 # pct_start 0.075*20 : 1.5ep
|
| 31 |
-
cycle_momentum: False
|
| 32 |
-
|
| 33 |
-
Architecture:
|
| 34 |
-
model_type: rec
|
| 35 |
-
algorithm: CAM
|
| 36 |
-
Transform:
|
| 37 |
-
name: Aster_TPS
|
| 38 |
-
tps_inputsize: [32, 64]
|
| 39 |
-
tps_outputsize: &img_shape [32, 128]
|
| 40 |
-
Encoder:
|
| 41 |
-
name: CAMEncoder
|
| 42 |
-
encoder_config:
|
| 43 |
-
name: ConvNeXtV2
|
| 44 |
-
depths: [3, 3, 9, 3]
|
| 45 |
-
dims: [96, 192, 384, 768]
|
| 46 |
-
strides: [[4,4], [2,1], [2,1], [1,1]]
|
| 47 |
-
drop_path_rate: 0.2
|
| 48 |
-
feat2d: True
|
| 49 |
-
nb_classes: 97
|
| 50 |
-
strides: [[4,4], [2,1], [2,1], [1,1]]
|
| 51 |
-
deform_stride: 2
|
| 52 |
-
stage_idx: 2
|
| 53 |
-
use_depthwise_unet: True
|
| 54 |
-
use_more_unet: False
|
| 55 |
-
binary_loss_type: BanlanceMultiClassCrossEntropyLoss
|
| 56 |
-
mid_size: False
|
| 57 |
-
d_embedding: 512
|
| 58 |
-
Decoder:
|
| 59 |
-
name: CAMDecoder
|
| 60 |
-
num_encoder_layers: -1
|
| 61 |
-
beam_size: 0
|
| 62 |
-
num_decoder_layers: 2
|
| 63 |
-
nhead: 8
|
| 64 |
-
max_len: *max_text_length
|
| 65 |
-
|
| 66 |
-
Loss:
|
| 67 |
-
name: CAMLoss
|
| 68 |
-
loss_weight_binary: 1.5
|
| 69 |
-
label_smoothing: 0.
|
| 70 |
-
|
| 71 |
-
Metric:
|
| 72 |
-
name: RecMetric
|
| 73 |
-
main_indicator: acc
|
| 74 |
-
is_filter: True
|
| 75 |
-
|
| 76 |
-
PostProcess:
|
| 77 |
-
name: ARLabelDecode
|
| 78 |
-
|
| 79 |
-
Train:
|
| 80 |
-
dataset:
|
| 81 |
-
name: LMDBDataSet
|
| 82 |
-
data_dir: ../Union14M-L-LMDB-Filtered
|
| 83 |
-
transforms:
|
| 84 |
-
- DecodeImagePIL: # load image
|
| 85 |
-
img_mode: RGB
|
| 86 |
-
- PARSeqAugPIL:
|
| 87 |
-
- CAMLabelEncode: # Class handling label
|
| 88 |
-
font_path: ./arial.ttf
|
| 89 |
-
image_shape: *img_shape
|
| 90 |
-
- RecTVResize:
|
| 91 |
-
image_shape: [64, 256]
|
| 92 |
-
padding: False
|
| 93 |
-
- KeepKeys:
|
| 94 |
-
keep_keys: ['image', 'label', 'length', 'binary_mask'] # dataloader will return list in this order
|
| 95 |
-
loader:
|
| 96 |
-
shuffle: True
|
| 97 |
-
batch_size_per_card: 256
|
| 98 |
-
drop_last: True
|
| 99 |
-
num_workers: 4
|
| 100 |
-
|
| 101 |
-
Eval:
|
| 102 |
-
dataset:
|
| 103 |
-
name: LMDBDataSet
|
| 104 |
-
data_dir: ../evaluation
|
| 105 |
-
transforms:
|
| 106 |
-
- DecodeImagePIL: # load image
|
| 107 |
-
img_mode: RGB
|
| 108 |
-
- ARLabelEncode: # Class handling label
|
| 109 |
-
- RecTVResize:
|
| 110 |
-
image_shape: [64, 256]
|
| 111 |
-
padding: False
|
| 112 |
-
- KeepKeys:
|
| 113 |
-
keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
|
| 114 |
-
loader:
|
| 115 |
-
shuffle: False
|
| 116 |
-
drop_last: False
|
| 117 |
-
batch_size_per_card: 256
|
| 118 |
-
num_workers: 2
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
configs/rec/cam/svtrv2_cam_tps_on.yml
DELETED
|
@@ -1,123 +0,0 @@
|
|
| 1 |
-
Global:
|
| 2 |
-
device: gpu
|
| 3 |
-
epoch_num: 20
|
| 4 |
-
log_smooth_window: 20
|
| 5 |
-
print_batch_step: 10
|
| 6 |
-
output_dir: ./output/rec/u14m_filter/svtrv2_cam_tps_on
|
| 7 |
-
eval_epoch_step: [0, 1]
|
| 8 |
-
eval_batch_step: [0, 500]
|
| 9 |
-
cal_metric_during_train: False
|
| 10 |
-
pretrained_model:
|
| 11 |
-
checkpoints:
|
| 12 |
-
use_tensorboard: false
|
| 13 |
-
infer_img:
|
| 14 |
-
# for data or label process
|
| 15 |
-
character_dict_path: ./tools/utils/EN_symbol_dict.txt
|
| 16 |
-
max_text_length: &max_text_length 25
|
| 17 |
-
use_space_char: False
|
| 18 |
-
save_res_path: ./output/rec/u14m_filter/predicts_svtrv2_cam_tps_on.txt
|
| 19 |
-
use_amp: True
|
| 20 |
-
|
| 21 |
-
Optimizer:
|
| 22 |
-
name: AdamW
|
| 23 |
-
lr: 0.00065 # for 4gpus bs256/gpu
|
| 24 |
-
weight_decay: 0.05
|
| 25 |
-
filter_bias_and_bn: True
|
| 26 |
-
|
| 27 |
-
LRScheduler:
|
| 28 |
-
name: OneCycleLR
|
| 29 |
-
warmup_epoch: 1.5 # pct_start 0.075*20 : 1.5ep
|
| 30 |
-
cycle_momentum: False
|
| 31 |
-
|
| 32 |
-
Architecture:
|
| 33 |
-
model_type: rec
|
| 34 |
-
algorithm: CAM
|
| 35 |
-
Transform:
|
| 36 |
-
name: Aster_TPS
|
| 37 |
-
tps_inputsize: [32, 64]
|
| 38 |
-
tps_outputsize: &img_shape [32, 128]
|
| 39 |
-
Encoder:
|
| 40 |
-
name: CAMEncoder
|
| 41 |
-
encoder_config:
|
| 42 |
-
name: SVTRv2LNConvTwo33
|
| 43 |
-
use_pos_embed: False
|
| 44 |
-
dims: [128, 256, 384]
|
| 45 |
-
depths: [6, 6, 6]
|
| 46 |
-
num_heads: [4, 8, 12]
|
| 47 |
-
mixer: [['Conv','Conv','Conv','Conv','Conv','Conv'],['Conv','Conv','FGlobal','Global','Global','Global'],['Global','Global','Global','Global','Global','Global']]
|
| 48 |
-
local_k: [[5, 5], [5, 5], [-1, -1]]
|
| 49 |
-
sub_k: [[1, 1], [2, 1], [-1, -1]]
|
| 50 |
-
last_stage: false
|
| 51 |
-
feat2d: True
|
| 52 |
-
nb_classes: 97
|
| 53 |
-
strides: [[4, 4], [1, 1], [2, 1], [1, 1]]
|
| 54 |
-
k_size: [[2, 2], [1, 1], [2, 1], [1, 1]]
|
| 55 |
-
q_size: [4, 32]
|
| 56 |
-
deform_stride: 2
|
| 57 |
-
stage_idx: 2
|
| 58 |
-
use_depthwise_unet: True
|
| 59 |
-
use_more_unet: False
|
| 60 |
-
binary_loss_type: BanlanceMultiClassCrossEntropyLoss
|
| 61 |
-
mid_size: True
|
| 62 |
-
d_embedding: 384
|
| 63 |
-
Decoder:
|
| 64 |
-
name: CAMDecoder
|
| 65 |
-
num_encoder_layers: -1
|
| 66 |
-
beam_size: 0
|
| 67 |
-
num_decoder_layers: 2
|
| 68 |
-
nhead: 8
|
| 69 |
-
max_len: *max_text_length
|
| 70 |
-
|
| 71 |
-
Loss:
|
| 72 |
-
name: CAMLoss
|
| 73 |
-
loss_weight_binary: 1.5
|
| 74 |
-
label_smoothing: 0.
|
| 75 |
-
|
| 76 |
-
Metric:
|
| 77 |
-
name: RecMetric
|
| 78 |
-
main_indicator: acc
|
| 79 |
-
is_filter: True
|
| 80 |
-
|
| 81 |
-
PostProcess:
|
| 82 |
-
name: ARLabelDecode
|
| 83 |
-
|
| 84 |
-
Train:
|
| 85 |
-
dataset:
|
| 86 |
-
name: LMDBDataSet
|
| 87 |
-
data_dir: ../Union14M-L-LMDB-Filtered
|
| 88 |
-
transforms:
|
| 89 |
-
- DecodeImagePIL: # load image
|
| 90 |
-
img_mode: RGB
|
| 91 |
-
- PARSeqAugPIL:
|
| 92 |
-
- CAMLabelEncode: # Class handling label
|
| 93 |
-
font_path: ./arial.ttf
|
| 94 |
-
image_shape: *img_shape
|
| 95 |
-
- RecTVResize:
|
| 96 |
-
image_shape: [64, 256]
|
| 97 |
-
padding: False
|
| 98 |
-
- KeepKeys:
|
| 99 |
-
keep_keys: ['image', 'label', 'length', 'binary_mask'] # dataloader will return list in this order
|
| 100 |
-
loader:
|
| 101 |
-
shuffle: True
|
| 102 |
-
batch_size_per_card: 256
|
| 103 |
-
drop_last: True
|
| 104 |
-
num_workers: 4
|
| 105 |
-
|
| 106 |
-
Eval:
|
| 107 |
-
dataset:
|
| 108 |
-
name: LMDBDataSet
|
| 109 |
-
data_dir: ../evaluation
|
| 110 |
-
transforms:
|
| 111 |
-
- DecodeImagePIL: # load image
|
| 112 |
-
img_mode: RGB
|
| 113 |
-
- ARLabelEncode: # Class handling label
|
| 114 |
-
- RecTVResize:
|
| 115 |
-
image_shape: [64, 256]
|
| 116 |
-
padding: False
|
| 117 |
-
- KeepKeys:
|
| 118 |
-
keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
|
| 119 |
-
loader:
|
| 120 |
-
shuffle: False
|
| 121 |
-
drop_last: False
|
| 122 |
-
batch_size_per_card: 256
|
| 123 |
-
num_workers: 2
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
configs/rec/cdistnet/resnet45_trans_cdistnet.yml
DELETED
|
@@ -1,93 +0,0 @@
|
|
| 1 |
-
Global:
|
| 2 |
-
device: gpu
|
| 3 |
-
epoch_num: 20
|
| 4 |
-
log_smooth_window: 20
|
| 5 |
-
print_batch_step: 10
|
| 6 |
-
output_dir: ./output/rec/u14m_filter/resnet45_trans_cdistnet
|
| 7 |
-
eval_epoch_step: [0, 1]
|
| 8 |
-
eval_batch_step: [0, 500]
|
| 9 |
-
cal_metric_during_train: True
|
| 10 |
-
pretrained_model:
|
| 11 |
-
checkpoints:
|
| 12 |
-
use_tensorboard: false
|
| 13 |
-
infer_img:
|
| 14 |
-
# for data or label process
|
| 15 |
-
character_dict_path: ./tools/utils/EN_symbol_dict.txt
|
| 16 |
-
max_text_length: 25
|
| 17 |
-
use_space_char: False
|
| 18 |
-
save_res_path: ./output/rec/u14m_filter/predicts_resnet45_trans_cdistnet.txt
|
| 19 |
-
use_amp: True
|
| 20 |
-
grad_clip_val: 5
|
| 21 |
-
|
| 22 |
-
Optimizer:
|
| 23 |
-
name: Adam
|
| 24 |
-
lr: 0.002 # for 4gpus bs256/gpu
|
| 25 |
-
weight_decay: 0.0
|
| 26 |
-
filter_bias_and_bn: False
|
| 27 |
-
|
| 28 |
-
LRScheduler:
|
| 29 |
-
name: OneCycleLR
|
| 30 |
-
warmup_epoch: 1.5 # pct_start 0.075*20 = 1.5ep
|
| 31 |
-
cycle_momentum: False
|
| 32 |
-
|
| 33 |
-
Architecture:
|
| 34 |
-
model_type: rec
|
| 35 |
-
algorithm: CDistNet
|
| 36 |
-
Transform:
|
| 37 |
-
Encoder:
|
| 38 |
-
name: ResNet45
|
| 39 |
-
in_channels: 3
|
| 40 |
-
strides: [2, 1, 2, 1, 1]
|
| 41 |
-
Decoder:
|
| 42 |
-
name: CDistNetDecoder
|
| 43 |
-
add_conv: True
|
| 44 |
-
|
| 45 |
-
Loss:
|
| 46 |
-
name: ARLoss
|
| 47 |
-
|
| 48 |
-
PostProcess:
|
| 49 |
-
name: ARLabelDecode
|
| 50 |
-
|
| 51 |
-
Metric:
|
| 52 |
-
name: RecMetric
|
| 53 |
-
main_indicator: acc
|
| 54 |
-
is_filter: True
|
| 55 |
-
|
| 56 |
-
Train:
|
| 57 |
-
dataset:
|
| 58 |
-
name: LMDBDataSet
|
| 59 |
-
data_dir: ../Union14M-L-LMDB-Filtered
|
| 60 |
-
transforms:
|
| 61 |
-
- DecodeImagePIL: # load image
|
| 62 |
-
img_mode: RGB
|
| 63 |
-
- PARSeqAugPIL:
|
| 64 |
-
- ARLabelEncode: # Class handling label
|
| 65 |
-
- RecTVResize:
|
| 66 |
-
image_shape: [32, 128]
|
| 67 |
-
padding: False
|
| 68 |
-
- KeepKeys:
|
| 69 |
-
keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
|
| 70 |
-
loader:
|
| 71 |
-
shuffle: True
|
| 72 |
-
batch_size_per_card: 256
|
| 73 |
-
drop_last: True
|
| 74 |
-
num_workers: 4
|
| 75 |
-
|
| 76 |
-
Eval:
|
| 77 |
-
dataset:
|
| 78 |
-
name: LMDBDataSet
|
| 79 |
-
data_dir: ../evaluation
|
| 80 |
-
transforms:
|
| 81 |
-
- DecodeImagePIL: # load image
|
| 82 |
-
img_mode: RGB
|
| 83 |
-
- ARLabelEncode: # Class handling label
|
| 84 |
-
- RecTVResize:
|
| 85 |
-
image_shape: [32, 128]
|
| 86 |
-
padding: False
|
| 87 |
-
- KeepKeys:
|
| 88 |
-
keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
|
| 89 |
-
loader:
|
| 90 |
-
shuffle: False
|
| 91 |
-
drop_last: False
|
| 92 |
-
batch_size_per_card: 256
|
| 93 |
-
num_workers: 2
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
configs/rec/cdistnet/svtrv2_cdistnet.yml
DELETED
|
@@ -1,139 +0,0 @@
|
|
| 1 |
-
Global:
|
| 2 |
-
device: gpu
|
| 3 |
-
epoch_num: 20
|
| 4 |
-
log_smooth_window: 20
|
| 5 |
-
print_batch_step: 10
|
| 6 |
-
output_dir: ./output/rec/u14m_filter/svtrv2_cdistnet/
|
| 7 |
-
eval_epoch_step: [0, 1]
|
| 8 |
-
eval_batch_step: [0, 500]
|
| 9 |
-
cal_metric_during_train: True
|
| 10 |
-
pretrained_model:
|
| 11 |
-
checkpoints:
|
| 12 |
-
use_tensorboard: false
|
| 13 |
-
infer_img:
|
| 14 |
-
# for data or label process
|
| 15 |
-
character_dict_path: &character_dict_path ./tools/utils/EN_symbol_dict.txt # 96en
|
| 16 |
-
# ./tools/utils/ppocr_keys_v1.txt # ch
|
| 17 |
-
max_text_length: &max_text_length 25
|
| 18 |
-
use_space_char: &use_space_char False
|
| 19 |
-
save_res_path: ./output/rec/u14m_filter/predicts_svtrv2_cdistnet.txt
|
| 20 |
-
use_amp: True
|
| 21 |
-
|
| 22 |
-
Optimizer:
|
| 23 |
-
name: AdamW
|
| 24 |
-
lr: 0.00065 #4gpus bs256/gpu
|
| 25 |
-
weight_decay: 0.05
|
| 26 |
-
filter_bias_and_bn: True
|
| 27 |
-
|
| 28 |
-
LRScheduler:
|
| 29 |
-
name: OneCycleLR
|
| 30 |
-
warmup_epoch: 1.5 # pct_start 0.075*20 = 1.5ep
|
| 31 |
-
cycle_momentum: False
|
| 32 |
-
|
| 33 |
-
Architecture:
|
| 34 |
-
model_type: rec
|
| 35 |
-
algorithm: CDistNet
|
| 36 |
-
in_channels: 3
|
| 37 |
-
Transform:
|
| 38 |
-
Encoder:
|
| 39 |
-
name: SVTRv2LNConvTwo33
|
| 40 |
-
use_pos_embed: False
|
| 41 |
-
out_channels: 256
|
| 42 |
-
dims: [128, 256, 384]
|
| 43 |
-
depths: [6, 6, 6]
|
| 44 |
-
num_heads: [4, 8, 12]
|
| 45 |
-
mixer: [['Conv','Conv','Conv','Conv','Conv','Conv'],['Conv','Conv','FGlobal','Global','Global','Global'],['Global','Global','Global','Global','Global','Global']]
|
| 46 |
-
local_k: [[5, 5], [5, 5], [-1, -1]]
|
| 47 |
-
sub_k: [[1, 1], [2, 1], [-1, -1]]
|
| 48 |
-
last_stage: false
|
| 49 |
-
feat2d: True
|
| 50 |
-
Decoder:
|
| 51 |
-
name: CDistNetDecoder
|
| 52 |
-
add_conv: False
|
| 53 |
-
num_encoder_blocks: 0
|
| 54 |
-
|
| 55 |
-
Loss:
|
| 56 |
-
name: ARLoss
|
| 57 |
-
|
| 58 |
-
PostProcess:
|
| 59 |
-
name: ARLabelDecode
|
| 60 |
-
character_dict_path: *character_dict_path
|
| 61 |
-
use_space_char: *use_space_char
|
| 62 |
-
|
| 63 |
-
Metric:
|
| 64 |
-
name: RecMetric
|
| 65 |
-
main_indicator: acc
|
| 66 |
-
is_filter: True
|
| 67 |
-
|
| 68 |
-
Train:
|
| 69 |
-
dataset:
|
| 70 |
-
name: RatioDataSetTVResize
|
| 71 |
-
ds_width: True
|
| 72 |
-
padding: false
|
| 73 |
-
data_dir_list: ['../Union14M-L-LMDB-Filtered/filter_train_challenging',
|
| 74 |
-
'../Union14M-L-LMDB-Filtered/filter_train_hard',
|
| 75 |
-
'../Union14M-L-LMDB-Filtered/filter_train_medium',
|
| 76 |
-
'../Union14M-L-LMDB-Filtered/filter_train_normal',
|
| 77 |
-
'../Union14M-L-LMDB-Filtered/filter_train_easy',
|
| 78 |
-
]
|
| 79 |
-
transforms:
|
| 80 |
-
- DecodeImagePIL: # load image
|
| 81 |
-
img_mode: RGB
|
| 82 |
-
- PARSeqAugPIL:
|
| 83 |
-
- ARLabelEncode: # Class handling label
|
| 84 |
-
character_dict_path: *character_dict_path
|
| 85 |
-
use_space_char: *use_space_char
|
| 86 |
-
max_text_length: *max_text_length
|
| 87 |
-
- KeepKeys:
|
| 88 |
-
keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
|
| 89 |
-
sampler:
|
| 90 |
-
name: RatioSampler
|
| 91 |
-
scales: [[128, 32]] # w, h
|
| 92 |
-
# divided_factor: ensures the width and height dimensions are divisible by the downsampling multiple
|
| 93 |
-
first_bs: &bs 256
|
| 94 |
-
fix_bs: false
|
| 95 |
-
divided_factor: [4, 16] # w, h
|
| 96 |
-
is_training: True
|
| 97 |
-
loader:
|
| 98 |
-
shuffle: True
|
| 99 |
-
batch_size_per_card: *bs
|
| 100 |
-
drop_last: True
|
| 101 |
-
max_ratio: &max_ratio 4
|
| 102 |
-
num_workers: 4
|
| 103 |
-
|
| 104 |
-
Eval:
|
| 105 |
-
dataset:
|
| 106 |
-
name: RatioDataSetTVResize
|
| 107 |
-
ds_width: True
|
| 108 |
-
padding: False
|
| 109 |
-
data_dir_list: [
|
| 110 |
-
'../evaluation/CUTE80',
|
| 111 |
-
'../evaluation/IC13_857',
|
| 112 |
-
'../evaluation/IC15_1811',
|
| 113 |
-
'../evaluation/IIIT5k',
|
| 114 |
-
'../evaluation/SVT',
|
| 115 |
-
'../evaluation/SVTP',
|
| 116 |
-
]
|
| 117 |
-
transforms:
|
| 118 |
-
- DecodeImagePIL: # load image
|
| 119 |
-
img_mode: RGB
|
| 120 |
-
- ARLabelEncode: # Class handling label
|
| 121 |
-
character_dict_path: *character_dict_path
|
| 122 |
-
use_space_char: *use_space_char
|
| 123 |
-
max_text_length: *max_text_length
|
| 124 |
-
- KeepKeys:
|
| 125 |
-
keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
|
| 126 |
-
sampler:
|
| 127 |
-
name: RatioSampler
|
| 128 |
-
scales: [[128, 32]] # w, h
|
| 129 |
-
# divided_factor: ensures the width and height dimensions are divisible by the downsampling multiple
|
| 130 |
-
first_bs: *bs
|
| 131 |
-
fix_bs: false
|
| 132 |
-
divided_factor: [4, 16] # w, h
|
| 133 |
-
is_training: False
|
| 134 |
-
loader:
|
| 135 |
-
shuffle: False
|
| 136 |
-
drop_last: False
|
| 137 |
-
batch_size_per_card: *bs
|
| 138 |
-
max_ratio: *max_ratio
|
| 139 |
-
num_workers: 4
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
configs/rec/cppd/svtr_base_cppd.yml
DELETED
|
@@ -1,123 +0,0 @@
|
|
| 1 |
-
Global:
|
| 2 |
-
device: gpu
|
| 3 |
-
epoch_num: 20
|
| 4 |
-
log_smooth_window: 20
|
| 5 |
-
print_batch_step: 10
|
| 6 |
-
output_dir: ./output/rec/u14m_filter/svtr_base_cppd/
|
| 7 |
-
save_epoch_step: 1
|
| 8 |
-
# evaluation is run every 500 iterations
|
| 9 |
-
eval_batch_step: [0, 500]
|
| 10 |
-
eval_epoch_step: [0, 1]
|
| 11 |
-
cal_metric_during_train: True
|
| 12 |
-
pretrained_model:
|
| 13 |
-
checkpoints:
|
| 14 |
-
use_tensorboard: false
|
| 15 |
-
infer_img:
|
| 16 |
-
# for data or label process
|
| 17 |
-
character_dict_path: &character_dict_path
|
| 18 |
-
# ./tools/utils/EN_symbol_dict.txt # 96en
|
| 19 |
-
# ./tools/utils/ppocr_keys_v1.txt # ch
|
| 20 |
-
max_text_length: &max_text_length 25
|
| 21 |
-
use_space_char: &use_space_char False
|
| 22 |
-
save_res_path: ./output/rec/u14m_filter/predicts_svtr_base_cppd.txt
|
| 23 |
-
use_amp: True
|
| 24 |
-
|
| 25 |
-
Optimizer:
|
| 26 |
-
name: AdamW
|
| 27 |
-
lr: 0.00065 # for 4gpus bs256/gpu
|
| 28 |
-
weight_decay: 0.05
|
| 29 |
-
filter_bias_and_bn: True
|
| 30 |
-
|
| 31 |
-
LRScheduler:
|
| 32 |
-
name: OneCycleLR
|
| 33 |
-
warmup_epoch: 1.5 # pct_start 0.075*20 = 1.5ep
|
| 34 |
-
cycle_momentum: False
|
| 35 |
-
|
| 36 |
-
Architecture:
|
| 37 |
-
model_type: rec
|
| 38 |
-
algorithm: CPPD
|
| 39 |
-
in_channels: 3
|
| 40 |
-
Transform:
|
| 41 |
-
Encoder:
|
| 42 |
-
name: SVTRNet
|
| 43 |
-
img_size: [32, 128]
|
| 44 |
-
out_char_num: 25
|
| 45 |
-
out_channels: 256
|
| 46 |
-
patch_merging: 'Conv'
|
| 47 |
-
embed_dim: [128, 256, 384]
|
| 48 |
-
depth: [6, 6, 6]
|
| 49 |
-
num_heads: [4, 8, 12]
|
| 50 |
-
mixer: ['Conv','Conv','Conv','Conv','Conv','Conv', 'Conv','Conv', 'Global','Global','Global','Global','Global','Global','Global','Global','Global','Global']
|
| 51 |
-
local_mixer: [[5, 5], [5, 5], [5, 5]]
|
| 52 |
-
last_stage: False
|
| 53 |
-
prenorm: True
|
| 54 |
-
Decoder:
|
| 55 |
-
name: CPPDDecoder
|
| 56 |
-
vis_seq: 64
|
| 57 |
-
num_layer: 2
|
| 58 |
-
pos_len: False
|
| 59 |
-
rec_layer: 1
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
Loss:
|
| 63 |
-
name: CPPDLoss
|
| 64 |
-
ignore_index: 100
|
| 65 |
-
smoothing: True
|
| 66 |
-
pos_len: False
|
| 67 |
-
sideloss_weight: 1.0
|
| 68 |
-
|
| 69 |
-
PostProcess:
|
| 70 |
-
name: CPPDLabelDecode
|
| 71 |
-
character_dict_path: *character_dict_path
|
| 72 |
-
use_space_char: *use_space_char
|
| 73 |
-
|
| 74 |
-
Metric:
|
| 75 |
-
name: RecMetric
|
| 76 |
-
main_indicator: acc
|
| 77 |
-
|
| 78 |
-
Train:
|
| 79 |
-
dataset:
|
| 80 |
-
name: LMDBDataSet
|
| 81 |
-
data_dir: ../Union14M-L-LMDB-Filtered
|
| 82 |
-
transforms:
|
| 83 |
-
- DecodeImagePIL: # load image
|
| 84 |
-
img_mode: RGB
|
| 85 |
-
- PARSeqAugPIL:
|
| 86 |
-
- CPPDLabelEncode: # Class handling label
|
| 87 |
-
pos_len: False
|
| 88 |
-
character_dict_path: *character_dict_path
|
| 89 |
-
use_space_char: *use_space_char
|
| 90 |
-
max_text_length: *max_text_length
|
| 91 |
-
- RecTVResize:
|
| 92 |
-
image_shape: [32, 128]
|
| 93 |
-
padding: False
|
| 94 |
-
- KeepKeys:
|
| 95 |
-
keep_keys: ['image', 'label', 'label_node', 'length'] # dataloader will return list in this order
|
| 96 |
-
loader:
|
| 97 |
-
shuffle: True
|
| 98 |
-
batch_size_per_card: 256
|
| 99 |
-
drop_last: True
|
| 100 |
-
num_workers: 4
|
| 101 |
-
|
| 102 |
-
Eval:
|
| 103 |
-
dataset:
|
| 104 |
-
name: LMDBDataSet
|
| 105 |
-
data_dir: ../evaluation/
|
| 106 |
-
transforms:
|
| 107 |
-
- DecodeImagePIL: # load image
|
| 108 |
-
img_mode: RGB
|
| 109 |
-
- CPPDLabelEncode: # Class handling label
|
| 110 |
-
pos_len: False
|
| 111 |
-
character_dict_path: *character_dict_path
|
| 112 |
-
use_space_char: *use_space_char
|
| 113 |
-
max_text_length: *max_text_length
|
| 114 |
-
- RecTVResize:
|
| 115 |
-
image_shape: [32, 128]
|
| 116 |
-
padding: False
|
| 117 |
-
- KeepKeys:
|
| 118 |
-
keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
|
| 119 |
-
loader:
|
| 120 |
-
shuffle: False
|
| 121 |
-
drop_last: False
|
| 122 |
-
batch_size_per_card: 128
|
| 123 |
-
num_workers: 4
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
configs/rec/cppd/svtr_base_cppd_ch.yml
DELETED
|
@@ -1,126 +0,0 @@
|
|
| 1 |
-
Global:
|
| 2 |
-
device: gpu
|
| 3 |
-
epoch_num: 100
|
| 4 |
-
log_smooth_window: 20
|
| 5 |
-
print_batch_step: 10
|
| 6 |
-
output_dir: ./output/rec/ch/svtr_base_cppd/
|
| 7 |
-
save_epoch_step: 1
|
| 8 |
-
# evaluation is run every 2000 iterations
|
| 9 |
-
eval_batch_step: [0, 2000]
|
| 10 |
-
eval_epoch_step: [0, 1]
|
| 11 |
-
cal_metric_during_train: False
|
| 12 |
-
pretrained_model:
|
| 13 |
-
checkpoints:
|
| 14 |
-
use_tensorboard: false
|
| 15 |
-
infer_img:
|
| 16 |
-
# for data or label process
|
| 17 |
-
character_dict_path: &character_dict_path ./tools/utils/ppocr_keys_v1.txt
|
| 18 |
-
# ./tools/utils/EN_symbol_dict.txt # 96en
|
| 19 |
-
# ./tools/utils/ppocr_keys_v1.txt # ch
|
| 20 |
-
max_text_length: &max_text_length 25
|
| 21 |
-
use_space_char: &use_space_char False
|
| 22 |
-
save_res_path: ./output/rec/ch/predicts_svtr_base_cppd.txt
|
| 23 |
-
use_amp: True
|
| 24 |
-
|
| 25 |
-
Optimizer:
|
| 26 |
-
name: AdamW
|
| 27 |
-
lr: 0.0005 # for 4gpus bs128/gpu
|
| 28 |
-
weight_decay: 0.05
|
| 29 |
-
filter_bias_and_bn: True
|
| 30 |
-
|
| 31 |
-
LRScheduler:
|
| 32 |
-
name: CosineAnnealingLR
|
| 33 |
-
warmup_epoch: 5
|
| 34 |
-
|
| 35 |
-
Architecture:
|
| 36 |
-
model_type: rec
|
| 37 |
-
algorithm: CPPD
|
| 38 |
-
in_channels: 3
|
| 39 |
-
Transform:
|
| 40 |
-
Encoder:
|
| 41 |
-
name: SVTRNet
|
| 42 |
-
img_size: [32, 256]
|
| 43 |
-
patch_merging: 'Conv'
|
| 44 |
-
embed_dim: [128, 256, 384]
|
| 45 |
-
depth: [6, 6, 4]
|
| 46 |
-
num_heads: [4, 8, 12]
|
| 47 |
-
mixer: ['Conv','Conv','Conv','Conv','Conv','Conv', 'Conv','Conv', 'Global','Global','Global','Global','Global','Global','Global','Global','Global','Global']
|
| 48 |
-
local_mixer: [[5, 5], [5, 5], [5, 5]]
|
| 49 |
-
last_stage: False
|
| 50 |
-
prenorm: True
|
| 51 |
-
Decoder:
|
| 52 |
-
name: CPPDDecoder
|
| 53 |
-
vis_seq: 128
|
| 54 |
-
num_layer: 3
|
| 55 |
-
pos_len: False
|
| 56 |
-
rec_layer: 1
|
| 57 |
-
ch: True
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
Loss:
|
| 61 |
-
name: CPPDLoss
|
| 62 |
-
ignore_index: 7000
|
| 63 |
-
smoothing: True
|
| 64 |
-
pos_len: False
|
| 65 |
-
sideloss_weight: 1.0
|
| 66 |
-
|
| 67 |
-
PostProcess:
|
| 68 |
-
name: CPPDLabelDecode
|
| 69 |
-
character_dict_path: *character_dict_path
|
| 70 |
-
use_space_char: *use_space_char
|
| 71 |
-
|
| 72 |
-
Metric:
|
| 73 |
-
name: RecMetric
|
| 74 |
-
main_indicator: acc
|
| 75 |
-
|
| 76 |
-
Train:
|
| 77 |
-
dataset:
|
| 78 |
-
name: LMDBDataSet
|
| 79 |
-
data_dir: ../benchmark_bctr/benchmark_bctr_train
|
| 80 |
-
transforms:
|
| 81 |
-
- DecodeImage: # load image
|
| 82 |
-
img_mode: BGR
|
| 83 |
-
channel_first: False
|
| 84 |
-
- CPPDLabelEncode: # Class handling label
|
| 85 |
-
pos_len: False
|
| 86 |
-
ch: True
|
| 87 |
-
ignore_index: 7000
|
| 88 |
-
character_dict_path: *character_dict_path
|
| 89 |
-
use_space_char: *use_space_char
|
| 90 |
-
max_text_length: *max_text_length
|
| 91 |
-
- SVTRResize:
|
| 92 |
-
image_shape: [3, 32, 256]
|
| 93 |
-
padding: True
|
| 94 |
-
- KeepKeys:
|
| 95 |
-
keep_keys: ['image', 'label', 'label_node', 'label_index', 'length'] # dataloader will return list in this order
|
| 96 |
-
loader:
|
| 97 |
-
shuffle: True
|
| 98 |
-
batch_size_per_card: 128
|
| 99 |
-
drop_last: True
|
| 100 |
-
num_workers: 8
|
| 101 |
-
|
| 102 |
-
Eval:
|
| 103 |
-
dataset:
|
| 104 |
-
name: LMDBDataSet
|
| 105 |
-
data_dir: ../benchmark_bctr/benchmark_bctr_test/scene_test
|
| 106 |
-
transforms:
|
| 107 |
-
- DecodeImage: # load image
|
| 108 |
-
img_mode: BGR
|
| 109 |
-
channel_first: False
|
| 110 |
-
- CPPDLabelEncode: # Class handling label
|
| 111 |
-
pos_len: False
|
| 112 |
-
ch: True
|
| 113 |
-
ignore_index: 7000
|
| 114 |
-
character_dict_path: *character_dict_path
|
| 115 |
-
use_space_char: *use_space_char
|
| 116 |
-
max_text_length: *max_text_length
|
| 117 |
-
- SVTRResize:
|
| 118 |
-
image_shape: [3, 32, 256]
|
| 119 |
-
padding: True
|
| 120 |
-
- KeepKeys:
|
| 121 |
-
keep_keys: ['image', 'label', 'label_node', 'length'] # dataloader will return list in this order
|
| 122 |
-
loader:
|
| 123 |
-
shuffle: False
|
| 124 |
-
drop_last: False
|
| 125 |
-
batch_size_per_card: 256
|
| 126 |
-
num_workers: 4
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
configs/rec/cppd/svtr_base_cppd_h8.yml
DELETED
|
@@ -1,123 +0,0 @@
|
|
| 1 |
-
Global:
|
| 2 |
-
device: gpu
|
| 3 |
-
epoch_num: 20
|
| 4 |
-
log_smooth_window: 20
|
| 5 |
-
print_batch_step: 10
|
| 6 |
-
output_dir: ./output/rec/u14m_filter/svtr_base_h8_cppd/
|
| 7 |
-
save_epoch_step: 1
|
| 8 |
-
# evaluation is run every 500 iterations (see eval_batch_step below)
|
| 9 |
-
eval_batch_step: [0, 500]
|
| 10 |
-
eval_epoch_step: [0, 1]
|
| 11 |
-
cal_metric_during_train: True
|
| 12 |
-
pretrained_model:
|
| 13 |
-
checkpoints:
|
| 14 |
-
use_tensorboard: false
|
| 15 |
-
infer_img:
|
| 16 |
-
# for data or label process
|
| 17 |
-
character_dict_path: &character_dict_path ./tools/utils/EN_symbol_dict.txt # 96en
|
| 18 |
-
# ./tools/utils/ppocr_keys_v1.txt # ch
|
| 19 |
-
max_text_length: &max_text_length 25
|
| 20 |
-
use_space_char: &use_space_char False
|
| 21 |
-
save_res_path: ./output/rec/u14m_filter/predicts_svtr_base_cppd.txt
|
| 22 |
-
use_amp: True
|
| 23 |
-
|
| 24 |
-
Optimizer:
|
| 25 |
-
name: AdamW
|
| 26 |
-
lr: 0.00065 # for 4gpus bs256/gpu
|
| 27 |
-
weight_decay: 0.05
|
| 28 |
-
filter_bias_and_bn: True
|
| 29 |
-
|
| 30 |
-
LRScheduler:
|
| 31 |
-
name: OneCycleLR
|
| 32 |
-
warmup_epoch: 1.5 # pct_start 0.075*20 = 1.5ep
|
| 33 |
-
cycle_momentum: False
|
| 34 |
-
|
| 35 |
-
Architecture:
|
| 36 |
-
model_type: rec
|
| 37 |
-
algorithm: CPPD
|
| 38 |
-
in_channels: 3
|
| 39 |
-
Transform:
|
| 40 |
-
Encoder:
|
| 41 |
-
name: SVTRNet
|
| 42 |
-
img_size: [32, 128]
|
| 43 |
-
out_char_num: 25
|
| 44 |
-
out_channels: 256
|
| 45 |
-
patch_merging: 'Conv'
|
| 46 |
-
embed_dim: [128, 256, 384]
|
| 47 |
-
depth: [6, 6, 6]
|
| 48 |
-
num_heads: [4, 8, 12]
|
| 49 |
-
sub_k: [[1, 1], [2, 1]]
|
| 50 |
-
mixer: ['Conv','Conv','Conv','Conv','Conv','Conv', 'Conv','Conv', 'Global','Global','Global','Global','Global','Global','Global','Global','Global','Global']
|
| 51 |
-
local_mixer: [[5, 5], [5, 5], [5, 5]]
|
| 52 |
-
last_stage: False
|
| 53 |
-
prenorm: True
|
| 54 |
-
Decoder:
|
| 55 |
-
name: CPPDDecoder
|
| 56 |
-
vis_seq: 128
|
| 57 |
-
num_layer: 2
|
| 58 |
-
pos_len: False
|
| 59 |
-
rec_layer: 1
|
| 60 |
-
|
| 61 |
-
Loss:
|
| 62 |
-
name: CPPDLoss
|
| 63 |
-
ignore_index: 100
|
| 64 |
-
smoothing: True
|
| 65 |
-
pos_len: False
|
| 66 |
-
sideloss_weight: 1.0
|
| 67 |
-
|
| 68 |
-
PostProcess:
|
| 69 |
-
name: CPPDLabelDecode
|
| 70 |
-
character_dict_path: *character_dict_path
|
| 71 |
-
use_space_char: *use_space_char
|
| 72 |
-
|
| 73 |
-
Metric:
|
| 74 |
-
name: RecMetric
|
| 75 |
-
main_indicator: acc
|
| 76 |
-
is_filter: True
|
| 77 |
-
|
| 78 |
-
Train:
|
| 79 |
-
dataset:
|
| 80 |
-
name: LMDBDataSet
|
| 81 |
-
data_dir: ../Union14M-L-LMDB-Filtered
|
| 82 |
-
transforms:
|
| 83 |
-
- DecodeImagePIL: # load image
|
| 84 |
-
img_mode: RGB
|
| 85 |
-
- PARSeqAugPIL:
|
| 86 |
-
- CPPDLabelEncode: # Class handling label
|
| 87 |
-
pos_len: False
|
| 88 |
-
character_dict_path: *character_dict_path
|
| 89 |
-
use_space_char: *use_space_char
|
| 90 |
-
max_text_length: *max_text_length
|
| 91 |
-
- RecTVResize:
|
| 92 |
-
image_shape: [32, 128]
|
| 93 |
-
padding: False
|
| 94 |
-
- KeepKeys:
|
| 95 |
-
keep_keys: ['image', 'label', 'label_node', 'length'] # dataloader will return list in this order
|
| 96 |
-
loader:
|
| 97 |
-
shuffle: True
|
| 98 |
-
batch_size_per_card: 256
|
| 99 |
-
drop_last: True
|
| 100 |
-
num_workers: 4
|
| 101 |
-
|
| 102 |
-
Eval:
|
| 103 |
-
dataset:
|
| 104 |
-
name: LMDBDataSet
|
| 105 |
-
data_dir: ../evaluation/
|
| 106 |
-
transforms:
|
| 107 |
-
- DecodeImagePIL: # load image
|
| 108 |
-
img_mode: RGB
|
| 109 |
-
- CPPDLabelEncode: # Class handling label
|
| 110 |
-
pos_len: False
|
| 111 |
-
character_dict_path: *character_dict_path
|
| 112 |
-
use_space_char: *use_space_char
|
| 113 |
-
max_text_length: *max_text_length
|
| 114 |
-
- RecTVResize:
|
| 115 |
-
image_shape: [32, 128]
|
| 116 |
-
padding: False
|
| 117 |
-
- KeepKeys:
|
| 118 |
-
keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
|
| 119 |
-
loader:
|
| 120 |
-
shuffle: False
|
| 121 |
-
drop_last: False
|
| 122 |
-
batch_size_per_card: 128
|
| 123 |
-
num_workers: 4
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
configs/rec/cppd/svtr_base_cppd_syn.yml
DELETED
|
@@ -1,124 +0,0 @@
|
|
| 1 |
-
Global:
|
| 2 |
-
device: gpu
|
| 3 |
-
epoch_num: 60
|
| 4 |
-
log_smooth_window: 20
|
| 5 |
-
print_batch_step: 10
|
| 6 |
-
output_dir: ./output/rec/syn/svtr_base_cppd/
|
| 7 |
-
save_epoch_step: 1
|
| 8 |
-
# evaluation is run every 500 iterations (see eval_batch_step below)
|
| 9 |
-
eval_batch_step: [0, 500]
|
| 10 |
-
eval_epoch_step: [0, 1]
|
| 11 |
-
cal_metric_during_train: True
|
| 12 |
-
pretrained_model:
|
| 13 |
-
checkpoints:
|
| 14 |
-
use_tensorboard: false
|
| 15 |
-
infer_img:
|
| 16 |
-
# for data or label process
|
| 17 |
-
character_dict_path: &character_dict_path
|
| 18 |
-
# ./tools/utils/EN_symbol_dict.txt # 96en
|
| 19 |
-
# ./tools/utils/ppocr_keys_v1.txt # ch
|
| 20 |
-
max_text_length: &max_text_length 25
|
| 21 |
-
use_space_char: &use_space_char False
|
| 22 |
-
save_res_path: ./output/rec/syn/predicts_svtr_base_cppd.txt
|
| 23 |
-
use_amp: True
|
| 24 |
-
|
| 25 |
-
Optimizer:
|
| 26 |
-
name: AdamW
|
| 27 |
-
lr: 0.0005 # for 4gpus bs256/gpu
|
| 28 |
-
weight_decay: 0.05
|
| 29 |
-
filter_bias_and_bn: True
|
| 30 |
-
|
| 31 |
-
LRScheduler:
|
| 32 |
-
name: CosineAnnealingLR
|
| 33 |
-
warmup_epoch: 6
|
| 34 |
-
|
| 35 |
-
Architecture:
|
| 36 |
-
model_type: rec
|
| 37 |
-
algorithm: CPPD
|
| 38 |
-
in_channels: 3
|
| 39 |
-
Transform:
|
| 40 |
-
Encoder:
|
| 41 |
-
name: SVTRNet
|
| 42 |
-
img_size: [32, 100]
|
| 43 |
-
out_char_num: 25
|
| 44 |
-
out_channels: 256
|
| 45 |
-
patch_merging: 'Conv'
|
| 46 |
-
embed_dim: [128, 256, 384]
|
| 47 |
-
depth: [6, 6, 4]
|
| 48 |
-
num_heads: [4, 8, 12]
|
| 49 |
-
mixer: ['Conv','Conv','Conv','Conv','Conv','Conv', 'Conv','Conv', 'Global','Global','Global','Global','Global','Global','Global','Global','Global','Global']
|
| 50 |
-
local_mixer: [[5, 5], [5, 5], [5, 5]]
|
| 51 |
-
last_stage: False
|
| 52 |
-
prenorm: True
|
| 53 |
-
Decoder:
|
| 54 |
-
name: CPPDDecoder
|
| 55 |
-
vis_seq: 50
|
| 56 |
-
num_layer: 3
|
| 57 |
-
pos_len: False
|
| 58 |
-
rec_layer: 1
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
Loss:
|
| 62 |
-
name: CPPDLoss
|
| 63 |
-
ignore_index: 100
|
| 64 |
-
smoothing: True
|
| 65 |
-
pos_len: False
|
| 66 |
-
sideloss_weight: 1.0
|
| 67 |
-
|
| 68 |
-
PostProcess:
|
| 69 |
-
name: CPPDLabelDecode
|
| 70 |
-
character_dict_path: *character_dict_path
|
| 71 |
-
use_space_char: *use_space_char
|
| 72 |
-
|
| 73 |
-
Metric:
|
| 74 |
-
name: RecMetric
|
| 75 |
-
main_indicator: acc
|
| 76 |
-
|
| 77 |
-
Train:
|
| 78 |
-
dataset:
|
| 79 |
-
name: STRLMDBDataSet
|
| 80 |
-
data_dir: ./
|
| 81 |
-
transforms:
|
| 82 |
-
- DecodeImage: # load image
|
| 83 |
-
img_mode: BGR
|
| 84 |
-
channel_first: False
|
| 85 |
-
# - SVTRRAug:
|
| 86 |
-
- CPPDLabelEncode: # Class handling label
|
| 87 |
-
pos_len: False
|
| 88 |
-
character_dict_path: *character_dict_path
|
| 89 |
-
use_space_char: *use_space_char
|
| 90 |
-
max_text_length: *max_text_length
|
| 91 |
-
- SVTRResize:
|
| 92 |
-
image_shape: [3, 32, 100]
|
| 93 |
-
padding: False
|
| 94 |
-
- KeepKeys:
|
| 95 |
-
keep_keys: ['image', 'label', 'label_node', 'length'] # dataloader will return list in this order
|
| 96 |
-
loader:
|
| 97 |
-
shuffle: True
|
| 98 |
-
batch_size_per_card: 256
|
| 99 |
-
drop_last: True
|
| 100 |
-
num_workers: 8
|
| 101 |
-
|
| 102 |
-
Eval:
|
| 103 |
-
dataset:
|
| 104 |
-
name: LMDBDataSet
|
| 105 |
-
data_dir: ../evaluation/
|
| 106 |
-
transforms:
|
| 107 |
-
- DecodeImage: # load image
|
| 108 |
-
img_mode: BGR
|
| 109 |
-
channel_first: False
|
| 110 |
-
- CPPDLabelEncode: # Class handling label
|
| 111 |
-
pos_len: False
|
| 112 |
-
character_dict_path: *character_dict_path
|
| 113 |
-
use_space_char: *use_space_char
|
| 114 |
-
max_text_length: *max_text_length
|
| 115 |
-
- SVTRResize:
|
| 116 |
-
image_shape: [3, 32, 100]
|
| 117 |
-
padding: False
|
| 118 |
-
- KeepKeys:
|
| 119 |
-
keep_keys: ['image', 'label', 'label_node', 'length'] # dataloader will return list in this order
|
| 120 |
-
loader:
|
| 121 |
-
shuffle: False
|
| 122 |
-
drop_last: False
|
| 123 |
-
batch_size_per_card: 256
|
| 124 |
-
num_workers: 4
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
configs/rec/cppd/svtrv2_cppd.yml
DELETED
|
@@ -1,150 +0,0 @@
|
|
| 1 |
-
Global:
|
| 2 |
-
device: gpu
|
| 3 |
-
epoch_num: 20
|
| 4 |
-
log_smooth_window: 20
|
| 5 |
-
print_batch_step: 10
|
| 6 |
-
output_dir: ./output/rec/u14m_filter/svtrv2_cppd/
|
| 7 |
-
save_epoch_step: 1
|
| 8 |
-
# evaluation is run every 500 iterations (see eval_batch_step below)
|
| 9 |
-
eval_batch_step: [0, 500]
|
| 10 |
-
eval_epoch_step: [0, 1]
|
| 11 |
-
cal_metric_during_train: True
|
| 12 |
-
pretrained_model:
|
| 13 |
-
checkpoints:
|
| 14 |
-
use_tensorboard: false
|
| 15 |
-
infer_img:
|
| 16 |
-
# for data or label process
|
| 17 |
-
character_dict_path: &character_dict_path ./tools/utils/EN_symbol_dict.txt # 96en
|
| 18 |
-
# ./tools/utils/ppocr_keys_v1.txt # ch
|
| 19 |
-
max_text_length: &max_text_length 25
|
| 20 |
-
use_space_char: &use_space_char False
|
| 21 |
-
save_res_path: ./output/rec/u14m_filter/predicts_svtrv2_cppd.txt
|
| 22 |
-
use_amp: True
|
| 23 |
-
|
| 24 |
-
Optimizer:
|
| 25 |
-
name: AdamW
|
| 26 |
-
lr: 0.00065 # for 4gpus bs256/gpu
|
| 27 |
-
weight_decay: 0.05
|
| 28 |
-
filter_bias_and_bn: True
|
| 29 |
-
|
| 30 |
-
LRScheduler:
|
| 31 |
-
name: OneCycleLR
|
| 32 |
-
warmup_epoch: 1.5 # pct_start 0.075*20 = 1.5ep
|
| 33 |
-
cycle_momentum: False
|
| 34 |
-
|
| 35 |
-
Architecture:
|
| 36 |
-
model_type: rec
|
| 37 |
-
algorithm: CPPD
|
| 38 |
-
in_channels: 3
|
| 39 |
-
Transform:
|
| 40 |
-
Encoder:
|
| 41 |
-
name: SVTRv2LNConvTwo33
|
| 42 |
-
use_pos_embed: False
|
| 43 |
-
out_channels: 256
|
| 44 |
-
dims: [128, 256, 384]
|
| 45 |
-
depths: [6, 6, 6]
|
| 46 |
-
num_heads: [4, 8, 12]
|
| 47 |
-
mixer: [['Conv','Conv','Conv','Conv','Conv','Conv'],['Conv','Conv','FGlobal','Global','Global','Global'],['Global','Global','Global','Global','Global','Global']]
|
| 48 |
-
local_k: [[5, 5], [5, 5], [-1, -1]]
|
| 49 |
-
sub_k: [[1, 1], [2, 1], [-1, -1]]
|
| 50 |
-
last_stage: false
|
| 51 |
-
feat2d: False
|
| 52 |
-
Decoder:
|
| 53 |
-
name: CPPDDecoder
|
| 54 |
-
ds: True
|
| 55 |
-
num_layer: 2
|
| 56 |
-
pos_len: False
|
| 57 |
-
rec_layer: 1
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
Loss:
|
| 61 |
-
name: CPPDLoss
|
| 62 |
-
ignore_index: 100
|
| 63 |
-
smoothing: True
|
| 64 |
-
pos_len: False
|
| 65 |
-
sideloss_weight: 1.0
|
| 66 |
-
|
| 67 |
-
PostProcess:
|
| 68 |
-
name: CPPDLabelDecode
|
| 69 |
-
character_dict_path: *character_dict_path
|
| 70 |
-
use_space_char: *use_space_char
|
| 71 |
-
|
| 72 |
-
Metric:
|
| 73 |
-
name: RecMetric
|
| 74 |
-
main_indicator: acc
|
| 75 |
-
is_filter: True
|
| 76 |
-
|
| 77 |
-
Train:
|
| 78 |
-
dataset:
|
| 79 |
-
name: RatioDataSetTVResize
|
| 80 |
-
ds_width: True
|
| 81 |
-
padding: false
|
| 82 |
-
data_dir_list: ['../Union14M-L-LMDB-Filtered/filter_train_challenging',
|
| 83 |
-
'../Union14M-L-LMDB-Filtered/filter_train_hard',
|
| 84 |
-
'../Union14M-L-LMDB-Filtered/filter_train_medium',
|
| 85 |
-
'../Union14M-L-LMDB-Filtered/filter_train_normal',
|
| 86 |
-
'../Union14M-L-LMDB-Filtered/filter_train_easy',
|
| 87 |
-
]
|
| 88 |
-
transforms:
|
| 89 |
-
- DecodeImagePIL: # load image
|
| 90 |
-
img_mode: RGB
|
| 91 |
-
- PARSeqAugPIL:
|
| 92 |
-
- CPPDLabelEncode: # Class handling label
|
| 93 |
-
pos_len: False
|
| 94 |
-
character_dict_path: *character_dict_path
|
| 95 |
-
use_space_char: *use_space_char
|
| 96 |
-
max_text_length: *max_text_length
|
| 97 |
-
- KeepKeys:
|
| 98 |
-
keep_keys: ['image', 'label', 'label_node', 'length'] # dataloader will return list in this order
|
| 99 |
-
sampler:
|
| 100 |
-
name: RatioSampler
|
| 101 |
-
scales: [[128, 32]] # w, h
|
| 102 |
-
# divided_factor: ensures the width and height dimensions are divisible by the downsampling multiple
|
| 103 |
-
first_bs: &bs 256
|
| 104 |
-
fix_bs: false
|
| 105 |
-
divided_factor: [4, 16] # w, h
|
| 106 |
-
is_training: True
|
| 107 |
-
loader:
|
| 108 |
-
shuffle: True
|
| 109 |
-
batch_size_per_card: *bs
|
| 110 |
-
drop_last: True
|
| 111 |
-
max_ratio: &max_ratio 4
|
| 112 |
-
num_workers: 4
|
| 113 |
-
|
| 114 |
-
Eval:
|
| 115 |
-
dataset:
|
| 116 |
-
name: RatioDataSetTVResize
|
| 117 |
-
ds_width: True
|
| 118 |
-
padding: False
|
| 119 |
-
data_dir_list: [
|
| 120 |
-
'../evaluation/CUTE80',
|
| 121 |
-
'../evaluation/IC13_857',
|
| 122 |
-
'../evaluation/IC15_1811',
|
| 123 |
-
'../evaluation/IIIT5k',
|
| 124 |
-
'../evaluation/SVT',
|
| 125 |
-
'../evaluation/SVTP',
|
| 126 |
-
]
|
| 127 |
-
transforms:
|
| 128 |
-
- DecodeImagePIL: # load image
|
| 129 |
-
img_mode: RGB
|
| 130 |
-
- CPPDLabelEncode: # Class handling label
|
| 131 |
-
pos_len: False
|
| 132 |
-
character_dict_path: *character_dict_path
|
| 133 |
-
use_space_char: *use_space_char
|
| 134 |
-
max_text_length: *max_text_length
|
| 135 |
-
- KeepKeys:
|
| 136 |
-
keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
|
| 137 |
-
sampler:
|
| 138 |
-
name: RatioSampler
|
| 139 |
-
scales: [[128, 32]] # w, h
|
| 140 |
-
# divided_factor: ensures the width and height dimensions are divisible by the downsampling multiple
|
| 141 |
-
first_bs: *bs
|
| 142 |
-
fix_bs: false
|
| 143 |
-
divided_factor: [4, 16] # w, h
|
| 144 |
-
is_training: False
|
| 145 |
-
loader:
|
| 146 |
-
shuffle: False
|
| 147 |
-
drop_last: False
|
| 148 |
-
batch_size_per_card: *bs
|
| 149 |
-
max_ratio: *max_ratio
|
| 150 |
-
num_workers: 4
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
configs/rec/dan/resnet45_fpn_dan.yml
DELETED
|
@@ -1,98 +0,0 @@
|
|
| 1 |
-
Global:
|
| 2 |
-
device: gpu
|
| 3 |
-
epoch_num: 20
|
| 4 |
-
log_smooth_window: 20
|
| 5 |
-
print_batch_step: 10
|
| 6 |
-
output_dir: ./output/rec/u14m_filter/resnet45_fpn_dan/
|
| 7 |
-
eval_epoch_step: [0, 1]
|
| 8 |
-
eval_batch_step: [0, 500]
|
| 9 |
-
cal_metric_during_train: True
|
| 10 |
-
pretrained_model:
|
| 11 |
-
checkpoints:
|
| 12 |
-
use_tensorboard: false
|
| 13 |
-
infer_img:
|
| 14 |
-
# for data or label process
|
| 15 |
-
character_dict_path: ./tools/utils/EN_symbol_dict.txt
|
| 16 |
-
max_text_length: 25
|
| 17 |
-
use_space_char: False
|
| 18 |
-
save_res_path: ./output/rec/u14m_filter/predicts_resnet45_fpn_dan.txt
|
| 19 |
-
use_amp: True
|
| 20 |
-
grad_clip_val: 20
|
| 21 |
-
|
| 22 |
-
Optimizer:
|
| 23 |
-
name: Adam
|
| 24 |
-
lr: 0.00065 # for 4gpus bs256/gpu
|
| 25 |
-
weight_decay: 0.0
|
| 26 |
-
filter_bias_and_bn: False
|
| 27 |
-
|
| 28 |
-
LRScheduler:
|
| 29 |
-
name: OneCycleLR
|
| 30 |
-
warmup_epoch: 1.5 # pct_start 0.075*20 = 1.5ep
|
| 31 |
-
cycle_momentum: False
|
| 32 |
-
|
| 33 |
-
Architecture:
|
| 34 |
-
model_type: rec
|
| 35 |
-
algorithm: DAN
|
| 36 |
-
Transform:
|
| 37 |
-
Encoder:
|
| 38 |
-
name: ResNet45
|
| 39 |
-
in_channels: 3
|
| 40 |
-
strides: [2, 1, 2, 1, 1]
|
| 41 |
-
return_list: True
|
| 42 |
-
Decoder:
|
| 43 |
-
name: DANDecoder
|
| 44 |
-
max_len: 25
|
| 45 |
-
channels_list: [64, 128, 256, 512]
|
| 46 |
-
strides_list: [[2, 2], [1, 1], [1, 1]]
|
| 47 |
-
in_shape: [8, 32]
|
| 48 |
-
depth: 4
|
| 49 |
-
|
| 50 |
-
Loss:
|
| 51 |
-
name: ARLoss
|
| 52 |
-
|
| 53 |
-
PostProcess:
|
| 54 |
-
name: ARLabelDecode
|
| 55 |
-
|
| 56 |
-
Metric:
|
| 57 |
-
name: RecMetric
|
| 58 |
-
main_indicator: acc
|
| 59 |
-
is_filter: True
|
| 60 |
-
|
| 61 |
-
Train:
|
| 62 |
-
dataset:
|
| 63 |
-
name: LMDBDataSet
|
| 64 |
-
data_dir: ../Union14M-L-LMDB-Filtered
|
| 65 |
-
transforms:
|
| 66 |
-
- DecodeImagePIL: # load image
|
| 67 |
-
img_mode: RGB
|
| 68 |
-
- PARSeqAugPIL:
|
| 69 |
-
- ARLabelEncode:
|
| 70 |
-
- RecTVResize:
|
| 71 |
-
image_shape: [32, 128]
|
| 72 |
-
padding: False
|
| 73 |
-
- KeepKeys:
|
| 74 |
-
keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
|
| 75 |
-
loader:
|
| 76 |
-
shuffle: True
|
| 77 |
-
batch_size_per_card: 256
|
| 78 |
-
drop_last: True
|
| 79 |
-
num_workers: 4
|
| 80 |
-
|
| 81 |
-
Eval:
|
| 82 |
-
dataset:
|
| 83 |
-
name: LMDBDataSet
|
| 84 |
-
data_dir: ../evaluation
|
| 85 |
-
transforms:
|
| 86 |
-
- DecodeImagePIL: # load image
|
| 87 |
-
img_mode: RGB
|
| 88 |
-
- ARLabelEncode:
|
| 89 |
-
- RecTVResize:
|
| 90 |
-
image_shape: [32, 128]
|
| 91 |
-
padding: False
|
| 92 |
-
- KeepKeys:
|
| 93 |
-
keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
|
| 94 |
-
loader:
|
| 95 |
-
shuffle: False
|
| 96 |
-
drop_last: False
|
| 97 |
-
batch_size_per_card: 256
|
| 98 |
-
num_workers: 2
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
configs/rec/dan/svtrv2_dan.yml
DELETED
|
@@ -1,130 +0,0 @@
|
|
| 1 |
-
Global:
|
| 2 |
-
device: gpu
|
| 3 |
-
epoch_num: 20
|
| 4 |
-
log_smooth_window: 20
|
| 5 |
-
print_batch_step: 10
|
| 6 |
-
output_dir: ./output/rec/u14m_filter/svtrv2_dan
|
| 7 |
-
eval_epoch_step: [0, 1]
|
| 8 |
-
eval_batch_step: [0, 500]
|
| 9 |
-
cal_metric_during_train: True
|
| 10 |
-
pretrained_model:
|
| 11 |
-
checkpoints:
|
| 12 |
-
use_tensorboard: false
|
| 13 |
-
infer_img:
|
| 14 |
-
# for data or label process
|
| 15 |
-
character_dict_path: ./tools/utils/EN_symbol_dict.txt
|
| 16 |
-
max_text_length: 25
|
| 17 |
-
use_space_char: False
|
| 18 |
-
save_res_path: ./output/rec/u14m_filter/predicts_svtrv2_dan.txt
|
| 19 |
-
use_amp: True
|
| 20 |
-
grad_clip_val: 20
|
| 21 |
-
|
| 22 |
-
Optimizer:
|
| 23 |
-
name: AdamW
|
| 24 |
-
lr: 0.00065 # 4gpus 256bs/gpu
|
| 25 |
-
weight_decay: 0.05
|
| 26 |
-
filter_bias_and_bn: True
|
| 27 |
-
|
| 28 |
-
LRScheduler:
|
| 29 |
-
name: OneCycleLR
|
| 30 |
-
warmup_epoch: 1.5 # pct_start 0.075*20 = 1.5ep
|
| 31 |
-
cycle_momentum: False
|
| 32 |
-
|
| 33 |
-
Architecture:
|
| 34 |
-
model_type: rec
|
| 35 |
-
algorithm: DAN
|
| 36 |
-
Transform:
|
| 37 |
-
Encoder:
|
| 38 |
-
name: SVTRv2LNConvTwo33
|
| 39 |
-
use_pos_embed: False
|
| 40 |
-
out_channels: 256
|
| 41 |
-
dims: [128, 256, 384]
|
| 42 |
-
depths: [6, 6, 6]
|
| 43 |
-
num_heads: [4, 8, 12]
|
| 44 |
-
mixer: [['Conv','Conv','Conv','Conv','Conv','Conv'],['Conv','Conv','FGlobal','Global','Global','Global'],['Global','Global','Global','Global','Global','Global']]
|
| 45 |
-
local_k: [[5, 5], [5, 5], [-1, -1]]
|
| 46 |
-
sub_k: [[1, 1], [2, 1], [-1, -1]]
|
| 47 |
-
last_stage: false
|
| 48 |
-
feat2d: True
|
| 49 |
-
Decoder:
|
| 50 |
-
name: DANDecoder
|
| 51 |
-
use_cam: False
|
| 52 |
-
max_len: 25
|
| 53 |
-
|
| 54 |
-
Loss:
|
| 55 |
-
name: ARLoss
|
| 56 |
-
|
| 57 |
-
PostProcess:
|
| 58 |
-
name: ARLabelDecode
|
| 59 |
-
|
| 60 |
-
Metric:
|
| 61 |
-
name: RecMetric
|
| 62 |
-
main_indicator: acc
|
| 63 |
-
is_filter: True
|
| 64 |
-
|
| 65 |
-
Train:
|
| 66 |
-
dataset:
|
| 67 |
-
name: RatioDataSetTVResize
|
| 68 |
-
ds_width: True
|
| 69 |
-
padding: false
|
| 70 |
-
data_dir_list: ['../Union14M-L-LMDB-Filtered/filter_filter_train_challenging',
|
| 71 |
-
'../Union14M-L-LMDB-Filtered/filter_filter_train_hard',
|
| 72 |
-
'../Union14M-L-LMDB-Filtered/filter_filter_train_medium',
|
| 73 |
-
'../Union14M-L-LMDB-Filtered/filter_filter_train_normal',
|
| 74 |
-
'../Union14M-L-LMDB-Filtered/filter_filter_train_easy',
|
| 75 |
-
]
|
| 76 |
-
transforms:
|
| 77 |
-
- DecodeImagePIL: # load image
|
| 78 |
-
img_mode: RGB
|
| 79 |
-
- PARSeqAugPIL:
|
| 80 |
-
- ARLabelEncode:
|
| 81 |
-
- KeepKeys:
|
| 82 |
-
keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
|
| 83 |
-
sampler:
|
| 84 |
-
name: RatioSampler
|
| 85 |
-
scales: [[128, 32]] # w, h
|
| 86 |
-
# divided_factor: ensures the width and height dimensions are divisible by the downsampling multiple
|
| 87 |
-
first_bs: &bs 256
|
| 88 |
-
fix_bs: false
|
| 89 |
-
divided_factor: [4, 16] # w, h
|
| 90 |
-
is_training: True
|
| 91 |
-
loader:
|
| 92 |
-
shuffle: True
|
| 93 |
-
batch_size_per_card: *bs
|
| 94 |
-
drop_last: True
|
| 95 |
-
max_ratio: &max_ratio 4
|
| 96 |
-
num_workers: 4
|
| 97 |
-
|
| 98 |
-
Eval:
|
| 99 |
-
dataset:
|
| 100 |
-
name: RatioDataSetTVResize
|
| 101 |
-
ds_width: True
|
| 102 |
-
padding: False
|
| 103 |
-
data_dir_list: [
|
| 104 |
-
'../evaluation/CUTE80',
|
| 105 |
-
'../evaluation/IC13_857',
|
| 106 |
-
'../evaluation/IC15_1811',
|
| 107 |
-
'../evaluation/IIIT5k',
|
| 108 |
-
'../evaluation/SVT',
|
| 109 |
-
'../evaluation/SVTP',
|
| 110 |
-
]
|
| 111 |
-
transforms:
|
| 112 |
-
- DecodeImagePIL: # load image
|
| 113 |
-
img_mode: RGB
|
| 114 |
-
- ARLabelEncode:
|
| 115 |
-
- KeepKeys:
|
| 116 |
-
keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
|
| 117 |
-
sampler:
|
| 118 |
-
name: RatioSampler
|
| 119 |
-
scales: [[128, 32]] # w, h
|
| 120 |
-
# divided_factor: ensures the width and height dimensions are divisible by the downsampling multiple
|
| 121 |
-
first_bs: *bs
|
| 122 |
-
fix_bs: false
|
| 123 |
-
divided_factor: [4, 16] # w, h
|
| 124 |
-
is_training: False
|
| 125 |
-
loader:
|
| 126 |
-
shuffle: False
|
| 127 |
-
drop_last: False
|
| 128 |
-
batch_size_per_card: *bs
|
| 129 |
-
max_ratio: *max_ratio
|
| 130 |
-
num_workers: 4
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
configs/rec/focalsvtr/focalsvtr_ctc.yml
DELETED
|
@@ -1,137 +0,0 @@
|
|
| 1 |
-
Global:
|
| 2 |
-
device: gpu
|
| 3 |
-
epoch_num: 20
|
| 4 |
-
log_smooth_window: 20
|
| 5 |
-
print_batch_step: 10
|
| 6 |
-
output_dir: ./output/rec/u14m_filter/focalsvtr_ctc/
|
| 7 |
-
eval_epoch_step: [0, 1]
|
| 8 |
-
eval_batch_step: [0, 500]
|
| 9 |
-
cal_metric_during_train: True
|
| 10 |
-
pretrained_model:
|
| 11 |
-
checkpoints:
|
| 12 |
-
use_tensorboard: false
|
| 13 |
-
infer_img:
|
| 14 |
-
# for data or label process
|
| 15 |
-
character_dict_path: &character_dict_path
|
| 16 |
-
# ./tools/utils/EN_symbol_dict.txt
|
| 17 |
-
max_text_length: &max_text_length 25
|
| 18 |
-
use_space_char: &use_space_char False
|
| 19 |
-
save_res_path: ./output/rec/u14m_filter/predicts_focalsvtr_ctc.txt
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
Optimizer:
|
| 23 |
-
name: AdamW
|
| 24 |
-
lr: 0.00065 # for 4gpus bs256/gpu
|
| 25 |
-
weight_decay: 0.05
|
| 26 |
-
filter_bias_and_bn: True
|
| 27 |
-
|
| 28 |
-
LRScheduler:
|
| 29 |
-
name: OneCycleLR
|
| 30 |
-
|
| 31 |
-
warmup_epoch: 1.5 # pct_start 0.075*20 = 1.5ep
|
| 32 |
-
cycle_momentum: False
|
| 33 |
-
|
| 34 |
-
Architecture:
|
| 35 |
-
model_type: rec
|
| 36 |
-
algorithm: SVTR
|
| 37 |
-
Transform:
|
| 38 |
-
Encoder:
|
| 39 |
-
name: FocalSVTR
|
| 40 |
-
img_size: [32, 128]
|
| 41 |
-
depths: [6, 6, 6]
|
| 42 |
-
embed_dim: 96
|
| 43 |
-
sub_k: [[1, 1], [2, 1], [1, 1]]
|
| 44 |
-
focal_levels: [3, 3, 3]
|
| 45 |
-
out_channels: 256
|
| 46 |
-
last_stage: True
|
| 47 |
-
Decoder:
|
| 48 |
-
name: CTCDecoder
|
| 49 |
-
|
| 50 |
-
Loss:
|
| 51 |
-
name: CTCLoss
|
| 52 |
-
zero_infinity: True
|
| 53 |
-
|
| 54 |
-
PostProcess:
|
| 55 |
-
name: CTCLabelDecode
|
| 56 |
-
character_dict_path: *character_dict_path
|
| 57 |
-
use_space_char: *use_space_char
|
| 58 |
-
|
| 59 |
-
Metric:
|
| 60 |
-
name: RecMetric
|
| 61 |
-
main_indicator: acc
|
| 62 |
-
is_filter: True
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
Train:
|
| 66 |
-
dataset:
|
| 67 |
-
name: RatioDataSet
|
| 68 |
-
ds_width: True
|
| 69 |
-
padding: &padding False
|
| 70 |
-
data_dir_list: ['../Union14M-L-LMDB-Filtered/filter_train_challenging',
|
| 71 |
-
'../Union14M-L-LMDB-Filtered/filter_train_hard',
|
| 72 |
-
'../Union14M-L-LMDB-Filtered/filter_train_medium',
|
| 73 |
-
'../Union14M-L-LMDB-Filtered/filter_train_normal',
|
| 74 |
-
'../Union14M-L-LMDB-Filtered/filter_train_easy',
|
| 75 |
-
]
|
| 76 |
-
transforms:
|
| 77 |
-
- DecodeImage: # load image
|
| 78 |
-
img_mode: BGR
|
| 79 |
-
channel_first: False
|
| 80 |
-
- PARSeqAug:
|
| 81 |
-
- CTCLabelEncode: # Class handling label
|
| 82 |
-
character_dict_path: *character_dict_path
|
| 83 |
-
use_space_char: *use_space_char
|
| 84 |
-
max_text_length: *max_text_length
|
| 85 |
-
- KeepKeys:
|
| 86 |
-
keep_keys: ['image', 'label', 'length']
|
| 87 |
-
sampler:
|
| 88 |
-
name: RatioSampler
|
| 89 |
-
scales: [[128, 32]] # w, h
|
| 90 |
-
# divided_factor: ensures the width and height dimensions are divisible by the downsampling multiple
|
| 91 |
-
first_bs: &bs 256
|
| 92 |
-
fix_bs: false
|
| 93 |
-
divided_factor: [4, 16] # w, h
|
| 94 |
-
is_training: True
|
| 95 |
-
loader:
|
| 96 |
-
shuffle: True
|
| 97 |
-
batch_size_per_card: *bs
|
| 98 |
-
drop_last: True
|
| 99 |
-
max_ratio: 12
|
| 100 |
-
num_workers: 4
|
| 101 |
-
|
| 102 |
-
Eval:
|
| 103 |
-
dataset:
|
| 104 |
-
name: RatioDataSet
|
| 105 |
-
ds_width: True
|
| 106 |
-
padding: True
|
| 107 |
-
data_dir_list: ['../evaluation/CUTE80',
|
| 108 |
-
'../evaluation/IC13_857',
|
| 109 |
-
'../evaluation/IC15_1811',
|
| 110 |
-
'../evaluation/IIIT5k',
|
| 111 |
-
'../evaluation/SVT',
|
| 112 |
-
'../evaluation/SVTP',
|
| 113 |
-
]
|
| 114 |
-
transforms:
|
| 115 |
-
- DecodeImage: # load image
|
| 116 |
-
img_mode: BGR
|
| 117 |
-
channel_first: False
|
| 118 |
-
- CTCLabelEncode: # Class handling label
|
| 119 |
-
character_dict_path: *character_dict_path
|
| 120 |
-
use_space_char: *use_space_char
|
| 121 |
-
max_text_length: *max_text_length
|
| 122 |
-
- KeepKeys:
|
| 123 |
-
keep_keys: ['image', 'label', 'length']
|
| 124 |
-
sampler:
|
| 125 |
-
name: RatioSampler
|
| 126 |
-
scales: [[128, 32]] # w, h
|
| 127 |
-
# divided_factor: ensures the width and height dimensions are divisible by the downsampling multiple
|
| 128 |
-
first_bs: 128
|
| 129 |
-
fix_bs: false
|
| 130 |
-
divided_factor: [4, 16] # w, h
|
| 131 |
-
is_training: False
|
| 132 |
-
loader:
|
| 133 |
-
shuffle: False
|
| 134 |
-
drop_last: False
|
| 135 |
-
batch_size_per_card: 128
|
| 136 |
-
max_ratio: 12
|
| 137 |
-
num_workers: 4
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
configs/rec/gtc/svtrv2_lnconv_nrtr_gtc.yml
DELETED
|
@@ -1,168 +0,0 @@
|
|
| 1 |
-
Global:
|
| 2 |
-
device: gpu
|
| 3 |
-
epoch_num: 20
|
| 4 |
-
log_smooth_window: 20
|
| 5 |
-
print_batch_step: 10
|
| 6 |
-
output_dir: ./output/rec/svtrv2_lnconv_nrtr_gtc
|
| 7 |
-
save_epoch_step: 1
|
| 8 |
-
# evaluation is run every 500 iterations (second entry of eval_batch_step)
|
| 9 |
-
eval_batch_step: [0, 500]
|
| 10 |
-
eval_epoch_step: [0, 1]
|
| 11 |
-
cal_metric_during_train: True
|
| 12 |
-
pretrained_model:
|
| 13 |
-
checkpoints:
|
| 14 |
-
use_tensorboard: false
|
| 15 |
-
infer_img: ../ltb/img
|
| 16 |
-
# for data or label process
|
| 17 |
-
character_dict_path: &character_dict_path ./tools/utils/EN_symbol_dict.txt # 96en
|
| 18 |
-
# ./tools/utils/ppocr_keys_v1.txt # ch
|
| 19 |
-
max_text_length: &max_text_length 25
|
| 20 |
-
use_space_char: &use_space_char False
|
| 21 |
-
save_res_path: ./output/rec/predicts_smtr.txt
|
| 22 |
-
use_amp: True
|
| 23 |
-
distributed: true
|
| 24 |
-
|
| 25 |
-
Optimizer:
|
| 26 |
-
name: AdamW
|
| 27 |
-
lr: 0.00065
|
| 28 |
-
weight_decay: 0.05
|
| 29 |
-
filter_bias_and_bn: True
|
| 30 |
-
|
| 31 |
-
LRScheduler:
|
| 32 |
-
name: OneCycleLR
|
| 33 |
-
warmup_epoch: 1.5 # pct_start 0.075*20 = 1.5ep
|
| 34 |
-
cycle_momentum: False
|
| 35 |
-
|
| 36 |
-
Architecture:
|
| 37 |
-
model_type: rec
|
| 38 |
-
algorithm: BGPD
|
| 39 |
-
in_channels: 3
|
| 40 |
-
Transform:
|
| 41 |
-
Encoder:
|
| 42 |
-
name: SVTRv2LNConvTwo33
|
| 43 |
-
use_pos_embed: False
|
| 44 |
-
out_channels: 256
|
| 45 |
-
dims: [128, 256, 384]
|
| 46 |
-
depths: [6, 6, 6]
|
| 47 |
-
num_heads: [4, 8, 12]
|
| 48 |
-
mixer: [['Conv','Conv','Conv','Conv','Conv','Conv'],['Conv','Conv','FGlobal','Global','Global','Global'],['Global','Global','Global','Global','Global','Global']]
|
| 49 |
-
local_k: [[5, 5], [5, 5], [-1, -1]]
|
| 50 |
-
sub_k: [[1, 1], [2, 1], [-1, -1]]
|
| 51 |
-
last_stage: false
|
| 52 |
-
feat2d: True
|
| 53 |
-
Decoder:
|
| 54 |
-
name: GTCDecoder
|
| 55 |
-
infer_gtc: True
|
| 56 |
-
detach: False
|
| 57 |
-
gtc_decoder:
|
| 58 |
-
name: NRTRDecoder
|
| 59 |
-
num_encoder_layers: -1
|
| 60 |
-
beam_size: 0
|
| 61 |
-
num_decoder_layers: 2
|
| 62 |
-
nhead: 12
|
| 63 |
-
max_len: *max_text_length
|
| 64 |
-
ctc_decoder:
|
| 65 |
-
name: RCTCDecoder
|
| 66 |
-
|
| 67 |
-
Loss:
|
| 68 |
-
name: GTCLoss
|
| 69 |
-
gtc_loss:
|
| 70 |
-
name: ARLoss
|
| 71 |
-
|
| 72 |
-
PostProcess:
|
| 73 |
-
name: GTCLabelDecode
|
| 74 |
-
gtc_label_decode:
|
| 75 |
-
name: ARLabelDecode
|
| 76 |
-
character_dict_path: *character_dict_path
|
| 77 |
-
use_space_char: *use_space_char
|
| 78 |
-
|
| 79 |
-
Metric:
|
| 80 |
-
name: RecGTCMetric
|
| 81 |
-
main_indicator: acc
|
| 82 |
-
is_filter: True
|
| 83 |
-
|
| 84 |
-
Train:
|
| 85 |
-
dataset:
|
| 86 |
-
name: RatioDataSet
|
| 87 |
-
ds_width: True
|
| 88 |
-
# max_ratio: &max_ratio 4
|
| 89 |
-
# min_ratio: 1
|
| 90 |
-
# base_shape: &base_shape [[64, 64], [96, 48], [112, 40], [128, 32]]
|
| 91 |
-
# base_h: &base_h 32
|
| 92 |
-
# padding: &padding False
|
| 93 |
-
padding: false
|
| 94 |
-
# padding_rand: true
|
| 95 |
-
# padding_doub: true
|
| 96 |
-
data_dir_list: ['../Union14M-L-LMDB-Filtered/filter_train_challenging',
|
| 97 |
-
'../Union14M-L-LMDB-Filtered/filter_train_hard',
|
| 98 |
-
'../Union14M-L-LMDB-Filtered/filter_train_medium',
|
| 99 |
-
'../Union14M-L-LMDB-Filtered/filter_train_normal',
|
| 100 |
-
'../Union14M-L-LMDB-Filtered/filter_train_easy',
|
| 101 |
-
]
|
| 102 |
-
transforms:
|
| 103 |
-
- DecodeImage: # load image
|
| 104 |
-
img_mode: BGR
|
| 105 |
-
channel_first: False
|
| 106 |
-
- PARSeqAug:
|
| 107 |
-
- GTCLabelEncode: # Class handling label
|
| 108 |
-
gtc_label_encode:
|
| 109 |
-
name: ARLabelEncode
|
| 110 |
-
character_dict_path: *character_dict_path
|
| 111 |
-
use_space_char: *use_space_char
|
| 112 |
-
max_text_length: *max_text_length
|
| 113 |
-
- KeepKeys:
|
| 114 |
-
keep_keys: ['image', 'label', 'length', 'ctc_label', 'ctc_length'] # dataloader will return list in this order
|
| 115 |
-
sampler:
|
| 116 |
-
name: RatioSampler
|
| 117 |
-
scales: [[128, 32]] # w, h
|
| 118 |
-
# divided_factor: ensures the width and height dimensions are divisible by the downsampling multiple
|
| 119 |
-
first_bs: &bs 256
|
| 120 |
-
fix_bs: false
|
| 121 |
-
divided_factor: [4, 16] # w, h
|
| 122 |
-
is_training: True
|
| 123 |
-
loader:
|
| 124 |
-
shuffle: True
|
| 125 |
-
batch_size_per_card: *bs
|
| 126 |
-
drop_last: True
|
| 127 |
-
max_ratio: &max_ratio 4
|
| 128 |
-
num_workers: 4
|
| 129 |
-
|
| 130 |
-
Eval:
|
| 131 |
-
dataset:
|
| 132 |
-
name: RatioDataSet
|
| 133 |
-
ds_width: True
|
| 134 |
-
padding: False
|
| 135 |
-
data_dir_list: [
|
| 136 |
-
'../evaluation/CUTE80',
|
| 137 |
-
'../evaluation/IC13_857',
|
| 138 |
-
'../evaluation/IC15_1811',
|
| 139 |
-
'../evaluation/IIIT5k',
|
| 140 |
-
'../evaluation/SVT',
|
| 141 |
-
'../evaluation/SVTP',
|
| 142 |
-
]
|
| 143 |
-
transforms:
|
| 144 |
-
- DecodeImage: # load image
|
| 145 |
-
img_mode: BGR
|
| 146 |
-
channel_first: False
|
| 147 |
-
- GTCLabelEncode: # Class handling label
|
| 148 |
-
gtc_label_encode:
|
| 149 |
-
name: ARLabelEncode
|
| 150 |
-
character_dict_path: *character_dict_path
|
| 151 |
-
use_space_char: *use_space_char
|
| 152 |
-
max_text_length: *max_text_length
|
| 153 |
-
- KeepKeys:
|
| 154 |
-
keep_keys: ['image', 'label', 'length', 'ctc_label', 'ctc_length'] # dataloader will return list in this order
|
| 155 |
-
sampler:
|
| 156 |
-
name: RatioSampler
|
| 157 |
-
scales: [[128, 32]] # w, h
|
| 158 |
-
# divided_factor: ensures the width and height dimensions are divisible by the downsampling multiple
|
| 159 |
-
first_bs: *bs
|
| 160 |
-
fix_bs: false
|
| 161 |
-
divided_factor: [4, 16] # w, h
|
| 162 |
-
is_training: False
|
| 163 |
-
loader:
|
| 164 |
-
shuffle: False
|
| 165 |
-
drop_last: False
|
| 166 |
-
batch_size_per_card: *bs
|
| 167 |
-
max_ratio: *max_ratio
|
| 168 |
-
num_workers: 4
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
configs/rec/gtc/svtrv2_lnconv_smtr_gtc_long_infer.yml
DELETED
|
@@ -1,151 +0,0 @@
|
|
| 1 |
-
Global:
|
| 2 |
-
device: gpu
|
| 3 |
-
epoch_num: 20
|
| 4 |
-
log_smooth_window: 20
|
| 5 |
-
print_batch_step: 10
|
| 6 |
-
output_dir: ./output/rec/svtrv2_lnconv_smtr_gtc_long_infer
|
| 7 |
-
save_epoch_step: 1
|
| 8 |
-
# evaluation is run every 1000 iterations (second entry of eval_batch_step)
|
| 9 |
-
eval_batch_step: [0, 1000]
|
| 10 |
-
eval_epoch_step: [0, 1]
|
| 11 |
-
cal_metric_during_train: True
|
| 12 |
-
pretrained_model:
|
| 13 |
-
checkpoints:
|
| 14 |
-
use_tensorboard: false
|
| 15 |
-
infer_img: ../ltb/img
|
| 16 |
-
# for data or label process
|
| 17 |
-
character_dict_path: &character_dict_path ./tools/utils/EN_symbol_dict.txt # 96en
|
| 18 |
-
# ./tools/utils/ppocr_keys_v1.txt # ch
|
| 19 |
-
max_text_length: &max_text_length 25
|
| 20 |
-
use_space_char: &use_space_char False
|
| 21 |
-
save_res_path: ./output/rec/predicts_smtr.txt
|
| 22 |
-
use_amp: True
|
| 23 |
-
distributed: true
|
| 24 |
-
|
| 25 |
-
Optimizer:
|
| 26 |
-
name: AdamW
|
| 27 |
-
lr: 0.000325
|
| 28 |
-
weight_decay: 0.05
|
| 29 |
-
filter_bias_and_bn: True
|
| 30 |
-
|
| 31 |
-
LRScheduler:
|
| 32 |
-
name: OneCycleLR
|
| 33 |
-
warmup_epoch: 1.5 # pct_start 0.075*20 = 1.5ep
|
| 34 |
-
cycle_momentum: False
|
| 35 |
-
|
| 36 |
-
Architecture:
|
| 37 |
-
model_type: rec
|
| 38 |
-
algorithm: BGPD
|
| 39 |
-
in_channels: 3
|
| 40 |
-
Transform:
|
| 41 |
-
Encoder:
|
| 42 |
-
name: SVTRv2LNConvTwo33
|
| 43 |
-
use_pos_embed: False
|
| 44 |
-
out_channels: 256
|
| 45 |
-
dims: [128, 256, 384]
|
| 46 |
-
depths: [6, 6, 6]
|
| 47 |
-
num_heads: [4, 8, 12]
|
| 48 |
-
mixer: [['Conv','Conv','Conv','Conv','Conv','Conv'],['Conv','Conv','FGlobal','Global','Global','Global'],['Global','Global','Global','Global','Global','Global']]
|
| 49 |
-
local_k: [[5, 5], [5, 5], [-1, -1]]
|
| 50 |
-
sub_k: [[1, 1], [2, 1], [-1, -1]]
|
| 51 |
-
last_stage: false
|
| 52 |
-
feat2d: True
|
| 53 |
-
Decoder:
|
| 54 |
-
name: GTCDecoder
|
| 55 |
-
infer_gtc: False
|
| 56 |
-
detach: False
|
| 57 |
-
gtc_decoder:
|
| 58 |
-
name: SMTRDecoder
|
| 59 |
-
num_layer: 1
|
| 60 |
-
ds: True
|
| 61 |
-
max_len: *max_text_length
|
| 62 |
-
next_mode: &next True
|
| 63 |
-
sub_str_len: &subsl 5
|
| 64 |
-
ctc_decoder:
|
| 65 |
-
name: RCTCDecoder
|
| 66 |
-
|
| 67 |
-
Loss:
|
| 68 |
-
name: CTCLoss
|
| 69 |
-
|
| 70 |
-
PostProcess:
|
| 71 |
-
name: CTCLabelDecode
|
| 72 |
-
character_dict_path: *character_dict_path
|
| 73 |
-
use_space_char: *use_space_char
|
| 74 |
-
|
| 75 |
-
Metric:
|
| 76 |
-
name: RecMetric
|
| 77 |
-
main_indicator: acc
|
| 78 |
-
is_filter: True
|
| 79 |
-
|
| 80 |
-
Train:
|
| 81 |
-
dataset:
|
| 82 |
-
name: RatioDataSetTVResize
|
| 83 |
-
ds_width: True
|
| 84 |
-
padding: false
|
| 85 |
-
data_dir_list: ['../Union14M-L-LMDB-Filtered/filter_train_challenging',
|
| 86 |
-
'../Union14M-L-LMDB-Filtered/filter_train_hard',
|
| 87 |
-
'../Union14M-L-LMDB-Filtered/filter_train_medium',
|
| 88 |
-
'../Union14M-L-LMDB-Filtered/filter_train_normal',
|
| 89 |
-
'../Union14M-L-LMDB-Filtered/filter_train_easy',
|
| 90 |
-
]
|
| 91 |
-
transforms:
|
| 92 |
-
- DecodeImagePIL: # load image
|
| 93 |
-
img_mode: RGB
|
| 94 |
-
- PARSeqAugPIL:
|
| 95 |
-
- CTCLabelEncode: # Class handling label
|
| 96 |
-
character_dict_path: *character_dict_path
|
| 97 |
-
use_space_char: *use_space_char
|
| 98 |
-
max_text_length: *max_text_length
|
| 99 |
-
- KeepKeys:
|
| 100 |
-
keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
|
| 101 |
-
sampler:
|
| 102 |
-
name: RatioSampler
|
| 103 |
-
scales: [[128, 32]] # w, h
|
| 104 |
-
# divided_factor: ensures the width and height dimensions are divisible by the downsampling multiple
|
| 105 |
-
first_bs: &bs 128
|
| 106 |
-
fix_bs: false
|
| 107 |
-
divided_factor: [4, 16] # w, h
|
| 108 |
-
is_training: True
|
| 109 |
-
loader:
|
| 110 |
-
shuffle: True
|
| 111 |
-
batch_size_per_card: *bs
|
| 112 |
-
drop_last: True
|
| 113 |
-
max_ratio: &max_ratio 12
|
| 114 |
-
num_workers: 4
|
| 115 |
-
|
| 116 |
-
Eval:
|
| 117 |
-
dataset:
|
| 118 |
-
name: RatioDataSetTVResize
|
| 119 |
-
ds_width: True
|
| 120 |
-
padding: False
|
| 121 |
-
data_dir_list: [
|
| 122 |
-
'../evaluation/CUTE80',
|
| 123 |
-
'../evaluation/IC13_857',
|
| 124 |
-
'../evaluation/IC15_1811',
|
| 125 |
-
'../evaluation/IIIT5k',
|
| 126 |
-
'../evaluation/SVT',
|
| 127 |
-
'../evaluation/SVTP',
|
| 128 |
-
]
|
| 129 |
-
transforms:
|
| 130 |
-
- DecodeImagePIL: # load image
|
| 131 |
-
img_mode: RGB
|
| 132 |
-
- CTCLabelEncode: # Class handling label
|
| 133 |
-
character_dict_path: *character_dict_path
|
| 134 |
-
use_space_char: *use_space_char
|
| 135 |
-
max_text_length: *max_text_length
|
| 136 |
-
- KeepKeys:
|
| 137 |
-
keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
|
| 138 |
-
sampler:
|
| 139 |
-
name: RatioSampler
|
| 140 |
-
scales: [[128, 32]] # w, h
|
| 141 |
-
# divided_factor: ensures the width and height dimensions are divisible by the downsampling multiple
|
| 142 |
-
first_bs: *bs
|
| 143 |
-
fix_bs: false
|
| 144 |
-
divided_factor: [4, 16] # w, h
|
| 145 |
-
is_training: False
|
| 146 |
-
loader:
|
| 147 |
-
shuffle: False
|
| 148 |
-
drop_last: False
|
| 149 |
-
batch_size_per_card: *bs
|
| 150 |
-
max_ratio: *max_ratio
|
| 151 |
-
num_workers: 4
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
configs/rec/gtc/svtrv2_lnconv_smtr_gtc_smtr_long.yml
DELETED
|
@@ -1,150 +0,0 @@
|
|
| 1 |
-
Global:
|
| 2 |
-
device: gpu
|
| 3 |
-
epoch_num: 20
|
| 4 |
-
log_smooth_window: 20
|
| 5 |
-
print_batch_step: 10
|
| 6 |
-
output_dir: ./output/rec/svtrv2_lnconv_smtr_gtc_nodetach_smtr_long_infer
|
| 7 |
-
save_epoch_step: 1
|
| 8 |
-
# evaluation is run every 1000 iterations (second entry of eval_batch_step)
|
| 9 |
-
eval_batch_step: [0, 1000]
|
| 10 |
-
eval_epoch_step: [0, 1]
|
| 11 |
-
cal_metric_during_train: True
|
| 12 |
-
pretrained_model:
|
| 13 |
-
checkpoints:
|
| 14 |
-
use_tensorboard: false
|
| 15 |
-
infer_img:
|
| 16 |
-
# for data or label process
|
| 17 |
-
character_dict_path: &character_dict_path ./tools/utils/EN_symbol_dict.txt # 96en
|
| 18 |
-
# ./tools/utils/ppocr_keys_v1.txt # ch
|
| 19 |
-
max_text_length: &max_text_length 25
|
| 20 |
-
use_space_char: &use_space_char False
|
| 21 |
-
save_res_path: ./output/rec/predicts_smtr.txt
|
| 22 |
-
use_amp: True
|
| 23 |
-
distributed: true
|
| 24 |
-
|
| 25 |
-
Optimizer:
|
| 26 |
-
name: AdamW
|
| 27 |
-
lr: 0.000325
|
| 28 |
-
weight_decay: 0.05
|
| 29 |
-
filter_bias_and_bn: True
|
| 30 |
-
|
| 31 |
-
LRScheduler:
|
| 32 |
-
name: OneCycleLR
|
| 33 |
-
warmup_epoch: 1.5 # pct_start 0.075*20 = 1.5ep
|
| 34 |
-
cycle_momentum: False
|
| 35 |
-
Architecture:
|
| 36 |
-
model_type: rec
|
| 37 |
-
algorithm: BGPD
|
| 38 |
-
in_channels: 3
|
| 39 |
-
Transform:
|
| 40 |
-
Encoder:
|
| 41 |
-
name: SVTRv2LNConvTwo33
|
| 42 |
-
use_pos_embed: False
|
| 43 |
-
out_channels: 256
|
| 44 |
-
dims: [128, 256, 384]
|
| 45 |
-
depths: [6, 6, 6]
|
| 46 |
-
num_heads: [4, 8, 12]
|
| 47 |
-
mixer: [['Conv','Conv','Conv','Conv','Conv','Conv'],['Conv','Conv','FGlobal','Global','Global','Global'],['Global','Global','Global','Global','Global','Global']]
|
| 48 |
-
local_k: [[5, 5], [5, 5], [-1, -1]]
|
| 49 |
-
sub_k: [[1, 1], [2, 1], [-1, -1]]
|
| 50 |
-
last_stage: false
|
| 51 |
-
feat2d: True
|
| 52 |
-
Decoder:
|
| 53 |
-
name: GTCDecoder
|
| 54 |
-
infer_gtc: True
|
| 55 |
-
detach: False
|
| 56 |
-
gtc_decoder:
|
| 57 |
-
name: SMTRDecoder
|
| 58 |
-
num_layer: 1
|
| 59 |
-
ds: True
|
| 60 |
-
max_len: *max_text_length
|
| 61 |
-
next_mode: &next True
|
| 62 |
-
sub_str_len: &subsl 5
|
| 63 |
-
infer_aug: True
|
| 64 |
-
ctc_decoder:
|
| 65 |
-
name: RCTCDecoder
|
| 66 |
-
|
| 67 |
-
Loss:
|
| 68 |
-
name: GTCLoss
|
| 69 |
-
ctc_weight: 0.1
|
| 70 |
-
gtc_loss:
|
| 71 |
-
name: SMTRLoss
|
| 72 |
-
|
| 73 |
-
PostProcess:
|
| 74 |
-
name: GTCLabelDecode
|
| 75 |
-
gtc_label_decode:
|
| 76 |
-
name: SMTRLabelDecode
|
| 77 |
-
next_mode: *next
|
| 78 |
-
character_dict_path: *character_dict_path
|
| 79 |
-
use_space_char: *use_space_char
|
| 80 |
-
only_gtc: True
|
| 81 |
-
|
| 82 |
-
Metric:
|
| 83 |
-
name: RecGTCMetric
|
| 84 |
-
main_indicator: acc
|
| 85 |
-
is_filter: True
|
| 86 |
-
|
| 87 |
-
Train:
|
| 88 |
-
dataset:
|
| 89 |
-
name: RatioDataSetTVResize
|
| 90 |
-
ds_width: True
|
| 91 |
-
padding: false
|
| 92 |
-
data_dir_list: ['../Union14M-L-LMDB-Filtered/filter_train_challenging',
|
| 93 |
-
'../Union14M-L-LMDB-Filtered/filter_train_hard',
|
| 94 |
-
'../Union14M-L-LMDB-Filtered/filter_train_medium',
|
| 95 |
-
'../Union14M-L-LMDB-Filtered/filter_train_normal',
|
| 96 |
-
'../Union14M-L-LMDB-Filtered/filter_train_easy',
|
| 97 |
-
]
|
| 98 |
-
transforms:
|
| 99 |
-
- DecodeImagePIL: # load image
|
| 100 |
-
img_mode: RGB
|
| 101 |
-
- PARSeqAugPIL:
|
| 102 |
-
- SMTRLabelEncode: # Class handling label
|
| 103 |
-
sub_str_len: *subsl
|
| 104 |
-
character_dict_path: *character_dict_path
|
| 105 |
-
use_space_char: *use_space_char
|
| 106 |
-
max_text_length: *max_text_length
|
| 107 |
-
- KeepKeys:
|
| 108 |
-
keep_keys: ['image', 'label', 'label_subs', 'label_next', 'length_subs',
|
| 109 |
-
'label_subs_pre', 'label_next_pre', 'length_subs_pre', 'length'] # dataloader will return list in this order
|
| 110 |
-
sampler:
|
| 111 |
-
name: RatioSampler
|
| 112 |
-
scales: [[128, 32]] # w, h
|
| 113 |
-
# divided_factor: ensures the width and height dimensions are divisible by the downsampling multiple
|
| 114 |
-
first_bs: &bs 256
|
| 115 |
-
fix_bs: false
|
| 116 |
-
divided_factor: [4, 16] # w, h
|
| 117 |
-
is_training: True
|
| 118 |
-
loader:
|
| 119 |
-
shuffle: True
|
| 120 |
-
batch_size_per_card: *bs
|
| 121 |
-
drop_last: True
|
| 122 |
-
max_ratio: &max_ratio 12
|
| 123 |
-
num_workers: 4
|
| 124 |
-
|
| 125 |
-
Eval:
|
| 126 |
-
dataset:
|
| 127 |
-
name: SimpleDataSet
|
| 128 |
-
data_dir: ../ltb/
|
| 129 |
-
label_file_list: ['../ltb/ultra_long_70_list.txt']
|
| 130 |
-
transforms:
|
| 131 |
-
- DecodeImage: # load image
|
| 132 |
-
img_mode: BGR
|
| 133 |
-
channel_first: False
|
| 134 |
-
- GTCLabelEncode: # Class handling label
|
| 135 |
-
gtc_label_encode:
|
| 136 |
-
name: ARLabelEncode
|
| 137 |
-
character_dict_path: *character_dict_path
|
| 138 |
-
use_space_char: *use_space_char
|
| 139 |
-
max_text_length: 200
|
| 140 |
-
- SliceResize:
|
| 141 |
-
image_shape: [3, 32, 128]
|
| 142 |
-
padding: False
|
| 143 |
-
max_ratio: 12
|
| 144 |
-
- KeepKeys:
|
| 145 |
-
keep_keys: ['image', 'label', 'length', 'ctc_label', 'ctc_length'] # dataloader will return list in this order
|
| 146 |
-
loader:
|
| 147 |
-
shuffle: False
|
| 148 |
-
drop_last: False
|
| 149 |
-
batch_size_per_card: 1
|
| 150 |
-
num_workers: 2
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
configs/rec/gtc/svtrv2_lnconv_smtr_gtc_stream.yml
DELETED
|
@@ -1,152 +0,0 @@
|
|
| 1 |
-
Global:
|
| 2 |
-
device: gpu
|
| 3 |
-
epoch_num: 60
|
| 4 |
-
log_smooth_window: 20
|
| 5 |
-
print_batch_step: 10
|
| 6 |
-
output_dir: ./output/rec/svtrv2_lnconv_smtr_gtc_stream
|
| 7 |
-
save_epoch_step: 1
|
| 8 |
-
# evaluation is run every 500 iterations (second entry of eval_batch_step)
|
| 9 |
-
eval_batch_step: [0, 500]
|
| 10 |
-
eval_epoch_step: [0, 1]
|
| 11 |
-
cal_metric_during_train: True
|
| 12 |
-
pretrained_model:
|
| 13 |
-
checkpoints:
|
| 14 |
-
use_tensorboard: false
|
| 15 |
-
infer_img:
|
| 16 |
-
# for data or label process
|
| 17 |
-
character_dict_path: &character_dict_path ./tools/utils/EN_symbol_dict.txt # 96en
|
| 18 |
-
# ./tools/utils/ppocr_keys_v1.txt # ch
|
| 19 |
-
max_text_length: &max_text_length 25
|
| 20 |
-
use_space_char: &use_space_char False
|
| 21 |
-
save_res_path: ./output/rec/predicts_smtr.txt
|
| 22 |
-
use_amp: True
|
| 23 |
-
distributed: true
|
| 24 |
-
grad_clip_val: 20
|
| 25 |
-
|
| 26 |
-
Optimizer:
|
| 27 |
-
name: AdamW
|
| 28 |
-
lr: 0.00065
|
| 29 |
-
weight_decay: 0.05
|
| 30 |
-
filter_bias_and_bn: True
|
| 31 |
-
|
| 32 |
-
LRScheduler:
|
| 33 |
-
name: OneCycleLR
|
| 34 |
-
warmup_epoch: 5 # 5 of the 60 epochs are warmup, i.e. pct_start = 5/60 ≈ 0.083
|
| 35 |
-
cycle_momentum: False
|
| 36 |
-
|
| 37 |
-
Architecture:
|
| 38 |
-
model_type: rec
|
| 39 |
-
algorithm: BGPD
|
| 40 |
-
in_channels: 3
|
| 41 |
-
Transform:
|
| 42 |
-
Encoder:
|
| 43 |
-
name: SVTRv2LNConvTwo33
|
| 44 |
-
use_pos_embed: False
|
| 45 |
-
out_channels: 256
|
| 46 |
-
dims: [128, 256, 384]
|
| 47 |
-
depths: [6, 6, 6]
|
| 48 |
-
num_heads: [4, 8, 12]
|
| 49 |
-
mixer: [['Conv','Conv','Conv','Conv','Conv','Conv'],['Conv','Conv','FGlobal','Global','Global','Global'],['Global','Global','Global','Global','Global','Global']]
|
| 50 |
-
local_k: [[5, 5], [5, 5], [-1, -1]]
|
| 51 |
-
sub_k: [[1, 1], [2, 1], [-1, -1]]
|
| 52 |
-
last_stage: false
|
| 53 |
-
feat2d: True
|
| 54 |
-
Decoder:
|
| 55 |
-
name: GTCDecoder
|
| 56 |
-
infer_gtc: True
|
| 57 |
-
detach: False
|
| 58 |
-
gtc_decoder:
|
| 59 |
-
name: SMTRDecoder
|
| 60 |
-
num_layer: 1
|
| 61 |
-
ds: True
|
| 62 |
-
max_len: *max_text_length
|
| 63 |
-
next_mode: &next True
|
| 64 |
-
sub_str_len: &subsl 5
|
| 65 |
-
infer_aug: False
|
| 66 |
-
ctc_decoder:
|
| 67 |
-
name: RCTCDecoder
|
| 68 |
-
|
| 69 |
-
Loss:
|
| 70 |
-
name: GTCLoss
|
| 71 |
-
ctc_weight: 0.25
|
| 72 |
-
gtc_loss:
|
| 73 |
-
name: SMTRLoss
|
| 74 |
-
|
| 75 |
-
PostProcess:
|
| 76 |
-
name: GTCLabelDecode
|
| 77 |
-
gtc_label_decode:
|
| 78 |
-
name: SMTRLabelDecode
|
| 79 |
-
next_mode: *next
|
| 80 |
-
character_dict_path: *character_dict_path
|
| 81 |
-
use_space_char: *use_space_char
|
| 82 |
-
only_gtc: True
|
| 83 |
-
|
| 84 |
-
Metric:
|
| 85 |
-
name: RecMetric
|
| 86 |
-
main_indicator: acc
|
| 87 |
-
is_filter: True
|
| 88 |
-
stream: True
|
| 89 |
-
|
| 90 |
-
Train:
|
| 91 |
-
dataset:
|
| 92 |
-
name: RatioDataSetTVResize
|
| 93 |
-
ds_width: True
|
| 94 |
-
padding: false
|
| 95 |
-
data_dir_list: ['../Union14M-L-LMDB-Filtered/filter_train_challenging',
|
| 96 |
-
'../Union14M-L-LMDB-Filtered/filter_train_hard',
|
| 97 |
-
'../Union14M-L-LMDB-Filtered/filter_train_medium',
|
| 98 |
-
'../Union14M-L-LMDB-Filtered/filter_train_normal',
|
| 99 |
-
'../Union14M-L-LMDB-Filtered/filter_train_easy',
|
| 100 |
-
]
|
| 101 |
-
transforms:
|
| 102 |
-
- DecodeImagePIL: # load image
|
| 103 |
-
img_mode: RGB
|
| 104 |
-
- PARSeqAugPIL:
|
| 105 |
-
- SMTRLabelEncode: # Class handling label
|
| 106 |
-
sub_str_len: *subsl
|
| 107 |
-
character_dict_path: *character_dict_path
|
| 108 |
-
use_space_char: *use_space_char
|
| 109 |
-
max_text_length: *max_text_length
|
| 110 |
-
- KeepKeys:
|
| 111 |
-
keep_keys: ['image', 'label', 'label_subs', 'label_next', 'length_subs',
|
| 112 |
-
'label_subs_pre', 'label_next_pre', 'length_subs_pre', 'length'] # dataloader will return list in this order
|
| 113 |
-
sampler:
|
| 114 |
-
name: RatioSampler
|
| 115 |
-
scales: [[128, 32]] # w, h
|
| 116 |
-
# divided_factor: ensures the width and height dimensions are divisible by the downsampling multiple
|
| 117 |
-
first_bs: &bs 256
|
| 118 |
-
fix_bs: false
|
| 119 |
-
divided_factor: [4, 16] # w, h
|
| 120 |
-
is_training: True
|
| 121 |
-
loader:
|
| 122 |
-
shuffle: True
|
| 123 |
-
batch_size_per_card: *bs
|
| 124 |
-
drop_last: True
|
| 125 |
-
max_ratio: &max_ratio 12
|
| 126 |
-
num_workers: 4
|
| 127 |
-
|
| 128 |
-
Eval:
|
| 129 |
-
dataset:
|
| 130 |
-
name: SimpleDataSet
|
| 131 |
-
data_dir: ../ltb/
|
| 132 |
-
label_file_list: ['../ltb/ultra_long_70_list.txt']
|
| 133 |
-
transforms:
|
| 134 |
-
- DecodeImagePIL: # load image
|
| 135 |
-
img_mode: RGB
|
| 136 |
-
- GTCLabelEncode: # Class handling label
|
| 137 |
-
gtc_label_encode:
|
| 138 |
-
name: ARLabelEncode
|
| 139 |
-
character_dict_path: *character_dict_path
|
| 140 |
-
use_space_char: *use_space_char
|
| 141 |
-
max_text_length: *max_text_length
|
| 142 |
-
- SliceTVResize:
|
| 143 |
-
image_shape: [32, 128]
|
| 144 |
-
padding: False
|
| 145 |
-
max_ratio: 4
|
| 146 |
-
- KeepKeys:
|
| 147 |
-
keep_keys: ['image', 'label', 'length', 'ctc_label', 'ctc_length'] # dataloader will return list in this order
|
| 148 |
-
loader:
|
| 149 |
-
shuffle: False
|
| 150 |
-
drop_last: False
|
| 151 |
-
batch_size_per_card: 1
|
| 152 |
-
num_workers: 2
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
configs/rec/igtr/readme.md
DELETED
|
@@ -1,189 +0,0 @@
|
|
| 1 |
-
# IGTR
|
| 2 |
-
|
| 3 |
-
- [IGTR](#igtr)
|
| 4 |
-
- [1. Introduction](#1-introduction)
|
| 5 |
-
- [2. Environment](#2-environment)
|
| 6 |
-
- [Dataset Preparation](#dataset-preparation)
|
| 7 |
-
- [3. Model Training / Evaluation](#3-model-training--evaluation)
|
| 8 |
-
- [Citation](#citation)
|
| 9 |
-
|
| 10 |
-
<a name="1"></a>
|
| 11 |
-
|
| 12 |
-
## 1. Introduction
|
| 13 |
-
|
| 14 |
-
Paper:
|
| 15 |
-
|
| 16 |
-
> [Instruction-Guided Scene Text Recognition](https://arxiv.org/abs/2401.17851)
|
| 17 |
-
> Yongkun Du, Zhineng Chen, Yuchen Su, Caiyan Jia, Yu-Gang Jiang
|
| 18 |
-
|
| 19 |
-
<a name="model"></a>
|
| 20 |
-
Multi-modal models show appealing performance in visual recognition tasks recently, as free-form text-guided training evokes the ability to understand fine-grained visual content. However, current models are either inefficient or cannot be trivially upgraded to scene text recognition (STR) due to the composition difference between natural and text images. We propose a novel instruction-guided scene text recognition (IGTR) paradigm that formulates STR as an instruction learning problem and understands text images by predicting character attributes, e.g., character frequency, position, etc. IGTR first devises $\left\langle condition, question, answer \right\rangle$ instruction triplets, providing rich and diverse descriptions of character attributes. To effectively learn these attributes through question-answering, IGTR develops lightweight instruction encoder, cross-modal feature fusion module and multi-task answer head, which guides nuanced text image understanding. Furthermore, IGTR realizes different recognition pipelines simply by using different instructions, enabling a character-understanding-based text reasoning paradigm that considerably differs from current methods. Experiments on English and Chinese benchmarks show that IGTR outperforms existing models by significant margins, while maintaining a small model size and efficient inference speed. Moreover, by adjusting the sampling of instructions, IGTR offers an elegant way to tackle the recognition of both rarely appearing and morphologically similar characters, which were previous challenges.
|
| 21 |
-
|
| 22 |
-
<a name="model"></a>
|
| 23 |
-
The accuracy (%) and model files of IGTR on the public dataset of scene text recognition are as follows:
|
| 24 |
-
|
| 25 |
-
- Trained on the Synth dataset (MJ+ST) and tested on Common Benchmarks; the training and test datasets are both from [PARSeq](https://github.com/baudm/parseq).
|
| 26 |
-
|
| 27 |
-
| Model | IC13<br/>857 | SVT | IIIT5k<br/>3000 | IC15<br/>1811 | SVTP | CUTE80 | Avg | Config&Model&Log |
|
| 28 |
-
| :-----: | :----------: | :--: | :-------------: | :-----------: | :--: | :----: | :---: | :---------------------------------------------------------------------------------------------: |
|
| 29 |
-
| IGTR-PD | 97.6 | 95.2 | 97.6 | 88.4 | 91.6 | 95.5 | 94.30 | [link](https://drive.google.com/drive/folders/1Pv0CW2hiWC_dIyaB74W1fsXqiX3z5yXA?usp=drive_link) |
|
| 30 |
-
| IGTR-AR | 98.6 | 95.7 | 98.2 | 88.4 | 92.4 | 95.5 | 94.78 | as above |
|
| 31 |
-
|
| 32 |
-
- Tested on the Union14M-L benchmark, from [Union14M](https://github.com/Mountchicken/Union14M/).
|
| 33 |
-
|
| 34 |
-
| Model | Curve | Multi-<br/>Oriented | Artistic | Contextless | Salient | Multi-<br/>word | General | Avg | Config&Model&Log |
|
| 35 |
-
| :-----: | :---: | :-----------------: | :------: | :---------: | :-----: | :-------------: | :-----: | :---: | :---------------------: |
|
| 36 |
-
| IGTR-PD | 76.9 | 30.6 | 59.1 | 63.3 | 77.8 | 62.5 | 66.7 | 62.40 | Same as the above table |
|
| 37 |
-
| IGTR-AR | 78.4 | 31.9 | 61.3 | 66.5 | 80.2 | 69.3 | 67.9 | 65.07 | as above |
|
| 38 |
-
|
| 39 |
-
- Trained on Union14M-L training dataset.
|
| 40 |
-
|
| 41 |
-
| Model | IC13<br/>857 | SVT | IIIT5k<br/>3000 | IC15<br/>1811 | SVTP | CUTE80 | Avg | Config&Model&Log |
|
| 42 |
-
| :----------: | :----------: | :--: | :-------------: | :-----------: | :--: | :----: | :---: | :---------------------------------------------------------------------------------------------: |
|
| 43 |
-
| IGTR-PD | 97.7 | 97.7 | 98.3 | 89.8 | 93.7 | 97.9 | 95.86 | [link](https://drive.google.com/drive/folders/1ZGlzDqEzjrBg8qG2wBkbOm3bLRzFbTzo?usp=drive_link) |
|
| 44 |
-
| IGTR-AR | 98.1 | 98.4 | 98.7 | 90.5 | 94.9 | 98.3 | 96.48 | as above |
|
| 45 |
-
| IGTR-PD-60ep | 97.9 | 98.3 | 99.2 | 90.8 | 93.7 | 97.6 | 96.24 | [link](https://drive.google.com/drive/folders/1ik4hxZDRsjU1RbCA19nwE45Kg1bCnMoa?usp=drive_link) |
|
| 46 |
-
| IGTR-AR-60ep | 98.4 | 98.1 | 99.3 | 91.5 | 94.3 | 97.6 | 96.54 | as above |
|
| 47 |
-
| IGTR-PD-PT | 98.6 | 98.0 | 99.1 | 91.7 | 96.8 | 99.0 | 97.20 | [link](https://drive.google.com/drive/folders/1QM0EWV66IfYI1G0Xm066V2zJA62hH6-1?usp=drive_link) |
|
| 48 |
-
| IGTR-AR-PT | 98.8 | 98.3 | 99.2 | 92.0 | 96.8 | 99.0 | 97.34 | as above |
|
| 49 |
-
|
| 50 |
-
| Model | Curve | Multi-<br/>Oriented | Artistic | Contextless | Salient | Multi-<br/>word | General | Avg | Config&Model&Log |
|
| 51 |
-
| :----------: | :---: | :-----------------: | :------: | :---------: | :-----: | :-------------: | :-----: | :---: | :---------------------: |
|
| 52 |
-
| IGTR-PD | 88.1 | 89.9 | 74.2 | 80.3 | 82.8 | 79.2 | 83.0 | 82.51 | Same as the above table |
|
| 53 |
-
| IGTR-AR | 90.4 | 91.2 | 77.0 | 82.4 | 84.7 | 84.0 | 84.4 | 84.86 | as above |
|
| 54 |
-
| IGTR-PD-60ep | 90.0 | 92.1 | 77.5 | 82.8 | 86.0 | 83.0 | 84.8 | 85.18 | Same as the above table |
|
| 55 |
-
| IGTR-AR-60ep | 91.0 | 93.0 | 78.7 | 84.6 | 87.3 | 84.8 | 85.6 | 86.43 | as above |
|
| 56 |
-
| IGTR-PD-PT | 92.4 | 92.1 | 80.7 | 83.6 | 87.7 | 86.9 | 85.0 | 86.92 | Same as the above table |
|
| 57 |
-
| IGTR-AR-PT | 93.0 | 92.9 | 81.3 | 83.4 | 88.6 | 88.7 | 85.6 | 87.65 | as above |
|
| 58 |
-
|
| 59 |
-
- Trained and tested on the Chinese dataset, from [Chinese Benchmark](https://github.com/FudanVI/benchmarking-chinese-text-recognition).
|
| 60 |
-
|
| 61 |
-
| Model | Scene | Web | Document | Handwriting | Avg | Config&Model&Log |
|
| 62 |
-
| :---------: | :---: | :--: | :------: | :---------: | :---: | :---------------------------------------------------------------------------------------------: |
|
| 63 |
-
| IGTR-PD | 73.1 | 74.8 | 98.6 | 52.5 | 74.75 | |
|
| 64 |
-
| IGTR-AR | 75.1 | 76.4 | 98.7 | 55.3 | 76.37 | |
|
| 65 |
-
| IGTR-PD-TS | 73.5 | 75.9 | 98.7 | 54.5 | 75.65 | [link](https://drive.google.com/drive/folders/1H3VRdGHjhawd6fkSC-qlBzVzvYYTpHRg?usp=drive_link) |
|
| 66 |
-
| IGTR-AR-TS | 75.6 | 77.0 | 98.8 | 57.3 | 77.17 | as above |
|
| 67 |
-
| IGTR-PD-Aug | 79.5 | 80.0 | 99.4 | 58.9 | 79.45 | [link](https://drive.google.com/drive/folders/1XFQkCILwcFwA7iYyQY9crnrouaI5sqcZ?usp=drive_link) |
|
| 68 |
-
| IGTR-AR-Aug | 82.0 | 81.7 | 99.5 | 63.8 | 81.74 | as above |
|
| 69 |
-
|
| 70 |
-
Download all Configs, Models, and Logs from [Google Drive](https://drive.google.com/drive/folders/1mSRDg9Mj5R6PspAdFGXZHDHTCQmjkd8d?usp=drive_link).
|
| 71 |
-
|
| 72 |
-
<a name="2"></a>
|
| 73 |
-
|
| 74 |
-
## 2. Environment
|
| 75 |
-
|
| 76 |
-
- [PyTorch](http://pytorch.org/) version >= 1.13.0
|
| 77 |
-
- Python version >= 3.7
|
| 78 |
-
|
| 79 |
-
```shell
|
| 80 |
-
git clone -b develop https://github.com/Topdu/OpenOCR.git
|
| 81 |
-
cd OpenOCR
|
| 82 |
-
# A100 Ubuntu 20.04 Cuda 11.8
|
| 83 |
-
conda create -n openocr python==3.8
|
| 84 |
-
conda activate openocr
|
| 85 |
-
conda install pytorch==2.2.0 torchvision==0.17.0 torchaudio==2.2.0 pytorch-cuda=11.8 -c pytorch -c nvidia
|
| 86 |
-
pip install -r requirements.txt
|
| 87 |
-
```
|
| 88 |
-
|
| 89 |
-
#### Dataset Preparation
|
| 90 |
-
|
| 91 |
-
[English dataset download](https://github.com/baudm/parseq)
|
| 92 |
-
|
| 93 |
-
[Union14M-L download](https://github.com/Mountchicken/Union14M)
|
| 94 |
-
|
| 95 |
-
[Chinese dataset download](https://github.com/fudanvi/benchmarking-chinese-text-recognition#download)
|
| 96 |
-
|
| 97 |
-
The expected filesystem structure is as follows:
|
| 98 |
-
|
| 99 |
-
```
|
| 100 |
-
benchmark_bctr
|
| 101 |
-
├── benchmark_bctr_test
|
| 102 |
-
│ ├── document_test
|
| 103 |
-
│ ├── handwriting_test
|
| 104 |
-
│ ├── scene_test
|
| 105 |
-
│ └── web_test
|
| 106 |
-
└── benchmark_bctr_train
|
| 107 |
-
├── document_train
|
| 108 |
-
├── handwriting_train
|
| 109 |
-
├── scene_train
|
| 110 |
-
└── web_train
|
| 111 |
-
evaluation
|
| 112 |
-
├── CUTE80
|
| 113 |
-
├── IC13_857
|
| 114 |
-
├── IC15_1811
|
| 115 |
-
├── IIIT5k
|
| 116 |
-
├── SVT
|
| 117 |
-
└── SVTP
|
| 118 |
-
OpenOCR
|
| 119 |
-
synth
|
| 120 |
-
├── MJ
|
| 121 |
-
│ ├── test
|
| 122 |
-
│ ├── train
|
| 123 |
-
│ └── val
|
| 124 |
-
└── ST
|
| 125 |
-
test # from PARSeq
|
| 126 |
-
├── ArT
|
| 127 |
-
├── COCOv1.4
|
| 128 |
-
├── CUTE80
|
| 129 |
-
├── IC13_1015
|
| 130 |
-
├── IC13_1095
|
| 131 |
-
├── IC13_857
|
| 132 |
-
├── IC15_1811
|
| 133 |
-
├── IC15_2077
|
| 134 |
-
├── IIIT5k
|
| 135 |
-
├── SVT
|
| 136 |
-
├── SVTP
|
| 137 |
-
└── Uber
|
| 138 |
-
u14m # lmdb format
|
| 139 |
-
├── artistic
|
| 140 |
-
├── contextless
|
| 141 |
-
├── curve
|
| 142 |
-
├── general
|
| 143 |
-
├── multi_oriented
|
| 144 |
-
├── multi_words
|
| 145 |
-
└── salient
|
| 146 |
-
Union14M-LMDB-L # lmdb format
|
| 147 |
-
├── train_challenging
|
| 148 |
-
├── train_easy
|
| 149 |
-
├── train_hard
|
| 150 |
-
├── train_medium
|
| 151 |
-
└── train_normal
|
| 152 |
-
```
|
| 153 |
-
|
| 154 |
-
<a name="3"></a>
|
| 155 |
-
|
| 156 |
-
## 3. Model Training / Evaluation
|
| 157 |
-
|
| 158 |
-
Training:
|
| 159 |
-
|
| 160 |
-
```shell
|
| 161 |
-
# The configuration file is available from the link provided in the table above.
|
| 162 |
-
# Multi GPU training
|
| 163 |
-
CUDA_VISIBLE_DEVICES=0,1 python -m torch.distributed.launch --nproc_per_node=2 tools/train_rec.py --c PATH/svtr_base_igtr_XXX.yml
|
| 164 |
-
```
|
| 165 |
-
|
| 166 |
-
Evaluation:
|
| 167 |
-
|
| 168 |
-
```shell
|
| 169 |
-
# The configuration file is available from the link provided in the table above.
|
| 170 |
-
# en
|
| 171 |
-
python tools/eval_rec_all_ratio.py --c PATH/svtr_base_igtr_syn.yml
|
| 172 |
-
# ch
|
| 173 |
-
python tools/eval_rec_all_ch.py --c PATH/svtr_base_igtr_ch_aug.yml
|
| 174 |
-
```
|
| 175 |
-
|
| 176 |
-
## Citation
|
| 177 |
-
|
| 178 |
-
```bibtex
|
| 179 |
-
@article{Du2024IGTR,
|
| 180 |
-
title = {Instruction-Guided Scene Text Recognition},
|
| 181 |
-
author = {Du, Yongkun and Chen, Zhineng and Su, Yuchen and Jia, Caiyan and Jiang, Yu-Gang},
|
| 182 |
-
journal = {CoRR},
|
| 183 |
-
eprinttype = {arXiv},
|
| 184 |
-
primaryClass={cs.CV},
|
| 185 |
-
volume = {abs/2401.17851},
|
| 186 |
-
year = {2024},
|
| 187 |
-
url = {https://arxiv.org/abs/2401.17851}
|
| 188 |
-
}
|
| 189 |
-
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
configs/rec/igtr/svtr_base_ds_igtr.yml
DELETED
|
@@ -1,157 +0,0 @@
|
|
| 1 |
-
Global:
|
| 2 |
-
device: gpu
|
| 3 |
-
epoch_num: 20
|
| 4 |
-
log_smooth_window: 20
|
| 5 |
-
print_batch_step: 10
|
| 6 |
-
output_dir: ./output/rec/u14m_filter/svtr_base_igtr
|
| 7 |
-
save_epoch_step: 1
|
| 8 |
-
# evaluation is run every 500 iterations (matches eval_batch_step below)
|
| 9 |
-
eval_batch_step: [0, 500]
|
| 10 |
-
eval_epoch_step: [0, 1]
|
| 11 |
-
cal_metric_during_train: True
|
| 12 |
-
pretrained_model:
|
| 13 |
-
checkpoints:
|
| 14 |
-
use_tensorboard: false
|
| 15 |
-
infer_img:
|
| 16 |
-
# for data or label process
|
| 17 |
-
character_dict_path: &character_dict_path
|
| 18 |
-
# ./tools/utils/EN_symbol_dict.txt # 96en
|
| 19 |
-
# ./tools/utils/ppocr_keys_v1.txt # ch
|
| 20 |
-
max_text_length: &max_text_length 25
|
| 21 |
-
use_space_char: &use_space_char False
|
| 22 |
-
save_res_path: ./output/rec/u14m_filter/predicts_svtr_base_igtr.txt
|
| 23 |
-
use_amp: True
|
| 24 |
-
|
| 25 |
-
Optimizer:
|
| 26 |
-
name: AdamW
|
| 27 |
-
lr: 0.0005 # 2gpus 384bs/gpu
|
| 28 |
-
weight_decay: 0.05
|
| 29 |
-
filter_bias_and_bn: True
|
| 30 |
-
|
| 31 |
-
LRScheduler:
|
| 32 |
-
name: OneCycleLR
|
| 33 |
-
warmup_epoch: 1.5
|
| 34 |
-
cycle_momentum: False
|
| 35 |
-
|
| 36 |
-
Architecture:
|
| 37 |
-
model_type: rec
|
| 38 |
-
algorithm: IGTR
|
| 39 |
-
in_channels: 3
|
| 40 |
-
Transform:
|
| 41 |
-
Encoder:
|
| 42 |
-
name: SVTRNet2DPos
|
| 43 |
-
img_size: [32, -1]
|
| 44 |
-
out_char_num: 25
|
| 45 |
-
out_channels: 256
|
| 46 |
-
patch_merging: 'Conv'
|
| 47 |
-
embed_dim: [128, 256, 384]
|
| 48 |
-
depth: [6, 6, 6]
|
| 49 |
-
num_heads: [4, 8, 12]
|
| 50 |
-
mixer: ['ConvB','ConvB','ConvB','ConvB','ConvB','ConvB', 'ConvB','ConvB', 'Global','Global','Global','Global','Global','Global','Global','Global','Global','Global']
|
| 51 |
-
local_mixer: [[5, 5], [5, 5], [5, 5]]
|
| 52 |
-
last_stage: False
|
| 53 |
-
prenorm: True
|
| 54 |
-
use_first_sub: False
|
| 55 |
-
Decoder:
|
| 56 |
-
name: IGTRDecoder
|
| 57 |
-
dim: 384
|
| 58 |
-
num_layer: 1
|
| 59 |
-
ar: False
|
| 60 |
-
refine_iter: 0
|
| 61 |
-
# next_pred: True
|
| 62 |
-
next_pred: False
|
| 63 |
-
pos2d: True
|
| 64 |
-
ds: True
|
| 65 |
-
# pos_len: False
|
| 66 |
-
# rec_layer: 1
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
Loss:
|
| 70 |
-
name: IGTRLoss
|
| 71 |
-
|
| 72 |
-
PostProcess:
|
| 73 |
-
name: IGTRLabelDecode
|
| 74 |
-
character_dict_path: *character_dict_path
|
| 75 |
-
use_space_char: *use_space_char
|
| 76 |
-
|
| 77 |
-
Metric:
|
| 78 |
-
name: RecMetric
|
| 79 |
-
main_indicator: acc
|
| 80 |
-
|
| 81 |
-
Train:
|
| 82 |
-
dataset:
|
| 83 |
-
name: RatioDataSet
|
| 84 |
-
ds_width: True
|
| 85 |
-
padding: &padding False
|
| 86 |
-
data_dir_list: ['../Union14M-L-LMDB-Filtered/filter_train_challenging',
|
| 87 |
-
'../Union14M-L-LMDB-Filtered/filter_train_hard',
|
| 88 |
-
'../Union14M-L-LMDB-Filtered/filter_train_medium',
|
| 89 |
-
'../Union14M-L-LMDB-Filtered/filter_train_normal',
|
| 90 |
-
'../Union14M-L-LMDB-Filtered/filter_train_easy',
|
| 91 |
-
]
|
| 92 |
-
transforms:
|
| 93 |
-
- DecodeImage: # load image
|
| 94 |
-
img_mode: BGR
|
| 95 |
-
channel_first: False
|
| 96 |
-
- PARSeqAug:
|
| 97 |
-
- IGTRLabelEncode: # Class handling label
|
| 98 |
-
k: 8
|
| 99 |
-
prompt_error: False
|
| 100 |
-
character_dict_path: *character_dict_path
|
| 101 |
-
use_space_char: *use_space_char
|
| 102 |
-
max_text_length: *max_text_length
|
| 103 |
-
- KeepKeys:
|
| 104 |
-
keep_keys: ['image', 'label', 'prompt_pos_idx_list',
|
| 105 |
-
'prompt_char_idx_list', 'ques_pos_idx_list', 'ques1_answer_list',
|
| 106 |
-
'ques2_char_idx_list', 'ques2_answer_list', 'ques3_answer', 'ques4_char_num_list',
|
| 107 |
-
'ques_len_list', 'ques2_len_list', 'prompt_len_list', 'length'] # dataloader will return list in this order
|
| 108 |
-
sampler:
|
| 109 |
-
name: RatioSampler
|
| 110 |
-
scales: [[128, 32]] # w, h
|
| 111 |
-
# divided_factor: ensures the width and height dimensions are divisible by the downsampling multiple
|
| 112 |
-
first_bs: &bs 384
|
| 113 |
-
fix_bs: false
|
| 114 |
-
divided_factor: [4, 16] # w, h
|
| 115 |
-
is_training: True
|
| 116 |
-
loader:
|
| 117 |
-
shuffle: True
|
| 118 |
-
batch_size_per_card: *bs
|
| 119 |
-
drop_last: True
|
| 120 |
-
max_ratio: &max_ratio 4
|
| 121 |
-
num_workers: 4
|
| 122 |
-
|
| 123 |
-
Eval:
|
| 124 |
-
dataset:
|
| 125 |
-
name: RatioDataSet
|
| 126 |
-
ds_width: True
|
| 127 |
-
padding: *padding
|
| 128 |
-
data_dir_list: ['../evaluation/CUTE80',
|
| 129 |
-
'../evaluation/IC13_857',
|
| 130 |
-
'../evaluation/IC15_1811',
|
| 131 |
-
'../evaluation/IIIT5k',
|
| 132 |
-
'../evaluation/SVT',
|
| 133 |
-
'../evaluation/SVTP']
|
| 134 |
-
transforms:
|
| 135 |
-
- DecodeImage: # load image
|
| 136 |
-
img_mode: BGR
|
| 137 |
-
channel_first: False
|
| 138 |
-
- ARLabelEncode: # Class handling label
|
| 139 |
-
character_dict_path: *character_dict_path
|
| 140 |
-
use_space_char: *use_space_char
|
| 141 |
-
max_text_length: *max_text_length
|
| 142 |
-
- KeepKeys:
|
| 143 |
-
keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
|
| 144 |
-
sampler:
|
| 145 |
-
name: RatioSampler
|
| 146 |
-
scales: [[128, 32]] # w, h
|
| 147 |
-
# divided_factor: ensures the width and height dimensions are divisible by the downsampling multiple
|
| 148 |
-
first_bs: 256
|
| 149 |
-
fix_bs: false
|
| 150 |
-
divided_factor: [4, 16] # w, h
|
| 151 |
-
is_training: False
|
| 152 |
-
loader:
|
| 153 |
-
shuffle: False
|
| 154 |
-
drop_last: False
|
| 155 |
-
batch_size_per_card: 256
|
| 156 |
-
max_ratio: *max_ratio
|
| 157 |
-
num_workers: 4
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
configs/rec/lister/focalsvtr_lister_wo_fem_maxratio12.yml
DELETED
|
@@ -1,133 +0,0 @@
|
|
| 1 |
-
Global:
|
| 2 |
-
device: gpu
|
| 3 |
-
epoch_num: 20
|
| 4 |
-
log_smooth_window: 20
|
| 5 |
-
print_batch_step: 10
|
| 6 |
-
output_dir: ./output/rec/u14m_filter/focalsvtr_lister_wo_fem_maxratio12/
|
| 7 |
-
eval_epoch_step: [0, 1]
|
| 8 |
-
eval_batch_step: [0, 500]
|
| 9 |
-
cal_metric_during_train: True
|
| 10 |
-
pretrained_model:
|
| 11 |
-
checkpoints:
|
| 12 |
-
use_tensorboard: false
|
| 13 |
-
infer_img:
|
| 14 |
-
# for data or label process
|
| 15 |
-
character_dict_path: &character_dict_path ./tools/utils/EN_symbol_dict.txt
|
| 16 |
-
max_text_length: &max_text_length 25
|
| 17 |
-
use_space_char: &use_space_char False
|
| 18 |
-
save_res_path: ./output/rec/u14m_filter/predicts_focalsvtr_lister_wo_fem_maxratio12.txt
|
| 19 |
-
use_amp: True
|
| 20 |
-
grad_clip_val: 20
|
| 21 |
-
|
| 22 |
-
Optimizer:
|
| 23 |
-
name: AdamW
|
| 24 |
-
lr: 0.00065
|
| 25 |
-
weight_decay: 0.05
|
| 26 |
-
filter_bias_and_bn: True
|
| 27 |
-
|
| 28 |
-
LRScheduler:
|
| 29 |
-
name: OneCycleLR
|
| 30 |
-
warmup_epoch: 1.5 # pct_start 0.075*20 = 1.5ep
|
| 31 |
-
cycle_momentum: False
|
| 32 |
-
|
| 33 |
-
Architecture:
|
| 34 |
-
model_type: rec
|
| 35 |
-
algorithm: LISTER
|
| 36 |
-
Transform:
|
| 37 |
-
Encoder:
|
| 38 |
-
name: FocalSVTR
|
| 39 |
-
img_size: [32, 128]
|
| 40 |
-
depths: [6, 6, 9]
|
| 41 |
-
embed_dim: 96
|
| 42 |
-
sub_k: [[1, 1], [2, 1], [1, 1]]
|
| 43 |
-
focal_levels: [3, 3, 3]
|
| 44 |
-
last_stage: False
|
| 45 |
-
feat2d: True
|
| 46 |
-
Decoder:
|
| 47 |
-
name: LISTERDecoder
|
| 48 |
-
detach_grad: False
|
| 49 |
-
attn_scaling: True
|
| 50 |
-
use_fem: False
|
| 51 |
-
|
| 52 |
-
Loss:
|
| 53 |
-
name: LISTERLoss
|
| 54 |
-
|
| 55 |
-
PostProcess:
|
| 56 |
-
name: LISTERLabelDecode
|
| 57 |
-
|
| 58 |
-
Metric:
|
| 59 |
-
name: RecMetric
|
| 60 |
-
main_indicator: acc
|
| 61 |
-
is_filter: True
|
| 62 |
-
|
| 63 |
-
Train:
|
| 64 |
-
dataset:
|
| 65 |
-
name: RatioDataSetTVResize
|
| 66 |
-
ds_width: True
|
| 67 |
-
padding: False
|
| 68 |
-
data_dir_list: ['../Union14M-L-LMDB-Filtered/filter_filter_train_challenging',
|
| 69 |
-
'../Union14M-L-LMDB-Filtered/filter_filter_train_hard',
|
| 70 |
-
'../Union14M-L-LMDB-Filtered/filter_filter_train_medium',
|
| 71 |
-
'../Union14M-L-LMDB-Filtered/filter_filter_train_normal',
|
| 72 |
-
'../Union14M-L-LMDB-Filtered/filter_filter_train_easy',
|
| 73 |
-
]
|
| 74 |
-
transforms:
|
| 75 |
-
- DecodeImagePIL: # load image
|
| 76 |
-
img_mode: RGB
|
| 77 |
-
- PARSeqAugPIL:
|
| 78 |
-
- EPLabelEncode: # Class handling label
|
| 79 |
-
character_dict_path: *character_dict_path
|
| 80 |
-
use_space_char: *use_space_char
|
| 81 |
-
max_text_length: *max_text_length
|
| 82 |
-
- KeepKeys:
|
| 83 |
-
keep_keys: ['image', 'label', 'length']
|
| 84 |
-
sampler:
|
| 85 |
-
name: RatioSampler
|
| 86 |
-
scales: [[128, 32]] # w, h
|
| 87 |
-
# divided_factor: ensures the width and height dimensions are divisible by the downsampling multiple
|
| 88 |
-
first_bs: &bs 256
|
| 89 |
-
fix_bs: false
|
| 90 |
-
divided_factor: [4, 16] # w, h
|
| 91 |
-
is_training: True
|
| 92 |
-
loader:
|
| 93 |
-
shuffle: True
|
| 94 |
-
batch_size_per_card: *bs
|
| 95 |
-
drop_last: True
|
| 96 |
-
max_ratio: 12
|
| 97 |
-
num_workers: 4
|
| 98 |
-
|
| 99 |
-
Eval:
|
| 100 |
-
dataset:
|
| 101 |
-
name: RatioDataSetTVResize
|
| 102 |
-
ds_width: True
|
| 103 |
-
padding: False
|
| 104 |
-
data_dir_list: ['../evaluation/CUTE80',
|
| 105 |
-
'../evaluation/IC13_857',
|
| 106 |
-
'../evaluation/IC15_1811',
|
| 107 |
-
'../evaluation/IIIT5k',
|
| 108 |
-
'../evaluation/SVT',
|
| 109 |
-
'../evaluation/SVTP',
|
| 110 |
-
]
|
| 111 |
-
transforms:
|
| 112 |
-
- DecodeImagePIL: # load image
|
| 113 |
-
img_mode: RGB
|
| 114 |
-
- EPLabelEncode: # Class handling label
|
| 115 |
-
character_dict_path: *character_dict_path
|
| 116 |
-
use_space_char: *use_space_char
|
| 117 |
-
max_text_length: *max_text_length
|
| 118 |
-
- KeepKeys:
|
| 119 |
-
keep_keys: ['image', 'label', 'length']
|
| 120 |
-
sampler:
|
| 121 |
-
name: RatioSampler
|
| 122 |
-
scales: [[128, 32]] # w, h
|
| 123 |
-
# divided_factor: ensures the width and height dimensions are divisible by the downsampling multiple
|
| 124 |
-
first_bs: 256
|
| 125 |
-
fix_bs: false
|
| 126 |
-
divided_factor: [4, 16] # w, h
|
| 127 |
-
is_training: False
|
| 128 |
-
loader:
|
| 129 |
-
shuffle: False
|
| 130 |
-
drop_last: False
|
| 131 |
-
batch_size_per_card: *bs
|
| 132 |
-
max_ratio: 12
|
| 133 |
-
num_workers: 4
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
configs/rec/lister/svtrv2_lister_wo_fem_maxratio12.yml
DELETED
|
@@ -1,138 +0,0 @@
|
|
| 1 |
-
Global:
|
| 2 |
-
device: gpu
|
| 3 |
-
epoch_num: 20
|
| 4 |
-
log_smooth_window: 20
|
| 5 |
-
print_batch_step: 10
|
| 6 |
-
output_dir: ./output/rec/u14m_filter/svtrv2_lister_wo_fem_maxratio12/
|
| 7 |
-
eval_epoch_step: [0, 1]
|
| 8 |
-
eval_batch_step: [0, 500]
|
| 9 |
-
cal_metric_during_train: True
|
| 10 |
-
pretrained_model:
|
| 11 |
-
checkpoints:
|
| 12 |
-
use_tensorboard: false
|
| 13 |
-
infer_img:
|
| 14 |
-
# for data or label process
|
| 15 |
-
character_dict_path: &character_dict_path ./tools/utils/EN_symbol_dict.txt
|
| 16 |
-
max_text_length: &max_text_length 25
|
| 17 |
-
use_space_char: &use_space_char False
|
| 18 |
-
save_res_path: ./output/rec/u14m_filter/predicts_svtrv2_lister_wo_fem_maxratio12.txt
|
| 19 |
-
use_amp: True
|
| 20 |
-
grad_clip_val: 20
|
| 21 |
-
|
| 22 |
-
Optimizer:
|
| 23 |
-
name: AdamW
|
| 24 |
-
lr: 0.000325
|
| 25 |
-
weight_decay: 0.05
|
| 26 |
-
filter_bias_and_bn: True
|
| 27 |
-
|
| 28 |
-
LRScheduler:
|
| 29 |
-
name: OneCycleLR
|
| 30 |
-
|
| 31 |
-
warmup_epoch: 1.5 # pct_start 0.075*20 = 1.5ep
|
| 32 |
-
cycle_momentum: False
|
| 33 |
-
|
| 34 |
-
Architecture:
|
| 35 |
-
model_type: rec
|
| 36 |
-
algorithm: LISTER
|
| 37 |
-
Transform:
|
| 38 |
-
Encoder:
|
| 39 |
-
name: SVTRv2LNConvTwo33
|
| 40 |
-
use_pos_embed: False
|
| 41 |
-
out_channels: 256
|
| 42 |
-
dims: [128, 256, 384]
|
| 43 |
-
depths: [6, 6, 6]
|
| 44 |
-
num_heads: [4, 8, 12]
|
| 45 |
-
mixer: [['Conv','Conv','Conv','Conv','Conv','Conv'],['Conv','Conv','FGlobal','Global','Global','Global'],['Global','Global','Global','Global','Global','Global']]
|
| 46 |
-
local_k: [[5, 5], [5, 5], [-1, -1]]
|
| 47 |
-
sub_k: [[1, 1], [2, 1], [-1, -1]]
|
| 48 |
-
last_stage: false
|
| 49 |
-
feat2d: True
|
| 50 |
-
Decoder:
|
| 51 |
-
name: LISTERDecoder
|
| 52 |
-
detach_grad: False
|
| 53 |
-
attn_scaling: True
|
| 54 |
-
use_fem: False
|
| 55 |
-
|
| 56 |
-
Loss:
|
| 57 |
-
name: LISTERLoss
|
| 58 |
-
|
| 59 |
-
PostProcess:
|
| 60 |
-
name: LISTERLabelDecode
|
| 61 |
-
|
| 62 |
-
Metric:
|
| 63 |
-
name: RecMetric
|
| 64 |
-
main_indicator: acc
|
| 65 |
-
is_filter: True
|
| 66 |
-
|
| 67 |
-
Train:
|
| 68 |
-
dataset:
|
| 69 |
-
name: RatioDataSetTVResize
|
| 70 |
-
ds_width: True
|
| 71 |
-
padding: False
|
| 72 |
-
data_dir_list: ['../Union14M-L-LMDB-Filtered/filter_train_challenging',
|
| 73 |
-
'../Union14M-L-LMDB-Filtered/filter_train_hard',
|
| 74 |
-
'../Union14M-L-LMDB-Filtered/filter_train_medium',
|
| 75 |
-
'../Union14M-L-LMDB-Filtered/filter_train_normal',
|
| 76 |
-
'../Union14M-L-LMDB-Filtered/filter_train_easy',
|
| 77 |
-
]
|
| 78 |
-
transforms:
|
| 79 |
-
- DecodeImagePIL: # load image
|
| 80 |
-
img_mode: RGB
|
| 81 |
-
- PARSeqAugPIL:
|
| 82 |
-
- EPLabelEncode: # Class handling label
|
| 83 |
-
character_dict_path: *character_dict_path
|
| 84 |
-
use_space_char: *use_space_char
|
| 85 |
-
max_text_length: *max_text_length
|
| 86 |
-
- KeepKeys:
|
| 87 |
-
keep_keys: ['image', 'label', 'length']
|
| 88 |
-
sampler:
|
| 89 |
-
name: RatioSampler
|
| 90 |
-
scales: [[128, 32]] # w, h
|
| 91 |
-
# divided_factor: ensures the width and height dimensions are divisible by the downsampling multiple
|
| 92 |
-
first_bs: &bs 128
|
| 93 |
-
fix_bs: false
|
| 94 |
-
divided_factor: [4, 16] # w, h
|
| 95 |
-
is_training: True
|
| 96 |
-
loader:
|
| 97 |
-
shuffle: True
|
| 98 |
-
batch_size_per_card: *bs
|
| 99 |
-
drop_last: True
|
| 100 |
-
max_ratio: 12
|
| 101 |
-
num_workers: 4
|
| 102 |
-
|
| 103 |
-
Eval:
|
| 104 |
-
dataset:
|
| 105 |
-
name: RatioDataSetTVResize
|
| 106 |
-
ds_width: True
|
| 107 |
-
padding: False
|
| 108 |
-
data_dir_list: ['../evaluation/CUTE80',
|
| 109 |
-
'../evaluation/IC13_857',
|
| 110 |
-
'../evaluation/IC15_1811',
|
| 111 |
-
'../evaluation/IIIT5k',
|
| 112 |
-
'../evaluation/SVT',
|
| 113 |
-
'../evaluation/SVTP',
|
| 114 |
-
]
|
| 115 |
-
transforms:
|
| 116 |
-
- DecodeImagePIL: # load image
|
| 117 |
-
img_mode: RGB
|
| 118 |
-
- EPLabelEncode: # Class handling label
|
| 119 |
-
character_dict_path: *character_dict_path
|
| 120 |
-
use_space_char: *use_space_char
|
| 121 |
-
max_text_length: *max_text_length
|
| 122 |
-
|
| 123 |
-
- KeepKeys:
|
| 124 |
-
keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
|
| 125 |
-
sampler:
|
| 126 |
-
name: RatioSampler
|
| 127 |
-
scales: [[128, 32]] # w, h
|
| 128 |
-
# divided_factor: ensures the width and height dimensions are divisible by the downsampling multiple
|
| 129 |
-
first_bs: 256
|
| 130 |
-
fix_bs: false
|
| 131 |
-
divided_factor: [4, 16] # w, h
|
| 132 |
-
is_training: False
|
| 133 |
-
loader:
|
| 134 |
-
shuffle: False
|
| 135 |
-
drop_last: False
|
| 136 |
-
batch_size_per_card: *bs
|
| 137 |
-
max_ratio: 12
|
| 138 |
-
num_workers: 4
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
configs/rec/lpv/svtr_base_lpv.yml
DELETED
|
@@ -1,124 +0,0 @@
|
|
| 1 |
-
Global:
|
| 2 |
-
device: gpu
|
| 3 |
-
epoch_num: 20
|
| 4 |
-
log_smooth_window: 20
|
| 5 |
-
print_batch_step: 10
|
| 6 |
-
output_dir: ./output/rec/u14m_filter/svtr_base_lpv/
|
| 7 |
-
save_epoch_step: 1
|
| 8 |
-
# evaluation interval in iterations is set by eval_batch_step below (500 here, not 2000)
|
| 9 |
-
eval_batch_step: [0, 500]
|
| 10 |
-
eval_epoch_step: [0, 1]
|
| 11 |
-
cal_metric_during_train: True
|
| 12 |
-
pretrained_model:
|
| 13 |
-
# ./output/rec/u14m_filter/svtr_base_lpv_wo_glrm/best.pth
|
| 14 |
-
checkpoints:
|
| 15 |
-
use_tensorboard: false
|
| 16 |
-
infer_img:
|
| 17 |
-
# for data or label process
|
| 18 |
-
character_dict_path: &character_dict_path ./tools/utils/EN_symbol_dict.txt # 96en
|
| 19 |
-
# ./tools/utils/ppocr_keys_v1.txt # ch
|
| 20 |
-
max_text_length: &max_text_length 25
|
| 21 |
-
use_space_char: &use_space_char False
|
| 22 |
-
save_res_path: ./output/rec/u14m_filter/predicts_svtr_lpv.txt
|
| 23 |
-
use_amp: True
|
| 24 |
-
grad_clip_val: 20
|
| 25 |
-
|
| 26 |
-
Optimizer:
|
| 27 |
-
name: Adam
|
| 28 |
-
lr: 0.0001 # for 4gpus bs128/gpu
|
| 29 |
-
weight_decay: 0.0
|
| 30 |
-
filter_bias_and_bn: False
|
| 31 |
-
betas: [0.9, 0.99]
|
| 32 |
-
|
| 33 |
-
LRScheduler:
|
| 34 |
-
name: MultiStepLR
|
| 35 |
-
milestones: [12]
|
| 36 |
-
gamma: 0.1
|
| 37 |
-
|
| 38 |
-
Architecture:
|
| 39 |
-
model_type: rec
|
| 40 |
-
algorithm: LPV
|
| 41 |
-
in_channels: 3
|
| 42 |
-
Transform:
|
| 43 |
-
Encoder:
|
| 44 |
-
name: SVTRNet
|
| 45 |
-
img_size: [32, 128]
|
| 46 |
-
out_char_num: 25
|
| 47 |
-
out_channels: 256
|
| 48 |
-
patch_merging: 'Conv'
|
| 49 |
-
embed_dim: [128, 256, 384]
|
| 50 |
-
depth: [6, 6, 6]
|
| 51 |
-
num_heads: [4, 8, 12]
|
| 52 |
-
mixer: ['Conv','Conv','Conv','Conv','Conv','Conv', 'Conv','Conv', 'Global','Global','Global','Global','Global','Global','Global','Global','Global','Global']
|
| 53 |
-
local_mixer: [[5, 5], [5, 5], [5, 5]]
|
| 54 |
-
sub_k: [[1, 1], [1, 1]]
|
| 55 |
-
feature2d: True
|
| 56 |
-
last_stage: False
|
| 57 |
-
prenorm: True
|
| 58 |
-
Decoder:
|
| 59 |
-
name: LPVDecoder
|
| 60 |
-
num_layer: 3
|
| 61 |
-
max_len: *max_text_length
|
| 62 |
-
use_mask: True
|
| 63 |
-
dim_feedforward: 1536
|
| 64 |
-
nhead: 12
|
| 65 |
-
dropout: 0.1
|
| 66 |
-
trans_layer: 3
|
| 67 |
-
|
| 68 |
-
Loss:
|
| 69 |
-
name: LPVLoss
|
| 70 |
-
|
| 71 |
-
PostProcess:
|
| 72 |
-
name: ARLabelDecode
|
| 73 |
-
character_dict_path: *character_dict_path
|
| 74 |
-
use_space_char: *use_space_char
|
| 75 |
-
|
| 76 |
-
Metric:
|
| 77 |
-
name: RecMetric
|
| 78 |
-
main_indicator: acc
|
| 79 |
-
is_filter: True
|
| 80 |
-
|
| 81 |
-
Train:
|
| 82 |
-
dataset:
|
| 83 |
-
name: LMDBDataSet
|
| 84 |
-
data_dir: ../Union14M-L-LMDB-Filtered
|
| 85 |
-
transforms:
|
| 86 |
-
- DecodeImagePIL: # load image
|
| 87 |
-
img_mode: RGB
|
| 88 |
-
- PARSeqAugPIL:
|
| 89 |
-
- ARLabelEncode: # Class handling label
|
| 90 |
-
character_dict_path: *character_dict_path
|
| 91 |
-
use_space_char: *use_space_char
|
| 92 |
-
max_text_length: *max_text_length
|
| 93 |
-
- RecTVResize:
|
| 94 |
-
image_shape: [32, 128]
|
| 95 |
-
padding: False
|
| 96 |
-
- KeepKeys:
|
| 97 |
-
keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
|
| 98 |
-
loader:
|
| 99 |
-
shuffle: True
|
| 100 |
-
batch_size_per_card: 128
|
| 101 |
-
drop_last: True
|
| 102 |
-
num_workers: 4
|
| 103 |
-
|
| 104 |
-
Eval:
|
| 105 |
-
dataset:
|
| 106 |
-
name: LMDBDataSet
|
| 107 |
-
data_dir: ../evaluation/
|
| 108 |
-
transforms:
|
| 109 |
-
- DecodeImagePIL: # load image
|
| 110 |
-
img_mode: RGB
|
| 111 |
-
- ARLabelEncode: # Class handling label
|
| 112 |
-
character_dict_path: *character_dict_path
|
| 113 |
-
use_space_char: *use_space_char
|
| 114 |
-
max_text_length: *max_text_length
|
| 115 |
-
- RecTVResize:
|
| 116 |
-
image_shape: [32, 128]
|
| 117 |
-
padding: False
|
| 118 |
-
- KeepKeys:
|
| 119 |
-
keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
|
| 120 |
-
loader:
|
| 121 |
-
shuffle: False
|
| 122 |
-
drop_last: False
|
| 123 |
-
batch_size_per_card: 128
|
| 124 |
-
num_workers: 4
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
configs/rec/lpv/svtr_base_lpv_wo_glrm.yml
DELETED
|
@@ -1,123 +0,0 @@
|
|
| 1 |
-
Global:
|
| 2 |
-
device: gpu
|
| 3 |
-
epoch_num: 20
|
| 4 |
-
log_smooth_window: 20
|
| 5 |
-
print_batch_step: 10
|
| 6 |
-
output_dir: ./output/rec/u14m_filter/svtr_base_lpv_wo_glrm/
|
| 7 |
-
save_epoch_step: 1
|
| 8 |
-
# evaluation interval in iterations is set by eval_batch_step below (500 here, not 2000)
|
| 9 |
-
eval_batch_step: [0, 500]
|
| 10 |
-
eval_epoch_step: [0, 1]
|
| 11 |
-
cal_metric_during_train: True
|
| 12 |
-
pretrained_model:
|
| 13 |
-
checkpoints:
|
| 14 |
-
use_tensorboard: false
|
| 15 |
-
infer_img:
|
| 16 |
-
# for data or label process
|
| 17 |
-
character_dict_path: &character_dict_path ./tools/utils/EN_symbol_dict.txt # 96en
|
| 18 |
-
# ./tools/utils/ppocr_keys_v1.txt # ch
|
| 19 |
-
max_text_length: &max_text_length 25
|
| 20 |
-
use_space_char: &use_space_char False
|
| 21 |
-
save_res_path: ./output/rec/u14m_filter/predicts_svtr_base_lpv_wo_glrm.txt
|
| 22 |
-
use_amp: True
|
| 23 |
-
grad_clip_val: 20
|
| 24 |
-
|
| 25 |
-
Optimizer:
|
| 26 |
-
name: Adam
|
| 27 |
-
lr: 0.0001 # for 4gpus bs128/gpu
|
| 28 |
-
weight_decay: 0.0
|
| 29 |
-
filter_bias_and_bn: False
|
| 30 |
-
betas: [0.9, 0.99]
|
| 31 |
-
|
| 32 |
-
LRScheduler:
|
| 33 |
-
name: MultiStepLR
|
| 34 |
-
milestones: [12]
|
| 35 |
-
gamma: 0.1
|
| 36 |
-
|
| 37 |
-
Architecture:
|
| 38 |
-
model_type: rec
|
| 39 |
-
algorithm: LPV
|
| 40 |
-
in_channels: 3
|
| 41 |
-
Transform:
|
| 42 |
-
Encoder:
|
| 43 |
-
name: SVTRNet
|
| 44 |
-
img_size: [32, 128]
|
| 45 |
-
out_char_num: 25
|
| 46 |
-
out_channels: 256
|
| 47 |
-
patch_merging: 'Conv'
|
| 48 |
-
embed_dim: [128, 256, 384]
|
| 49 |
-
depth: [6, 6, 6]
|
| 50 |
-
num_heads: [4, 8, 12]
|
| 51 |
-
mixer: ['Conv','Conv','Conv','Conv','Conv','Conv', 'Conv','Conv', 'Global','Global','Global','Global','Global','Global','Global','Global','Global','Global']
|
| 52 |
-
local_mixer: [[5, 5], [5, 5], [5, 5]]
|
| 53 |
-
sub_k: [[1, 1], [1, 1]]
|
| 54 |
-
feature2d: True
|
| 55 |
-
last_stage: False
|
| 56 |
-
prenorm: True
|
| 57 |
-
Decoder:
|
| 58 |
-
name: LPVDecoder
|
| 59 |
-
num_layer: 3
|
| 60 |
-
max_len: *max_text_length
|
| 61 |
-
use_mask: False
|
| 62 |
-
dim_feedforward: 1536
|
| 63 |
-
nhead: 12
|
| 64 |
-
dropout: 0.1
|
| 65 |
-
trans_layer: 3
|
| 66 |
-
|
| 67 |
-
Loss:
|
| 68 |
-
name: LPVLoss
|
| 69 |
-
|
| 70 |
-
PostProcess:
|
| 71 |
-
name: ARLabelDecode
|
| 72 |
-
character_dict_path: *character_dict_path
|
| 73 |
-
use_space_char: *use_space_char
|
| 74 |
-
|
| 75 |
-
Metric:
|
| 76 |
-
name: RecMetric
|
| 77 |
-
main_indicator: acc
|
| 78 |
-
is_filter: True
|
| 79 |
-
|
| 80 |
-
Train:
|
| 81 |
-
dataset:
|
| 82 |
-
name: LMDBDataSet
|
| 83 |
-
data_dir: ../Union14M-L-LMDB-Filtered
|
| 84 |
-
transforms:
|
| 85 |
-
- DecodeImagePIL: # load image
|
| 86 |
-
img_mode: RGB
|
| 87 |
-
- PARSeqAugPIL:
|
| 88 |
-
- ARLabelEncode: # Class handling label
|
| 89 |
-
character_dict_path: *character_dict_path
|
| 90 |
-
use_space_char: *use_space_char
|
| 91 |
-
max_text_length: *max_text_length
|
| 92 |
-
- RecTVResize:
|
| 93 |
-
image_shape: [32, 128]
|
| 94 |
-
padding: False
|
| 95 |
-
- KeepKeys:
|
| 96 |
-
keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
|
| 97 |
-
loader:
|
| 98 |
-
shuffle: True
|
| 99 |
-
batch_size_per_card: 128
|
| 100 |
-
drop_last: True
|
| 101 |
-
num_workers: 4
|
| 102 |
-
|
| 103 |
-
Eval:
|
| 104 |
-
dataset:
|
| 105 |
-
name: LMDBDataSet
|
| 106 |
-
data_dir: ../evaluation/
|
| 107 |
-
transforms:
|
| 108 |
-
- DecodeImagePIL: # load image
|
| 109 |
-
img_mode: RGB
|
| 110 |
-
- ARLabelEncode: # Class handling label
|
| 111 |
-
character_dict_path: *character_dict_path
|
| 112 |
-
use_space_char: *use_space_char
|
| 113 |
-
max_text_length: *max_text_length
|
| 114 |
-
- RecTVResize:
|
| 115 |
-
image_shape: [32, 128]
|
| 116 |
-
padding: False
|
| 117 |
-
- KeepKeys:
|
| 118 |
-
keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
|
| 119 |
-
loader:
|
| 120 |
-
shuffle: False
|
| 121 |
-
drop_last: False
|
| 122 |
-
batch_size_per_card: 128
|
| 123 |
-
num_workers: 4
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
configs/rec/lpv/svtrv2_lpv.yml
DELETED
|
@@ -1,147 +0,0 @@
|
|
| 1 |
-
Global:
|
| 2 |
-
device: gpu
|
| 3 |
-
epoch_num: 20
|
| 4 |
-
log_smooth_window: 20
|
| 5 |
-
print_batch_step: 10
|
| 6 |
-
output_dir: ./output/rec/u14m_filter/svtrv2_lpv/
|
| 7 |
-
save_epoch_step: 1
|
| 8 |
-
# evaluation interval in iterations is set by eval_batch_step below (500 here, not 2000)
|
| 9 |
-
eval_batch_step: [0, 500]
|
| 10 |
-
eval_epoch_step: [0, 1]
|
| 11 |
-
cal_metric_during_train: True
|
| 12 |
-
pretrained_model:
|
| 13 |
-
# ./output/rec/u14m_filter/svtrv2_lpv_wo_glrm/best.pth
|
| 14 |
-
checkpoints:
|
| 15 |
-
use_tensorboard: false
|
| 16 |
-
infer_img:
|
| 17 |
-
# for data or label process
|
| 18 |
-
character_dict_path: &character_dict_path ./tools/utils/EN_symbol_dict.txt # 96en
|
| 19 |
-
# ./tools/utils/ppocr_keys_v1.txt # ch
|
| 20 |
-
max_text_length: &max_text_length 25
|
| 21 |
-
use_space_char: &use_space_char False
|
| 22 |
-
save_res_path: ./output/rec/u14m_filter/predicts_svtrv2_lpv.txt
|
| 23 |
-
use_amp: True
|
| 24 |
-
grad_clip_val: 20
|
| 25 |
-
|
| 26 |
-
Optimizer:
|
| 27 |
-
name: AdamW
|
| 28 |
-
lr: 0.000325 # for 4gpus bs128/gpu
|
| 29 |
-
weight_decay: 0.05
|
| 30 |
-
filter_bias_and_bn: True
|
| 31 |
-
|
| 32 |
-
LRScheduler:
|
| 33 |
-
name: OneCycleLR
|
| 34 |
-
warmup_epoch: 1 # pct_start 0.075*20 = 1.5ep
|
| 35 |
-
cycle_momentum: False
|
| 36 |
-
|
| 37 |
-
Architecture:
|
| 38 |
-
model_type: rec
|
| 39 |
-
algorithm: LPV
|
| 40 |
-
in_channels: 3
|
| 41 |
-
Transform:
|
| 42 |
-
Encoder:
|
| 43 |
-
name: SVTRv2LNConvTwo33
|
| 44 |
-
use_pos_embed: False
|
| 45 |
-
dims: [128, 256, 384]
|
| 46 |
-
depths: [6, 6, 6]
|
| 47 |
-
num_heads: [4, 8, 12]
|
| 48 |
-
mixer: [['Conv','Conv','Conv','Conv','Conv','Conv'],['Conv','Conv','FGlobal','Global','Global','Global'],['Global','Global','Global','Global','Global','Global']]
|
| 49 |
-
local_k: [[5, 5], [5, 5], [-1, -1]]
|
| 50 |
-
sub_k: [[1, 1], [2, 1], [-1, -1]]
|
| 51 |
-
last_stage: false
|
| 52 |
-
feat2d: True
|
| 53 |
-
Decoder:
|
| 54 |
-
name: LPVDecoder
|
| 55 |
-
num_layer: 3
|
| 56 |
-
max_len: *max_text_length
|
| 57 |
-
use_mask: True
|
| 58 |
-
dim_feedforward: 1536
|
| 59 |
-
nhead: 12
|
| 60 |
-
dropout: 0.1
|
| 61 |
-
trans_layer: 3
|
| 62 |
-
|
| 63 |
-
Loss:
|
| 64 |
-
name: LPVLoss
|
| 65 |
-
|
| 66 |
-
PostProcess:
|
| 67 |
-
name: ARLabelDecode
|
| 68 |
-
character_dict_path: *character_dict_path
|
| 69 |
-
use_space_char: *use_space_char
|
| 70 |
-
|
| 71 |
-
Metric:
|
| 72 |
-
name: RecMetric
|
| 73 |
-
main_indicator: acc
|
| 74 |
-
is_filter: True
|
| 75 |
-
|
| 76 |
-
Train:
|
| 77 |
-
dataset:
|
| 78 |
-
name: RatioDataSetTVResize
|
| 79 |
-
ds_width: True
|
| 80 |
-
padding: false
|
| 81 |
-
data_dir_list: ['../Union14M-L-LMDB-Filtered/filter_train_challenging',
|
| 82 |
-
'../Union14M-L-LMDB-Filtered/filter_train_hard',
|
| 83 |
-
'../Union14M-L-LMDB-Filtered/filter_train_medium',
|
| 84 |
-
'../Union14M-L-LMDB-Filtered/filter_train_normal',
|
| 85 |
-
'../Union14M-L-LMDB-Filtered/filter_train_easy',
|
| 86 |
-
]
|
| 87 |
-
transforms:
|
| 88 |
-
- DecodeImagePIL: # load image
|
| 89 |
-
img_mode: RGB
|
| 90 |
-
- PARSeqAugPIL:
|
| 91 |
-
- ARLabelEncode: # Class handling label
|
| 92 |
-
character_dict_path: *character_dict_path
|
| 93 |
-
use_space_char: *use_space_char
|
| 94 |
-
max_text_length: *max_text_length
|
| 95 |
-
- KeepKeys:
|
| 96 |
-
keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
|
| 97 |
-
sampler:
|
| 98 |
-
name: RatioSampler
|
| 99 |
-
scales: [[128, 32]] # w, h
|
| 100 |
-
# divided_factor: ensures the width and height dimensions are divisible by the downsampling multiple
|
| 101 |
-
first_bs: &bs 128
|
| 102 |
-
fix_bs: false
|
| 103 |
-
divided_factor: [4, 16] # w, h
|
| 104 |
-
is_training: True
|
| 105 |
-
loader:
|
| 106 |
-
shuffle: True
|
| 107 |
-
batch_size_per_card: *bs
|
| 108 |
-
drop_last: True
|
| 109 |
-
max_ratio: &max_ratio 4
|
| 110 |
-
num_workers: 4
|
| 111 |
-
|
| 112 |
-
Eval:
|
| 113 |
-
dataset:
|
| 114 |
-
name: RatioDataSetTVResize
|
| 115 |
-
ds_width: True
|
| 116 |
-
padding: False
|
| 117 |
-
data_dir_list: [
|
| 118 |
-
'../evaluation/CUTE80',
|
| 119 |
-
'../evaluation/IC13_857',
|
| 120 |
-
'../evaluation/IC15_1811',
|
| 121 |
-
'../evaluation/IIIT5k',
|
| 122 |
-
'../evaluation/SVT',
|
| 123 |
-
'../evaluation/SVTP',
|
| 124 |
-
]
|
| 125 |
-
transforms:
|
| 126 |
-
- DecodeImagePIL: # load image
|
| 127 |
-
img_mode: RGB
|
| 128 |
-
- ARLabelEncode: # Class handling label
|
| 129 |
-
character_dict_path: *character_dict_path
|
| 130 |
-
use_space_char: *use_space_char
|
| 131 |
-
max_text_length: *max_text_length
|
| 132 |
-
- KeepKeys:
|
| 133 |
-
keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
|
| 134 |
-
sampler:
|
| 135 |
-
name: RatioSampler
|
| 136 |
-
scales: [[128, 32]] # w, h
|
| 137 |
-
# divided_factor: ensures the width and height dimensions are divisible by the downsampling multiple
|
| 138 |
-
first_bs: *bs
|
| 139 |
-
fix_bs: false
|
| 140 |
-
divided_factor: [4, 16] # w, h
|
| 141 |
-
is_training: False
|
| 142 |
-
loader:
|
| 143 |
-
shuffle: False
|
| 144 |
-
drop_last: False
|
| 145 |
-
batch_size_per_card: *bs
|
| 146 |
-
max_ratio: *max_ratio
|
| 147 |
-
num_workers: 4
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
configs/rec/lpv/svtrv2_lpv_wo_glrm.yml
DELETED
|
@@ -1,146 +0,0 @@
|
|
| 1 |
-
Global:
|
| 2 |
-
device: gpu
|
| 3 |
-
epoch_num: 20
|
| 4 |
-
log_smooth_window: 20
|
| 5 |
-
print_batch_step: 10
|
| 6 |
-
output_dir: ./output/rec/u14m_filter/svtrv2_lpv_wo_glrm/
|
| 7 |
-
save_epoch_step: 1
|
| 8 |
-
# evaluation interval in iterations is set by eval_batch_step below (500 here, not 2000)
|
| 9 |
-
eval_batch_step: [0, 500]
|
| 10 |
-
eval_epoch_step: [0, 1]
|
| 11 |
-
cal_metric_during_train: True
|
| 12 |
-
pretrained_model:
|
| 13 |
-
checkpoints:
|
| 14 |
-
use_tensorboard: false
|
| 15 |
-
infer_img:
|
| 16 |
-
# for data or label process
|
| 17 |
-
character_dict_path: &character_dict_path ./tools/utils/EN_symbol_dict.txt # 96en
|
| 18 |
-
# ./tools/utils/ppocr_keys_v1.txt # ch
|
| 19 |
-
max_text_length: &max_text_length 25
|
| 20 |
-
use_space_char: &use_space_char False
|
| 21 |
-
save_res_path: ./output/rec/u14m_filter/predicts_svtrv2_lpv_wo_glrm.txt
|
| 22 |
-
use_amp: True
|
| 23 |
-
grad_clip_val: 20
|
| 24 |
-
|
| 25 |
-
Optimizer:
|
| 26 |
-
name: AdamW
|
| 27 |
-
lr: 0.000325 # for 4gpus bs128/gpu
|
| 28 |
-
weight_decay: 0.05
|
| 29 |
-
filter_bias_and_bn: True
|
| 30 |
-
|
| 31 |
-
LRScheduler:
|
| 32 |
-
name: OneCycleLR
|
| 33 |
-
warmup_epoch: 1 # pct_start 0.075*20 = 1.5ep
|
| 34 |
-
cycle_momentum: False
|
| 35 |
-
|
| 36 |
-
Architecture:
|
| 37 |
-
model_type: rec
|
| 38 |
-
algorithm: LPV
|
| 39 |
-
in_channels: 3
|
| 40 |
-
Transform:
|
| 41 |
-
Encoder:
|
| 42 |
-
name: SVTRv2LNConvTwo33
|
| 43 |
-
use_pos_embed: False
|
| 44 |
-
dims: [128, 256, 384]
|
| 45 |
-
depths: [6, 6, 6]
|
| 46 |
-
num_heads: [4, 8, 12]
|
| 47 |
-
mixer: [['Conv','Conv','Conv','Conv','Conv','Conv'],['Conv','Conv','FGlobal','Global','Global','Global'],['Global','Global','Global','Global','Global','Global']]
|
| 48 |
-
local_k: [[5, 5], [5, 5], [-1, -1]]
|
| 49 |
-
sub_k: [[1, 1], [2, 1], [-1, -1]]
|
| 50 |
-
last_stage: false
|
| 51 |
-
feat2d: True
|
| 52 |
-
Decoder:
|
| 53 |
-
name: LPVDecoder
|
| 54 |
-
num_layer: 3
|
| 55 |
-
max_len: *max_text_length
|
| 56 |
-
use_mask: False
|
| 57 |
-
dim_feedforward: 1536
|
| 58 |
-
nhead: 12
|
| 59 |
-
dropout: 0.1
|
| 60 |
-
trans_layer: 3
|
| 61 |
-
|
| 62 |
-
Loss:
|
| 63 |
-
name: LPVLoss
|
| 64 |
-
|
| 65 |
-
PostProcess:
|
| 66 |
-
name: ARLabelDecode
|
| 67 |
-
character_dict_path: *character_dict_path
|
| 68 |
-
use_space_char: *use_space_char
|
| 69 |
-
|
| 70 |
-
Metric:
|
| 71 |
-
name: RecMetric
|
| 72 |
-
main_indicator: acc
|
| 73 |
-
is_filter: True
|
| 74 |
-
|
| 75 |
-
Train:
|
| 76 |
-
dataset:
|
| 77 |
-
name: RatioDataSetTVResize
|
| 78 |
-
ds_width: True
|
| 79 |
-
padding: false
|
| 80 |
-
data_dir_list: ['../Union14M-L-LMDB-Filtered/filter_train_challenging',
|
| 81 |
-
'../Union14M-L-LMDB-Filtered/filter_train_hard',
|
| 82 |
-
'../Union14M-L-LMDB-Filtered/filter_train_medium',
|
| 83 |
-
'../Union14M-L-LMDB-Filtered/filter_train_normal',
|
| 84 |
-
'../Union14M-L-LMDB-Filtered/filter_train_easy',
|
| 85 |
-
]
|
| 86 |
-
transforms:
|
| 87 |
-
- DecodeImagePIL: # load image
|
| 88 |
-
img_mode: RGB
|
| 89 |
-
- PARSeqAugPIL:
|
| 90 |
-
- ARLabelEncode: # Class handling label
|
| 91 |
-
character_dict_path: *character_dict_path
|
| 92 |
-
use_space_char: *use_space_char
|
| 93 |
-
max_text_length: *max_text_length
|
| 94 |
-
- KeepKeys:
|
| 95 |
-
keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
|
| 96 |
-
sampler:
|
| 97 |
-
name: RatioSampler
|
| 98 |
-
scales: [[128, 32]] # w, h
|
| 99 |
-
# divided_factor: ensures the width and height dimensions are divisible by the downsampling multiple
|
| 100 |
-
first_bs: &bs 128
|
| 101 |
-
fix_bs: false
|
| 102 |
-
divided_factor: [4, 16] # w, h
|
| 103 |
-
is_training: True
|
| 104 |
-
loader:
|
| 105 |
-
shuffle: True
|
| 106 |
-
batch_size_per_card: *bs
|
| 107 |
-
drop_last: True
|
| 108 |
-
max_ratio: &max_ratio 4
|
| 109 |
-
num_workers: 4
|
| 110 |
-
|
| 111 |
-
Eval:
|
| 112 |
-
dataset:
|
| 113 |
-
name: RatioDataSetTVResize
|
| 114 |
-
ds_width: True
|
| 115 |
-
padding: False
|
| 116 |
-
data_dir_list: [
|
| 117 |
-
'../evaluation/CUTE80',
|
| 118 |
-
'../evaluation/IC13_857',
|
| 119 |
-
'../evaluation/IC15_1811',
|
| 120 |
-
'../evaluation/IIIT5k',
|
| 121 |
-
'../evaluation/SVT',
|
| 122 |
-
'../evaluation/SVTP',
|
| 123 |
-
]
|
| 124 |
-
transforms:
|
| 125 |
-
- DecodeImagePIL: # load image
|
| 126 |
-
img_mode: RGB
|
| 127 |
-
- ARLabelEncode: # Class handling label
|
| 128 |
-
character_dict_path: *character_dict_path
|
| 129 |
-
use_space_char: *use_space_char
|
| 130 |
-
max_text_length: *max_text_length
|
| 131 |
-
- KeepKeys:
|
| 132 |
-
keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
|
| 133 |
-
sampler:
|
| 134 |
-
name: RatioSampler
|
| 135 |
-
scales: [[128, 32]] # w, h
|
| 136 |
-
# divided_factor: ensures the width and height dimensions are divisible by the downsampling multiple
|
| 137 |
-
first_bs: *bs
|
| 138 |
-
fix_bs: false
|
| 139 |
-
divided_factor: [4, 16] # w, h
|
| 140 |
-
is_training: False
|
| 141 |
-
loader:
|
| 142 |
-
shuffle: False
|
| 143 |
-
drop_last: False
|
| 144 |
-
batch_size_per_card: *bs
|
| 145 |
-
max_ratio: *max_ratio
|
| 146 |
-
num_workers: 4
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
configs/rec/maerec/vit_nrtr.yml
DELETED
|
@@ -1,116 +0,0 @@
|
|
| 1 |
-
Global:
|
| 2 |
-
device: gpu
|
| 3 |
-
epoch_num: 10
|
| 4 |
-
log_smooth_window: 20
|
| 5 |
-
print_batch_step: 10
|
| 6 |
-
output_dir: ./output/rec/u14m_filter/vit_nrtr_ft_mae/
|
| 7 |
-
save_epoch_step: 1
|
| 8 |
-
# evaluation interval in iterations is set by eval_batch_step below (500 here, not 2000)
|
| 9 |
-
eval_batch_step: [0, 500]
|
| 10 |
-
eval_epoch_step: [0, 1]
|
| 11 |
-
cal_metric_during_train: True
|
| 12 |
-
pretrained_model:
|
| 13 |
-
# ./open_ocr_vit_small_params.pth
|
| 14 |
-
checkpoints:
|
| 15 |
-
use_tensorboard: false
|
| 16 |
-
infer_img:
|
| 17 |
-
# for data or label process
|
| 18 |
-
character_dict_path: &character_dict_path ./tools/utils/EN_symbol_dict.txt # 96en
|
| 19 |
-
# ./tools/utils/ppocr_keys_v1.txt # ch
|
| 20 |
-
max_text_length: &max_text_length 25
|
| 21 |
-
use_space_char: &use_space_char False
|
| 22 |
-
save_res_path: ./output/rec/u14m_filter/predicts_vit_nrtr_ft_mae.txt
|
| 23 |
-
use_amp: True
|
| 24 |
-
project_name: maerec
|
| 25 |
-
|
| 26 |
-
Optimizer:
|
| 27 |
-
name: AdamW
|
| 28 |
-
lr: 0.00065 # for 4gpus bs256/gpu
|
| 29 |
-
weight_decay: 0.05
|
| 30 |
-
filter_bias_and_bn: True
|
| 31 |
-
|
| 32 |
-
LRScheduler:
|
| 33 |
-
name: OneCycleLR
|
| 34 |
-
warmup_epoch: 1.5 # pct_start 0.075*20 : 1.5ep
|
| 35 |
-
cycle_momentum: False
|
| 36 |
-
|
| 37 |
-
Architecture:
|
| 38 |
-
model_type: rec
|
| 39 |
-
algorithm: BGPD
|
| 40 |
-
in_channels: 3
|
| 41 |
-
Transform:
|
| 42 |
-
Encoder:
|
| 43 |
-
name: ViT
|
| 44 |
-
img_size: [32, 128]
|
| 45 |
-
patch_size: [4, 4]
|
| 46 |
-
embed_dim: 384
|
| 47 |
-
depth: 12
|
| 48 |
-
num_heads: 6
|
| 49 |
-
mlp_ratio: 4
|
| 50 |
-
qkv_bias: True
|
| 51 |
-
use_cls_token: True
|
| 52 |
-
Decoder:
|
| 53 |
-
name: NRTRDecoder
|
| 54 |
-
num_encoder_layers: -1
|
| 55 |
-
beam_size: 0
|
| 56 |
-
num_decoder_layers: 6
|
| 57 |
-
nhead: 8
|
| 58 |
-
max_len: *max_text_length
|
| 59 |
-
|
| 60 |
-
Loss:
|
| 61 |
-
name: ARLoss
|
| 62 |
-
|
| 63 |
-
PostProcess:
|
| 64 |
-
name: ARLabelDecode
|
| 65 |
-
character_dict_path: *character_dict_path
|
| 66 |
-
use_space_char: *use_space_char
|
| 67 |
-
|
| 68 |
-
Metric:
|
| 69 |
-
name: RecMetric
|
| 70 |
-
main_indicator: acc
|
| 71 |
-
is_filter: True
|
| 72 |
-
|
| 73 |
-
Train:
|
| 74 |
-
dataset:
|
| 75 |
-
name: LMDBDataSet
|
| 76 |
-
data_dir: ../Union14M-L-LMDB-Filtered
|
| 77 |
-
transforms:
|
| 78 |
-
- DecodeImagePIL: # load image
|
| 79 |
-
img_mode: RGB
|
| 80 |
-
- PARSeqAugPIL:
|
| 81 |
-
- ARLabelEncode: # Class handling label
|
| 82 |
-
character_dict_path: *character_dict_path
|
| 83 |
-
use_space_char: *use_space_char
|
| 84 |
-
max_text_length: *max_text_length
|
| 85 |
-
- RecTVResize:
|
| 86 |
-
image_shape: [32, 128]
|
| 87 |
-
padding: False
|
| 88 |
-
- KeepKeys:
|
| 89 |
-
keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
|
| 90 |
-
loader:
|
| 91 |
-
shuffle: True
|
| 92 |
-
batch_size_per_card: 256
|
| 93 |
-
drop_last: True
|
| 94 |
-
num_workers: 4
|
| 95 |
-
|
| 96 |
-
Eval:
|
| 97 |
-
dataset:
|
| 98 |
-
name: LMDBDataSet
|
| 99 |
-
data_dir: ../evaluation/
|
| 100 |
-
transforms:
|
| 101 |
-
- DecodeImagePIL: # load image
|
| 102 |
-
img_mode: RGB
|
| 103 |
-
- ARLabelEncode: # Class handling label
|
| 104 |
-
character_dict_path: *character_dict_path
|
| 105 |
-
use_space_char: *use_space_char
|
| 106 |
-
max_text_length: *max_text_length
|
| 107 |
-
- RecTVResize:
|
| 108 |
-
image_shape: [32, 128]
|
| 109 |
-
padding: False
|
| 110 |
-
- KeepKeys:
|
| 111 |
-
keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
|
| 112 |
-
loader:
|
| 113 |
-
shuffle: False
|
| 114 |
-
drop_last: False
|
| 115 |
-
batch_size_per_card: 256
|
| 116 |
-
num_workers: 4
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
configs/rec/matrn/resnet45_trans_matrn.yml
DELETED
|
@@ -1,95 +0,0 @@
|
|
| 1 |
-
Global:
|
| 2 |
-
device: gpu
|
| 3 |
-
epoch_num: 20
|
| 4 |
-
log_smooth_window: 20
|
| 5 |
-
print_batch_step: 10
|
| 6 |
-
output_dir: ./output/rec/u14m_filter/resnet45_trans_matrn/
|
| 7 |
-
eval_epoch_step: [0, 1]
|
| 8 |
-
eval_batch_step: [0, 500]
|
| 9 |
-
cal_metric_during_train: True
|
| 10 |
-
pretrained_model:
|
| 11 |
-
# ./openocr_nolang_abinet_lang.pth
|
| 12 |
-
checkpoints:
|
| 13 |
-
use_tensorboard: false
|
| 14 |
-
infer_img:
|
| 15 |
-
# for data or label process
|
| 16 |
-
character_dict_path: ./tools/utils/EN_symbol_dict.txt
|
| 17 |
-
max_text_length: 25
|
| 18 |
-
use_space_char: False
|
| 19 |
-
save_res_path: ./output/rec/u14m_filter/predicts_resnet45_trans_matrn.txt
|
| 20 |
-
grad_clip_val: 20
|
| 21 |
-
use_amp: True
|
| 22 |
-
|
| 23 |
-
Optimizer:
|
| 24 |
-
name: Adam
|
| 25 |
-
lr: 0.000133 # 4gpus 128bs/gpu
|
| 26 |
-
weight_decay: 0.0
|
| 27 |
-
filter_bias_and_bn: False
|
| 28 |
-
|
| 29 |
-
LRScheduler:
|
| 30 |
-
name: MultiStepLR
|
| 31 |
-
milestones: [12, 18]
|
| 32 |
-
gamma: 0.1
|
| 33 |
-
|
| 34 |
-
Architecture:
|
| 35 |
-
model_type: rec
|
| 36 |
-
algorithm: MATRN
|
| 37 |
-
Transform:
|
| 38 |
-
Encoder:
|
| 39 |
-
name: ResNet45
|
| 40 |
-
in_channels: 3
|
| 41 |
-
strides: [2, 1, 2, 1, 1]
|
| 42 |
-
Decoder:
|
| 43 |
-
name: MATRNDecoder
|
| 44 |
-
iter_size: 3
|
| 45 |
-
|
| 46 |
-
Loss:
|
| 47 |
-
name: ABINetLoss
|
| 48 |
-
align_weight: 3.0
|
| 49 |
-
|
| 50 |
-
PostProcess:
|
| 51 |
-
name: ABINetLabelDecode
|
| 52 |
-
|
| 53 |
-
Metric:
|
| 54 |
-
name: RecMetric
|
| 55 |
-
main_indicator: acc
|
| 56 |
-
is_filter: True
|
| 57 |
-
|
| 58 |
-
Train:
|
| 59 |
-
dataset:
|
| 60 |
-
name: LMDBDataSet
|
| 61 |
-
data_dir: ../Union14M-L-LMDB-Filtered
|
| 62 |
-
transforms:
|
| 63 |
-
- DecodeImagePIL: # load image
|
| 64 |
-
img_mode: RGB
|
| 65 |
-
- PARSeqAugPIL:
|
| 66 |
-
- ABINetLabelEncode:
|
| 67 |
-
- RecTVResize:
|
| 68 |
-
image_shape: [32, 128]
|
| 69 |
-
padding: False
|
| 70 |
-
- KeepKeys:
|
| 71 |
-
keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
|
| 72 |
-
loader:
|
| 73 |
-
shuffle: True
|
| 74 |
-
batch_size_per_card: 128
|
| 75 |
-
drop_last: True
|
| 76 |
-
num_workers: 4
|
| 77 |
-
|
| 78 |
-
Eval:
|
| 79 |
-
dataset:
|
| 80 |
-
name: LMDBDataSet
|
| 81 |
-
data_dir: ../evaluation
|
| 82 |
-
transforms:
|
| 83 |
-
- DecodeImagePIL: # load image
|
| 84 |
-
img_mode: RGB
|
| 85 |
-
- ABINetLabelEncode:
|
| 86 |
-
- RecTVResize:
|
| 87 |
-
image_shape: [32, 128]
|
| 88 |
-
padding: False
|
| 89 |
-
- KeepKeys:
|
| 90 |
-
keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
|
| 91 |
-
loader:
|
| 92 |
-
shuffle: False
|
| 93 |
-
drop_last: False
|
| 94 |
-
batch_size_per_card: 256
|
| 95 |
-
num_workers: 2
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
configs/rec/matrn/svtrv2_matrn.yml
DELETED
|
@@ -1,130 +0,0 @@
|
|
| 1 |
-
Global:
|
| 2 |
-
device: gpu
|
| 3 |
-
epoch_num: 20
|
| 4 |
-
log_smooth_window: 20
|
| 5 |
-
print_batch_step: 10
|
| 6 |
-
output_dir: ./output/rec/u14m_filter/svtrv2_matrn/
|
| 7 |
-
eval_epoch_step: [0, 1]
|
| 8 |
-
eval_batch_step: [0, 500]
|
| 9 |
-
cal_metric_during_train: True
|
| 10 |
-
pretrained_model:
|
| 11 |
-
# ./openocr_svtrv2_nolang_abinet_lang.pth
|
| 12 |
-
checkpoints:
|
| 13 |
-
use_tensorboard: false
|
| 14 |
-
infer_img:
|
| 15 |
-
# for data or label process
|
| 16 |
-
character_dict_path: ./tools/utils/EN_symbol_dict.txt
|
| 17 |
-
max_text_length: 25
|
| 18 |
-
use_space_char: False
|
| 19 |
-
save_res_path: ./output/rec/u14m_filter/predicts_svtrv2_matrn.txt
|
| 20 |
-
use_amp: True
|
| 21 |
-
grad_clip_val: 20
|
| 22 |
-
|
| 23 |
-
Optimizer:
|
| 24 |
-
name: AdamW
|
| 25 |
-
lr: 0.000325 # for 4gpus bs128/gpu
|
| 26 |
-
weight_decay: 0.05
|
| 27 |
-
filter_bias_and_bn: True
|
| 28 |
-
|
| 29 |
-
LRScheduler:
|
| 30 |
-
name: OneCycleLR
|
| 31 |
-
warmup_epoch: 1.5 # pct_start 0.075*20 = 1.5ep
|
| 32 |
-
cycle_momentum: False
|
| 33 |
-
|
| 34 |
-
Architecture:
|
| 35 |
-
model_type: rec
|
| 36 |
-
algorithm: MATRN
|
| 37 |
-
Transform:
|
| 38 |
-
Encoder:
|
| 39 |
-
name: SVTRv2LNConvTwo33
|
| 40 |
-
use_pos_embed: False
|
| 41 |
-
dims: [128, 256, 384]
|
| 42 |
-
depths: [6, 6, 6]
|
| 43 |
-
num_heads: [4, 8, 12]
|
| 44 |
-
mixer: [['Conv','Conv','Conv','Conv','Conv','Conv'],['Conv','Conv','FGlobal','Global','Global','Global'],['Global','Global','Global','Global','Global','Global']]
|
| 45 |
-
local_k: [[5, 5], [5, 5], [-1, -1]]
|
| 46 |
-
sub_k: [[1, 1], [2, 1], [-1, -1]]
|
| 47 |
-
last_stage: false
|
| 48 |
-
feat2d: True
|
| 49 |
-
Decoder:
|
| 50 |
-
name: MATRNDecoder
|
| 51 |
-
iter_size: 3
|
| 52 |
-
num_layers: 0
|
| 53 |
-
|
| 54 |
-
Loss:
|
| 55 |
-
name: ABINetLoss
|
| 56 |
-
|
| 57 |
-
PostProcess:
|
| 58 |
-
name: ABINetLabelDecode
|
| 59 |
-
|
| 60 |
-
Metric:
|
| 61 |
-
name: RecMetric
|
| 62 |
-
main_indicator: acc
|
| 63 |
-
is_filter: True
|
| 64 |
-
|
| 65 |
-
Train:
|
| 66 |
-
dataset:
|
| 67 |
-
name: RatioDataSetTVResize
|
| 68 |
-
ds_width: True
|
| 69 |
-
padding: false
|
| 70 |
-
data_dir_list: ['../Union14M-L-LMDB-Filtered/filter_train_challenging',
|
| 71 |
-
'../Union14M-L-LMDB-Filtered/filter_train_hard',
|
| 72 |
-
'../Union14M-L-LMDB-Filtered/filter_train_medium',
|
| 73 |
-
'../Union14M-L-LMDB-Filtered/filter_train_normal',
|
| 74 |
-
'../Union14M-L-LMDB-Filtered/filter_train_easy',
|
| 75 |
-
]
|
| 76 |
-
transforms:
|
| 77 |
-
- DecodeImagePIL: # load image
|
| 78 |
-
img_mode: RGB
|
| 79 |
-
- PARSeqAugPIL:
|
| 80 |
-
- ABINetLabelEncode:
|
| 81 |
-
- KeepKeys:
|
| 82 |
-
keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
|
| 83 |
-
sampler:
|
| 84 |
-
name: RatioSampler
|
| 85 |
-
scales: [[128, 32]] # w, h
|
| 86 |
-
# divided_factor: ensures the width and height dimensions are divisible by the downsampling multiple
|
| 87 |
-
first_bs: &bs 128
|
| 88 |
-
fix_bs: false
|
| 89 |
-
divided_factor: [4, 16] # w, h
|
| 90 |
-
is_training: True
|
| 91 |
-
loader:
|
| 92 |
-
shuffle: True
|
| 93 |
-
batch_size_per_card: *bs
|
| 94 |
-
drop_last: True
|
| 95 |
-
max_ratio: &max_ratio 4
|
| 96 |
-
num_workers: 4
|
| 97 |
-
|
| 98 |
-
Eval:
|
| 99 |
-
dataset:
|
| 100 |
-
name: RatioDataSetTVResize
|
| 101 |
-
ds_width: True
|
| 102 |
-
padding: False
|
| 103 |
-
data_dir_list: [
|
| 104 |
-
'../evaluation/CUTE80',
|
| 105 |
-
'../evaluation/IC13_857',
|
| 106 |
-
'../evaluation/IC15_1811',
|
| 107 |
-
'../evaluation/IIIT5k',
|
| 108 |
-
'../evaluation/SVT',
|
| 109 |
-
'../evaluation/SVTP',
|
| 110 |
-
]
|
| 111 |
-
transforms:
|
| 112 |
-
- DecodeImagePIL: # load image
|
| 113 |
-
img_mode: RGB
|
| 114 |
-
- ABINetLabelEncode:
|
| 115 |
-
- KeepKeys:
|
| 116 |
-
keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
|
| 117 |
-
sampler:
|
| 118 |
-
name: RatioSampler
|
| 119 |
-
scales: [[128, 32]] # w, h
|
| 120 |
-
# divided_factor: ensures the width and height dimensions are divisible by the downsampling multiple
|
| 121 |
-
first_bs: *bs
|
| 122 |
-
fix_bs: false
|
| 123 |
-
divided_factor: [4, 16] # w, h
|
| 124 |
-
is_training: False
|
| 125 |
-
loader:
|
| 126 |
-
shuffle: False
|
| 127 |
-
drop_last: False
|
| 128 |
-
batch_size_per_card: *bs
|
| 129 |
-
max_ratio: *max_ratio
|
| 130 |
-
num_workers: 4
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
configs/rec/mgpstr/svtrv2_mgpstr_only_char.yml
DELETED
|
@@ -1,140 +0,0 @@
|
|
| 1 |
-
Global:
|
| 2 |
-
device: gpu
|
| 3 |
-
epoch_num: 20
|
| 4 |
-
log_smooth_window: 20
|
| 5 |
-
print_batch_step: 10
|
| 6 |
-
output_dir: ./output/rec/u14m_filter/svtrv2_mgpstr_only_char/
|
| 7 |
-
eval_epoch_step: [0, 1]
|
| 8 |
-
eval_batch_step: [0, 500]
|
| 9 |
-
cal_metric_during_train: True
|
| 10 |
-
pretrained_model:
|
| 11 |
-
checkpoints:
|
| 12 |
-
use_tensorboard: false
|
| 13 |
-
infer_img:
|
| 14 |
-
# for data or label process
|
| 15 |
-
character_dict_path: &character_dict_path ./tools/utils/EN_symbol_dict.txt
|
| 16 |
-
max_text_length: &max_text_length 25
|
| 17 |
-
use_space_char: &use_space_char False
|
| 18 |
-
use_amp: True
|
| 19 |
-
save_res_path: ./output/rec/u14m_filter/predicts_svtrv2_mgpstr_only_char.txt
|
| 20 |
-
|
| 21 |
-
Optimizer:
|
| 22 |
-
name: AdamW
|
| 23 |
-
lr: 0.00065 # 4gpus 256bs/gpu
|
| 24 |
-
weight_decay: 0.05
|
| 25 |
-
filter_bias_and_bn: True
|
| 26 |
-
|
| 27 |
-
LRScheduler:
|
| 28 |
-
name: OneCycleLR
|
| 29 |
-
warmup_epoch: 1.5 # pct_start 0.075*20 = 1.5ep
|
| 30 |
-
cycle_momentum: False
|
| 31 |
-
|
| 32 |
-
Architecture:
|
| 33 |
-
model_type: rec
|
| 34 |
-
algorithm: MGPSTR
|
| 35 |
-
Transform:
|
| 36 |
-
Encoder:
|
| 37 |
-
name: SVTRv2LNConvTwo33
|
| 38 |
-
use_pos_embed: False
|
| 39 |
-
out_channels: 256
|
| 40 |
-
dims: [128, 256, 384]
|
| 41 |
-
depths: [6, 6, 6]
|
| 42 |
-
num_heads: [4, 8, 12]
|
| 43 |
-
mixer: [['Conv','Conv','Conv','Conv','Conv','Conv'],['Conv','Conv','FGlobal','Global','Global','Global'],['Global','Global','Global','Global','Global','Global']]
|
| 44 |
-
local_k: [[5, 5], [5, 5], [-1, -1]]
|
| 45 |
-
sub_k: [[1, 1], [2, 1], [-1, -1]]
|
| 46 |
-
last_stage: false
|
| 47 |
-
feat2d: false
|
| 48 |
-
Decoder:
|
| 49 |
-
name: MGPDecoder
|
| 50 |
-
only_char: &only_char True
|
| 51 |
-
|
| 52 |
-
Loss:
|
| 53 |
-
name: MGPLoss
|
| 54 |
-
only_char: *only_char
|
| 55 |
-
|
| 56 |
-
PostProcess:
|
| 57 |
-
name: MPGLabelDecode
|
| 58 |
-
character_dict_path: *character_dict_path
|
| 59 |
-
use_space_char: *use_space_char
|
| 60 |
-
only_char: *only_char
|
| 61 |
-
|
| 62 |
-
Metric:
|
| 63 |
-
name: RecMetric
|
| 64 |
-
main_indicator: acc
|
| 65 |
-
is_filter: True
|
| 66 |
-
|
| 67 |
-
Train:
|
| 68 |
-
dataset:
|
| 69 |
-
name: RatioDataSetTVResize
|
| 70 |
-
ds_width: True
|
| 71 |
-
padding: false
|
| 72 |
-
data_dir_list: ['../Union14M-L-LMDB-Filtered/filter_filter_train_challenging',
|
| 73 |
-
'../Union14M-L-LMDB-Filtered/filter_filter_train_hard',
|
| 74 |
-
'../Union14M-L-LMDB-Filtered/filter_filter_train_medium',
|
| 75 |
-
'../Union14M-L-LMDB-Filtered/filter_filter_train_normal',
|
| 76 |
-
'../Union14M-L-LMDB-Filtered/filter_filter_train_easy',
|
| 77 |
-
]
|
| 78 |
-
transforms:
|
| 79 |
-
- DecodeImagePIL: # load image
|
| 80 |
-
img_mode: RGB
|
| 81 |
-
- PARSeqAugPIL:
|
| 82 |
-
- MGPLabelEncode: # Class handling label
|
| 83 |
-
character_dict_path: *character_dict_path
|
| 84 |
-
use_space_char: *use_space_char
|
| 85 |
-
max_text_length: *max_text_length
|
| 86 |
-
only_char: *only_char
|
| 87 |
-
- KeepKeys:
|
| 88 |
-
keep_keys: ['image', 'char_label', 'length'] # dataloader will return list in this order
|
| 89 |
-
sampler:
|
| 90 |
-
name: RatioSampler
|
| 91 |
-
scales: [[128, 32]] # w, h
|
| 92 |
-
# divided_factor: ensures the width and height dimensions are divisible by the downsampling multiple
|
| 93 |
-
first_bs: &bs 256
|
| 94 |
-
fix_bs: false
|
| 95 |
-
divided_factor: [4, 16] # w, h
|
| 96 |
-
is_training: True
|
| 97 |
-
loader:
|
| 98 |
-
shuffle: True
|
| 99 |
-
batch_size_per_card: *bs
|
| 100 |
-
drop_last: True
|
| 101 |
-
max_ratio: &max_ratio 4
|
| 102 |
-
num_workers: 4
|
| 103 |
-
|
| 104 |
-
Eval:
|
| 105 |
-
dataset:
|
| 106 |
-
name: RatioDataSetTVResize
|
| 107 |
-
ds_width: True
|
| 108 |
-
padding: False
|
| 109 |
-
data_dir_list: [
|
| 110 |
-
'../evaluation/CUTE80',
|
| 111 |
-
'../evaluation/IC13_857',
|
| 112 |
-
'../evaluation/IC15_1811',
|
| 113 |
-
'../evaluation/IIIT5k',
|
| 114 |
-
'../evaluation/SVT',
|
| 115 |
-
'../evaluation/SVTP',
|
| 116 |
-
]
|
| 117 |
-
transforms:
|
| 118 |
-
- DecodeImagePIL: # load image
|
| 119 |
-
img_mode: RGB
|
| 120 |
-
- MGPLabelEncode: # Class handling label
|
| 121 |
-
character_dict_path: *character_dict_path
|
| 122 |
-
use_space_char: *use_space_char
|
| 123 |
-
max_text_length: *max_text_length
|
| 124 |
-
only_char: *only_char
|
| 125 |
-
- KeepKeys:
|
| 126 |
-
keep_keys: ['image', 'char_label', 'length'] # dataloader will return list in this order
|
| 127 |
-
sampler:
|
| 128 |
-
name: RatioSampler
|
| 129 |
-
scales: [[128, 32]] # w, h
|
| 130 |
-
# divided_factor: ensures the width and height dimensions are divisible by the downsampling multiple
|
| 131 |
-
first_bs: *bs
|
| 132 |
-
fix_bs: false
|
| 133 |
-
divided_factor: [4, 16] # w, h
|
| 134 |
-
is_training: False
|
| 135 |
-
loader:
|
| 136 |
-
shuffle: False
|
| 137 |
-
drop_last: False
|
| 138 |
-
batch_size_per_card: *bs
|
| 139 |
-
max_ratio: *max_ratio
|
| 140 |
-
num_workers: 4
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
configs/rec/mgpstr/vit_base_mgpstr_only_char.yml
DELETED
|
@@ -1,111 +0,0 @@
|
|
| 1 |
-
Global:
|
| 2 |
-
device: gpu
|
| 3 |
-
epoch_num: 20
|
| 4 |
-
log_smooth_window: 20
|
| 5 |
-
print_batch_step: 10
|
| 6 |
-
output_dir: ./output/rec/u14m_filter/vit_base_mgpstr/
|
| 7 |
-
eval_epoch_step: [0, 1]
|
| 8 |
-
eval_batch_step: [0, 500]
|
| 9 |
-
cal_metric_during_train: False
|
| 10 |
-
pretrained_model:
|
| 11 |
-
checkpoints:
|
| 12 |
-
use_tensorboard: false
|
| 13 |
-
infer_img:
|
| 14 |
-
# for data or label process
|
| 15 |
-
character_dict_path: &character_dict_path ./tools/utils/EN_symbol_dict.txt
|
| 16 |
-
max_text_length: &max_text_length 25
|
| 17 |
-
use_space_char: &use_space_char False
|
| 18 |
-
use_amp: True
|
| 19 |
-
save_res_path: ./output/rec/u14m_filter/predicts_vit_mgpstr_only_char.txt
|
| 20 |
-
grad_clip_val: 5
|
| 21 |
-
project_name: mgpstr_base
|
| 22 |
-
|
| 23 |
-
Optimizer:
|
| 24 |
-
name: Adam
|
| 25 |
-
lr: 0.000325 # 4gpus 128bs/gpu
|
| 26 |
-
weight_decay: 0.
|
| 27 |
-
filter_bias_and_bn: False
|
| 28 |
-
|
| 29 |
-
LRScheduler:
|
| 30 |
-
name: OneCycleLR
|
| 31 |
-
warmup_epoch: 1.5 # pct_start 0.075*20 = 1.5ep
|
| 32 |
-
cycle_momentum: False
|
| 33 |
-
|
| 34 |
-
Architecture:
|
| 35 |
-
model_type: rec
|
| 36 |
-
algorithm: MGPSTR
|
| 37 |
-
Transform:
|
| 38 |
-
Encoder:
|
| 39 |
-
name: ViT
|
| 40 |
-
img_size: [32,128]
|
| 41 |
-
patch_size: [4, 4]
|
| 42 |
-
embed_dim: 768
|
| 43 |
-
depth: 12
|
| 44 |
-
num_heads: 12
|
| 45 |
-
mlp_ratio: 4
|
| 46 |
-
qkv_bias: True
|
| 47 |
-
Decoder:
|
| 48 |
-
name: MGPDecoder
|
| 49 |
-
only_char: &only_char True
|
| 50 |
-
|
| 51 |
-
Loss:
|
| 52 |
-
name: MGPLoss
|
| 53 |
-
only_char: *only_char
|
| 54 |
-
|
| 55 |
-
PostProcess:
|
| 56 |
-
name: MPGLabelDecode
|
| 57 |
-
character_dict_path: *character_dict_path
|
| 58 |
-
use_space_char: *use_space_char
|
| 59 |
-
only_char: *only_char
|
| 60 |
-
|
| 61 |
-
Metric:
|
| 62 |
-
name: RecMetric
|
| 63 |
-
main_indicator: acc
|
| 64 |
-
is_filter: True
|
| 65 |
-
|
| 66 |
-
Train:
|
| 67 |
-
dataset:
|
| 68 |
-
name: LMDBDataSet
|
| 69 |
-
data_dir: ../Union14M-L-LMDB-Filtered
|
| 70 |
-
transforms:
|
| 71 |
-
- DecodeImagePIL: # load image
|
| 72 |
-
img_mode: RGB
|
| 73 |
-
- PARSeqAugPIL:
|
| 74 |
-
- MGPLabelEncode: # Class handling label
|
| 75 |
-
character_dict_path: *character_dict_path
|
| 76 |
-
use_space_char: *use_space_char
|
| 77 |
-
max_text_length: *max_text_length
|
| 78 |
-
only_char: *only_char
|
| 79 |
-
- RecTVResize:
|
| 80 |
-
image_shape: [32, 128]
|
| 81 |
-
padding: False
|
| 82 |
-
- KeepKeys:
|
| 83 |
-
keep_keys: ['image', 'char_label', 'length'] # dataloader will return list in this order
|
| 84 |
-
loader:
|
| 85 |
-
shuffle: True
|
| 86 |
-
batch_size_per_card: 128
|
| 87 |
-
drop_last: True
|
| 88 |
-
num_workers: 4
|
| 89 |
-
|
| 90 |
-
Eval:
|
| 91 |
-
dataset:
|
| 92 |
-
name: LMDBDataSet
|
| 93 |
-
data_dir: ../evaluation/
|
| 94 |
-
transforms:
|
| 95 |
-
- DecodeImagePIL: # load image
|
| 96 |
-
img_mode: RGB
|
| 97 |
-
- MGPLabelEncode: # Class handling label
|
| 98 |
-
character_dict_path: *character_dict_path
|
| 99 |
-
use_space_char: *use_space_char
|
| 100 |
-
max_text_length: *max_text_length
|
| 101 |
-
only_char: *only_char
|
| 102 |
-
- RecTVResize:
|
| 103 |
-
image_shape: [32, 128]
|
| 104 |
-
padding: False
|
| 105 |
-
- KeepKeys:
|
| 106 |
-
keep_keys: ['image', 'char_label', 'length'] # dataloader will return list in this order
|
| 107 |
-
loader:
|
| 108 |
-
shuffle: False
|
| 109 |
-
drop_last: False
|
| 110 |
-
batch_size_per_card: 256
|
| 111 |
-
num_workers: 2
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
configs/rec/mgpstr/vit_large_mgpstr_only_char.yml
DELETED
|
@@ -1,110 +0,0 @@
|
|
| 1 |
-
Global:
|
| 2 |
-
device: gpu
|
| 3 |
-
epoch_num: 20
|
| 4 |
-
log_smooth_window: 20
|
| 5 |
-
print_batch_step: 10
|
| 6 |
-
output_dir: ./output/rec/u14m_filter/vit_base_mgpstr_only_char/
|
| 7 |
-
eval_epoch_step: [0, 1]
|
| 8 |
-
eval_batch_step: [0, 500]
|
| 9 |
-
cal_metric_during_train: False
|
| 10 |
-
pretrained_model:
|
| 11 |
-
checkpoints:
|
| 12 |
-
use_tensorboard: false
|
| 13 |
-
infer_img:
|
| 14 |
-
# for data or label process
|
| 15 |
-
character_dict_path: &character_dict_path ./tools/utils/EN_symbol_dict.txt
|
| 16 |
-
max_text_length: &max_text_length 25
|
| 17 |
-
use_space_char: &use_space_char False
|
| 18 |
-
use_amp: True
|
| 19 |
-
save_res_path: ./output/rec/u14m_filter/predicts_vit_mgpstr_only_char.txt
|
| 20 |
-
grad_clip_val: 5
|
| 21 |
-
|
| 22 |
-
Optimizer:
|
| 23 |
-
name: Adam
|
| 24 |
-
lr: 0.000325 # 4gpus 128bs/gpu
|
| 25 |
-
weight_decay: 0.
|
| 26 |
-
filter_bias_and_bn: False
|
| 27 |
-
|
| 28 |
-
LRScheduler:
|
| 29 |
-
name: OneCycleLR
|
| 30 |
-
warmup_epoch: 1.5 # pct_start 0.075*20 = 1.5ep
|
| 31 |
-
cycle_momentum: False
|
| 32 |
-
|
| 33 |
-
Architecture:
|
| 34 |
-
model_type: rec
|
| 35 |
-
algorithm: MGPSTR
|
| 36 |
-
Transform:
|
| 37 |
-
Encoder:
|
| 38 |
-
name: ViT
|
| 39 |
-
img_size: [32,128]
|
| 40 |
-
patch_size: [4, 4]
|
| 41 |
-
embed_dim: 1024
|
| 42 |
-
depth: 24
|
| 43 |
-
num_heads: 16
|
| 44 |
-
mlp_ratio: 4
|
| 45 |
-
qkv_bias: True
|
| 46 |
-
Decoder:
|
| 47 |
-
name: MGPDecoder
|
| 48 |
-
only_char: &only_char True
|
| 49 |
-
|
| 50 |
-
Loss:
|
| 51 |
-
name: MGPLoss
|
| 52 |
-
only_char: *only_char
|
| 53 |
-
|
| 54 |
-
PostProcess:
|
| 55 |
-
name: MPGLabelDecode
|
| 56 |
-
character_dict_path: *character_dict_path
|
| 57 |
-
use_space_char: *use_space_char
|
| 58 |
-
only_char: *only_char
|
| 59 |
-
|
| 60 |
-
Metric:
|
| 61 |
-
name: RecMetric
|
| 62 |
-
main_indicator: acc
|
| 63 |
-
is_filter: True
|
| 64 |
-
|
| 65 |
-
Train:
|
| 66 |
-
dataset:
|
| 67 |
-
name: LMDBDataSet
|
| 68 |
-
data_dir: ../Union14M-L-LMDB-Filtered
|
| 69 |
-
transforms:
|
| 70 |
-
- DecodeImagePIL: # load image
|
| 71 |
-
img_mode: RGB
|
| 72 |
-
- PARSeqAugPIL:
|
| 73 |
-
- MGPLabelEncode: # Class handling label
|
| 74 |
-
character_dict_path: *character_dict_path
|
| 75 |
-
use_space_char: *use_space_char
|
| 76 |
-
max_text_length: *max_text_length
|
| 77 |
-
only_char: *only_char
|
| 78 |
-
- RecTVResize:
|
| 79 |
-
image_shape: [32, 128]
|
| 80 |
-
padding: False
|
| 81 |
-
- KeepKeys:
|
| 82 |
-
keep_keys: ['image', 'char_label', 'length'] # dataloader will return list in this order
|
| 83 |
-
loader:
|
| 84 |
-
shuffle: True
|
| 85 |
-
batch_size_per_card: 128
|
| 86 |
-
drop_last: True
|
| 87 |
-
num_workers: 4
|
| 88 |
-
|
| 89 |
-
Eval:
|
| 90 |
-
dataset:
|
| 91 |
-
name: LMDBDataSet
|
| 92 |
-
data_dir: ../evaluation/
|
| 93 |
-
transforms:
|
| 94 |
-
- DecodeImagePIL: # load image
|
| 95 |
-
img_mode: RGB
|
| 96 |
-
- MGPLabelEncode: # Class handling label
|
| 97 |
-
character_dict_path: *character_dict_path
|
| 98 |
-
use_space_char: *use_space_char
|
| 99 |
-
max_text_length: *max_text_length
|
| 100 |
-
only_char: *only_char
|
| 101 |
-
- RecTVResize:
|
| 102 |
-
image_shape: [32, 128]
|
| 103 |
-
padding: False
|
| 104 |
-
- KeepKeys:
|
| 105 |
-
keep_keys: ['image', 'char_label', 'length'] # dataloader will return list in this order
|
| 106 |
-
loader:
|
| 107 |
-
shuffle: False
|
| 108 |
-
drop_last: False
|
| 109 |
-
batch_size_per_card: 256
|
| 110 |
-
num_workers: 2
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
configs/rec/mgpstr/vit_mgpstr.yml
DELETED
|
@@ -1,110 +0,0 @@
|
|
| 1 |
-
Global:
|
| 2 |
-
device: gpu
|
| 3 |
-
epoch_num: 20
|
| 4 |
-
log_smooth_window: 20
|
| 5 |
-
print_batch_step: 10
|
| 6 |
-
output_dir: ./output/rec/u14m_filter/vit_mgpstr/
|
| 7 |
-
eval_epoch_step: [0, 1]
|
| 8 |
-
eval_batch_step: [100000, 2000]
|
| 9 |
-
cal_metric_during_train: False
|
| 10 |
-
pretrained_model:
|
| 11 |
-
checkpoints:
|
| 12 |
-
use_tensorboard: false
|
| 13 |
-
infer_img:
|
| 14 |
-
# for data or label process
|
| 15 |
-
character_dict_path: &character_dict_path ./tools/utils/EN_symbol_dict.txt
|
| 16 |
-
max_text_length: &max_text_length 25
|
| 17 |
-
use_space_char: &use_space_char False
|
| 18 |
-
use_amp: True
|
| 19 |
-
save_res_path: ./output/rec/u14m_filter/predicts_vit_mgpstr.txt
|
| 20 |
-
grad_clip_val: 5
|
| 21 |
-
|
| 22 |
-
Optimizer:
|
| 23 |
-
name: Adam
|
| 24 |
-
lr: 0.000325 # 4gpus 128bs/gpu
|
| 25 |
-
weight_decay: 0.
|
| 26 |
-
filter_bias_and_bn: False
|
| 27 |
-
|
| 28 |
-
LRScheduler:
|
| 29 |
-
name: OneCycleLR
|
| 30 |
-
warmup_epoch: 1.5 # pct_start 0.075*20 = 1.5ep
|
| 31 |
-
cycle_momentum: False
|
| 32 |
-
|
| 33 |
-
Architecture:
|
| 34 |
-
model_type: rec
|
| 35 |
-
algorithm: MGPSTR
|
| 36 |
-
Transform:
|
| 37 |
-
Encoder:
|
| 38 |
-
name: ViT
|
| 39 |
-
img_size: [32,128]
|
| 40 |
-
patch_size: [4, 4]
|
| 41 |
-
embed_dim: 384
|
| 42 |
-
depth: 12
|
| 43 |
-
num_heads: 6
|
| 44 |
-
mlp_ratio: 4
|
| 45 |
-
qkv_bias: True
|
| 46 |
-
Decoder:
|
| 47 |
-
name: MGPDecoder
|
| 48 |
-
only_char: &only_char False
|
| 49 |
-
|
| 50 |
-
Loss:
|
| 51 |
-
name: MGPLoss
|
| 52 |
-
only_char: *only_char
|
| 53 |
-
|
| 54 |
-
PostProcess:
|
| 55 |
-
name: MPGLabelDecode
|
| 56 |
-
character_dict_path: *character_dict_path
|
| 57 |
-
use_space_char: *use_space_char
|
| 58 |
-
only_char: *only_char
|
| 59 |
-
|
| 60 |
-
Metric:
|
| 61 |
-
name: RecMPGMetric
|
| 62 |
-
main_indicator: acc
|
| 63 |
-
is_filter: True
|
| 64 |
-
|
| 65 |
-
Train:
|
| 66 |
-
dataset:
|
| 67 |
-
name: LMDBDataSet
|
| 68 |
-
data_dir: ../Union14M-L-LMDB-Filtered
|
| 69 |
-
transforms:
|
| 70 |
-
- DecodeImagePIL: # load image
|
| 71 |
-
img_mode: RGB
|
| 72 |
-
- PARSeqAugPIL:
|
| 73 |
-
- MGPLabelEncode: # Class handling label
|
| 74 |
-
character_dict_path: *character_dict_path
|
| 75 |
-
use_space_char: *use_space_char
|
| 76 |
-
max_text_length: *max_text_length
|
| 77 |
-
only_char: *only_char
|
| 78 |
-
- RecTVResize:
|
| 79 |
-
image_shape: [32, 128]
|
| 80 |
-
padding: False
|
| 81 |
-
- KeepKeys:
|
| 82 |
-
keep_keys: ['image', 'char_label', 'bpe_label', 'wp_label', 'length'] # dataloader will return list in this order
|
| 83 |
-
loader:
|
| 84 |
-
shuffle: True
|
| 85 |
-
batch_size_per_card: 128
|
| 86 |
-
drop_last: True
|
| 87 |
-
num_workers: 4
|
| 88 |
-
|
| 89 |
-
Eval:
|
| 90 |
-
dataset:
|
| 91 |
-
name: LMDBDataSet
|
| 92 |
-
data_dir: ../evaluation/
|
| 93 |
-
transforms:
|
| 94 |
-
- DecodeImagePIL: # load image
|
| 95 |
-
img_mode: RGB
|
| 96 |
-
- MGPLabelEncode: # Class handling label
|
| 97 |
-
character_dict_path: *character_dict_path
|
| 98 |
-
use_space_char: *use_space_char
|
| 99 |
-
max_text_length: *max_text_length
|
| 100 |
-
only_char: *only_char
|
| 101 |
-
- RecTVResize:
|
| 102 |
-
image_shape: [32, 128]
|
| 103 |
-
padding: False
|
| 104 |
-
- KeepKeys:
|
| 105 |
-
keep_keys: ['image', 'char_label', 'bpe_label', 'wp_label', 'length'] # dataloader will return list in this order
|
| 106 |
-
loader:
|
| 107 |
-
shuffle: False
|
| 108 |
-
drop_last: False
|
| 109 |
-
batch_size_per_card: 256
|
| 110 |
-
num_workers: 2
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
configs/rec/mgpstr/vit_mgpstr_only_char.yml
DELETED
|
@@ -1,110 +0,0 @@
|
|
| 1 |
-
Global:
|
| 2 |
-
device: gpu
|
| 3 |
-
epoch_num: 20
|
| 4 |
-
log_smooth_window: 20
|
| 5 |
-
print_batch_step: 10
|
| 6 |
-
output_dir: ./output/rec/u14m_filter/vit_mgpstr_only_char/
|
| 7 |
-
eval_epoch_step: [0, 1]
|
| 8 |
-
eval_batch_step: [0, 500]
|
| 9 |
-
cal_metric_during_train: False
|
| 10 |
-
pretrained_model:
|
| 11 |
-
checkpoints:
|
| 12 |
-
use_tensorboard: false
|
| 13 |
-
infer_img:
|
| 14 |
-
# for data or label process
|
| 15 |
-
character_dict_path: &character_dict_path ./tools/utils/EN_symbol_dict.txt
|
| 16 |
-
max_text_length: &max_text_length 25
|
| 17 |
-
use_space_char: &use_space_char False
|
| 18 |
-
use_amp: True
|
| 19 |
-
save_res_path: ./output/rec/u14m_filter/predicts_vit_mgpstr_only_char.txt
|
| 20 |
-
grad_clip_val: 5
|
| 21 |
-
|
| 22 |
-
Optimizer:
|
| 23 |
-
name: Adam
|
| 24 |
-
lr: 0.000325 # 4gpus 128bs/gpu
|
| 25 |
-
weight_decay: 0.
|
| 26 |
-
filter_bias_and_bn: False
|
| 27 |
-
|
| 28 |
-
LRScheduler:
|
| 29 |
-
name: OneCycleLR
|
| 30 |
-
warmup_epoch: 1.5 # pct_start 0.075*20 = 1.5ep
|
| 31 |
-
cycle_momentum: False
|
| 32 |
-
|
| 33 |
-
Architecture:
|
| 34 |
-
model_type: rec
|
| 35 |
-
algorithm: MGPSTR
|
| 36 |
-
Transform:
|
| 37 |
-
Encoder:
|
| 38 |
-
name: ViT
|
| 39 |
-
img_size: [32,128]
|
| 40 |
-
patch_size: [4, 4]
|
| 41 |
-
embed_dim: 384
|
| 42 |
-
depth: 12
|
| 43 |
-
num_heads: 6
|
| 44 |
-
mlp_ratio: 4
|
| 45 |
-
qkv_bias: True
|
| 46 |
-
Decoder:
|
| 47 |
-
name: MGPDecoder
|
| 48 |
-
only_char: &only_char True
|
| 49 |
-
|
| 50 |
-
Loss:
|
| 51 |
-
name: MGPLoss
|
| 52 |
-
only_char: *only_char
|
| 53 |
-
|
| 54 |
-
PostProcess:
|
| 55 |
-
name: MPGLabelDecode
|
| 56 |
-
character_dict_path: *character_dict_path
|
| 57 |
-
use_space_char: *use_space_char
|
| 58 |
-
only_char: *only_char
|
| 59 |
-
|
| 60 |
-
Metric:
|
| 61 |
-
name: RecMetric
|
| 62 |
-
main_indicator: acc
|
| 63 |
-
is_filter: True
|
| 64 |
-
|
| 65 |
-
Train:
|
| 66 |
-
dataset:
|
| 67 |
-
name: LMDBDataSet
|
| 68 |
-
data_dir: ../Union14M-L-LMDB-Filtered
|
| 69 |
-
transforms:
|
| 70 |
-
- DecodeImagePIL: # load image
|
| 71 |
-
img_mode: RGB
|
| 72 |
-
- PARSeqAugPIL:
|
| 73 |
-
- MGPLabelEncode: # Class handling label
|
| 74 |
-
character_dict_path: *character_dict_path
|
| 75 |
-
use_space_char: *use_space_char
|
| 76 |
-
max_text_length: *max_text_length
|
| 77 |
-
only_char: *only_char
|
| 78 |
-
- RecTVResize:
|
| 79 |
-
image_shape: [32, 128]
|
| 80 |
-
padding: False
|
| 81 |
-
- KeepKeys:
|
| 82 |
-
keep_keys: ['image', 'char_label', 'length'] # dataloader will return list in this order
|
| 83 |
-
loader:
|
| 84 |
-
shuffle: True
|
| 85 |
-
batch_size_per_card: 128
|
| 86 |
-
drop_last: True
|
| 87 |
-
num_workers: 4
|
| 88 |
-
|
| 89 |
-
Eval:
|
| 90 |
-
dataset:
|
| 91 |
-
name: LMDBDataSet
|
| 92 |
-
data_dir: ../evaluation/
|
| 93 |
-
transforms:
|
| 94 |
-
- DecodeImagePIL: # load image
|
| 95 |
-
img_mode: RGB
|
| 96 |
-
- MGPLabelEncode: # Class handling label
|
| 97 |
-
character_dict_path: *character_dict_path
|
| 98 |
-
use_space_char: *use_space_char
|
| 99 |
-
max_text_length: *max_text_length
|
| 100 |
-
only_char: *only_char
|
| 101 |
-
- RecTVResize:
|
| 102 |
-
image_shape: [32, 128]
|
| 103 |
-
padding: False
|
| 104 |
-
- KeepKeys:
|
| 105 |
-
keep_keys: ['image', 'char_label', 'length'] # dataloader will return list in this order
|
| 106 |
-
loader:
|
| 107 |
-
shuffle: False
|
| 108 |
-
drop_last: False
|
| 109 |
-
batch_size_per_card: 256
|
| 110 |
-
num_workers: 2
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
configs/rec/moran/resnet31_lstm_moran.yml
DELETED
|
@@ -1,92 +0,0 @@
|
|
| 1 |
-
Global:
|
| 2 |
-
device: gpu
|
| 3 |
-
epoch_num: 20
|
| 4 |
-
log_smooth_window: 20
|
| 5 |
-
print_batch_step: 10
|
| 6 |
-
output_dir: ./output/rec/u14m_filter/resnet31_lstm_moran
|
| 7 |
-
eval_epoch_step: [0, 1]
|
| 8 |
-
eval_batch_step: [0, 500]
|
| 9 |
-
cal_metric_during_train: True
|
| 10 |
-
pretrained_model:
|
| 11 |
-
checkpoints:
|
| 12 |
-
use_tensorboard: false
|
| 13 |
-
infer_img:
|
| 14 |
-
# for data or label process
|
| 15 |
-
character_dict_path: ./tools/utils/EN_symbol_dict.txt
|
| 16 |
-
max_text_length: 25
|
| 17 |
-
use_space_char: False
|
| 18 |
-
save_res_path: ./output/rec/predicts_moran.txt
|
| 19 |
-
use_amp: True
|
| 20 |
-
grad_clip_val: 1.0
|
| 21 |
-
|
| 22 |
-
Optimizer:
|
| 23 |
-
name: Adam
|
| 24 |
-
lr: 0.002 # for 1gpus bs1024/gpu
|
| 25 |
-
weight_decay: 0.05
|
| 26 |
-
filter_bias_and_bn: False
|
| 27 |
-
|
| 28 |
-
LRScheduler:
|
| 29 |
-
name: OneCycleLR
|
| 30 |
-
warmup_epoch: 1.5 # pct_start 0.075*20 = 1.5ep
|
| 31 |
-
cycle_momentum: False
|
| 32 |
-
|
| 33 |
-
Architecture:
|
| 34 |
-
model_type: rec
|
| 35 |
-
algorithm: MORAN
|
| 36 |
-
Transform:
|
| 37 |
-
name: MORN
|
| 38 |
-
target_shape: [32, 128]
|
| 39 |
-
Encoder:
|
| 40 |
-
name: ResNet_ASTER
|
| 41 |
-
Decoder:
|
| 42 |
-
name: ASTERDecoder
|
| 43 |
-
|
| 44 |
-
Loss:
|
| 45 |
-
name: ARLoss
|
| 46 |
-
|
| 47 |
-
Metric:
|
| 48 |
-
name: RecMetric
|
| 49 |
-
main_indicator: acc
|
| 50 |
-
is_filter: True
|
| 51 |
-
|
| 52 |
-
PostProcess:
|
| 53 |
-
name: ARLabelDecode
|
| 54 |
-
|
| 55 |
-
Train:
|
| 56 |
-
dataset:
|
| 57 |
-
name: LMDBDataSet
|
| 58 |
-
data_dir: ../Union14M-L-LMDB-Filtered
|
| 59 |
-
transforms:
|
| 60 |
-
- DecodeImagePIL: # load image
|
| 61 |
-
img_mode: RGB
|
| 62 |
-
- PARSeqAugPIL:
|
| 63 |
-
- ARLabelEncode: # Class handling label
|
| 64 |
-
- RecTVResize:
|
| 65 |
-
image_shape: [64, 256]
|
| 66 |
-
padding: False
|
| 67 |
-
- KeepKeys:
|
| 68 |
-
keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
|
| 69 |
-
loader:
|
| 70 |
-
shuffle: True
|
| 71 |
-
batch_size_per_card: 1024
|
| 72 |
-
drop_last: True
|
| 73 |
-
num_workers: 4
|
| 74 |
-
|
| 75 |
-
Eval:
|
| 76 |
-
dataset:
|
| 77 |
-
name: LMDBDataSet
|
| 78 |
-
data_dir: ../evaluation
|
| 79 |
-
transforms:
|
| 80 |
-
- DecodeImagePIL: # load image
|
| 81 |
-
img_mode: RGB
|
| 82 |
-
- ARLabelEncode: # Class handling label
|
| 83 |
-
- RecTVResize:
|
| 84 |
-
image_shape: [64, 256]
|
| 85 |
-
padding: False
|
| 86 |
-
- KeepKeys:
|
| 87 |
-
keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
|
| 88 |
-
loader:
|
| 89 |
-
shuffle: False
|
| 90 |
-
drop_last: False
|
| 91 |
-
batch_size_per_card: 256
|
| 92 |
-
num_workers: 2
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
configs/rec/nrtr/focalsvtr_nrtr_maxraio12.yml
DELETED
|
@@ -1,145 +0,0 @@
|
|
| 1 |
-
Global:
|
| 2 |
-
device: gpu
|
| 3 |
-
epoch_num: 20
|
| 4 |
-
log_smooth_window: 20
|
| 5 |
-
print_batch_step: 10
|
| 6 |
-
output_dir: ./output/rec/u14m_filter/focalsvtr_nrtr_maxrtio12
|
| 7 |
-
save_epoch_step: 1
|
| 8 |
-
# evaluation is run every 500 iterations, starting from iteration 0
|
| 9 |
-
eval_batch_step: [0, 500]
|
| 10 |
-
eval_epoch_step: [0, 1]
|
| 11 |
-
cal_metric_during_train: True
|
| 12 |
-
pretrained_model:
|
| 13 |
-
checkpoints:
|
| 14 |
-
use_tensorboard: false
|
| 15 |
-
infer_img: ../ltb/img
|
| 16 |
-
# for data or label process
|
| 17 |
-
character_dict_path: &character_dict_path ./tools/utils/EN_symbol_dict.txt # 96en
|
| 18 |
-
# ./tools/utils/ppocr_keys_v1.txt # ch
|
| 19 |
-
max_text_length: &max_text_length 25
|
| 20 |
-
use_space_char: &use_space_char False
|
| 21 |
-
save_res_path: ./output/rec/u14m_filter/predicts_focalsvtr_nrtr_maxrtio12.txt
|
| 22 |
-
use_amp: True
|
| 23 |
-
|
| 24 |
-
Optimizer:
|
| 25 |
-
name: AdamW
|
| 26 |
-
lr: 0.00065 # for 4gpus bs256/gpu
|
| 27 |
-
weight_decay: 0.05
|
| 28 |
-
filter_bias_and_bn: True
|
| 29 |
-
|
| 30 |
-
LRScheduler:
|
| 31 |
-
name: OneCycleLR
|
| 32 |
-
warmup_epoch: 1.5 # pct_start 0.075*20 = 1.5ep
|
| 33 |
-
cycle_momentum: False
|
| 34 |
-
|
| 35 |
-
Architecture:
|
| 36 |
-
model_type: rec
|
| 37 |
-
algorithm: NRTR
|
| 38 |
-
in_channels: 3
|
| 39 |
-
Transform:
|
| 40 |
-
Encoder:
|
| 41 |
-
name: FocalSVTR
|
| 42 |
-
img_size: [32, 128]
|
| 43 |
-
depths: [6, 6, 6]
|
| 44 |
-
embed_dim: 96
|
| 45 |
-
sub_k: [[1, 1], [2, 1], [1, 1]]
|
| 46 |
-
focal_levels: [3, 3, 3]
|
| 47 |
-
last_stage: False
|
| 48 |
-
Decoder:
|
| 49 |
-
name: NRTRDecoder
|
| 50 |
-
num_encoder_layers: -1
|
| 51 |
-
beam_size: 0
|
| 52 |
-
num_decoder_layers: 2
|
| 53 |
-
nhead: 12
|
| 54 |
-
max_len: *max_text_length
|
| 55 |
-
|
| 56 |
-
Loss:
|
| 57 |
-
name: ARLoss
|
| 58 |
-
|
| 59 |
-
PostProcess:
|
| 60 |
-
name: ARLabelDecode
|
| 61 |
-
character_dict_path: *character_dict_path
|
| 62 |
-
use_space_char: *use_space_char
|
| 63 |
-
|
| 64 |
-
Metric:
|
| 65 |
-
name: RecMetric
|
| 66 |
-
main_indicator: acc
|
| 67 |
-
is_filter: True
|
| 68 |
-
|
| 69 |
-
Train:
|
| 70 |
-
dataset:
|
| 71 |
-
name: RatioDataSet
|
| 72 |
-
ds_width: True
|
| 73 |
-
padding: &padding True
|
| 74 |
-
padding_rand: True
|
| 75 |
-
padding_doub: True
|
| 76 |
-
data_dir_list: ['../Union14M-L-LMDB-Filtered/filter_train_challenging',
|
| 77 |
-
'../Union14M-L-LMDB-Filtered/filter_train_hard',
|
| 78 |
-
'../Union14M-L-LMDB-Filtered/filter_train_medium',
|
| 79 |
-
'../Union14M-L-LMDB-Filtered/filter_train_normal',
|
| 80 |
-
'../Union14M-L-LMDB-Filtered/filter_train_easy',
|
| 81 |
-
]
|
| 82 |
-
transforms:
|
| 83 |
-
- DecodeImage: # load image
|
| 84 |
-
img_mode: BGR
|
| 85 |
-
channel_first: False
|
| 86 |
-
- PARSeqAug:
|
| 87 |
-
- ARLabelEncode: # Class handling label
|
| 88 |
-
character_dict_path: *character_dict_path
|
| 89 |
-
use_space_char: *use_space_char
|
| 90 |
-
max_text_length: *max_text_length
|
| 91 |
-
- KeepKeys:
|
| 92 |
-
keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
|
| 93 |
-
sampler:
|
| 94 |
-
name: RatioSampler
|
| 95 |
-
scales: [[128, 32]] # w, h
|
| 96 |
-
# divided_factor: ensures the width and height dimensions are divisible by the downsampling multiple
|
| 97 |
-
first_bs: &bs 256
|
| 98 |
-
fix_bs: false
|
| 99 |
-
divided_factor: [4, 16] # w, h
|
| 100 |
-
is_training: True
|
| 101 |
-
loader:
|
| 102 |
-
shuffle: True
|
| 103 |
-
batch_size_per_card: *bs
|
| 104 |
-
drop_last: True
|
| 105 |
-
max_ratio: &max_ratio 12
|
| 106 |
-
num_workers: 4
|
| 107 |
-
|
| 108 |
-
Eval:
|
| 109 |
-
dataset:
|
| 110 |
-
name: RatioDataSet
|
| 111 |
-
ds_width: True
|
| 112 |
-
padding: False
|
| 113 |
-
padding_rand: False
|
| 114 |
-
data_dir_list: [
|
| 115 |
-
'../evaluation/CUTE80',
|
| 116 |
-
'../evaluation/IC13_857',
|
| 117 |
-
'../evaluation/IC15_1811',
|
| 118 |
-
'../evaluation/IIIT5k',
|
| 119 |
-
'../evaluation/SVT',
|
| 120 |
-
'../evaluation/SVTP',
|
| 121 |
-
]
|
| 122 |
-
transforms:
|
| 123 |
-
- DecodeImage: # load image
|
| 124 |
-
img_mode: BGR
|
| 125 |
-
channel_first: False
|
| 126 |
-
- ARLabelEncode: # Class handling label
|
| 127 |
-
character_dict_path: *character_dict_path
|
| 128 |
-
use_space_char: *use_space_char
|
| 129 |
-
max_text_length: *max_text_length
|
| 130 |
-
- KeepKeys:
|
| 131 |
-
keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
|
| 132 |
-
sampler:
|
| 133 |
-
name: RatioSampler
|
| 134 |
-
scales: [[128, 32]] # w, h
|
| 135 |
-
# divided_factor: ensures the width and height dimensions are divisible by the downsampling multiple
|
| 136 |
-
first_bs: 128
|
| 137 |
-
fix_bs: false
|
| 138 |
-
divided_factor: [4, 16] # w, h
|
| 139 |
-
is_training: False
|
| 140 |
-
loader:
|
| 141 |
-
shuffle: False
|
| 142 |
-
drop_last: False
|
| 143 |
-
max_ratio: *max_ratio
|
| 144 |
-
batch_size_per_card: 128
|
| 145 |
-
num_workers: 4
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
configs/rec/nrtr/nrtr.yml
DELETED
|
@@ -1,107 +0,0 @@
|
|
| 1 |
-
Global:
|
| 2 |
-
device: gpu
|
| 3 |
-
epoch_num: 20
|
| 4 |
-
log_smooth_window: 20
|
| 5 |
-
print_batch_step: 10
|
| 6 |
-
output_dir: ./output/rec/u14m_filter/nrtr/
|
| 7 |
-
save_epoch_step: 1
|
| 8 |
-
# evaluation is run every 500 iterations, starting from iteration 0
|
| 9 |
-
eval_batch_step: [0, 500]
|
| 10 |
-
eval_epoch_step: [0, 1]
|
| 11 |
-
cal_metric_during_train: True
|
| 12 |
-
pretrained_model:
|
| 13 |
-
checkpoints:
|
| 14 |
-
use_tensorboard: false
|
| 15 |
-
infer_img:
|
| 16 |
-
# for data or label process
|
| 17 |
-
character_dict_path: &character_dict_path ./tools/utils/EN_symbol_dict.txt # 96en
|
| 18 |
-
# ./tools/utils/ppocr_keys_v1.txt # ch
|
| 19 |
-
max_text_length: &max_text_length 25
|
| 20 |
-
use_space_char: &use_space_char False
|
| 21 |
-
save_res_path: ./output/rec/u14m_filter/predicts_nrtr.txt
|
| 22 |
-
use_amp: True
|
| 23 |
-
|
| 24 |
-
Optimizer:
|
| 25 |
-
name: AdamW
|
| 26 |
-
lr: 0.00065 # for 4gpus bs256/gpu
|
| 27 |
-
weight_decay: 0.05
|
| 28 |
-
filter_bias_and_bn: True
|
| 29 |
-
|
| 30 |
-
LRScheduler:
|
| 31 |
-
name: OneCycleLR
|
| 32 |
-
warmup_epoch: 1.5 # pct_start 0.075*20 = 1.5ep
|
| 33 |
-
cycle_momentum: False
|
| 34 |
-
|
| 35 |
-
Architecture:
|
| 36 |
-
model_type: rec
|
| 37 |
-
algorithm: BGPD
|
| 38 |
-
in_channels: 3
|
| 39 |
-
Transform:
|
| 40 |
-
Encoder:
|
| 41 |
-
name: NRTREncoder
|
| 42 |
-
Decoder:
|
| 43 |
-
name: NRTRDecoder
|
| 44 |
-
num_encoder_layers: 6
|
| 45 |
-
beam_size: 0
|
| 46 |
-
num_decoder_layers: 6
|
| 47 |
-
nhead: 8
|
| 48 |
-
max_len: *max_text_length
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
Loss:
|
| 52 |
-
name: ARLoss
|
| 53 |
-
|
| 54 |
-
PostProcess:
|
| 55 |
-
name: ARLabelDecode
|
| 56 |
-
character_dict_path: *character_dict_path
|
| 57 |
-
use_space_char: *use_space_char
|
| 58 |
-
|
| 59 |
-
Metric:
|
| 60 |
-
name: RecMetric
|
| 61 |
-
main_indicator: acc
|
| 62 |
-
is_filter: True
|
| 63 |
-
|
| 64 |
-
Train:
|
| 65 |
-
dataset:
|
| 66 |
-
name: LMDBDataSet
|
| 67 |
-
data_dir: ../Union14M-L-LMDB-Filtered
|
| 68 |
-
transforms:
|
| 69 |
-
- DecodeImagePIL: # load image
|
| 70 |
-
img_mode: RGB
|
| 71 |
-
- PARSeqAugPIL:
|
| 72 |
-
- ARLabelEncode: # Class handling label
|
| 73 |
-
character_dict_path: *character_dict_path
|
| 74 |
-
use_space_char: *use_space_char
|
| 75 |
-
max_text_length: *max_text_length
|
| 76 |
-
- RecTVResize:
|
| 77 |
-
image_shape: [32, 128]
|
| 78 |
-
padding: False
|
| 79 |
-
- KeepKeys:
|
| 80 |
-
keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
|
| 81 |
-
loader:
|
| 82 |
-
shuffle: True
|
| 83 |
-
batch_size_per_card: 256
|
| 84 |
-
drop_last: True
|
| 85 |
-
num_workers: 4
|
| 86 |
-
|
| 87 |
-
Eval:
|
| 88 |
-
dataset:
|
| 89 |
-
name: LMDBDataSet
|
| 90 |
-
data_dir: ../evaluation/
|
| 91 |
-
transforms:
|
| 92 |
-
- DecodeImagePIL: # load image
|
| 93 |
-
img_mode: RGB
|
| 94 |
-
- ARLabelEncode: # Class handling label
|
| 95 |
-
character_dict_path: *character_dict_path
|
| 96 |
-
use_space_char: *use_space_char
|
| 97 |
-
max_text_length: *max_text_length
|
| 98 |
-
- RecTVResize:
|
| 99 |
-
image_shape: [32, 128]
|
| 100 |
-
padding: False
|
| 101 |
-
- KeepKeys:
|
| 102 |
-
keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
|
| 103 |
-
loader:
|
| 104 |
-
shuffle: False
|
| 105 |
-
drop_last: False
|
| 106 |
-
batch_size_per_card: 256
|
| 107 |
-
num_workers: 2
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
configs/rec/nrtr/svtr_base_nrtr.yml
DELETED
|
@@ -1,118 +0,0 @@
|
|
| 1 |
-
Global:
|
| 2 |
-
device: gpu
|
| 3 |
-
epoch_num: 20
|
| 4 |
-
log_smooth_window: 20
|
| 5 |
-
print_batch_step: 10
|
| 6 |
-
output_dir: ./output/rec/u14m_filter/svtr_base_nrtr/
|
| 7 |
-
save_epoch_step: 1
|
| 8 |
-
# evaluation is run every 500 iterations, starting from iteration 0
|
| 9 |
-
eval_batch_step: [0, 500]
|
| 10 |
-
eval_epoch_step: [0, 1]
|
| 11 |
-
cal_metric_during_train: True
|
| 12 |
-
pretrained_model:
|
| 13 |
-
checkpoints:
|
| 14 |
-
use_tensorboard: false
|
| 15 |
-
infer_img:
|
| 16 |
-
# for data or label process
|
| 17 |
-
character_dict_path: &character_dict_path ./tools/utils/EN_symbol_dict.txt # 96en
|
| 18 |
-
# ./tools/utils/ppocr_keys_v1.txt # ch
|
| 19 |
-
max_text_length: &max_text_length 25
|
| 20 |
-
use_space_char: &use_space_char False
|
| 21 |
-
save_res_path: ./output/rec/u14m_filter/predicts_svtr_base_nrtr.txt
|
| 22 |
-
use_amp: True
|
| 23 |
-
|
| 24 |
-
Optimizer:
|
| 25 |
-
name: AdamW
|
| 26 |
-
lr: 0.00065 # for 4gpus bs256/gpu
|
| 27 |
-
weight_decay: 0.05
|
| 28 |
-
filter_bias_and_bn: True
|
| 29 |
-
|
| 30 |
-
LRScheduler:
|
| 31 |
-
name: OneCycleLR
|
| 32 |
-
warmup_epoch: 1.5 # pct_start 0.075*20 = 1.5ep
|
| 33 |
-
cycle_momentum: False
|
| 34 |
-
|
| 35 |
-
Architecture:
|
| 36 |
-
model_type: rec
|
| 37 |
-
algorithm: NRTR
|
| 38 |
-
in_channels: 3
|
| 39 |
-
Transform:
|
| 40 |
-
Encoder:
|
| 41 |
-
name: SVTRNet
|
| 42 |
-
img_size: [32, 128]
|
| 43 |
-
out_char_num: 25
|
| 44 |
-
out_channels: 256
|
| 45 |
-
patch_merging: 'Conv'
|
| 46 |
-
embed_dim: [128, 256, 384]
|
| 47 |
-
depth: [6, 6, 6]
|
| 48 |
-
num_heads: [4, 8, 12]
|
| 49 |
-
mixer: ['Conv','Conv','Conv','Conv','Conv','Conv', 'Conv','Conv', 'Global','Global','Global','Global','Global','Global','Global','Global','Global','Global']
|
| 50 |
-
local_mixer: [[5, 5], [5, 5], [5, 5]]
|
| 51 |
-
last_stage: False
|
| 52 |
-
prenorm: True
|
| 53 |
-
Decoder:
|
| 54 |
-
name: NRTRDecoder
|
| 55 |
-
num_encoder_layers: -1
|
| 56 |
-
beam_size: 0
|
| 57 |
-
num_decoder_layers: 2
|
| 58 |
-
nhead: 12
|
| 59 |
-
max_len: *max_text_length
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
Loss:
|
| 63 |
-
name: ARLoss
|
| 64 |
-
|
| 65 |
-
PostProcess:
|
| 66 |
-
name: ARLabelDecode
|
| 67 |
-
character_dict_path: *character_dict_path
|
| 68 |
-
use_space_char: *use_space_char
|
| 69 |
-
|
| 70 |
-
Metric:
|
| 71 |
-
name: RecMetric
|
| 72 |
-
main_indicator: acc
|
| 73 |
-
is_filter: True
|
| 74 |
-
|
| 75 |
-
Train:
|
| 76 |
-
dataset:
|
| 77 |
-
name: LMDBDataSet
|
| 78 |
-
data_dir: ../Union14M-L-LMDB-Filtered
|
| 79 |
-
transforms:
|
| 80 |
-
- DecodeImagePIL: # load image
|
| 81 |
-
img_mode: RGB
|
| 82 |
-
- PARSeqAugPIL:
|
| 83 |
-
- ARLabelEncode: # Class handling label
|
| 84 |
-
character_dict_path: *character_dict_path
|
| 85 |
-
use_space_char: *use_space_char
|
| 86 |
-
max_text_length: *max_text_length
|
| 87 |
-
- RecTVResize:
|
| 88 |
-
image_shape: [32, 128]
|
| 89 |
-
padding: False
|
| 90 |
-
- KeepKeys:
|
| 91 |
-
keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
|
| 92 |
-
loader:
|
| 93 |
-
shuffle: True
|
| 94 |
-
batch_size_per_card: 256
|
| 95 |
-
drop_last: True
|
| 96 |
-
num_workers: 4
|
| 97 |
-
|
| 98 |
-
Eval:
|
| 99 |
-
dataset:
|
| 100 |
-
name: LMDBDataSet
|
| 101 |
-
data_dir: ../evaluation/
|
| 102 |
-
transforms:
|
| 103 |
-
- DecodeImagePIL: # load image
|
| 104 |
-
img_mode: RGB
|
| 105 |
-
- ARLabelEncode: # Class handling label
|
| 106 |
-
character_dict_path: *character_dict_path
|
| 107 |
-
use_space_char: *use_space_char
|
| 108 |
-
max_text_length: *max_text_length
|
| 109 |
-
- RecTVResize:
|
| 110 |
-
image_shape: [32, 128]
|
| 111 |
-
padding: False
|
| 112 |
-
- KeepKeys:
|
| 113 |
-
keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
|
| 114 |
-
loader:
|
| 115 |
-
shuffle: False
|
| 116 |
-
drop_last: False
|
| 117 |
-
batch_size_per_card: 256
|
| 118 |
-
num_workers: 2
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|