Spaces:

datdo2717
/

Scan_Doc_App

Runtime error

App Files Files Community

Scan_Doc_App / Rotate / configs / rec / rec_svtrnet.yml

datdo2717

rotate

c5b5437 about 2 years ago

raw

history blame contribute delete

2.84 kB

	# Run-wide settings. Every key below is a child of `Global`; the nesting
	# (two spaces per level) had been flattened and is restored here.
	# NOTE(review): key semantics are defined by the consuming training
	# framework, which is not part of this file.
	Global:
	  use_gpu: True
	  epoch_num: 20
	  log_smooth_window: 20
	  print_batch_step: 10
	  save_model_dir: ./output/rec/svtr/
	  save_epoch_step: 1
	  # evaluation is run every 2000 iterations after the 0th iteration
	  eval_batch_step: [0, 2000]
	  cal_metric_during_train: True
	  # The three keys below have no value, which YAML parses as null.
	  pretrained_model:
	  checkpoints:
	  save_inference_dir:
	  use_visualdl: False
	  infer_img: doc/imgs_words_en/word_10.png
	  # for data or label process
	  # `character_dict_path` has no value, which YAML parses as null.
	  character_dict_path:
	  character_type: en
	  max_text_length: 25
	  infer_mode: False
	  use_space_char: False
	  save_res_path: ./output/rec/predicts_svtr_tiny.txt
	  # Same [3, 64, 256] shape as `image_shape` in the Train/Eval transforms.
	  d2s_train_image_shape: [3, 64, 256]


	# Optimizer settings. `lr` is a nested mapping with its own `name`; it must
	# stay indented under `lr:` so that `name: Cosine` does not collide with the
	# optimizer's own `name: AdamW`.
	Optimizer:
	  name: AdamW
	  beta1: 0.9
	  beta2: 0.99
	  # Keep the "." before the exponent: YAML 1.1 loaders such as PyYAML only
	  # resolve exponent notation to a float when the mantissa contains a dot.
	  epsilon: 1.e-8
	  weight_decay: 0.05
	  # One plain string holding two space-separated tokens.
	  # NOTE(review): presumably parameter-name patterns excluded from weight
	  # decay; confirm against the optimizer builder.
	  no_weight_decay_name: norm pos_embed
	  one_dim_param_no_weight_decay: True
	  lr:
	    name: Cosine
	    learning_rate: 0.0005
	    warmup_epoch: 2

	# Model definition. `Transform`, `Backbone`, `Neck` and `Head` are
	# sub-mappings of `Architecture`, each with its own `name`; the nesting is
	# restored so those four `name` keys no longer collide.
	Architecture:
	  model_type: rec
	  algorithm: SVTR
	  Transform:
	    name: STN_ON
	    tps_inputsize: [32, 64]
	    # Matches Backbone.img_size below ([32, 100]).
	    tps_outputsize: [32, 100]
	    num_control_points: 20
	    tps_margins: [0.05, 0.05]
	    # Plain scalar `none` is the string "none", not YAML null.
	    stn_activation: none
	  Backbone:
	    name: SVTRNet
	    img_size: [32, 100]
	    # W//4, W//8 or W//12 of the img_size width; here 100 // 4 = 25.
	    out_char_num: 25
	    out_channels: 192
	    patch_merging: 'Conv'
	    embed_dim: [64, 128, 256]
	    depth: [3, 6, 3]
	    num_heads: [2, 4, 8]
	    # 12 entries, equal to the sum of `depth` (3 + 6 + 3).
	    # NOTE(review): presumably one mixer type per block; confirm.
	    mixer: ['Local','Local','Local','Local','Local','Local','Global','Global','Global','Global','Global','Global']
	    local_mixer: [[7, 11], [7, 11], [7, 11]]
	    last_stage: True
	    prenorm: False
	  Neck:
	    name: SequenceEncoder
	    encoder_type: reshape
	  Head:
	    name: CTCHead

	# Loss selection; `name` is a child of `Loss`, not a sibling key.
	Loss:
	  name: CTCLoss

	# Post-processing selection; `name` is a child of `PostProcess`.
	PostProcess:
	  name: CTCLabelDecode

	# Evaluation metric; both keys are children of `Metric`.
	Metric:
	  name: RecMetric
	  main_indicator: acc

	# Training data pipeline: `dataset` (source + ordered transform list) and
	# `loader` (batching options). Each list item under `transforms` is a
	# single-key mapping whose value holds that transform's arguments, so the
	# arguments sit one level deeper than the transform name.
	Train:
	  dataset:
	    name: LMDBDataSet
	    data_dir: ./train_data/data_lmdb_release/training/
	    transforms:
	      - DecodeImage:  # load image
	          img_mode: BGR
	          channel_first: False
	      - SVTRRecAug:
	          aug_type: 0  # or 1
	      # No arguments: the value is null.
	      - CTCLabelEncode:  # Class handling label
	      - SVTRRecResizeImg:
	          image_shape: [3, 64, 256]
	          padding: False
	      - KeepKeys:
	          # dataloader will return list in this order
	          keep_keys: ['image', 'label', 'length']
	  loader:
	    shuffle: True
	    batch_size_per_card: 512
	    drop_last: True
	    num_workers: 8

	# Evaluation data pipeline, structured like `Train`: `dataset` (source +
	# ordered transform list) and `loader`. The transform list here has no
	# SVTRRecAug entry, and shuffle/drop_last are both False.
	Eval:
	  dataset:
	    name: LMDBDataSet
	    data_dir: ./train_data/data_lmdb_release/evaluation/
	    transforms:
	      - DecodeImage:  # load image
	          img_mode: BGR
	          channel_first: False
	      # No arguments: the value is null.
	      - CTCLabelEncode:  # Class handling label
	      - SVTRRecResizeImg:
	          image_shape: [3, 64, 256]
	          padding: False
	      - KeepKeys:
	          # dataloader will return list in this order
	          keep_keys: ['image', 'label', 'length']
	  loader:
	    shuffle: False
	    drop_last: False
	    batch_size_per_card: 256
	    num_workers: 2