Spaces:

datdo2717
/

Scan_Doc_App

Runtime error

App Files Community

Scan_Doc_App/Rotate/configs/vqa/ser/layoutlmv2.yml

datdo2717

rotate

c5b5437 about 2 years ago

raw

history blame contribute delete

3.15 kB

	# Global run settings: device, schedule length, logging cadence,
	# checkpoint/output locations and the default inference image.
	Global:
	  use_gpu: True
	  # Anchored so the optimizer's lr schedule can alias the same epoch count.
	  epoch_num: &epoch_num 200
	  log_smooth_window: 10
	  print_batch_step: 10
	  save_model_dir: ./output/ser_layoutlmv2/
	  save_epoch_step: 2000
	  # evaluation is run every 19 iterations after the 0th iteration
	  eval_batch_step: [0, 19]
	  cal_metric_during_train: False
	  # No value configured; explicit null is equivalent to the bare key.
	  save_inference_dir: null
	  use_visualdl: False
	  seed: 2022
	  infer_img: doc/vqa/input/zh_val_0.jpg
	  save_res_path: ./output/ser/

	# Model definition: a LayoutLMv2ForSer backbone with no Transform configured.
	# NOTE(review): nesting was reconstructed from a flattened paste; confirm that
	# pretrained / checkpoints / num_classes belong under Backbone.
	Architecture:
	  model_type: vqa
	  # Anchored; aliased as *algorithm by the VQATokenLabelEncode transforms.
	  algorithm: &algorithm "LayoutLMv2"
	  Transform: null
	  Backbone:
	    name: LayoutLMv2ForSer
	    pretrained: True
	    # No checkpoint path configured; explicit null replaces the bare key.
	    checkpoints: null
	    # Anchored; aliased as *num_classes by the Loss section.
	    num_classes: &num_classes 7

	# Loss function; num_classes reuses the value anchored as &num_classes.
	Loss:
	  name: VQASerTokenLayoutLMLoss
	  num_classes: *num_classes

	# Optimizer: AdamW with a Linear learning-rate schedule and an L2 regularizer.
	# Each sub-map carries its own `name`, so lr and regularizer must be nested.
	Optimizer:
	  name: AdamW
	  beta1: 0.9
	  beta2: 0.999
	  lr:
	    name: Linear
	    learning_rate: 0.00005
	    # Same epoch count as the value anchored as &epoch_num.
	    epochs: *epoch_num
	    warmup_epoch: 2
	  regularizer:
	    name: L2
	    factor: 0.00000

	# Post-processing step and the label file it reads.
	PostProcess:
	  name: VQASerTokenLayoutLMPostProcess
	  # Anchored; aliased as *class_path by the VQATokenLabelEncode transforms.
	  class_path: &class_path ppstructure/vqa/labels/labels_ser.txt

	# Evaluation metric; hmean is the indicator selected as the main one.
	Metric:
	  name: VQASerTokenMetric
	  main_indicator: hmean

	# Training split: dataset location, the ordered transform pipeline applied to
	# each sample, and the loader settings.
	# NOTE(review): nesting was reconstructed from a flattened paste; confirm that
	# `loader` is a sibling of `dataset`.
	Train:
	  dataset:
	    name: SimpleDataSet
	    data_dir: train_data/XFUND/zh_train/image
	    label_file_list:
	      - train_data/XFUND/zh_train/xfun_normalize_train.json
	    transforms:
	      - DecodeImage:  # load image
	          img_mode: RGB
	          channel_first: False
	      - VQATokenLabelEncode:  # Class handling label
	          contains_re: False
	          algorithm: *algorithm
	          class_path: *class_path
	      - VQATokenPad:
	          # Anchored; aliased as *max_seq_len by the chunk step and by Eval.
	          max_seq_len: &max_seq_len 512
	          return_attention_mask: True
	      - VQASerTokenChunk:
	          max_seq_len: *max_seq_len
	      - Resize:
	          size: [224, 224]
	      - NormalizeImage:
	          scale: 1
	          mean: [123.675, 116.28, 103.53]
	          std: [58.395, 57.12, 57.375]
	          order: 'hwc'
	      # Takes no parameters; explicit null replaces the bare key.
	      - ToCHWImage: null
	      - KeepKeys:
	          # dataloader will return list in this order
	          keep_keys:
	            - 'input_ids'
	            - 'labels'
	            - 'bbox'
	            - 'image'
	            - 'attention_mask'
	            - 'token_type_ids'
	  loader:
	    shuffle: True
	    drop_last: False
	    batch_size_per_card: 8
	    num_workers: 4

	# Evaluation split: same transform pipeline as training, read from the
	# validation data, with shuffling turned off in the loader.
	# NOTE(review): nesting was reconstructed from a flattened paste; confirm that
	# `loader` is a sibling of `dataset`.
	Eval:
	  dataset:
	    name: SimpleDataSet
	    data_dir: train_data/XFUND/zh_val/image
	    label_file_list:
	      - train_data/XFUND/zh_val/xfun_normalize_val.json
	    transforms:
	      - DecodeImage:  # load image
	          img_mode: RGB
	          channel_first: False
	      - VQATokenLabelEncode:  # Class handling label
	          contains_re: False
	          algorithm: *algorithm
	          class_path: *class_path
	      - VQATokenPad:
	          # Alias of the &max_seq_len anchor defined in the Train section.
	          max_seq_len: *max_seq_len
	          return_attention_mask: True
	      - VQASerTokenChunk:
	          max_seq_len: *max_seq_len
	      - Resize:
	          size: [224, 224]
	      - NormalizeImage:
	          scale: 1
	          mean: [123.675, 116.28, 103.53]
	          std: [58.395, 57.12, 57.375]
	          order: 'hwc'
	      # Takes no parameters; explicit null replaces the bare key.
	      - ToCHWImage: null
	      - KeepKeys:
	          # dataloader will return list in this order
	          keep_keys:
	            - 'input_ids'
	            - 'labels'
	            - 'bbox'
	            - 'image'
	            - 'attention_mask'
	            - 'token_type_ids'
	  loader:
	    shuffle: False
	    drop_last: False
	    batch_size_per_card: 8
	    num_workers: 4