name: &name DuplexTextNormalization
mode: joint  # Three possible choices ['tn', 'itn', 'joint']
lang: ???  # Supported languages are ['en', 'ru', 'de', 'multilingual']

# Pretrained Nemo Models
tagger_pretrained_model: null
decoder_pretrained_model: null

# Tagger
tagger_trainer:
  devices: 1  # the number of gpus, 0 for CPU
  num_nodes: 1
  max_epochs: 5  # the number of training epochs (for ru or de or multilingual, try 10)
  enable_checkpointing: false  # provided by exp_manager
  logger: false  # provided by exp_manager
  accumulate_grad_batches: 1  # accumulates grads every k batches
  gradient_clip_val: 0.0
  precision: 32  # Should be set to 16 for O1 and O2 to enable the AMP.
  accelerator: gpu
  strategy: ddp

tagger_model:
  do_training: true
  transformer: albert-base-v2  # For ru, try cointegrated/rubert-tiny | For de, try bert-base-german-cased | For multilingual, try bert-base-multilingual-cased
  tokenizer: ${tagger_model.transformer}
  max_sequence_len: 128
  nemo_path: ${tagger_exp_manager.exp_dir}/tagger_model.nemo  # exported .nemo path
  lang: ${lang}
  mode: ${mode}

  optim:
    name: adamw
    lr: 5e-5
    weight_decay: 0.01

    sched:
      name: WarmupAnnealing

      # pytorch lightning args
      monitor: val_token_precision
      reduce_on_plateau: false

      # scheduler config override
      warmup_steps: null
      warmup_ratio: 0.1
      last_epoch: -1

tagger_exp_manager:
  exp_dir: nemo_experiments  # where to store logs and checkpoints
  name: tagger_training  # name of experiment
  create_tensorboard_logger: true
  create_checkpoint_callback: true
  checkpoint_callback_params:
    save_top_k: 3
    monitor: "val_token_precision"
    mode: "max"
    save_best_model: true
    always_save_nemo: true

# Decoder
decoder_trainer:
  devices: 1  # the number of gpus, 0 for CPU
  num_nodes: 1
  max_epochs: 3  # the number of training epochs
  enable_checkpointing: false  # provided by exp_manager
  logger: false  # provided by exp_manager
  accumulate_grad_batches: 1  # accumulates grads every k batches
  gradient_clip_val: 0.0
  precision: 32  # Should be set to 16 for O1 and O2 to enable the AMP.
  accelerator: gpu
  strategy: ddp
  log_every_n_steps: 1  # Interval of logging.
  val_check_interval: 1.0  # Set to 0.25 to check 4 times per epoch, or an int for number of iterations
  resume_from_checkpoint: null  # The path to a checkpoint file to continue the training, restores the whole state including the epoch, step, LR schedulers, apex, etc.

decoder_model:
  do_training: true
  transformer: t5-small  # For ru, try cointegrated/rut5-base | For de or multilingual, try google/mt5-base
  max_sequence_len: 80
  tokenizer: ${decoder_model.transformer}
  nemo_path: ${decoder_exp_manager.exp_dir}/decoder_model.nemo  # exported .nemo path
  lang: ${lang}
  mode: ${mode}

  # Options related to covering grammars for TN
  use_cg: false  # Use covering grammars to avoid catastrophic errors
  neural_confidence_threshold: 0.99  # If the neural model is not confident, then use the covering grammars
  n_tagged: 1  # number of tagged options to consider, -1 - to get all possible tagged options

  optim:
    name: adamw
    lr: 2e-4
    weight_decay: 0.01

    sched:
      name: WarmupAnnealing

      # pytorch lightning args
      monitor: val_loss
      reduce_on_plateau: false

      # scheduler config override
      warmup_steps: null
      warmup_ratio: 0.0
      last_epoch: -1

decoder_exp_manager:
  exp_dir: nemo_experiments  # where to store logs and checkpoints
  name: decoder_training  # name of experiment
  create_tensorboard_logger: true
  create_checkpoint_callback: true
  checkpoint_callback_params:
    save_top_k: 3
    monitor: "val_loss"
    mode: "min"
    save_best_model: true

# Data
data:
  train_ds:
    data_path: train.tsv  # provide the full path to the file. Ignored when using tarred dataset, tar_metadata_file is used instead.
    batch_size: 64  # local training batch size for each worker. Ignored when using tarred dataset, the batch size of the tarred dataset is used instead.
    shuffle: true
    max_insts: -1  # Maximum number of instances (-1 means no limit)
    # Refer to the text_normalization doc for more information about data augmentation
    tagger_data_augmentation: false
    decoder_data_augmentation: true
    use_cache: false  # uses a cache to store the processed dataset, you may use it for large datasets for speed up (especially when using multi GPUs)
    num_workers: 3
    pin_memory: false
    drop_last: false
    use_tarred_dataset: false  # if true tar_metadata_file will be used
    tar_metadata_file: null  # metadata for tarred dataset. A JSON file containing the list of tar_files in "text_tar_filepaths" field
    tar_shuffle_n: 100  # How many samples to look ahead and load to be shuffled

  validation_ds:
    data_path: dev.tsv  # provide the full path to the file. Provide multiple paths to run evaluation on multiple datasets
    batch_size: 64
    shuffle: false
    max_insts: -1  # Maximum number of instances (-1 means no limit)
    use_cache: false  # uses a cache to store the processed dataset, you may use it for large datasets for speed up (especially when using multi GPUs)
    num_workers: 3
    pin_memory: false
    drop_last: false

  test_ds:
    data_path: test.tsv  # provide the full path to the file
    batch_size: 64
    shuffle: false
    use_cache: false  # uses a cache to store the processed dataset, you may use it for large datasets for speed up (especially when using multi GPUs)
    num_workers: 3
    pin_memory: false
    drop_last: false
    errors_log_fp: errors.txt  # Path to the file for logging the errors

# Inference
inference:
  interactive: false  # Set to true if you want to enable the interactive mode when running duplex_text_normalization_test.py
  from_file: null  # Path to the raw text, no labels required. Each sentence on a separate line
  batch_size: 16  # batch size for inference.from_file