Spaces:

subhankarg
/

MagpieTTS_Internal_Demo

Runtime error

App Files Files Community

MagpieTTS_Internal_Demo / examples /tts /conf /aligner.yaml

subhankarg

Upload folder using huggingface_hub

0558aa4 verified 5 months ago

raw

history blame contribute delete

4.79 kB

	# This config contains the default values for training the Aligner model on the LJSpeech dataset.
	# To train the model on a different dataset, adjust the config values to match that dataset.
	# Most dataset-specific arguments are at the top of the config file; see below.

	# Experiment name; also referenced elsewhere in this file as ${name}.
	name: Aligner

	# Required inputs: '???' is OmegaConf's mandatory-value marker, so each of
	# these must be given a real value before the config can be used.
	train_dataset: ???
	validation_datasets: ???
	sup_data_path: ???
	sup_data_types:
	  - 'align_prior_matrix'

	# Audio/feature defaults for data with sample_rate=22050; the model
	# settings pull each of these in through ${...} interpolation.
	sample_rate: 22050
	n_mel_channels: 80
	n_window_size: 1024
	n_window_stride: 256
	n_fft: 1024
	lowfreq: 0
	highfreq: 8000
	window: hann

	# Dictionary files referenced by the g2p settings (phoneme_dict / heteronyms).
	phoneme_dict_path: 'scripts/tts_dataset_files/cmudict-0.7b_nv22.10'
	heteronyms_path: 'scripts/tts_dataset_files/heteronyms-052722'

	# Model section. The nesting below is load-bearing: the ${model.*}
	# interpolations only resolve when these keys live under `model`, and the
	# sub-sections reuse key names (`_target_`, `dataset`, `dataloader_params`,
	# `name`, `sample_rate`, ...) that would collide as duplicates if flattened.
	model:
	  # Referenced below as ${model.symbols_embedding_dim}
	  # (alignment_encoder.n_text_channels).
	  symbols_embedding_dim: 384
	  bin_loss_start_ratio: 0.2
	  bin_loss_warmup_epochs: 100

	  # Copies of the root-level audio defaults, so that the datasets,
	  # preprocessor and alignment encoder can read them as ${model.*}.
	  sample_rate: ${sample_rate}
	  n_mel_channels: ${n_mel_channels}
	  n_window_size: ${n_window_size}
	  n_window_stride: ${n_window_stride}
	  n_fft: ${n_fft}
	  lowfreq: ${lowfreq}
	  highfreq: ${highfreq}
	  window: ${window}

	  text_normalizer:
	    _target_: nemo_text_processing.text_normalization.normalize.Normalizer
	    lang: en
	    input_case: cased

	  text_normalizer_call_kwargs:
	    verbose: false
	    punct_pre_process: true
	    punct_post_process: true

	  text_tokenizer:
	    _target_: nemo.collections.common.tokenizers.text_to_speech.tts_tokenizers.EnglishPhonemesTokenizer
	    punct: true
	    stresses: true
	    chars: true
	    apostrophe: true
	    pad_with_space: true
	    # g2p is configured with the root-level dictionary paths.
	    g2p:
	      _target_: nemo.collections.tts.g2p.models.en_us_arpabet.EnglishG2p
	      phoneme_dict: ${phoneme_dict_path}
	      heteronyms: ${heteronyms_path}

	  # Training data: manifest comes from the root-level `train_dataset`.
	  train_ds:
	    dataset:
	      _target_: nemo.collections.tts.data.dataset.TTSDataset
	      manifest_filepath: ${train_dataset}
	      sample_rate: ${model.sample_rate}
	      sup_data_path: ${sup_data_path}
	      sup_data_types: ${sup_data_types}
	      n_fft: ${model.n_fft}
	      win_length: ${model.n_window_size}
	      hop_length: ${model.n_window_stride}
	      window: ${model.window}
	      n_mels: ${model.n_mel_channels}
	      lowfreq: ${model.lowfreq}
	      highfreq: ${model.highfreq}
	      max_duration: null
	      min_duration: 0.1
	      ignore_file: null
	      trim: false

	    dataloader_params:
	      drop_last: false
	      shuffle: true
	      batch_size: 64
	      num_workers: 4
	      pin_memory: true

	  # Validation data: same dataset settings as train_ds except for the
	  # manifest; the dataloader does not shuffle and uses a single worker.
	  validation_ds:
	    dataset:
	      _target_: nemo.collections.tts.data.dataset.TTSDataset
	      manifest_filepath: ${validation_datasets}
	      sample_rate: ${model.sample_rate}
	      sup_data_path: ${sup_data_path}
	      sup_data_types: ${sup_data_types}
	      n_fft: ${model.n_fft}
	      win_length: ${model.n_window_size}
	      hop_length: ${model.n_window_stride}
	      window: ${model.window}
	      n_mels: ${model.n_mel_channels}
	      lowfreq: ${model.lowfreq}
	      highfreq: ${model.highfreq}
	      max_duration: null
	      min_duration: 0.1
	      ignore_file: null
	      trim: false

	    dataloader_params:
	      drop_last: false
	      shuffle: false
	      batch_size: 64
	      num_workers: 1
	      pin_memory: true

	  preprocessor:
	    _target_: nemo.collections.asr.modules.AudioToMelSpectrogramPreprocessor
	    features: ${model.n_mel_channels}
	    lowfreq: ${model.lowfreq}
	    highfreq: ${model.highfreq}
	    n_fft: ${model.n_fft}
	    n_window_size: ${model.n_window_size}
	    window_size: false
	    n_window_stride: ${model.n_window_stride}
	    window_stride: false
	    pad_to: 1
	    pad_value: -11.52
	    sample_rate: ${model.sample_rate}
	    window: ${model.window}
	    normalize: null
	    preemph: null
	    dither: 0.0
	    frame_splicing: 1
	    log: true
	    log_zero_guard_type: clamp
	    # Explicit mantissa dot so YAML 1.1 loaders also read a float, not a string.
	    log_zero_guard_value: 1.0e-05
	    mag_power: 1.0

	  alignment_encoder:
	    _target_: nemo.collections.tts.modules.aligner.AlignmentEncoder
	    n_mel_channels: ${model.n_mel_channels}
	    n_text_channels: ${model.symbols_embedding_dim}
	    n_att_channels: ${model.n_mel_channels}

	  # Exponent-form values carry an explicit mantissa dot (1.0e-3) so that
	  # YAML 1.1 loaders, which require it, also parse them as floats.
	  optim:
	    name: adam
	    lr: 1.0e-3
	    weight_decay: 1.0e-6

	    # NOTE(review): nested under `optim` following NeMo's optim.sched
	    # layout (it also avoids a second `name` key next to optim.name) —
	    # confirm against the upstream config.
	    sched:
	      name: CosineAnnealing
	      min_lr: 5.0e-5
	      warmup_ratio: 0.35

	# Trainer settings. The keys must be nested under `trainer`; left at the
	# parent level, `trainer` itself would be null. Checkpointing and logging
	# are switched off here because exp_manager provides them (see inline notes).
	trainer:
	  devices: 1
	  num_nodes: 1
	  accelerator: gpu
	  strategy: ddp
	  precision: 32
	  max_epochs: 1000
	  accumulate_grad_batches: 1
	  gradient_clip_val: 1000.0
	  enable_checkpointing: false  # Provided by exp_manager
	  logger: false  # Provided by exp_manager
	  log_every_n_steps: 100
	  check_val_every_n_epoch: 1
	  benchmark: false

	# Experiment-manager settings. Nesting matters here: `name: ${name}` must
	# sit inside `exp_manager` (at the root it would duplicate the root `name`
	# key and refer to itself), and `wandb_logger_kwargs.name` must not
	# collide with it either.
	exp_manager:
	  exp_dir: null
	  # Reuses the root-level experiment name.
	  name: ${name}
	  create_tensorboard_logger: true
	  create_checkpoint_callback: true
	  # Checkpoint selection: track val_forward_sum_loss, lower is better (mode: min).
	  checkpoint_callback_params:
	    monitor: val_forward_sum_loss
	    mode: min
	  create_wandb_logger: false
	  # Only relevant when create_wandb_logger is enabled; all left unset.
	  wandb_logger_kwargs:
	    name: null
	    project: null
	    entity: null
	  resume_if_exists: false
	  resume_ignore_no_checkpoint: false