# This config contains the default values for training the Aligner model on the LJSpeech dataset.
# To train the model on another dataset, change the config values to match that dataset.
# Most dataset-specific arguments are at the head of the config file; see below.

name: Aligner

# `???` is OmegaConf's marker for a mandatory value: these must be supplied
# by the caller (e.g. as command-line overrides).
train_dataset: ???
validation_datasets: ???
sup_data_path: ???
sup_data_types: ["align_prior_matrix"]

# Default values for dataset with sample_rate=22050
sample_rate: 22050
n_mel_channels: 80
n_window_size: 1024
n_window_stride: 256
n_fft: 1024
lowfreq: 0
highfreq: 8000
window: hann

phoneme_dict_path: "scripts/tts_dataset_files/cmudict-0.7b_nv22.10"
heteronyms_path: "scripts/tts_dataset_files/heteronyms-052722"

model:
  symbols_embedding_dim: 384
  bin_loss_start_ratio: 0.2
  bin_loss_warmup_epochs: 100

  # Audio/feature settings are interpolated from the top-level defaults above
  # so that they only need to be changed in one place.
  sample_rate: ${sample_rate}
  n_mel_channels: ${n_mel_channels}
  n_window_size: ${n_window_size}
  n_window_stride: ${n_window_stride}
  n_fft: ${n_fft}
  lowfreq: ${lowfreq}
  highfreq: ${highfreq}
  window: ${window}

  text_normalizer:
    _target_: nemo_text_processing.text_normalization.normalize.Normalizer
    lang: en
    input_case: cased

  text_normalizer_call_kwargs:
    verbose: false
    punct_pre_process: true
    punct_post_process: true

  text_tokenizer:
    _target_: nemo.collections.common.tokenizers.text_to_speech.tts_tokenizers.EnglishPhonemesTokenizer
    punct: true
    stresses: true
    chars: true
    apostrophe: true
    pad_with_space: true
    g2p:
      _target_: nemo.collections.tts.g2p.modules.EnglishG2p
      phoneme_dict: ${phoneme_dict_path}
      heteronyms: ${heteronyms_path}

  # Training data: manifest comes from the top-level `train_dataset`.
  train_ds:
    dataset:
      _target_: nemo.collections.tts.data.tts_dataset.TTSDataset
      manifest_filepath: ${train_dataset}
      sample_rate: ${model.sample_rate}
      sup_data_path: ${sup_data_path}
      sup_data_types: ${sup_data_types}
      n_fft: ${model.n_fft}
      win_length: ${model.n_window_size}
      hop_length: ${model.n_window_stride}
      window: ${model.window}
      n_mels: ${model.n_mel_channels}
      lowfreq: ${model.lowfreq}
      highfreq: ${model.highfreq}
      max_duration: null
      min_duration: 0.1
      ignore_file: null
      trim: false

    dataloader_params:
      drop_last: false
      shuffle: true
      batch_size: 64
      num_workers: 4
      pin_memory: true

  # Validation data: same dataset settings as `train_ds`, but the manifest
  # comes from `validation_datasets`, shuffling is off and one worker is used.
  validation_ds:
    dataset:
      _target_: nemo.collections.tts.data.tts_dataset.TTSDataset
      manifest_filepath: ${validation_datasets}
      sample_rate: ${model.sample_rate}
      sup_data_path: ${sup_data_path}
      sup_data_types: ${sup_data_types}
      n_fft: ${model.n_fft}
      win_length: ${model.n_window_size}
      hop_length: ${model.n_window_stride}
      window: ${model.window}
      n_mels: ${model.n_mel_channels}
      lowfreq: ${model.lowfreq}
      highfreq: ${model.highfreq}
      max_duration: null
      min_duration: 0.1
      ignore_file: null
      trim: false

    dataloader_params:
      drop_last: false
      shuffle: false
      batch_size: 64
      num_workers: 1
      pin_memory: true

  preprocessor:
    _target_: nemo.collections.asr.modules.AudioToMelSpectrogramPreprocessor
    features: ${model.n_mel_channels}
    lowfreq: ${model.lowfreq}
    highfreq: ${model.highfreq}
    n_fft: ${model.n_fft}
    n_window_size: ${model.n_window_size}
    window_size: false
    n_window_stride: ${model.n_window_stride}
    window_stride: false
    pad_to: 1
    pad_value: -11.52
    sample_rate: ${model.sample_rate}
    window: ${model.window}
    normalize: null
    preemph: null
    dither: 0.0
    frame_splicing: 1
    log: true
    log_zero_guard_type: clamp
    # Exponent floats carry an explicit mantissa dot so that YAML 1.1 loaders
    # (e.g. plain PyYAML) resolve them as numbers rather than strings.
    log_zero_guard_value: 1.0e-05
    mag_power: 1.0

  alignment_encoder:
    _target_: nemo.collections.tts.modules.aligner.AlignmentEncoder
    n_mel_channels: ${model.n_mel_channels}
    n_text_channels: ${model.symbols_embedding_dim}
    n_att_channels: ${model.n_mel_channels}

  optim:
    name: adam
    # Exponent floats carry an explicit mantissa dot so that YAML 1.1 loaders
    # (e.g. plain PyYAML) resolve them as numbers rather than strings.
    lr: 1.0e-3
    weight_decay: 1.0e-6

    sched:
      name: CosineAnnealing
      min_lr: 5.0e-5
      warmup_ratio: 0.35

trainer:
  devices: 1
  num_nodes: 1
  accelerator: gpu
  strategy: ddp
  precision: 32
  max_epochs: 1000
  accumulate_grad_batches: 1
  gradient_clip_val: 1000.0
  enable_checkpointing: false  # Provided by exp_manager
  logger: false  # Provided by exp_manager
  log_every_n_steps: 100
  check_val_every_n_epoch: 1
  benchmark: false

exp_manager:
  exp_dir: null
  name: ${name}
  create_tensorboard_logger: true
  create_checkpoint_callback: true
  checkpoint_callback_params:
    monitor: val_forward_sum_loss
    mode: min
  create_wandb_logger: false
  wandb_logger_kwargs:
    name: null
    project: null
    entity: null
  resume_if_exists: false
  resume_ignore_no_checkpoint: false