| config: conf/train.yaml |
| print_config: false |
| log_level: INFO |
| drop_last_iter: false |
| dry_run: false |
| iterator_type: sequence |
| valid_iterator_type: null |
| output_dir: exp/tts_kinyarwanda_transfer_v1 |
| ngpu: 1 |
| seed: 0 |
| num_workers: 4 |
| num_att_plot: 3 |
| dist_backend: nccl |
| dist_init_method: env:// |
| dist_world_size: null |
| dist_rank: null |
| local_rank: null |
| dist_master_addr: null |
| dist_master_port: null |
| dist_launcher: null |
| multiprocessing_distributed: false |
| unused_parameters: false |
| sharded_ddp: false |
| use_deepspeed: false |
| deepspeed_config: null |
| gradient_as_bucket_view: true |
| ddp_comm_hook: null |
| cudnn_enabled: true |
| cudnn_benchmark: false |
| cudnn_deterministic: true |
| use_tf32: false |
| collect_stats: false |
| write_collected_feats: false |
| max_epoch: 100 |
| patience: null |
| val_scheduler_criterion: |
| - valid |
| - loss |
| early_stopping_criterion: |
| - valid |
| - loss |
| - min |
| best_model_criterion: |
| - - valid |
| - loss |
| - min |
| - - train |
| - loss |
| - min |
| keep_nbest_models: 5 |
| nbest_averaging_interval: 0 |
| grad_clip: 1.0 |
| grad_clip_type: 2.0 |
| grad_noise: false |
| accum_grad: 1 |
| no_forward_run: false |
| resume: false |
| train_dtype: float32 |
| use_amp: false |
| log_interval: null |
| use_matplotlib: true |
| use_tensorboard: true |
| create_graph_in_tensorboard: false |
| use_wandb: false |
| wandb_project: null |
| wandb_id: null |
| wandb_entity: null |
| wandb_name: null |
| wandb_model_log_interval: -1 |
| detect_anomaly: false |
| use_adapter: false |
| adapter: lora |
| save_strategy: all |
| adapter_conf: {} |
| pretrain_path: null |
| init_param: |
| - pretrained/ljspeech_pretrained.pth:tts:tts |
| ignore_init_mismatch: true |
| freeze_param: [] |
| num_iters_per_epoch: null |
| batch_size: 20 |
| valid_batch_size: null |
| batch_bins: 15000000 |
| valid_batch_bins: null |
| category_sample_size: 10 |
| upsampling_factor: 0.5 |
| category_upsampling_factor: 0.5 |
| dataset_upsampling_factor: 0.5 |
| dataset_scaling_factor: 1.2 |
| max_batch_size: null |
| min_batch_size: 1 |
| train_shape_file: |
| - exp/tts_stats_raw_char/train/text_shape.char |
| - exp/tts_stats_raw_char/train/speech_shape |
| valid_shape_file: |
| - exp/tts_stats_raw_char/valid/text_shape.char |
| - exp/tts_stats_raw_char/valid/speech_shape |
| batch_type: numel |
| valid_batch_type: null |
| fold_length: [] |
| sort_in_batch: descending |
| shuffle_within_batch: false |
| sort_batch: descending |
| multiple_iterator: false |
| chunk_length: 500 |
| chunk_shift_ratio: 0.5 |
| num_cache_chunks: 1024 |
| chunk_excluded_key_prefixes: [] |
| chunk_default_fs: null |
| chunk_max_abs_length: null |
| chunk_discard_short_samples: true |
| train_data_path_and_name_and_type: |
| - - dump/raw/train/text |
| - text |
| - text |
| - - dump/raw/train/wav.scp |
| - speech |
| - sound |
| valid_data_path_and_name_and_type: |
| - - dump/raw/dev/text |
| - text |
| - text |
| - - dump/raw/dev/wav.scp |
| - speech |
| - sound |
| multi_task_dataset: false |
| allow_variable_data_keys: false |
| max_cache_size: 0.0 |
| max_cache_fd: 32 |
| allow_multi_rates: false |
| valid_max_cache_size: null |
| exclude_weight_decay: false |
| exclude_weight_decay_conf: {} |
| optim: adam |
| optim_conf: |
| lr: 0.001 |
| eps: 1.0e-06 |
| weight_decay: 0.0 |
| scheduler: null |
| scheduler_conf: {} |
| token_list: |
| - <blank> |
| - <unk> |
| - <space> |
| - '''' |
| - a |
| - b |
| - c |
| - d |
| - e |
| - f |
| - g |
| - h |
| - i |
| - j |
| - k |
| - l |
| - m |
| - n |
| - o |
| - p |
| - q |
| - r |
| - s |
| - t |
| - u |
| - v |
| - w |
| - y |
| - z |
| - <sos/eos> |
| odim: null |
| model_conf: {} |
| use_preprocessor: true |
| token_type: char |
| bpemodel: null |
| non_linguistic_symbols: null |
| cleaner: null |
| g2p: null |
| feats_extract: fbank |
| feats_extract_conf: {} |
| normalize: global_mvn |
| normalize_conf: |
| stats_file: exp/tts_stats_raw_char/train/feats_stats.npz |
| tts: tacotron2 |
| tts_conf: |
| embed_dim: 512 |
| elayers: 1 |
| eunits: 512 |
| econv_layers: 3 |
| econv_chans: 512 |
| econv_filts: 5 |
| atype: location |
| adim: 512 |
| aconv_chans: 32 |
| aconv_filts: 15 |
| cumulate_att_w: true |
| dlayers: 2 |
| dunits: 1024 |
| prenet_layers: 2 |
| prenet_units: 256 |
| postnet_layers: 5 |
| postnet_chans: 512 |
| postnet_filts: 5 |
| output_activation: null |
| use_batch_norm: true |
| use_concate: true |
| use_residual: false |
| dropout_rate: 0.5 |
| zoneout_rate: 0.1 |
| reduction_factor: 1 |
| spk_embed_dim: null |
| use_masking: true |
| bce_pos_weight: 5.0 |
| use_guided_attn_loss: true |
| guided_attn_loss_sigma: 0.4 |
| guided_attn_loss_lambda: 1.0 |
| pitch_extract: null |
| pitch_extract_conf: {} |
| pitch_normalize: null |
| pitch_normalize_conf: {} |
| energy_extract: null |
| energy_extract_conf: {} |
| energy_normalize: null |
| energy_normalize_conf: {} |
| required: |
| - output_dir |
| - token_list |
| version: '202511' |
| distributed: false |
|
|