Upload folder using huggingface_hub
Browse files- exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/stats.1.log +14 -14
- exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/stats.1/config.yaml +1 -1
- exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/stats.2.log +14 -14
- exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/stats.2/config.yaml +1 -1
- exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/stats.3.log +14 -14
- exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/stats.3/config.yaml +1 -1
- exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/stats.4.log +14 -14
- exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/stats.4/config.yaml +1 -1
- exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/stats.5.log +14 -14
- exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/stats.5/config.yaml +1 -1
- exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/stats.6.log +14 -14
- exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/stats.6/config.yaml +1 -1
- exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/stats.7.log +14 -14
- exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/stats.7/config.yaml +1 -1
- exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/stats.8.log +14 -14
- exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/stats.8/config.yaml +1 -1
- exp/tts_train_jets_raw_phn_jaconv_pyopenjtalk/config.yaml +3 -3
- exp/tts_train_jets_raw_phn_jaconv_pyopenjtalk/tensorboard/train/events.out.tfevents.1741091035.92b100c97f43.1159464.0 +2 -2
- exp/tts_train_jets_raw_phn_jaconv_pyopenjtalk/tensorboard/train/events.out.tfevents.1741091448.92b100c97f43.1179446.0 +3 -0
- exp/tts_train_jets_raw_phn_jaconv_pyopenjtalk/tensorboard/train/events.out.tfevents.1741091666.92b100c97f43.1289026.0 +3 -0
- exp/tts_train_jets_raw_phn_jaconv_pyopenjtalk/tensorboard/train/events.out.tfevents.1741091743.92b100c97f43.1324139.0 +3 -0
- exp/tts_train_jets_raw_phn_jaconv_pyopenjtalk/tensorboard/valid/events.out.tfevents.1741091448.92b100c97f43.1179446.1 +3 -0
- exp/tts_train_jets_raw_phn_jaconv_pyopenjtalk/tensorboard/valid/events.out.tfevents.1741091666.92b100c97f43.1289026.1 +3 -0
- exp/tts_train_jets_raw_phn_jaconv_pyopenjtalk/tensorboard/valid/events.out.tfevents.1741091743.92b100c97f43.1324139.1 +3 -0
- exp/tts_train_jets_raw_phn_jaconv_pyopenjtalk/train.1.log +1342 -0
- exp/tts_train_jets_raw_phn_jaconv_pyopenjtalk/train.2.log +0 -0
- exp/tts_train_jets_raw_phn_jaconv_pyopenjtalk/train.3.log +1247 -0
- exp/tts_train_jets_raw_phn_jaconv_pyopenjtalk/train.4.log +1212 -0
- exp/tts_train_jets_raw_phn_jaconv_pyopenjtalk/train.log +6 -978
exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/stats.1.log
CHANGED
|
@@ -1,14 +1,14 @@
|
|
| 1 |
# python3 -m espnet2.bin.gan_tts_train --collect_stats true --write_collected_feats false --use_preprocessor true --token_type phn --token_list dump/token_list/phn_jaconv_pyopenjtalk/tokens.txt --non_linguistic_symbols none --cleaner jaconv --g2p pyopenjtalk --normalize none --pitch_normalize none --energy_normalize none --train_data_path_and_name_and_type dump/raw/jvs010_tr_no_dev/text,text,text --train_data_path_and_name_and_type dump/raw/jvs010_tr_no_dev/wav.scp,speech,sound --valid_data_path_and_name_and_type dump/raw/jvs010_dev/text,text,text --valid_data_path_and_name_and_type dump/raw/jvs010_dev/wav.scp,speech,sound --train_shape_file exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/train.1.scp --valid_shape_file exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/valid.1.scp --output_dir exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/stats.1 --config conf/tuning/train_jets.yaml --feats_extract fbank --feats_extract_conf n_fft=2048 --feats_extract_conf hop_length=300 --feats_extract_conf win_length=1200 --feats_extract_conf fs=24000 --feats_extract_conf fmin=80 --feats_extract_conf fmax=7600 --feats_extract_conf n_mels=80 --pitch_extract_conf fs=24000 --pitch_extract_conf n_fft=2048 --pitch_extract_conf hop_length=300 --pitch_extract_conf f0max=400 --pitch_extract_conf f0min=80 --energy_extract_conf fs=24000 --energy_extract_conf n_fft=2048 --energy_extract_conf hop_length=300 --energy_extract_conf win_length=1200
|
| 2 |
-
# Started at Tue Mar 4
|
| 3 |
#
|
| 4 |
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 5 |
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 6 |
/usr/bin/python3 /work/espnet/espnet2/bin/gan_tts_train.py --collect_stats true --write_collected_feats false --use_preprocessor true --token_type phn --token_list dump/token_list/phn_jaconv_pyopenjtalk/tokens.txt --non_linguistic_symbols none --cleaner jaconv --g2p pyopenjtalk --normalize none --pitch_normalize none --energy_normalize none --train_data_path_and_name_and_type dump/raw/jvs010_tr_no_dev/text,text,text --train_data_path_and_name_and_type dump/raw/jvs010_tr_no_dev/wav.scp,speech,sound --valid_data_path_and_name_and_type dump/raw/jvs010_dev/text,text,text --valid_data_path_and_name_and_type dump/raw/jvs010_dev/wav.scp,speech,sound --train_shape_file exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/train.1.scp --valid_shape_file exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/valid.1.scp --output_dir exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/stats.1 --config conf/tuning/train_jets.yaml --feats_extract fbank --feats_extract_conf n_fft=2048 --feats_extract_conf hop_length=300 --feats_extract_conf win_length=1200 --feats_extract_conf fs=24000 --feats_extract_conf fmin=80 --feats_extract_conf fmax=7600 --feats_extract_conf n_mels=80 --pitch_extract_conf fs=24000 --pitch_extract_conf n_fft=2048 --pitch_extract_conf hop_length=300 --pitch_extract_conf f0max=400 --pitch_extract_conf f0min=80 --energy_extract_conf fs=24000 --energy_extract_conf n_fft=2048 --energy_extract_conf hop_length=300 --energy_extract_conf win_length=1200
|
| 7 |
-
[92b100c97f43] 2025-03-04
|
| 8 |
-
[92b100c97f43] 2025-03-04
|
| 9 |
-
[92b100c97f43] 2025-03-04
|
| 10 |
-
[92b100c97f43] 2025-03-04
|
| 11 |
-
[92b100c97f43] 2025-03-04
|
| 12 |
ESPnetGANTTSModel(
|
| 13 |
(feats_extract): LogMelFbank(
|
| 14 |
(stft): Stft(n_fft=2048, win_length=1200, hop_length=300, center=True, normalized=False, onesided=True)
|
|
@@ -844,7 +844,7 @@ Model summary:
|
|
| 844 |
Number of trainable parameters: 83.28 M (100.0%)
|
| 845 |
Size: 333.11 MB
|
| 846 |
Type: torch.float32
|
| 847 |
-
[92b100c97f43] 2025-03-04
|
| 848 |
AdamW (
|
| 849 |
Parameter Group 0
|
| 850 |
amsgrad: False
|
|
@@ -854,8 +854,8 @@ Parameter Group 0
|
|
| 854 |
lr: 0.0002
|
| 855 |
weight_decay: 0.0
|
| 856 |
)
|
| 857 |
-
[92b100c97f43] 2025-03-04
|
| 858 |
-
[92b100c97f43] 2025-03-04
|
| 859 |
AdamW (
|
| 860 |
Parameter Group 0
|
| 861 |
amsgrad: False
|
|
@@ -865,10 +865,10 @@ Parameter Group 0
|
|
| 865 |
lr: 0.0002
|
| 866 |
weight_decay: 0.0
|
| 867 |
)
|
| 868 |
-
[92b100c97f43] 2025-03-04
|
| 869 |
-
[92b100c97f43] 2025-03-04
|
| 870 |
-
[92b100c97f43] 2025-03-04
|
| 871 |
/work/espnet/espnet2/layers/stft.py:166: UserWarning: __floordiv__ is deprecated, and its behavior will change in a future version of pytorch. It currently rounds toward 0 (like the 'trunc' function NOT 'floor'). This results in incorrect rounding for negative values. To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor').
|
| 872 |
olens = (ilens - self.n_fft) // self.hop_length + 1
|
| 873 |
-
# Accounting: time=
|
| 874 |
-
# Ended (code 0) at Tue Mar 4
|
|
|
|
| 1 |
# python3 -m espnet2.bin.gan_tts_train --collect_stats true --write_collected_feats false --use_preprocessor true --token_type phn --token_list dump/token_list/phn_jaconv_pyopenjtalk/tokens.txt --non_linguistic_symbols none --cleaner jaconv --g2p pyopenjtalk --normalize none --pitch_normalize none --energy_normalize none --train_data_path_and_name_and_type dump/raw/jvs010_tr_no_dev/text,text,text --train_data_path_and_name_and_type dump/raw/jvs010_tr_no_dev/wav.scp,speech,sound --valid_data_path_and_name_and_type dump/raw/jvs010_dev/text,text,text --valid_data_path_and_name_and_type dump/raw/jvs010_dev/wav.scp,speech,sound --train_shape_file exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/train.1.scp --valid_shape_file exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/valid.1.scp --output_dir exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/stats.1 --config conf/tuning/train_jets.yaml --feats_extract fbank --feats_extract_conf n_fft=2048 --feats_extract_conf hop_length=300 --feats_extract_conf win_length=1200 --feats_extract_conf fs=24000 --feats_extract_conf fmin=80 --feats_extract_conf fmax=7600 --feats_extract_conf n_mels=80 --pitch_extract_conf fs=24000 --pitch_extract_conf n_fft=2048 --pitch_extract_conf hop_length=300 --pitch_extract_conf f0max=400 --pitch_extract_conf f0min=80 --energy_extract_conf fs=24000 --energy_extract_conf n_fft=2048 --energy_extract_conf hop_length=300 --energy_extract_conf win_length=1200
|
| 2 |
+
# Started at Tue Mar 4 22:09:35 JST 2025
|
| 3 |
#
|
| 4 |
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 5 |
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 6 |
/usr/bin/python3 /work/espnet/espnet2/bin/gan_tts_train.py --collect_stats true --write_collected_feats false --use_preprocessor true --token_type phn --token_list dump/token_list/phn_jaconv_pyopenjtalk/tokens.txt --non_linguistic_symbols none --cleaner jaconv --g2p pyopenjtalk --normalize none --pitch_normalize none --energy_normalize none --train_data_path_and_name_and_type dump/raw/jvs010_tr_no_dev/text,text,text --train_data_path_and_name_and_type dump/raw/jvs010_tr_no_dev/wav.scp,speech,sound --valid_data_path_and_name_and_type dump/raw/jvs010_dev/text,text,text --valid_data_path_and_name_and_type dump/raw/jvs010_dev/wav.scp,speech,sound --train_shape_file exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/train.1.scp --valid_shape_file exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/valid.1.scp --output_dir exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/stats.1 --config conf/tuning/train_jets.yaml --feats_extract fbank --feats_extract_conf n_fft=2048 --feats_extract_conf hop_length=300 --feats_extract_conf win_length=1200 --feats_extract_conf fs=24000 --feats_extract_conf fmin=80 --feats_extract_conf fmax=7600 --feats_extract_conf n_mels=80 --pitch_extract_conf fs=24000 --pitch_extract_conf n_fft=2048 --pitch_extract_conf hop_length=300 --pitch_extract_conf f0max=400 --pitch_extract_conf f0min=80 --energy_extract_conf fs=24000 --energy_extract_conf n_fft=2048 --energy_extract_conf hop_length=300 --energy_extract_conf win_length=1200
|
| 7 |
+
[92b100c97f43] 2025-03-04 22:09:38,621 (gan_tts:304) INFO: Vocabulary size: 41
|
| 8 |
+
[92b100c97f43] 2025-03-04 22:09:38,843 (encoder:172) INFO: encoder self-attention layer type = self-attention
|
| 9 |
+
[92b100c97f43] 2025-03-04 22:09:38,966 (encoder:172) INFO: encoder self-attention layer type = self-attention
|
| 10 |
+
[92b100c97f43] 2025-03-04 22:09:41,174 (abs_task:1157) INFO: pytorch.version=1.10.1+cu113, cuda.available=True, cudnn.version=8200, cudnn.benchmark=False, cudnn.deterministic=False
|
| 11 |
+
[92b100c97f43] 2025-03-04 22:09:41,185 (abs_task:1158) INFO: Model structure:
|
| 12 |
ESPnetGANTTSModel(
|
| 13 |
(feats_extract): LogMelFbank(
|
| 14 |
(stft): Stft(n_fft=2048, win_length=1200, hop_length=300, center=True, normalized=False, onesided=True)
|
|
|
|
| 844 |
Number of trainable parameters: 83.28 M (100.0%)
|
| 845 |
Size: 333.11 MB
|
| 846 |
Type: torch.float32
|
| 847 |
+
[92b100c97f43] 2025-03-04 22:09:41,185 (abs_task:1161) INFO: Optimizer:
|
| 848 |
AdamW (
|
| 849 |
Parameter Group 0
|
| 850 |
amsgrad: False
|
|
|
|
| 854 |
lr: 0.0002
|
| 855 |
weight_decay: 0.0
|
| 856 |
)
|
| 857 |
+
[92b100c97f43] 2025-03-04 22:09:41,185 (abs_task:1162) INFO: Scheduler: <torch.optim.lr_scheduler.ExponentialLR object at 0x7f8714e3e1c0>
|
| 858 |
+
[92b100c97f43] 2025-03-04 22:09:41,185 (abs_task:1161) INFO: Optimizer2:
|
| 859 |
AdamW (
|
| 860 |
Parameter Group 0
|
| 861 |
amsgrad: False
|
|
|
|
| 865 |
lr: 0.0002
|
| 866 |
weight_decay: 0.0
|
| 867 |
)
|
| 868 |
+
[92b100c97f43] 2025-03-04 22:09:41,185 (abs_task:1162) INFO: Scheduler2: <torch.optim.lr_scheduler.ExponentialLR object at 0x7f8723d63280>
|
| 869 |
+
[92b100c97f43] 2025-03-04 22:09:41,185 (abs_task:1171) INFO: Saving the configuration in exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/stats.1/config.yaml
|
| 870 |
+
[92b100c97f43] 2025-03-04 22:09:41,204 (abs_task:1182) INFO: Namespace(accum_grad=1, allow_variable_data_keys=False, batch_bins=6000000, batch_size=20, batch_type='numel', best_model_criterion=[['valid', 'text2mel_loss', 'min'], ['train', 'text2mel_loss', 'min'], ['train', 'total_count', 'max']], bpemodel=None, chunk_length=500, chunk_shift_ratio=0.5, cleaner='jaconv', collect_stats=True, config='conf/tuning/train_jets.yaml', cudnn_benchmark=False, cudnn_deterministic=False, cudnn_enabled=True, detect_anomaly=False, dist_backend='nccl', dist_init_method='env://', dist_launcher=None, dist_master_addr=None, dist_master_port=None, dist_rank=None, dist_world_size=None, distributed=False, dry_run=False, early_stopping_criterion=('valid', 'loss', 'min'), energy_extract='energy', energy_extract_conf={'reduction_factor': 1, 'use_token_averaged_energy': False, 'fs': 24000, 'n_fft': 2048, 'hop_length': 300, 'win_length': 1200}, energy_normalize=None, energy_normalize_conf={}, feats_extract='fbank', feats_extract_conf={'n_fft': 2048, 'hop_length': 300, 'win_length': 1200, 'fs': 24000, 'fmin': 80, 'fmax': 7600, 'n_mels': 80}, fold_length=[], freeze_param=[], g2p='pyopenjtalk', generator_first=True, grad_clip=-1, grad_clip_type=2.0, grad_noise=False, ignore_init_mismatch=False, init_param=[], iterator_type='sequence', keep_nbest_models=-1, local_rank=None, log_interval=50, log_level='INFO', max_cache_fd=32, max_cache_size=0.0, max_epoch=130, model_conf={}, multiple_iterator=False, multiprocessing_distributed=False, nbest_averaging_interval=0, ngpu=0, no_forward_run=False, non_linguistic_symbols=None, normalize=None, normalize_conf={}, num_att_plot=3, num_cache_chunks=1024, num_iters_per_epoch=1000, num_workers=32, odim=None, optim='adamw', optim2='adamw', optim2_conf={'lr': 0.0002, 'betas': [0.8, 0.99], 'eps': 1e-09, 'weight_decay': 0.0}, optim_conf={'lr': 0.0002, 'betas': [0.8, 0.99], 'eps': 1e-09, 'weight_decay': 0.0}, output_dir='exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/stats.1', patience=None, pitch_extract='dio', pitch_extract_conf={'reduction_factor': 1, 'use_token_averaged_f0': False, 'fs': 24000, 'n_fft': 2048, 'hop_length': 300, 'f0max': 400, 'f0min': 80}, pitch_normalize=None, pitch_normalize_conf={}, pretrain_path=None, print_config=False, required=['output_dir', 'token_list'], resume=False, scheduler='exponentiallr', scheduler2='exponentiallr', scheduler2_conf={'gamma': 0.999875}, scheduler_conf={'gamma': 0.999875}, seed=777, sharded_ddp=False, sort_batch='descending', sort_in_batch='descending', token_list=['<blank>', '<unk>', 'o', 'a', 'u', 'i', 'e', 'k', 'r', 't', 'n', 'pau', 'N', 's', 'sh', 'd', 'm', 'g', 'w', 'b', 'cl', 'I', 'j', 'ch', 'y', 'U', 'h', 'p', 'ts', 'f', 'z', 'ky', 'ny', 'gy', 'ry', 'hy', 'my', 'by', 'py', 'v', '<sos/eos>'], token_type='phn', train_data_path_and_name_and_type=[('dump/raw/jvs010_tr_no_dev/text', 'text', 'text'), ('dump/raw/jvs010_tr_no_dev/wav.scp', 'speech', 'sound')], train_dtype='float32', train_shape_file=['exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/train.1.scp'], tts='jets', tts_conf={'generator_type': 'jets_generator', 'generator_params': {'adim': 256, 'aheads': 2, 'elayers': 4, 'eunits': 1024, 'dlayers': 4, 'dunits': 1024, 'positionwise_layer_type': 'conv1d', 'positionwise_conv_kernel_size': 3, 'duration_predictor_layers': 2, 'duration_predictor_chans': 256, 'duration_predictor_kernel_size': 3, 'use_masking': True, 'encoder_normalize_before': True, 'decoder_normalize_before': True, 'encoder_type': 'transformer', 'decoder_type': 'transformer', 'conformer_rel_pos_type': 'latest', 'conformer_pos_enc_layer_type': 'rel_pos', 'conformer_self_attn_layer_type': 'rel_selfattn', 'conformer_activation_type': 'swish', 'use_macaron_style_in_conformer': True, 'use_cnn_in_conformer': True, 'conformer_enc_kernel_size': 7, 'conformer_dec_kernel_size': 31, 'init_type': 'xavier_uniform', 'transformer_enc_dropout_rate': 0.2, 'transformer_enc_positional_dropout_rate': 0.2, 'transformer_enc_attn_dropout_rate': 0.2, 'transformer_dec_dropout_rate': 0.2, 'transformer_dec_positional_dropout_rate': 0.2, 'transformer_dec_attn_dropout_rate': 0.2, 'pitch_predictor_layers': 5, 'pitch_predictor_chans': 256, 'pitch_predictor_kernel_size': 5, 'pitch_predictor_dropout': 0.5, 'pitch_embed_kernel_size': 1, 'pitch_embed_dropout': 0.0, 'stop_gradient_from_pitch_predictor': True, 'energy_predictor_layers': 2, 'energy_predictor_chans': 256, 'energy_predictor_kernel_size': 3, 'energy_predictor_dropout': 0.5, 'energy_embed_kernel_size': 1, 'energy_embed_dropout': 0.0, 'stop_gradient_from_energy_predictor': False, 'generator_out_channels': 1, 'generator_channels': 512, 'generator_global_channels': -1, 'generator_kernel_size': 7, 'generator_upsample_scales': [8, 8, 2, 2], 'generator_upsample_kernel_sizes': [16, 16, 4, 4], 'generator_resblock_kernel_sizes': [3, 7, 11], 'generator_resblock_dilations': [[1, 3, 5], [1, 3, 5], [1, 3, 5]], 'generator_use_additional_convs': True, 'generator_bias': True, 'generator_nonlinear_activation': 'LeakyReLU', 'generator_nonlinear_activation_params': {'negative_slope': 0.1}, 'generator_use_weight_norm': True, 'segment_size': 64, 'idim': 41, 'odim': 80}, 'discriminator_type': 'hifigan_multi_scale_multi_period_discriminator', 'discriminator_params': {'scales': 1, 'scale_downsample_pooling': 'AvgPool1d', 'scale_downsample_pooling_params': {'kernel_size': 4, 'stride': 2, 'padding': 2}, 'scale_discriminator_params': {'in_channels': 1, 'out_channels': 1, 'kernel_sizes': [15, 41, 5, 3], 'channels': 128, 'max_downsample_channels': 1024, 'max_groups': 16, 'bias': True, 'downsample_scales': [2, 2, 4, 4, 1], 'nonlinear_activation': 'LeakyReLU', 'nonlinear_activation_params': {'negative_slope': 0.1}, 'use_weight_norm': True, 'use_spectral_norm': False}, 'follow_official_norm': False, 'periods': [2, 3, 5, 7, 11], 'period_discriminator_params': {'in_channels': 1, 'out_channels': 1, 'kernel_sizes': [5, 3], 'channels': 32, 'downsample_scales': [3, 3, 3, 3, 1], 'max_downsample_channels': 1024, 'bias': True, 'nonlinear_activation': 'LeakyReLU', 'nonlinear_activation_params': {'negative_slope': 0.1}, 'use_weight_norm': True, 'use_spectral_norm': False}}, 'generator_adv_loss_params': {'average_by_discriminators': False, 'loss_type': 'mse'}, 'discriminator_adv_loss_params': {'average_by_discriminators': False, 'loss_type': 'mse'}, 'feat_match_loss_params': {'average_by_discriminators': False, 'average_by_layers': False, 'include_final_outputs': True}, 'mel_loss_params': {'fs': 24000, 'n_fft': 1024, 'hop_length': 256, 'win_length': None, 'window': 'hann', 'n_mels': 80, 'fmin': 0, 'fmax': None, 'log_base': None}, 'lambda_adv': 1.0, 'lambda_mel': 45.0, 'lambda_feat_match': 2.0, 'lambda_var': 1.0, 'lambda_align': 2.0, 'sampling_rate': 24000, 'cache_generator_outputs': True}, unused_parameters=True, use_amp=False, use_matplotlib=True, use_preprocessor=True, use_tensorboard=True, use_wandb=False, val_scheduler_criterion=('valid', 'loss'), valid_batch_bins=None, valid_batch_size=None, valid_batch_type=None, valid_data_path_and_name_and_type=[('dump/raw/jvs010_dev/text', 'text', 'text'), ('dump/raw/jvs010_dev/wav.scp', 'speech', 'sound')], valid_max_cache_size=None, valid_shape_file=['exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/valid.1.scp'], version='202204', wandb_entity=None, wandb_id=None, wandb_model_log_interval=-1, wandb_name=None, wandb_project=None, write_collected_feats=False)
|
| 871 |
/work/espnet/espnet2/layers/stft.py:166: UserWarning: __floordiv__ is deprecated, and its behavior will change in a future version of pytorch. It currently rounds toward 0 (like the 'trunc' function NOT 'floor'). This results in incorrect rounding for negative values. To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor').
|
| 872 |
olens = (ilens - self.n_fft) // self.hop_length + 1
|
| 873 |
+
# Accounting: time=15 threads=1
|
| 874 |
+
# Ended (code 0) at Tue Mar 4 22:09:50 JST 2025, elapsed time 15 seconds
|
exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/stats.1/config.yaml
CHANGED
|
@@ -6,7 +6,7 @@ iterator_type: sequence
|
|
| 6 |
output_dir: exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/stats.1
|
| 7 |
ngpu: 0
|
| 8 |
seed: 777
|
| 9 |
-
num_workers:
|
| 10 |
num_att_plot: 3
|
| 11 |
dist_backend: nccl
|
| 12 |
dist_init_method: env://
|
|
|
|
| 6 |
output_dir: exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/stats.1
|
| 7 |
ngpu: 0
|
| 8 |
seed: 777
|
| 9 |
+
num_workers: 32
|
| 10 |
num_att_plot: 3
|
| 11 |
dist_backend: nccl
|
| 12 |
dist_init_method: env://
|
exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/stats.2.log
CHANGED
|
@@ -1,14 +1,14 @@
|
|
| 1 |
# python3 -m espnet2.bin.gan_tts_train --collect_stats true --write_collected_feats false --use_preprocessor true --token_type phn --token_list dump/token_list/phn_jaconv_pyopenjtalk/tokens.txt --non_linguistic_symbols none --cleaner jaconv --g2p pyopenjtalk --normalize none --pitch_normalize none --energy_normalize none --train_data_path_and_name_and_type dump/raw/jvs010_tr_no_dev/text,text,text --train_data_path_and_name_and_type dump/raw/jvs010_tr_no_dev/wav.scp,speech,sound --valid_data_path_and_name_and_type dump/raw/jvs010_dev/text,text,text --valid_data_path_and_name_and_type dump/raw/jvs010_dev/wav.scp,speech,sound --train_shape_file exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/train.2.scp --valid_shape_file exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/valid.2.scp --output_dir exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/stats.2 --config conf/tuning/train_jets.yaml --feats_extract fbank --feats_extract_conf n_fft=2048 --feats_extract_conf hop_length=300 --feats_extract_conf win_length=1200 --feats_extract_conf fs=24000 --feats_extract_conf fmin=80 --feats_extract_conf fmax=7600 --feats_extract_conf n_mels=80 --pitch_extract_conf fs=24000 --pitch_extract_conf n_fft=2048 --pitch_extract_conf hop_length=300 --pitch_extract_conf f0max=400 --pitch_extract_conf f0min=80 --energy_extract_conf fs=24000 --energy_extract_conf n_fft=2048 --energy_extract_conf hop_length=300 --energy_extract_conf win_length=1200
|
| 2 |
-
# Started at Tue Mar 4
|
| 3 |
#
|
| 4 |
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 5 |
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 6 |
/usr/bin/python3 /work/espnet/espnet2/bin/gan_tts_train.py --collect_stats true --write_collected_feats false --use_preprocessor true --token_type phn --token_list dump/token_list/phn_jaconv_pyopenjtalk/tokens.txt --non_linguistic_symbols none --cleaner jaconv --g2p pyopenjtalk --normalize none --pitch_normalize none --energy_normalize none --train_data_path_and_name_and_type dump/raw/jvs010_tr_no_dev/text,text,text --train_data_path_and_name_and_type dump/raw/jvs010_tr_no_dev/wav.scp,speech,sound --valid_data_path_and_name_and_type dump/raw/jvs010_dev/text,text,text --valid_data_path_and_name_and_type dump/raw/jvs010_dev/wav.scp,speech,sound --train_shape_file exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/train.2.scp --valid_shape_file exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/valid.2.scp --output_dir exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/stats.2 --config conf/tuning/train_jets.yaml --feats_extract fbank --feats_extract_conf n_fft=2048 --feats_extract_conf hop_length=300 --feats_extract_conf win_length=1200 --feats_extract_conf fs=24000 --feats_extract_conf fmin=80 --feats_extract_conf fmax=7600 --feats_extract_conf n_mels=80 --pitch_extract_conf fs=24000 --pitch_extract_conf n_fft=2048 --pitch_extract_conf hop_length=300 --pitch_extract_conf f0max=400 --pitch_extract_conf f0min=80 --energy_extract_conf fs=24000 --energy_extract_conf n_fft=2048 --energy_extract_conf hop_length=300 --energy_extract_conf win_length=1200
|
| 7 |
-
[92b100c97f43] 2025-03-04
|
| 8 |
-
[92b100c97f43] 2025-03-04
|
| 9 |
-
[92b100c97f43] 2025-03-04
|
| 10 |
-
[92b100c97f43] 2025-03-04
|
| 11 |
-
[92b100c97f43] 2025-03-04
|
| 12 |
ESPnetGANTTSModel(
|
| 13 |
(feats_extract): LogMelFbank(
|
| 14 |
(stft): Stft(n_fft=2048, win_length=1200, hop_length=300, center=True, normalized=False, onesided=True)
|
|
@@ -844,7 +844,7 @@ Model summary:
|
|
| 844 |
Number of trainable parameters: 83.28 M (100.0%)
|
| 845 |
Size: 333.11 MB
|
| 846 |
Type: torch.float32
|
| 847 |
-
[92b100c97f43] 2025-03-04
|
| 848 |
AdamW (
|
| 849 |
Parameter Group 0
|
| 850 |
amsgrad: False
|
|
@@ -854,8 +854,8 @@ Parameter Group 0
|
|
| 854 |
lr: 0.0002
|
| 855 |
weight_decay: 0.0
|
| 856 |
)
|
| 857 |
-
[92b100c97f43] 2025-03-04
|
| 858 |
-
[92b100c97f43] 2025-03-04
|
| 859 |
AdamW (
|
| 860 |
Parameter Group 0
|
| 861 |
amsgrad: False
|
|
@@ -865,10 +865,10 @@ Parameter Group 0
|
|
| 865 |
lr: 0.0002
|
| 866 |
weight_decay: 0.0
|
| 867 |
)
|
| 868 |
-
[92b100c97f43] 2025-03-04
|
| 869 |
-
[92b100c97f43] 2025-03-04
|
| 870 |
-
[92b100c97f43] 2025-03-04
|
| 871 |
/work/espnet/espnet2/layers/stft.py:166: UserWarning: __floordiv__ is deprecated, and its behavior will change in a future version of pytorch. It currently rounds toward 0 (like the 'trunc' function NOT 'floor'). This results in incorrect rounding for negative values. To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor').
|
| 872 |
olens = (ilens - self.n_fft) // self.hop_length + 1
|
| 873 |
-
# Accounting: time=
|
| 874 |
-
# Ended (code 0) at Tue Mar 4
|
|
|
|
| 1 |
# python3 -m espnet2.bin.gan_tts_train --collect_stats true --write_collected_feats false --use_preprocessor true --token_type phn --token_list dump/token_list/phn_jaconv_pyopenjtalk/tokens.txt --non_linguistic_symbols none --cleaner jaconv --g2p pyopenjtalk --normalize none --pitch_normalize none --energy_normalize none --train_data_path_and_name_and_type dump/raw/jvs010_tr_no_dev/text,text,text --train_data_path_and_name_and_type dump/raw/jvs010_tr_no_dev/wav.scp,speech,sound --valid_data_path_and_name_and_type dump/raw/jvs010_dev/text,text,text --valid_data_path_and_name_and_type dump/raw/jvs010_dev/wav.scp,speech,sound --train_shape_file exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/train.2.scp --valid_shape_file exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/valid.2.scp --output_dir exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/stats.2 --config conf/tuning/train_jets.yaml --feats_extract fbank --feats_extract_conf n_fft=2048 --feats_extract_conf hop_length=300 --feats_extract_conf win_length=1200 --feats_extract_conf fs=24000 --feats_extract_conf fmin=80 --feats_extract_conf fmax=7600 --feats_extract_conf n_mels=80 --pitch_extract_conf fs=24000 --pitch_extract_conf n_fft=2048 --pitch_extract_conf hop_length=300 --pitch_extract_conf f0max=400 --pitch_extract_conf f0min=80 --energy_extract_conf fs=24000 --energy_extract_conf n_fft=2048 --energy_extract_conf hop_length=300 --energy_extract_conf win_length=1200
|
| 2 |
+
# Started at Tue Mar 4 22:09:35 JST 2025
|
| 3 |
#
|
| 4 |
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 5 |
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 6 |
/usr/bin/python3 /work/espnet/espnet2/bin/gan_tts_train.py --collect_stats true --write_collected_feats false --use_preprocessor true --token_type phn --token_list dump/token_list/phn_jaconv_pyopenjtalk/tokens.txt --non_linguistic_symbols none --cleaner jaconv --g2p pyopenjtalk --normalize none --pitch_normalize none --energy_normalize none --train_data_path_and_name_and_type dump/raw/jvs010_tr_no_dev/text,text,text --train_data_path_and_name_and_type dump/raw/jvs010_tr_no_dev/wav.scp,speech,sound --valid_data_path_and_name_and_type dump/raw/jvs010_dev/text,text,text --valid_data_path_and_name_and_type dump/raw/jvs010_dev/wav.scp,speech,sound --train_shape_file exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/train.2.scp --valid_shape_file exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/valid.2.scp --output_dir exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/stats.2 --config conf/tuning/train_jets.yaml --feats_extract fbank --feats_extract_conf n_fft=2048 --feats_extract_conf hop_length=300 --feats_extract_conf win_length=1200 --feats_extract_conf fs=24000 --feats_extract_conf fmin=80 --feats_extract_conf fmax=7600 --feats_extract_conf n_mels=80 --pitch_extract_conf fs=24000 --pitch_extract_conf n_fft=2048 --pitch_extract_conf hop_length=300 --pitch_extract_conf f0max=400 --pitch_extract_conf f0min=80 --energy_extract_conf fs=24000 --energy_extract_conf n_fft=2048 --energy_extract_conf hop_length=300 --energy_extract_conf win_length=1200
|
| 7 |
+
[92b100c97f43] 2025-03-04 22:09:38,625 (gan_tts:304) INFO: Vocabulary size: 41
|
| 8 |
+
[92b100c97f43] 2025-03-04 22:09:38,848 (encoder:172) INFO: encoder self-attention layer type = self-attention
|
| 9 |
+
[92b100c97f43] 2025-03-04 22:09:38,971 (encoder:172) INFO: encoder self-attention layer type = self-attention
|
| 10 |
+
[92b100c97f43] 2025-03-04 22:09:41,094 (abs_task:1157) INFO: pytorch.version=1.10.1+cu113, cuda.available=True, cudnn.version=8200, cudnn.benchmark=False, cudnn.deterministic=False
|
| 11 |
+
[92b100c97f43] 2025-03-04 22:09:41,107 (abs_task:1158) INFO: Model structure:
|
| 12 |
ESPnetGANTTSModel(
|
| 13 |
(feats_extract): LogMelFbank(
|
| 14 |
(stft): Stft(n_fft=2048, win_length=1200, hop_length=300, center=True, normalized=False, onesided=True)
|
|
|
|
| 844 |
Number of trainable parameters: 83.28 M (100.0%)
|
| 845 |
Size: 333.11 MB
|
| 846 |
Type: torch.float32
|
| 847 |
+
[92b100c97f43] 2025-03-04 22:09:41,107 (abs_task:1161) INFO: Optimizer:
|
| 848 |
AdamW (
|
| 849 |
Parameter Group 0
|
| 850 |
amsgrad: False
|
|
|
|
| 854 |
lr: 0.0002
|
| 855 |
weight_decay: 0.0
|
| 856 |
)
|
| 857 |
+
[92b100c97f43] 2025-03-04 22:09:41,107 (abs_task:1162) INFO: Scheduler: <torch.optim.lr_scheduler.ExponentialLR object at 0x7f91b6d1e160>
|
| 858 |
+
[92b100c97f43] 2025-03-04 22:09:41,107 (abs_task:1161) INFO: Optimizer2:
|
| 859 |
AdamW (
|
| 860 |
Parameter Group 0
|
| 861 |
amsgrad: False
|
|
|
|
| 865 |
lr: 0.0002
|
| 866 |
weight_decay: 0.0
|
| 867 |
)
|
| 868 |
+
[92b100c97f43] 2025-03-04 22:09:41,107 (abs_task:1162) INFO: Scheduler2: <torch.optim.lr_scheduler.ExponentialLR object at 0x7f91c5f45280>
|
| 869 |
+
[92b100c97f43] 2025-03-04 22:09:41,108 (abs_task:1171) INFO: Saving the configuration in exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/stats.2/config.yaml
|
| 870 |
+
[92b100c97f43] 2025-03-04 22:09:41,127 (abs_task:1182) INFO: Namespace(accum_grad=1, allow_variable_data_keys=False, batch_bins=6000000, batch_size=20, batch_type='numel', best_model_criterion=[['valid', 'text2mel_loss', 'min'], ['train', 'text2mel_loss', 'min'], ['train', 'total_count', 'max']], bpemodel=None, chunk_length=500, chunk_shift_ratio=0.5, cleaner='jaconv', collect_stats=True, config='conf/tuning/train_jets.yaml', cudnn_benchmark=False, cudnn_deterministic=False, cudnn_enabled=True, detect_anomaly=False, dist_backend='nccl', dist_init_method='env://', dist_launcher=None, dist_master_addr=None, dist_master_port=None, dist_rank=None, dist_world_size=None, distributed=False, dry_run=False, early_stopping_criterion=('valid', 'loss', 'min'), energy_extract='energy', energy_extract_conf={'reduction_factor': 1, 'use_token_averaged_energy': False, 'fs': 24000, 'n_fft': 2048, 'hop_length': 300, 'win_length': 1200}, energy_normalize=None, energy_normalize_conf={}, feats_extract='fbank', feats_extract_conf={'n_fft': 2048, 'hop_length': 300, 'win_length': 1200, 'fs': 24000, 'fmin': 80, 'fmax': 7600, 'n_mels': 80}, fold_length=[], freeze_param=[], g2p='pyopenjtalk', generator_first=True, grad_clip=-1, grad_clip_type=2.0, grad_noise=False, ignore_init_mismatch=False, init_param=[], iterator_type='sequence', keep_nbest_models=-1, local_rank=None, log_interval=50, log_level='INFO', max_cache_fd=32, max_cache_size=0.0, max_epoch=130, model_conf={}, multiple_iterator=False, multiprocessing_distributed=False, nbest_averaging_interval=0, ngpu=0, no_forward_run=False, non_linguistic_symbols=None, normalize=None, normalize_conf={}, num_att_plot=3, num_cache_chunks=1024, num_iters_per_epoch=1000, num_workers=32, odim=None, optim='adamw', optim2='adamw', optim2_conf={'lr': 0.0002, 'betas': [0.8, 0.99], 'eps': 1e-09, 'weight_decay': 0.0}, optim_conf={'lr': 0.0002, 'betas': [0.8, 0.99], 'eps': 1e-09, 'weight_decay': 0.0}, output_dir='exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/stats.2', patience=None, pitch_extract='dio', pitch_extract_conf={'reduction_factor': 1, 'use_token_averaged_f0': False, 'fs': 24000, 'n_fft': 2048, 'hop_length': 300, 'f0max': 400, 'f0min': 80}, pitch_normalize=None, pitch_normalize_conf={}, pretrain_path=None, print_config=False, required=['output_dir', 'token_list'], resume=False, scheduler='exponentiallr', scheduler2='exponentiallr', scheduler2_conf={'gamma': 0.999875}, scheduler_conf={'gamma': 0.999875}, seed=777, sharded_ddp=False, sort_batch='descending', sort_in_batch='descending', token_list=['<blank>', '<unk>', 'o', 'a', 'u', 'i', 'e', 'k', 'r', 't', 'n', 'pau', 'N', 's', 'sh', 'd', 'm', 'g', 'w', 'b', 'cl', 'I', 'j', 'ch', 'y', 'U', 'h', 'p', 'ts', 'f', 'z', 'ky', 'ny', 'gy', 'ry', 'hy', 'my', 'by', 'py', 'v', '<sos/eos>'], token_type='phn', train_data_path_and_name_and_type=[('dump/raw/jvs010_tr_no_dev/text', 'text', 'text'), ('dump/raw/jvs010_tr_no_dev/wav.scp', 'speech', 'sound')], train_dtype='float32', train_shape_file=['exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/train.2.scp'], tts='jets', tts_conf={'generator_type': 'jets_generator', 'generator_params': {'adim': 256, 'aheads': 2, 'elayers': 4, 'eunits': 1024, 'dlayers': 4, 'dunits': 1024, 'positionwise_layer_type': 'conv1d', 'positionwise_conv_kernel_size': 3, 'duration_predictor_layers': 2, 'duration_predictor_chans': 256, 'duration_predictor_kernel_size': 3, 'use_masking': True, 'encoder_normalize_before': True, 'decoder_normalize_before': True, 'encoder_type': 'transformer', 'decoder_type': 'transformer', 'conformer_rel_pos_type': 'latest', 'conformer_pos_enc_layer_type': 'rel_pos', 'conformer_self_attn_layer_type': 'rel_selfattn', 'conformer_activation_type': 'swish', 'use_macaron_style_in_conformer': True, 'use_cnn_in_conformer': True, 'conformer_enc_kernel_size': 7, 'conformer_dec_kernel_size': 31, 'init_type': 'xavier_uniform', 'transformer_enc_dropout_rate': 0.2, 'transformer_enc_positional_dropout_rate': 0.2, 'transformer_enc_attn_dropout_rate': 0.2, 'transformer_dec_dropout_rate': 0.2, 'transformer_dec_positional_dropout_rate': 0.2, 'transformer_dec_attn_dropout_rate': 0.2, 'pitch_predictor_layers': 5, 'pitch_predictor_chans': 256, 'pitch_predictor_kernel_size': 5, 'pitch_predictor_dropout': 0.5, 'pitch_embed_kernel_size': 1, 'pitch_embed_dropout': 0.0, 'stop_gradient_from_pitch_predictor': True, 'energy_predictor_layers': 2, 'energy_predictor_chans': 256, 'energy_predictor_kernel_size': 3, 'energy_predictor_dropout': 0.5, 'energy_embed_kernel_size': 1, 'energy_embed_dropout': 0.0, 'stop_gradient_from_energy_predictor': False, 'generator_out_channels': 1, 'generator_channels': 512, 'generator_global_channels': -1, 'generator_kernel_size': 7, 'generator_upsample_scales': [8, 8, 2, 2], 'generator_upsample_kernel_sizes': [16, 16, 4, 4], 'generator_resblock_kernel_sizes': [3, 7, 11], 'generator_resblock_dilations': [[1, 3, 5], [1, 3, 5], [1, 3, 5]], 'generator_use_additional_convs': True, 'generator_bias': True, 'generator_nonlinear_activation': 'LeakyReLU', 'generator_nonlinear_activation_params': {'negative_slope': 0.1}, 'generator_use_weight_norm': True, 'segment_size': 64, 'idim': 41, 'odim': 80}, 'discriminator_type': 'hifigan_multi_scale_multi_period_discriminator', 'discriminator_params': {'scales': 1, 'scale_downsample_pooling': 'AvgPool1d', 'scale_downsample_pooling_params': {'kernel_size': 4, 'stride': 2, 'padding': 2}, 'scale_discriminator_params': {'in_channels': 1, 'out_channels': 1, 'kernel_sizes': [15, 41, 5, 3], 'channels': 128, 'max_downsample_channels': 1024, 'max_groups': 16, 'bias': True, 'downsample_scales': [2, 2, 4, 4, 1], 'nonlinear_activation': 'LeakyReLU', 'nonlinear_activation_params': {'negative_slope': 0.1}, 'use_weight_norm': True, 'use_spectral_norm': False}, 'follow_official_norm': False, 'periods': [2, 3, 5, 7, 11], 'period_discriminator_params': {'in_channels': 1, 'out_channels': 1, 'kernel_sizes': [5, 3], 'channels': 32, 'downsample_scales': [3, 3, 3, 3, 1], 'max_downsample_channels': 1024, 'bias': True, 'nonlinear_activation': 'LeakyReLU', 'nonlinear_activation_params': {'negative_slope': 0.1}, 'use_weight_norm': True, 'use_spectral_norm': False}}, 'generator_adv_loss_params': {'average_by_discriminators': False, 'loss_type': 'mse'}, 'discriminator_adv_loss_params': {'average_by_discriminators': False, 'loss_type': 'mse'}, 'feat_match_loss_params': {'average_by_discriminators': False, 'average_by_layers': False, 'include_final_outputs': True}, 'mel_loss_params': {'fs': 24000, 'n_fft': 1024, 'hop_length': 256, 'win_length': None, 'window': 'hann', 'n_mels': 80, 'fmin': 0, 'fmax': None, 'log_base': None}, 'lambda_adv': 1.0, 'lambda_mel': 45.0, 'lambda_feat_match': 2.0, 'lambda_var': 1.0, 'lambda_align': 2.0, 'sampling_rate': 24000, 'cache_generator_outputs': True}, unused_parameters=True, use_amp=False, use_matplotlib=True, use_preprocessor=True, use_tensorboard=True, use_wandb=False, val_scheduler_criterion=('valid', 'loss'), valid_batch_bins=None, valid_batch_size=None, valid_batch_type=None, valid_data_path_and_name_and_type=[('dump/raw/jvs010_dev/text', 'text', 'text'), ('dump/raw/jvs010_dev/wav.scp', 'speech', 'sound')], valid_max_cache_size=None, valid_shape_file=['exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/valid.2.scp'], version='202204', wandb_entity=None, wandb_id=None, wandb_model_log_interval=-1, wandb_name=None, wandb_project=None, write_collected_feats=False)
|
| 871 |
/work/espnet/espnet2/layers/stft.py:166: UserWarning: __floordiv__ is deprecated, and its behavior will change in a future version of pytorch. It currently rounds toward 0 (like the 'trunc' function NOT 'floor'). This results in incorrect rounding for negative values. To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor').
|
| 872 |
olens = (ilens - self.n_fft) // self.hop_length + 1
|
| 873 |
+
# Accounting: time=15 threads=1
|
| 874 |
+
# Ended (code 0) at Tue Mar 4 22:09:50 JST 2025, elapsed time 15 seconds
|
exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/stats.2/config.yaml
CHANGED
|
@@ -6,7 +6,7 @@ iterator_type: sequence
|
|
| 6 |
output_dir: exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/stats.2
|
| 7 |
ngpu: 0
|
| 8 |
seed: 777
|
| 9 |
-
num_workers:
|
| 10 |
num_att_plot: 3
|
| 11 |
dist_backend: nccl
|
| 12 |
dist_init_method: env://
|
|
|
|
| 6 |
output_dir: exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/stats.2
|
| 7 |
ngpu: 0
|
| 8 |
seed: 777
|
| 9 |
+
num_workers: 32
|
| 10 |
num_att_plot: 3
|
| 11 |
dist_backend: nccl
|
| 12 |
dist_init_method: env://
|
exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/stats.3.log
CHANGED
|
@@ -1,14 +1,14 @@
|
|
| 1 |
# python3 -m espnet2.bin.gan_tts_train --collect_stats true --write_collected_feats false --use_preprocessor true --token_type phn --token_list dump/token_list/phn_jaconv_pyopenjtalk/tokens.txt --non_linguistic_symbols none --cleaner jaconv --g2p pyopenjtalk --normalize none --pitch_normalize none --energy_normalize none --train_data_path_and_name_and_type dump/raw/jvs010_tr_no_dev/text,text,text --train_data_path_and_name_and_type dump/raw/jvs010_tr_no_dev/wav.scp,speech,sound --valid_data_path_and_name_and_type dump/raw/jvs010_dev/text,text,text --valid_data_path_and_name_and_type dump/raw/jvs010_dev/wav.scp,speech,sound --train_shape_file exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/train.3.scp --valid_shape_file exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/valid.3.scp --output_dir exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/stats.3 --config conf/tuning/train_jets.yaml --feats_extract fbank --feats_extract_conf n_fft=2048 --feats_extract_conf hop_length=300 --feats_extract_conf win_length=1200 --feats_extract_conf fs=24000 --feats_extract_conf fmin=80 --feats_extract_conf fmax=7600 --feats_extract_conf n_mels=80 --pitch_extract_conf fs=24000 --pitch_extract_conf n_fft=2048 --pitch_extract_conf hop_length=300 --pitch_extract_conf f0max=400 --pitch_extract_conf f0min=80 --energy_extract_conf fs=24000 --energy_extract_conf n_fft=2048 --energy_extract_conf hop_length=300 --energy_extract_conf win_length=1200
|
| 2 |
-
# Started at Tue Mar 4
|
| 3 |
#
|
| 4 |
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 5 |
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 6 |
/usr/bin/python3 /work/espnet/espnet2/bin/gan_tts_train.py --collect_stats true --write_collected_feats false --use_preprocessor true --token_type phn --token_list dump/token_list/phn_jaconv_pyopenjtalk/tokens.txt --non_linguistic_symbols none --cleaner jaconv --g2p pyopenjtalk --normalize none --pitch_normalize none --energy_normalize none --train_data_path_and_name_and_type dump/raw/jvs010_tr_no_dev/text,text,text --train_data_path_and_name_and_type dump/raw/jvs010_tr_no_dev/wav.scp,speech,sound --valid_data_path_and_name_and_type dump/raw/jvs010_dev/text,text,text --valid_data_path_and_name_and_type dump/raw/jvs010_dev/wav.scp,speech,sound --train_shape_file exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/train.3.scp --valid_shape_file exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/valid.3.scp --output_dir exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/stats.3 --config conf/tuning/train_jets.yaml --feats_extract fbank --feats_extract_conf n_fft=2048 --feats_extract_conf hop_length=300 --feats_extract_conf win_length=1200 --feats_extract_conf fs=24000 --feats_extract_conf fmin=80 --feats_extract_conf fmax=7600 --feats_extract_conf n_mels=80 --pitch_extract_conf fs=24000 --pitch_extract_conf n_fft=2048 --pitch_extract_conf hop_length=300 --pitch_extract_conf f0max=400 --pitch_extract_conf f0min=80 --energy_extract_conf fs=24000 --energy_extract_conf n_fft=2048 --energy_extract_conf hop_length=300 --energy_extract_conf win_length=1200
|
| 7 |
-
[92b100c97f43] 2025-03-04
|
| 8 |
-
[92b100c97f43] 2025-03-04
|
| 9 |
-
[92b100c97f43] 2025-03-04
|
| 10 |
-
[92b100c97f43] 2025-03-04
|
| 11 |
-
[92b100c97f43] 2025-03-04
|
| 12 |
ESPnetGANTTSModel(
|
| 13 |
(feats_extract): LogMelFbank(
|
| 14 |
(stft): Stft(n_fft=2048, win_length=1200, hop_length=300, center=True, normalized=False, onesided=True)
|
|
@@ -844,7 +844,7 @@ Model summary:
|
|
| 844 |
Number of trainable parameters: 83.28 M (100.0%)
|
| 845 |
Size: 333.11 MB
|
| 846 |
Type: torch.float32
|
| 847 |
-
[92b100c97f43] 2025-03-04
|
| 848 |
AdamW (
|
| 849 |
Parameter Group 0
|
| 850 |
amsgrad: False
|
|
@@ -854,8 +854,8 @@ Parameter Group 0
|
|
| 854 |
lr: 0.0002
|
| 855 |
weight_decay: 0.0
|
| 856 |
)
|
| 857 |
-
[92b100c97f43] 2025-03-04
|
| 858 |
-
[92b100c97f43] 2025-03-04
|
| 859 |
AdamW (
|
| 860 |
Parameter Group 0
|
| 861 |
amsgrad: False
|
|
@@ -865,10 +865,10 @@ Parameter Group 0
|
|
| 865 |
lr: 0.0002
|
| 866 |
weight_decay: 0.0
|
| 867 |
)
|
| 868 |
-
[92b100c97f43] 2025-03-04
|
| 869 |
-
[92b100c97f43] 2025-03-04
|
| 870 |
-
[92b100c97f43] 2025-03-04
|
| 871 |
/work/espnet/espnet2/layers/stft.py:166: UserWarning: __floordiv__ is deprecated, and its behavior will change in a future version of pytorch. It currently rounds toward 0 (like the 'trunc' function NOT 'floor'). This results in incorrect rounding for negative values. To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor').
|
| 872 |
olens = (ilens - self.n_fft) // self.hop_length + 1
|
| 873 |
-
# Accounting: time=
|
| 874 |
-
# Ended (code 0) at Tue Mar 4
|
|
|
|
| 1 |
# python3 -m espnet2.bin.gan_tts_train --collect_stats true --write_collected_feats false --use_preprocessor true --token_type phn --token_list dump/token_list/phn_jaconv_pyopenjtalk/tokens.txt --non_linguistic_symbols none --cleaner jaconv --g2p pyopenjtalk --normalize none --pitch_normalize none --energy_normalize none --train_data_path_and_name_and_type dump/raw/jvs010_tr_no_dev/text,text,text --train_data_path_and_name_and_type dump/raw/jvs010_tr_no_dev/wav.scp,speech,sound --valid_data_path_and_name_and_type dump/raw/jvs010_dev/text,text,text --valid_data_path_and_name_and_type dump/raw/jvs010_dev/wav.scp,speech,sound --train_shape_file exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/train.3.scp --valid_shape_file exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/valid.3.scp --output_dir exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/stats.3 --config conf/tuning/train_jets.yaml --feats_extract fbank --feats_extract_conf n_fft=2048 --feats_extract_conf hop_length=300 --feats_extract_conf win_length=1200 --feats_extract_conf fs=24000 --feats_extract_conf fmin=80 --feats_extract_conf fmax=7600 --feats_extract_conf n_mels=80 --pitch_extract_conf fs=24000 --pitch_extract_conf n_fft=2048 --pitch_extract_conf hop_length=300 --pitch_extract_conf f0max=400 --pitch_extract_conf f0min=80 --energy_extract_conf fs=24000 --energy_extract_conf n_fft=2048 --energy_extract_conf hop_length=300 --energy_extract_conf win_length=1200
|
| 2 |
+
# Started at Tue Mar 4 22:09:35 JST 2025
|
| 3 |
#
|
| 4 |
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 5 |
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 6 |
/usr/bin/python3 /work/espnet/espnet2/bin/gan_tts_train.py --collect_stats true --write_collected_feats false --use_preprocessor true --token_type phn --token_list dump/token_list/phn_jaconv_pyopenjtalk/tokens.txt --non_linguistic_symbols none --cleaner jaconv --g2p pyopenjtalk --normalize none --pitch_normalize none --energy_normalize none --train_data_path_and_name_and_type dump/raw/jvs010_tr_no_dev/text,text,text --train_data_path_and_name_and_type dump/raw/jvs010_tr_no_dev/wav.scp,speech,sound --valid_data_path_and_name_and_type dump/raw/jvs010_dev/text,text,text --valid_data_path_and_name_and_type dump/raw/jvs010_dev/wav.scp,speech,sound --train_shape_file exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/train.3.scp --valid_shape_file exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/valid.3.scp --output_dir exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/stats.3 --config conf/tuning/train_jets.yaml --feats_extract fbank --feats_extract_conf n_fft=2048 --feats_extract_conf hop_length=300 --feats_extract_conf win_length=1200 --feats_extract_conf fs=24000 --feats_extract_conf fmin=80 --feats_extract_conf fmax=7600 --feats_extract_conf n_mels=80 --pitch_extract_conf fs=24000 --pitch_extract_conf n_fft=2048 --pitch_extract_conf hop_length=300 --pitch_extract_conf f0max=400 --pitch_extract_conf f0min=80 --energy_extract_conf fs=24000 --energy_extract_conf n_fft=2048 --energy_extract_conf hop_length=300 --energy_extract_conf win_length=1200
|
| 7 |
+
[92b100c97f43] 2025-03-04 22:09:38,630 (gan_tts:304) INFO: Vocabulary size: 41
|
| 8 |
+
[92b100c97f43] 2025-03-04 22:09:38,851 (encoder:172) INFO: encoder self-attention layer type = self-attention
|
| 9 |
+
[92b100c97f43] 2025-03-04 22:09:38,974 (encoder:172) INFO: encoder self-attention layer type = self-attention
|
| 10 |
+
[92b100c97f43] 2025-03-04 22:09:41,143 (abs_task:1157) INFO: pytorch.version=1.10.1+cu113, cuda.available=True, cudnn.version=8200, cudnn.benchmark=False, cudnn.deterministic=False
|
| 11 |
+
[92b100c97f43] 2025-03-04 22:09:41,153 (abs_task:1158) INFO: Model structure:
|
| 12 |
ESPnetGANTTSModel(
|
| 13 |
(feats_extract): LogMelFbank(
|
| 14 |
(stft): Stft(n_fft=2048, win_length=1200, hop_length=300, center=True, normalized=False, onesided=True)
|
|
|
|
| 844 |
Number of trainable parameters: 83.28 M (100.0%)
|
| 845 |
Size: 333.11 MB
|
| 846 |
Type: torch.float32
|
| 847 |
+
[92b100c97f43] 2025-03-04 22:09:41,154 (abs_task:1161) INFO: Optimizer:
|
| 848 |
AdamW (
|
| 849 |
Parameter Group 0
|
| 850 |
amsgrad: False
|
|
|
|
| 854 |
lr: 0.0002
|
| 855 |
weight_decay: 0.0
|
| 856 |
)
|
| 857 |
+
[92b100c97f43] 2025-03-04 22:09:41,154 (abs_task:1162) INFO: Scheduler: <torch.optim.lr_scheduler.ExponentialLR object at 0x7f44e9149100>
|
| 858 |
+
[92b100c97f43] 2025-03-04 22:09:41,154 (abs_task:1161) INFO: Optimizer2:
|
| 859 |
AdamW (
|
| 860 |
Parameter Group 0
|
| 861 |
amsgrad: False
|
|
|
|
| 865 |
lr: 0.0002
|
| 866 |
weight_decay: 0.0
|
| 867 |
)
|
| 868 |
+
[92b100c97f43] 2025-03-04 22:09:41,154 (abs_task:1162) INFO: Scheduler2: <torch.optim.lr_scheduler.ExponentialLR object at 0x7f44f806f190>
|
| 869 |
+
[92b100c97f43] 2025-03-04 22:09:41,154 (abs_task:1171) INFO: Saving the configuration in exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/stats.3/config.yaml
|
| 870 |
+
[92b100c97f43] 2025-03-04 22:09:41,173 (abs_task:1182) INFO: Namespace(accum_grad=1, allow_variable_data_keys=False, batch_bins=6000000, batch_size=20, batch_type='numel', best_model_criterion=[['valid', 'text2mel_loss', 'min'], ['train', 'text2mel_loss', 'min'], ['train', 'total_count', 'max']], bpemodel=None, chunk_length=500, chunk_shift_ratio=0.5, cleaner='jaconv', collect_stats=True, config='conf/tuning/train_jets.yaml', cudnn_benchmark=False, cudnn_deterministic=False, cudnn_enabled=True, detect_anomaly=False, dist_backend='nccl', dist_init_method='env://', dist_launcher=None, dist_master_addr=None, dist_master_port=None, dist_rank=None, dist_world_size=None, distributed=False, dry_run=False, early_stopping_criterion=('valid', 'loss', 'min'), energy_extract='energy', energy_extract_conf={'reduction_factor': 1, 'use_token_averaged_energy': False, 'fs': 24000, 'n_fft': 2048, 'hop_length': 300, 'win_length': 1200}, energy_normalize=None, energy_normalize_conf={}, feats_extract='fbank', feats_extract_conf={'n_fft': 2048, 'hop_length': 300, 'win_length': 1200, 'fs': 24000, 'fmin': 80, 'fmax': 7600, 'n_mels': 80}, fold_length=[], freeze_param=[], g2p='pyopenjtalk', generator_first=True, grad_clip=-1, grad_clip_type=2.0, grad_noise=False, ignore_init_mismatch=False, init_param=[], iterator_type='sequence', keep_nbest_models=-1, local_rank=None, log_interval=50, log_level='INFO', max_cache_fd=32, max_cache_size=0.0, max_epoch=130, model_conf={}, multiple_iterator=False, multiprocessing_distributed=False, nbest_averaging_interval=0, ngpu=0, no_forward_run=False, non_linguistic_symbols=None, normalize=None, normalize_conf={}, num_att_plot=3, num_cache_chunks=1024, num_iters_per_epoch=1000, num_workers=32, odim=None, optim='adamw', optim2='adamw', optim2_conf={'lr': 0.0002, 'betas': [0.8, 0.99], 'eps': 1e-09, 'weight_decay': 0.0}, optim_conf={'lr': 0.0002, 'betas': [0.8, 0.99], 'eps': 1e-09, 'weight_decay': 0.0}, output_dir='exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/stats.3', patience=None, pitch_extract='dio', pitch_extract_conf={'reduction_factor': 1, 'use_token_averaged_f0': False, 'fs': 24000, 'n_fft': 2048, 'hop_length': 300, 'f0max': 400, 'f0min': 80}, pitch_normalize=None, pitch_normalize_conf={}, pretrain_path=None, print_config=False, required=['output_dir', 'token_list'], resume=False, scheduler='exponentiallr', scheduler2='exponentiallr', scheduler2_conf={'gamma': 0.999875}, scheduler_conf={'gamma': 0.999875}, seed=777, sharded_ddp=False, sort_batch='descending', sort_in_batch='descending', token_list=['<blank>', '<unk>', 'o', 'a', 'u', 'i', 'e', 'k', 'r', 't', 'n', 'pau', 'N', 's', 'sh', 'd', 'm', 'g', 'w', 'b', 'cl', 'I', 'j', 'ch', 'y', 'U', 'h', 'p', 'ts', 'f', 'z', 'ky', 'ny', 'gy', 'ry', 'hy', 'my', 'by', 'py', 'v', '<sos/eos>'], token_type='phn', train_data_path_and_name_and_type=[('dump/raw/jvs010_tr_no_dev/text', 'text', 'text'), ('dump/raw/jvs010_tr_no_dev/wav.scp', 'speech', 'sound')], train_dtype='float32', train_shape_file=['exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/train.3.scp'], tts='jets', tts_conf={'generator_type': 'jets_generator', 'generator_params': {'adim': 256, 'aheads': 2, 'elayers': 4, 'eunits': 1024, 'dlayers': 4, 'dunits': 1024, 'positionwise_layer_type': 'conv1d', 'positionwise_conv_kernel_size': 3, 'duration_predictor_layers': 2, 'duration_predictor_chans': 256, 'duration_predictor_kernel_size': 3, 'use_masking': True, 'encoder_normalize_before': True, 'decoder_normalize_before': True, 'encoder_type': 'transformer', 'decoder_type': 'transformer', 'conformer_rel_pos_type': 'latest', 'conformer_pos_enc_layer_type': 'rel_pos', 'conformer_self_attn_layer_type': 'rel_selfattn', 'conformer_activation_type': 'swish', 'use_macaron_style_in_conformer': True, 'use_cnn_in_conformer': True, 'conformer_enc_kernel_size': 7, 'conformer_dec_kernel_size': 31, 'init_type': 'xavier_uniform', 'transformer_enc_dropout_rate': 0.2, 'transformer_enc_positional_dropout_rate': 0.2, 'transformer_enc_attn_dropout_rate': 0.2, 'transformer_dec_dropout_rate': 0.2, 'transformer_dec_positional_dropout_rate': 0.2, 'transformer_dec_attn_dropout_rate': 0.2, 'pitch_predictor_layers': 5, 'pitch_predictor_chans': 256, 'pitch_predictor_kernel_size': 5, 'pitch_predictor_dropout': 0.5, 'pitch_embed_kernel_size': 1, 'pitch_embed_dropout': 0.0, 'stop_gradient_from_pitch_predictor': True, 'energy_predictor_layers': 2, 'energy_predictor_chans': 256, 'energy_predictor_kernel_size': 3, 'energy_predictor_dropout': 0.5, 'energy_embed_kernel_size': 1, 'energy_embed_dropout': 0.0, 'stop_gradient_from_energy_predictor': False, 'generator_out_channels': 1, 'generator_channels': 512, 'generator_global_channels': -1, 'generator_kernel_size': 7, 'generator_upsample_scales': [8, 8, 2, 2], 'generator_upsample_kernel_sizes': [16, 16, 4, 4], 'generator_resblock_kernel_sizes': [3, 7, 11], 'generator_resblock_dilations': [[1, 3, 5], [1, 3, 5], [1, 3, 5]], 'generator_use_additional_convs': True, 'generator_bias': True, 'generator_nonlinear_activation': 'LeakyReLU', 'generator_nonlinear_activation_params': {'negative_slope': 0.1}, 'generator_use_weight_norm': True, 'segment_size': 64, 'idim': 41, 'odim': 80}, 'discriminator_type': 'hifigan_multi_scale_multi_period_discriminator', 'discriminator_params': {'scales': 1, 'scale_downsample_pooling': 'AvgPool1d', 'scale_downsample_pooling_params': {'kernel_size': 4, 'stride': 2, 'padding': 2}, 'scale_discriminator_params': {'in_channels': 1, 'out_channels': 1, 'kernel_sizes': [15, 41, 5, 3], 'channels': 128, 'max_downsample_channels': 1024, 'max_groups': 16, 'bias': True, 'downsample_scales': [2, 2, 4, 4, 1], 'nonlinear_activation': 'LeakyReLU', 'nonlinear_activation_params': {'negative_slope': 0.1}, 'use_weight_norm': True, 'use_spectral_norm': False}, 'follow_official_norm': False, 'periods': [2, 3, 5, 7, 11], 'period_discriminator_params': {'in_channels': 1, 'out_channels': 1, 'kernel_sizes': [5, 3], 'channels': 32, 'downsample_scales': [3, 3, 3, 3, 1], 'max_downsample_channels': 1024, 'bias': True, 'nonlinear_activation': 'LeakyReLU', 'nonlinear_activation_params': {'negative_slope': 0.1}, 'use_weight_norm': True, 'use_spectral_norm': False}}, 'generator_adv_loss_params': {'average_by_discriminators': False, 'loss_type': 'mse'}, 'discriminator_adv_loss_params': {'average_by_discriminators': False, 'loss_type': 'mse'}, 'feat_match_loss_params': {'average_by_discriminators': False, 'average_by_layers': False, 'include_final_outputs': True}, 'mel_loss_params': {'fs': 24000, 'n_fft': 1024, 'hop_length': 256, 'win_length': None, 'window': 'hann', 'n_mels': 80, 'fmin': 0, 'fmax': None, 'log_base': None}, 'lambda_adv': 1.0, 'lambda_mel': 45.0, 'lambda_feat_match': 2.0, 'lambda_var': 1.0, 'lambda_align': 2.0, 'sampling_rate': 24000, 'cache_generator_outputs': True}, unused_parameters=True, use_amp=False, use_matplotlib=True, use_preprocessor=True, use_tensorboard=True, use_wandb=False, val_scheduler_criterion=('valid', 'loss'), valid_batch_bins=None, valid_batch_size=None, valid_batch_type=None, valid_data_path_and_name_and_type=[('dump/raw/jvs010_dev/text', 'text', 'text'), ('dump/raw/jvs010_dev/wav.scp', 'speech', 'sound')], valid_max_cache_size=None, valid_shape_file=['exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/valid.3.scp'], version='202204', wandb_entity=None, wandb_id=None, wandb_model_log_interval=-1, wandb_name=None, wandb_project=None, write_collected_feats=False)
|
| 871 |
/work/espnet/espnet2/layers/stft.py:166: UserWarning: __floordiv__ is deprecated, and its behavior will change in a future version of pytorch. It currently rounds toward 0 (like the 'trunc' function NOT 'floor'). This results in incorrect rounding for negative values. To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor').
|
| 872 |
olens = (ilens - self.n_fft) // self.hop_length + 1
|
| 873 |
+
# Accounting: time=15 threads=1
|
| 874 |
+
# Ended (code 0) at Tue Mar 4 22:09:50 JST 2025, elapsed time 15 seconds
|
exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/stats.3/config.yaml
CHANGED
|
@@ -6,7 +6,7 @@ iterator_type: sequence
|
|
| 6 |
output_dir: exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/stats.3
|
| 7 |
ngpu: 0
|
| 8 |
seed: 777
|
| 9 |
-
num_workers:
|
| 10 |
num_att_plot: 3
|
| 11 |
dist_backend: nccl
|
| 12 |
dist_init_method: env://
|
|
|
|
| 6 |
output_dir: exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/stats.3
|
| 7 |
ngpu: 0
|
| 8 |
seed: 777
|
| 9 |
+
num_workers: 32
|
| 10 |
num_att_plot: 3
|
| 11 |
dist_backend: nccl
|
| 12 |
dist_init_method: env://
|
exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/stats.4.log
CHANGED
|
@@ -1,14 +1,14 @@
|
|
| 1 |
# python3 -m espnet2.bin.gan_tts_train --collect_stats true --write_collected_feats false --use_preprocessor true --token_type phn --token_list dump/token_list/phn_jaconv_pyopenjtalk/tokens.txt --non_linguistic_symbols none --cleaner jaconv --g2p pyopenjtalk --normalize none --pitch_normalize none --energy_normalize none --train_data_path_and_name_and_type dump/raw/jvs010_tr_no_dev/text,text,text --train_data_path_and_name_and_type dump/raw/jvs010_tr_no_dev/wav.scp,speech,sound --valid_data_path_and_name_and_type dump/raw/jvs010_dev/text,text,text --valid_data_path_and_name_and_type dump/raw/jvs010_dev/wav.scp,speech,sound --train_shape_file exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/train.4.scp --valid_shape_file exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/valid.4.scp --output_dir exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/stats.4 --config conf/tuning/train_jets.yaml --feats_extract fbank --feats_extract_conf n_fft=2048 --feats_extract_conf hop_length=300 --feats_extract_conf win_length=1200 --feats_extract_conf fs=24000 --feats_extract_conf fmin=80 --feats_extract_conf fmax=7600 --feats_extract_conf n_mels=80 --pitch_extract_conf fs=24000 --pitch_extract_conf n_fft=2048 --pitch_extract_conf hop_length=300 --pitch_extract_conf f0max=400 --pitch_extract_conf f0min=80 --energy_extract_conf fs=24000 --energy_extract_conf n_fft=2048 --energy_extract_conf hop_length=300 --energy_extract_conf win_length=1200
|
| 2 |
-
# Started at Tue Mar 4
|
| 3 |
#
|
| 4 |
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 5 |
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 6 |
/usr/bin/python3 /work/espnet/espnet2/bin/gan_tts_train.py --collect_stats true --write_collected_feats false --use_preprocessor true --token_type phn --token_list dump/token_list/phn_jaconv_pyopenjtalk/tokens.txt --non_linguistic_symbols none --cleaner jaconv --g2p pyopenjtalk --normalize none --pitch_normalize none --energy_normalize none --train_data_path_and_name_and_type dump/raw/jvs010_tr_no_dev/text,text,text --train_data_path_and_name_and_type dump/raw/jvs010_tr_no_dev/wav.scp,speech,sound --valid_data_path_and_name_and_type dump/raw/jvs010_dev/text,text,text --valid_data_path_and_name_and_type dump/raw/jvs010_dev/wav.scp,speech,sound --train_shape_file exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/train.4.scp --valid_shape_file exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/valid.4.scp --output_dir exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/stats.4 --config conf/tuning/train_jets.yaml --feats_extract fbank --feats_extract_conf n_fft=2048 --feats_extract_conf hop_length=300 --feats_extract_conf win_length=1200 --feats_extract_conf fs=24000 --feats_extract_conf fmin=80 --feats_extract_conf fmax=7600 --feats_extract_conf n_mels=80 --pitch_extract_conf fs=24000 --pitch_extract_conf n_fft=2048 --pitch_extract_conf hop_length=300 --pitch_extract_conf f0max=400 --pitch_extract_conf f0min=80 --energy_extract_conf fs=24000 --energy_extract_conf n_fft=2048 --energy_extract_conf hop_length=300 --energy_extract_conf win_length=1200
|
| 7 |
-
[92b100c97f43] 2025-03-04
|
| 8 |
-
[92b100c97f43] 2025-03-04
|
| 9 |
-
[92b100c97f43] 2025-03-04
|
| 10 |
-
[92b100c97f43] 2025-03-04
|
| 11 |
-
[92b100c97f43] 2025-03-04
|
| 12 |
ESPnetGANTTSModel(
|
| 13 |
(feats_extract): LogMelFbank(
|
| 14 |
(stft): Stft(n_fft=2048, win_length=1200, hop_length=300, center=True, normalized=False, onesided=True)
|
|
@@ -844,7 +844,7 @@ Model summary:
|
|
| 844 |
Number of trainable parameters: 83.28 M (100.0%)
|
| 845 |
Size: 333.11 MB
|
| 846 |
Type: torch.float32
|
| 847 |
-
[92b100c97f43] 2025-03-04
|
| 848 |
AdamW (
|
| 849 |
Parameter Group 0
|
| 850 |
amsgrad: False
|
|
@@ -854,8 +854,8 @@ Parameter Group 0
|
|
| 854 |
lr: 0.0002
|
| 855 |
weight_decay: 0.0
|
| 856 |
)
|
| 857 |
-
[92b100c97f43] 2025-03-04
|
| 858 |
-
[92b100c97f43] 2025-03-04
|
| 859 |
AdamW (
|
| 860 |
Parameter Group 0
|
| 861 |
amsgrad: False
|
|
@@ -865,10 +865,10 @@ Parameter Group 0
|
|
| 865 |
lr: 0.0002
|
| 866 |
weight_decay: 0.0
|
| 867 |
)
|
| 868 |
-
[92b100c97f43] 2025-03-04
|
| 869 |
-
[92b100c97f43] 2025-03-04
|
| 870 |
-
[92b100c97f43] 2025-03-04
|
| 871 |
/work/espnet/espnet2/layers/stft.py:166: UserWarning: __floordiv__ is deprecated, and its behavior will change in a future version of pytorch. It currently rounds toward 0 (like the 'trunc' function NOT 'floor'). This results in incorrect rounding for negative values. To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor').
|
| 872 |
olens = (ilens - self.n_fft) // self.hop_length + 1
|
| 873 |
-
# Accounting: time=
|
| 874 |
-
# Ended (code 0) at Tue Mar 4
|
|
|
|
| 1 |
# python3 -m espnet2.bin.gan_tts_train --collect_stats true --write_collected_feats false --use_preprocessor true --token_type phn --token_list dump/token_list/phn_jaconv_pyopenjtalk/tokens.txt --non_linguistic_symbols none --cleaner jaconv --g2p pyopenjtalk --normalize none --pitch_normalize none --energy_normalize none --train_data_path_and_name_and_type dump/raw/jvs010_tr_no_dev/text,text,text --train_data_path_and_name_and_type dump/raw/jvs010_tr_no_dev/wav.scp,speech,sound --valid_data_path_and_name_and_type dump/raw/jvs010_dev/text,text,text --valid_data_path_and_name_and_type dump/raw/jvs010_dev/wav.scp,speech,sound --train_shape_file exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/train.4.scp --valid_shape_file exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/valid.4.scp --output_dir exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/stats.4 --config conf/tuning/train_jets.yaml --feats_extract fbank --feats_extract_conf n_fft=2048 --feats_extract_conf hop_length=300 --feats_extract_conf win_length=1200 --feats_extract_conf fs=24000 --feats_extract_conf fmin=80 --feats_extract_conf fmax=7600 --feats_extract_conf n_mels=80 --pitch_extract_conf fs=24000 --pitch_extract_conf n_fft=2048 --pitch_extract_conf hop_length=300 --pitch_extract_conf f0max=400 --pitch_extract_conf f0min=80 --energy_extract_conf fs=24000 --energy_extract_conf n_fft=2048 --energy_extract_conf hop_length=300 --energy_extract_conf win_length=1200
|
| 2 |
+
# Started at Tue Mar 4 22:09:35 JST 2025
|
| 3 |
#
|
| 4 |
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 5 |
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 6 |
/usr/bin/python3 /work/espnet/espnet2/bin/gan_tts_train.py --collect_stats true --write_collected_feats false --use_preprocessor true --token_type phn --token_list dump/token_list/phn_jaconv_pyopenjtalk/tokens.txt --non_linguistic_symbols none --cleaner jaconv --g2p pyopenjtalk --normalize none --pitch_normalize none --energy_normalize none --train_data_path_and_name_and_type dump/raw/jvs010_tr_no_dev/text,text,text --train_data_path_and_name_and_type dump/raw/jvs010_tr_no_dev/wav.scp,speech,sound --valid_data_path_and_name_and_type dump/raw/jvs010_dev/text,text,text --valid_data_path_and_name_and_type dump/raw/jvs010_dev/wav.scp,speech,sound --train_shape_file exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/train.4.scp --valid_shape_file exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/valid.4.scp --output_dir exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/stats.4 --config conf/tuning/train_jets.yaml --feats_extract fbank --feats_extract_conf n_fft=2048 --feats_extract_conf hop_length=300 --feats_extract_conf win_length=1200 --feats_extract_conf fs=24000 --feats_extract_conf fmin=80 --feats_extract_conf fmax=7600 --feats_extract_conf n_mels=80 --pitch_extract_conf fs=24000 --pitch_extract_conf n_fft=2048 --pitch_extract_conf hop_length=300 --pitch_extract_conf f0max=400 --pitch_extract_conf f0min=80 --energy_extract_conf fs=24000 --energy_extract_conf n_fft=2048 --energy_extract_conf hop_length=300 --energy_extract_conf win_length=1200
|
| 7 |
+
[92b100c97f43] 2025-03-04 22:09:38,646 (gan_tts:304) INFO: Vocabulary size: 41
|
| 8 |
+
[92b100c97f43] 2025-03-04 22:09:38,867 (encoder:172) INFO: encoder self-attention layer type = self-attention
|
| 9 |
+
[92b100c97f43] 2025-03-04 22:09:38,988 (encoder:172) INFO: encoder self-attention layer type = self-attention
|
| 10 |
+
[92b100c97f43] 2025-03-04 22:09:41,192 (abs_task:1157) INFO: pytorch.version=1.10.1+cu113, cuda.available=True, cudnn.version=8200, cudnn.benchmark=False, cudnn.deterministic=False
|
| 11 |
+
[92b100c97f43] 2025-03-04 22:09:41,203 (abs_task:1158) INFO: Model structure:
|
| 12 |
ESPnetGANTTSModel(
|
| 13 |
(feats_extract): LogMelFbank(
|
| 14 |
(stft): Stft(n_fft=2048, win_length=1200, hop_length=300, center=True, normalized=False, onesided=True)
|
|
|
|
| 844 |
Number of trainable parameters: 83.28 M (100.0%)
|
| 845 |
Size: 333.11 MB
|
| 846 |
Type: torch.float32
|
| 847 |
+
[92b100c97f43] 2025-03-04 22:09:41,203 (abs_task:1161) INFO: Optimizer:
|
| 848 |
AdamW (
|
| 849 |
Parameter Group 0
|
| 850 |
amsgrad: False
|
|
|
|
| 854 |
lr: 0.0002
|
| 855 |
weight_decay: 0.0
|
| 856 |
)
|
| 857 |
+
[92b100c97f43] 2025-03-04 22:09:41,203 (abs_task:1162) INFO: Scheduler: <torch.optim.lr_scheduler.ExponentialLR object at 0x7fb42810e040>
|
| 858 |
+
[92b100c97f43] 2025-03-04 22:09:41,203 (abs_task:1161) INFO: Optimizer2:
|
| 859 |
AdamW (
|
| 860 |
Parameter Group 0
|
| 861 |
amsgrad: False
|
|
|
|
| 865 |
lr: 0.0002
|
| 866 |
weight_decay: 0.0
|
| 867 |
)
|
| 868 |
+
[92b100c97f43] 2025-03-04 22:09:41,203 (abs_task:1162) INFO: Scheduler2: <torch.optim.lr_scheduler.ExponentialLR object at 0x7fb4442b9280>
|
| 869 |
+
[92b100c97f43] 2025-03-04 22:09:41,203 (abs_task:1171) INFO: Saving the configuration in exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/stats.4/config.yaml
|
| 870 |
+
[92b100c97f43] 2025-03-04 22:09:41,222 (abs_task:1182) INFO: Namespace(accum_grad=1, allow_variable_data_keys=False, batch_bins=6000000, batch_size=20, batch_type='numel', best_model_criterion=[['valid', 'text2mel_loss', 'min'], ['train', 'text2mel_loss', 'min'], ['train', 'total_count', 'max']], bpemodel=None, chunk_length=500, chunk_shift_ratio=0.5, cleaner='jaconv', collect_stats=True, config='conf/tuning/train_jets.yaml', cudnn_benchmark=False, cudnn_deterministic=False, cudnn_enabled=True, detect_anomaly=False, dist_backend='nccl', dist_init_method='env://', dist_launcher=None, dist_master_addr=None, dist_master_port=None, dist_rank=None, dist_world_size=None, distributed=False, dry_run=False, early_stopping_criterion=('valid', 'loss', 'min'), energy_extract='energy', energy_extract_conf={'reduction_factor': 1, 'use_token_averaged_energy': False, 'fs': 24000, 'n_fft': 2048, 'hop_length': 300, 'win_length': 1200}, energy_normalize=None, energy_normalize_conf={}, feats_extract='fbank', feats_extract_conf={'n_fft': 2048, 'hop_length': 300, 'win_length': 1200, 'fs': 24000, 'fmin': 80, 'fmax': 7600, 'n_mels': 80}, fold_length=[], freeze_param=[], g2p='pyopenjtalk', generator_first=True, grad_clip=-1, grad_clip_type=2.0, grad_noise=False, ignore_init_mismatch=False, init_param=[], iterator_type='sequence', keep_nbest_models=-1, local_rank=None, log_interval=50, log_level='INFO', max_cache_fd=32, max_cache_size=0.0, max_epoch=130, model_conf={}, multiple_iterator=False, multiprocessing_distributed=False, nbest_averaging_interval=0, ngpu=0, no_forward_run=False, non_linguistic_symbols=None, normalize=None, normalize_conf={}, num_att_plot=3, num_cache_chunks=1024, num_iters_per_epoch=1000, num_workers=32, odim=None, optim='adamw', optim2='adamw', optim2_conf={'lr': 0.0002, 'betas': [0.8, 0.99], 'eps': 1e-09, 'weight_decay': 0.0}, optim_conf={'lr': 0.0002, 'betas': [0.8, 0.99], 'eps': 1e-09, 'weight_decay': 0.0}, output_dir='exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/stats.4', patience=None, pitch_extract='dio', pitch_extract_conf={'reduction_factor': 1, 'use_token_averaged_f0': False, 'fs': 24000, 'n_fft': 2048, 'hop_length': 300, 'f0max': 400, 'f0min': 80}, pitch_normalize=None, pitch_normalize_conf={}, pretrain_path=None, print_config=False, required=['output_dir', 'token_list'], resume=False, scheduler='exponentiallr', scheduler2='exponentiallr', scheduler2_conf={'gamma': 0.999875}, scheduler_conf={'gamma': 0.999875}, seed=777, sharded_ddp=False, sort_batch='descending', sort_in_batch='descending', token_list=['<blank>', '<unk>', 'o', 'a', 'u', 'i', 'e', 'k', 'r', 't', 'n', 'pau', 'N', 's', 'sh', 'd', 'm', 'g', 'w', 'b', 'cl', 'I', 'j', 'ch', 'y', 'U', 'h', 'p', 'ts', 'f', 'z', 'ky', 'ny', 'gy', 'ry', 'hy', 'my', 'by', 'py', 'v', '<sos/eos>'], token_type='phn', train_data_path_and_name_and_type=[('dump/raw/jvs010_tr_no_dev/text', 'text', 'text'), ('dump/raw/jvs010_tr_no_dev/wav.scp', 'speech', 'sound')], train_dtype='float32', train_shape_file=['exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/train.4.scp'], tts='jets', tts_conf={'generator_type': 'jets_generator', 'generator_params': {'adim': 256, 'aheads': 2, 'elayers': 4, 'eunits': 1024, 'dlayers': 4, 'dunits': 1024, 'positionwise_layer_type': 'conv1d', 'positionwise_conv_kernel_size': 3, 'duration_predictor_layers': 2, 'duration_predictor_chans': 256, 'duration_predictor_kernel_size': 3, 'use_masking': True, 'encoder_normalize_before': True, 'decoder_normalize_before': True, 'encoder_type': 'transformer', 'decoder_type': 'transformer', 'conformer_rel_pos_type': 'latest', 'conformer_pos_enc_layer_type': 'rel_pos', 'conformer_self_attn_layer_type': 'rel_selfattn', 'conformer_activation_type': 'swish', 'use_macaron_style_in_conformer': True, 'use_cnn_in_conformer': True, 'conformer_enc_kernel_size': 7, 'conformer_dec_kernel_size': 31, 'init_type': 'xavier_uniform', 'transformer_enc_dropout_rate': 0.2, 'transformer_enc_positional_dropout_rate': 0.2, 'transformer_enc_attn_dropout_rate': 0.2, 'transformer_dec_dropout_rate': 0.2, 'transformer_dec_positional_dropout_rate': 0.2, 'transformer_dec_attn_dropout_rate': 0.2, 'pitch_predictor_layers': 5, 'pitch_predictor_chans': 256, 'pitch_predictor_kernel_size': 5, 'pitch_predictor_dropout': 0.5, 'pitch_embed_kernel_size': 1, 'pitch_embed_dropout': 0.0, 'stop_gradient_from_pitch_predictor': True, 'energy_predictor_layers': 2, 'energy_predictor_chans': 256, 'energy_predictor_kernel_size': 3, 'energy_predictor_dropout': 0.5, 'energy_embed_kernel_size': 1, 'energy_embed_dropout': 0.0, 'stop_gradient_from_energy_predictor': False, 'generator_out_channels': 1, 'generator_channels': 512, 'generator_global_channels': -1, 'generator_kernel_size': 7, 'generator_upsample_scales': [8, 8, 2, 2], 'generator_upsample_kernel_sizes': [16, 16, 4, 4], 'generator_resblock_kernel_sizes': [3, 7, 11], 'generator_resblock_dilations': [[1, 3, 5], [1, 3, 5], [1, 3, 5]], 'generator_use_additional_convs': True, 'generator_bias': True, 'generator_nonlinear_activation': 'LeakyReLU', 'generator_nonlinear_activation_params': {'negative_slope': 0.1}, 'generator_use_weight_norm': True, 'segment_size': 64, 'idim': 41, 'odim': 80}, 'discriminator_type': 'hifigan_multi_scale_multi_period_discriminator', 'discriminator_params': {'scales': 1, 'scale_downsample_pooling': 'AvgPool1d', 'scale_downsample_pooling_params': {'kernel_size': 4, 'stride': 2, 'padding': 2}, 'scale_discriminator_params': {'in_channels': 1, 'out_channels': 1, 'kernel_sizes': [15, 41, 5, 3], 'channels': 128, 'max_downsample_channels': 1024, 'max_groups': 16, 'bias': True, 'downsample_scales': [2, 2, 4, 4, 1], 'nonlinear_activation': 'LeakyReLU', 'nonlinear_activation_params': {'negative_slope': 0.1}, 'use_weight_norm': True, 'use_spectral_norm': False}, 'follow_official_norm': False, 'periods': [2, 3, 5, 7, 11], 'period_discriminator_params': {'in_channels': 1, 'out_channels': 1, 'kernel_sizes': [5, 3], 'channels': 32, 'downsample_scales': [3, 3, 3, 3, 1], 'max_downsample_channels': 1024, 'bias': True, 'nonlinear_activation': 'LeakyReLU', 'nonlinear_activation_params': {'negative_slope': 0.1}, 'use_weight_norm': True, 'use_spectral_norm': False}}, 'generator_adv_loss_params': {'average_by_discriminators': False, 'loss_type': 'mse'}, 'discriminator_adv_loss_params': {'average_by_discriminators': False, 'loss_type': 'mse'}, 'feat_match_loss_params': {'average_by_discriminators': False, 'average_by_layers': False, 'include_final_outputs': True}, 'mel_loss_params': {'fs': 24000, 'n_fft': 1024, 'hop_length': 256, 'win_length': None, 'window': 'hann', 'n_mels': 80, 'fmin': 0, 'fmax': None, 'log_base': None}, 'lambda_adv': 1.0, 'lambda_mel': 45.0, 'lambda_feat_match': 2.0, 'lambda_var': 1.0, 'lambda_align': 2.0, 'sampling_rate': 24000, 'cache_generator_outputs': True}, unused_parameters=True, use_amp=False, use_matplotlib=True, use_preprocessor=True, use_tensorboard=True, use_wandb=False, val_scheduler_criterion=('valid', 'loss'), valid_batch_bins=None, valid_batch_size=None, valid_batch_type=None, valid_data_path_and_name_and_type=[('dump/raw/jvs010_dev/text', 'text', 'text'), ('dump/raw/jvs010_dev/wav.scp', 'speech', 'sound')], valid_max_cache_size=None, valid_shape_file=['exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/valid.4.scp'], version='202204', wandb_entity=None, wandb_id=None, wandb_model_log_interval=-1, wandb_name=None, wandb_project=None, write_collected_feats=False)
|
| 871 |
/work/espnet/espnet2/layers/stft.py:166: UserWarning: __floordiv__ is deprecated, and its behavior will change in a future version of pytorch. It currently rounds toward 0 (like the 'trunc' function NOT 'floor'). This results in incorrect rounding for negative values. To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor').
|
| 872 |
olens = (ilens - self.n_fft) // self.hop_length + 1
|
| 873 |
+
# Accounting: time=15 threads=1
|
| 874 |
+
# Ended (code 0) at Tue Mar 4 22:09:50 JST 2025, elapsed time 15 seconds
|
exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/stats.4/config.yaml
CHANGED
|
@@ -6,7 +6,7 @@ iterator_type: sequence
|
|
| 6 |
output_dir: exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/stats.4
|
| 7 |
ngpu: 0
|
| 8 |
seed: 777
|
| 9 |
-
num_workers:
|
| 10 |
num_att_plot: 3
|
| 11 |
dist_backend: nccl
|
| 12 |
dist_init_method: env://
|
|
|
|
| 6 |
output_dir: exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/stats.4
|
| 7 |
ngpu: 0
|
| 8 |
seed: 777
|
| 9 |
+
num_workers: 32
|
| 10 |
num_att_plot: 3
|
| 11 |
dist_backend: nccl
|
| 12 |
dist_init_method: env://
|
exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/stats.5.log
CHANGED
|
@@ -1,14 +1,14 @@
|
|
| 1 |
# python3 -m espnet2.bin.gan_tts_train --collect_stats true --write_collected_feats false --use_preprocessor true --token_type phn --token_list dump/token_list/phn_jaconv_pyopenjtalk/tokens.txt --non_linguistic_symbols none --cleaner jaconv --g2p pyopenjtalk --normalize none --pitch_normalize none --energy_normalize none --train_data_path_and_name_and_type dump/raw/jvs010_tr_no_dev/text,text,text --train_data_path_and_name_and_type dump/raw/jvs010_tr_no_dev/wav.scp,speech,sound --valid_data_path_and_name_and_type dump/raw/jvs010_dev/text,text,text --valid_data_path_and_name_and_type dump/raw/jvs010_dev/wav.scp,speech,sound --train_shape_file exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/train.5.scp --valid_shape_file exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/valid.5.scp --output_dir exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/stats.5 --config conf/tuning/train_jets.yaml --feats_extract fbank --feats_extract_conf n_fft=2048 --feats_extract_conf hop_length=300 --feats_extract_conf win_length=1200 --feats_extract_conf fs=24000 --feats_extract_conf fmin=80 --feats_extract_conf fmax=7600 --feats_extract_conf n_mels=80 --pitch_extract_conf fs=24000 --pitch_extract_conf n_fft=2048 --pitch_extract_conf hop_length=300 --pitch_extract_conf f0max=400 --pitch_extract_conf f0min=80 --energy_extract_conf fs=24000 --energy_extract_conf n_fft=2048 --energy_extract_conf hop_length=300 --energy_extract_conf win_length=1200
|
| 2 |
-
# Started at Tue Mar 4
|
| 3 |
#
|
| 4 |
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 5 |
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 6 |
/usr/bin/python3 /work/espnet/espnet2/bin/gan_tts_train.py --collect_stats true --write_collected_feats false --use_preprocessor true --token_type phn --token_list dump/token_list/phn_jaconv_pyopenjtalk/tokens.txt --non_linguistic_symbols none --cleaner jaconv --g2p pyopenjtalk --normalize none --pitch_normalize none --energy_normalize none --train_data_path_and_name_and_type dump/raw/jvs010_tr_no_dev/text,text,text --train_data_path_and_name_and_type dump/raw/jvs010_tr_no_dev/wav.scp,speech,sound --valid_data_path_and_name_and_type dump/raw/jvs010_dev/text,text,text --valid_data_path_and_name_and_type dump/raw/jvs010_dev/wav.scp,speech,sound --train_shape_file exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/train.5.scp --valid_shape_file exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/valid.5.scp --output_dir exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/stats.5 --config conf/tuning/train_jets.yaml --feats_extract fbank --feats_extract_conf n_fft=2048 --feats_extract_conf hop_length=300 --feats_extract_conf win_length=1200 --feats_extract_conf fs=24000 --feats_extract_conf fmin=80 --feats_extract_conf fmax=7600 --feats_extract_conf n_mels=80 --pitch_extract_conf fs=24000 --pitch_extract_conf n_fft=2048 --pitch_extract_conf hop_length=300 --pitch_extract_conf f0max=400 --pitch_extract_conf f0min=80 --energy_extract_conf fs=24000 --energy_extract_conf n_fft=2048 --energy_extract_conf hop_length=300 --energy_extract_conf win_length=1200
|
| 7 |
-
[92b100c97f43] 2025-03-04
|
| 8 |
-
[92b100c97f43] 2025-03-04
|
| 9 |
-
[92b100c97f43] 2025-03-04
|
| 10 |
-
[92b100c97f43] 2025-03-04
|
| 11 |
-
[92b100c97f43] 2025-03-04
|
| 12 |
ESPnetGANTTSModel(
|
| 13 |
(feats_extract): LogMelFbank(
|
| 14 |
(stft): Stft(n_fft=2048, win_length=1200, hop_length=300, center=True, normalized=False, onesided=True)
|
|
@@ -844,7 +844,7 @@ Model summary:
|
|
| 844 |
Number of trainable parameters: 83.28 M (100.0%)
|
| 845 |
Size: 333.11 MB
|
| 846 |
Type: torch.float32
|
| 847 |
-
[92b100c97f43] 2025-03-04
|
| 848 |
AdamW (
|
| 849 |
Parameter Group 0
|
| 850 |
amsgrad: False
|
|
@@ -854,8 +854,8 @@ Parameter Group 0
|
|
| 854 |
lr: 0.0002
|
| 855 |
weight_decay: 0.0
|
| 856 |
)
|
| 857 |
-
[92b100c97f43] 2025-03-04
|
| 858 |
-
[92b100c97f43] 2025-03-04
|
| 859 |
AdamW (
|
| 860 |
Parameter Group 0
|
| 861 |
amsgrad: False
|
|
@@ -865,10 +865,10 @@ Parameter Group 0
|
|
| 865 |
lr: 0.0002
|
| 866 |
weight_decay: 0.0
|
| 867 |
)
|
| 868 |
-
[92b100c97f43] 2025-03-04
|
| 869 |
-
[92b100c97f43] 2025-03-04
|
| 870 |
-
[92b100c97f43] 2025-03-04
|
| 871 |
/work/espnet/espnet2/layers/stft.py:166: UserWarning: __floordiv__ is deprecated, and its behavior will change in a future version of pytorch. It currently rounds toward 0 (like the 'trunc' function NOT 'floor'). This results in incorrect rounding for negative values. To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor').
|
| 872 |
olens = (ilens - self.n_fft) // self.hop_length + 1
|
| 873 |
-
# Accounting: time=
|
| 874 |
-
# Ended (code 0) at Tue Mar 4
|
|
|
|
| 1 |
# python3 -m espnet2.bin.gan_tts_train --collect_stats true --write_collected_feats false --use_preprocessor true --token_type phn --token_list dump/token_list/phn_jaconv_pyopenjtalk/tokens.txt --non_linguistic_symbols none --cleaner jaconv --g2p pyopenjtalk --normalize none --pitch_normalize none --energy_normalize none --train_data_path_and_name_and_type dump/raw/jvs010_tr_no_dev/text,text,text --train_data_path_and_name_and_type dump/raw/jvs010_tr_no_dev/wav.scp,speech,sound --valid_data_path_and_name_and_type dump/raw/jvs010_dev/text,text,text --valid_data_path_and_name_and_type dump/raw/jvs010_dev/wav.scp,speech,sound --train_shape_file exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/train.5.scp --valid_shape_file exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/valid.5.scp --output_dir exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/stats.5 --config conf/tuning/train_jets.yaml --feats_extract fbank --feats_extract_conf n_fft=2048 --feats_extract_conf hop_length=300 --feats_extract_conf win_length=1200 --feats_extract_conf fs=24000 --feats_extract_conf fmin=80 --feats_extract_conf fmax=7600 --feats_extract_conf n_mels=80 --pitch_extract_conf fs=24000 --pitch_extract_conf n_fft=2048 --pitch_extract_conf hop_length=300 --pitch_extract_conf f0max=400 --pitch_extract_conf f0min=80 --energy_extract_conf fs=24000 --energy_extract_conf n_fft=2048 --energy_extract_conf hop_length=300 --energy_extract_conf win_length=1200
|
| 2 |
+
# Started at Tue Mar 4 22:09:35 JST 2025
|
| 3 |
#
|
| 4 |
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 5 |
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 6 |
/usr/bin/python3 /work/espnet/espnet2/bin/gan_tts_train.py --collect_stats true --write_collected_feats false --use_preprocessor true --token_type phn --token_list dump/token_list/phn_jaconv_pyopenjtalk/tokens.txt --non_linguistic_symbols none --cleaner jaconv --g2p pyopenjtalk --normalize none --pitch_normalize none --energy_normalize none --train_data_path_and_name_and_type dump/raw/jvs010_tr_no_dev/text,text,text --train_data_path_and_name_and_type dump/raw/jvs010_tr_no_dev/wav.scp,speech,sound --valid_data_path_and_name_and_type dump/raw/jvs010_dev/text,text,text --valid_data_path_and_name_and_type dump/raw/jvs010_dev/wav.scp,speech,sound --train_shape_file exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/train.5.scp --valid_shape_file exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/valid.5.scp --output_dir exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/stats.5 --config conf/tuning/train_jets.yaml --feats_extract fbank --feats_extract_conf n_fft=2048 --feats_extract_conf hop_length=300 --feats_extract_conf win_length=1200 --feats_extract_conf fs=24000 --feats_extract_conf fmin=80 --feats_extract_conf fmax=7600 --feats_extract_conf n_mels=80 --pitch_extract_conf fs=24000 --pitch_extract_conf n_fft=2048 --pitch_extract_conf hop_length=300 --pitch_extract_conf f0max=400 --pitch_extract_conf f0min=80 --energy_extract_conf fs=24000 --energy_extract_conf n_fft=2048 --energy_extract_conf hop_length=300 --energy_extract_conf win_length=1200
|
| 7 |
+
[92b100c97f43] 2025-03-04 22:09:38,637 (gan_tts:304) INFO: Vocabulary size: 41
|
| 8 |
+
[92b100c97f43] 2025-03-04 22:09:38,858 (encoder:172) INFO: encoder self-attention layer type = self-attention
|
| 9 |
+
[92b100c97f43] 2025-03-04 22:09:38,979 (encoder:172) INFO: encoder self-attention layer type = self-attention
|
| 10 |
+
[92b100c97f43] 2025-03-04 22:09:41,208 (abs_task:1157) INFO: pytorch.version=1.10.1+cu113, cuda.available=True, cudnn.version=8200, cudnn.benchmark=False, cudnn.deterministic=False
|
| 11 |
+
[92b100c97f43] 2025-03-04 22:09:41,219 (abs_task:1158) INFO: Model structure:
|
| 12 |
ESPnetGANTTSModel(
|
| 13 |
(feats_extract): LogMelFbank(
|
| 14 |
(stft): Stft(n_fft=2048, win_length=1200, hop_length=300, center=True, normalized=False, onesided=True)
|
|
|
|
| 844 |
Number of trainable parameters: 83.28 M (100.0%)
|
| 845 |
Size: 333.11 MB
|
| 846 |
Type: torch.float32
|
| 847 |
+
[92b100c97f43] 2025-03-04 22:09:41,219 (abs_task:1161) INFO: Optimizer:
|
| 848 |
AdamW (
|
| 849 |
Parameter Group 0
|
| 850 |
amsgrad: False
|
|
|
|
| 854 |
lr: 0.0002
|
| 855 |
weight_decay: 0.0
|
| 856 |
)
|
| 857 |
+
[92b100c97f43] 2025-03-04 22:09:41,219 (abs_task:1162) INFO: Scheduler: <torch.optim.lr_scheduler.ExponentialLR object at 0x7f900e5431c0>
|
| 858 |
+
[92b100c97f43] 2025-03-04 22:09:41,219 (abs_task:1161) INFO: Optimizer2:
|
| 859 |
AdamW (
|
| 860 |
Parameter Group 0
|
| 861 |
amsgrad: False
|
|
|
|
| 865 |
lr: 0.0002
|
| 866 |
weight_decay: 0.0
|
| 867 |
)
|
| 868 |
+
[92b100c97f43] 2025-03-04 22:09:41,219 (abs_task:1162) INFO: Scheduler2: <torch.optim.lr_scheduler.ExponentialLR object at 0x7f901d469280>
|
| 869 |
+
[92b100c97f43] 2025-03-04 22:09:41,219 (abs_task:1171) INFO: Saving the configuration in exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/stats.5/config.yaml
|
| 870 |
+
[92b100c97f43] 2025-03-04 22:09:41,238 (abs_task:1182) INFO: Namespace(accum_grad=1, allow_variable_data_keys=False, batch_bins=6000000, batch_size=20, batch_type='numel', best_model_criterion=[['valid', 'text2mel_loss', 'min'], ['train', 'text2mel_loss', 'min'], ['train', 'total_count', 'max']], bpemodel=None, chunk_length=500, chunk_shift_ratio=0.5, cleaner='jaconv', collect_stats=True, config='conf/tuning/train_jets.yaml', cudnn_benchmark=False, cudnn_deterministic=False, cudnn_enabled=True, detect_anomaly=False, dist_backend='nccl', dist_init_method='env://', dist_launcher=None, dist_master_addr=None, dist_master_port=None, dist_rank=None, dist_world_size=None, distributed=False, dry_run=False, early_stopping_criterion=('valid', 'loss', 'min'), energy_extract='energy', energy_extract_conf={'reduction_factor': 1, 'use_token_averaged_energy': False, 'fs': 24000, 'n_fft': 2048, 'hop_length': 300, 'win_length': 1200}, energy_normalize=None, energy_normalize_conf={}, feats_extract='fbank', feats_extract_conf={'n_fft': 2048, 'hop_length': 300, 'win_length': 1200, 'fs': 24000, 'fmin': 80, 'fmax': 7600, 'n_mels': 80}, fold_length=[], freeze_param=[], g2p='pyopenjtalk', generator_first=True, grad_clip=-1, grad_clip_type=2.0, grad_noise=False, ignore_init_mismatch=False, init_param=[], iterator_type='sequence', keep_nbest_models=-1, local_rank=None, log_interval=50, log_level='INFO', max_cache_fd=32, max_cache_size=0.0, max_epoch=130, model_conf={}, multiple_iterator=False, multiprocessing_distributed=False, nbest_averaging_interval=0, ngpu=0, no_forward_run=False, non_linguistic_symbols=None, normalize=None, normalize_conf={}, num_att_plot=3, num_cache_chunks=1024, num_iters_per_epoch=1000, num_workers=32, odim=None, optim='adamw', optim2='adamw', optim2_conf={'lr': 0.0002, 'betas': [0.8, 0.99], 'eps': 1e-09, 'weight_decay': 0.0}, optim_conf={'lr': 0.0002, 'betas': [0.8, 0.99], 'eps': 1e-09, 'weight_decay': 0.0}, output_dir='exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/stats.5', patience=None, pitch_extract='dio', pitch_extract_conf={'reduction_factor': 1, 'use_token_averaged_f0': False, 'fs': 24000, 'n_fft': 2048, 'hop_length': 300, 'f0max': 400, 'f0min': 80}, pitch_normalize=None, pitch_normalize_conf={}, pretrain_path=None, print_config=False, required=['output_dir', 'token_list'], resume=False, scheduler='exponentiallr', scheduler2='exponentiallr', scheduler2_conf={'gamma': 0.999875}, scheduler_conf={'gamma': 0.999875}, seed=777, sharded_ddp=False, sort_batch='descending', sort_in_batch='descending', token_list=['<blank>', '<unk>', 'o', 'a', 'u', 'i', 'e', 'k', 'r', 't', 'n', 'pau', 'N', 's', 'sh', 'd', 'm', 'g', 'w', 'b', 'cl', 'I', 'j', 'ch', 'y', 'U', 'h', 'p', 'ts', 'f', 'z', 'ky', 'ny', 'gy', 'ry', 'hy', 'my', 'by', 'py', 'v', '<sos/eos>'], token_type='phn', train_data_path_and_name_and_type=[('dump/raw/jvs010_tr_no_dev/text', 'text', 'text'), ('dump/raw/jvs010_tr_no_dev/wav.scp', 'speech', 'sound')], train_dtype='float32', train_shape_file=['exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/train.5.scp'], tts='jets', tts_conf={'generator_type': 'jets_generator', 'generator_params': {'adim': 256, 'aheads': 2, 'elayers': 4, 'eunits': 1024, 'dlayers': 4, 'dunits': 1024, 'positionwise_layer_type': 'conv1d', 'positionwise_conv_kernel_size': 3, 'duration_predictor_layers': 2, 'duration_predictor_chans': 256, 'duration_predictor_kernel_size': 3, 'use_masking': True, 'encoder_normalize_before': True, 'decoder_normalize_before': True, 'encoder_type': 'transformer', 'decoder_type': 'transformer', 'conformer_rel_pos_type': 'latest', 'conformer_pos_enc_layer_type': 'rel_pos', 'conformer_self_attn_layer_type': 'rel_selfattn', 'conformer_activation_type': 'swish', 'use_macaron_style_in_conformer': True, 'use_cnn_in_conformer': True, 'conformer_enc_kernel_size': 7, 'conformer_dec_kernel_size': 31, 'init_type': 'xavier_uniform', 'transformer_enc_dropout_rate': 0.2, 'transformer_enc_positional_dropout_rate': 0.2, 'transformer_enc_attn_dropout_rate': 0.2, 'transformer_dec_dropout_rate': 0.2, 'transformer_dec_positional_dropout_rate': 0.2, 'transformer_dec_attn_dropout_rate': 0.2, 'pitch_predictor_layers': 5, 'pitch_predictor_chans': 256, 'pitch_predictor_kernel_size': 5, 'pitch_predictor_dropout': 0.5, 'pitch_embed_kernel_size': 1, 'pitch_embed_dropout': 0.0, 'stop_gradient_from_pitch_predictor': True, 'energy_predictor_layers': 2, 'energy_predictor_chans': 256, 'energy_predictor_kernel_size': 3, 'energy_predictor_dropout': 0.5, 'energy_embed_kernel_size': 1, 'energy_embed_dropout': 0.0, 'stop_gradient_from_energy_predictor': False, 'generator_out_channels': 1, 'generator_channels': 512, 'generator_global_channels': -1, 'generator_kernel_size': 7, 'generator_upsample_scales': [8, 8, 2, 2], 'generator_upsample_kernel_sizes': [16, 16, 4, 4], 'generator_resblock_kernel_sizes': [3, 7, 11], 'generator_resblock_dilations': [[1, 3, 5], [1, 3, 5], [1, 3, 5]], 'generator_use_additional_convs': True, 'generator_bias': True, 'generator_nonlinear_activation': 'LeakyReLU', 'generator_nonlinear_activation_params': {'negative_slope': 0.1}, 'generator_use_weight_norm': True, 'segment_size': 64, 'idim': 41, 'odim': 80}, 'discriminator_type': 'hifigan_multi_scale_multi_period_discriminator', 'discriminator_params': {'scales': 1, 'scale_downsample_pooling': 'AvgPool1d', 'scale_downsample_pooling_params': {'kernel_size': 4, 'stride': 2, 'padding': 2}, 'scale_discriminator_params': {'in_channels': 1, 'out_channels': 1, 'kernel_sizes': [15, 41, 5, 3], 'channels': 128, 'max_downsample_channels': 1024, 'max_groups': 16, 'bias': True, 'downsample_scales': [2, 2, 4, 4, 1], 'nonlinear_activation': 'LeakyReLU', 'nonlinear_activation_params': {'negative_slope': 0.1}, 'use_weight_norm': True, 'use_spectral_norm': False}, 'follow_official_norm': False, 'periods': [2, 3, 5, 7, 11], 'period_discriminator_params': {'in_channels': 1, 'out_channels': 1, 'kernel_sizes': [5, 3], 'channels': 32, 'downsample_scales': [3, 3, 3, 3, 1], 'max_downsample_channels': 1024, 'bias': True, 'nonlinear_activation': 'LeakyReLU', 'nonlinear_activation_params': {'negative_slope': 0.1}, 'use_weight_norm': True, 'use_spectral_norm': False}}, 'generator_adv_loss_params': {'average_by_discriminators': False, 'loss_type': 'mse'}, 'discriminator_adv_loss_params': {'average_by_discriminators': False, 'loss_type': 'mse'}, 'feat_match_loss_params': {'average_by_discriminators': False, 'average_by_layers': False, 'include_final_outputs': True}, 'mel_loss_params': {'fs': 24000, 'n_fft': 1024, 'hop_length': 256, 'win_length': None, 'window': 'hann', 'n_mels': 80, 'fmin': 0, 'fmax': None, 'log_base': None}, 'lambda_adv': 1.0, 'lambda_mel': 45.0, 'lambda_feat_match': 2.0, 'lambda_var': 1.0, 'lambda_align': 2.0, 'sampling_rate': 24000, 'cache_generator_outputs': True}, unused_parameters=True, use_amp=False, use_matplotlib=True, use_preprocessor=True, use_tensorboard=True, use_wandb=False, val_scheduler_criterion=('valid', 'loss'), valid_batch_bins=None, valid_batch_size=None, valid_batch_type=None, valid_data_path_and_name_and_type=[('dump/raw/jvs010_dev/text', 'text', 'text'), ('dump/raw/jvs010_dev/wav.scp', 'speech', 'sound')], valid_max_cache_size=None, valid_shape_file=['exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/valid.5.scp'], version='202204', wandb_entity=None, wandb_id=None, wandb_model_log_interval=-1, wandb_name=None, wandb_project=None, write_collected_feats=False)
|
| 871 |
/work/espnet/espnet2/layers/stft.py:166: UserWarning: __floordiv__ is deprecated, and its behavior will change in a future version of pytorch. It currently rounds toward 0 (like the 'trunc' function NOT 'floor'). This results in incorrect rounding for negative values. To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor').
|
| 872 |
olens = (ilens - self.n_fft) // self.hop_length + 1
|
| 873 |
+
# Accounting: time=15 threads=1
|
| 874 |
+
# Ended (code 0) at Tue Mar 4 22:09:50 JST 2025, elapsed time 15 seconds
|
exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/stats.5/config.yaml
CHANGED
|
@@ -6,7 +6,7 @@ iterator_type: sequence
|
|
| 6 |
output_dir: exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/stats.5
|
| 7 |
ngpu: 0
|
| 8 |
seed: 777
|
| 9 |
-
num_workers:
|
| 10 |
num_att_plot: 3
|
| 11 |
dist_backend: nccl
|
| 12 |
dist_init_method: env://
|
|
|
|
| 6 |
output_dir: exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/stats.5
|
| 7 |
ngpu: 0
|
| 8 |
seed: 777
|
| 9 |
+
num_workers: 32
|
| 10 |
num_att_plot: 3
|
| 11 |
dist_backend: nccl
|
| 12 |
dist_init_method: env://
|
exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/stats.6.log
CHANGED
|
@@ -1,14 +1,14 @@
|
|
| 1 |
# python3 -m espnet2.bin.gan_tts_train --collect_stats true --write_collected_feats false --use_preprocessor true --token_type phn --token_list dump/token_list/phn_jaconv_pyopenjtalk/tokens.txt --non_linguistic_symbols none --cleaner jaconv --g2p pyopenjtalk --normalize none --pitch_normalize none --energy_normalize none --train_data_path_and_name_and_type dump/raw/jvs010_tr_no_dev/text,text,text --train_data_path_and_name_and_type dump/raw/jvs010_tr_no_dev/wav.scp,speech,sound --valid_data_path_and_name_and_type dump/raw/jvs010_dev/text,text,text --valid_data_path_and_name_and_type dump/raw/jvs010_dev/wav.scp,speech,sound --train_shape_file exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/train.6.scp --valid_shape_file exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/valid.6.scp --output_dir exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/stats.6 --config conf/tuning/train_jets.yaml --feats_extract fbank --feats_extract_conf n_fft=2048 --feats_extract_conf hop_length=300 --feats_extract_conf win_length=1200 --feats_extract_conf fs=24000 --feats_extract_conf fmin=80 --feats_extract_conf fmax=7600 --feats_extract_conf n_mels=80 --pitch_extract_conf fs=24000 --pitch_extract_conf n_fft=2048 --pitch_extract_conf hop_length=300 --pitch_extract_conf f0max=400 --pitch_extract_conf f0min=80 --energy_extract_conf fs=24000 --energy_extract_conf n_fft=2048 --energy_extract_conf hop_length=300 --energy_extract_conf win_length=1200
|
| 2 |
-
# Started at Tue Mar 4
|
| 3 |
#
|
| 4 |
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 5 |
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 6 |
/usr/bin/python3 /work/espnet/espnet2/bin/gan_tts_train.py --collect_stats true --write_collected_feats false --use_preprocessor true --token_type phn --token_list dump/token_list/phn_jaconv_pyopenjtalk/tokens.txt --non_linguistic_symbols none --cleaner jaconv --g2p pyopenjtalk --normalize none --pitch_normalize none --energy_normalize none --train_data_path_and_name_and_type dump/raw/jvs010_tr_no_dev/text,text,text --train_data_path_and_name_and_type dump/raw/jvs010_tr_no_dev/wav.scp,speech,sound --valid_data_path_and_name_and_type dump/raw/jvs010_dev/text,text,text --valid_data_path_and_name_and_type dump/raw/jvs010_dev/wav.scp,speech,sound --train_shape_file exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/train.6.scp --valid_shape_file exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/valid.6.scp --output_dir exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/stats.6 --config conf/tuning/train_jets.yaml --feats_extract fbank --feats_extract_conf n_fft=2048 --feats_extract_conf hop_length=300 --feats_extract_conf win_length=1200 --feats_extract_conf fs=24000 --feats_extract_conf fmin=80 --feats_extract_conf fmax=7600 --feats_extract_conf n_mels=80 --pitch_extract_conf fs=24000 --pitch_extract_conf n_fft=2048 --pitch_extract_conf hop_length=300 --pitch_extract_conf f0max=400 --pitch_extract_conf f0min=80 --energy_extract_conf fs=24000 --energy_extract_conf n_fft=2048 --energy_extract_conf hop_length=300 --energy_extract_conf win_length=1200
|
| 7 |
-
[92b100c97f43] 2025-03-04
|
| 8 |
-
[92b100c97f43] 2025-03-04
|
| 9 |
-
[92b100c97f43] 2025-03-04
|
| 10 |
-
[92b100c97f43] 2025-03-04
|
| 11 |
-
[92b100c97f43] 2025-03-04
|
| 12 |
ESPnetGANTTSModel(
|
| 13 |
(feats_extract): LogMelFbank(
|
| 14 |
(stft): Stft(n_fft=2048, win_length=1200, hop_length=300, center=True, normalized=False, onesided=True)
|
|
@@ -844,7 +844,7 @@ Model summary:
|
|
| 844 |
Number of trainable parameters: 83.28 M (100.0%)
|
| 845 |
Size: 333.11 MB
|
| 846 |
Type: torch.float32
|
| 847 |
-
[92b100c97f43] 2025-03-04
|
| 848 |
AdamW (
|
| 849 |
Parameter Group 0
|
| 850 |
amsgrad: False
|
|
@@ -854,8 +854,8 @@ Parameter Group 0
|
|
| 854 |
lr: 0.0002
|
| 855 |
weight_decay: 0.0
|
| 856 |
)
|
| 857 |
-
[92b100c97f43] 2025-03-04
|
| 858 |
-
[92b100c97f43] 2025-03-04
|
| 859 |
AdamW (
|
| 860 |
Parameter Group 0
|
| 861 |
amsgrad: False
|
|
@@ -865,10 +865,10 @@ Parameter Group 0
|
|
| 865 |
lr: 0.0002
|
| 866 |
weight_decay: 0.0
|
| 867 |
)
|
| 868 |
-
[92b100c97f43] 2025-03-04
|
| 869 |
-
[92b100c97f43] 2025-03-04
|
| 870 |
-
[92b100c97f43] 2025-03-04
|
| 871 |
/work/espnet/espnet2/layers/stft.py:166: UserWarning: __floordiv__ is deprecated, and its behavior will change in a future version of pytorch. It currently rounds toward 0 (like the 'trunc' function NOT 'floor'). This results in incorrect rounding for negative values. To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor').
|
| 872 |
olens = (ilens - self.n_fft) // self.hop_length + 1
|
| 873 |
-
# Accounting: time=
|
| 874 |
-
# Ended (code 0) at Tue Mar 4
|
|
|
|
| 1 |
# python3 -m espnet2.bin.gan_tts_train --collect_stats true --write_collected_feats false --use_preprocessor true --token_type phn --token_list dump/token_list/phn_jaconv_pyopenjtalk/tokens.txt --non_linguistic_symbols none --cleaner jaconv --g2p pyopenjtalk --normalize none --pitch_normalize none --energy_normalize none --train_data_path_and_name_and_type dump/raw/jvs010_tr_no_dev/text,text,text --train_data_path_and_name_and_type dump/raw/jvs010_tr_no_dev/wav.scp,speech,sound --valid_data_path_and_name_and_type dump/raw/jvs010_dev/text,text,text --valid_data_path_and_name_and_type dump/raw/jvs010_dev/wav.scp,speech,sound --train_shape_file exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/train.6.scp --valid_shape_file exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/valid.6.scp --output_dir exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/stats.6 --config conf/tuning/train_jets.yaml --feats_extract fbank --feats_extract_conf n_fft=2048 --feats_extract_conf hop_length=300 --feats_extract_conf win_length=1200 --feats_extract_conf fs=24000 --feats_extract_conf fmin=80 --feats_extract_conf fmax=7600 --feats_extract_conf n_mels=80 --pitch_extract_conf fs=24000 --pitch_extract_conf n_fft=2048 --pitch_extract_conf hop_length=300 --pitch_extract_conf f0max=400 --pitch_extract_conf f0min=80 --energy_extract_conf fs=24000 --energy_extract_conf n_fft=2048 --energy_extract_conf hop_length=300 --energy_extract_conf win_length=1200
|
| 2 |
+
# Started at Tue Mar 4 22:09:35 JST 2025
|
| 3 |
#
|
| 4 |
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 5 |
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 6 |
/usr/bin/python3 /work/espnet/espnet2/bin/gan_tts_train.py --collect_stats true --write_collected_feats false --use_preprocessor true --token_type phn --token_list dump/token_list/phn_jaconv_pyopenjtalk/tokens.txt --non_linguistic_symbols none --cleaner jaconv --g2p pyopenjtalk --normalize none --pitch_normalize none --energy_normalize none --train_data_path_and_name_and_type dump/raw/jvs010_tr_no_dev/text,text,text --train_data_path_and_name_and_type dump/raw/jvs010_tr_no_dev/wav.scp,speech,sound --valid_data_path_and_name_and_type dump/raw/jvs010_dev/text,text,text --valid_data_path_and_name_and_type dump/raw/jvs010_dev/wav.scp,speech,sound --train_shape_file exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/train.6.scp --valid_shape_file exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/valid.6.scp --output_dir exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/stats.6 --config conf/tuning/train_jets.yaml --feats_extract fbank --feats_extract_conf n_fft=2048 --feats_extract_conf hop_length=300 --feats_extract_conf win_length=1200 --feats_extract_conf fs=24000 --feats_extract_conf fmin=80 --feats_extract_conf fmax=7600 --feats_extract_conf n_mels=80 --pitch_extract_conf fs=24000 --pitch_extract_conf n_fft=2048 --pitch_extract_conf hop_length=300 --pitch_extract_conf f0max=400 --pitch_extract_conf f0min=80 --energy_extract_conf fs=24000 --energy_extract_conf n_fft=2048 --energy_extract_conf hop_length=300 --energy_extract_conf win_length=1200
|
| 7 |
+
[92b100c97f43] 2025-03-04 22:09:38,622 (gan_tts:304) INFO: Vocabulary size: 41
|
| 8 |
+
[92b100c97f43] 2025-03-04 22:09:38,844 (encoder:172) INFO: encoder self-attention layer type = self-attention
|
| 9 |
+
[92b100c97f43] 2025-03-04 22:09:38,966 (encoder:172) INFO: encoder self-attention layer type = self-attention
|
| 10 |
+
[92b100c97f43] 2025-03-04 22:09:41,158 (abs_task:1157) INFO: pytorch.version=1.10.1+cu113, cuda.available=True, cudnn.version=8200, cudnn.benchmark=False, cudnn.deterministic=False
|
| 11 |
+
[92b100c97f43] 2025-03-04 22:09:41,169 (abs_task:1158) INFO: Model structure:
|
| 12 |
ESPnetGANTTSModel(
|
| 13 |
(feats_extract): LogMelFbank(
|
| 14 |
(stft): Stft(n_fft=2048, win_length=1200, hop_length=300, center=True, normalized=False, onesided=True)
|
|
|
|
| 844 |
Number of trainable parameters: 83.28 M (100.0%)
|
| 845 |
Size: 333.11 MB
|
| 846 |
Type: torch.float32
|
| 847 |
+
[92b100c97f43] 2025-03-04 22:09:41,169 (abs_task:1161) INFO: Optimizer:
|
| 848 |
AdamW (
|
| 849 |
Parameter Group 0
|
| 850 |
amsgrad: False
|
|
|
|
| 854 |
lr: 0.0002
|
| 855 |
weight_decay: 0.0
|
| 856 |
)
|
| 857 |
+
[92b100c97f43] 2025-03-04 22:09:41,169 (abs_task:1162) INFO: Scheduler: <torch.optim.lr_scheduler.ExponentialLR object at 0x7f3c0ac0f160>
|
| 858 |
+
[92b100c97f43] 2025-03-04 22:09:41,169 (abs_task:1161) INFO: Optimizer2:
|
| 859 |
AdamW (
|
| 860 |
Parameter Group 0
|
| 861 |
amsgrad: False
|
|
|
|
| 865 |
lr: 0.0002
|
| 866 |
weight_decay: 0.0
|
| 867 |
)
|
| 868 |
+
[92b100c97f43] 2025-03-04 22:09:41,169 (abs_task:1162) INFO: Scheduler2: <torch.optim.lr_scheduler.ExponentialLR object at 0x7f3c19b35280>
|
| 869 |
+
[92b100c97f43] 2025-03-04 22:09:41,169 (abs_task:1171) INFO: Saving the configuration in exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/stats.6/config.yaml
|
| 870 |
+
[92b100c97f43] 2025-03-04 22:09:41,188 (abs_task:1182) INFO: Namespace(accum_grad=1, allow_variable_data_keys=False, batch_bins=6000000, batch_size=20, batch_type='numel', best_model_criterion=[['valid', 'text2mel_loss', 'min'], ['train', 'text2mel_loss', 'min'], ['train', 'total_count', 'max']], bpemodel=None, chunk_length=500, chunk_shift_ratio=0.5, cleaner='jaconv', collect_stats=True, config='conf/tuning/train_jets.yaml', cudnn_benchmark=False, cudnn_deterministic=False, cudnn_enabled=True, detect_anomaly=False, dist_backend='nccl', dist_init_method='env://', dist_launcher=None, dist_master_addr=None, dist_master_port=None, dist_rank=None, dist_world_size=None, distributed=False, dry_run=False, early_stopping_criterion=('valid', 'loss', 'min'), energy_extract='energy', energy_extract_conf={'reduction_factor': 1, 'use_token_averaged_energy': False, 'fs': 24000, 'n_fft': 2048, 'hop_length': 300, 'win_length': 1200}, energy_normalize=None, energy_normalize_conf={}, feats_extract='fbank', feats_extract_conf={'n_fft': 2048, 'hop_length': 300, 'win_length': 1200, 'fs': 24000, 'fmin': 80, 'fmax': 7600, 'n_mels': 80}, fold_length=[], freeze_param=[], g2p='pyopenjtalk', generator_first=True, grad_clip=-1, grad_clip_type=2.0, grad_noise=False, ignore_init_mismatch=False, init_param=[], iterator_type='sequence', keep_nbest_models=-1, local_rank=None, log_interval=50, log_level='INFO', max_cache_fd=32, max_cache_size=0.0, max_epoch=130, model_conf={}, multiple_iterator=False, multiprocessing_distributed=False, nbest_averaging_interval=0, ngpu=0, no_forward_run=False, non_linguistic_symbols=None, normalize=None, normalize_conf={}, num_att_plot=3, num_cache_chunks=1024, num_iters_per_epoch=1000, num_workers=32, odim=None, optim='adamw', optim2='adamw', optim2_conf={'lr': 0.0002, 'betas': [0.8, 0.99], 'eps': 1e-09, 'weight_decay': 0.0}, optim_conf={'lr': 0.0002, 'betas': [0.8, 0.99], 'eps': 1e-09, 'weight_decay': 0.0}, output_dir='exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/stats.6', patience=None, pitch_extract='dio', pitch_extract_conf={'reduction_factor': 1, 'use_token_averaged_f0': False, 'fs': 24000, 'n_fft': 2048, 'hop_length': 300, 'f0max': 400, 'f0min': 80}, pitch_normalize=None, pitch_normalize_conf={}, pretrain_path=None, print_config=False, required=['output_dir', 'token_list'], resume=False, scheduler='exponentiallr', scheduler2='exponentiallr', scheduler2_conf={'gamma': 0.999875}, scheduler_conf={'gamma': 0.999875}, seed=777, sharded_ddp=False, sort_batch='descending', sort_in_batch='descending', token_list=['<blank>', '<unk>', 'o', 'a', 'u', 'i', 'e', 'k', 'r', 't', 'n', 'pau', 'N', 's', 'sh', 'd', 'm', 'g', 'w', 'b', 'cl', 'I', 'j', 'ch', 'y', 'U', 'h', 'p', 'ts', 'f', 'z', 'ky', 'ny', 'gy', 'ry', 'hy', 'my', 'by', 'py', 'v', '<sos/eos>'], token_type='phn', train_data_path_and_name_and_type=[('dump/raw/jvs010_tr_no_dev/text', 'text', 'text'), ('dump/raw/jvs010_tr_no_dev/wav.scp', 'speech', 'sound')], train_dtype='float32', train_shape_file=['exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/train.6.scp'], tts='jets', tts_conf={'generator_type': 'jets_generator', 'generator_params': {'adim': 256, 'aheads': 2, 'elayers': 4, 'eunits': 1024, 'dlayers': 4, 'dunits': 1024, 'positionwise_layer_type': 'conv1d', 'positionwise_conv_kernel_size': 3, 'duration_predictor_layers': 2, 'duration_predictor_chans': 256, 'duration_predictor_kernel_size': 3, 'use_masking': True, 'encoder_normalize_before': True, 'decoder_normalize_before': True, 'encoder_type': 'transformer', 'decoder_type': 'transformer', 'conformer_rel_pos_type': 'latest', 'conformer_pos_enc_layer_type': 'rel_pos', 'conformer_self_attn_layer_type': 'rel_selfattn', 'conformer_activation_type': 'swish', 'use_macaron_style_in_conformer': True, 'use_cnn_in_conformer': True, 'conformer_enc_kernel_size': 7, 'conformer_dec_kernel_size': 31, 'init_type': 'xavier_uniform', 'transformer_enc_dropout_rate': 0.2, 'transformer_enc_positional_dropout_rate': 0.2, 'transformer_enc_attn_dropout_rate': 0.2, 'transformer_dec_dropout_rate': 0.2, 'transformer_dec_positional_dropout_rate': 0.2, 'transformer_dec_attn_dropout_rate': 0.2, 'pitch_predictor_layers': 5, 'pitch_predictor_chans': 256, 'pitch_predictor_kernel_size': 5, 'pitch_predictor_dropout': 0.5, 'pitch_embed_kernel_size': 1, 'pitch_embed_dropout': 0.0, 'stop_gradient_from_pitch_predictor': True, 'energy_predictor_layers': 2, 'energy_predictor_chans': 256, 'energy_predictor_kernel_size': 3, 'energy_predictor_dropout': 0.5, 'energy_embed_kernel_size': 1, 'energy_embed_dropout': 0.0, 'stop_gradient_from_energy_predictor': False, 'generator_out_channels': 1, 'generator_channels': 512, 'generator_global_channels': -1, 'generator_kernel_size': 7, 'generator_upsample_scales': [8, 8, 2, 2], 'generator_upsample_kernel_sizes': [16, 16, 4, 4], 'generator_resblock_kernel_sizes': [3, 7, 11], 'generator_resblock_dilations': [[1, 3, 5], [1, 3, 5], [1, 3, 5]], 'generator_use_additional_convs': True, 'generator_bias': True, 'generator_nonlinear_activation': 'LeakyReLU', 'generator_nonlinear_activation_params': {'negative_slope': 0.1}, 'generator_use_weight_norm': True, 'segment_size': 64, 'idim': 41, 'odim': 80}, 'discriminator_type': 'hifigan_multi_scale_multi_period_discriminator', 'discriminator_params': {'scales': 1, 'scale_downsample_pooling': 'AvgPool1d', 'scale_downsample_pooling_params': {'kernel_size': 4, 'stride': 2, 'padding': 2}, 'scale_discriminator_params': {'in_channels': 1, 'out_channels': 1, 'kernel_sizes': [15, 41, 5, 3], 'channels': 128, 'max_downsample_channels': 1024, 'max_groups': 16, 'bias': True, 'downsample_scales': [2, 2, 4, 4, 1], 'nonlinear_activation': 'LeakyReLU', 'nonlinear_activation_params': {'negative_slope': 0.1}, 'use_weight_norm': True, 'use_spectral_norm': False}, 'follow_official_norm': False, 'periods': [2, 3, 5, 7, 11], 'period_discriminator_params': {'in_channels': 1, 'out_channels': 1, 'kernel_sizes': [5, 3], 'channels': 32, 'downsample_scales': [3, 3, 3, 3, 1], 'max_downsample_channels': 1024, 'bias': True, 'nonlinear_activation': 'LeakyReLU', 'nonlinear_activation_params': {'negative_slope': 0.1}, 'use_weight_norm': True, 'use_spectral_norm': False}}, 'generator_adv_loss_params': {'average_by_discriminators': False, 'loss_type': 'mse'}, 'discriminator_adv_loss_params': {'average_by_discriminators': False, 'loss_type': 'mse'}, 'feat_match_loss_params': {'average_by_discriminators': False, 'average_by_layers': False, 'include_final_outputs': True}, 'mel_loss_params': {'fs': 24000, 'n_fft': 1024, 'hop_length': 256, 'win_length': None, 'window': 'hann', 'n_mels': 80, 'fmin': 0, 'fmax': None, 'log_base': None}, 'lambda_adv': 1.0, 'lambda_mel': 45.0, 'lambda_feat_match': 2.0, 'lambda_var': 1.0, 'lambda_align': 2.0, 'sampling_rate': 24000, 'cache_generator_outputs': True}, unused_parameters=True, use_amp=False, use_matplotlib=True, use_preprocessor=True, use_tensorboard=True, use_wandb=False, val_scheduler_criterion=('valid', 'loss'), valid_batch_bins=None, valid_batch_size=None, valid_batch_type=None, valid_data_path_and_name_and_type=[('dump/raw/jvs010_dev/text', 'text', 'text'), ('dump/raw/jvs010_dev/wav.scp', 'speech', 'sound')], valid_max_cache_size=None, valid_shape_file=['exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/valid.6.scp'], version='202204', wandb_entity=None, wandb_id=None, wandb_model_log_interval=-1, wandb_name=None, wandb_project=None, write_collected_feats=False)
|
| 871 |
/work/espnet/espnet2/layers/stft.py:166: UserWarning: __floordiv__ is deprecated, and its behavior will change in a future version of pytorch. It currently rounds toward 0 (like the 'trunc' function NOT 'floor'). This results in incorrect rounding for negative values. To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor').
|
| 872 |
olens = (ilens - self.n_fft) // self.hop_length + 1
|
| 873 |
+
# Accounting: time=15 threads=1
|
| 874 |
+
# Ended (code 0) at Tue Mar 4 22:09:50 JST 2025, elapsed time 15 seconds
|
exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/stats.6/config.yaml
CHANGED
|
@@ -6,7 +6,7 @@ iterator_type: sequence
|
|
| 6 |
output_dir: exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/stats.6
|
| 7 |
ngpu: 0
|
| 8 |
seed: 777
|
| 9 |
-
num_workers:
|
| 10 |
num_att_plot: 3
|
| 11 |
dist_backend: nccl
|
| 12 |
dist_init_method: env://
|
|
|
|
| 6 |
output_dir: exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/stats.6
|
| 7 |
ngpu: 0
|
| 8 |
seed: 777
|
| 9 |
+
num_workers: 32
|
| 10 |
num_att_plot: 3
|
| 11 |
dist_backend: nccl
|
| 12 |
dist_init_method: env://
|
exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/stats.7.log
CHANGED
|
@@ -1,14 +1,14 @@
|
|
| 1 |
# python3 -m espnet2.bin.gan_tts_train --collect_stats true --write_collected_feats false --use_preprocessor true --token_type phn --token_list dump/token_list/phn_jaconv_pyopenjtalk/tokens.txt --non_linguistic_symbols none --cleaner jaconv --g2p pyopenjtalk --normalize none --pitch_normalize none --energy_normalize none --train_data_path_and_name_and_type dump/raw/jvs010_tr_no_dev/text,text,text --train_data_path_and_name_and_type dump/raw/jvs010_tr_no_dev/wav.scp,speech,sound --valid_data_path_and_name_and_type dump/raw/jvs010_dev/text,text,text --valid_data_path_and_name_and_type dump/raw/jvs010_dev/wav.scp,speech,sound --train_shape_file exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/train.7.scp --valid_shape_file exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/valid.7.scp --output_dir exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/stats.7 --config conf/tuning/train_jets.yaml --feats_extract fbank --feats_extract_conf n_fft=2048 --feats_extract_conf hop_length=300 --feats_extract_conf win_length=1200 --feats_extract_conf fs=24000 --feats_extract_conf fmin=80 --feats_extract_conf fmax=7600 --feats_extract_conf n_mels=80 --pitch_extract_conf fs=24000 --pitch_extract_conf n_fft=2048 --pitch_extract_conf hop_length=300 --pitch_extract_conf f0max=400 --pitch_extract_conf f0min=80 --energy_extract_conf fs=24000 --energy_extract_conf n_fft=2048 --energy_extract_conf hop_length=300 --energy_extract_conf win_length=1200
|
| 2 |
-
# Started at Tue Mar 4
|
| 3 |
#
|
| 4 |
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 5 |
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 6 |
/usr/bin/python3 /work/espnet/espnet2/bin/gan_tts_train.py --collect_stats true --write_collected_feats false --use_preprocessor true --token_type phn --token_list dump/token_list/phn_jaconv_pyopenjtalk/tokens.txt --non_linguistic_symbols none --cleaner jaconv --g2p pyopenjtalk --normalize none --pitch_normalize none --energy_normalize none --train_data_path_and_name_and_type dump/raw/jvs010_tr_no_dev/text,text,text --train_data_path_and_name_and_type dump/raw/jvs010_tr_no_dev/wav.scp,speech,sound --valid_data_path_and_name_and_type dump/raw/jvs010_dev/text,text,text --valid_data_path_and_name_and_type dump/raw/jvs010_dev/wav.scp,speech,sound --train_shape_file exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/train.7.scp --valid_shape_file exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/valid.7.scp --output_dir exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/stats.7 --config conf/tuning/train_jets.yaml --feats_extract fbank --feats_extract_conf n_fft=2048 --feats_extract_conf hop_length=300 --feats_extract_conf win_length=1200 --feats_extract_conf fs=24000 --feats_extract_conf fmin=80 --feats_extract_conf fmax=7600 --feats_extract_conf n_mels=80 --pitch_extract_conf fs=24000 --pitch_extract_conf n_fft=2048 --pitch_extract_conf hop_length=300 --pitch_extract_conf f0max=400 --pitch_extract_conf f0min=80 --energy_extract_conf fs=24000 --energy_extract_conf n_fft=2048 --energy_extract_conf hop_length=300 --energy_extract_conf win_length=1200
|
| 7 |
-
[92b100c97f43] 2025-03-04
|
| 8 |
-
[92b100c97f43] 2025-03-04
|
| 9 |
-
[92b100c97f43] 2025-03-04
|
| 10 |
-
[92b100c97f43] 2025-03-04
|
| 11 |
-
[92b100c97f43] 2025-03-04
|
| 12 |
ESPnetGANTTSModel(
|
| 13 |
(feats_extract): LogMelFbank(
|
| 14 |
(stft): Stft(n_fft=2048, win_length=1200, hop_length=300, center=True, normalized=False, onesided=True)
|
|
@@ -844,7 +844,7 @@ Model summary:
|
|
| 844 |
Number of trainable parameters: 83.28 M (100.0%)
|
| 845 |
Size: 333.11 MB
|
| 846 |
Type: torch.float32
|
| 847 |
-
[92b100c97f43] 2025-03-04
|
| 848 |
AdamW (
|
| 849 |
Parameter Group 0
|
| 850 |
amsgrad: False
|
|
@@ -854,8 +854,8 @@ Parameter Group 0
|
|
| 854 |
lr: 0.0002
|
| 855 |
weight_decay: 0.0
|
| 856 |
)
|
| 857 |
-
[92b100c97f43] 2025-03-04
|
| 858 |
-
[92b100c97f43] 2025-03-04
|
| 859 |
AdamW (
|
| 860 |
Parameter Group 0
|
| 861 |
amsgrad: False
|
|
@@ -865,10 +865,10 @@ Parameter Group 0
|
|
| 865 |
lr: 0.0002
|
| 866 |
weight_decay: 0.0
|
| 867 |
)
|
| 868 |
-
[92b100c97f43] 2025-03-04
|
| 869 |
-
[92b100c97f43] 2025-03-04
|
| 870 |
-
[92b100c97f43] 2025-03-04
|
| 871 |
/work/espnet/espnet2/layers/stft.py:166: UserWarning: __floordiv__ is deprecated, and its behavior will change in a future version of pytorch. It currently rounds toward 0 (like the 'trunc' function NOT 'floor'). This results in incorrect rounding for negative values. To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor').
|
| 872 |
olens = (ilens - self.n_fft) // self.hop_length + 1
|
| 873 |
-
# Accounting: time=
|
| 874 |
-
# Ended (code 0) at Tue Mar 4
|
|
|
|
| 1 |
# python3 -m espnet2.bin.gan_tts_train --collect_stats true --write_collected_feats false --use_preprocessor true --token_type phn --token_list dump/token_list/phn_jaconv_pyopenjtalk/tokens.txt --non_linguistic_symbols none --cleaner jaconv --g2p pyopenjtalk --normalize none --pitch_normalize none --energy_normalize none --train_data_path_and_name_and_type dump/raw/jvs010_tr_no_dev/text,text,text --train_data_path_and_name_and_type dump/raw/jvs010_tr_no_dev/wav.scp,speech,sound --valid_data_path_and_name_and_type dump/raw/jvs010_dev/text,text,text --valid_data_path_and_name_and_type dump/raw/jvs010_dev/wav.scp,speech,sound --train_shape_file exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/train.7.scp --valid_shape_file exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/valid.7.scp --output_dir exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/stats.7 --config conf/tuning/train_jets.yaml --feats_extract fbank --feats_extract_conf n_fft=2048 --feats_extract_conf hop_length=300 --feats_extract_conf win_length=1200 --feats_extract_conf fs=24000 --feats_extract_conf fmin=80 --feats_extract_conf fmax=7600 --feats_extract_conf n_mels=80 --pitch_extract_conf fs=24000 --pitch_extract_conf n_fft=2048 --pitch_extract_conf hop_length=300 --pitch_extract_conf f0max=400 --pitch_extract_conf f0min=80 --energy_extract_conf fs=24000 --energy_extract_conf n_fft=2048 --energy_extract_conf hop_length=300 --energy_extract_conf win_length=1200
|
| 2 |
+
# Started at Tue Mar 4 22:09:35 JST 2025
|
| 3 |
#
|
| 4 |
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 5 |
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 6 |
/usr/bin/python3 /work/espnet/espnet2/bin/gan_tts_train.py --collect_stats true --write_collected_feats false --use_preprocessor true --token_type phn --token_list dump/token_list/phn_jaconv_pyopenjtalk/tokens.txt --non_linguistic_symbols none --cleaner jaconv --g2p pyopenjtalk --normalize none --pitch_normalize none --energy_normalize none --train_data_path_and_name_and_type dump/raw/jvs010_tr_no_dev/text,text,text --train_data_path_and_name_and_type dump/raw/jvs010_tr_no_dev/wav.scp,speech,sound --valid_data_path_and_name_and_type dump/raw/jvs010_dev/text,text,text --valid_data_path_and_name_and_type dump/raw/jvs010_dev/wav.scp,speech,sound --train_shape_file exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/train.7.scp --valid_shape_file exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/valid.7.scp --output_dir exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/stats.7 --config conf/tuning/train_jets.yaml --feats_extract fbank --feats_extract_conf n_fft=2048 --feats_extract_conf hop_length=300 --feats_extract_conf win_length=1200 --feats_extract_conf fs=24000 --feats_extract_conf fmin=80 --feats_extract_conf fmax=7600 --feats_extract_conf n_mels=80 --pitch_extract_conf fs=24000 --pitch_extract_conf n_fft=2048 --pitch_extract_conf hop_length=300 --pitch_extract_conf f0max=400 --pitch_extract_conf f0min=80 --energy_extract_conf fs=24000 --energy_extract_conf n_fft=2048 --energy_extract_conf hop_length=300 --energy_extract_conf win_length=1200
|
| 7 |
+
[92b100c97f43] 2025-03-04 22:09:38,619 (gan_tts:304) INFO: Vocabulary size: 41
|
| 8 |
+
[92b100c97f43] 2025-03-04 22:09:38,843 (encoder:172) INFO: encoder self-attention layer type = self-attention
|
| 9 |
+
[92b100c97f43] 2025-03-04 22:09:38,967 (encoder:172) INFO: encoder self-attention layer type = self-attention
|
| 10 |
+
[92b100c97f43] 2025-03-04 22:09:41,131 (abs_task:1157) INFO: pytorch.version=1.10.1+cu113, cuda.available=True, cudnn.version=8200, cudnn.benchmark=False, cudnn.deterministic=False
|
| 11 |
+
[92b100c97f43] 2025-03-04 22:09:41,142 (abs_task:1158) INFO: Model structure:
|
| 12 |
ESPnetGANTTSModel(
|
| 13 |
(feats_extract): LogMelFbank(
|
| 14 |
(stft): Stft(n_fft=2048, win_length=1200, hop_length=300, center=True, normalized=False, onesided=True)
|
|
|
|
| 844 |
Number of trainable parameters: 83.28 M (100.0%)
|
| 845 |
Size: 333.11 MB
|
| 846 |
Type: torch.float32
|
| 847 |
+
[92b100c97f43] 2025-03-04 22:09:41,142 (abs_task:1161) INFO: Optimizer:
|
| 848 |
AdamW (
|
| 849 |
Parameter Group 0
|
| 850 |
amsgrad: False
|
|
|
|
| 854 |
lr: 0.0002
|
| 855 |
weight_decay: 0.0
|
| 856 |
)
|
| 857 |
+
[92b100c97f43] 2025-03-04 22:09:41,142 (abs_task:1162) INFO: Scheduler: <torch.optim.lr_scheduler.ExponentialLR object at 0x7fd21d2ef1c0>
|
| 858 |
+
[92b100c97f43] 2025-03-04 22:09:41,142 (abs_task:1161) INFO: Optimizer2:
|
| 859 |
AdamW (
|
| 860 |
Parameter Group 0
|
| 861 |
amsgrad: False
|
|
|
|
| 865 |
lr: 0.0002
|
| 866 |
weight_decay: 0.0
|
| 867 |
)
|
| 868 |
+
[92b100c97f43] 2025-03-04 22:09:41,142 (abs_task:1162) INFO: Scheduler2: <torch.optim.lr_scheduler.ExponentialLR object at 0x7fd22c214280>
|
| 869 |
+
[92b100c97f43] 2025-03-04 22:09:41,142 (abs_task:1171) INFO: Saving the configuration in exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/stats.7/config.yaml
|
| 870 |
+
[92b100c97f43] 2025-03-04 22:09:41,161 (abs_task:1182) INFO: Namespace(accum_grad=1, allow_variable_data_keys=False, batch_bins=6000000, batch_size=20, batch_type='numel', best_model_criterion=[['valid', 'text2mel_loss', 'min'], ['train', 'text2mel_loss', 'min'], ['train', 'total_count', 'max']], bpemodel=None, chunk_length=500, chunk_shift_ratio=0.5, cleaner='jaconv', collect_stats=True, config='conf/tuning/train_jets.yaml', cudnn_benchmark=False, cudnn_deterministic=False, cudnn_enabled=True, detect_anomaly=False, dist_backend='nccl', dist_init_method='env://', dist_launcher=None, dist_master_addr=None, dist_master_port=None, dist_rank=None, dist_world_size=None, distributed=False, dry_run=False, early_stopping_criterion=('valid', 'loss', 'min'), energy_extract='energy', energy_extract_conf={'reduction_factor': 1, 'use_token_averaged_energy': False, 'fs': 24000, 'n_fft': 2048, 'hop_length': 300, 'win_length': 1200}, energy_normalize=None, energy_normalize_conf={}, feats_extract='fbank', feats_extract_conf={'n_fft': 2048, 'hop_length': 300, 'win_length': 1200, 'fs': 24000, 'fmin': 80, 'fmax': 7600, 'n_mels': 80}, fold_length=[], freeze_param=[], g2p='pyopenjtalk', generator_first=True, grad_clip=-1, grad_clip_type=2.0, grad_noise=False, ignore_init_mismatch=False, init_param=[], iterator_type='sequence', keep_nbest_models=-1, local_rank=None, log_interval=50, log_level='INFO', max_cache_fd=32, max_cache_size=0.0, max_epoch=130, model_conf={}, multiple_iterator=False, multiprocessing_distributed=False, nbest_averaging_interval=0, ngpu=0, no_forward_run=False, non_linguistic_symbols=None, normalize=None, normalize_conf={}, num_att_plot=3, num_cache_chunks=1024, num_iters_per_epoch=1000, num_workers=32, odim=None, optim='adamw', optim2='adamw', optim2_conf={'lr': 0.0002, 'betas': [0.8, 0.99], 'eps': 1e-09, 'weight_decay': 0.0}, optim_conf={'lr': 0.0002, 'betas': [0.8, 0.99], 'eps': 1e-09, 'weight_decay': 0.0}, output_dir='exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/stats.7', patience=None, pitch_extract='dio', pitch_extract_conf={'reduction_factor': 1, 'use_token_averaged_f0': False, 'fs': 24000, 'n_fft': 2048, 'hop_length': 300, 'f0max': 400, 'f0min': 80}, pitch_normalize=None, pitch_normalize_conf={}, pretrain_path=None, print_config=False, required=['output_dir', 'token_list'], resume=False, scheduler='exponentiallr', scheduler2='exponentiallr', scheduler2_conf={'gamma': 0.999875}, scheduler_conf={'gamma': 0.999875}, seed=777, sharded_ddp=False, sort_batch='descending', sort_in_batch='descending', token_list=['<blank>', '<unk>', 'o', 'a', 'u', 'i', 'e', 'k', 'r', 't', 'n', 'pau', 'N', 's', 'sh', 'd', 'm', 'g', 'w', 'b', 'cl', 'I', 'j', 'ch', 'y', 'U', 'h', 'p', 'ts', 'f', 'z', 'ky', 'ny', 'gy', 'ry', 'hy', 'my', 'by', 'py', 'v', '<sos/eos>'], token_type='phn', train_data_path_and_name_and_type=[('dump/raw/jvs010_tr_no_dev/text', 'text', 'text'), ('dump/raw/jvs010_tr_no_dev/wav.scp', 'speech', 'sound')], train_dtype='float32', train_shape_file=['exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/train.7.scp'], tts='jets', tts_conf={'generator_type': 'jets_generator', 'generator_params': {'adim': 256, 'aheads': 2, 'elayers': 4, 'eunits': 1024, 'dlayers': 4, 'dunits': 1024, 'positionwise_layer_type': 'conv1d', 'positionwise_conv_kernel_size': 3, 'duration_predictor_layers': 2, 'duration_predictor_chans': 256, 'duration_predictor_kernel_size': 3, 'use_masking': True, 'encoder_normalize_before': True, 'decoder_normalize_before': True, 'encoder_type': 'transformer', 'decoder_type': 'transformer', 'conformer_rel_pos_type': 'latest', 'conformer_pos_enc_layer_type': 'rel_pos', 'conformer_self_attn_layer_type': 'rel_selfattn', 'conformer_activation_type': 'swish', 'use_macaron_style_in_conformer': True, 'use_cnn_in_conformer': True, 'conformer_enc_kernel_size': 7, 'conformer_dec_kernel_size': 31, 'init_type': 'xavier_uniform', 'transformer_enc_dropout_rate': 0.2, 'transformer_enc_positional_dropout_rate': 0.2, 'transformer_enc_attn_dropout_rate': 0.2, 'transformer_dec_dropout_rate': 0.2, 'transformer_dec_positional_dropout_rate': 0.2, 'transformer_dec_attn_dropout_rate': 0.2, 'pitch_predictor_layers': 5, 'pitch_predictor_chans': 256, 'pitch_predictor_kernel_size': 5, 'pitch_predictor_dropout': 0.5, 'pitch_embed_kernel_size': 1, 'pitch_embed_dropout': 0.0, 'stop_gradient_from_pitch_predictor': True, 'energy_predictor_layers': 2, 'energy_predictor_chans': 256, 'energy_predictor_kernel_size': 3, 'energy_predictor_dropout': 0.5, 'energy_embed_kernel_size': 1, 'energy_embed_dropout': 0.0, 'stop_gradient_from_energy_predictor': False, 'generator_out_channels': 1, 'generator_channels': 512, 'generator_global_channels': -1, 'generator_kernel_size': 7, 'generator_upsample_scales': [8, 8, 2, 2], 'generator_upsample_kernel_sizes': [16, 16, 4, 4], 'generator_resblock_kernel_sizes': [3, 7, 11], 'generator_resblock_dilations': [[1, 3, 5], [1, 3, 5], [1, 3, 5]], 'generator_use_additional_convs': True, 'generator_bias': True, 'generator_nonlinear_activation': 'LeakyReLU', 'generator_nonlinear_activation_params': {'negative_slope': 0.1}, 'generator_use_weight_norm': True, 'segment_size': 64, 'idim': 41, 'odim': 80}, 'discriminator_type': 'hifigan_multi_scale_multi_period_discriminator', 'discriminator_params': {'scales': 1, 'scale_downsample_pooling': 'AvgPool1d', 'scale_downsample_pooling_params': {'kernel_size': 4, 'stride': 2, 'padding': 2}, 'scale_discriminator_params': {'in_channels': 1, 'out_channels': 1, 'kernel_sizes': [15, 41, 5, 3], 'channels': 128, 'max_downsample_channels': 1024, 'max_groups': 16, 'bias': True, 'downsample_scales': [2, 2, 4, 4, 1], 'nonlinear_activation': 'LeakyReLU', 'nonlinear_activation_params': {'negative_slope': 0.1}, 'use_weight_norm': True, 'use_spectral_norm': False}, 'follow_official_norm': False, 'periods': [2, 3, 5, 7, 11], 'period_discriminator_params': {'in_channels': 1, 'out_channels': 1, 'kernel_sizes': [5, 3], 'channels': 32, 'downsample_scales': [3, 3, 3, 3, 1], 'max_downsample_channels': 1024, 'bias': True, 'nonlinear_activation': 'LeakyReLU', 'nonlinear_activation_params': {'negative_slope': 0.1}, 'use_weight_norm': True, 'use_spectral_norm': False}}, 'generator_adv_loss_params': {'average_by_discriminators': False, 'loss_type': 'mse'}, 'discriminator_adv_loss_params': {'average_by_discriminators': False, 'loss_type': 'mse'}, 'feat_match_loss_params': {'average_by_discriminators': False, 'average_by_layers': False, 'include_final_outputs': True}, 'mel_loss_params': {'fs': 24000, 'n_fft': 1024, 'hop_length': 256, 'win_length': None, 'window': 'hann', 'n_mels': 80, 'fmin': 0, 'fmax': None, 'log_base': None}, 'lambda_adv': 1.0, 'lambda_mel': 45.0, 'lambda_feat_match': 2.0, 'lambda_var': 1.0, 'lambda_align': 2.0, 'sampling_rate': 24000, 'cache_generator_outputs': True}, unused_parameters=True, use_amp=False, use_matplotlib=True, use_preprocessor=True, use_tensorboard=True, use_wandb=False, val_scheduler_criterion=('valid', 'loss'), valid_batch_bins=None, valid_batch_size=None, valid_batch_type=None, valid_data_path_and_name_and_type=[('dump/raw/jvs010_dev/text', 'text', 'text'), ('dump/raw/jvs010_dev/wav.scp', 'speech', 'sound')], valid_max_cache_size=None, valid_shape_file=['exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/valid.7.scp'], version='202204', wandb_entity=None, wandb_id=None, wandb_model_log_interval=-1, wandb_name=None, wandb_project=None, write_collected_feats=False)
|
| 871 |
/work/espnet/espnet2/layers/stft.py:166: UserWarning: __floordiv__ is deprecated, and its behavior will change in a future version of pytorch. It currently rounds toward 0 (like the 'trunc' function NOT 'floor'). This results in incorrect rounding for negative values. To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor').
|
| 872 |
olens = (ilens - self.n_fft) // self.hop_length + 1
|
| 873 |
+
# Accounting: time=15 threads=1
|
| 874 |
+
# Ended (code 0) at Tue Mar 4 22:09:50 JST 2025, elapsed time 15 seconds
|
exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/stats.7/config.yaml
CHANGED
|
@@ -6,7 +6,7 @@ iterator_type: sequence
|
|
| 6 |
output_dir: exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/stats.7
|
| 7 |
ngpu: 0
|
| 8 |
seed: 777
|
| 9 |
-
num_workers:
|
| 10 |
num_att_plot: 3
|
| 11 |
dist_backend: nccl
|
| 12 |
dist_init_method: env://
|
|
|
|
| 6 |
output_dir: exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/stats.7
|
| 7 |
ngpu: 0
|
| 8 |
seed: 777
|
| 9 |
+
num_workers: 32
|
| 10 |
num_att_plot: 3
|
| 11 |
dist_backend: nccl
|
| 12 |
dist_init_method: env://
|
exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/stats.8.log
CHANGED
|
@@ -1,14 +1,14 @@
|
|
| 1 |
# python3 -m espnet2.bin.gan_tts_train --collect_stats true --write_collected_feats false --use_preprocessor true --token_type phn --token_list dump/token_list/phn_jaconv_pyopenjtalk/tokens.txt --non_linguistic_symbols none --cleaner jaconv --g2p pyopenjtalk --normalize none --pitch_normalize none --energy_normalize none --train_data_path_and_name_and_type dump/raw/jvs010_tr_no_dev/text,text,text --train_data_path_and_name_and_type dump/raw/jvs010_tr_no_dev/wav.scp,speech,sound --valid_data_path_and_name_and_type dump/raw/jvs010_dev/text,text,text --valid_data_path_and_name_and_type dump/raw/jvs010_dev/wav.scp,speech,sound --train_shape_file exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/train.8.scp --valid_shape_file exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/valid.8.scp --output_dir exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/stats.8 --config conf/tuning/train_jets.yaml --feats_extract fbank --feats_extract_conf n_fft=2048 --feats_extract_conf hop_length=300 --feats_extract_conf win_length=1200 --feats_extract_conf fs=24000 --feats_extract_conf fmin=80 --feats_extract_conf fmax=7600 --feats_extract_conf n_mels=80 --pitch_extract_conf fs=24000 --pitch_extract_conf n_fft=2048 --pitch_extract_conf hop_length=300 --pitch_extract_conf f0max=400 --pitch_extract_conf f0min=80 --energy_extract_conf fs=24000 --energy_extract_conf n_fft=2048 --energy_extract_conf hop_length=300 --energy_extract_conf win_length=1200
|
| 2 |
-
# Started at Tue Mar 4
|
| 3 |
#
|
| 4 |
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 5 |
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 6 |
/usr/bin/python3 /work/espnet/espnet2/bin/gan_tts_train.py --collect_stats true --write_collected_feats false --use_preprocessor true --token_type phn --token_list dump/token_list/phn_jaconv_pyopenjtalk/tokens.txt --non_linguistic_symbols none --cleaner jaconv --g2p pyopenjtalk --normalize none --pitch_normalize none --energy_normalize none --train_data_path_and_name_and_type dump/raw/jvs010_tr_no_dev/text,text,text --train_data_path_and_name_and_type dump/raw/jvs010_tr_no_dev/wav.scp,speech,sound --valid_data_path_and_name_and_type dump/raw/jvs010_dev/text,text,text --valid_data_path_and_name_and_type dump/raw/jvs010_dev/wav.scp,speech,sound --train_shape_file exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/train.8.scp --valid_shape_file exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/valid.8.scp --output_dir exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/stats.8 --config conf/tuning/train_jets.yaml --feats_extract fbank --feats_extract_conf n_fft=2048 --feats_extract_conf hop_length=300 --feats_extract_conf win_length=1200 --feats_extract_conf fs=24000 --feats_extract_conf fmin=80 --feats_extract_conf fmax=7600 --feats_extract_conf n_mels=80 --pitch_extract_conf fs=24000 --pitch_extract_conf n_fft=2048 --pitch_extract_conf hop_length=300 --pitch_extract_conf f0max=400 --pitch_extract_conf f0min=80 --energy_extract_conf fs=24000 --energy_extract_conf n_fft=2048 --energy_extract_conf hop_length=300 --energy_extract_conf win_length=1200
|
| 7 |
-
[92b100c97f43] 2025-03-04
|
| 8 |
-
[92b100c97f43] 2025-03-04
|
| 9 |
-
[92b100c97f43] 2025-03-04
|
| 10 |
-
[92b100c97f43] 2025-03-04
|
| 11 |
-
[92b100c97f43] 2025-03-04
|
| 12 |
ESPnetGANTTSModel(
|
| 13 |
(feats_extract): LogMelFbank(
|
| 14 |
(stft): Stft(n_fft=2048, win_length=1200, hop_length=300, center=True, normalized=False, onesided=True)
|
|
@@ -844,7 +844,7 @@ Model summary:
|
|
| 844 |
Number of trainable parameters: 83.28 M (100.0%)
|
| 845 |
Size: 333.11 MB
|
| 846 |
Type: torch.float32
|
| 847 |
-
[92b100c97f43] 2025-03-04
|
| 848 |
AdamW (
|
| 849 |
Parameter Group 0
|
| 850 |
amsgrad: False
|
|
@@ -854,8 +854,8 @@ Parameter Group 0
|
|
| 854 |
lr: 0.0002
|
| 855 |
weight_decay: 0.0
|
| 856 |
)
|
| 857 |
-
[92b100c97f43] 2025-03-04
|
| 858 |
-
[92b100c97f43] 2025-03-04
|
| 859 |
AdamW (
|
| 860 |
Parameter Group 0
|
| 861 |
amsgrad: False
|
|
@@ -865,10 +865,10 @@ Parameter Group 0
|
|
| 865 |
lr: 0.0002
|
| 866 |
weight_decay: 0.0
|
| 867 |
)
|
| 868 |
-
[92b100c97f43] 2025-03-04
|
| 869 |
-
[92b100c97f43] 2025-03-04
|
| 870 |
-
[92b100c97f43] 2025-03-04
|
| 871 |
/work/espnet/espnet2/layers/stft.py:166: UserWarning: __floordiv__ is deprecated, and its behavior will change in a future version of pytorch. It currently rounds toward 0 (like the 'trunc' function NOT 'floor'). This results in incorrect rounding for negative values. To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor').
|
| 872 |
olens = (ilens - self.n_fft) // self.hop_length + 1
|
| 873 |
-
# Accounting: time=
|
| 874 |
-
# Ended (code 0) at Tue Mar 4
|
|
|
|
| 1 |
# python3 -m espnet2.bin.gan_tts_train --collect_stats true --write_collected_feats false --use_preprocessor true --token_type phn --token_list dump/token_list/phn_jaconv_pyopenjtalk/tokens.txt --non_linguistic_symbols none --cleaner jaconv --g2p pyopenjtalk --normalize none --pitch_normalize none --energy_normalize none --train_data_path_and_name_and_type dump/raw/jvs010_tr_no_dev/text,text,text --train_data_path_and_name_and_type dump/raw/jvs010_tr_no_dev/wav.scp,speech,sound --valid_data_path_and_name_and_type dump/raw/jvs010_dev/text,text,text --valid_data_path_and_name_and_type dump/raw/jvs010_dev/wav.scp,speech,sound --train_shape_file exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/train.8.scp --valid_shape_file exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/valid.8.scp --output_dir exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/stats.8 --config conf/tuning/train_jets.yaml --feats_extract fbank --feats_extract_conf n_fft=2048 --feats_extract_conf hop_length=300 --feats_extract_conf win_length=1200 --feats_extract_conf fs=24000 --feats_extract_conf fmin=80 --feats_extract_conf fmax=7600 --feats_extract_conf n_mels=80 --pitch_extract_conf fs=24000 --pitch_extract_conf n_fft=2048 --pitch_extract_conf hop_length=300 --pitch_extract_conf f0max=400 --pitch_extract_conf f0min=80 --energy_extract_conf fs=24000 --energy_extract_conf n_fft=2048 --energy_extract_conf hop_length=300 --energy_extract_conf win_length=1200
|
| 2 |
+
# Started at Tue Mar 4 22:09:35 JST 2025
|
| 3 |
#
|
| 4 |
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 5 |
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 6 |
/usr/bin/python3 /work/espnet/espnet2/bin/gan_tts_train.py --collect_stats true --write_collected_feats false --use_preprocessor true --token_type phn --token_list dump/token_list/phn_jaconv_pyopenjtalk/tokens.txt --non_linguistic_symbols none --cleaner jaconv --g2p pyopenjtalk --normalize none --pitch_normalize none --energy_normalize none --train_data_path_and_name_and_type dump/raw/jvs010_tr_no_dev/text,text,text --train_data_path_and_name_and_type dump/raw/jvs010_tr_no_dev/wav.scp,speech,sound --valid_data_path_and_name_and_type dump/raw/jvs010_dev/text,text,text --valid_data_path_and_name_and_type dump/raw/jvs010_dev/wav.scp,speech,sound --train_shape_file exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/train.8.scp --valid_shape_file exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/valid.8.scp --output_dir exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/stats.8 --config conf/tuning/train_jets.yaml --feats_extract fbank --feats_extract_conf n_fft=2048 --feats_extract_conf hop_length=300 --feats_extract_conf win_length=1200 --feats_extract_conf fs=24000 --feats_extract_conf fmin=80 --feats_extract_conf fmax=7600 --feats_extract_conf n_mels=80 --pitch_extract_conf fs=24000 --pitch_extract_conf n_fft=2048 --pitch_extract_conf hop_length=300 --pitch_extract_conf f0max=400 --pitch_extract_conf f0min=80 --energy_extract_conf fs=24000 --energy_extract_conf n_fft=2048 --energy_extract_conf hop_length=300 --energy_extract_conf win_length=1200
|
| 7 |
+
[92b100c97f43] 2025-03-04 22:09:38,624 (gan_tts:304) INFO: Vocabulary size: 41
|
| 8 |
+
[92b100c97f43] 2025-03-04 22:09:38,844 (encoder:172) INFO: encoder self-attention layer type = self-attention
|
| 9 |
+
[92b100c97f43] 2025-03-04 22:09:38,966 (encoder:172) INFO: encoder self-attention layer type = self-attention
|
| 10 |
+
[92b100c97f43] 2025-03-04 22:09:41,199 (abs_task:1157) INFO: pytorch.version=1.10.1+cu113, cuda.available=True, cudnn.version=8200, cudnn.benchmark=False, cudnn.deterministic=False
|
| 11 |
+
[92b100c97f43] 2025-03-04 22:09:41,209 (abs_task:1158) INFO: Model structure:
|
| 12 |
ESPnetGANTTSModel(
|
| 13 |
(feats_extract): LogMelFbank(
|
| 14 |
(stft): Stft(n_fft=2048, win_length=1200, hop_length=300, center=True, normalized=False, onesided=True)
|
|
|
|
| 844 |
Number of trainable parameters: 83.28 M (100.0%)
|
| 845 |
Size: 333.11 MB
|
| 846 |
Type: torch.float32
|
| 847 |
+
[92b100c97f43] 2025-03-04 22:09:41,209 (abs_task:1161) INFO: Optimizer:
|
| 848 |
AdamW (
|
| 849 |
Parameter Group 0
|
| 850 |
amsgrad: False
|
|
|
|
| 854 |
lr: 0.0002
|
| 855 |
weight_decay: 0.0
|
| 856 |
)
|
| 857 |
+
[92b100c97f43] 2025-03-04 22:09:41,209 (abs_task:1162) INFO: Scheduler: <torch.optim.lr_scheduler.ExponentialLR object at 0x7fe0420a1100>
|
| 858 |
+
[92b100c97f43] 2025-03-04 22:09:41,209 (abs_task:1161) INFO: Optimizer2:
|
| 859 |
AdamW (
|
| 860 |
Parameter Group 0
|
| 861 |
amsgrad: False
|
|
|
|
| 865 |
lr: 0.0002
|
| 866 |
weight_decay: 0.0
|
| 867 |
)
|
| 868 |
+
[92b100c97f43] 2025-03-04 22:09:41,209 (abs_task:1162) INFO: Scheduler2: <torch.optim.lr_scheduler.ExponentialLR object at 0x7fe050fc7190>
|
| 869 |
+
[92b100c97f43] 2025-03-04 22:09:41,210 (abs_task:1171) INFO: Saving the configuration in exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/stats.8/config.yaml
|
| 870 |
+
[92b100c97f43] 2025-03-04 22:09:41,228 (abs_task:1182) INFO: Namespace(accum_grad=1, allow_variable_data_keys=False, batch_bins=6000000, batch_size=20, batch_type='numel', best_model_criterion=[['valid', 'text2mel_loss', 'min'], ['train', 'text2mel_loss', 'min'], ['train', 'total_count', 'max']], bpemodel=None, chunk_length=500, chunk_shift_ratio=0.5, cleaner='jaconv', collect_stats=True, config='conf/tuning/train_jets.yaml', cudnn_benchmark=False, cudnn_deterministic=False, cudnn_enabled=True, detect_anomaly=False, dist_backend='nccl', dist_init_method='env://', dist_launcher=None, dist_master_addr=None, dist_master_port=None, dist_rank=None, dist_world_size=None, distributed=False, dry_run=False, early_stopping_criterion=('valid', 'loss', 'min'), energy_extract='energy', energy_extract_conf={'reduction_factor': 1, 'use_token_averaged_energy': False, 'fs': 24000, 'n_fft': 2048, 'hop_length': 300, 'win_length': 1200}, energy_normalize=None, energy_normalize_conf={}, feats_extract='fbank', feats_extract_conf={'n_fft': 2048, 'hop_length': 300, 'win_length': 1200, 'fs': 24000, 'fmin': 80, 'fmax': 7600, 'n_mels': 80}, fold_length=[], freeze_param=[], g2p='pyopenjtalk', generator_first=True, grad_clip=-1, grad_clip_type=2.0, grad_noise=False, ignore_init_mismatch=False, init_param=[], iterator_type='sequence', keep_nbest_models=-1, local_rank=None, log_interval=50, log_level='INFO', max_cache_fd=32, max_cache_size=0.0, max_epoch=130, model_conf={}, multiple_iterator=False, multiprocessing_distributed=False, nbest_averaging_interval=0, ngpu=0, no_forward_run=False, non_linguistic_symbols=None, normalize=None, normalize_conf={}, num_att_plot=3, num_cache_chunks=1024, num_iters_per_epoch=1000, num_workers=32, odim=None, optim='adamw', optim2='adamw', optim2_conf={'lr': 0.0002, 'betas': [0.8, 0.99], 'eps': 1e-09, 'weight_decay': 0.0}, optim_conf={'lr': 0.0002, 'betas': [0.8, 0.99], 'eps': 1e-09, 'weight_decay': 0.0}, output_dir='exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/stats.8', patience=None, pitch_extract='dio', pitch_extract_conf={'reduction_factor': 1, 'use_token_averaged_f0': False, 'fs': 24000, 'n_fft': 2048, 'hop_length': 300, 'f0max': 400, 'f0min': 80}, pitch_normalize=None, pitch_normalize_conf={}, pretrain_path=None, print_config=False, required=['output_dir', 'token_list'], resume=False, scheduler='exponentiallr', scheduler2='exponentiallr', scheduler2_conf={'gamma': 0.999875}, scheduler_conf={'gamma': 0.999875}, seed=777, sharded_ddp=False, sort_batch='descending', sort_in_batch='descending', token_list=['<blank>', '<unk>', 'o', 'a', 'u', 'i', 'e', 'k', 'r', 't', 'n', 'pau', 'N', 's', 'sh', 'd', 'm', 'g', 'w', 'b', 'cl', 'I', 'j', 'ch', 'y', 'U', 'h', 'p', 'ts', 'f', 'z', 'ky', 'ny', 'gy', 'ry', 'hy', 'my', 'by', 'py', 'v', '<sos/eos>'], token_type='phn', train_data_path_and_name_and_type=[('dump/raw/jvs010_tr_no_dev/text', 'text', 'text'), ('dump/raw/jvs010_tr_no_dev/wav.scp', 'speech', 'sound')], train_dtype='float32', train_shape_file=['exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/train.8.scp'], tts='jets', tts_conf={'generator_type': 'jets_generator', 'generator_params': {'adim': 256, 'aheads': 2, 'elayers': 4, 'eunits': 1024, 'dlayers': 4, 'dunits': 1024, 'positionwise_layer_type': 'conv1d', 'positionwise_conv_kernel_size': 3, 'duration_predictor_layers': 2, 'duration_predictor_chans': 256, 'duration_predictor_kernel_size': 3, 'use_masking': True, 'encoder_normalize_before': True, 'decoder_normalize_before': True, 'encoder_type': 'transformer', 'decoder_type': 'transformer', 'conformer_rel_pos_type': 'latest', 'conformer_pos_enc_layer_type': 'rel_pos', 'conformer_self_attn_layer_type': 'rel_selfattn', 'conformer_activation_type': 'swish', 'use_macaron_style_in_conformer': True, 'use_cnn_in_conformer': True, 'conformer_enc_kernel_size': 7, 'conformer_dec_kernel_size': 31, 'init_type': 'xavier_uniform', 'transformer_enc_dropout_rate': 0.2, 'transformer_enc_positional_dropout_rate': 0.2, 'transformer_enc_attn_dropout_rate': 0.2, 'transformer_dec_dropout_rate': 0.2, 'transformer_dec_positional_dropout_rate': 0.2, 'transformer_dec_attn_dropout_rate': 0.2, 'pitch_predictor_layers': 5, 'pitch_predictor_chans': 256, 'pitch_predictor_kernel_size': 5, 'pitch_predictor_dropout': 0.5, 'pitch_embed_kernel_size': 1, 'pitch_embed_dropout': 0.0, 'stop_gradient_from_pitch_predictor': True, 'energy_predictor_layers': 2, 'energy_predictor_chans': 256, 'energy_predictor_kernel_size': 3, 'energy_predictor_dropout': 0.5, 'energy_embed_kernel_size': 1, 'energy_embed_dropout': 0.0, 'stop_gradient_from_energy_predictor': False, 'generator_out_channels': 1, 'generator_channels': 512, 'generator_global_channels': -1, 'generator_kernel_size': 7, 'generator_upsample_scales': [8, 8, 2, 2], 'generator_upsample_kernel_sizes': [16, 16, 4, 4], 'generator_resblock_kernel_sizes': [3, 7, 11], 'generator_resblock_dilations': [[1, 3, 5], [1, 3, 5], [1, 3, 5]], 'generator_use_additional_convs': True, 'generator_bias': True, 'generator_nonlinear_activation': 'LeakyReLU', 'generator_nonlinear_activation_params': {'negative_slope': 0.1}, 'generator_use_weight_norm': True, 'segment_size': 64, 'idim': 41, 'odim': 80}, 'discriminator_type': 'hifigan_multi_scale_multi_period_discriminator', 'discriminator_params': {'scales': 1, 'scale_downsample_pooling': 'AvgPool1d', 'scale_downsample_pooling_params': {'kernel_size': 4, 'stride': 2, 'padding': 2}, 'scale_discriminator_params': {'in_channels': 1, 'out_channels': 1, 'kernel_sizes': [15, 41, 5, 3], 'channels': 128, 'max_downsample_channels': 1024, 'max_groups': 16, 'bias': True, 'downsample_scales': [2, 2, 4, 4, 1], 'nonlinear_activation': 'LeakyReLU', 'nonlinear_activation_params': {'negative_slope': 0.1}, 'use_weight_norm': True, 'use_spectral_norm': False}, 'follow_official_norm': False, 'periods': [2, 3, 5, 7, 11], 'period_discriminator_params': {'in_channels': 1, 'out_channels': 1, 'kernel_sizes': [5, 3], 'channels': 32, 'downsample_scales': [3, 3, 3, 3, 1], 'max_downsample_channels': 1024, 'bias': True, 'nonlinear_activation': 'LeakyReLU', 'nonlinear_activation_params': {'negative_slope': 0.1}, 'use_weight_norm': True, 'use_spectral_norm': False}}, 'generator_adv_loss_params': {'average_by_discriminators': False, 'loss_type': 'mse'}, 'discriminator_adv_loss_params': {'average_by_discriminators': False, 'loss_type': 'mse'}, 'feat_match_loss_params': {'average_by_discriminators': False, 'average_by_layers': False, 'include_final_outputs': True}, 'mel_loss_params': {'fs': 24000, 'n_fft': 1024, 'hop_length': 256, 'win_length': None, 'window': 'hann', 'n_mels': 80, 'fmin': 0, 'fmax': None, 'log_base': None}, 'lambda_adv': 1.0, 'lambda_mel': 45.0, 'lambda_feat_match': 2.0, 'lambda_var': 1.0, 'lambda_align': 2.0, 'sampling_rate': 24000, 'cache_generator_outputs': True}, unused_parameters=True, use_amp=False, use_matplotlib=True, use_preprocessor=True, use_tensorboard=True, use_wandb=False, val_scheduler_criterion=('valid', 'loss'), valid_batch_bins=None, valid_batch_size=None, valid_batch_type=None, valid_data_path_and_name_and_type=[('dump/raw/jvs010_dev/text', 'text', 'text'), ('dump/raw/jvs010_dev/wav.scp', 'speech', 'sound')], valid_max_cache_size=None, valid_shape_file=['exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/valid.8.scp'], version='202204', wandb_entity=None, wandb_id=None, wandb_model_log_interval=-1, wandb_name=None, wandb_project=None, write_collected_feats=False)
|
| 871 |
/work/espnet/espnet2/layers/stft.py:166: UserWarning: __floordiv__ is deprecated, and its behavior will change in a future version of pytorch. It currently rounds toward 0 (like the 'trunc' function NOT 'floor'). This results in incorrect rounding for negative values. To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor').
|
| 872 |
olens = (ilens - self.n_fft) // self.hop_length + 1
|
| 873 |
+
# Accounting: time=15 threads=1
|
| 874 |
+
# Ended (code 0) at Tue Mar 4 22:09:50 JST 2025, elapsed time 15 seconds
|
exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/stats.8/config.yaml
CHANGED
|
@@ -6,7 +6,7 @@ iterator_type: sequence
|
|
| 6 |
output_dir: exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/stats.8
|
| 7 |
ngpu: 0
|
| 8 |
seed: 777
|
| 9 |
-
num_workers:
|
| 10 |
num_att_plot: 3
|
| 11 |
dist_backend: nccl
|
| 12 |
dist_init_method: env://
|
|
|
|
| 6 |
output_dir: exp/tts_stats_raw_phn_jaconv_pyopenjtalk/logdir/stats.8
|
| 7 |
ngpu: 0
|
| 8 |
seed: 777
|
| 9 |
+
num_workers: 32
|
| 10 |
num_att_plot: 3
|
| 11 |
dist_backend: nccl
|
| 12 |
dist_init_method: env://
|
exp/tts_train_jets_raw_phn_jaconv_pyopenjtalk/config.yaml
CHANGED
|
@@ -6,7 +6,7 @@ iterator_type: sequence
|
|
| 6 |
output_dir: exp/tts_train_jets_raw_phn_jaconv_pyopenjtalk
|
| 7 |
ngpu: 1
|
| 8 |
seed: 777
|
| 9 |
-
num_workers:
|
| 10 |
num_att_plot: 3
|
| 11 |
dist_backend: nccl
|
| 12 |
dist_init_method: env://
|
|
@@ -14,7 +14,7 @@ dist_world_size: 4
|
|
| 14 |
dist_rank: 0
|
| 15 |
local_rank: 0
|
| 16 |
dist_master_addr: localhost
|
| 17 |
-
dist_master_port:
|
| 18 |
dist_launcher: null
|
| 19 |
multiprocessing_distributed: true
|
| 20 |
unused_parameters: true
|
|
@@ -70,7 +70,7 @@ freeze_param: []
|
|
| 70 |
num_iters_per_epoch: 1000
|
| 71 |
batch_size: 20
|
| 72 |
valid_batch_size: null
|
| 73 |
-
batch_bins:
|
| 74 |
valid_batch_bins: null
|
| 75 |
train_shape_file:
|
| 76 |
- exp/tts_stats_raw_phn_jaconv_pyopenjtalk/train/text_shape.phn
|
|
|
|
| 6 |
output_dir: exp/tts_train_jets_raw_phn_jaconv_pyopenjtalk
|
| 7 |
ngpu: 1
|
| 8 |
seed: 777
|
| 9 |
+
num_workers: 32
|
| 10 |
num_att_plot: 3
|
| 11 |
dist_backend: nccl
|
| 12 |
dist_init_method: env://
|
|
|
|
| 14 |
dist_rank: 0
|
| 15 |
local_rank: 0
|
| 16 |
dist_master_addr: localhost
|
| 17 |
+
dist_master_port: 59597
|
| 18 |
dist_launcher: null
|
| 19 |
multiprocessing_distributed: true
|
| 20 |
unused_parameters: true
|
|
|
|
| 70 |
num_iters_per_epoch: 1000
|
| 71 |
batch_size: 20
|
| 72 |
valid_batch_size: null
|
| 73 |
+
batch_bins: 9000000
|
| 74 |
valid_batch_bins: null
|
| 75 |
train_shape_file:
|
| 76 |
- exp/tts_stats_raw_phn_jaconv_pyopenjtalk/train/text_shape.phn
|
exp/tts_train_jets_raw_phn_jaconv_pyopenjtalk/tensorboard/train/events.out.tfevents.1741091035.92b100c97f43.1159464.0
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:47ca4c2af2ae5fe4ef0943d758d223149e37a2e101fc264aae7be3a9cdbb57bf
|
| 3 |
+
size 6486
|
exp/tts_train_jets_raw_phn_jaconv_pyopenjtalk/tensorboard/train/events.out.tfevents.1741091448.92b100c97f43.1179446.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9e59d9a3123a950064a58b9351b715c74d50f04dfc4bbbda3049c9e7366862a7
|
| 3 |
+
size 1674
|
exp/tts_train_jets_raw_phn_jaconv_pyopenjtalk/tensorboard/train/events.out.tfevents.1741091666.92b100c97f43.1289026.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6afddefdf687f1aec9663925e8bb8d5127831a5a2e7b3d5070c1684734cc7f00
|
| 3 |
+
size 88
|
exp/tts_train_jets_raw_phn_jaconv_pyopenjtalk/tensorboard/train/events.out.tfevents.1741091743.92b100c97f43.1324139.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:212fa57bbb135b7b0557550baa81d440f17fdefc764a4b170da42cc08b3ae061
|
| 3 |
+
size 88
|
exp/tts_train_jets_raw_phn_jaconv_pyopenjtalk/tensorboard/valid/events.out.tfevents.1741091448.92b100c97f43.1179446.1
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6e27cca46138cc3b95825e29775f472797d893c3278e7e926ceaa550d9316db8
|
| 3 |
+
size 88
|
exp/tts_train_jets_raw_phn_jaconv_pyopenjtalk/tensorboard/valid/events.out.tfevents.1741091666.92b100c97f43.1289026.1
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a7bec30596fbaf8184b6cb5fadfeccfba94d87111a618757906ebbabe7fed01b
|
| 3 |
+
size 88
|
exp/tts_train_jets_raw_phn_jaconv_pyopenjtalk/tensorboard/valid/events.out.tfevents.1741091743.92b100c97f43.1324139.1
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b469790a7a5ba7a680abdd1ebac1353bd03e16b487dda1629f526d68b1843e74
|
| 3 |
+
size 88
|
exp/tts_train_jets_raw_phn_jaconv_pyopenjtalk/train.1.log
ADDED
|
@@ -0,0 +1,1342 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# python3 -m espnet2.bin.gan_tts_train --use_preprocessor true --token_type phn --token_list dump/token_list/phn_jaconv_pyopenjtalk/tokens.txt --non_linguistic_symbols none --cleaner jaconv --g2p pyopenjtalk --normalize global_mvn --resume true --fold_length 150 --fold_length 240000 --output_dir exp/tts_train_jets_raw_phn_jaconv_pyopenjtalk --config conf/tuning/train_jets.yaml --feats_extract fbank --feats_extract_conf n_fft=2048 --feats_extract_conf hop_length=300 --feats_extract_conf win_length=1200 --feats_extract_conf fs=24000 --feats_extract_conf fmin=80 --feats_extract_conf fmax=7600 --feats_extract_conf n_mels=80 --train_data_path_and_name_and_type dump/raw/jvs010_tr_no_dev/text,text,text --train_data_path_and_name_and_type dump/raw/jvs010_tr_no_dev/wav.scp,speech,sound --train_shape_file exp/tts_stats_raw_phn_jaconv_pyopenjtalk/train/text_shape.phn --train_shape_file exp/tts_stats_raw_phn_jaconv_pyopenjtalk/train/speech_shape --valid_data_path_and_name_and_type dump/raw/jvs010_dev/text,text,text --valid_data_path_and_name_and_type dump/raw/jvs010_dev/wav.scp,speech,sound --valid_shape_file exp/tts_stats_raw_phn_jaconv_pyopenjtalk/valid/text_shape.phn --valid_shape_file exp/tts_stats_raw_phn_jaconv_pyopenjtalk/valid/speech_shape --pitch_extract_conf fs=24000 --pitch_extract_conf n_fft=2048 --pitch_extract_conf hop_length=300 --pitch_extract_conf f0max=400 --pitch_extract_conf f0min=80 --pitch_normalize_conf stats_file=exp/tts_stats_raw_phn_jaconv_pyopenjtalk/train/pitch_stats.npz --energy_extract_conf fs=24000 --energy_extract_conf n_fft=2048 --energy_extract_conf hop_length=300 --energy_extract_conf win_length=1200 --energy_normalize_conf stats_file=exp/tts_stats_raw_phn_jaconv_pyopenjtalk/train/energy_stats.npz --normalize_conf stats_file=exp/tts_stats_raw_phn_jaconv_pyopenjtalk/train/feats_stats.npz --ngpu 4 --multiprocessing_distributed True
|
| 2 |
+
# Started at Tue Mar 4 21:35:13 JST 2025
|
| 3 |
+
#
|
| 4 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 5 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 6 |
+
/usr/bin/python3 /work/espnet/espnet2/bin/gan_tts_train.py --use_preprocessor true --token_type phn --token_list dump/token_list/phn_jaconv_pyopenjtalk/tokens.txt --non_linguistic_symbols none --cleaner jaconv --g2p pyopenjtalk --normalize global_mvn --resume true --fold_length 150 --fold_length 240000 --output_dir exp/tts_train_jets_raw_phn_jaconv_pyopenjtalk --config conf/tuning/train_jets.yaml --feats_extract fbank --feats_extract_conf n_fft=2048 --feats_extract_conf hop_length=300 --feats_extract_conf win_length=1200 --feats_extract_conf fs=24000 --feats_extract_conf fmin=80 --feats_extract_conf fmax=7600 --feats_extract_conf n_mels=80 --train_data_path_and_name_and_type dump/raw/jvs010_tr_no_dev/text,text,text --train_data_path_and_name_and_type dump/raw/jvs010_tr_no_dev/wav.scp,speech,sound --train_shape_file exp/tts_stats_raw_phn_jaconv_pyopenjtalk/train/text_shape.phn --train_shape_file exp/tts_stats_raw_phn_jaconv_pyopenjtalk/train/speech_shape --valid_data_path_and_name_and_type dump/raw/jvs010_dev/text,text,text --valid_data_path_and_name_and_type dump/raw/jvs010_dev/wav.scp,speech,sound --valid_shape_file exp/tts_stats_raw_phn_jaconv_pyopenjtalk/valid/text_shape.phn --valid_shape_file exp/tts_stats_raw_phn_jaconv_pyopenjtalk/valid/speech_shape --pitch_extract_conf fs=24000 --pitch_extract_conf n_fft=2048 --pitch_extract_conf hop_length=300 --pitch_extract_conf f0max=400 --pitch_extract_conf f0min=80 --pitch_normalize_conf stats_file=exp/tts_stats_raw_phn_jaconv_pyopenjtalk/train/pitch_stats.npz --energy_extract_conf fs=24000 --energy_extract_conf n_fft=2048 --energy_extract_conf hop_length=300 --energy_extract_conf win_length=1200 --energy_normalize_conf stats_file=exp/tts_stats_raw_phn_jaconv_pyopenjtalk/train/energy_stats.npz --normalize_conf stats_file=exp/tts_stats_raw_phn_jaconv_pyopenjtalk/train/feats_stats.npz --ngpu 4 --multiprocessing_distributed True
|
| 7 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 8 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 9 |
+
[92b100c97f43:0/4] 2025-03-04 21:35:19,077 (distributed_c10d:217) INFO: Added key: store_based_barrier_key:1 to store for rank: 0
|
| 10 |
+
[92b100c97f43:0/4] 2025-03-04 21:35:19,088 (distributed_c10d:251) INFO: Rank 0: Completed store-based barrier for key:store_based_barrier_key:1 with 4 nodes.
|
| 11 |
+
[92b100c97f43:0/4] 2025-03-04 21:35:19,138 (gan_tts:304) INFO: Vocabulary size: 41
|
| 12 |
+
[92b100c97f43:0/4] 2025-03-04 21:35:19,267 (encoder:172) INFO: encoder self-attention layer type = self-attention
|
| 13 |
+
[92b100c97f43:0/4] 2025-03-04 21:35:19,488 (encoder:172) INFO: encoder self-attention layer type = self-attention
|
| 14 |
+
[92b100c97f43:0/4] 2025-03-04 21:35:42,642 (abs_task:1157) INFO: pytorch.version=1.10.1+cu113, cuda.available=True, cudnn.version=8200, cudnn.benchmark=False, cudnn.deterministic=False
|
| 15 |
+
[92b100c97f43:0/4] 2025-03-04 21:35:42,651 (abs_task:1158) INFO: Model structure:
|
| 16 |
+
ESPnetGANTTSModel(
|
| 17 |
+
(feats_extract): LogMelFbank(
|
| 18 |
+
(stft): Stft(n_fft=2048, win_length=1200, hop_length=300, center=True, normalized=False, onesided=True)
|
| 19 |
+
(logmel): LogMel(sr=24000, n_fft=2048, n_mels=80, fmin=80, fmax=7600, htk=False)
|
| 20 |
+
)
|
| 21 |
+
(normalize): GlobalMVN(stats_file=exp/tts_stats_raw_phn_jaconv_pyopenjtalk/train/feats_stats.npz, norm_means=True, norm_vars=True)
|
| 22 |
+
(pitch_extract): Dio()
|
| 23 |
+
(pitch_normalize): GlobalMVN(stats_file=exp/tts_stats_raw_phn_jaconv_pyopenjtalk/train/pitch_stats.npz, norm_means=True, norm_vars=True)
|
| 24 |
+
(energy_extract): Energy(
|
| 25 |
+
(stft): Stft(n_fft=2048, win_length=1200, hop_length=300, center=True, normalized=False, onesided=True)
|
| 26 |
+
)
|
| 27 |
+
(energy_normalize): GlobalMVN(stats_file=exp/tts_stats_raw_phn_jaconv_pyopenjtalk/train/energy_stats.npz, norm_means=True, norm_vars=True)
|
| 28 |
+
(tts): JETS(
|
| 29 |
+
(generator): JETSGenerator(
|
| 30 |
+
(encoder): Encoder(
|
| 31 |
+
(embed): Sequential(
|
| 32 |
+
(0): Embedding(41, 256, padding_idx=0)
|
| 33 |
+
(1): ScaledPositionalEncoding(
|
| 34 |
+
(dropout): Dropout(p=0.2, inplace=False)
|
| 35 |
+
)
|
| 36 |
+
)
|
| 37 |
+
(encoders): MultiSequential(
|
| 38 |
+
(0): EncoderLayer(
|
| 39 |
+
(self_attn): MultiHeadedAttention(
|
| 40 |
+
(linear_q): Linear(in_features=256, out_features=256, bias=True)
|
| 41 |
+
(linear_k): Linear(in_features=256, out_features=256, bias=True)
|
| 42 |
+
(linear_v): Linear(in_features=256, out_features=256, bias=True)
|
| 43 |
+
(linear_out): Linear(in_features=256, out_features=256, bias=True)
|
| 44 |
+
(dropout): Dropout(p=0.2, inplace=False)
|
| 45 |
+
)
|
| 46 |
+
(feed_forward): MultiLayeredConv1d(
|
| 47 |
+
(w_1): Conv1d(256, 1024, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 48 |
+
(w_2): Conv1d(1024, 256, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 49 |
+
(dropout): Dropout(p=0.2, inplace=False)
|
| 50 |
+
)
|
| 51 |
+
(norm1): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
|
| 52 |
+
(norm2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
|
| 53 |
+
(dropout): Dropout(p=0.2, inplace=False)
|
| 54 |
+
)
|
| 55 |
+
(1): EncoderLayer(
|
| 56 |
+
(self_attn): MultiHeadedAttention(
|
| 57 |
+
(linear_q): Linear(in_features=256, out_features=256, bias=True)
|
| 58 |
+
(linear_k): Linear(in_features=256, out_features=256, bias=True)
|
| 59 |
+
(linear_v): Linear(in_features=256, out_features=256, bias=True)
|
| 60 |
+
(linear_out): Linear(in_features=256, out_features=256, bias=True)
|
| 61 |
+
(dropout): Dropout(p=0.2, inplace=False)
|
| 62 |
+
)
|
| 63 |
+
(feed_forward): MultiLayeredConv1d(
|
| 64 |
+
(w_1): Conv1d(256, 1024, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 65 |
+
(w_2): Conv1d(1024, 256, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 66 |
+
(dropout): Dropout(p=0.2, inplace=False)
|
| 67 |
+
)
|
| 68 |
+
(norm1): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
|
| 69 |
+
(norm2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
|
| 70 |
+
(dropout): Dropout(p=0.2, inplace=False)
|
| 71 |
+
)
|
| 72 |
+
(2): EncoderLayer(
|
| 73 |
+
(self_attn): MultiHeadedAttention(
|
| 74 |
+
(linear_q): Linear(in_features=256, out_features=256, bias=True)
|
| 75 |
+
(linear_k): Linear(in_features=256, out_features=256, bias=True)
|
| 76 |
+
(linear_v): Linear(in_features=256, out_features=256, bias=True)
|
| 77 |
+
(linear_out): Linear(in_features=256, out_features=256, bias=True)
|
| 78 |
+
(dropout): Dropout(p=0.2, inplace=False)
|
| 79 |
+
)
|
| 80 |
+
(feed_forward): MultiLayeredConv1d(
|
| 81 |
+
(w_1): Conv1d(256, 1024, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 82 |
+
(w_2): Conv1d(1024, 256, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 83 |
+
(dropout): Dropout(p=0.2, inplace=False)
|
| 84 |
+
)
|
| 85 |
+
(norm1): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
|
| 86 |
+
(norm2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
|
| 87 |
+
(dropout): Dropout(p=0.2, inplace=False)
|
| 88 |
+
)
|
| 89 |
+
(3): EncoderLayer(
|
| 90 |
+
(self_attn): MultiHeadedAttention(
|
| 91 |
+
(linear_q): Linear(in_features=256, out_features=256, bias=True)
|
| 92 |
+
(linear_k): Linear(in_features=256, out_features=256, bias=True)
|
| 93 |
+
(linear_v): Linear(in_features=256, out_features=256, bias=True)
|
| 94 |
+
(linear_out): Linear(in_features=256, out_features=256, bias=True)
|
| 95 |
+
(dropout): Dropout(p=0.2, inplace=False)
|
| 96 |
+
)
|
| 97 |
+
(feed_forward): MultiLayeredConv1d(
|
| 98 |
+
(w_1): Conv1d(256, 1024, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 99 |
+
(w_2): Conv1d(1024, 256, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 100 |
+
(dropout): Dropout(p=0.2, inplace=False)
|
| 101 |
+
)
|
| 102 |
+
(norm1): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
|
| 103 |
+
(norm2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
|
| 104 |
+
(dropout): Dropout(p=0.2, inplace=False)
|
| 105 |
+
)
|
| 106 |
+
)
|
| 107 |
+
(after_norm): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
|
| 108 |
+
)
|
| 109 |
+
(duration_predictor): DurationPredictor(
|
| 110 |
+
(conv): ModuleList(
|
| 111 |
+
(0): Sequential(
|
| 112 |
+
(0): Conv1d(256, 256, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 113 |
+
(1): ReLU()
|
| 114 |
+
(2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
|
| 115 |
+
(3): Dropout(p=0.1, inplace=False)
|
| 116 |
+
)
|
| 117 |
+
(1): Sequential(
|
| 118 |
+
(0): Conv1d(256, 256, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 119 |
+
(1): ReLU()
|
| 120 |
+
(2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
|
| 121 |
+
(3): Dropout(p=0.1, inplace=False)
|
| 122 |
+
)
|
| 123 |
+
)
|
| 124 |
+
(linear): Linear(in_features=256, out_features=1, bias=True)
|
| 125 |
+
)
|
| 126 |
+
(pitch_predictor): VariancePredictor(
|
| 127 |
+
(conv): ModuleList(
|
| 128 |
+
(0): Sequential(
|
| 129 |
+
(0): Conv1d(256, 256, kernel_size=(5,), stride=(1,), padding=(2,))
|
| 130 |
+
(1): ReLU()
|
| 131 |
+
(2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
|
| 132 |
+
(3): Dropout(p=0.5, inplace=False)
|
| 133 |
+
)
|
| 134 |
+
(1): Sequential(
|
| 135 |
+
(0): Conv1d(256, 256, kernel_size=(5,), stride=(1,), padding=(2,))
|
| 136 |
+
(1): ReLU()
|
| 137 |
+
(2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
|
| 138 |
+
(3): Dropout(p=0.5, inplace=False)
|
| 139 |
+
)
|
| 140 |
+
(2): Sequential(
|
| 141 |
+
(0): Conv1d(256, 256, kernel_size=(5,), stride=(1,), padding=(2,))
|
| 142 |
+
(1): ReLU()
|
| 143 |
+
(2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
|
| 144 |
+
(3): Dropout(p=0.5, inplace=False)
|
| 145 |
+
)
|
| 146 |
+
(3): Sequential(
|
| 147 |
+
(0): Conv1d(256, 256, kernel_size=(5,), stride=(1,), padding=(2,))
|
| 148 |
+
(1): ReLU()
|
| 149 |
+
(2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
|
| 150 |
+
(3): Dropout(p=0.5, inplace=False)
|
| 151 |
+
)
|
| 152 |
+
(4): Sequential(
|
| 153 |
+
(0): Conv1d(256, 256, kernel_size=(5,), stride=(1,), padding=(2,))
|
| 154 |
+
(1): ReLU()
|
| 155 |
+
(2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
|
| 156 |
+
(3): Dropout(p=0.5, inplace=False)
|
| 157 |
+
)
|
| 158 |
+
)
|
| 159 |
+
(linear): Linear(in_features=256, out_features=1, bias=True)
|
| 160 |
+
)
|
| 161 |
+
(pitch_embed): Sequential(
|
| 162 |
+
(0): Conv1d(1, 256, kernel_size=(1,), stride=(1,))
|
| 163 |
+
(1): Dropout(p=0.0, inplace=False)
|
| 164 |
+
)
|
| 165 |
+
(energy_predictor): VariancePredictor(
|
| 166 |
+
(conv): ModuleList(
|
| 167 |
+
(0): Sequential(
|
| 168 |
+
(0): Conv1d(256, 256, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 169 |
+
(1): ReLU()
|
| 170 |
+
(2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
|
| 171 |
+
(3): Dropout(p=0.5, inplace=False)
|
| 172 |
+
)
|
| 173 |
+
(1): Sequential(
|
| 174 |
+
(0): Conv1d(256, 256, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 175 |
+
(1): ReLU()
|
| 176 |
+
(2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
|
| 177 |
+
(3): Dropout(p=0.5, inplace=False)
|
| 178 |
+
)
|
| 179 |
+
)
|
| 180 |
+
(linear): Linear(in_features=256, out_features=1, bias=True)
|
| 181 |
+
)
|
| 182 |
+
(energy_embed): Sequential(
|
| 183 |
+
(0): Conv1d(1, 256, kernel_size=(1,), stride=(1,))
|
| 184 |
+
(1): Dropout(p=0.0, inplace=False)
|
| 185 |
+
)
|
| 186 |
+
(alignment_module): AlignmentModule(
|
| 187 |
+
(t_conv1): Conv1d(256, 256, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 188 |
+
(t_conv2): Conv1d(256, 256, kernel_size=(1,), stride=(1,))
|
| 189 |
+
(f_conv1): Conv1d(80, 256, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 190 |
+
(f_conv2): Conv1d(256, 256, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 191 |
+
(f_conv3): Conv1d(256, 256, kernel_size=(1,), stride=(1,))
|
| 192 |
+
)
|
| 193 |
+
(length_regulator): GaussianUpsampling()
|
| 194 |
+
(decoder): Encoder(
|
| 195 |
+
(embed): Sequential(
|
| 196 |
+
(0): ScaledPositionalEncoding(
|
| 197 |
+
(dropout): Dropout(p=0.2, inplace=False)
|
| 198 |
+
)
|
| 199 |
+
)
|
| 200 |
+
(encoders): MultiSequential(
|
| 201 |
+
(0): EncoderLayer(
|
| 202 |
+
(self_attn): MultiHeadedAttention(
|
| 203 |
+
(linear_q): Linear(in_features=256, out_features=256, bias=True)
|
| 204 |
+
(linear_k): Linear(in_features=256, out_features=256, bias=True)
|
| 205 |
+
(linear_v): Linear(in_features=256, out_features=256, bias=True)
|
| 206 |
+
(linear_out): Linear(in_features=256, out_features=256, bias=True)
|
| 207 |
+
(dropout): Dropout(p=0.2, inplace=False)
|
| 208 |
+
)
|
| 209 |
+
(feed_forward): MultiLayeredConv1d(
|
| 210 |
+
(w_1): Conv1d(256, 1024, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 211 |
+
(w_2): Conv1d(1024, 256, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 212 |
+
(dropout): Dropout(p=0.2, inplace=False)
|
| 213 |
+
)
|
| 214 |
+
(norm1): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
|
| 215 |
+
(norm2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
|
| 216 |
+
(dropout): Dropout(p=0.2, inplace=False)
|
| 217 |
+
)
|
| 218 |
+
(1): EncoderLayer(
|
| 219 |
+
(self_attn): MultiHeadedAttention(
|
| 220 |
+
(linear_q): Linear(in_features=256, out_features=256, bias=True)
|
| 221 |
+
(linear_k): Linear(in_features=256, out_features=256, bias=True)
|
| 222 |
+
(linear_v): Linear(in_features=256, out_features=256, bias=True)
|
| 223 |
+
(linear_out): Linear(in_features=256, out_features=256, bias=True)
|
| 224 |
+
(dropout): Dropout(p=0.2, inplace=False)
|
| 225 |
+
)
|
| 226 |
+
(feed_forward): MultiLayeredConv1d(
|
| 227 |
+
(w_1): Conv1d(256, 1024, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 228 |
+
(w_2): Conv1d(1024, 256, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 229 |
+
(dropout): Dropout(p=0.2, inplace=False)
|
| 230 |
+
)
|
| 231 |
+
(norm1): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
|
| 232 |
+
(norm2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
|
| 233 |
+
(dropout): Dropout(p=0.2, inplace=False)
|
| 234 |
+
)
|
| 235 |
+
(2): EncoderLayer(
|
| 236 |
+
(self_attn): MultiHeadedAttention(
|
| 237 |
+
(linear_q): Linear(in_features=256, out_features=256, bias=True)
|
| 238 |
+
(linear_k): Linear(in_features=256, out_features=256, bias=True)
|
| 239 |
+
(linear_v): Linear(in_features=256, out_features=256, bias=True)
|
| 240 |
+
(linear_out): Linear(in_features=256, out_features=256, bias=True)
|
| 241 |
+
(dropout): Dropout(p=0.2, inplace=False)
|
| 242 |
+
)
|
| 243 |
+
(feed_forward): MultiLayeredConv1d(
|
| 244 |
+
(w_1): Conv1d(256, 1024, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 245 |
+
(w_2): Conv1d(1024, 256, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 246 |
+
(dropout): Dropout(p=0.2, inplace=False)
|
| 247 |
+
)
|
| 248 |
+
(norm1): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
|
| 249 |
+
(norm2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
|
| 250 |
+
(dropout): Dropout(p=0.2, inplace=False)
|
| 251 |
+
)
|
| 252 |
+
(3): EncoderLayer(
|
| 253 |
+
(self_attn): MultiHeadedAttention(
|
| 254 |
+
(linear_q): Linear(in_features=256, out_features=256, bias=True)
|
| 255 |
+
(linear_k): Linear(in_features=256, out_features=256, bias=True)
|
| 256 |
+
(linear_v): Linear(in_features=256, out_features=256, bias=True)
|
| 257 |
+
(linear_out): Linear(in_features=256, out_features=256, bias=True)
|
| 258 |
+
(dropout): Dropout(p=0.2, inplace=False)
|
| 259 |
+
)
|
| 260 |
+
(feed_forward): MultiLayeredConv1d(
|
| 261 |
+
(w_1): Conv1d(256, 1024, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 262 |
+
(w_2): Conv1d(1024, 256, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 263 |
+
(dropout): Dropout(p=0.2, inplace=False)
|
| 264 |
+
)
|
| 265 |
+
(norm1): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
|
| 266 |
+
(norm2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
|
| 267 |
+
(dropout): Dropout(p=0.2, inplace=False)
|
| 268 |
+
)
|
| 269 |
+
)
|
| 270 |
+
(after_norm): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
|
| 271 |
+
)
|
| 272 |
+
(generator): HiFiGANGenerator(
|
| 273 |
+
(input_conv): Conv1d(256, 512, kernel_size=(7,), stride=(1,), padding=(3,))
|
| 274 |
+
(upsamples): ModuleList(
|
| 275 |
+
(0): Sequential(
|
| 276 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 277 |
+
(1): ConvTranspose1d(512, 256, kernel_size=(16,), stride=(8,), padding=(4,))
|
| 278 |
+
)
|
| 279 |
+
(1): Sequential(
|
| 280 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 281 |
+
(1): ConvTranspose1d(256, 128, kernel_size=(16,), stride=(8,), padding=(4,))
|
| 282 |
+
)
|
| 283 |
+
(2): Sequential(
|
| 284 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 285 |
+
(1): ConvTranspose1d(128, 64, kernel_size=(4,), stride=(2,), padding=(1,))
|
| 286 |
+
)
|
| 287 |
+
(3): Sequential(
|
| 288 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 289 |
+
(1): ConvTranspose1d(64, 32, kernel_size=(4,), stride=(2,), padding=(1,))
|
| 290 |
+
)
|
| 291 |
+
)
|
| 292 |
+
(blocks): ModuleList(
|
| 293 |
+
(0): ResidualBlock(
|
| 294 |
+
(convs1): ModuleList(
|
| 295 |
+
(0): Sequential(
|
| 296 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 297 |
+
(1): Conv1d(256, 256, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 298 |
+
)
|
| 299 |
+
(1): Sequential(
|
| 300 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 301 |
+
(1): Conv1d(256, 256, kernel_size=(3,), stride=(1,), padding=(3,), dilation=(3,))
|
| 302 |
+
)
|
| 303 |
+
(2): Sequential(
|
| 304 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 305 |
+
(1): Conv1d(256, 256, kernel_size=(3,), stride=(1,), padding=(5,), dilation=(5,))
|
| 306 |
+
)
|
| 307 |
+
)
|
| 308 |
+
(convs2): ModuleList(
|
| 309 |
+
(0): Sequential(
|
| 310 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 311 |
+
(1): Conv1d(256, 256, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 312 |
+
)
|
| 313 |
+
(1): Sequential(
|
| 314 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 315 |
+
(1): Conv1d(256, 256, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 316 |
+
)
|
| 317 |
+
(2): Sequential(
|
| 318 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 319 |
+
(1): Conv1d(256, 256, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 320 |
+
)
|
| 321 |
+
)
|
| 322 |
+
)
|
| 323 |
+
(1): ResidualBlock(
|
| 324 |
+
(convs1): ModuleList(
|
| 325 |
+
(0): Sequential(
|
| 326 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 327 |
+
(1): Conv1d(256, 256, kernel_size=(7,), stride=(1,), padding=(3,))
|
| 328 |
+
)
|
| 329 |
+
(1): Sequential(
|
| 330 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 331 |
+
(1): Conv1d(256, 256, kernel_size=(7,), stride=(1,), padding=(9,), dilation=(3,))
|
| 332 |
+
)
|
| 333 |
+
(2): Sequential(
|
| 334 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 335 |
+
(1): Conv1d(256, 256, kernel_size=(7,), stride=(1,), padding=(15,), dilation=(5,))
|
| 336 |
+
)
|
| 337 |
+
)
|
| 338 |
+
(convs2): ModuleList(
|
| 339 |
+
(0): Sequential(
|
| 340 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 341 |
+
(1): Conv1d(256, 256, kernel_size=(7,), stride=(1,), padding=(3,))
|
| 342 |
+
)
|
| 343 |
+
(1): Sequential(
|
| 344 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 345 |
+
(1): Conv1d(256, 256, kernel_size=(7,), stride=(1,), padding=(3,))
|
| 346 |
+
)
|
| 347 |
+
(2): Sequential(
|
| 348 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 349 |
+
(1): Conv1d(256, 256, kernel_size=(7,), stride=(1,), padding=(3,))
|
| 350 |
+
)
|
| 351 |
+
)
|
| 352 |
+
)
|
| 353 |
+
(2): ResidualBlock(
|
| 354 |
+
(convs1): ModuleList(
|
| 355 |
+
(0): Sequential(
|
| 356 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 357 |
+
(1): Conv1d(256, 256, kernel_size=(11,), stride=(1,), padding=(5,))
|
| 358 |
+
)
|
| 359 |
+
(1): Sequential(
|
| 360 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 361 |
+
(1): Conv1d(256, 256, kernel_size=(11,), stride=(1,), padding=(15,), dilation=(3,))
|
| 362 |
+
)
|
| 363 |
+
(2): Sequential(
|
| 364 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 365 |
+
(1): Conv1d(256, 256, kernel_size=(11,), stride=(1,), padding=(25,), dilation=(5,))
|
| 366 |
+
)
|
| 367 |
+
)
|
| 368 |
+
(convs2): ModuleList(
|
| 369 |
+
(0): Sequential(
|
| 370 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 371 |
+
(1): Conv1d(256, 256, kernel_size=(11,), stride=(1,), padding=(5,))
|
| 372 |
+
)
|
| 373 |
+
(1): Sequential(
|
| 374 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 375 |
+
(1): Conv1d(256, 256, kernel_size=(11,), stride=(1,), padding=(5,))
|
| 376 |
+
)
|
| 377 |
+
(2): Sequential(
|
| 378 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 379 |
+
(1): Conv1d(256, 256, kernel_size=(11,), stride=(1,), padding=(5,))
|
| 380 |
+
)
|
| 381 |
+
)
|
| 382 |
+
)
|
| 383 |
+
(3): ResidualBlock(
|
| 384 |
+
(convs1): ModuleList(
|
| 385 |
+
(0): Sequential(
|
| 386 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 387 |
+
(1): Conv1d(128, 128, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 388 |
+
)
|
| 389 |
+
(1): Sequential(
|
| 390 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 391 |
+
(1): Conv1d(128, 128, kernel_size=(3,), stride=(1,), padding=(3,), dilation=(3,))
|
| 392 |
+
)
|
| 393 |
+
(2): Sequential(
|
| 394 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 395 |
+
(1): Conv1d(128, 128, kernel_size=(3,), stride=(1,), padding=(5,), dilation=(5,))
|
| 396 |
+
)
|
| 397 |
+
)
|
| 398 |
+
(convs2): ModuleList(
|
| 399 |
+
(0): Sequential(
|
| 400 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 401 |
+
(1): Conv1d(128, 128, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 402 |
+
)
|
| 403 |
+
(1): Sequential(
|
| 404 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 405 |
+
(1): Conv1d(128, 128, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 406 |
+
)
|
| 407 |
+
(2): Sequential(
|
| 408 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 409 |
+
(1): Conv1d(128, 128, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 410 |
+
)
|
| 411 |
+
)
|
| 412 |
+
)
|
| 413 |
+
(4): ResidualBlock(
|
| 414 |
+
(convs1): ModuleList(
|
| 415 |
+
(0): Sequential(
|
| 416 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 417 |
+
(1): Conv1d(128, 128, kernel_size=(7,), stride=(1,), padding=(3,))
|
| 418 |
+
)
|
| 419 |
+
(1): Sequential(
|
| 420 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 421 |
+
(1): Conv1d(128, 128, kernel_size=(7,), stride=(1,), padding=(9,), dilation=(3,))
|
| 422 |
+
)
|
| 423 |
+
(2): Sequential(
|
| 424 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 425 |
+
(1): Conv1d(128, 128, kernel_size=(7,), stride=(1,), padding=(15,), dilation=(5,))
|
| 426 |
+
)
|
| 427 |
+
)
|
| 428 |
+
(convs2): ModuleList(
|
| 429 |
+
(0): Sequential(
|
| 430 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 431 |
+
(1): Conv1d(128, 128, kernel_size=(7,), stride=(1,), padding=(3,))
|
| 432 |
+
)
|
| 433 |
+
(1): Sequential(
|
| 434 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 435 |
+
(1): Conv1d(128, 128, kernel_size=(7,), stride=(1,), padding=(3,))
|
| 436 |
+
)
|
| 437 |
+
(2): Sequential(
|
| 438 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 439 |
+
(1): Conv1d(128, 128, kernel_size=(7,), stride=(1,), padding=(3,))
|
| 440 |
+
)
|
| 441 |
+
)
|
| 442 |
+
)
|
| 443 |
+
(5): ResidualBlock(
|
| 444 |
+
(convs1): ModuleList(
|
| 445 |
+
(0): Sequential(
|
| 446 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 447 |
+
(1): Conv1d(128, 128, kernel_size=(11,), stride=(1,), padding=(5,))
|
| 448 |
+
)
|
| 449 |
+
(1): Sequential(
|
| 450 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 451 |
+
(1): Conv1d(128, 128, kernel_size=(11,), stride=(1,), padding=(15,), dilation=(3,))
|
| 452 |
+
)
|
| 453 |
+
(2): Sequential(
|
| 454 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 455 |
+
(1): Conv1d(128, 128, kernel_size=(11,), stride=(1,), padding=(25,), dilation=(5,))
|
| 456 |
+
)
|
| 457 |
+
)
|
| 458 |
+
(convs2): ModuleList(
|
| 459 |
+
(0): Sequential(
|
| 460 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 461 |
+
(1): Conv1d(128, 128, kernel_size=(11,), stride=(1,), padding=(5,))
|
| 462 |
+
)
|
| 463 |
+
(1): Sequential(
|
| 464 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 465 |
+
(1): Conv1d(128, 128, kernel_size=(11,), stride=(1,), padding=(5,))
|
| 466 |
+
)
|
| 467 |
+
(2): Sequential(
|
| 468 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 469 |
+
(1): Conv1d(128, 128, kernel_size=(11,), stride=(1,), padding=(5,))
|
| 470 |
+
)
|
| 471 |
+
)
|
| 472 |
+
)
|
| 473 |
+
(6): ResidualBlock(
|
| 474 |
+
(convs1): ModuleList(
|
| 475 |
+
(0): Sequential(
|
| 476 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 477 |
+
(1): Conv1d(64, 64, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 478 |
+
)
|
| 479 |
+
(1): Sequential(
|
| 480 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 481 |
+
(1): Conv1d(64, 64, kernel_size=(3,), stride=(1,), padding=(3,), dilation=(3,))
|
| 482 |
+
)
|
| 483 |
+
(2): Sequential(
|
| 484 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 485 |
+
(1): Conv1d(64, 64, kernel_size=(3,), stride=(1,), padding=(5,), dilation=(5,))
|
| 486 |
+
)
|
| 487 |
+
)
|
| 488 |
+
(convs2): ModuleList(
|
| 489 |
+
(0): Sequential(
|
| 490 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 491 |
+
(1): Conv1d(64, 64, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 492 |
+
)
|
| 493 |
+
(1): Sequential(
|
| 494 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 495 |
+
(1): Conv1d(64, 64, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 496 |
+
)
|
| 497 |
+
(2): Sequential(
|
| 498 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 499 |
+
(1): Conv1d(64, 64, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 500 |
+
)
|
| 501 |
+
)
|
| 502 |
+
)
|
| 503 |
+
(7): ResidualBlock(
|
| 504 |
+
(convs1): ModuleList(
|
| 505 |
+
(0): Sequential(
|
| 506 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 507 |
+
(1): Conv1d(64, 64, kernel_size=(7,), stride=(1,), padding=(3,))
|
| 508 |
+
)
|
| 509 |
+
(1): Sequential(
|
| 510 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 511 |
+
(1): Conv1d(64, 64, kernel_size=(7,), stride=(1,), padding=(9,), dilation=(3,))
|
| 512 |
+
)
|
| 513 |
+
(2): Sequential(
|
| 514 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 515 |
+
(1): Conv1d(64, 64, kernel_size=(7,), stride=(1,), padding=(15,), dilation=(5,))
|
| 516 |
+
)
|
| 517 |
+
)
|
| 518 |
+
(convs2): ModuleList(
|
| 519 |
+
(0): Sequential(
|
| 520 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 521 |
+
(1): Conv1d(64, 64, kernel_size=(7,), stride=(1,), padding=(3,))
|
| 522 |
+
)
|
| 523 |
+
(1): Sequential(
|
| 524 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 525 |
+
(1): Conv1d(64, 64, kernel_size=(7,), stride=(1,), padding=(3,))
|
| 526 |
+
)
|
| 527 |
+
(2): Sequential(
|
| 528 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 529 |
+
(1): Conv1d(64, 64, kernel_size=(7,), stride=(1,), padding=(3,))
|
| 530 |
+
)
|
| 531 |
+
)
|
| 532 |
+
)
|
| 533 |
+
(8): ResidualBlock(
|
| 534 |
+
(convs1): ModuleList(
|
| 535 |
+
(0): Sequential(
|
| 536 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 537 |
+
(1): Conv1d(64, 64, kernel_size=(11,), stride=(1,), padding=(5,))
|
| 538 |
+
)
|
| 539 |
+
(1): Sequential(
|
| 540 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 541 |
+
(1): Conv1d(64, 64, kernel_size=(11,), stride=(1,), padding=(15,), dilation=(3,))
|
| 542 |
+
)
|
| 543 |
+
(2): Sequential(
|
| 544 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 545 |
+
(1): Conv1d(64, 64, kernel_size=(11,), stride=(1,), padding=(25,), dilation=(5,))
|
| 546 |
+
)
|
| 547 |
+
)
|
| 548 |
+
(convs2): ModuleList(
|
| 549 |
+
(0): Sequential(
|
| 550 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 551 |
+
(1): Conv1d(64, 64, kernel_size=(11,), stride=(1,), padding=(5,))
|
| 552 |
+
)
|
| 553 |
+
(1): Sequential(
|
| 554 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 555 |
+
(1): Conv1d(64, 64, kernel_size=(11,), stride=(1,), padding=(5,))
|
| 556 |
+
)
|
| 557 |
+
(2): Sequential(
|
| 558 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 559 |
+
(1): Conv1d(64, 64, kernel_size=(11,), stride=(1,), padding=(5,))
|
| 560 |
+
)
|
| 561 |
+
)
|
| 562 |
+
)
|
| 563 |
+
(9): ResidualBlock(
|
| 564 |
+
(convs1): ModuleList(
|
| 565 |
+
(0): Sequential(
|
| 566 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 567 |
+
(1): Conv1d(32, 32, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 568 |
+
)
|
| 569 |
+
(1): Sequential(
|
| 570 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 571 |
+
(1): Conv1d(32, 32, kernel_size=(3,), stride=(1,), padding=(3,), dilation=(3,))
|
| 572 |
+
)
|
| 573 |
+
(2): Sequential(
|
| 574 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 575 |
+
(1): Conv1d(32, 32, kernel_size=(3,), stride=(1,), padding=(5,), dilation=(5,))
|
| 576 |
+
)
|
| 577 |
+
)
|
| 578 |
+
(convs2): ModuleList(
|
| 579 |
+
(0): Sequential(
|
| 580 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 581 |
+
(1): Conv1d(32, 32, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 582 |
+
)
|
| 583 |
+
(1): Sequential(
|
| 584 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 585 |
+
(1): Conv1d(32, 32, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 586 |
+
)
|
| 587 |
+
(2): Sequential(
|
| 588 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 589 |
+
(1): Conv1d(32, 32, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 590 |
+
)
|
| 591 |
+
)
|
| 592 |
+
)
|
| 593 |
+
(10): ResidualBlock(
|
| 594 |
+
(convs1): ModuleList(
|
| 595 |
+
(0): Sequential(
|
| 596 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 597 |
+
(1): Conv1d(32, 32, kernel_size=(7,), stride=(1,), padding=(3,))
|
| 598 |
+
)
|
| 599 |
+
(1): Sequential(
|
| 600 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 601 |
+
(1): Conv1d(32, 32, kernel_size=(7,), stride=(1,), padding=(9,), dilation=(3,))
|
| 602 |
+
)
|
| 603 |
+
(2): Sequential(
|
| 604 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 605 |
+
(1): Conv1d(32, 32, kernel_size=(7,), stride=(1,), padding=(15,), dilation=(5,))
|
| 606 |
+
)
|
| 607 |
+
)
|
| 608 |
+
(convs2): ModuleList(
|
| 609 |
+
(0): Sequential(
|
| 610 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 611 |
+
(1): Conv1d(32, 32, kernel_size=(7,), stride=(1,), padding=(3,))
|
| 612 |
+
)
|
| 613 |
+
(1): Sequential(
|
| 614 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 615 |
+
(1): Conv1d(32, 32, kernel_size=(7,), stride=(1,), padding=(3,))
|
| 616 |
+
)
|
| 617 |
+
(2): Sequential(
|
| 618 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 619 |
+
(1): Conv1d(32, 32, kernel_size=(7,), stride=(1,), padding=(3,))
|
| 620 |
+
)
|
| 621 |
+
)
|
| 622 |
+
)
|
| 623 |
+
(11): ResidualBlock(
|
| 624 |
+
(convs1): ModuleList(
|
| 625 |
+
(0): Sequential(
|
| 626 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 627 |
+
(1): Conv1d(32, 32, kernel_size=(11,), stride=(1,), padding=(5,))
|
| 628 |
+
)
|
| 629 |
+
(1): Sequential(
|
| 630 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 631 |
+
(1): Conv1d(32, 32, kernel_size=(11,), stride=(1,), padding=(15,), dilation=(3,))
|
| 632 |
+
)
|
| 633 |
+
(2): Sequential(
|
| 634 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 635 |
+
(1): Conv1d(32, 32, kernel_size=(11,), stride=(1,), padding=(25,), dilation=(5,))
|
| 636 |
+
)
|
| 637 |
+
)
|
| 638 |
+
(convs2): ModuleList(
|
| 639 |
+
(0): Sequential(
|
| 640 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 641 |
+
(1): Conv1d(32, 32, kernel_size=(11,), stride=(1,), padding=(5,))
|
| 642 |
+
)
|
| 643 |
+
(1): Sequential(
|
| 644 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 645 |
+
(1): Conv1d(32, 32, kernel_size=(11,), stride=(1,), padding=(5,))
|
| 646 |
+
)
|
| 647 |
+
(2): Sequential(
|
| 648 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 649 |
+
(1): Conv1d(32, 32, kernel_size=(11,), stride=(1,), padding=(5,))
|
| 650 |
+
)
|
| 651 |
+
)
|
| 652 |
+
)
|
| 653 |
+
)
|
| 654 |
+
(output_conv): Sequential(
|
| 655 |
+
(0): LeakyReLU(negative_slope=0.01)
|
| 656 |
+
(1): Conv1d(32, 1, kernel_size=(7,), stride=(1,), padding=(3,))
|
| 657 |
+
(2): Tanh()
|
| 658 |
+
)
|
| 659 |
+
)
|
| 660 |
+
)
|
| 661 |
+
(discriminator): HiFiGANMultiScaleMultiPeriodDiscriminator(
|
| 662 |
+
(msd): HiFiGANMultiScaleDiscriminator(
|
| 663 |
+
(discriminators): ModuleList(
|
| 664 |
+
(0): HiFiGANScaleDiscriminator(
|
| 665 |
+
(layers): ModuleList(
|
| 666 |
+
(0): Sequential(
|
| 667 |
+
(0): Conv1d(1, 128, kernel_size=(15,), stride=(1,), padding=(7,))
|
| 668 |
+
(1): LeakyReLU(negative_slope=0.1)
|
| 669 |
+
)
|
| 670 |
+
(1): Sequential(
|
| 671 |
+
(0): Conv1d(128, 128, kernel_size=(41,), stride=(2,), padding=(20,), groups=4)
|
| 672 |
+
(1): LeakyReLU(negative_slope=0.1)
|
| 673 |
+
)
|
| 674 |
+
(2): Sequential(
|
| 675 |
+
(0): Conv1d(128, 256, kernel_size=(41,), stride=(2,), padding=(20,), groups=16)
|
| 676 |
+
(1): LeakyReLU(negative_slope=0.1)
|
| 677 |
+
)
|
| 678 |
+
(3): Sequential(
|
| 679 |
+
(0): Conv1d(256, 512, kernel_size=(41,), stride=(4,), padding=(20,), groups=16)
|
| 680 |
+
(1): LeakyReLU(negative_slope=0.1)
|
| 681 |
+
)
|
| 682 |
+
(4): Sequential(
|
| 683 |
+
(0): Conv1d(512, 1024, kernel_size=(41,), stride=(4,), padding=(20,), groups=16)
|
| 684 |
+
(1): LeakyReLU(negative_slope=0.1)
|
| 685 |
+
)
|
| 686 |
+
(5): Sequential(
|
| 687 |
+
(0): Conv1d(1024, 1024, kernel_size=(41,), stride=(1,), padding=(20,), groups=16)
|
| 688 |
+
(1): LeakyReLU(negative_slope=0.1)
|
| 689 |
+
)
|
| 690 |
+
(6): Sequential(
|
| 691 |
+
(0): Conv1d(1024, 1024, kernel_size=(5,), stride=(1,), padding=(2,))
|
| 692 |
+
(1): LeakyReLU(negative_slope=0.1)
|
| 693 |
+
)
|
| 694 |
+
(7): Conv1d(1024, 1, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 695 |
+
)
|
| 696 |
+
)
|
| 697 |
+
)
|
| 698 |
+
)
|
| 699 |
+
(mpd): HiFiGANMultiPeriodDiscriminator(
|
| 700 |
+
(discriminators): ModuleList(
|
| 701 |
+
(0): HiFiGANPeriodDiscriminator(
|
| 702 |
+
(convs): ModuleList(
|
| 703 |
+
(0): Sequential(
|
| 704 |
+
(0): Conv2d(1, 32, kernel_size=(5, 1), stride=(3, 1), padding=(2, 0))
|
| 705 |
+
(1): LeakyReLU(negative_slope=0.1)
|
| 706 |
+
)
|
| 707 |
+
(1): Sequential(
|
| 708 |
+
(0): Conv2d(32, 128, kernel_size=(5, 1), stride=(3, 1), padding=(2, 0))
|
| 709 |
+
(1): LeakyReLU(negative_slope=0.1)
|
| 710 |
+
)
|
| 711 |
+
(2): Sequential(
|
| 712 |
+
(0): Conv2d(128, 512, kernel_size=(5, 1), stride=(3, 1), padding=(2, 0))
|
| 713 |
+
(1): LeakyReLU(negative_slope=0.1)
|
| 714 |
+
)
|
| 715 |
+
(3): Sequential(
|
| 716 |
+
(0): Conv2d(512, 1024, kernel_size=(5, 1), stride=(3, 1), padding=(2, 0))
|
| 717 |
+
(1): LeakyReLU(negative_slope=0.1)
|
| 718 |
+
)
|
| 719 |
+
(4): Sequential(
|
| 720 |
+
(0): Conv2d(1024, 1024, kernel_size=(5, 1), stride=(1, 1), padding=(2, 0))
|
| 721 |
+
(1): LeakyReLU(negative_slope=0.1)
|
| 722 |
+
)
|
| 723 |
+
)
|
| 724 |
+
(output_conv): Conv2d(1024, 1, kernel_size=(2, 1), stride=(1, 1), padding=(1, 0))
|
| 725 |
+
)
|
| 726 |
+
(1): HiFiGANPeriodDiscriminator(
|
| 727 |
+
(convs): ModuleList(
|
| 728 |
+
(0): Sequential(
|
| 729 |
+
(0): Conv2d(1, 32, kernel_size=(5, 1), stride=(3, 1), padding=(2, 0))
|
| 730 |
+
(1): LeakyReLU(negative_slope=0.1)
|
| 731 |
+
)
|
| 732 |
+
(1): Sequential(
|
| 733 |
+
(0): Conv2d(32, 128, kernel_size=(5, 1), stride=(3, 1), padding=(2, 0))
|
| 734 |
+
(1): LeakyReLU(negative_slope=0.1)
|
| 735 |
+
)
|
| 736 |
+
(2): Sequential(
|
| 737 |
+
(0): Conv2d(128, 512, kernel_size=(5, 1), stride=(3, 1), padding=(2, 0))
|
| 738 |
+
(1): LeakyReLU(negative_slope=0.1)
|
| 739 |
+
)
|
| 740 |
+
(3): Sequential(
|
| 741 |
+
(0): Conv2d(512, 1024, kernel_size=(5, 1), stride=(3, 1), padding=(2, 0))
|
| 742 |
+
(1): LeakyReLU(negative_slope=0.1)
|
| 743 |
+
)
|
| 744 |
+
(4): Sequential(
|
| 745 |
+
(0): Conv2d(1024, 1024, kernel_size=(5, 1), stride=(1, 1), padding=(2, 0))
|
| 746 |
+
(1): LeakyReLU(negative_slope=0.1)
|
| 747 |
+
)
|
| 748 |
+
)
|
| 749 |
+
(output_conv): Conv2d(1024, 1, kernel_size=(2, 1), stride=(1, 1), padding=(1, 0))
|
| 750 |
+
)
|
| 751 |
+
(2): HiFiGANPeriodDiscriminator(
|
| 752 |
+
(convs): ModuleList(
|
| 753 |
+
(0): Sequential(
|
| 754 |
+
(0): Conv2d(1, 32, kernel_size=(5, 1), stride=(3, 1), padding=(2, 0))
|
| 755 |
+
(1): LeakyReLU(negative_slope=0.1)
|
| 756 |
+
)
|
| 757 |
+
(1): Sequential(
|
| 758 |
+
(0): Conv2d(32, 128, kernel_size=(5, 1), stride=(3, 1), padding=(2, 0))
|
| 759 |
+
(1): LeakyReLU(negative_slope=0.1)
|
| 760 |
+
)
|
| 761 |
+
(2): Sequential(
|
| 762 |
+
(0): Conv2d(128, 512, kernel_size=(5, 1), stride=(3, 1), padding=(2, 0))
|
| 763 |
+
(1): LeakyReLU(negative_slope=0.1)
|
| 764 |
+
)
|
| 765 |
+
(3): Sequential(
|
| 766 |
+
(0): Conv2d(512, 1024, kernel_size=(5, 1), stride=(3, 1), padding=(2, 0))
|
| 767 |
+
(1): LeakyReLU(negative_slope=0.1)
|
| 768 |
+
)
|
| 769 |
+
(4): Sequential(
|
| 770 |
+
(0): Conv2d(1024, 1024, kernel_size=(5, 1), stride=(1, 1), padding=(2, 0))
|
| 771 |
+
(1): LeakyReLU(negative_slope=0.1)
|
| 772 |
+
)
|
| 773 |
+
)
|
| 774 |
+
(output_conv): Conv2d(1024, 1, kernel_size=(2, 1), stride=(1, 1), padding=(1, 0))
|
| 775 |
+
)
|
| 776 |
+
(3): HiFiGANPeriodDiscriminator(
|
| 777 |
+
(convs): ModuleList(
|
| 778 |
+
(0): Sequential(
|
| 779 |
+
(0): Conv2d(1, 32, kernel_size=(5, 1), stride=(3, 1), padding=(2, 0))
|
| 780 |
+
(1): LeakyReLU(negative_slope=0.1)
|
| 781 |
+
)
|
| 782 |
+
(1): Sequential(
|
| 783 |
+
(0): Conv2d(32, 128, kernel_size=(5, 1), stride=(3, 1), padding=(2, 0))
|
| 784 |
+
(1): LeakyReLU(negative_slope=0.1)
|
| 785 |
+
)
|
| 786 |
+
(2): Sequential(
|
| 787 |
+
(0): Conv2d(128, 512, kernel_size=(5, 1), stride=(3, 1), padding=(2, 0))
|
| 788 |
+
(1): LeakyReLU(negative_slope=0.1)
|
| 789 |
+
)
|
| 790 |
+
(3): Sequential(
|
| 791 |
+
(0): Conv2d(512, 1024, kernel_size=(5, 1), stride=(3, 1), padding=(2, 0))
|
| 792 |
+
(1): LeakyReLU(negative_slope=0.1)
|
| 793 |
+
)
|
| 794 |
+
(4): Sequential(
|
| 795 |
+
(0): Conv2d(1024, 1024, kernel_size=(5, 1), stride=(1, 1), padding=(2, 0))
|
| 796 |
+
(1): LeakyReLU(negative_slope=0.1)
|
| 797 |
+
)
|
| 798 |
+
)
|
| 799 |
+
(output_conv): Conv2d(1024, 1, kernel_size=(2, 1), stride=(1, 1), padding=(1, 0))
|
| 800 |
+
)
|
| 801 |
+
(4): HiFiGANPeriodDiscriminator(
|
| 802 |
+
(convs): ModuleList(
|
| 803 |
+
(0): Sequential(
|
| 804 |
+
(0): Conv2d(1, 32, kernel_size=(5, 1), stride=(3, 1), padding=(2, 0))
|
| 805 |
+
(1): LeakyReLU(negative_slope=0.1)
|
| 806 |
+
)
|
| 807 |
+
(1): Sequential(
|
| 808 |
+
(0): Conv2d(32, 128, kernel_size=(5, 1), stride=(3, 1), padding=(2, 0))
|
| 809 |
+
(1): LeakyReLU(negative_slope=0.1)
|
| 810 |
+
)
|
| 811 |
+
(2): Sequential(
|
| 812 |
+
(0): Conv2d(128, 512, kernel_size=(5, 1), stride=(3, 1), padding=(2, 0))
|
| 813 |
+
(1): LeakyReLU(negative_slope=0.1)
|
| 814 |
+
)
|
| 815 |
+
(3): Sequential(
|
| 816 |
+
(0): Conv2d(512, 1024, kernel_size=(5, 1), stride=(3, 1), padding=(2, 0))
|
| 817 |
+
(1): LeakyReLU(negative_slope=0.1)
|
| 818 |
+
)
|
| 819 |
+
(4): Sequential(
|
| 820 |
+
(0): Conv2d(1024, 1024, kernel_size=(5, 1), stride=(1, 1), padding=(2, 0))
|
| 821 |
+
(1): LeakyReLU(negative_slope=0.1)
|
| 822 |
+
)
|
| 823 |
+
)
|
| 824 |
+
(output_conv): Conv2d(1024, 1, kernel_size=(2, 1), stride=(1, 1), padding=(1, 0))
|
| 825 |
+
)
|
| 826 |
+
)
|
| 827 |
+
)
|
| 828 |
+
)
|
| 829 |
+
(generator_adv_loss): GeneratorAdversarialLoss()
|
| 830 |
+
(discriminator_adv_loss): DiscriminatorAdversarialLoss()
|
| 831 |
+
(feat_match_loss): FeatureMatchLoss()
|
| 832 |
+
(mel_loss): MelSpectrogramLoss(
|
| 833 |
+
(wav_to_mel): LogMelFbank(
|
| 834 |
+
(stft): Stft(n_fft=1024, win_length=1024, hop_length=256, center=True, normalized=False, onesided=True)
|
| 835 |
+
(logmel): LogMel(sr=24000, n_fft=1024, n_mels=80, fmin=0, fmax=12000.0, htk=False)
|
| 836 |
+
)
|
| 837 |
+
)
|
| 838 |
+
(var_loss): VarianceLoss(
|
| 839 |
+
(mse_criterion): MSELoss()
|
| 840 |
+
(duration_criterion): DurationPredictorLoss(
|
| 841 |
+
(criterion): MSELoss()
|
| 842 |
+
)
|
| 843 |
+
)
|
| 844 |
+
(forwardsum_loss): ForwardSumLoss()
|
| 845 |
+
)
|
| 846 |
+
)
|
| 847 |
+
|
| 848 |
+
Model summary:
|
| 849 |
+
Class Name: ESPnetGANTTSModel
|
| 850 |
+
Total Number of model parameters: 83.28 M
|
| 851 |
+
Number of trainable parameters: 83.28 M (100.0%)
|
| 852 |
+
Size: 333.11 MB
|
| 853 |
+
Type: torch.float32
|
| 854 |
+
[92b100c97f43:0/4] 2025-03-04 21:35:42,651 (abs_task:1161) INFO: Optimizer:
|
| 855 |
+
AdamW (
|
| 856 |
+
Parameter Group 0
|
| 857 |
+
amsgrad: False
|
| 858 |
+
betas: [0.8, 0.99]
|
| 859 |
+
eps: 1e-09
|
| 860 |
+
initial_lr: 0.0002
|
| 861 |
+
lr: 0.0002
|
| 862 |
+
weight_decay: 0.0
|
| 863 |
+
)
|
| 864 |
+
[92b100c97f43:0/4] 2025-03-04 21:35:42,651 (abs_task:1162) INFO: Scheduler: <torch.optim.lr_scheduler.ExponentialLR object at 0x7f1cdc2a85b0>
|
| 865 |
+
[92b100c97f43:0/4] 2025-03-04 21:35:42,651 (abs_task:1161) INFO: Optimizer2:
|
| 866 |
+
AdamW (
|
| 867 |
+
Parameter Group 0
|
| 868 |
+
amsgrad: False
|
| 869 |
+
betas: [0.8, 0.99]
|
| 870 |
+
eps: 1e-09
|
| 871 |
+
initial_lr: 0.0002
|
| 872 |
+
lr: 0.0002
|
| 873 |
+
weight_decay: 0.0
|
| 874 |
+
)
|
| 875 |
+
[92b100c97f43:0/4] 2025-03-04 21:35:42,652 (abs_task:1162) INFO: Scheduler2: <torch.optim.lr_scheduler.ExponentialLR object at 0x7f1de6a6b9d0>
|
| 876 |
+
[92b100c97f43:0/4] 2025-03-04 21:35:42,652 (abs_task:1171) INFO: Saving the configuration in exp/tts_train_jets_raw_phn_jaconv_pyopenjtalk/config.yaml
|
| 877 |
+
[92b100c97f43:0/4] 2025-03-04 21:35:42,974 (abs_task:1525) INFO: [train] dataset:
|
| 878 |
+
ESPnetDataset(
|
| 879 |
+
text: {"path": "dump/raw/jvs010_tr_no_dev/text", "type": "text"}
|
| 880 |
+
speech: {"path": "dump/raw/jvs010_tr_no_dev/wav.scp", "type": "sound"}
|
| 881 |
+
preprocess: <espnet2.train.preprocessor.CommonPreprocessor object at 0x7f1cdc2a8e20>)
|
| 882 |
+
[92b100c97f43:0/4] 2025-03-04 21:35:42,974 (abs_task:1526) INFO: [train] Batch sampler: NumElementsBatchSampler(N-batch=3, batch_bins=9000000, sort_in_batch=descending, sort_batch=descending)
|
| 883 |
+
[92b100c97f43:0/4] 2025-03-04 21:35:42,974 (abs_task:1527) INFO: [train] mini-batch sizes summary: N-batch=3, mean=33.3, min=6, max=53
|
| 884 |
+
[92b100c97f43:0/4] 2025-03-04 21:35:42,994 (abs_task:1525) INFO: [valid] dataset:
|
| 885 |
+
ESPnetDataset(
|
| 886 |
+
text: {"path": "dump/raw/jvs010_dev/text", "type": "text"}
|
| 887 |
+
speech: {"path": "dump/raw/jvs010_dev/wav.scp", "type": "sound"}
|
| 888 |
+
preprocess: <espnet2.train.preprocessor.CommonPreprocessor object at 0x7f1cdc2a8550>)
|
| 889 |
+
[92b100c97f43:0/4] 2025-03-04 21:35:42,994 (abs_task:1526) INFO: [valid] Batch sampler: NumElementsBatchSampler(N-batch=1, batch_bins=9000000, sort_in_batch=descending, sort_batch=descending)
|
| 890 |
+
[92b100c97f43:0/4] 2025-03-04 21:35:42,994 (abs_task:1527) INFO: [valid] mini-batch sizes summary: N-batch=1, mean=15.0, min=15, max=15
|
| 891 |
+
[92b100c97f43:0/4] 2025-03-04 21:35:43,014 (abs_task:1525) INFO: [plot_att] dataset:
|
| 892 |
+
ESPnetDataset(
|
| 893 |
+
text: {"path": "dump/raw/jvs010_dev/text", "type": "text"}
|
| 894 |
+
speech: {"path": "dump/raw/jvs010_dev/wav.scp", "type": "sound"}
|
| 895 |
+
preprocess: <espnet2.train.preprocessor.CommonPreprocessor object at 0x7f1cdc264190>)
|
| 896 |
+
[92b100c97f43:0/4] 2025-03-04 21:35:43,014 (abs_task:1526) INFO: [plot_att] Batch sampler: UnsortedBatchSampler(N-batch=15, batch_size=1, key_file=exp/tts_stats_raw_phn_jaconv_pyopenjtalk/valid/text_shape.phn,
|
| 897 |
+
[92b100c97f43:0/4] 2025-03-04 21:35:43,014 (abs_task:1527) INFO: [plot_att] mini-batch sizes summary: N-batch=3, mean=1.0, min=1, max=1
|
| 898 |
+
92b100c97f43:1324139:1324139 [0] NCCL INFO Bootstrap : Using eth0:172.17.0.2<0>
|
| 899 |
+
92b100c97f43:1324139:1324139 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
|
| 900 |
+
|
| 901 |
+
92b100c97f43:1324139:1324139 [0] misc/ibvwrap.cc:63 NCCL WARN Failed to open libibverbs.so[.1]
|
| 902 |
+
92b100c97f43:1324139:1324139 [0] NCCL INFO NET/Socket : Using [0]eth0:172.17.0.2<0>
|
| 903 |
+
92b100c97f43:1324139:1324139 [0] NCCL INFO Using network Socket
|
| 904 |
+
NCCL version 2.10.3+cuda11.3
|
| 905 |
+
92b100c97f43:1324142:1324142 [3] NCCL INFO Bootstrap : Using eth0:172.17.0.2<0>
|
| 906 |
+
92b100c97f43:1324140:1324140 [1] NCCL INFO Bootstrap : Using eth0:172.17.0.2<0>
|
| 907 |
+
92b100c97f43:1324142:1324142 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
|
| 908 |
+
92b100c97f43:1324140:1324140 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
|
| 909 |
+
|
| 910 |
+
92b100c97f43:1324142:1324142 [3] misc/ibvwrap.cc:63 NCCL WARN Failed to open libibverbs.so[.1]
|
| 911 |
+
|
| 912 |
+
92b100c97f43:1324140:1324140 [1] misc/ibvwrap.cc:63 NCCL WARN Failed to open libibverbs.so[.1]
|
| 913 |
+
92b100c97f43:1324142:1324142 [3] NCCL INFO NET/Socket : Using [0]eth0:172.17.0.2<0>
|
| 914 |
+
92b100c97f43:1324140:1324140 [1] NCCL INFO NET/Socket : Using [0]eth0:172.17.0.2<0>
|
| 915 |
+
92b100c97f43:1324142:1324142 [3] NCCL INFO Using network Socket
|
| 916 |
+
92b100c97f43:1324140:1324140 [1] NCCL INFO Using network Socket
|
| 917 |
+
92b100c97f43:1324141:1324141 [2] NCCL INFO Bootstrap : Using eth0:172.17.0.2<0>
|
| 918 |
+
92b100c97f43:1324141:1324141 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
|
| 919 |
+
|
| 920 |
+
92b100c97f43:1324141:1324141 [2] misc/ibvwrap.cc:63 NCCL WARN Failed to open libibverbs.so[.1]
|
| 921 |
+
92b100c97f43:1324141:1324141 [2] NCCL INFO NET/Socket : Using [0]eth0:172.17.0.2<0>
|
| 922 |
+
92b100c97f43:1324141:1324141 [2] NCCL INFO Using network Socket
|
| 923 |
+
92b100c97f43:1324139:1324177 [0] NCCL INFO Channel 00/02 : 0 1 2 3
|
| 924 |
+
92b100c97f43:1324140:1324178 [1] NCCL INFO Trees [0] 2/-1/-1->1->0 [1] 2/-1/-1->1->0
|
| 925 |
+
92b100c97f43:1324139:1324177 [0] NCCL INFO Channel 01/02 : 0 1 2 3
|
| 926 |
+
92b100c97f43:1324141:1324180 [2] NCCL INFO Trees [0] 3/-1/-1->2->1 [1] 3/-1/-1->2->1
|
| 927 |
+
92b100c97f43:1324142:1324179 [3] NCCL INFO Trees [0] -1/-1/-1->3->2 [1] -1/-1/-1->3->2
|
| 928 |
+
92b100c97f43:1324139:1324177 [0] NCCL INFO Trees [0] 1/-1/-1->0->-1 [1] 1/-1/-1->0->-1
|
| 929 |
+
92b100c97f43:1324140:1324178 [1] NCCL INFO Setting affinity for GPU 1 to ffff,ffffffff
|
| 930 |
+
92b100c97f43:1324141:1324180 [2] NCCL INFO Setting affinity for GPU 2 to ffff,ffffffff
|
| 931 |
+
92b100c97f43:1324142:1324179 [3] NCCL INFO Setting affinity for GPU 3 to ffff,ffffffff
|
| 932 |
+
92b100c97f43:1324139:1324177 [0] NCCL INFO Setting affinity for GPU 0 to ffff,ffffffff
|
| 933 |
+
92b100c97f43:1324140:1324178 [1] NCCL INFO Channel 00 : 1[40] -> 2[50] via direct shared memory
|
| 934 |
+
92b100c97f43:1324142:1324179 [3] NCCL INFO Channel 00 : 3[60] -> 0[30] via direct shared memory
|
| 935 |
+
92b100c97f43:1324140:1324178 [1] NCCL INFO Channel 01 : 1[40] -> 2[50] via direct shared memory
|
| 936 |
+
92b100c97f43:1324142:1324179 [3] NCCL INFO Channel 01 : 3[60] -> 0[30] via direct shared memory
|
| 937 |
+
92b100c97f43:1324141:1324180 [2] NCCL INFO Channel 00 : 2[50] -> 3[60] via direct shared memory
|
| 938 |
+
92b100c97f43:1324139:1324177 [0] NCCL INFO Channel 00 : 0[30] -> 1[40] via direct shared memory
|
| 939 |
+
92b100c97f43:1324141:1324180 [2] NCCL INFO Channel 01 : 2[50] -> 3[60] via direct shared memory
|
| 940 |
+
92b100c97f43:1324139:1324177 [0] NCCL INFO Channel 01 : 0[30] -> 1[40] via direct shared memory
|
| 941 |
+
92b100c97f43:1324142:1324179 [3] NCCL INFO Connected all rings
|
| 942 |
+
92b100c97f43:1324140:1324178 [1] NCCL INFO Connected all rings
|
| 943 |
+
92b100c97f43:1324142:1324179 [3] NCCL INFO Channel 00 : 3[60] -> 2[50] via direct shared memory
|
| 944 |
+
92b100c97f43:1324142:1324179 [3] NCCL INFO Channel 01 : 3[60] -> 2[50] via direct shared memory
|
| 945 |
+
92b100c97f43:1324141:1324180 [2] NCCL INFO Connected all rings
|
| 946 |
+
92b100c97f43:1324139:1324177 [0] NCCL INFO Connected all rings
|
| 947 |
+
92b100c97f43:1324140:1324178 [1] NCCL INFO Channel 00 : 1[40] -> 0[30] via direct shared memory
|
| 948 |
+
92b100c97f43:1324140:1324178 [1] NCCL INFO Channel 01 : 1[40] -> 0[30] via direct shared memory
|
| 949 |
+
92b100c97f43:1324141:1324180 [2] NCCL INFO Channel 00 : 2[50] -> 1[40] via direct shared memory
|
| 950 |
+
92b100c97f43:1324141:1324180 [2] NCCL INFO Channel 01 : 2[50] -> 1[40] via direct shared memory
|
| 951 |
+
92b100c97f43:1324139:1324177 [0] NCCL INFO Connected all trees
|
| 952 |
+
92b100c97f43:1324139:1324177 [0] NCCL INFO threadThresholds 8/8/64 | 32/8/64 | 8/8/512
|
| 953 |
+
92b100c97f43:1324139:1324177 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
|
| 954 |
+
92b100c97f43:1324142:1324179 [3] NCCL INFO Connected all trees
|
| 955 |
+
92b100c97f43:1324142:1324179 [3] NCCL INFO threadThresholds 8/8/64 | 32/8/64 | 8/8/512
|
| 956 |
+
92b100c97f43:1324142:1324179 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
|
| 957 |
+
92b100c97f43:1324140:1324178 [1] NCCL INFO Connected all trees
|
| 958 |
+
92b100c97f43:1324140:1324178 [1] NCCL INFO threadThresholds 8/8/64 | 32/8/64 | 8/8/512
|
| 959 |
+
92b100c97f43:1324140:1324178 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
|
| 960 |
+
92b100c97f43:1324141:1324180 [2] NCCL INFO Connected all trees
|
| 961 |
+
92b100c97f43:1324141:1324180 [2] NCCL INFO threadThresholds 8/8/64 | 32/8/64 | 8/8/512
|
| 962 |
+
92b100c97f43:1324141:1324180 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
|
| 963 |
+
92b100c97f43:1324140:1324178 [1] NCCL INFO comm 0x7ff3c40030d0 rank 1 nranks 4 cudaDev 1 busId 40 - Init COMPLETE
|
| 964 |
+
92b100c97f43:1324142:1324179 [3] NCCL INFO comm 0x7f4ea80030d0 rank 3 nranks 4 cudaDev 3 busId 60 - Init COMPLETE
|
| 965 |
+
92b100c97f43:1324139:1324177 [0] NCCL INFO comm 0x7f1bec0030d0 rank 0 nranks 4 cudaDev 0 busId 30 - Init COMPLETE
|
| 966 |
+
92b100c97f43:1324139:1324139 [0] NCCL INFO Launch mode Parallel
|
| 967 |
+
92b100c97f43:1324141:1324180 [2] NCCL INFO comm 0x7f0acc0030d0 rank 2 nranks 4 cudaDev 2 busId 50 - Init COMPLETE
|
| 968 |
+
[92b100c97f43:0/4] 2025-03-04 21:35:43,535 (trainer:280) INFO: 1/130epoch started
|
| 969 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 970 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 971 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 972 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 973 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 974 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 975 |
+
[W reducer.cpp:1303] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator())
|
| 976 |
+
[W reducer.cpp:1303] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator())
|
| 977 |
+
[W reducer.cpp:1303] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator())
|
| 978 |
+
[W reducer.cpp:1303] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator())
|
| 979 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 980 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 981 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 982 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 983 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 984 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 985 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 986 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 987 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 988 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 989 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 990 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 991 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 992 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 993 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 994 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 995 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 996 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 997 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 998 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 999 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1000 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1001 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1002 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1003 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1004 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1005 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1006 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1007 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1008 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1009 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1010 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1011 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1012 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1013 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1014 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1015 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1016 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1017 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1018 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1019 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1020 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1021 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1022 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1023 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1024 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1025 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1026 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1027 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1028 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1029 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1030 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1031 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1032 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1033 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1034 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1035 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1036 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1037 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1038 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1039 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1040 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1041 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1042 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1043 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1044 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1045 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1046 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1047 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1048 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1049 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1050 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1051 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1052 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1053 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1054 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1055 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1056 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1057 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1058 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1059 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1060 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1061 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1062 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1063 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1064 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1065 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1066 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1067 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1068 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1069 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1070 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1071 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1072 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1073 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1074 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1075 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1076 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1077 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1078 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1079 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1080 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1081 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1082 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1083 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1084 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1085 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1086 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1087 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1088 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1089 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1090 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1091 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1092 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1093 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1094 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1095 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1096 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1097 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1098 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1099 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1100 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1101 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1102 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1103 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1104 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1105 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1106 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1107 |
+
/work/espnet/espnet2/layers/stft.py:166: UserWarning: __floordiv__ is deprecated, and its behavior will change in a future version of pytorch. It currently rounds toward 0 (like the 'trunc' function NOT 'floor'). This results in incorrect rounding for negative values. To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor').
|
| 1108 |
+
olens = (ilens - self.n_fft) // self.hop_length + 1
|
| 1109 |
+
/work/espnet/espnet2/layers/stft.py:166: UserWarning: __floordiv__ is deprecated, and its behavior will change in a future version of pytorch. It currently rounds toward 0 (like the 'trunc' function NOT 'floor'). This results in incorrect rounding for negative values. To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor').
|
| 1110 |
+
olens = (ilens - self.n_fft) // self.hop_length + 1
|
| 1111 |
+
Process SpawnProcess-3:
|
| 1112 |
+
Traceback (most recent call last):
|
| 1113 |
+
File "/usr/lib/python3.8/multiprocessing/process.py", line 315, in _bootstrap
|
| 1114 |
+
self.run()
|
| 1115 |
+
File "/usr/lib/python3.8/multiprocessing/process.py", line 108, in run
|
| 1116 |
+
self._target(*self._args, **self._kwargs)
|
| 1117 |
+
File "/work/espnet/espnet2/tasks/abs_task.py", line 1315, in main_worker
|
| 1118 |
+
cls.trainer.run(
|
| 1119 |
+
File "/work/espnet/espnet2/train/trainer.py", line 286, in run
|
| 1120 |
+
all_steps_are_invalid = cls.train_one_epoch(
|
| 1121 |
+
File "/work/espnet/espnet2/train/gan_trainer.py", line 160, in train_one_epoch
|
| 1122 |
+
retval = model(forward_generator=turn == "generator", **batch)
|
| 1123 |
+
File "/usr/local/lib/python3.8/dist-packages/torch/nn/modules/module.py", line 1102, in _call_impl
|
| 1124 |
+
return forward_call(*input, **kwargs)
|
| 1125 |
+
File "/usr/local/lib/python3.8/dist-packages/torch/nn/parallel/distributed.py", line 886, in forward
|
| 1126 |
+
output = self.module(*inputs[0], **kwargs[0])
|
| 1127 |
+
File "/usr/local/lib/python3.8/dist-packages/torch/nn/modules/module.py", line 1102, in _call_impl
|
| 1128 |
+
return forward_call(*input, **kwargs)
|
| 1129 |
+
File "/work/espnet/espnet2/gan_tts/espnet_model.py", line 164, in forward
|
| 1130 |
+
return self.tts(**batch)
|
| 1131 |
+
File "/usr/local/lib/python3.8/dist-packages/torch/nn/modules/module.py", line 1102, in _call_impl
|
| 1132 |
+
return forward_call(*input, **kwargs)
|
| 1133 |
+
File "/work/espnet/espnet2/gan_tts/jets/jets.py", line 339, in forward
|
| 1134 |
+
return self._forward_generator(
|
| 1135 |
+
File "/work/espnet/espnet2/gan_tts/jets/jets.py", line 452, in _forward_generator
|
| 1136 |
+
feat_match_loss = self.feat_match_loss(p_hat, p)
|
| 1137 |
+
File "/usr/local/lib/python3.8/dist-packages/torch/nn/modules/module.py", line 1102, in _call_impl
|
| 1138 |
+
return forward_call(*input, **kwargs)
|
| 1139 |
+
File "/work/espnet/espnet2/gan_tts/hifigan/loss.py", line 210, in forward
|
| 1140 |
+
feat_match_loss_ += F.l1_loss(feat_hat_, feat_.detach())
|
| 1141 |
+
File "/usr/local/lib/python3.8/dist-packages/torch/nn/functional.py", line 3081, in l1_loss
|
| 1142 |
+
return torch._C._nn.l1_loss(expanded_input, expanded_target, _Reduction.get_enum(reduction))
|
| 1143 |
+
RuntimeError: CUDA out of memory. Tried to allocate 80.00 MiB (GPU 2; 21.96 GiB total capacity; 7.64 GiB already allocated; 66.88 MiB free; 7.94 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF
|
| 1144 |
+
/work/espnet/espnet2/layers/stft.py:166: UserWarning: __floordiv__ is deprecated, and its behavior will change in a future version of pytorch. It currently rounds toward 0 (like the 'trunc' function NOT 'floor'). This results in incorrect rounding for negative values. To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor').
|
| 1145 |
+
olens = (ilens - self.n_fft) // self.hop_length + 1
|
| 1146 |
+
/work/espnet/espnet2/layers/stft.py:166: UserWarning: __floordiv__ is deprecated, and its behavior will change in a future version of pytorch. It currently rounds toward 0 (like the 'trunc' function NOT 'floor'). This results in incorrect rounding for negative values. To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor').
|
| 1147 |
+
olens = (ilens - self.n_fft) // self.hop_length + 1
|
| 1148 |
+
Process SpawnProcess-1:
|
| 1149 |
+
Traceback (most recent call last):
|
| 1150 |
+
File "/usr/lib/python3.8/multiprocessing/process.py", line 315, in _bootstrap
|
| 1151 |
+
self.run()
|
| 1152 |
+
File "/usr/lib/python3.8/multiprocessing/process.py", line 108, in run
|
| 1153 |
+
self._target(*self._args, **self._kwargs)
|
| 1154 |
+
File "/work/espnet/espnet2/tasks/abs_task.py", line 1315, in main_worker
|
| 1155 |
+
cls.trainer.run(
|
| 1156 |
+
File "/work/espnet/espnet2/train/trainer.py", line 286, in run
|
| 1157 |
+
all_steps_are_invalid = cls.train_one_epoch(
|
| 1158 |
+
File "/work/espnet/espnet2/train/gan_trainer.py", line 160, in train_one_epoch
|
| 1159 |
+
retval = model(forward_generator=turn == "generator", **batch)
|
| 1160 |
+
File "/usr/local/lib/python3.8/dist-packages/torch/nn/modules/module.py", line 1102, in _call_impl
|
| 1161 |
+
return forward_call(*input, **kwargs)
|
| 1162 |
+
File "/usr/local/lib/python3.8/dist-packages/torch/nn/parallel/distributed.py", line 886, in forward
|
| 1163 |
+
output = self.module(*inputs[0], **kwargs[0])
|
| 1164 |
+
File "/usr/local/lib/python3.8/dist-packages/torch/nn/modules/module.py", line 1102, in _call_impl
|
| 1165 |
+
return forward_call(*input, **kwargs)
|
| 1166 |
+
File "/work/espnet/espnet2/gan_tts/espnet_model.py", line 164, in forward
|
| 1167 |
+
return self.tts(**batch)
|
| 1168 |
+
File "/usr/local/lib/python3.8/dist-packages/torch/nn/modules/module.py", line 1102, in _call_impl
|
| 1169 |
+
return forward_call(*input, **kwargs)
|
| 1170 |
+
File "/work/espnet/espnet2/gan_tts/jets/jets.py", line 339, in forward
|
| 1171 |
+
return self._forward_generator(
|
| 1172 |
+
File "/work/espnet/espnet2/gan_tts/jets/jets.py", line 407, in _forward_generator
|
| 1173 |
+
outs = self.generator(
|
| 1174 |
+
File "/usr/local/lib/python3.8/dist-packages/torch/nn/modules/module.py", line 1102, in _call_impl
|
| 1175 |
+
return forward_call(*input, **kwargs)
|
| 1176 |
+
File "/work/espnet/espnet2/gan_tts/jets/generator.py", line 626, in forward
|
| 1177 |
+
wav = self.generator(z_segments)
|
| 1178 |
+
File "/usr/local/lib/python3.8/dist-packages/torch/nn/modules/module.py", line 1102, in _call_impl
|
| 1179 |
+
return forward_call(*input, **kwargs)
|
| 1180 |
+
File "/work/espnet/espnet2/gan_tts/hifigan/hifigan.py", line 160, in forward
|
| 1181 |
+
cs += self.blocks[i * self.num_blocks + j](c)
|
| 1182 |
+
File "/usr/local/lib/python3.8/dist-packages/torch/nn/modules/module.py", line 1102, in _call_impl
|
| 1183 |
+
return forward_call(*input, **kwargs)
|
| 1184 |
+
File "/work/espnet/espnet2/gan_tts/hifigan/residual_block.py", line 97, in forward
|
| 1185 |
+
xt = self.convs2[idx](xt)
|
| 1186 |
+
File "/usr/local/lib/python3.8/dist-packages/torch/nn/modules/module.py", line 1102, in _call_impl
|
| 1187 |
+
return forward_call(*input, **kwargs)
|
| 1188 |
+
File "/usr/local/lib/python3.8/dist-packages/torch/nn/modules/container.py", line 141, in forward
|
| 1189 |
+
input = module(input)
|
| 1190 |
+
File "/usr/local/lib/python3.8/dist-packages/torch/nn/modules/module.py", line 1102, in _call_impl
|
| 1191 |
+
return forward_call(*input, **kwargs)
|
| 1192 |
+
File "/usr/local/lib/python3.8/dist-packages/torch/nn/modules/activation.py", line 738, in forward
|
| 1193 |
+
return F.leaky_relu(input, self.negative_slope, self.inplace)
|
| 1194 |
+
File "/usr/local/lib/python3.8/dist-packages/torch/nn/functional.py", line 1475, in leaky_relu
|
| 1195 |
+
result = torch._C._nn.leaky_relu(input, negative_slope)
|
| 1196 |
+
RuntimeError: CUDA out of memory. Tried to allocate 22.00 MiB (GPU 0; 21.96 GiB total capacity; 6.66 GiB already allocated; 3.50 MiB free; 6.83 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF
|
| 1197 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1198 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1199 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1200 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1201 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1202 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1203 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1204 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1205 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1206 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1207 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1208 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1209 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1210 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1211 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1212 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1213 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1214 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1215 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1216 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1217 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1218 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1219 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1220 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1221 |
+
Traceback (most recent call last):
|
| 1222 |
+
File "/usr/lib/python3.8/runpy.py", line 194, in _run_module_as_main
|
| 1223 |
+
return _run_code(code, main_globals, None,
|
| 1224 |
+
File "/usr/lib/python3.8/runpy.py", line 87, in _run_code
|
| 1225 |
+
exec(code, run_globals)
|
| 1226 |
+
File "/work/espnet/espnet2/bin/gan_tts_train.py", line 22, in <module>
|
| 1227 |
+
main()
|
| 1228 |
+
File "/work/espnet/espnet2/bin/gan_tts_train.py", line 18, in main
|
| 1229 |
+
GANTTSTask.main(cmd=cmd)
|
| 1230 |
+
File "/work/espnet/espnet2/tasks/abs_task.py", line 1069, in main
|
| 1231 |
+
while not ProcessContext(processes, error_queues).join():
|
| 1232 |
+
File "/usr/local/lib/python3.8/dist-packages/torch/multiprocessing/spawn.py", line 139, in join
|
| 1233 |
+
raise ProcessExitedException(
|
| 1234 |
+
torch.multiprocessing.spawn.ProcessExitedException: process 2 terminated with exit code 1
|
| 1235 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1236 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1237 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1238 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1239 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1240 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1241 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1242 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1243 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1244 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1245 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1246 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1247 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1248 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1249 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1250 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1251 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1252 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1253 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1254 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1255 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1256 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1257 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1258 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1259 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1260 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1261 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1262 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1263 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1264 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1265 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1266 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1267 |
+
# Accounting: time=55 threads=1
|
| 1268 |
+
# Ended (code 1) at Tue Mar 4 21:36:08 JST 2025, elapsed time 55 seconds
|
| 1269 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1270 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1271 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1272 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1273 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1274 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1275 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1276 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1277 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1278 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1279 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1280 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1281 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1282 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1283 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1284 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1285 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1286 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1287 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1288 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1289 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1290 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1291 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1292 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1293 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1294 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1295 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1296 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1297 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1298 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1299 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1300 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1301 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1302 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1303 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1304 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1305 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1306 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1307 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1308 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1309 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1310 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1311 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1312 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1313 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1314 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1315 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1316 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1317 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1318 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1319 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1320 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1321 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1322 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1323 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1324 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1325 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1326 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1327 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1328 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1329 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1330 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1331 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1332 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1333 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1334 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1335 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1336 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1337 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1338 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1339 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1340 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1341 |
+
/usr/lib/python3.8/multiprocessing/resource_tracker.py:216: UserWarning: resource_tracker: There appear to be 208 leaked semaphore objects to clean up at shutdown
|
| 1342 |
+
warnings.warn('resource_tracker: There appear to be %d '
|
exp/tts_train_jets_raw_phn_jaconv_pyopenjtalk/train.2.log
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
exp/tts_train_jets_raw_phn_jaconv_pyopenjtalk/train.3.log
ADDED
|
@@ -0,0 +1,1247 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# python3 -m espnet2.bin.gan_tts_train --use_preprocessor true --token_type phn --token_list dump/token_list/phn_jaconv_pyopenjtalk/tokens.txt --non_linguistic_symbols none --cleaner jaconv --g2p pyopenjtalk --normalize global_mvn --resume true --fold_length 150 --fold_length 240000 --output_dir exp/tts_train_jets_raw_phn_jaconv_pyopenjtalk --config conf/tuning/train_jets.yaml --feats_extract fbank --feats_extract_conf n_fft=2048 --feats_extract_conf hop_length=300 --feats_extract_conf win_length=1200 --feats_extract_conf fs=24000 --feats_extract_conf fmin=80 --feats_extract_conf fmax=7600 --feats_extract_conf n_mels=80 --train_data_path_and_name_and_type dump/raw/jvs010_tr_no_dev/text,text,text --train_data_path_and_name_and_type dump/raw/jvs010_tr_no_dev/wav.scp,speech,sound --train_shape_file exp/tts_stats_raw_phn_jaconv_pyopenjtalk/train/text_shape.phn --train_shape_file exp/tts_stats_raw_phn_jaconv_pyopenjtalk/train/speech_shape --valid_data_path_and_name_and_type dump/raw/jvs010_dev/text,text,text --valid_data_path_and_name_and_type dump/raw/jvs010_dev/wav.scp,speech,sound --valid_shape_file exp/tts_stats_raw_phn_jaconv_pyopenjtalk/valid/text_shape.phn --valid_shape_file exp/tts_stats_raw_phn_jaconv_pyopenjtalk/valid/speech_shape --pitch_extract_conf fs=24000 --pitch_extract_conf n_fft=2048 --pitch_extract_conf hop_length=300 --pitch_extract_conf f0max=400 --pitch_extract_conf f0min=80 --pitch_normalize_conf stats_file=exp/tts_stats_raw_phn_jaconv_pyopenjtalk/train/pitch_stats.npz --energy_extract_conf fs=24000 --energy_extract_conf n_fft=2048 --energy_extract_conf hop_length=300 --energy_extract_conf win_length=1200 --energy_normalize_conf stats_file=exp/tts_stats_raw_phn_jaconv_pyopenjtalk/train/energy_stats.npz --normalize_conf stats_file=exp/tts_stats_raw_phn_jaconv_pyopenjtalk/train/feats_stats.npz --ngpu 4 --multiprocessing_distributed True
|
| 2 |
+
# Started at Tue Mar 4 21:30:32 JST 2025
|
| 3 |
+
#
|
| 4 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 5 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 6 |
+
/usr/bin/python3 /work/espnet/espnet2/bin/gan_tts_train.py --use_preprocessor true --token_type phn --token_list dump/token_list/phn_jaconv_pyopenjtalk/tokens.txt --non_linguistic_symbols none --cleaner jaconv --g2p pyopenjtalk --normalize global_mvn --resume true --fold_length 150 --fold_length 240000 --output_dir exp/tts_train_jets_raw_phn_jaconv_pyopenjtalk --config conf/tuning/train_jets.yaml --feats_extract fbank --feats_extract_conf n_fft=2048 --feats_extract_conf hop_length=300 --feats_extract_conf win_length=1200 --feats_extract_conf fs=24000 --feats_extract_conf fmin=80 --feats_extract_conf fmax=7600 --feats_extract_conf n_mels=80 --train_data_path_and_name_and_type dump/raw/jvs010_tr_no_dev/text,text,text --train_data_path_and_name_and_type dump/raw/jvs010_tr_no_dev/wav.scp,speech,sound --train_shape_file exp/tts_stats_raw_phn_jaconv_pyopenjtalk/train/text_shape.phn --train_shape_file exp/tts_stats_raw_phn_jaconv_pyopenjtalk/train/speech_shape --valid_data_path_and_name_and_type dump/raw/jvs010_dev/text,text,text --valid_data_path_and_name_and_type dump/raw/jvs010_dev/wav.scp,speech,sound --valid_shape_file exp/tts_stats_raw_phn_jaconv_pyopenjtalk/valid/text_shape.phn --valid_shape_file exp/tts_stats_raw_phn_jaconv_pyopenjtalk/valid/speech_shape --pitch_extract_conf fs=24000 --pitch_extract_conf n_fft=2048 --pitch_extract_conf hop_length=300 --pitch_extract_conf f0max=400 --pitch_extract_conf f0min=80 --pitch_normalize_conf stats_file=exp/tts_stats_raw_phn_jaconv_pyopenjtalk/train/pitch_stats.npz --energy_extract_conf fs=24000 --energy_extract_conf n_fft=2048 --energy_extract_conf hop_length=300 --energy_extract_conf win_length=1200 --energy_normalize_conf stats_file=exp/tts_stats_raw_phn_jaconv_pyopenjtalk/train/energy_stats.npz --normalize_conf stats_file=exp/tts_stats_raw_phn_jaconv_pyopenjtalk/train/feats_stats.npz --ngpu 4 --multiprocessing_distributed True
|
| 7 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 8 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 9 |
+
[92b100c97f43:0/4] 2025-03-04 21:30:38,282 (distributed_c10d:217) INFO: Added key: store_based_barrier_key:1 to store for rank: 0
|
| 10 |
+
[92b100c97f43:0/4] 2025-03-04 21:30:38,292 (distributed_c10d:251) INFO: Rank 0: Completed store-based barrier for key:store_based_barrier_key:1 with 4 nodes.
|
| 11 |
+
[92b100c97f43:0/4] 2025-03-04 21:30:38,340 (gan_tts:304) INFO: Vocabulary size: 41
|
| 12 |
+
[92b100c97f43:0/4] 2025-03-04 21:30:38,470 (encoder:172) INFO: encoder self-attention layer type = self-attention
|
| 13 |
+
[92b100c97f43:0/4] 2025-03-04 21:30:38,693 (encoder:172) INFO: encoder self-attention layer type = self-attention
|
| 14 |
+
[92b100c97f43:0/4] 2025-03-04 21:30:47,860 (abs_task:1157) INFO: pytorch.version=1.10.1+cu113, cuda.available=True, cudnn.version=8200, cudnn.benchmark=False, cudnn.deterministic=False
|
| 15 |
+
[92b100c97f43:0/4] 2025-03-04 21:30:47,870 (abs_task:1158) INFO: Model structure:
|
| 16 |
+
ESPnetGANTTSModel(
|
| 17 |
+
(feats_extract): LogMelFbank(
|
| 18 |
+
(stft): Stft(n_fft=2048, win_length=1200, hop_length=300, center=True, normalized=False, onesided=True)
|
| 19 |
+
(logmel): LogMel(sr=24000, n_fft=2048, n_mels=80, fmin=80, fmax=7600, htk=False)
|
| 20 |
+
)
|
| 21 |
+
(normalize): GlobalMVN(stats_file=exp/tts_stats_raw_phn_jaconv_pyopenjtalk/train/feats_stats.npz, norm_means=True, norm_vars=True)
|
| 22 |
+
(pitch_extract): Dio()
|
| 23 |
+
(pitch_normalize): GlobalMVN(stats_file=exp/tts_stats_raw_phn_jaconv_pyopenjtalk/train/pitch_stats.npz, norm_means=True, norm_vars=True)
|
| 24 |
+
(energy_extract): Energy(
|
| 25 |
+
(stft): Stft(n_fft=2048, win_length=1200, hop_length=300, center=True, normalized=False, onesided=True)
|
| 26 |
+
)
|
| 27 |
+
(energy_normalize): GlobalMVN(stats_file=exp/tts_stats_raw_phn_jaconv_pyopenjtalk/train/energy_stats.npz, norm_means=True, norm_vars=True)
|
| 28 |
+
(tts): JETS(
|
| 29 |
+
(generator): JETSGenerator(
|
| 30 |
+
(encoder): Encoder(
|
| 31 |
+
(embed): Sequential(
|
| 32 |
+
(0): Embedding(41, 256, padding_idx=0)
|
| 33 |
+
(1): ScaledPositionalEncoding(
|
| 34 |
+
(dropout): Dropout(p=0.2, inplace=False)
|
| 35 |
+
)
|
| 36 |
+
)
|
| 37 |
+
(encoders): MultiSequential(
|
| 38 |
+
(0): EncoderLayer(
|
| 39 |
+
(self_attn): MultiHeadedAttention(
|
| 40 |
+
(linear_q): Linear(in_features=256, out_features=256, bias=True)
|
| 41 |
+
(linear_k): Linear(in_features=256, out_features=256, bias=True)
|
| 42 |
+
(linear_v): Linear(in_features=256, out_features=256, bias=True)
|
| 43 |
+
(linear_out): Linear(in_features=256, out_features=256, bias=True)
|
| 44 |
+
(dropout): Dropout(p=0.2, inplace=False)
|
| 45 |
+
)
|
| 46 |
+
(feed_forward): MultiLayeredConv1d(
|
| 47 |
+
(w_1): Conv1d(256, 1024, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 48 |
+
(w_2): Conv1d(1024, 256, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 49 |
+
(dropout): Dropout(p=0.2, inplace=False)
|
| 50 |
+
)
|
| 51 |
+
(norm1): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
|
| 52 |
+
(norm2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
|
| 53 |
+
(dropout): Dropout(p=0.2, inplace=False)
|
| 54 |
+
)
|
| 55 |
+
(1): EncoderLayer(
|
| 56 |
+
(self_attn): MultiHeadedAttention(
|
| 57 |
+
(linear_q): Linear(in_features=256, out_features=256, bias=True)
|
| 58 |
+
(linear_k): Linear(in_features=256, out_features=256, bias=True)
|
| 59 |
+
(linear_v): Linear(in_features=256, out_features=256, bias=True)
|
| 60 |
+
(linear_out): Linear(in_features=256, out_features=256, bias=True)
|
| 61 |
+
(dropout): Dropout(p=0.2, inplace=False)
|
| 62 |
+
)
|
| 63 |
+
(feed_forward): MultiLayeredConv1d(
|
| 64 |
+
(w_1): Conv1d(256, 1024, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 65 |
+
(w_2): Conv1d(1024, 256, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 66 |
+
(dropout): Dropout(p=0.2, inplace=False)
|
| 67 |
+
)
|
| 68 |
+
(norm1): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
|
| 69 |
+
(norm2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
|
| 70 |
+
(dropout): Dropout(p=0.2, inplace=False)
|
| 71 |
+
)
|
| 72 |
+
(2): EncoderLayer(
|
| 73 |
+
(self_attn): MultiHeadedAttention(
|
| 74 |
+
(linear_q): Linear(in_features=256, out_features=256, bias=True)
|
| 75 |
+
(linear_k): Linear(in_features=256, out_features=256, bias=True)
|
| 76 |
+
(linear_v): Linear(in_features=256, out_features=256, bias=True)
|
| 77 |
+
(linear_out): Linear(in_features=256, out_features=256, bias=True)
|
| 78 |
+
(dropout): Dropout(p=0.2, inplace=False)
|
| 79 |
+
)
|
| 80 |
+
(feed_forward): MultiLayeredConv1d(
|
| 81 |
+
(w_1): Conv1d(256, 1024, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 82 |
+
(w_2): Conv1d(1024, 256, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 83 |
+
(dropout): Dropout(p=0.2, inplace=False)
|
| 84 |
+
)
|
| 85 |
+
(norm1): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
|
| 86 |
+
(norm2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
|
| 87 |
+
(dropout): Dropout(p=0.2, inplace=False)
|
| 88 |
+
)
|
| 89 |
+
(3): EncoderLayer(
|
| 90 |
+
(self_attn): MultiHeadedAttention(
|
| 91 |
+
(linear_q): Linear(in_features=256, out_features=256, bias=True)
|
| 92 |
+
(linear_k): Linear(in_features=256, out_features=256, bias=True)
|
| 93 |
+
(linear_v): Linear(in_features=256, out_features=256, bias=True)
|
| 94 |
+
(linear_out): Linear(in_features=256, out_features=256, bias=True)
|
| 95 |
+
(dropout): Dropout(p=0.2, inplace=False)
|
| 96 |
+
)
|
| 97 |
+
(feed_forward): MultiLayeredConv1d(
|
| 98 |
+
(w_1): Conv1d(256, 1024, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 99 |
+
(w_2): Conv1d(1024, 256, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 100 |
+
(dropout): Dropout(p=0.2, inplace=False)
|
| 101 |
+
)
|
| 102 |
+
(norm1): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
|
| 103 |
+
(norm2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
|
| 104 |
+
(dropout): Dropout(p=0.2, inplace=False)
|
| 105 |
+
)
|
| 106 |
+
)
|
| 107 |
+
(after_norm): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
|
| 108 |
+
)
|
| 109 |
+
(duration_predictor): DurationPredictor(
|
| 110 |
+
(conv): ModuleList(
|
| 111 |
+
(0): Sequential(
|
| 112 |
+
(0): Conv1d(256, 256, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 113 |
+
(1): ReLU()
|
| 114 |
+
(2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
|
| 115 |
+
(3): Dropout(p=0.1, inplace=False)
|
| 116 |
+
)
|
| 117 |
+
(1): Sequential(
|
| 118 |
+
(0): Conv1d(256, 256, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 119 |
+
(1): ReLU()
|
| 120 |
+
(2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
|
| 121 |
+
(3): Dropout(p=0.1, inplace=False)
|
| 122 |
+
)
|
| 123 |
+
)
|
| 124 |
+
(linear): Linear(in_features=256, out_features=1, bias=True)
|
| 125 |
+
)
|
| 126 |
+
(pitch_predictor): VariancePredictor(
|
| 127 |
+
(conv): ModuleList(
|
| 128 |
+
(0): Sequential(
|
| 129 |
+
(0): Conv1d(256, 256, kernel_size=(5,), stride=(1,), padding=(2,))
|
| 130 |
+
(1): ReLU()
|
| 131 |
+
(2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
|
| 132 |
+
(3): Dropout(p=0.5, inplace=False)
|
| 133 |
+
)
|
| 134 |
+
(1): Sequential(
|
| 135 |
+
(0): Conv1d(256, 256, kernel_size=(5,), stride=(1,), padding=(2,))
|
| 136 |
+
(1): ReLU()
|
| 137 |
+
(2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
|
| 138 |
+
(3): Dropout(p=0.5, inplace=False)
|
| 139 |
+
)
|
| 140 |
+
(2): Sequential(
|
| 141 |
+
(0): Conv1d(256, 256, kernel_size=(5,), stride=(1,), padding=(2,))
|
| 142 |
+
(1): ReLU()
|
| 143 |
+
(2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
|
| 144 |
+
(3): Dropout(p=0.5, inplace=False)
|
| 145 |
+
)
|
| 146 |
+
(3): Sequential(
|
| 147 |
+
(0): Conv1d(256, 256, kernel_size=(5,), stride=(1,), padding=(2,))
|
| 148 |
+
(1): ReLU()
|
| 149 |
+
(2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
|
| 150 |
+
(3): Dropout(p=0.5, inplace=False)
|
| 151 |
+
)
|
| 152 |
+
(4): Sequential(
|
| 153 |
+
(0): Conv1d(256, 256, kernel_size=(5,), stride=(1,), padding=(2,))
|
| 154 |
+
(1): ReLU()
|
| 155 |
+
(2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
|
| 156 |
+
(3): Dropout(p=0.5, inplace=False)
|
| 157 |
+
)
|
| 158 |
+
)
|
| 159 |
+
(linear): Linear(in_features=256, out_features=1, bias=True)
|
| 160 |
+
)
|
| 161 |
+
(pitch_embed): Sequential(
|
| 162 |
+
(0): Conv1d(1, 256, kernel_size=(1,), stride=(1,))
|
| 163 |
+
(1): Dropout(p=0.0, inplace=False)
|
| 164 |
+
)
|
| 165 |
+
(energy_predictor): VariancePredictor(
|
| 166 |
+
(conv): ModuleList(
|
| 167 |
+
(0): Sequential(
|
| 168 |
+
(0): Conv1d(256, 256, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 169 |
+
(1): ReLU()
|
| 170 |
+
(2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
|
| 171 |
+
(3): Dropout(p=0.5, inplace=False)
|
| 172 |
+
)
|
| 173 |
+
(1): Sequential(
|
| 174 |
+
(0): Conv1d(256, 256, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 175 |
+
(1): ReLU()
|
| 176 |
+
(2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
|
| 177 |
+
(3): Dropout(p=0.5, inplace=False)
|
| 178 |
+
)
|
| 179 |
+
)
|
| 180 |
+
(linear): Linear(in_features=256, out_features=1, bias=True)
|
| 181 |
+
)
|
| 182 |
+
(energy_embed): Sequential(
|
| 183 |
+
(0): Conv1d(1, 256, kernel_size=(1,), stride=(1,))
|
| 184 |
+
(1): Dropout(p=0.0, inplace=False)
|
| 185 |
+
)
|
| 186 |
+
(alignment_module): AlignmentModule(
|
| 187 |
+
(t_conv1): Conv1d(256, 256, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 188 |
+
(t_conv2): Conv1d(256, 256, kernel_size=(1,), stride=(1,))
|
| 189 |
+
(f_conv1): Conv1d(80, 256, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 190 |
+
(f_conv2): Conv1d(256, 256, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 191 |
+
(f_conv3): Conv1d(256, 256, kernel_size=(1,), stride=(1,))
|
| 192 |
+
)
|
| 193 |
+
(length_regulator): GaussianUpsampling()
|
| 194 |
+
(decoder): Encoder(
|
| 195 |
+
(embed): Sequential(
|
| 196 |
+
(0): ScaledPositionalEncoding(
|
| 197 |
+
(dropout): Dropout(p=0.2, inplace=False)
|
| 198 |
+
)
|
| 199 |
+
)
|
| 200 |
+
(encoders): MultiSequential(
|
| 201 |
+
(0): EncoderLayer(
|
| 202 |
+
(self_attn): MultiHeadedAttention(
|
| 203 |
+
(linear_q): Linear(in_features=256, out_features=256, bias=True)
|
| 204 |
+
(linear_k): Linear(in_features=256, out_features=256, bias=True)
|
| 205 |
+
(linear_v): Linear(in_features=256, out_features=256, bias=True)
|
| 206 |
+
(linear_out): Linear(in_features=256, out_features=256, bias=True)
|
| 207 |
+
(dropout): Dropout(p=0.2, inplace=False)
|
| 208 |
+
)
|
| 209 |
+
(feed_forward): MultiLayeredConv1d(
|
| 210 |
+
(w_1): Conv1d(256, 1024, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 211 |
+
(w_2): Conv1d(1024, 256, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 212 |
+
(dropout): Dropout(p=0.2, inplace=False)
|
| 213 |
+
)
|
| 214 |
+
(norm1): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
|
| 215 |
+
(norm2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
|
| 216 |
+
(dropout): Dropout(p=0.2, inplace=False)
|
| 217 |
+
)
|
| 218 |
+
(1): EncoderLayer(
|
| 219 |
+
(self_attn): MultiHeadedAttention(
|
| 220 |
+
(linear_q): Linear(in_features=256, out_features=256, bias=True)
|
| 221 |
+
(linear_k): Linear(in_features=256, out_features=256, bias=True)
|
| 222 |
+
(linear_v): Linear(in_features=256, out_features=256, bias=True)
|
| 223 |
+
(linear_out): Linear(in_features=256, out_features=256, bias=True)
|
| 224 |
+
(dropout): Dropout(p=0.2, inplace=False)
|
| 225 |
+
)
|
| 226 |
+
(feed_forward): MultiLayeredConv1d(
|
| 227 |
+
(w_1): Conv1d(256, 1024, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 228 |
+
(w_2): Conv1d(1024, 256, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 229 |
+
(dropout): Dropout(p=0.2, inplace=False)
|
| 230 |
+
)
|
| 231 |
+
(norm1): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
|
| 232 |
+
(norm2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
|
| 233 |
+
(dropout): Dropout(p=0.2, inplace=False)
|
| 234 |
+
)
|
| 235 |
+
(2): EncoderLayer(
|
| 236 |
+
(self_attn): MultiHeadedAttention(
|
| 237 |
+
(linear_q): Linear(in_features=256, out_features=256, bias=True)
|
| 238 |
+
(linear_k): Linear(in_features=256, out_features=256, bias=True)
|
| 239 |
+
(linear_v): Linear(in_features=256, out_features=256, bias=True)
|
| 240 |
+
(linear_out): Linear(in_features=256, out_features=256, bias=True)
|
| 241 |
+
(dropout): Dropout(p=0.2, inplace=False)
|
| 242 |
+
)
|
| 243 |
+
(feed_forward): MultiLayeredConv1d(
|
| 244 |
+
(w_1): Conv1d(256, 1024, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 245 |
+
(w_2): Conv1d(1024, 256, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 246 |
+
(dropout): Dropout(p=0.2, inplace=False)
|
| 247 |
+
)
|
| 248 |
+
(norm1): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
|
| 249 |
+
(norm2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
|
| 250 |
+
(dropout): Dropout(p=0.2, inplace=False)
|
| 251 |
+
)
|
| 252 |
+
(3): EncoderLayer(
|
| 253 |
+
(self_attn): MultiHeadedAttention(
|
| 254 |
+
(linear_q): Linear(in_features=256, out_features=256, bias=True)
|
| 255 |
+
(linear_k): Linear(in_features=256, out_features=256, bias=True)
|
| 256 |
+
(linear_v): Linear(in_features=256, out_features=256, bias=True)
|
| 257 |
+
(linear_out): Linear(in_features=256, out_features=256, bias=True)
|
| 258 |
+
(dropout): Dropout(p=0.2, inplace=False)
|
| 259 |
+
)
|
| 260 |
+
(feed_forward): MultiLayeredConv1d(
|
| 261 |
+
(w_1): Conv1d(256, 1024, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 262 |
+
(w_2): Conv1d(1024, 256, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 263 |
+
(dropout): Dropout(p=0.2, inplace=False)
|
| 264 |
+
)
|
| 265 |
+
(norm1): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
|
| 266 |
+
(norm2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
|
| 267 |
+
(dropout): Dropout(p=0.2, inplace=False)
|
| 268 |
+
)
|
| 269 |
+
)
|
| 270 |
+
(after_norm): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
|
| 271 |
+
)
|
| 272 |
+
(generator): HiFiGANGenerator(
|
| 273 |
+
(input_conv): Conv1d(256, 512, kernel_size=(7,), stride=(1,), padding=(3,))
|
| 274 |
+
(upsamples): ModuleList(
|
| 275 |
+
(0): Sequential(
|
| 276 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 277 |
+
(1): ConvTranspose1d(512, 256, kernel_size=(16,), stride=(8,), padding=(4,))
|
| 278 |
+
)
|
| 279 |
+
(1): Sequential(
|
| 280 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 281 |
+
(1): ConvTranspose1d(256, 128, kernel_size=(16,), stride=(8,), padding=(4,))
|
| 282 |
+
)
|
| 283 |
+
(2): Sequential(
|
| 284 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 285 |
+
(1): ConvTranspose1d(128, 64, kernel_size=(4,), stride=(2,), padding=(1,))
|
| 286 |
+
)
|
| 287 |
+
(3): Sequential(
|
| 288 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 289 |
+
(1): ConvTranspose1d(64, 32, kernel_size=(4,), stride=(2,), padding=(1,))
|
| 290 |
+
)
|
| 291 |
+
)
|
| 292 |
+
(blocks): ModuleList(
|
| 293 |
+
(0): ResidualBlock(
|
| 294 |
+
(convs1): ModuleList(
|
| 295 |
+
(0): Sequential(
|
| 296 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 297 |
+
(1): Conv1d(256, 256, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 298 |
+
)
|
| 299 |
+
(1): Sequential(
|
| 300 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 301 |
+
(1): Conv1d(256, 256, kernel_size=(3,), stride=(1,), padding=(3,), dilation=(3,))
|
| 302 |
+
)
|
| 303 |
+
(2): Sequential(
|
| 304 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 305 |
+
(1): Conv1d(256, 256, kernel_size=(3,), stride=(1,), padding=(5,), dilation=(5,))
|
| 306 |
+
)
|
| 307 |
+
)
|
| 308 |
+
(convs2): ModuleList(
|
| 309 |
+
(0): Sequential(
|
| 310 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 311 |
+
(1): Conv1d(256, 256, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 312 |
+
)
|
| 313 |
+
(1): Sequential(
|
| 314 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 315 |
+
(1): Conv1d(256, 256, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 316 |
+
)
|
| 317 |
+
(2): Sequential(
|
| 318 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 319 |
+
(1): Conv1d(256, 256, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 320 |
+
)
|
| 321 |
+
)
|
| 322 |
+
)
|
| 323 |
+
(1): ResidualBlock(
|
| 324 |
+
(convs1): ModuleList(
|
| 325 |
+
(0): Sequential(
|
| 326 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 327 |
+
(1): Conv1d(256, 256, kernel_size=(7,), stride=(1,), padding=(3,))
|
| 328 |
+
)
|
| 329 |
+
(1): Sequential(
|
| 330 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 331 |
+
(1): Conv1d(256, 256, kernel_size=(7,), stride=(1,), padding=(9,), dilation=(3,))
|
| 332 |
+
)
|
| 333 |
+
(2): Sequential(
|
| 334 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 335 |
+
(1): Conv1d(256, 256, kernel_size=(7,), stride=(1,), padding=(15,), dilation=(5,))
|
| 336 |
+
)
|
| 337 |
+
)
|
| 338 |
+
(convs2): ModuleList(
|
| 339 |
+
(0): Sequential(
|
| 340 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 341 |
+
(1): Conv1d(256, 256, kernel_size=(7,), stride=(1,), padding=(3,))
|
| 342 |
+
)
|
| 343 |
+
(1): Sequential(
|
| 344 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 345 |
+
(1): Conv1d(256, 256, kernel_size=(7,), stride=(1,), padding=(3,))
|
| 346 |
+
)
|
| 347 |
+
(2): Sequential(
|
| 348 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 349 |
+
(1): Conv1d(256, 256, kernel_size=(7,), stride=(1,), padding=(3,))
|
| 350 |
+
)
|
| 351 |
+
)
|
| 352 |
+
)
|
| 353 |
+
(2): ResidualBlock(
|
| 354 |
+
(convs1): ModuleList(
|
| 355 |
+
(0): Sequential(
|
| 356 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 357 |
+
(1): Conv1d(256, 256, kernel_size=(11,), stride=(1,), padding=(5,))
|
| 358 |
+
)
|
| 359 |
+
(1): Sequential(
|
| 360 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 361 |
+
(1): Conv1d(256, 256, kernel_size=(11,), stride=(1,), padding=(15,), dilation=(3,))
|
| 362 |
+
)
|
| 363 |
+
(2): Sequential(
|
| 364 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 365 |
+
(1): Conv1d(256, 256, kernel_size=(11,), stride=(1,), padding=(25,), dilation=(5,))
|
| 366 |
+
)
|
| 367 |
+
)
|
| 368 |
+
(convs2): ModuleList(
|
| 369 |
+
(0): Sequential(
|
| 370 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 371 |
+
(1): Conv1d(256, 256, kernel_size=(11,), stride=(1,), padding=(5,))
|
| 372 |
+
)
|
| 373 |
+
(1): Sequential(
|
| 374 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 375 |
+
(1): Conv1d(256, 256, kernel_size=(11,), stride=(1,), padding=(5,))
|
| 376 |
+
)
|
| 377 |
+
(2): Sequential(
|
| 378 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 379 |
+
(1): Conv1d(256, 256, kernel_size=(11,), stride=(1,), padding=(5,))
|
| 380 |
+
)
|
| 381 |
+
)
|
| 382 |
+
)
|
| 383 |
+
(3): ResidualBlock(
|
| 384 |
+
(convs1): ModuleList(
|
| 385 |
+
(0): Sequential(
|
| 386 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 387 |
+
(1): Conv1d(128, 128, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 388 |
+
)
|
| 389 |
+
(1): Sequential(
|
| 390 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 391 |
+
(1): Conv1d(128, 128, kernel_size=(3,), stride=(1,), padding=(3,), dilation=(3,))
|
| 392 |
+
)
|
| 393 |
+
(2): Sequential(
|
| 394 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 395 |
+
(1): Conv1d(128, 128, kernel_size=(3,), stride=(1,), padding=(5,), dilation=(5,))
|
| 396 |
+
)
|
| 397 |
+
)
|
| 398 |
+
(convs2): ModuleList(
|
| 399 |
+
(0): Sequential(
|
| 400 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 401 |
+
(1): Conv1d(128, 128, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 402 |
+
)
|
| 403 |
+
(1): Sequential(
|
| 404 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 405 |
+
(1): Conv1d(128, 128, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 406 |
+
)
|
| 407 |
+
(2): Sequential(
|
| 408 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 409 |
+
(1): Conv1d(128, 128, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 410 |
+
)
|
| 411 |
+
)
|
| 412 |
+
)
|
| 413 |
+
(4): ResidualBlock(
|
| 414 |
+
(convs1): ModuleList(
|
| 415 |
+
(0): Sequential(
|
| 416 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 417 |
+
(1): Conv1d(128, 128, kernel_size=(7,), stride=(1,), padding=(3,))
|
| 418 |
+
)
|
| 419 |
+
(1): Sequential(
|
| 420 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 421 |
+
(1): Conv1d(128, 128, kernel_size=(7,), stride=(1,), padding=(9,), dilation=(3,))
|
| 422 |
+
)
|
| 423 |
+
(2): Sequential(
|
| 424 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 425 |
+
(1): Conv1d(128, 128, kernel_size=(7,), stride=(1,), padding=(15,), dilation=(5,))
|
| 426 |
+
)
|
| 427 |
+
)
|
| 428 |
+
(convs2): ModuleList(
|
| 429 |
+
(0): Sequential(
|
| 430 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 431 |
+
(1): Conv1d(128, 128, kernel_size=(7,), stride=(1,), padding=(3,))
|
| 432 |
+
)
|
| 433 |
+
(1): Sequential(
|
| 434 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 435 |
+
(1): Conv1d(128, 128, kernel_size=(7,), stride=(1,), padding=(3,))
|
| 436 |
+
)
|
| 437 |
+
(2): Sequential(
|
| 438 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 439 |
+
(1): Conv1d(128, 128, kernel_size=(7,), stride=(1,), padding=(3,))
|
| 440 |
+
)
|
| 441 |
+
)
|
| 442 |
+
)
|
| 443 |
+
(5): ResidualBlock(
|
| 444 |
+
(convs1): ModuleList(
|
| 445 |
+
(0): Sequential(
|
| 446 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 447 |
+
(1): Conv1d(128, 128, kernel_size=(11,), stride=(1,), padding=(5,))
|
| 448 |
+
)
|
| 449 |
+
(1): Sequential(
|
| 450 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 451 |
+
(1): Conv1d(128, 128, kernel_size=(11,), stride=(1,), padding=(15,), dilation=(3,))
|
| 452 |
+
)
|
| 453 |
+
(2): Sequential(
|
| 454 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 455 |
+
(1): Conv1d(128, 128, kernel_size=(11,), stride=(1,), padding=(25,), dilation=(5,))
|
| 456 |
+
)
|
| 457 |
+
)
|
| 458 |
+
(convs2): ModuleList(
|
| 459 |
+
(0): Sequential(
|
| 460 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 461 |
+
(1): Conv1d(128, 128, kernel_size=(11,), stride=(1,), padding=(5,))
|
| 462 |
+
)
|
| 463 |
+
(1): Sequential(
|
| 464 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 465 |
+
(1): Conv1d(128, 128, kernel_size=(11,), stride=(1,), padding=(5,))
|
| 466 |
+
)
|
| 467 |
+
(2): Sequential(
|
| 468 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 469 |
+
(1): Conv1d(128, 128, kernel_size=(11,), stride=(1,), padding=(5,))
|
| 470 |
+
)
|
| 471 |
+
)
|
| 472 |
+
)
|
| 473 |
+
(6): ResidualBlock(
|
| 474 |
+
(convs1): ModuleList(
|
| 475 |
+
(0): Sequential(
|
| 476 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 477 |
+
(1): Conv1d(64, 64, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 478 |
+
)
|
| 479 |
+
(1): Sequential(
|
| 480 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 481 |
+
(1): Conv1d(64, 64, kernel_size=(3,), stride=(1,), padding=(3,), dilation=(3,))
|
| 482 |
+
)
|
| 483 |
+
(2): Sequential(
|
| 484 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 485 |
+
(1): Conv1d(64, 64, kernel_size=(3,), stride=(1,), padding=(5,), dilation=(5,))
|
| 486 |
+
)
|
| 487 |
+
)
|
| 488 |
+
(convs2): ModuleList(
|
| 489 |
+
(0): Sequential(
|
| 490 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 491 |
+
(1): Conv1d(64, 64, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 492 |
+
)
|
| 493 |
+
(1): Sequential(
|
| 494 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 495 |
+
(1): Conv1d(64, 64, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 496 |
+
)
|
| 497 |
+
(2): Sequential(
|
| 498 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 499 |
+
(1): Conv1d(64, 64, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 500 |
+
)
|
| 501 |
+
)
|
| 502 |
+
)
|
| 503 |
+
(7): ResidualBlock(
|
| 504 |
+
(convs1): ModuleList(
|
| 505 |
+
(0): Sequential(
|
| 506 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 507 |
+
(1): Conv1d(64, 64, kernel_size=(7,), stride=(1,), padding=(3,))
|
| 508 |
+
)
|
| 509 |
+
(1): Sequential(
|
| 510 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 511 |
+
(1): Conv1d(64, 64, kernel_size=(7,), stride=(1,), padding=(9,), dilation=(3,))
|
| 512 |
+
)
|
| 513 |
+
(2): Sequential(
|
| 514 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 515 |
+
(1): Conv1d(64, 64, kernel_size=(7,), stride=(1,), padding=(15,), dilation=(5,))
|
| 516 |
+
)
|
| 517 |
+
)
|
| 518 |
+
(convs2): ModuleList(
|
| 519 |
+
(0): Sequential(
|
| 520 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 521 |
+
(1): Conv1d(64, 64, kernel_size=(7,), stride=(1,), padding=(3,))
|
| 522 |
+
)
|
| 523 |
+
(1): Sequential(
|
| 524 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 525 |
+
(1): Conv1d(64, 64, kernel_size=(7,), stride=(1,), padding=(3,))
|
| 526 |
+
)
|
| 527 |
+
(2): Sequential(
|
| 528 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 529 |
+
(1): Conv1d(64, 64, kernel_size=(7,), stride=(1,), padding=(3,))
|
| 530 |
+
)
|
| 531 |
+
)
|
| 532 |
+
)
|
| 533 |
+
(8): ResidualBlock(
|
| 534 |
+
(convs1): ModuleList(
|
| 535 |
+
(0): Sequential(
|
| 536 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 537 |
+
(1): Conv1d(64, 64, kernel_size=(11,), stride=(1,), padding=(5,))
|
| 538 |
+
)
|
| 539 |
+
(1): Sequential(
|
| 540 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 541 |
+
(1): Conv1d(64, 64, kernel_size=(11,), stride=(1,), padding=(15,), dilation=(3,))
|
| 542 |
+
)
|
| 543 |
+
(2): Sequential(
|
| 544 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 545 |
+
(1): Conv1d(64, 64, kernel_size=(11,), stride=(1,), padding=(25,), dilation=(5,))
|
| 546 |
+
)
|
| 547 |
+
)
|
| 548 |
+
(convs2): ModuleList(
|
| 549 |
+
(0): Sequential(
|
| 550 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 551 |
+
(1): Conv1d(64, 64, kernel_size=(11,), stride=(1,), padding=(5,))
|
| 552 |
+
)
|
| 553 |
+
(1): Sequential(
|
| 554 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 555 |
+
(1): Conv1d(64, 64, kernel_size=(11,), stride=(1,), padding=(5,))
|
| 556 |
+
)
|
| 557 |
+
(2): Sequential(
|
| 558 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 559 |
+
(1): Conv1d(64, 64, kernel_size=(11,), stride=(1,), padding=(5,))
|
| 560 |
+
)
|
| 561 |
+
)
|
| 562 |
+
)
|
| 563 |
+
(9): ResidualBlock(
|
| 564 |
+
(convs1): ModuleList(
|
| 565 |
+
(0): Sequential(
|
| 566 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 567 |
+
(1): Conv1d(32, 32, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 568 |
+
)
|
| 569 |
+
(1): Sequential(
|
| 570 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 571 |
+
(1): Conv1d(32, 32, kernel_size=(3,), stride=(1,), padding=(3,), dilation=(3,))
|
| 572 |
+
)
|
| 573 |
+
(2): Sequential(
|
| 574 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 575 |
+
(1): Conv1d(32, 32, kernel_size=(3,), stride=(1,), padding=(5,), dilation=(5,))
|
| 576 |
+
)
|
| 577 |
+
)
|
| 578 |
+
(convs2): ModuleList(
|
| 579 |
+
(0): Sequential(
|
| 580 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 581 |
+
(1): Conv1d(32, 32, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 582 |
+
)
|
| 583 |
+
(1): Sequential(
|
| 584 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 585 |
+
(1): Conv1d(32, 32, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 586 |
+
)
|
| 587 |
+
(2): Sequential(
|
| 588 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 589 |
+
(1): Conv1d(32, 32, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 590 |
+
)
|
| 591 |
+
)
|
| 592 |
+
)
|
| 593 |
+
(10): ResidualBlock(
|
| 594 |
+
(convs1): ModuleList(
|
| 595 |
+
(0): Sequential(
|
| 596 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 597 |
+
(1): Conv1d(32, 32, kernel_size=(7,), stride=(1,), padding=(3,))
|
| 598 |
+
)
|
| 599 |
+
(1): Sequential(
|
| 600 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 601 |
+
(1): Conv1d(32, 32, kernel_size=(7,), stride=(1,), padding=(9,), dilation=(3,))
|
| 602 |
+
)
|
| 603 |
+
(2): Sequential(
|
| 604 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 605 |
+
(1): Conv1d(32, 32, kernel_size=(7,), stride=(1,), padding=(15,), dilation=(5,))
|
| 606 |
+
)
|
| 607 |
+
)
|
| 608 |
+
(convs2): ModuleList(
|
| 609 |
+
(0): Sequential(
|
| 610 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 611 |
+
(1): Conv1d(32, 32, kernel_size=(7,), stride=(1,), padding=(3,))
|
| 612 |
+
)
|
| 613 |
+
(1): Sequential(
|
| 614 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 615 |
+
(1): Conv1d(32, 32, kernel_size=(7,), stride=(1,), padding=(3,))
|
| 616 |
+
)
|
| 617 |
+
(2): Sequential(
|
| 618 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 619 |
+
(1): Conv1d(32, 32, kernel_size=(7,), stride=(1,), padding=(3,))
|
| 620 |
+
)
|
| 621 |
+
)
|
| 622 |
+
)
|
| 623 |
+
(11): ResidualBlock(
|
| 624 |
+
(convs1): ModuleList(
|
| 625 |
+
(0): Sequential(
|
| 626 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 627 |
+
(1): Conv1d(32, 32, kernel_size=(11,), stride=(1,), padding=(5,))
|
| 628 |
+
)
|
| 629 |
+
(1): Sequential(
|
| 630 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 631 |
+
(1): Conv1d(32, 32, kernel_size=(11,), stride=(1,), padding=(15,), dilation=(3,))
|
| 632 |
+
)
|
| 633 |
+
(2): Sequential(
|
| 634 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 635 |
+
(1): Conv1d(32, 32, kernel_size=(11,), stride=(1,), padding=(25,), dilation=(5,))
|
| 636 |
+
)
|
| 637 |
+
)
|
| 638 |
+
(convs2): ModuleList(
|
| 639 |
+
(0): Sequential(
|
| 640 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 641 |
+
(1): Conv1d(32, 32, kernel_size=(11,), stride=(1,), padding=(5,))
|
| 642 |
+
)
|
| 643 |
+
(1): Sequential(
|
| 644 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 645 |
+
(1): Conv1d(32, 32, kernel_size=(11,), stride=(1,), padding=(5,))
|
| 646 |
+
)
|
| 647 |
+
(2): Sequential(
|
| 648 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 649 |
+
(1): Conv1d(32, 32, kernel_size=(11,), stride=(1,), padding=(5,))
|
| 650 |
+
)
|
| 651 |
+
)
|
| 652 |
+
)
|
| 653 |
+
)
|
| 654 |
+
(output_conv): Sequential(
|
| 655 |
+
(0): LeakyReLU(negative_slope=0.01)
|
| 656 |
+
(1): Conv1d(32, 1, kernel_size=(7,), stride=(1,), padding=(3,))
|
| 657 |
+
(2): Tanh()
|
| 658 |
+
)
|
| 659 |
+
)
|
| 660 |
+
)
|
| 661 |
+
(discriminator): HiFiGANMultiScaleMultiPeriodDiscriminator(
|
| 662 |
+
(msd): HiFiGANMultiScaleDiscriminator(
|
| 663 |
+
(discriminators): ModuleList(
|
| 664 |
+
(0): HiFiGANScaleDiscriminator(
|
| 665 |
+
(layers): ModuleList(
|
| 666 |
+
(0): Sequential(
|
| 667 |
+
(0): Conv1d(1, 128, kernel_size=(15,), stride=(1,), padding=(7,))
|
| 668 |
+
(1): LeakyReLU(negative_slope=0.1)
|
| 669 |
+
)
|
| 670 |
+
(1): Sequential(
|
| 671 |
+
(0): Conv1d(128, 128, kernel_size=(41,), stride=(2,), padding=(20,), groups=4)
|
| 672 |
+
(1): LeakyReLU(negative_slope=0.1)
|
| 673 |
+
)
|
| 674 |
+
(2): Sequential(
|
| 675 |
+
(0): Conv1d(128, 256, kernel_size=(41,), stride=(2,), padding=(20,), groups=16)
|
| 676 |
+
(1): LeakyReLU(negative_slope=0.1)
|
| 677 |
+
)
|
| 678 |
+
(3): Sequential(
|
| 679 |
+
(0): Conv1d(256, 512, kernel_size=(41,), stride=(4,), padding=(20,), groups=16)
|
| 680 |
+
(1): LeakyReLU(negative_slope=0.1)
|
| 681 |
+
)
|
| 682 |
+
(4): Sequential(
|
| 683 |
+
(0): Conv1d(512, 1024, kernel_size=(41,), stride=(4,), padding=(20,), groups=16)
|
| 684 |
+
(1): LeakyReLU(negative_slope=0.1)
|
| 685 |
+
)
|
| 686 |
+
(5): Sequential(
|
| 687 |
+
(0): Conv1d(1024, 1024, kernel_size=(41,), stride=(1,), padding=(20,), groups=16)
|
| 688 |
+
(1): LeakyReLU(negative_slope=0.1)
|
| 689 |
+
)
|
| 690 |
+
(6): Sequential(
|
| 691 |
+
(0): Conv1d(1024, 1024, kernel_size=(5,), stride=(1,), padding=(2,))
|
| 692 |
+
(1): LeakyReLU(negative_slope=0.1)
|
| 693 |
+
)
|
| 694 |
+
(7): Conv1d(1024, 1, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 695 |
+
)
|
| 696 |
+
)
|
| 697 |
+
)
|
| 698 |
+
)
|
| 699 |
+
(mpd): HiFiGANMultiPeriodDiscriminator(
|
| 700 |
+
(discriminators): ModuleList(
|
| 701 |
+
(0): HiFiGANPeriodDiscriminator(
|
| 702 |
+
(convs): ModuleList(
|
| 703 |
+
(0): Sequential(
|
| 704 |
+
(0): Conv2d(1, 32, kernel_size=(5, 1), stride=(3, 1), padding=(2, 0))
|
| 705 |
+
(1): LeakyReLU(negative_slope=0.1)
|
| 706 |
+
)
|
| 707 |
+
(1): Sequential(
|
| 708 |
+
(0): Conv2d(32, 128, kernel_size=(5, 1), stride=(3, 1), padding=(2, 0))
|
| 709 |
+
(1): LeakyReLU(negative_slope=0.1)
|
| 710 |
+
)
|
| 711 |
+
(2): Sequential(
|
| 712 |
+
(0): Conv2d(128, 512, kernel_size=(5, 1), stride=(3, 1), padding=(2, 0))
|
| 713 |
+
(1): LeakyReLU(negative_slope=0.1)
|
| 714 |
+
)
|
| 715 |
+
(3): Sequential(
|
| 716 |
+
(0): Conv2d(512, 1024, kernel_size=(5, 1), stride=(3, 1), padding=(2, 0))
|
| 717 |
+
(1): LeakyReLU(negative_slope=0.1)
|
| 718 |
+
)
|
| 719 |
+
(4): Sequential(
|
| 720 |
+
(0): Conv2d(1024, 1024, kernel_size=(5, 1), stride=(1, 1), padding=(2, 0))
|
| 721 |
+
(1): LeakyReLU(negative_slope=0.1)
|
| 722 |
+
)
|
| 723 |
+
)
|
| 724 |
+
(output_conv): Conv2d(1024, 1, kernel_size=(2, 1), stride=(1, 1), padding=(1, 0))
|
| 725 |
+
)
|
| 726 |
+
(1): HiFiGANPeriodDiscriminator(
|
| 727 |
+
(convs): ModuleList(
|
| 728 |
+
(0): Sequential(
|
| 729 |
+
(0): Conv2d(1, 32, kernel_size=(5, 1), stride=(3, 1), padding=(2, 0))
|
| 730 |
+
(1): LeakyReLU(negative_slope=0.1)
|
| 731 |
+
)
|
| 732 |
+
(1): Sequential(
|
| 733 |
+
(0): Conv2d(32, 128, kernel_size=(5, 1), stride=(3, 1), padding=(2, 0))
|
| 734 |
+
(1): LeakyReLU(negative_slope=0.1)
|
| 735 |
+
)
|
| 736 |
+
(2): Sequential(
|
| 737 |
+
(0): Conv2d(128, 512, kernel_size=(5, 1), stride=(3, 1), padding=(2, 0))
|
| 738 |
+
(1): LeakyReLU(negative_slope=0.1)
|
| 739 |
+
)
|
| 740 |
+
(3): Sequential(
|
| 741 |
+
(0): Conv2d(512, 1024, kernel_size=(5, 1), stride=(3, 1), padding=(2, 0))
|
| 742 |
+
(1): LeakyReLU(negative_slope=0.1)
|
| 743 |
+
)
|
| 744 |
+
(4): Sequential(
|
| 745 |
+
(0): Conv2d(1024, 1024, kernel_size=(5, 1), stride=(1, 1), padding=(2, 0))
|
| 746 |
+
(1): LeakyReLU(negative_slope=0.1)
|
| 747 |
+
)
|
| 748 |
+
)
|
| 749 |
+
(output_conv): Conv2d(1024, 1, kernel_size=(2, 1), stride=(1, 1), padding=(1, 0))
|
| 750 |
+
)
|
| 751 |
+
(2): HiFiGANPeriodDiscriminator(
|
| 752 |
+
(convs): ModuleList(
|
| 753 |
+
(0): Sequential(
|
| 754 |
+
(0): Conv2d(1, 32, kernel_size=(5, 1), stride=(3, 1), padding=(2, 0))
|
| 755 |
+
(1): LeakyReLU(negative_slope=0.1)
|
| 756 |
+
)
|
| 757 |
+
(1): Sequential(
|
| 758 |
+
(0): Conv2d(32, 128, kernel_size=(5, 1), stride=(3, 1), padding=(2, 0))
|
| 759 |
+
(1): LeakyReLU(negative_slope=0.1)
|
| 760 |
+
)
|
| 761 |
+
(2): Sequential(
|
| 762 |
+
(0): Conv2d(128, 512, kernel_size=(5, 1), stride=(3, 1), padding=(2, 0))
|
| 763 |
+
(1): LeakyReLU(negative_slope=0.1)
|
| 764 |
+
)
|
| 765 |
+
(3): Sequential(
|
| 766 |
+
(0): Conv2d(512, 1024, kernel_size=(5, 1), stride=(3, 1), padding=(2, 0))
|
| 767 |
+
(1): LeakyReLU(negative_slope=0.1)
|
| 768 |
+
)
|
| 769 |
+
(4): Sequential(
|
| 770 |
+
(0): Conv2d(1024, 1024, kernel_size=(5, 1), stride=(1, 1), padding=(2, 0))
|
| 771 |
+
(1): LeakyReLU(negative_slope=0.1)
|
| 772 |
+
)
|
| 773 |
+
)
|
| 774 |
+
(output_conv): Conv2d(1024, 1, kernel_size=(2, 1), stride=(1, 1), padding=(1, 0))
|
| 775 |
+
)
|
| 776 |
+
(3): HiFiGANPeriodDiscriminator(
|
| 777 |
+
(convs): ModuleList(
|
| 778 |
+
(0): Sequential(
|
| 779 |
+
(0): Conv2d(1, 32, kernel_size=(5, 1), stride=(3, 1), padding=(2, 0))
|
| 780 |
+
(1): LeakyReLU(negative_slope=0.1)
|
| 781 |
+
)
|
| 782 |
+
(1): Sequential(
|
| 783 |
+
(0): Conv2d(32, 128, kernel_size=(5, 1), stride=(3, 1), padding=(2, 0))
|
| 784 |
+
(1): LeakyReLU(negative_slope=0.1)
|
| 785 |
+
)
|
| 786 |
+
(2): Sequential(
|
| 787 |
+
(0): Conv2d(128, 512, kernel_size=(5, 1), stride=(3, 1), padding=(2, 0))
|
| 788 |
+
(1): LeakyReLU(negative_slope=0.1)
|
| 789 |
+
)
|
| 790 |
+
(3): Sequential(
|
| 791 |
+
(0): Conv2d(512, 1024, kernel_size=(5, 1), stride=(3, 1), padding=(2, 0))
|
| 792 |
+
(1): LeakyReLU(negative_slope=0.1)
|
| 793 |
+
)
|
| 794 |
+
(4): Sequential(
|
| 795 |
+
(0): Conv2d(1024, 1024, kernel_size=(5, 1), stride=(1, 1), padding=(2, 0))
|
| 796 |
+
(1): LeakyReLU(negative_slope=0.1)
|
| 797 |
+
)
|
| 798 |
+
)
|
| 799 |
+
(output_conv): Conv2d(1024, 1, kernel_size=(2, 1), stride=(1, 1), padding=(1, 0))
|
| 800 |
+
)
|
| 801 |
+
(4): HiFiGANPeriodDiscriminator(
|
| 802 |
+
(convs): ModuleList(
|
| 803 |
+
(0): Sequential(
|
| 804 |
+
(0): Conv2d(1, 32, kernel_size=(5, 1), stride=(3, 1), padding=(2, 0))
|
| 805 |
+
(1): LeakyReLU(negative_slope=0.1)
|
| 806 |
+
)
|
| 807 |
+
(1): Sequential(
|
| 808 |
+
(0): Conv2d(32, 128, kernel_size=(5, 1), stride=(3, 1), padding=(2, 0))
|
| 809 |
+
(1): LeakyReLU(negative_slope=0.1)
|
| 810 |
+
)
|
| 811 |
+
(2): Sequential(
|
| 812 |
+
(0): Conv2d(128, 512, kernel_size=(5, 1), stride=(3, 1), padding=(2, 0))
|
| 813 |
+
(1): LeakyReLU(negative_slope=0.1)
|
| 814 |
+
)
|
| 815 |
+
(3): Sequential(
|
| 816 |
+
(0): Conv2d(512, 1024, kernel_size=(5, 1), stride=(3, 1), padding=(2, 0))
|
| 817 |
+
(1): LeakyReLU(negative_slope=0.1)
|
| 818 |
+
)
|
| 819 |
+
(4): Sequential(
|
| 820 |
+
(0): Conv2d(1024, 1024, kernel_size=(5, 1), stride=(1, 1), padding=(2, 0))
|
| 821 |
+
(1): LeakyReLU(negative_slope=0.1)
|
| 822 |
+
)
|
| 823 |
+
)
|
| 824 |
+
(output_conv): Conv2d(1024, 1, kernel_size=(2, 1), stride=(1, 1), padding=(1, 0))
|
| 825 |
+
)
|
| 826 |
+
)
|
| 827 |
+
)
|
| 828 |
+
)
|
| 829 |
+
(generator_adv_loss): GeneratorAdversarialLoss()
|
| 830 |
+
(discriminator_adv_loss): DiscriminatorAdversarialLoss()
|
| 831 |
+
(feat_match_loss): FeatureMatchLoss()
|
| 832 |
+
(mel_loss): MelSpectrogramLoss(
|
| 833 |
+
(wav_to_mel): LogMelFbank(
|
| 834 |
+
(stft): Stft(n_fft=1024, win_length=1024, hop_length=256, center=True, normalized=False, onesided=True)
|
| 835 |
+
(logmel): LogMel(sr=24000, n_fft=1024, n_mels=80, fmin=0, fmax=12000.0, htk=False)
|
| 836 |
+
)
|
| 837 |
+
)
|
| 838 |
+
(var_loss): VarianceLoss(
|
| 839 |
+
(mse_criterion): MSELoss()
|
| 840 |
+
(duration_criterion): DurationPredictorLoss(
|
| 841 |
+
(criterion): MSELoss()
|
| 842 |
+
)
|
| 843 |
+
)
|
| 844 |
+
(forwardsum_loss): ForwardSumLoss()
|
| 845 |
+
)
|
| 846 |
+
)
|
| 847 |
+
|
| 848 |
+
Model summary:
|
| 849 |
+
Class Name: ESPnetGANTTSModel
|
| 850 |
+
Total Number of model parameters: 83.28 M
|
| 851 |
+
Number of trainable parameters: 83.28 M (100.0%)
|
| 852 |
+
Size: 333.11 MB
|
| 853 |
+
Type: torch.float32
|
| 854 |
+
[92b100c97f43:0/4] 2025-03-04 21:30:47,870 (abs_task:1161) INFO: Optimizer:
|
| 855 |
+
AdamW (
|
| 856 |
+
Parameter Group 0
|
| 857 |
+
amsgrad: False
|
| 858 |
+
betas: [0.8, 0.99]
|
| 859 |
+
eps: 1e-09
|
| 860 |
+
initial_lr: 0.0002
|
| 861 |
+
lr: 0.0002
|
| 862 |
+
weight_decay: 0.0
|
| 863 |
+
)
|
| 864 |
+
[92b100c97f43:0/4] 2025-03-04 21:30:47,870 (abs_task:1162) INFO: Scheduler: <torch.optim.lr_scheduler.ExponentialLR object at 0x7f3cb00fb550>
|
| 865 |
+
[92b100c97f43:0/4] 2025-03-04 21:30:47,870 (abs_task:1161) INFO: Optimizer2:
|
| 866 |
+
AdamW (
|
| 867 |
+
Parameter Group 0
|
| 868 |
+
amsgrad: False
|
| 869 |
+
betas: [0.8, 0.99]
|
| 870 |
+
eps: 1e-09
|
| 871 |
+
initial_lr: 0.0002
|
| 872 |
+
lr: 0.0002
|
| 873 |
+
weight_decay: 0.0
|
| 874 |
+
)
|
| 875 |
+
[92b100c97f43:0/4] 2025-03-04 21:30:47,870 (abs_task:1162) INFO: Scheduler2: <torch.optim.lr_scheduler.ExponentialLR object at 0x7f3dab0d89d0>
|
| 876 |
+
[92b100c97f43:0/4] 2025-03-04 21:30:47,871 (abs_task:1171) INFO: Saving the configuration in exp/tts_train_jets_raw_phn_jaconv_pyopenjtalk/config.yaml
|
| 877 |
+
[92b100c97f43:0/4] 2025-03-04 21:30:48,205 (abs_task:1525) INFO: [train] dataset:
|
| 878 |
+
ESPnetDataset(
|
| 879 |
+
text: {"path": "dump/raw/jvs010_tr_no_dev/text", "type": "text"}
|
| 880 |
+
speech: {"path": "dump/raw/jvs010_tr_no_dev/wav.scp", "type": "sound"}
|
| 881 |
+
preprocess: <espnet2.train.preprocessor.CommonPreprocessor object at 0x7f3cb00fbdc0>)
|
| 882 |
+
[92b100c97f43:0/4] 2025-03-04 21:30:48,205 (abs_task:1526) INFO: [train] Batch sampler: NumElementsBatchSampler(N-batch=3, batch_bins=9000000, sort_in_batch=descending, sort_batch=descending)
|
| 883 |
+
[92b100c97f43:0/4] 2025-03-04 21:30:48,205 (abs_task:1527) INFO: [train] mini-batch sizes summary: N-batch=3, mean=33.3, min=6, max=53
|
| 884 |
+
[92b100c97f43:0/4] 2025-03-04 21:30:48,225 (abs_task:1525) INFO: [valid] dataset:
|
| 885 |
+
ESPnetDataset(
|
| 886 |
+
text: {"path": "dump/raw/jvs010_dev/text", "type": "text"}
|
| 887 |
+
speech: {"path": "dump/raw/jvs010_dev/wav.scp", "type": "sound"}
|
| 888 |
+
preprocess: <espnet2.train.preprocessor.CommonPreprocessor object at 0x7f3cb00fb520>)
|
| 889 |
+
[92b100c97f43:0/4] 2025-03-04 21:30:48,225 (abs_task:1526) INFO: [valid] Batch sampler: NumElementsBatchSampler(N-batch=1, batch_bins=9000000, sort_in_batch=descending, sort_batch=descending)
|
| 890 |
+
[92b100c97f43:0/4] 2025-03-04 21:30:48,225 (abs_task:1527) INFO: [valid] mini-batch sizes summary: N-batch=1, mean=15.0, min=15, max=15
|
| 891 |
+
[92b100c97f43:0/4] 2025-03-04 21:30:48,244 (abs_task:1525) INFO: [plot_att] dataset:
|
| 892 |
+
ESPnetDataset(
|
| 893 |
+
text: {"path": "dump/raw/jvs010_dev/text", "type": "text"}
|
| 894 |
+
speech: {"path": "dump/raw/jvs010_dev/wav.scp", "type": "sound"}
|
| 895 |
+
preprocess: <espnet2.train.preprocessor.CommonPreprocessor object at 0x7f3cb00b7130>)
|
| 896 |
+
[92b100c97f43:0/4] 2025-03-04 21:30:48,244 (abs_task:1526) INFO: [plot_att] Batch sampler: UnsortedBatchSampler(N-batch=15, batch_size=1, key_file=exp/tts_stats_raw_phn_jaconv_pyopenjtalk/valid/text_shape.phn,
|
| 897 |
+
[92b100c97f43:0/4] 2025-03-04 21:30:48,244 (abs_task:1527) INFO: [plot_att] mini-batch sizes summary: N-batch=3, mean=1.0, min=1, max=1
|
| 898 |
+
92b100c97f43:1179446:1179446 [0] NCCL INFO Bootstrap : Using eth0:172.17.0.2<0>
|
| 899 |
+
92b100c97f43:1179446:1179446 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
|
| 900 |
+
|
| 901 |
+
92b100c97f43:1179446:1179446 [0] misc/ibvwrap.cc:63 NCCL WARN Failed to open libibverbs.so[.1]
|
| 902 |
+
92b100c97f43:1179446:1179446 [0] NCCL INFO NET/Socket : Using [0]eth0:172.17.0.2<0>
|
| 903 |
+
92b100c97f43:1179446:1179446 [0] NCCL INFO Using network Socket
|
| 904 |
+
NCCL version 2.10.3+cuda11.3
|
| 905 |
+
92b100c97f43:1179447:1179447 [1] NCCL INFO Bootstrap : Using eth0:172.17.0.2<0>
|
| 906 |
+
92b100c97f43:1179449:1179449 [3] NCCL INFO Bootstrap : Using eth0:172.17.0.2<0>
|
| 907 |
+
92b100c97f43:1179447:1179447 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
|
| 908 |
+
92b100c97f43:1179449:1179449 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
|
| 909 |
+
|
| 910 |
+
92b100c97f43:1179447:1179447 [1] misc/ibvwrap.cc:63 NCCL WARN Failed to open libibverbs.so[.1]
|
| 911 |
+
|
| 912 |
+
92b100c97f43:1179449:1179449 [3] misc/ibvwrap.cc:63 NCCL WARN Failed to open libibverbs.so[.1]
|
| 913 |
+
92b100c97f43:1179447:1179447 [1] NCCL INFO NET/Socket : Using [0]eth0:172.17.0.2<0>
|
| 914 |
+
92b100c97f43:1179449:1179449 [3] NCCL INFO NET/Socket : Using [0]eth0:172.17.0.2<0>
|
| 915 |
+
92b100c97f43:1179447:1179447 [1] NCCL INFO Using network Socket
|
| 916 |
+
92b100c97f43:1179449:1179449 [3] NCCL INFO Using network Socket
|
| 917 |
+
92b100c97f43:1179448:1179448 [2] NCCL INFO Bootstrap : Using eth0:172.17.0.2<0>
|
| 918 |
+
92b100c97f43:1179448:1179448 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
|
| 919 |
+
|
| 920 |
+
92b100c97f43:1179448:1179448 [2] misc/ibvwrap.cc:63 NCCL WARN Failed to open libibverbs.so[.1]
|
| 921 |
+
92b100c97f43:1179448:1179448 [2] NCCL INFO NET/Socket : Using [0]eth0:172.17.0.2<0>
|
| 922 |
+
92b100c97f43:1179448:1179448 [2] NCCL INFO Using network Socket
|
| 923 |
+
92b100c97f43:1179446:1179484 [0] NCCL INFO Channel 00/02 : 0 1 2 3
|
| 924 |
+
92b100c97f43:1179447:1179485 [1] NCCL INFO Trees [0] 2/-1/-1->1->0 [1] 2/-1/-1->1->0
|
| 925 |
+
92b100c97f43:1179448:1179487 [2] NCCL INFO Trees [0] 3/-1/-1->2->1 [1] 3/-1/-1->2->1
|
| 926 |
+
92b100c97f43:1179449:1179486 [3] NCCL INFO Trees [0] -1/-1/-1->3->2 [1] -1/-1/-1->3->2
|
| 927 |
+
92b100c97f43:1179446:1179484 [0] NCCL INFO Channel 01/02 : 0 1 2 3
|
| 928 |
+
92b100c97f43:1179447:1179485 [1] NCCL INFO Setting affinity for GPU 1 to ffff,ffffffff
|
| 929 |
+
92b100c97f43:1179448:1179487 [2] NCCL INFO Setting affinity for GPU 2 to ffff,ffffffff
|
| 930 |
+
92b100c97f43:1179446:1179484 [0] NCCL INFO Trees [0] 1/-1/-1->0->-1 [1] 1/-1/-1->0->-1
|
| 931 |
+
92b100c97f43:1179449:1179486 [3] NCCL INFO Setting affinity for GPU 3 to ffff,ffffffff
|
| 932 |
+
92b100c97f43:1179446:1179484 [0] NCCL INFO Setting affinity for GPU 0 to ffff,ffffffff
|
| 933 |
+
92b100c97f43:1179449:1179486 [3] NCCL INFO Channel 00 : 3[60] -> 0[30] via direct shared memory
|
| 934 |
+
92b100c97f43:1179448:1179487 [2] NCCL INFO Channel 00 : 2[50] -> 3[60] via direct shared memory
|
| 935 |
+
92b100c97f43:1179449:1179486 [3] NCCL INFO Channel 01 : 3[60] -> 0[30] via direct shared memory
|
| 936 |
+
92b100c97f43:1179448:1179487 [2] NCCL INFO Channel 01 : 2[50] -> 3[60] via direct shared memory
|
| 937 |
+
92b100c97f43:1179447:1179485 [1] NCCL INFO Channel 00 : 1[40] -> 2[50] via direct shared memory
|
| 938 |
+
92b100c97f43:1179446:1179484 [0] NCCL INFO Channel 00 : 0[30] -> 1[40] via direct shared memory
|
| 939 |
+
92b100c97f43:1179446:1179484 [0] NCCL INFO Channel 01 : 0[30] -> 1[40] via direct shared memory
|
| 940 |
+
92b100c97f43:1179447:1179485 [1] NCCL INFO Channel 01 : 1[40] -> 2[50] via direct shared memory
|
| 941 |
+
92b100c97f43:1179448:1179487 [2] NCCL INFO Connected all rings
|
| 942 |
+
92b100c97f43:1179447:1179485 [1] NCCL INFO Connected all rings
|
| 943 |
+
92b100c97f43:1179446:1179484 [0] NCCL INFO Connected all rings
|
| 944 |
+
92b100c97f43:1179448:1179487 [2] NCCL INFO Channel 00 : 2[50] -> 1[40] via direct shared memory
|
| 945 |
+
92b100c97f43:1179448:1179487 [2] NCCL INFO Channel 01 : 2[50] -> 1[40] via direct shared memory
|
| 946 |
+
92b100c97f43:1179449:1179486 [3] NCCL INFO Connected all rings
|
| 947 |
+
92b100c97f43:1179449:1179486 [3] NCCL INFO Channel 00 : 3[60] -> 2[50] via direct shared memory
|
| 948 |
+
92b100c97f43:1179449:1179486 [3] NCCL INFO Channel 01 : 3[60] -> 2[50] via direct shared memory
|
| 949 |
+
92b100c97f43:1179449:1179486 [3] NCCL INFO Connected all trees
|
| 950 |
+
92b100c97f43:1179449:1179486 [3] NCCL INFO threadThresholds 8/8/64 | 32/8/64 | 8/8/512
|
| 951 |
+
92b100c97f43:1179449:1179486 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
|
| 952 |
+
92b100c97f43:1179447:1179485 [1] NCCL INFO Channel 00 : 1[40] -> 0[30] via direct shared memory
|
| 953 |
+
92b100c97f43:1179447:1179485 [1] NCCL INFO Channel 01 : 1[40] -> 0[30] via direct shared memory
|
| 954 |
+
92b100c97f43:1179446:1179484 [0] NCCL INFO Connected all trees
|
| 955 |
+
92b100c97f43:1179446:1179484 [0] NCCL INFO threadThresholds 8/8/64 | 32/8/64 | 8/8/512
|
| 956 |
+
92b100c97f43:1179446:1179484 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
|
| 957 |
+
92b100c97f43:1179447:1179485 [1] NCCL INFO Connected all trees
|
| 958 |
+
92b100c97f43:1179448:1179487 [2] NCCL INFO Connected all trees
|
| 959 |
+
92b100c97f43:1179447:1179485 [1] NCCL INFO threadThresholds 8/8/64 | 32/8/64 | 8/8/512
|
| 960 |
+
92b100c97f43:1179447:1179485 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
|
| 961 |
+
92b100c97f43:1179448:1179487 [2] NCCL INFO threadThresholds 8/8/64 | 32/8/64 | 8/8/512
|
| 962 |
+
92b100c97f43:1179448:1179487 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
|
| 963 |
+
92b100c97f43:1179449:1179486 [3] NCCL INFO comm 0x7f41ac0030d0 rank 3 nranks 4 cudaDev 3 busId 60 - Init COMPLETE
|
| 964 |
+
92b100c97f43:1179448:1179487 [2] NCCL INFO comm 0x7fc9b80030d0 rank 2 nranks 4 cudaDev 2 busId 50 - Init COMPLETE
|
| 965 |
+
92b100c97f43:1179447:1179485 [1] NCCL INFO comm 0x7fe4540030d0 rank 1 nranks 4 cudaDev 1 busId 40 - Init COMPLETE
|
| 966 |
+
92b100c97f43:1179446:1179484 [0] NCCL INFO comm 0x7f3bc40030d0 rank 0 nranks 4 cudaDev 0 busId 30 - Init COMPLETE
|
| 967 |
+
92b100c97f43:1179446:1179446 [0] NCCL INFO Launch mode Parallel
|
| 968 |
+
[92b100c97f43:0/4] 2025-03-04 21:30:48,689 (trainer:280) INFO: 1/130epoch started
|
| 969 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 970 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 971 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 972 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 973 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 974 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 975 |
+
[W reducer.cpp:1303] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator())
|
| 976 |
+
[W reducer.cpp:1303] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator())
|
| 977 |
+
[W reducer.cpp:1303] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator())
|
| 978 |
+
[W reducer.cpp:1303] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator())
|
| 979 |
+
/work/espnet/espnet2/layers/stft.py:166: UserWarning: __floordiv__ is deprecated, and its behavior will change in a future version of pytorch. It currently rounds toward 0 (like the 'trunc' function NOT 'floor'). This results in incorrect rounding for negative values. To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor').
|
| 980 |
+
olens = (ilens - self.n_fft) // self.hop_length + 1
|
| 981 |
+
/work/espnet/espnet2/layers/stft.py:166: UserWarning: __floordiv__ is deprecated, and its behavior will change in a future version of pytorch. It currently rounds toward 0 (like the 'trunc' function NOT 'floor'). This results in incorrect rounding for negative values. To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor').
|
| 982 |
+
olens = (ilens - self.n_fft) // self.hop_length + 1
|
| 983 |
+
[92b100c97f43:0/4] 2025-03-04 21:32:50,772 (gan_trainer:305) INFO: 1epoch:train:1-50batch: iter_time=0.045, generator_forward_time=0.898, generator_loss=140.208, generator_g_loss=110.977, generator_var_loss=5.358, generator_align_loss=23.874, generator_g_mel_loss=107.112, generator_g_adv_loss=2.215, generator_g_feat_match_loss=1.649, generator_var_dur_loss=0.605, generator_var_pitch_loss=2.439, generator_var_energy_loss=2.314, generator_align_forwardsum_loss=10.578, generator_align_bin_loss=1.359, generator_backward_time=0.317, generator_optim_step_time=0.033, optim0_lr0=2.000e-04, generator_train_time=1.350, discriminator_forward_time=0.690, discriminator_loss=2.784, discriminator_real_loss=1.570, discriminator_fake_loss=1.214, discriminator_backward_time=0.228, discriminator_optim_step_time=0.008, optim1_lr0=2.000e-04, discriminator_train_time=0.965, train_time=2.439
|
| 984 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 985 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 986 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 987 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 988 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 989 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 990 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 991 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 992 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 993 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 994 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 995 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 996 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 997 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 998 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 999 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1000 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1001 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1002 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1003 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1004 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1005 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1006 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1007 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1008 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1009 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1010 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1011 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1012 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1013 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1014 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1015 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1016 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1017 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1018 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1019 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1020 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1021 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1022 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1023 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1024 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1025 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1026 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1027 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1028 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1029 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1030 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1031 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1032 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1033 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1034 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1035 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1036 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1037 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1038 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1039 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1040 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1041 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1042 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1043 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1044 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1045 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1046 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1047 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1048 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1049 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1050 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1051 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1052 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1053 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1054 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1055 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1056 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1057 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1058 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1059 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1060 |
+
Traceback (most recent call last):
|
| 1061 |
+
File "/usr/lib/python3.8/runpy.py", line 194, in _run_module_as_main
|
| 1062 |
+
return _run_code(code, main_globals, None,
|
| 1063 |
+
File "/usr/lib/python3.8/runpy.py", line 87, in _run_code
|
| 1064 |
+
exec(code, run_globals)
|
| 1065 |
+
File "/work/espnet/espnet2/bin/gan_tts_train.py", line 22, in <module>
|
| 1066 |
+
main()
|
| 1067 |
+
File "/work/espnet/espnet2/bin/gan_tts_train.py", line 18, in main
|
| 1068 |
+
GANTTSTask.main(cmd=cmd)
|
| 1069 |
+
File "/work/espnet/espnet2/tasks/abs_task.py", line 1069, in main
|
| 1070 |
+
while not ProcessContext(processes, error_queues).join():
|
| 1071 |
+
File "/usr/local/lib/python3.8/dist-packages/torch/multiprocessing/spawn.py", line 99, in join
|
| 1072 |
+
ready = multiprocessing.connection.wait(
|
| 1073 |
+
File "/usr/lib/python3.8/multiprocessing/connection.py", line 931, in wait
|
| 1074 |
+
ready = selector.select(timeout)
|
| 1075 |
+
File "/usr/lib/python3.8/selectors.py", line 415, in select
|
| 1076 |
+
fd_event_list = self._selector.poll(timeout)
|
| 1077 |
+
KeyboardInterrupt
|
| 1078 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1079 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1080 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1081 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1082 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1083 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1084 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1085 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1086 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1087 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1088 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1089 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1090 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1091 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1092 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1093 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1094 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1095 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1096 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1097 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1098 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1099 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1100 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1101 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1102 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1103 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1104 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1105 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1106 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1107 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1108 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1109 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1110 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1111 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1112 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1113 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1114 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1115 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1116 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1117 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1118 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1119 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1120 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1121 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1122 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1123 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1124 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1125 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1126 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1127 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1128 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1129 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1130 |
+
/work/espnet/espnet2/layers/stft.py:166: UserWarning: __floordiv__ is deprecated, and its behavior will change in a future version of pytorch. It currently rounds toward 0 (like the 'trunc' function NOT 'floor'). This results in incorrect rounding for negative values. To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor').
|
| 1131 |
+
olens = (ilens - self.n_fft) // self.hop_length + 1
|
| 1132 |
+
/work/espnet/espnet2/layers/stft.py:166: UserWarning: __floordiv__ is deprecated, and its behavior will change in a future version of pytorch. It currently rounds toward 0 (like the 'trunc' function NOT 'floor'). This results in incorrect rounding for negative values. To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor').
|
| 1133 |
+
olens = (ilens - self.n_fft) // self.hop_length + 1
|
| 1134 |
+
Process SpawnProcess-2:
|
| 1135 |
+
Traceback (most recent call last):
|
| 1136 |
+
File "/usr/lib/python3.8/multiprocessing/process.py", line 315, in _bootstrap
|
| 1137 |
+
self.run()
|
| 1138 |
+
File "/usr/lib/python3.8/multiprocessing/process.py", line 108, in run
|
| 1139 |
+
self._target(*self._args, **self._kwargs)
|
| 1140 |
+
File "/work/espnet/espnet2/tasks/abs_task.py", line 1315, in main_worker
|
| 1141 |
+
cls.trainer.run(
|
| 1142 |
+
File "/work/espnet/espnet2/train/trainer.py", line 286, in run
|
| 1143 |
+
all_steps_are_invalid = cls.train_one_epoch(
|
| 1144 |
+
File "/work/espnet/espnet2/train/gan_trainer.py", line 202, in train_one_epoch
|
| 1145 |
+
stats, weight = recursive_average(stats, weight, distributed)
|
| 1146 |
+
File "/work/espnet/espnet2/torch_utils/recursive_op.py", line 41, in recursive_average
|
| 1147 |
+
obj = recursive_sum(obj, weight, distributed)
|
| 1148 |
+
File "/work/espnet/espnet2/torch_utils/recursive_op.py", line 13, in recursive_sum
|
| 1149 |
+
return {k: recursive_sum(v, weight, distributed) for k, v in obj.items()}
|
| 1150 |
+
File "/work/espnet/espnet2/torch_utils/recursive_op.py", line 13, in <dictcomp>
|
| 1151 |
+
return {k: recursive_sum(v, weight, distributed) for k, v in obj.items()}
|
| 1152 |
+
File "/work/espnet/espnet2/torch_utils/recursive_op.py", line 18, in recursive_sum
|
| 1153 |
+
torch.distributed.all_reduce(obj, op=ReduceOp.SUM)
|
| 1154 |
+
File "/usr/local/lib/python3.8/dist-packages/torch/distributed/distributed_c10d.py", line 1292, in all_reduce
|
| 1155 |
+
work.wait()
|
| 1156 |
+
KeyboardInterrupt
|
| 1157 |
+
/work/espnet/espnet2/layers/stft.py:166: UserWarning: __floordiv__ is deprecated, and its behavior will change in a future version of pytorch. It currently rounds toward 0 (like the 'trunc' function NOT 'floor'). This results in incorrect rounding for negative values. To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor').
|
| 1158 |
+
olens = (ilens - self.n_fft) // self.hop_length + 1
|
| 1159 |
+
/work/espnet/espnet2/layers/stft.py:166: UserWarning: __floordiv__ is deprecated, and its behavior will change in a future version of pytorch. It currently rounds toward 0 (like the 'trunc' function NOT 'floor'). This results in incorrect rounding for negative values. To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor').
|
| 1160 |
+
olens = (ilens - self.n_fft) // self.hop_length + 1
|
| 1161 |
+
Process SpawnProcess-4:
|
| 1162 |
+
Traceback (most recent call last):
|
| 1163 |
+
File "/usr/lib/python3.8/multiprocessing/process.py", line 315, in _bootstrap
|
| 1164 |
+
self.run()
|
| 1165 |
+
File "/usr/lib/python3.8/multiprocessing/process.py", line 108, in run
|
| 1166 |
+
self._target(*self._args, **self._kwargs)
|
| 1167 |
+
File "/work/espnet/espnet2/tasks/abs_task.py", line 1315, in main_worker
|
| 1168 |
+
cls.trainer.run(
|
| 1169 |
+
File "/work/espnet/espnet2/train/trainer.py", line 286, in run
|
| 1170 |
+
all_steps_are_invalid = cls.train_one_epoch(
|
| 1171 |
+
File "/work/espnet/espnet2/train/gan_trainer.py", line 202, in train_one_epoch
|
| 1172 |
+
stats, weight = recursive_average(stats, weight, distributed)
|
| 1173 |
+
File "/work/espnet/espnet2/torch_utils/recursive_op.py", line 41, in recursive_average
|
| 1174 |
+
obj = recursive_sum(obj, weight, distributed)
|
| 1175 |
+
File "/work/espnet/espnet2/torch_utils/recursive_op.py", line 13, in recursive_sum
|
| 1176 |
+
return {k: recursive_sum(v, weight, distributed) for k, v in obj.items()}
|
| 1177 |
+
File "/work/espnet/espnet2/torch_utils/recursive_op.py", line 13, in <dictcomp>
|
| 1178 |
+
return {k: recursive_sum(v, weight, distributed) for k, v in obj.items()}
|
| 1179 |
+
File "/work/espnet/espnet2/torch_utils/recursive_op.py", line 18, in recursive_sum
|
| 1180 |
+
torch.distributed.all_reduce(obj, op=ReduceOp.SUM)
|
| 1181 |
+
File "/usr/local/lib/python3.8/dist-packages/torch/distributed/distributed_c10d.py", line 1292, in all_reduce
|
| 1182 |
+
work.wait()
|
| 1183 |
+
KeyboardInterrupt
|
| 1184 |
+
/work/espnet/espnet2/layers/stft.py:166: UserWarning: __floordiv__ is deprecated, and its behavior will change in a future version of pytorch. It currently rounds toward 0 (like the 'trunc' function NOT 'floor'). This results in incorrect rounding for negative values. To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor').
|
| 1185 |
+
olens = (ilens - self.n_fft) // self.hop_length + 1
|
| 1186 |
+
/work/espnet/espnet2/layers/stft.py:166: UserWarning: __floordiv__ is deprecated, and its behavior will change in a future version of pytorch. It currently rounds toward 0 (like the 'trunc' function NOT 'floor'). This results in incorrect rounding for negative values. To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor').
|
| 1187 |
+
olens = (ilens - self.n_fft) // self.hop_length + 1
|
| 1188 |
+
Process SpawnProcess-3:
|
| 1189 |
+
Traceback (most recent call last):
|
| 1190 |
+
File "/work/espnet/espnet2/train/gan_trainer.py", line 202, in train_one_epoch
|
| 1191 |
+
stats, weight = recursive_average(stats, weight, distributed)
|
| 1192 |
+
File "/work/espnet/espnet2/torch_utils/recursive_op.py", line 41, in recursive_average
|
| 1193 |
+
obj = recursive_sum(obj, weight, distributed)
|
| 1194 |
+
File "/work/espnet/espnet2/torch_utils/recursive_op.py", line 13, in recursive_sum
|
| 1195 |
+
return {k: recursive_sum(v, weight, distributed) for k, v in obj.items()}
|
| 1196 |
+
File "/work/espnet/espnet2/torch_utils/recursive_op.py", line 13, in <dictcomp>
|
| 1197 |
+
return {k: recursive_sum(v, weight, distributed) for k, v in obj.items()}
|
| 1198 |
+
File "/work/espnet/espnet2/torch_utils/recursive_op.py", line 18, in recursive_sum
|
| 1199 |
+
torch.distributed.all_reduce(obj, op=ReduceOp.SUM)
|
| 1200 |
+
File "/usr/local/lib/python3.8/dist-packages/torch/distributed/distributed_c10d.py", line 1292, in all_reduce
|
| 1201 |
+
work.wait()
|
| 1202 |
+
RuntimeError: [Rank 2] Caught collective operation timeout: WorkNCCL(OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800396 milliseconds before timing out.
|
| 1203 |
+
|
| 1204 |
+
During handling of the above exception, another exception occurred:
|
| 1205 |
+
|
| 1206 |
+
Traceback (most recent call last):
|
| 1207 |
+
File "/usr/lib/python3.8/multiprocessing/process.py", line 315, in _bootstrap
|
| 1208 |
+
self.run()
|
| 1209 |
+
File "/usr/lib/python3.8/multiprocessing/process.py", line 108, in run
|
| 1210 |
+
self._target(*self._args, **self._kwargs)
|
| 1211 |
+
File "/work/espnet/espnet2/tasks/abs_task.py", line 1315, in main_worker
|
| 1212 |
+
cls.trainer.run(
|
| 1213 |
+
File "/work/espnet/espnet2/train/trainer.py", line 286, in run
|
| 1214 |
+
all_steps_are_invalid = cls.train_one_epoch(
|
| 1215 |
+
File "/work/espnet/espnet2/train/gan_trainer.py", line 202, in train_one_epoch
|
| 1216 |
+
stats, weight = recursive_average(stats, weight, distributed)
|
| 1217 |
+
KeyboardInterrupt
|
| 1218 |
+
Process SpawnProcess-1:
|
| 1219 |
+
Traceback (most recent call last):
|
| 1220 |
+
File "/work/espnet/espnet2/train/gan_trainer.py", line 202, in train_one_epoch
|
| 1221 |
+
stats, weight = recursive_average(stats, weight, distributed)
|
| 1222 |
+
File "/work/espnet/espnet2/torch_utils/recursive_op.py", line 41, in recursive_average
|
| 1223 |
+
obj = recursive_sum(obj, weight, distributed)
|
| 1224 |
+
File "/work/espnet/espnet2/torch_utils/recursive_op.py", line 13, in recursive_sum
|
| 1225 |
+
return {k: recursive_sum(v, weight, distributed) for k, v in obj.items()}
|
| 1226 |
+
File "/work/espnet/espnet2/torch_utils/recursive_op.py", line 13, in <dictcomp>
|
| 1227 |
+
return {k: recursive_sum(v, weight, distributed) for k, v in obj.items()}
|
| 1228 |
+
File "/work/espnet/espnet2/torch_utils/recursive_op.py", line 18, in recursive_sum
|
| 1229 |
+
torch.distributed.all_reduce(obj, op=ReduceOp.SUM)
|
| 1230 |
+
File "/usr/local/lib/python3.8/dist-packages/torch/distributed/distributed_c10d.py", line 1292, in all_reduce
|
| 1231 |
+
work.wait()
|
| 1232 |
+
RuntimeError: [Rank 0] Caught collective operation timeout: WorkNCCL(OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800456 milliseconds before timing out.
|
| 1233 |
+
|
| 1234 |
+
During handling of the above exception, another exception occurred:
|
| 1235 |
+
|
| 1236 |
+
Traceback (most recent call last):
|
| 1237 |
+
File "/usr/lib/python3.8/multiprocessing/process.py", line 315, in _bootstrap
|
| 1238 |
+
self.run()
|
| 1239 |
+
File "/usr/lib/python3.8/multiprocessing/process.py", line 108, in run
|
| 1240 |
+
self._target(*self._args, **self._kwargs)
|
| 1241 |
+
File "/work/espnet/espnet2/tasks/abs_task.py", line 1315, in main_worker
|
| 1242 |
+
cls.trainer.run(
|
| 1243 |
+
File "/work/espnet/espnet2/train/trainer.py", line 286, in run
|
| 1244 |
+
all_steps_are_invalid = cls.train_one_epoch(
|
| 1245 |
+
File "/work/espnet/espnet2/train/gan_trainer.py", line 202, in train_one_epoch
|
| 1246 |
+
stats, weight = recursive_average(stats, weight, distributed)
|
| 1247 |
+
KeyboardInterrupt
|
exp/tts_train_jets_raw_phn_jaconv_pyopenjtalk/train.4.log
ADDED
|
@@ -0,0 +1,1212 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# python3 -m espnet2.bin.gan_tts_train --use_preprocessor true --token_type phn --token_list dump/token_list/phn_jaconv_pyopenjtalk/tokens.txt --non_linguistic_symbols none --cleaner jaconv --g2p pyopenjtalk --normalize global_mvn --resume true --fold_length 150 --fold_length 240000 --output_dir exp/tts_train_jets_raw_phn_jaconv_pyopenjtalk --config conf/tuning/train_jets.yaml --feats_extract fbank --feats_extract_conf n_fft=2048 --feats_extract_conf hop_length=300 --feats_extract_conf win_length=1200 --feats_extract_conf fs=24000 --feats_extract_conf fmin=80 --feats_extract_conf fmax=7600 --feats_extract_conf n_mels=80 --train_data_path_and_name_and_type dump/raw/jvs010_tr_no_dev/text,text,text --train_data_path_and_name_and_type dump/raw/jvs010_tr_no_dev/wav.scp,speech,sound --train_shape_file exp/tts_stats_raw_phn_jaconv_pyopenjtalk/train/text_shape.phn --train_shape_file exp/tts_stats_raw_phn_jaconv_pyopenjtalk/train/speech_shape --valid_data_path_and_name_and_type dump/raw/jvs010_dev/text,text,text --valid_data_path_and_name_and_type dump/raw/jvs010_dev/wav.scp,speech,sound --valid_shape_file exp/tts_stats_raw_phn_jaconv_pyopenjtalk/valid/text_shape.phn --valid_shape_file exp/tts_stats_raw_phn_jaconv_pyopenjtalk/valid/speech_shape --pitch_extract_conf fs=24000 --pitch_extract_conf n_fft=2048 --pitch_extract_conf hop_length=300 --pitch_extract_conf f0max=400 --pitch_extract_conf f0min=80 --pitch_normalize_conf stats_file=exp/tts_stats_raw_phn_jaconv_pyopenjtalk/train/pitch_stats.npz --energy_extract_conf fs=24000 --energy_extract_conf n_fft=2048 --energy_extract_conf hop_length=300 --energy_extract_conf win_length=1200 --energy_normalize_conf stats_file=exp/tts_stats_raw_phn_jaconv_pyopenjtalk/train/energy_stats.npz --normalize_conf stats_file=exp/tts_stats_raw_phn_jaconv_pyopenjtalk/train/feats_stats.npz --ngpu 4 --multiprocessing_distributed True
|
| 2 |
+
# Started at Tue Mar 4 21:23:38 JST 2025
|
| 3 |
+
#
|
| 4 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 5 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 6 |
+
/usr/bin/python3 /work/espnet/espnet2/bin/gan_tts_train.py --use_preprocessor true --token_type phn --token_list dump/token_list/phn_jaconv_pyopenjtalk/tokens.txt --non_linguistic_symbols none --cleaner jaconv --g2p pyopenjtalk --normalize global_mvn --resume true --fold_length 150 --fold_length 240000 --output_dir exp/tts_train_jets_raw_phn_jaconv_pyopenjtalk --config conf/tuning/train_jets.yaml --feats_extract fbank --feats_extract_conf n_fft=2048 --feats_extract_conf hop_length=300 --feats_extract_conf win_length=1200 --feats_extract_conf fs=24000 --feats_extract_conf fmin=80 --feats_extract_conf fmax=7600 --feats_extract_conf n_mels=80 --train_data_path_and_name_and_type dump/raw/jvs010_tr_no_dev/text,text,text --train_data_path_and_name_and_type dump/raw/jvs010_tr_no_dev/wav.scp,speech,sound --train_shape_file exp/tts_stats_raw_phn_jaconv_pyopenjtalk/train/text_shape.phn --train_shape_file exp/tts_stats_raw_phn_jaconv_pyopenjtalk/train/speech_shape --valid_data_path_and_name_and_type dump/raw/jvs010_dev/text,text,text --valid_data_path_and_name_and_type dump/raw/jvs010_dev/wav.scp,speech,sound --valid_shape_file exp/tts_stats_raw_phn_jaconv_pyopenjtalk/valid/text_shape.phn --valid_shape_file exp/tts_stats_raw_phn_jaconv_pyopenjtalk/valid/speech_shape --pitch_extract_conf fs=24000 --pitch_extract_conf n_fft=2048 --pitch_extract_conf hop_length=300 --pitch_extract_conf f0max=400 --pitch_extract_conf f0min=80 --pitch_normalize_conf stats_file=exp/tts_stats_raw_phn_jaconv_pyopenjtalk/train/pitch_stats.npz --energy_extract_conf fs=24000 --energy_extract_conf n_fft=2048 --energy_extract_conf hop_length=300 --energy_extract_conf win_length=1200 --energy_normalize_conf stats_file=exp/tts_stats_raw_phn_jaconv_pyopenjtalk/train/energy_stats.npz --normalize_conf stats_file=exp/tts_stats_raw_phn_jaconv_pyopenjtalk/train/feats_stats.npz --ngpu 4 --multiprocessing_distributed True
|
| 7 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 8 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 9 |
+
[92b100c97f43:0/4] 2025-03-04 21:23:44,840 (distributed_c10d:217) INFO: Added key: store_based_barrier_key:1 to store for rank: 0
|
| 10 |
+
[92b100c97f43:0/4] 2025-03-04 21:23:44,840 (distributed_c10d:251) INFO: Rank 0: Completed store-based barrier for key:store_based_barrier_key:1 with 4 nodes.
|
| 11 |
+
[92b100c97f43:0/4] 2025-03-04 21:23:44,896 (gan_tts:304) INFO: Vocabulary size: 41
|
| 12 |
+
[92b100c97f43:0/4] 2025-03-04 21:23:45,027 (encoder:172) INFO: encoder self-attention layer type = self-attention
|
| 13 |
+
[92b100c97f43:0/4] 2025-03-04 21:23:45,249 (encoder:172) INFO: encoder self-attention layer type = self-attention
|
| 14 |
+
[92b100c97f43:0/4] 2025-03-04 21:23:54,351 (abs_task:1157) INFO: pytorch.version=1.10.1+cu113, cuda.available=True, cudnn.version=8200, cudnn.benchmark=False, cudnn.deterministic=False
|
| 15 |
+
[92b100c97f43:0/4] 2025-03-04 21:23:54,361 (abs_task:1158) INFO: Model structure:
|
| 16 |
+
ESPnetGANTTSModel(
|
| 17 |
+
(feats_extract): LogMelFbank(
|
| 18 |
+
(stft): Stft(n_fft=2048, win_length=1200, hop_length=300, center=True, normalized=False, onesided=True)
|
| 19 |
+
(logmel): LogMel(sr=24000, n_fft=2048, n_mels=80, fmin=80, fmax=7600, htk=False)
|
| 20 |
+
)
|
| 21 |
+
(normalize): GlobalMVN(stats_file=exp/tts_stats_raw_phn_jaconv_pyopenjtalk/train/feats_stats.npz, norm_means=True, norm_vars=True)
|
| 22 |
+
(pitch_extract): Dio()
|
| 23 |
+
(pitch_normalize): GlobalMVN(stats_file=exp/tts_stats_raw_phn_jaconv_pyopenjtalk/train/pitch_stats.npz, norm_means=True, norm_vars=True)
|
| 24 |
+
(energy_extract): Energy(
|
| 25 |
+
(stft): Stft(n_fft=2048, win_length=1200, hop_length=300, center=True, normalized=False, onesided=True)
|
| 26 |
+
)
|
| 27 |
+
(energy_normalize): GlobalMVN(stats_file=exp/tts_stats_raw_phn_jaconv_pyopenjtalk/train/energy_stats.npz, norm_means=True, norm_vars=True)
|
| 28 |
+
(tts): JETS(
|
| 29 |
+
(generator): JETSGenerator(
|
| 30 |
+
(encoder): Encoder(
|
| 31 |
+
(embed): Sequential(
|
| 32 |
+
(0): Embedding(41, 256, padding_idx=0)
|
| 33 |
+
(1): ScaledPositionalEncoding(
|
| 34 |
+
(dropout): Dropout(p=0.2, inplace=False)
|
| 35 |
+
)
|
| 36 |
+
)
|
| 37 |
+
(encoders): MultiSequential(
|
| 38 |
+
(0): EncoderLayer(
|
| 39 |
+
(self_attn): MultiHeadedAttention(
|
| 40 |
+
(linear_q): Linear(in_features=256, out_features=256, bias=True)
|
| 41 |
+
(linear_k): Linear(in_features=256, out_features=256, bias=True)
|
| 42 |
+
(linear_v): Linear(in_features=256, out_features=256, bias=True)
|
| 43 |
+
(linear_out): Linear(in_features=256, out_features=256, bias=True)
|
| 44 |
+
(dropout): Dropout(p=0.2, inplace=False)
|
| 45 |
+
)
|
| 46 |
+
(feed_forward): MultiLayeredConv1d(
|
| 47 |
+
(w_1): Conv1d(256, 1024, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 48 |
+
(w_2): Conv1d(1024, 256, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 49 |
+
(dropout): Dropout(p=0.2, inplace=False)
|
| 50 |
+
)
|
| 51 |
+
(norm1): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
|
| 52 |
+
(norm2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
|
| 53 |
+
(dropout): Dropout(p=0.2, inplace=False)
|
| 54 |
+
)
|
| 55 |
+
(1): EncoderLayer(
|
| 56 |
+
(self_attn): MultiHeadedAttention(
|
| 57 |
+
(linear_q): Linear(in_features=256, out_features=256, bias=True)
|
| 58 |
+
(linear_k): Linear(in_features=256, out_features=256, bias=True)
|
| 59 |
+
(linear_v): Linear(in_features=256, out_features=256, bias=True)
|
| 60 |
+
(linear_out): Linear(in_features=256, out_features=256, bias=True)
|
| 61 |
+
(dropout): Dropout(p=0.2, inplace=False)
|
| 62 |
+
)
|
| 63 |
+
(feed_forward): MultiLayeredConv1d(
|
| 64 |
+
(w_1): Conv1d(256, 1024, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 65 |
+
(w_2): Conv1d(1024, 256, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 66 |
+
(dropout): Dropout(p=0.2, inplace=False)
|
| 67 |
+
)
|
| 68 |
+
(norm1): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
|
| 69 |
+
(norm2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
|
| 70 |
+
(dropout): Dropout(p=0.2, inplace=False)
|
| 71 |
+
)
|
| 72 |
+
(2): EncoderLayer(
|
| 73 |
+
(self_attn): MultiHeadedAttention(
|
| 74 |
+
(linear_q): Linear(in_features=256, out_features=256, bias=True)
|
| 75 |
+
(linear_k): Linear(in_features=256, out_features=256, bias=True)
|
| 76 |
+
(linear_v): Linear(in_features=256, out_features=256, bias=True)
|
| 77 |
+
(linear_out): Linear(in_features=256, out_features=256, bias=True)
|
| 78 |
+
(dropout): Dropout(p=0.2, inplace=False)
|
| 79 |
+
)
|
| 80 |
+
(feed_forward): MultiLayeredConv1d(
|
| 81 |
+
(w_1): Conv1d(256, 1024, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 82 |
+
(w_2): Conv1d(1024, 256, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 83 |
+
(dropout): Dropout(p=0.2, inplace=False)
|
| 84 |
+
)
|
| 85 |
+
(norm1): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
|
| 86 |
+
(norm2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
|
| 87 |
+
(dropout): Dropout(p=0.2, inplace=False)
|
| 88 |
+
)
|
| 89 |
+
(3): EncoderLayer(
|
| 90 |
+
(self_attn): MultiHeadedAttention(
|
| 91 |
+
(linear_q): Linear(in_features=256, out_features=256, bias=True)
|
| 92 |
+
(linear_k): Linear(in_features=256, out_features=256, bias=True)
|
| 93 |
+
(linear_v): Linear(in_features=256, out_features=256, bias=True)
|
| 94 |
+
(linear_out): Linear(in_features=256, out_features=256, bias=True)
|
| 95 |
+
(dropout): Dropout(p=0.2, inplace=False)
|
| 96 |
+
)
|
| 97 |
+
(feed_forward): MultiLayeredConv1d(
|
| 98 |
+
(w_1): Conv1d(256, 1024, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 99 |
+
(w_2): Conv1d(1024, 256, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 100 |
+
(dropout): Dropout(p=0.2, inplace=False)
|
| 101 |
+
)
|
| 102 |
+
(norm1): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
|
| 103 |
+
(norm2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
|
| 104 |
+
(dropout): Dropout(p=0.2, inplace=False)
|
| 105 |
+
)
|
| 106 |
+
)
|
| 107 |
+
(after_norm): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
|
| 108 |
+
)
|
| 109 |
+
(duration_predictor): DurationPredictor(
|
| 110 |
+
(conv): ModuleList(
|
| 111 |
+
(0): Sequential(
|
| 112 |
+
(0): Conv1d(256, 256, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 113 |
+
(1): ReLU()
|
| 114 |
+
(2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
|
| 115 |
+
(3): Dropout(p=0.1, inplace=False)
|
| 116 |
+
)
|
| 117 |
+
(1): Sequential(
|
| 118 |
+
(0): Conv1d(256, 256, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 119 |
+
(1): ReLU()
|
| 120 |
+
(2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
|
| 121 |
+
(3): Dropout(p=0.1, inplace=False)
|
| 122 |
+
)
|
| 123 |
+
)
|
| 124 |
+
(linear): Linear(in_features=256, out_features=1, bias=True)
|
| 125 |
+
)
|
| 126 |
+
(pitch_predictor): VariancePredictor(
|
| 127 |
+
(conv): ModuleList(
|
| 128 |
+
(0): Sequential(
|
| 129 |
+
(0): Conv1d(256, 256, kernel_size=(5,), stride=(1,), padding=(2,))
|
| 130 |
+
(1): ReLU()
|
| 131 |
+
(2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
|
| 132 |
+
(3): Dropout(p=0.5, inplace=False)
|
| 133 |
+
)
|
| 134 |
+
(1): Sequential(
|
| 135 |
+
(0): Conv1d(256, 256, kernel_size=(5,), stride=(1,), padding=(2,))
|
| 136 |
+
(1): ReLU()
|
| 137 |
+
(2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
|
| 138 |
+
(3): Dropout(p=0.5, inplace=False)
|
| 139 |
+
)
|
| 140 |
+
(2): Sequential(
|
| 141 |
+
(0): Conv1d(256, 256, kernel_size=(5,), stride=(1,), padding=(2,))
|
| 142 |
+
(1): ReLU()
|
| 143 |
+
(2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
|
| 144 |
+
(3): Dropout(p=0.5, inplace=False)
|
| 145 |
+
)
|
| 146 |
+
(3): Sequential(
|
| 147 |
+
(0): Conv1d(256, 256, kernel_size=(5,), stride=(1,), padding=(2,))
|
| 148 |
+
(1): ReLU()
|
| 149 |
+
(2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
|
| 150 |
+
(3): Dropout(p=0.5, inplace=False)
|
| 151 |
+
)
|
| 152 |
+
(4): Sequential(
|
| 153 |
+
(0): Conv1d(256, 256, kernel_size=(5,), stride=(1,), padding=(2,))
|
| 154 |
+
(1): ReLU()
|
| 155 |
+
(2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
|
| 156 |
+
(3): Dropout(p=0.5, inplace=False)
|
| 157 |
+
)
|
| 158 |
+
)
|
| 159 |
+
(linear): Linear(in_features=256, out_features=1, bias=True)
|
| 160 |
+
)
|
| 161 |
+
(pitch_embed): Sequential(
|
| 162 |
+
(0): Conv1d(1, 256, kernel_size=(1,), stride=(1,))
|
| 163 |
+
(1): Dropout(p=0.0, inplace=False)
|
| 164 |
+
)
|
| 165 |
+
(energy_predictor): VariancePredictor(
|
| 166 |
+
(conv): ModuleList(
|
| 167 |
+
(0): Sequential(
|
| 168 |
+
(0): Conv1d(256, 256, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 169 |
+
(1): ReLU()
|
| 170 |
+
(2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
|
| 171 |
+
(3): Dropout(p=0.5, inplace=False)
|
| 172 |
+
)
|
| 173 |
+
(1): Sequential(
|
| 174 |
+
(0): Conv1d(256, 256, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 175 |
+
(1): ReLU()
|
| 176 |
+
(2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
|
| 177 |
+
(3): Dropout(p=0.5, inplace=False)
|
| 178 |
+
)
|
| 179 |
+
)
|
| 180 |
+
(linear): Linear(in_features=256, out_features=1, bias=True)
|
| 181 |
+
)
|
| 182 |
+
(energy_embed): Sequential(
|
| 183 |
+
(0): Conv1d(1, 256, kernel_size=(1,), stride=(1,))
|
| 184 |
+
(1): Dropout(p=0.0, inplace=False)
|
| 185 |
+
)
|
| 186 |
+
(alignment_module): AlignmentModule(
|
| 187 |
+
(t_conv1): Conv1d(256, 256, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 188 |
+
(t_conv2): Conv1d(256, 256, kernel_size=(1,), stride=(1,))
|
| 189 |
+
(f_conv1): Conv1d(80, 256, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 190 |
+
(f_conv2): Conv1d(256, 256, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 191 |
+
(f_conv3): Conv1d(256, 256, kernel_size=(1,), stride=(1,))
|
| 192 |
+
)
|
| 193 |
+
(length_regulator): GaussianUpsampling()
|
| 194 |
+
(decoder): Encoder(
|
| 195 |
+
(embed): Sequential(
|
| 196 |
+
(0): ScaledPositionalEncoding(
|
| 197 |
+
(dropout): Dropout(p=0.2, inplace=False)
|
| 198 |
+
)
|
| 199 |
+
)
|
| 200 |
+
(encoders): MultiSequential(
|
| 201 |
+
(0): EncoderLayer(
|
| 202 |
+
(self_attn): MultiHeadedAttention(
|
| 203 |
+
(linear_q): Linear(in_features=256, out_features=256, bias=True)
|
| 204 |
+
(linear_k): Linear(in_features=256, out_features=256, bias=True)
|
| 205 |
+
(linear_v): Linear(in_features=256, out_features=256, bias=True)
|
| 206 |
+
(linear_out): Linear(in_features=256, out_features=256, bias=True)
|
| 207 |
+
(dropout): Dropout(p=0.2, inplace=False)
|
| 208 |
+
)
|
| 209 |
+
(feed_forward): MultiLayeredConv1d(
|
| 210 |
+
(w_1): Conv1d(256, 1024, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 211 |
+
(w_2): Conv1d(1024, 256, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 212 |
+
(dropout): Dropout(p=0.2, inplace=False)
|
| 213 |
+
)
|
| 214 |
+
(norm1): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
|
| 215 |
+
(norm2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
|
| 216 |
+
(dropout): Dropout(p=0.2, inplace=False)
|
| 217 |
+
)
|
| 218 |
+
(1): EncoderLayer(
|
| 219 |
+
(self_attn): MultiHeadedAttention(
|
| 220 |
+
(linear_q): Linear(in_features=256, out_features=256, bias=True)
|
| 221 |
+
(linear_k): Linear(in_features=256, out_features=256, bias=True)
|
| 222 |
+
(linear_v): Linear(in_features=256, out_features=256, bias=True)
|
| 223 |
+
(linear_out): Linear(in_features=256, out_features=256, bias=True)
|
| 224 |
+
(dropout): Dropout(p=0.2, inplace=False)
|
| 225 |
+
)
|
| 226 |
+
(feed_forward): MultiLayeredConv1d(
|
| 227 |
+
(w_1): Conv1d(256, 1024, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 228 |
+
(w_2): Conv1d(1024, 256, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 229 |
+
(dropout): Dropout(p=0.2, inplace=False)
|
| 230 |
+
)
|
| 231 |
+
(norm1): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
|
| 232 |
+
(norm2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
|
| 233 |
+
(dropout): Dropout(p=0.2, inplace=False)
|
| 234 |
+
)
|
| 235 |
+
(2): EncoderLayer(
|
| 236 |
+
(self_attn): MultiHeadedAttention(
|
| 237 |
+
(linear_q): Linear(in_features=256, out_features=256, bias=True)
|
| 238 |
+
(linear_k): Linear(in_features=256, out_features=256, bias=True)
|
| 239 |
+
(linear_v): Linear(in_features=256, out_features=256, bias=True)
|
| 240 |
+
(linear_out): Linear(in_features=256, out_features=256, bias=True)
|
| 241 |
+
(dropout): Dropout(p=0.2, inplace=False)
|
| 242 |
+
)
|
| 243 |
+
(feed_forward): MultiLayeredConv1d(
|
| 244 |
+
(w_1): Conv1d(256, 1024, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 245 |
+
(w_2): Conv1d(1024, 256, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 246 |
+
(dropout): Dropout(p=0.2, inplace=False)
|
| 247 |
+
)
|
| 248 |
+
(norm1): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
|
| 249 |
+
(norm2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
|
| 250 |
+
(dropout): Dropout(p=0.2, inplace=False)
|
| 251 |
+
)
|
| 252 |
+
(3): EncoderLayer(
|
| 253 |
+
(self_attn): MultiHeadedAttention(
|
| 254 |
+
(linear_q): Linear(in_features=256, out_features=256, bias=True)
|
| 255 |
+
(linear_k): Linear(in_features=256, out_features=256, bias=True)
|
| 256 |
+
(linear_v): Linear(in_features=256, out_features=256, bias=True)
|
| 257 |
+
(linear_out): Linear(in_features=256, out_features=256, bias=True)
|
| 258 |
+
(dropout): Dropout(p=0.2, inplace=False)
|
| 259 |
+
)
|
| 260 |
+
(feed_forward): MultiLayeredConv1d(
|
| 261 |
+
(w_1): Conv1d(256, 1024, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 262 |
+
(w_2): Conv1d(1024, 256, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 263 |
+
(dropout): Dropout(p=0.2, inplace=False)
|
| 264 |
+
)
|
| 265 |
+
(norm1): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
|
| 266 |
+
(norm2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
|
| 267 |
+
(dropout): Dropout(p=0.2, inplace=False)
|
| 268 |
+
)
|
| 269 |
+
)
|
| 270 |
+
(after_norm): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
|
| 271 |
+
)
|
| 272 |
+
(generator): HiFiGANGenerator(
|
| 273 |
+
(input_conv): Conv1d(256, 512, kernel_size=(7,), stride=(1,), padding=(3,))
|
| 274 |
+
(upsamples): ModuleList(
|
| 275 |
+
(0): Sequential(
|
| 276 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 277 |
+
(1): ConvTranspose1d(512, 256, kernel_size=(16,), stride=(8,), padding=(4,))
|
| 278 |
+
)
|
| 279 |
+
(1): Sequential(
|
| 280 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 281 |
+
(1): ConvTranspose1d(256, 128, kernel_size=(16,), stride=(8,), padding=(4,))
|
| 282 |
+
)
|
| 283 |
+
(2): Sequential(
|
| 284 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 285 |
+
(1): ConvTranspose1d(128, 64, kernel_size=(4,), stride=(2,), padding=(1,))
|
| 286 |
+
)
|
| 287 |
+
(3): Sequential(
|
| 288 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 289 |
+
(1): ConvTranspose1d(64, 32, kernel_size=(4,), stride=(2,), padding=(1,))
|
| 290 |
+
)
|
| 291 |
+
)
|
| 292 |
+
(blocks): ModuleList(
|
| 293 |
+
(0): ResidualBlock(
|
| 294 |
+
(convs1): ModuleList(
|
| 295 |
+
(0): Sequential(
|
| 296 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 297 |
+
(1): Conv1d(256, 256, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 298 |
+
)
|
| 299 |
+
(1): Sequential(
|
| 300 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 301 |
+
(1): Conv1d(256, 256, kernel_size=(3,), stride=(1,), padding=(3,), dilation=(3,))
|
| 302 |
+
)
|
| 303 |
+
(2): Sequential(
|
| 304 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 305 |
+
(1): Conv1d(256, 256, kernel_size=(3,), stride=(1,), padding=(5,), dilation=(5,))
|
| 306 |
+
)
|
| 307 |
+
)
|
| 308 |
+
(convs2): ModuleList(
|
| 309 |
+
(0): Sequential(
|
| 310 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 311 |
+
(1): Conv1d(256, 256, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 312 |
+
)
|
| 313 |
+
(1): Sequential(
|
| 314 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 315 |
+
(1): Conv1d(256, 256, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 316 |
+
)
|
| 317 |
+
(2): Sequential(
|
| 318 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 319 |
+
(1): Conv1d(256, 256, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 320 |
+
)
|
| 321 |
+
)
|
| 322 |
+
)
|
| 323 |
+
(1): ResidualBlock(
|
| 324 |
+
(convs1): ModuleList(
|
| 325 |
+
(0): Sequential(
|
| 326 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 327 |
+
(1): Conv1d(256, 256, kernel_size=(7,), stride=(1,), padding=(3,))
|
| 328 |
+
)
|
| 329 |
+
(1): Sequential(
|
| 330 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 331 |
+
(1): Conv1d(256, 256, kernel_size=(7,), stride=(1,), padding=(9,), dilation=(3,))
|
| 332 |
+
)
|
| 333 |
+
(2): Sequential(
|
| 334 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 335 |
+
(1): Conv1d(256, 256, kernel_size=(7,), stride=(1,), padding=(15,), dilation=(5,))
|
| 336 |
+
)
|
| 337 |
+
)
|
| 338 |
+
(convs2): ModuleList(
|
| 339 |
+
(0): Sequential(
|
| 340 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 341 |
+
(1): Conv1d(256, 256, kernel_size=(7,), stride=(1,), padding=(3,))
|
| 342 |
+
)
|
| 343 |
+
(1): Sequential(
|
| 344 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 345 |
+
(1): Conv1d(256, 256, kernel_size=(7,), stride=(1,), padding=(3,))
|
| 346 |
+
)
|
| 347 |
+
(2): Sequential(
|
| 348 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 349 |
+
(1): Conv1d(256, 256, kernel_size=(7,), stride=(1,), padding=(3,))
|
| 350 |
+
)
|
| 351 |
+
)
|
| 352 |
+
)
|
| 353 |
+
(2): ResidualBlock(
|
| 354 |
+
(convs1): ModuleList(
|
| 355 |
+
(0): Sequential(
|
| 356 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 357 |
+
(1): Conv1d(256, 256, kernel_size=(11,), stride=(1,), padding=(5,))
|
| 358 |
+
)
|
| 359 |
+
(1): Sequential(
|
| 360 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 361 |
+
(1): Conv1d(256, 256, kernel_size=(11,), stride=(1,), padding=(15,), dilation=(3,))
|
| 362 |
+
)
|
| 363 |
+
(2): Sequential(
|
| 364 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 365 |
+
(1): Conv1d(256, 256, kernel_size=(11,), stride=(1,), padding=(25,), dilation=(5,))
|
| 366 |
+
)
|
| 367 |
+
)
|
| 368 |
+
(convs2): ModuleList(
|
| 369 |
+
(0): Sequential(
|
| 370 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 371 |
+
(1): Conv1d(256, 256, kernel_size=(11,), stride=(1,), padding=(5,))
|
| 372 |
+
)
|
| 373 |
+
(1): Sequential(
|
| 374 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 375 |
+
(1): Conv1d(256, 256, kernel_size=(11,), stride=(1,), padding=(5,))
|
| 376 |
+
)
|
| 377 |
+
(2): Sequential(
|
| 378 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 379 |
+
(1): Conv1d(256, 256, kernel_size=(11,), stride=(1,), padding=(5,))
|
| 380 |
+
)
|
| 381 |
+
)
|
| 382 |
+
)
|
| 383 |
+
(3): ResidualBlock(
|
| 384 |
+
(convs1): ModuleList(
|
| 385 |
+
(0): Sequential(
|
| 386 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 387 |
+
(1): Conv1d(128, 128, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 388 |
+
)
|
| 389 |
+
(1): Sequential(
|
| 390 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 391 |
+
(1): Conv1d(128, 128, kernel_size=(3,), stride=(1,), padding=(3,), dilation=(3,))
|
| 392 |
+
)
|
| 393 |
+
(2): Sequential(
|
| 394 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 395 |
+
(1): Conv1d(128, 128, kernel_size=(3,), stride=(1,), padding=(5,), dilation=(5,))
|
| 396 |
+
)
|
| 397 |
+
)
|
| 398 |
+
(convs2): ModuleList(
|
| 399 |
+
(0): Sequential(
|
| 400 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 401 |
+
(1): Conv1d(128, 128, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 402 |
+
)
|
| 403 |
+
(1): Sequential(
|
| 404 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 405 |
+
(1): Conv1d(128, 128, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 406 |
+
)
|
| 407 |
+
(2): Sequential(
|
| 408 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 409 |
+
(1): Conv1d(128, 128, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 410 |
+
)
|
| 411 |
+
)
|
| 412 |
+
)
|
| 413 |
+
(4): ResidualBlock(
|
| 414 |
+
(convs1): ModuleList(
|
| 415 |
+
(0): Sequential(
|
| 416 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 417 |
+
(1): Conv1d(128, 128, kernel_size=(7,), stride=(1,), padding=(3,))
|
| 418 |
+
)
|
| 419 |
+
(1): Sequential(
|
| 420 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 421 |
+
(1): Conv1d(128, 128, kernel_size=(7,), stride=(1,), padding=(9,), dilation=(3,))
|
| 422 |
+
)
|
| 423 |
+
(2): Sequential(
|
| 424 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 425 |
+
(1): Conv1d(128, 128, kernel_size=(7,), stride=(1,), padding=(15,), dilation=(5,))
|
| 426 |
+
)
|
| 427 |
+
)
|
| 428 |
+
(convs2): ModuleList(
|
| 429 |
+
(0): Sequential(
|
| 430 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 431 |
+
(1): Conv1d(128, 128, kernel_size=(7,), stride=(1,), padding=(3,))
|
| 432 |
+
)
|
| 433 |
+
(1): Sequential(
|
| 434 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 435 |
+
(1): Conv1d(128, 128, kernel_size=(7,), stride=(1,), padding=(3,))
|
| 436 |
+
)
|
| 437 |
+
(2): Sequential(
|
| 438 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 439 |
+
(1): Conv1d(128, 128, kernel_size=(7,), stride=(1,), padding=(3,))
|
| 440 |
+
)
|
| 441 |
+
)
|
| 442 |
+
)
|
| 443 |
+
(5): ResidualBlock(
|
| 444 |
+
(convs1): ModuleList(
|
| 445 |
+
(0): Sequential(
|
| 446 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 447 |
+
(1): Conv1d(128, 128, kernel_size=(11,), stride=(1,), padding=(5,))
|
| 448 |
+
)
|
| 449 |
+
(1): Sequential(
|
| 450 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 451 |
+
(1): Conv1d(128, 128, kernel_size=(11,), stride=(1,), padding=(15,), dilation=(3,))
|
| 452 |
+
)
|
| 453 |
+
(2): Sequential(
|
| 454 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 455 |
+
(1): Conv1d(128, 128, kernel_size=(11,), stride=(1,), padding=(25,), dilation=(5,))
|
| 456 |
+
)
|
| 457 |
+
)
|
| 458 |
+
(convs2): ModuleList(
|
| 459 |
+
(0): Sequential(
|
| 460 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 461 |
+
(1): Conv1d(128, 128, kernel_size=(11,), stride=(1,), padding=(5,))
|
| 462 |
+
)
|
| 463 |
+
(1): Sequential(
|
| 464 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 465 |
+
(1): Conv1d(128, 128, kernel_size=(11,), stride=(1,), padding=(5,))
|
| 466 |
+
)
|
| 467 |
+
(2): Sequential(
|
| 468 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 469 |
+
(1): Conv1d(128, 128, kernel_size=(11,), stride=(1,), padding=(5,))
|
| 470 |
+
)
|
| 471 |
+
)
|
| 472 |
+
)
|
| 473 |
+
(6): ResidualBlock(
|
| 474 |
+
(convs1): ModuleList(
|
| 475 |
+
(0): Sequential(
|
| 476 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 477 |
+
(1): Conv1d(64, 64, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 478 |
+
)
|
| 479 |
+
(1): Sequential(
|
| 480 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 481 |
+
(1): Conv1d(64, 64, kernel_size=(3,), stride=(1,), padding=(3,), dilation=(3,))
|
| 482 |
+
)
|
| 483 |
+
(2): Sequential(
|
| 484 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 485 |
+
(1): Conv1d(64, 64, kernel_size=(3,), stride=(1,), padding=(5,), dilation=(5,))
|
| 486 |
+
)
|
| 487 |
+
)
|
| 488 |
+
(convs2): ModuleList(
|
| 489 |
+
(0): Sequential(
|
| 490 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 491 |
+
(1): Conv1d(64, 64, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 492 |
+
)
|
| 493 |
+
(1): Sequential(
|
| 494 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 495 |
+
(1): Conv1d(64, 64, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 496 |
+
)
|
| 497 |
+
(2): Sequential(
|
| 498 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 499 |
+
(1): Conv1d(64, 64, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 500 |
+
)
|
| 501 |
+
)
|
| 502 |
+
)
|
| 503 |
+
(7): ResidualBlock(
|
| 504 |
+
(convs1): ModuleList(
|
| 505 |
+
(0): Sequential(
|
| 506 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 507 |
+
(1): Conv1d(64, 64, kernel_size=(7,), stride=(1,), padding=(3,))
|
| 508 |
+
)
|
| 509 |
+
(1): Sequential(
|
| 510 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 511 |
+
(1): Conv1d(64, 64, kernel_size=(7,), stride=(1,), padding=(9,), dilation=(3,))
|
| 512 |
+
)
|
| 513 |
+
(2): Sequential(
|
| 514 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 515 |
+
(1): Conv1d(64, 64, kernel_size=(7,), stride=(1,), padding=(15,), dilation=(5,))
|
| 516 |
+
)
|
| 517 |
+
)
|
| 518 |
+
(convs2): ModuleList(
|
| 519 |
+
(0): Sequential(
|
| 520 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 521 |
+
(1): Conv1d(64, 64, kernel_size=(7,), stride=(1,), padding=(3,))
|
| 522 |
+
)
|
| 523 |
+
(1): Sequential(
|
| 524 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 525 |
+
(1): Conv1d(64, 64, kernel_size=(7,), stride=(1,), padding=(3,))
|
| 526 |
+
)
|
| 527 |
+
(2): Sequential(
|
| 528 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 529 |
+
(1): Conv1d(64, 64, kernel_size=(7,), stride=(1,), padding=(3,))
|
| 530 |
+
)
|
| 531 |
+
)
|
| 532 |
+
)
|
| 533 |
+
(8): ResidualBlock(
|
| 534 |
+
(convs1): ModuleList(
|
| 535 |
+
(0): Sequential(
|
| 536 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 537 |
+
(1): Conv1d(64, 64, kernel_size=(11,), stride=(1,), padding=(5,))
|
| 538 |
+
)
|
| 539 |
+
(1): Sequential(
|
| 540 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 541 |
+
(1): Conv1d(64, 64, kernel_size=(11,), stride=(1,), padding=(15,), dilation=(3,))
|
| 542 |
+
)
|
| 543 |
+
(2): Sequential(
|
| 544 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 545 |
+
(1): Conv1d(64, 64, kernel_size=(11,), stride=(1,), padding=(25,), dilation=(5,))
|
| 546 |
+
)
|
| 547 |
+
)
|
| 548 |
+
(convs2): ModuleList(
|
| 549 |
+
(0): Sequential(
|
| 550 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 551 |
+
(1): Conv1d(64, 64, kernel_size=(11,), stride=(1,), padding=(5,))
|
| 552 |
+
)
|
| 553 |
+
(1): Sequential(
|
| 554 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 555 |
+
(1): Conv1d(64, 64, kernel_size=(11,), stride=(1,), padding=(5,))
|
| 556 |
+
)
|
| 557 |
+
(2): Sequential(
|
| 558 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 559 |
+
(1): Conv1d(64, 64, kernel_size=(11,), stride=(1,), padding=(5,))
|
| 560 |
+
)
|
| 561 |
+
)
|
| 562 |
+
)
|
| 563 |
+
(9): ResidualBlock(
|
| 564 |
+
(convs1): ModuleList(
|
| 565 |
+
(0): Sequential(
|
| 566 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 567 |
+
(1): Conv1d(32, 32, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 568 |
+
)
|
| 569 |
+
(1): Sequential(
|
| 570 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 571 |
+
(1): Conv1d(32, 32, kernel_size=(3,), stride=(1,), padding=(3,), dilation=(3,))
|
| 572 |
+
)
|
| 573 |
+
(2): Sequential(
|
| 574 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 575 |
+
(1): Conv1d(32, 32, kernel_size=(3,), stride=(1,), padding=(5,), dilation=(5,))
|
| 576 |
+
)
|
| 577 |
+
)
|
| 578 |
+
(convs2): ModuleList(
|
| 579 |
+
(0): Sequential(
|
| 580 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 581 |
+
(1): Conv1d(32, 32, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 582 |
+
)
|
| 583 |
+
(1): Sequential(
|
| 584 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 585 |
+
(1): Conv1d(32, 32, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 586 |
+
)
|
| 587 |
+
(2): Sequential(
|
| 588 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 589 |
+
(1): Conv1d(32, 32, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 590 |
+
)
|
| 591 |
+
)
|
| 592 |
+
)
|
| 593 |
+
(10): ResidualBlock(
|
| 594 |
+
(convs1): ModuleList(
|
| 595 |
+
(0): Sequential(
|
| 596 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 597 |
+
(1): Conv1d(32, 32, kernel_size=(7,), stride=(1,), padding=(3,))
|
| 598 |
+
)
|
| 599 |
+
(1): Sequential(
|
| 600 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 601 |
+
(1): Conv1d(32, 32, kernel_size=(7,), stride=(1,), padding=(9,), dilation=(3,))
|
| 602 |
+
)
|
| 603 |
+
(2): Sequential(
|
| 604 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 605 |
+
(1): Conv1d(32, 32, kernel_size=(7,), stride=(1,), padding=(15,), dilation=(5,))
|
| 606 |
+
)
|
| 607 |
+
)
|
| 608 |
+
(convs2): ModuleList(
|
| 609 |
+
(0): Sequential(
|
| 610 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 611 |
+
(1): Conv1d(32, 32, kernel_size=(7,), stride=(1,), padding=(3,))
|
| 612 |
+
)
|
| 613 |
+
(1): Sequential(
|
| 614 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 615 |
+
(1): Conv1d(32, 32, kernel_size=(7,), stride=(1,), padding=(3,))
|
| 616 |
+
)
|
| 617 |
+
(2): Sequential(
|
| 618 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 619 |
+
(1): Conv1d(32, 32, kernel_size=(7,), stride=(1,), padding=(3,))
|
| 620 |
+
)
|
| 621 |
+
)
|
| 622 |
+
)
|
| 623 |
+
(11): ResidualBlock(
|
| 624 |
+
(convs1): ModuleList(
|
| 625 |
+
(0): Sequential(
|
| 626 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 627 |
+
(1): Conv1d(32, 32, kernel_size=(11,), stride=(1,), padding=(5,))
|
| 628 |
+
)
|
| 629 |
+
(1): Sequential(
|
| 630 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 631 |
+
(1): Conv1d(32, 32, kernel_size=(11,), stride=(1,), padding=(15,), dilation=(3,))
|
| 632 |
+
)
|
| 633 |
+
(2): Sequential(
|
| 634 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 635 |
+
(1): Conv1d(32, 32, kernel_size=(11,), stride=(1,), padding=(25,), dilation=(5,))
|
| 636 |
+
)
|
| 637 |
+
)
|
| 638 |
+
(convs2): ModuleList(
|
| 639 |
+
(0): Sequential(
|
| 640 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 641 |
+
(1): Conv1d(32, 32, kernel_size=(11,), stride=(1,), padding=(5,))
|
| 642 |
+
)
|
| 643 |
+
(1): Sequential(
|
| 644 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 645 |
+
(1): Conv1d(32, 32, kernel_size=(11,), stride=(1,), padding=(5,))
|
| 646 |
+
)
|
| 647 |
+
(2): Sequential(
|
| 648 |
+
(0): LeakyReLU(negative_slope=0.1)
|
| 649 |
+
(1): Conv1d(32, 32, kernel_size=(11,), stride=(1,), padding=(5,))
|
| 650 |
+
)
|
| 651 |
+
)
|
| 652 |
+
)
|
| 653 |
+
)
|
| 654 |
+
(output_conv): Sequential(
|
| 655 |
+
(0): LeakyReLU(negative_slope=0.01)
|
| 656 |
+
(1): Conv1d(32, 1, kernel_size=(7,), stride=(1,), padding=(3,))
|
| 657 |
+
(2): Tanh()
|
| 658 |
+
)
|
| 659 |
+
)
|
| 660 |
+
)
|
| 661 |
+
(discriminator): HiFiGANMultiScaleMultiPeriodDiscriminator(
|
| 662 |
+
(msd): HiFiGANMultiScaleDiscriminator(
|
| 663 |
+
(discriminators): ModuleList(
|
| 664 |
+
(0): HiFiGANScaleDiscriminator(
|
| 665 |
+
(layers): ModuleList(
|
| 666 |
+
(0): Sequential(
|
| 667 |
+
(0): Conv1d(1, 128, kernel_size=(15,), stride=(1,), padding=(7,))
|
| 668 |
+
(1): LeakyReLU(negative_slope=0.1)
|
| 669 |
+
)
|
| 670 |
+
(1): Sequential(
|
| 671 |
+
(0): Conv1d(128, 128, kernel_size=(41,), stride=(2,), padding=(20,), groups=4)
|
| 672 |
+
(1): LeakyReLU(negative_slope=0.1)
|
| 673 |
+
)
|
| 674 |
+
(2): Sequential(
|
| 675 |
+
(0): Conv1d(128, 256, kernel_size=(41,), stride=(2,), padding=(20,), groups=16)
|
| 676 |
+
(1): LeakyReLU(negative_slope=0.1)
|
| 677 |
+
)
|
| 678 |
+
(3): Sequential(
|
| 679 |
+
(0): Conv1d(256, 512, kernel_size=(41,), stride=(4,), padding=(20,), groups=16)
|
| 680 |
+
(1): LeakyReLU(negative_slope=0.1)
|
| 681 |
+
)
|
| 682 |
+
(4): Sequential(
|
| 683 |
+
(0): Conv1d(512, 1024, kernel_size=(41,), stride=(4,), padding=(20,), groups=16)
|
| 684 |
+
(1): LeakyReLU(negative_slope=0.1)
|
| 685 |
+
)
|
| 686 |
+
(5): Sequential(
|
| 687 |
+
(0): Conv1d(1024, 1024, kernel_size=(41,), stride=(1,), padding=(20,), groups=16)
|
| 688 |
+
(1): LeakyReLU(negative_slope=0.1)
|
| 689 |
+
)
|
| 690 |
+
(6): Sequential(
|
| 691 |
+
(0): Conv1d(1024, 1024, kernel_size=(5,), stride=(1,), padding=(2,))
|
| 692 |
+
(1): LeakyReLU(negative_slope=0.1)
|
| 693 |
+
)
|
| 694 |
+
(7): Conv1d(1024, 1, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 695 |
+
)
|
| 696 |
+
)
|
| 697 |
+
)
|
| 698 |
+
)
|
| 699 |
+
(mpd): HiFiGANMultiPeriodDiscriminator(
|
| 700 |
+
(discriminators): ModuleList(
|
| 701 |
+
(0): HiFiGANPeriodDiscriminator(
|
| 702 |
+
(convs): ModuleList(
|
| 703 |
+
(0): Sequential(
|
| 704 |
+
(0): Conv2d(1, 32, kernel_size=(5, 1), stride=(3, 1), padding=(2, 0))
|
| 705 |
+
(1): LeakyReLU(negative_slope=0.1)
|
| 706 |
+
)
|
| 707 |
+
(1): Sequential(
|
| 708 |
+
(0): Conv2d(32, 128, kernel_size=(5, 1), stride=(3, 1), padding=(2, 0))
|
| 709 |
+
(1): LeakyReLU(negative_slope=0.1)
|
| 710 |
+
)
|
| 711 |
+
(2): Sequential(
|
| 712 |
+
(0): Conv2d(128, 512, kernel_size=(5, 1), stride=(3, 1), padding=(2, 0))
|
| 713 |
+
(1): LeakyReLU(negative_slope=0.1)
|
| 714 |
+
)
|
| 715 |
+
(3): Sequential(
|
| 716 |
+
(0): Conv2d(512, 1024, kernel_size=(5, 1), stride=(3, 1), padding=(2, 0))
|
| 717 |
+
(1): LeakyReLU(negative_slope=0.1)
|
| 718 |
+
)
|
| 719 |
+
(4): Sequential(
|
| 720 |
+
(0): Conv2d(1024, 1024, kernel_size=(5, 1), stride=(1, 1), padding=(2, 0))
|
| 721 |
+
(1): LeakyReLU(negative_slope=0.1)
|
| 722 |
+
)
|
| 723 |
+
)
|
| 724 |
+
(output_conv): Conv2d(1024, 1, kernel_size=(2, 1), stride=(1, 1), padding=(1, 0))
|
| 725 |
+
)
|
| 726 |
+
(1): HiFiGANPeriodDiscriminator(
|
| 727 |
+
(convs): ModuleList(
|
| 728 |
+
(0): Sequential(
|
| 729 |
+
(0): Conv2d(1, 32, kernel_size=(5, 1), stride=(3, 1), padding=(2, 0))
|
| 730 |
+
(1): LeakyReLU(negative_slope=0.1)
|
| 731 |
+
)
|
| 732 |
+
(1): Sequential(
|
| 733 |
+
(0): Conv2d(32, 128, kernel_size=(5, 1), stride=(3, 1), padding=(2, 0))
|
| 734 |
+
(1): LeakyReLU(negative_slope=0.1)
|
| 735 |
+
)
|
| 736 |
+
(2): Sequential(
|
| 737 |
+
(0): Conv2d(128, 512, kernel_size=(5, 1), stride=(3, 1), padding=(2, 0))
|
| 738 |
+
(1): LeakyReLU(negative_slope=0.1)
|
| 739 |
+
)
|
| 740 |
+
(3): Sequential(
|
| 741 |
+
(0): Conv2d(512, 1024, kernel_size=(5, 1), stride=(3, 1), padding=(2, 0))
|
| 742 |
+
(1): LeakyReLU(negative_slope=0.1)
|
| 743 |
+
)
|
| 744 |
+
(4): Sequential(
|
| 745 |
+
(0): Conv2d(1024, 1024, kernel_size=(5, 1), stride=(1, 1), padding=(2, 0))
|
| 746 |
+
(1): LeakyReLU(negative_slope=0.1)
|
| 747 |
+
)
|
| 748 |
+
)
|
| 749 |
+
(output_conv): Conv2d(1024, 1, kernel_size=(2, 1), stride=(1, 1), padding=(1, 0))
|
| 750 |
+
)
|
| 751 |
+
(2): HiFiGANPeriodDiscriminator(
|
| 752 |
+
(convs): ModuleList(
|
| 753 |
+
(0): Sequential(
|
| 754 |
+
(0): Conv2d(1, 32, kernel_size=(5, 1), stride=(3, 1), padding=(2, 0))
|
| 755 |
+
(1): LeakyReLU(negative_slope=0.1)
|
| 756 |
+
)
|
| 757 |
+
(1): Sequential(
|
| 758 |
+
(0): Conv2d(32, 128, kernel_size=(5, 1), stride=(3, 1), padding=(2, 0))
|
| 759 |
+
(1): LeakyReLU(negative_slope=0.1)
|
| 760 |
+
)
|
| 761 |
+
(2): Sequential(
|
| 762 |
+
(0): Conv2d(128, 512, kernel_size=(5, 1), stride=(3, 1), padding=(2, 0))
|
| 763 |
+
(1): LeakyReLU(negative_slope=0.1)
|
| 764 |
+
)
|
| 765 |
+
(3): Sequential(
|
| 766 |
+
(0): Conv2d(512, 1024, kernel_size=(5, 1), stride=(3, 1), padding=(2, 0))
|
| 767 |
+
(1): LeakyReLU(negative_slope=0.1)
|
| 768 |
+
)
|
| 769 |
+
(4): Sequential(
|
| 770 |
+
(0): Conv2d(1024, 1024, kernel_size=(5, 1), stride=(1, 1), padding=(2, 0))
|
| 771 |
+
(1): LeakyReLU(negative_slope=0.1)
|
| 772 |
+
)
|
| 773 |
+
)
|
| 774 |
+
(output_conv): Conv2d(1024, 1, kernel_size=(2, 1), stride=(1, 1), padding=(1, 0))
|
| 775 |
+
)
|
| 776 |
+
(3): HiFiGANPeriodDiscriminator(
|
| 777 |
+
(convs): ModuleList(
|
| 778 |
+
(0): Sequential(
|
| 779 |
+
(0): Conv2d(1, 32, kernel_size=(5, 1), stride=(3, 1), padding=(2, 0))
|
| 780 |
+
(1): LeakyReLU(negative_slope=0.1)
|
| 781 |
+
)
|
| 782 |
+
(1): Sequential(
|
| 783 |
+
(0): Conv2d(32, 128, kernel_size=(5, 1), stride=(3, 1), padding=(2, 0))
|
| 784 |
+
(1): LeakyReLU(negative_slope=0.1)
|
| 785 |
+
)
|
| 786 |
+
(2): Sequential(
|
| 787 |
+
(0): Conv2d(128, 512, kernel_size=(5, 1), stride=(3, 1), padding=(2, 0))
|
| 788 |
+
(1): LeakyReLU(negative_slope=0.1)
|
| 789 |
+
)
|
| 790 |
+
(3): Sequential(
|
| 791 |
+
(0): Conv2d(512, 1024, kernel_size=(5, 1), stride=(3, 1), padding=(2, 0))
|
| 792 |
+
(1): LeakyReLU(negative_slope=0.1)
|
| 793 |
+
)
|
| 794 |
+
(4): Sequential(
|
| 795 |
+
(0): Conv2d(1024, 1024, kernel_size=(5, 1), stride=(1, 1), padding=(2, 0))
|
| 796 |
+
(1): LeakyReLU(negative_slope=0.1)
|
| 797 |
+
)
|
| 798 |
+
)
|
| 799 |
+
(output_conv): Conv2d(1024, 1, kernel_size=(2, 1), stride=(1, 1), padding=(1, 0))
|
| 800 |
+
)
|
| 801 |
+
(4): HiFiGANPeriodDiscriminator(
|
| 802 |
+
(convs): ModuleList(
|
| 803 |
+
(0): Sequential(
|
| 804 |
+
(0): Conv2d(1, 32, kernel_size=(5, 1), stride=(3, 1), padding=(2, 0))
|
| 805 |
+
(1): LeakyReLU(negative_slope=0.1)
|
| 806 |
+
)
|
| 807 |
+
(1): Sequential(
|
| 808 |
+
(0): Conv2d(32, 128, kernel_size=(5, 1), stride=(3, 1), padding=(2, 0))
|
| 809 |
+
(1): LeakyReLU(negative_slope=0.1)
|
| 810 |
+
)
|
| 811 |
+
(2): Sequential(
|
| 812 |
+
(0): Conv2d(128, 512, kernel_size=(5, 1), stride=(3, 1), padding=(2, 0))
|
| 813 |
+
(1): LeakyReLU(negative_slope=0.1)
|
| 814 |
+
)
|
| 815 |
+
(3): Sequential(
|
| 816 |
+
(0): Conv2d(512, 1024, kernel_size=(5, 1), stride=(3, 1), padding=(2, 0))
|
| 817 |
+
(1): LeakyReLU(negative_slope=0.1)
|
| 818 |
+
)
|
| 819 |
+
(4): Sequential(
|
| 820 |
+
(0): Conv2d(1024, 1024, kernel_size=(5, 1), stride=(1, 1), padding=(2, 0))
|
| 821 |
+
(1): LeakyReLU(negative_slope=0.1)
|
| 822 |
+
)
|
| 823 |
+
)
|
| 824 |
+
(output_conv): Conv2d(1024, 1, kernel_size=(2, 1), stride=(1, 1), padding=(1, 0))
|
| 825 |
+
)
|
| 826 |
+
)
|
| 827 |
+
)
|
| 828 |
+
)
|
| 829 |
+
(generator_adv_loss): GeneratorAdversarialLoss()
|
| 830 |
+
(discriminator_adv_loss): DiscriminatorAdversarialLoss()
|
| 831 |
+
(feat_match_loss): FeatureMatchLoss()
|
| 832 |
+
(mel_loss): MelSpectrogramLoss(
|
| 833 |
+
(wav_to_mel): LogMelFbank(
|
| 834 |
+
(stft): Stft(n_fft=1024, win_length=1024, hop_length=256, center=True, normalized=False, onesided=True)
|
| 835 |
+
(logmel): LogMel(sr=24000, n_fft=1024, n_mels=80, fmin=0, fmax=12000.0, htk=False)
|
| 836 |
+
)
|
| 837 |
+
)
|
| 838 |
+
(var_loss): VarianceLoss(
|
| 839 |
+
(mse_criterion): MSELoss()
|
| 840 |
+
(duration_criterion): DurationPredictorLoss(
|
| 841 |
+
(criterion): MSELoss()
|
| 842 |
+
)
|
| 843 |
+
)
|
| 844 |
+
(forwardsum_loss): ForwardSumLoss()
|
| 845 |
+
)
|
| 846 |
+
)
|
| 847 |
+
|
| 848 |
+
Model summary:
|
| 849 |
+
Class Name: ESPnetGANTTSModel
|
| 850 |
+
Total Number of model parameters: 83.28 M
|
| 851 |
+
Number of trainable parameters: 83.28 M (100.0%)
|
| 852 |
+
Size: 333.11 MB
|
| 853 |
+
Type: torch.float32
|
| 854 |
+
[92b100c97f43:0/4] 2025-03-04 21:23:54,361 (abs_task:1161) INFO: Optimizer:
|
| 855 |
+
AdamW (
|
| 856 |
+
Parameter Group 0
|
| 857 |
+
amsgrad: False
|
| 858 |
+
betas: [0.8, 0.99]
|
| 859 |
+
eps: 1e-09
|
| 860 |
+
initial_lr: 0.0002
|
| 861 |
+
lr: 0.0002
|
| 862 |
+
weight_decay: 0.0
|
| 863 |
+
)
|
| 864 |
+
[92b100c97f43:0/4] 2025-03-04 21:23:54,361 (abs_task:1162) INFO: Scheduler: <torch.optim.lr_scheduler.ExponentialLR object at 0x7f5660199550>
|
| 865 |
+
[92b100c97f43:0/4] 2025-03-04 21:23:54,361 (abs_task:1161) INFO: Optimizer2:
|
| 866 |
+
AdamW (
|
| 867 |
+
Parameter Group 0
|
| 868 |
+
amsgrad: False
|
| 869 |
+
betas: [0.8, 0.99]
|
| 870 |
+
eps: 1e-09
|
| 871 |
+
initial_lr: 0.0002
|
| 872 |
+
lr: 0.0002
|
| 873 |
+
weight_decay: 0.0
|
| 874 |
+
)
|
| 875 |
+
[92b100c97f43:0/4] 2025-03-04 21:23:54,361 (abs_task:1162) INFO: Scheduler2: <torch.optim.lr_scheduler.ExponentialLR object at 0x7f5747efa9d0>
|
| 876 |
+
[92b100c97f43:0/4] 2025-03-04 21:23:54,361 (abs_task:1171) INFO: Saving the configuration in exp/tts_train_jets_raw_phn_jaconv_pyopenjtalk/config.yaml
|
| 877 |
+
[92b100c97f43:0/4] 2025-03-04 21:23:54,698 (abs_task:1525) INFO: [train] dataset:
|
| 878 |
+
ESPnetDataset(
|
| 879 |
+
text: {"path": "dump/raw/jvs010_tr_no_dev/text", "type": "text"}
|
| 880 |
+
speech: {"path": "dump/raw/jvs010_tr_no_dev/wav.scp", "type": "sound"}
|
| 881 |
+
preprocess: <espnet2.train.preprocessor.CommonPreprocessor object at 0x7f5660199dc0>)
|
| 882 |
+
[92b100c97f43:0/4] 2025-03-04 21:23:54,698 (abs_task:1526) INFO: [train] Batch sampler: NumElementsBatchSampler(N-batch=4, batch_bins=6000000, sort_in_batch=descending, sort_batch=descending)
|
| 883 |
+
[92b100c97f43:0/4] 2025-03-04 21:23:54,699 (abs_task:1527) INFO: [train] mini-batch sizes summary: N-batch=4, mean=25.0, min=5, max=41
|
| 884 |
+
[92b100c97f43:0/4] 2025-03-04 21:23:54,719 (abs_task:1525) INFO: [valid] dataset:
|
| 885 |
+
ESPnetDataset(
|
| 886 |
+
text: {"path": "dump/raw/jvs010_dev/text", "type": "text"}
|
| 887 |
+
speech: {"path": "dump/raw/jvs010_dev/wav.scp", "type": "sound"}
|
| 888 |
+
preprocess: <espnet2.train.preprocessor.CommonPreprocessor object at 0x7f5660199520>)
|
| 889 |
+
[92b100c97f43:0/4] 2025-03-04 21:23:54,719 (abs_task:1526) INFO: [valid] Batch sampler: NumElementsBatchSampler(N-batch=1, batch_bins=6000000, sort_in_batch=descending, sort_batch=descending)
|
| 890 |
+
[92b100c97f43:0/4] 2025-03-04 21:23:54,719 (abs_task:1527) INFO: [valid] mini-batch sizes summary: N-batch=1, mean=15.0, min=15, max=15
|
| 891 |
+
[92b100c97f43:0/4] 2025-03-04 21:23:54,739 (abs_task:1525) INFO: [plot_att] dataset:
|
| 892 |
+
ESPnetDataset(
|
| 893 |
+
text: {"path": "dump/raw/jvs010_dev/text", "type": "text"}
|
| 894 |
+
speech: {"path": "dump/raw/jvs010_dev/wav.scp", "type": "sound"}
|
| 895 |
+
preprocess: <espnet2.train.preprocessor.CommonPreprocessor object at 0x7f5660155130>)
|
| 896 |
+
[92b100c97f43:0/4] 2025-03-04 21:23:54,739 (abs_task:1526) INFO: [plot_att] Batch sampler: UnsortedBatchSampler(N-batch=15, batch_size=1, key_file=exp/tts_stats_raw_phn_jaconv_pyopenjtalk/valid/text_shape.phn,
|
| 897 |
+
[92b100c97f43:0/4] 2025-03-04 21:23:54,739 (abs_task:1527) INFO: [plot_att] mini-batch sizes summary: N-batch=3, mean=1.0, min=1, max=1
|
| 898 |
+
92b100c97f43:1159464:1159464 [0] NCCL INFO Bootstrap : Using eth0:172.17.0.2<0>
|
| 899 |
+
92b100c97f43:1159464:1159464 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
|
| 900 |
+
|
| 901 |
+
92b100c97f43:1159464:1159464 [0] misc/ibvwrap.cc:63 NCCL WARN Failed to open libibverbs.so[.1]
|
| 902 |
+
92b100c97f43:1159464:1159464 [0] NCCL INFO NET/Socket : Using [0]eth0:172.17.0.2<0>
|
| 903 |
+
92b100c97f43:1159464:1159464 [0] NCCL INFO Using network Socket
|
| 904 |
+
NCCL version 2.10.3+cuda11.3
|
| 905 |
+
92b100c97f43:1159466:1159466 [2] NCCL INFO Bootstrap : Using eth0:172.17.0.2<0>
|
| 906 |
+
92b100c97f43:1159465:1159465 [1] NCCL INFO Bootstrap : Using eth0:172.17.0.2<0>
|
| 907 |
+
92b100c97f43:1159466:1159466 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
|
| 908 |
+
92b100c97f43:1159465:1159465 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
|
| 909 |
+
|
| 910 |
+
92b100c97f43:1159466:1159466 [2] misc/ibvwrap.cc:63 NCCL WARN Failed to open libibverbs.so[.1]
|
| 911 |
+
|
| 912 |
+
92b100c97f43:1159465:1159465 [1] misc/ibvwrap.cc:63 NCCL WARN Failed to open libibverbs.so[.1]
|
| 913 |
+
92b100c97f43:1159465:1159465 [1] NCCL INFO NET/Socket : Using [0]eth0:172.17.0.2<0>
|
| 914 |
+
92b100c97f43:1159466:1159466 [2] NCCL INFO NET/Socket : Using [0]eth0:172.17.0.2<0>
|
| 915 |
+
92b100c97f43:1159465:1159465 [1] NCCL INFO Using network Socket
|
| 916 |
+
92b100c97f43:1159466:1159466 [2] NCCL INFO Using network Socket
|
| 917 |
+
92b100c97f43:1159467:1159467 [3] NCCL INFO Bootstrap : Using eth0:172.17.0.2<0>
|
| 918 |
+
92b100c97f43:1159467:1159467 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
|
| 919 |
+
|
| 920 |
+
92b100c97f43:1159467:1159467 [3] misc/ibvwrap.cc:63 NCCL WARN Failed to open libibverbs.so[.1]
|
| 921 |
+
92b100c97f43:1159467:1159467 [3] NCCL INFO NET/Socket : Using [0]eth0:172.17.0.2<0>
|
| 922 |
+
92b100c97f43:1159467:1159467 [3] NCCL INFO Using network Socket
|
| 923 |
+
92b100c97f43:1159464:1159502 [0] NCCL INFO Channel 00/02 : 0 1 2 3
|
| 924 |
+
92b100c97f43:1159465:1159504 [1] NCCL INFO Trees [0] 2/-1/-1->1->0 [1] 2/-1/-1->1->0
|
| 925 |
+
92b100c97f43:1159467:1159505 [3] NCCL INFO Trees [0] -1/-1/-1->3->2 [1] -1/-1/-1->3->2
|
| 926 |
+
92b100c97f43:1159464:1159502 [0] NCCL INFO Channel 01/02 : 0 1 2 3
|
| 927 |
+
92b100c97f43:1159466:1159503 [2] NCCL INFO Trees [0] 3/-1/-1->2->1 [1] 3/-1/-1->2->1
|
| 928 |
+
92b100c97f43:1159465:1159504 [1] NCCL INFO Setting affinity for GPU 1 to ffff,ffffffff
|
| 929 |
+
92b100c97f43:1159464:1159502 [0] NCCL INFO Trees [0] 1/-1/-1->0->-1 [1] 1/-1/-1->0->-1
|
| 930 |
+
92b100c97f43:1159467:1159505 [3] NCCL INFO Setting affinity for GPU 3 to ffff,ffffffff
|
| 931 |
+
92b100c97f43:1159466:1159503 [2] NCCL INFO Setting affinity for GPU 2 to ffff,ffffffff
|
| 932 |
+
92b100c97f43:1159464:1159502 [0] NCCL INFO Setting affinity for GPU 0 to ffff,ffffffff
|
| 933 |
+
92b100c97f43:1159466:1159503 [2] NCCL INFO Channel 00 : 2[50] -> 3[60] via direct shared memory
|
| 934 |
+
92b100c97f43:1159467:1159505 [3] NCCL INFO Channel 00 : 3[60] -> 0[30] via direct shared memory
|
| 935 |
+
92b100c97f43:1159464:1159502 [0] NCCL INFO Channel 00 : 0[30] -> 1[40] via direct shared memory
|
| 936 |
+
92b100c97f43:1159465:1159504 [1] NCCL INFO Channel 00 : 1[40] -> 2[50] via direct shared memory
|
| 937 |
+
92b100c97f43:1159466:1159503 [2] NCCL INFO Channel 01 : 2[50] -> 3[60] via direct shared memory
|
| 938 |
+
92b100c97f43:1159467:1159505 [3] NCCL INFO Channel 01 : 3[60] -> 0[30] via direct shared memory
|
| 939 |
+
92b100c97f43:1159464:1159502 [0] NCCL INFO Channel 01 : 0[30] -> 1[40] via direct shared memory
|
| 940 |
+
92b100c97f43:1159465:1159504 [1] NCCL INFO Channel 01 : 1[40] -> 2[50] via direct shared memory
|
| 941 |
+
92b100c97f43:1159464:1159502 [0] NCCL INFO Connected all rings
|
| 942 |
+
92b100c97f43:1159466:1159503 [2] NCCL INFO Connected all rings
|
| 943 |
+
92b100c97f43:1159465:1159504 [1] NCCL INFO Connected all rings
|
| 944 |
+
92b100c97f43:1159467:1159505 [3] NCCL INFO Connected all rings
|
| 945 |
+
92b100c97f43:1159467:1159505 [3] NCCL INFO Channel 00 : 3[60] -> 2[50] via direct shared memory
|
| 946 |
+
92b100c97f43:1159467:1159505 [3] NCCL INFO Channel 01 : 3[60] -> 2[50] via direct shared memory
|
| 947 |
+
92b100c97f43:1159466:1159503 [2] NCCL INFO Channel 00 : 2[50] -> 1[40] via direct shared memory
|
| 948 |
+
92b100c97f43:1159465:1159504 [1] NCCL INFO Channel 00 : 1[40] -> 0[30] via direct shared memory
|
| 949 |
+
92b100c97f43:1159466:1159503 [2] NCCL INFO Channel 01 : 2[50] -> 1[40] via direct shared memory
|
| 950 |
+
92b100c97f43:1159465:1159504 [1] NCCL INFO Channel 01 : 1[40] -> 0[30] via direct shared memory
|
| 951 |
+
92b100c97f43:1159464:1159502 [0] NCCL INFO Connected all trees
|
| 952 |
+
92b100c97f43:1159464:1159502 [0] NCCL INFO threadThresholds 8/8/64 | 32/8/64 | 8/8/512
|
| 953 |
+
92b100c97f43:1159464:1159502 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
|
| 954 |
+
92b100c97f43:1159467:1159505 [3] NCCL INFO Connected all trees
|
| 955 |
+
92b100c97f43:1159467:1159505 [3] NCCL INFO threadThresholds 8/8/64 | 32/8/64 | 8/8/512
|
| 956 |
+
92b100c97f43:1159467:1159505 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
|
| 957 |
+
92b100c97f43:1159465:1159504 [1] NCCL INFO Connected all trees
|
| 958 |
+
92b100c97f43:1159465:1159504 [1] NCCL INFO threadThresholds 8/8/64 | 32/8/64 | 8/8/512
|
| 959 |
+
92b100c97f43:1159465:1159504 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
|
| 960 |
+
92b100c97f43:1159466:1159503 [2] NCCL INFO Connected all trees
|
| 961 |
+
92b100c97f43:1159466:1159503 [2] NCCL INFO threadThresholds 8/8/64 | 32/8/64 | 8/8/512
|
| 962 |
+
92b100c97f43:1159466:1159503 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
|
| 963 |
+
92b100c97f43:1159466:1159503 [2] NCCL INFO comm 0x7f35f80030d0 rank 2 nranks 4 cudaDev 2 busId 50 - Init COMPLETE
|
| 964 |
+
92b100c97f43:1159464:1159502 [0] NCCL INFO comm 0x7f55500030d0 rank 0 nranks 4 cudaDev 0 busId 30 - Init COMPLETE
|
| 965 |
+
92b100c97f43:1159464:1159464 [0] NCCL INFO Launch mode Parallel
|
| 966 |
+
92b100c97f43:1159465:1159504 [1] NCCL INFO comm 0x7f97600030d0 rank 1 nranks 4 cudaDev 1 busId 40 - Init COMPLETE
|
| 967 |
+
92b100c97f43:1159467:1159505 [3] NCCL INFO comm 0x7f66b80030d0 rank 3 nranks 4 cudaDev 3 busId 60 - Init COMPLETE
|
| 968 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 969 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 970 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 971 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 972 |
+
[92b100c97f43:0/4] 2025-03-04 21:23:55,188 (trainer:280) INFO: 1/130epoch started
|
| 973 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 974 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 975 |
+
[W reducer.cpp:1303] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator())
|
| 976 |
+
[W reducer.cpp:1303] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator())
|
| 977 |
+
[W reducer.cpp:1303] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator())
|
| 978 |
+
[W reducer.cpp:1303] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator())
|
| 979 |
+
/work/espnet/espnet2/layers/stft.py:166: UserWarning: __floordiv__ is deprecated, and its behavior will change in a future version of pytorch. It currently rounds toward 0 (like the 'trunc' function NOT 'floor'). This results in incorrect rounding for negative values. To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor').
|
| 980 |
+
olens = (ilens - self.n_fft) // self.hop_length + 1
|
| 981 |
+
/work/espnet/espnet2/layers/stft.py:166: UserWarning: __floordiv__ is deprecated, and its behavior will change in a future version of pytorch. It currently rounds toward 0 (like the 'trunc' function NOT 'floor'). This results in incorrect rounding for negative values. To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor').
|
| 982 |
+
olens = (ilens - self.n_fft) // self.hop_length + 1
|
| 983 |
+
[92b100c97f43:0/4] 2025-03-04 21:25:36,523 (gan_trainer:305) INFO: 1epoch:train:1-50batch: iter_time=0.048, generator_forward_time=0.723, generator_loss=139.743, generator_g_loss=110.582, generator_var_loss=5.224, generator_align_loss=23.937, generator_g_mel_loss=106.758, generator_g_adv_loss=2.179, generator_g_feat_match_loss=1.645, generator_var_dur_loss=0.584, generator_var_pitch_loss=2.400, generator_var_energy_loss=2.240, generator_align_forwardsum_loss=10.599, generator_align_bin_loss=1.369, generator_backward_time=0.254, generator_optim_step_time=0.034, optim0_lr0=2.000e-04, generator_train_time=1.113, discriminator_forward_time=0.544, discriminator_loss=2.766, discriminator_real_loss=1.518, discriminator_fake_loss=1.247, discriminator_backward_time=0.198, discriminator_optim_step_time=0.009, optim1_lr0=2.000e-04, discriminator_train_time=0.788, train_time=2.024
|
| 984 |
+
[92b100c97f43:0/4] 2025-03-04 21:27:08,245 (gan_trainer:305) INFO: 1epoch:train:51-100batch: iter_time=1.198e-04, generator_forward_time=0.634, generator_loss=111.648, generator_g_loss=85.935, generator_var_loss=2.179, generator_align_loss=23.534, generator_g_mel_loss=80.251, generator_g_adv_loss=2.332, generator_g_feat_match_loss=3.352, generator_var_dur_loss=0.089, generator_var_pitch_loss=0.924, generator_var_energy_loss=1.166, generator_align_forwardsum_loss=10.437, generator_align_bin_loss=1.330, generator_backward_time=0.258, generator_optim_step_time=0.034, optim0_lr0=2.000e-04, generator_train_time=1.027, discriminator_forward_time=0.548, discriminator_loss=2.396, discriminator_real_loss=1.381, discriminator_fake_loss=1.015, discriminator_backward_time=0.201, discriminator_optim_step_time=0.009, optim1_lr0=2.000e-04, discriminator_train_time=0.796, train_time=1.835
|
| 985 |
+
[92b100c97f43:0/4] 2025-03-04 21:28:38,897 (gan_trainer:305) INFO: 1epoch:train:101-150batch: iter_time=1.203e-04, generator_forward_time=0.624, generator_loss=112.406, generator_g_loss=87.597, generator_var_loss=1.890, generator_align_loss=22.919, generator_g_mel_loss=80.508, generator_g_adv_loss=2.744, generator_g_feat_match_loss=4.346, generator_var_dur_loss=0.058, generator_var_pitch_loss=0.808, generator_var_energy_loss=1.024, generator_align_forwardsum_loss=10.071, generator_align_bin_loss=1.389, generator_backward_time=0.257, generator_optim_step_time=0.033, optim0_lr0=2.000e-04, generator_train_time=1.015, discriminator_forward_time=0.539, discriminator_loss=2.084, discriminator_real_loss=1.319, discriminator_fake_loss=0.765, discriminator_backward_time=0.201, discriminator_optim_step_time=0.009, optim1_lr0=2.000e-04, discriminator_train_time=0.787, train_time=1.813
|
| 986 |
+
[92b100c97f43:0/4] 2025-03-04 21:30:10,556 (gan_trainer:305) INFO: 1epoch:train:151-200batch: iter_time=1.211e-04, generator_forward_time=0.634, generator_loss=111.480, generator_g_loss=87.402, generator_var_loss=1.820, generator_align_loss=22.257, generator_g_mel_loss=79.313, generator_g_adv_loss=2.954, generator_g_feat_match_loss=5.134, generator_var_dur_loss=0.061, generator_var_pitch_loss=0.765, generator_var_energy_loss=0.994, generator_align_forwardsum_loss=9.687, generator_align_bin_loss=1.442, generator_backward_time=0.258, generator_optim_step_time=0.033, optim0_lr0=2.000e-04, generator_train_time=1.026, discriminator_forward_time=0.548, discriminator_loss=1.898, discriminator_real_loss=1.237, discriminator_fake_loss=0.661, discriminator_backward_time=0.202, discriminator_optim_step_time=0.009, optim1_lr0=2.000e-04, discriminator_train_time=0.796, train_time=1.833
|
| 987 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 988 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 989 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 990 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 991 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 992 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 993 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 994 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 995 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 996 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 997 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 998 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 999 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1000 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1001 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1002 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1003 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1004 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1005 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1006 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1007 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1008 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1009 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1010 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1011 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1012 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1013 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1014 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1015 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1016 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1017 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1018 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1019 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1020 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1021 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1022 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1023 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1024 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1025 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1026 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1027 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1028 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1029 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1030 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1031 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1032 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1033 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1034 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1035 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1036 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1037 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1038 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1039 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1040 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1041 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1042 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1043 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1044 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1045 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1046 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1047 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1048 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1049 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1050 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1051 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1052 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1053 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1054 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1055 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1056 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1057 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1058 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1059 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1060 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1061 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1062 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1063 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1064 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1065 |
+
Traceback (most recent call last):
|
| 1066 |
+
File "/usr/lib/python3.8/runpy.py", line 194, in _run_module_as_main
|
| 1067 |
+
return _run_code(code, main_globals, None,
|
| 1068 |
+
File "/usr/lib/python3.8/runpy.py", line 87, in _run_code
|
| 1069 |
+
exec(code, run_globals)
|
| 1070 |
+
File "/work/espnet/espnet2/bin/gan_tts_train.py", line 22, in <module>
|
| 1071 |
+
main()
|
| 1072 |
+
File "/work/espnet/espnet2/bin/gan_tts_train.py", line 18, in main
|
| 1073 |
+
GANTTSTask.main(cmd=cmd)
|
| 1074 |
+
File "/work/espnet/espnet2/tasks/abs_task.py", line 1069, in main
|
| 1075 |
+
while not ProcessContext(processes, error_queues).join():
|
| 1076 |
+
File "/usr/local/lib/python3.8/dist-packages/torch/multiprocessing/spawn.py", line 99, in join
|
| 1077 |
+
ready = multiprocessing.connection.wait(
|
| 1078 |
+
File "/usr/lib/python3.8/multiprocessing/connection.py", line 931, in wait
|
| 1079 |
+
ready = selector.select(timeout)
|
| 1080 |
+
File "/usr/lib/python3.8/selectors.py", line 415, in select
|
| 1081 |
+
fd_event_list = self._selector.poll(timeout)
|
| 1082 |
+
KeyboardInterrupt
|
| 1083 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1084 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1085 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1086 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1087 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1088 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1089 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1090 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1091 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1092 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1093 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1094 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1095 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1096 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1097 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1098 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1099 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1100 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1101 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1102 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1103 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1104 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1105 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1106 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1107 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1108 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1109 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1110 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1111 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1112 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1113 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1114 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1115 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1116 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1117 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1118 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1119 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1120 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1121 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1122 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1123 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1124 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1125 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1126 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1127 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1128 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1129 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1130 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1131 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 1132 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 1133 |
+
/work/espnet/espnet2/layers/stft.py:166: UserWarning: __floordiv__ is deprecated, and its behavior will change in a future version of pytorch. It currently rounds toward 0 (like the 'trunc' function NOT 'floor'). This results in incorrect rounding for negative values. To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor').
|
| 1134 |
+
olens = (ilens - self.n_fft) // self.hop_length + 1
|
| 1135 |
+
/work/espnet/espnet2/layers/stft.py:166: UserWarning: __floordiv__ is deprecated, and its behavior will change in a future version of pytorch. It currently rounds toward 0 (like the 'trunc' function NOT 'floor'). This results in incorrect rounding for negative values. To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor').
|
| 1136 |
+
olens = (ilens - self.n_fft) // self.hop_length + 1
|
| 1137 |
+
Process SpawnProcess-2:
|
| 1138 |
+
Traceback (most recent call last):
|
| 1139 |
+
File "/usr/lib/python3.8/multiprocessing/process.py", line 315, in _bootstrap
|
| 1140 |
+
self.run()
|
| 1141 |
+
File "/usr/lib/python3.8/multiprocessing/process.py", line 108, in run
|
| 1142 |
+
self._target(*self._args, **self._kwargs)
|
| 1143 |
+
File "/work/espnet/espnet2/tasks/abs_task.py", line 1315, in main_worker
|
| 1144 |
+
cls.trainer.run(
|
| 1145 |
+
File "/work/espnet/espnet2/train/trainer.py", line 286, in run
|
| 1146 |
+
all_steps_are_invalid = cls.train_one_epoch(
|
| 1147 |
+
File "/work/espnet/espnet2/train/gan_trainer.py", line 223, in train_one_epoch
|
| 1148 |
+
loss.backward()
|
| 1149 |
+
File "/usr/local/lib/python3.8/dist-packages/torch/_tensor.py", line 307, in backward
|
| 1150 |
+
torch.autograd.backward(self, gradient, retain_graph, create_graph, inputs=inputs)
|
| 1151 |
+
File "/usr/local/lib/python3.8/dist-packages/torch/autograd/__init__.py", line 154, in backward
|
| 1152 |
+
Variable._execution_engine.run_backward(
|
| 1153 |
+
KeyboardInterrupt
|
| 1154 |
+
Process SpawnProcess-1:
|
| 1155 |
+
Traceback (most recent call last):
|
| 1156 |
+
File "/usr/lib/python3.8/multiprocessing/process.py", line 315, in _bootstrap
|
| 1157 |
+
self.run()
|
| 1158 |
+
File "/usr/lib/python3.8/multiprocessing/process.py", line 108, in run
|
| 1159 |
+
self._target(*self._args, **self._kwargs)
|
| 1160 |
+
File "/work/espnet/espnet2/tasks/abs_task.py", line 1315, in main_worker
|
| 1161 |
+
cls.trainer.run(
|
| 1162 |
+
File "/work/espnet/espnet2/train/trainer.py", line 286, in run
|
| 1163 |
+
all_steps_are_invalid = cls.train_one_epoch(
|
| 1164 |
+
File "/work/espnet/espnet2/train/gan_trainer.py", line 223, in train_one_epoch
|
| 1165 |
+
loss.backward()
|
| 1166 |
+
File "/usr/local/lib/python3.8/dist-packages/torch/_tensor.py", line 307, in backward
|
| 1167 |
+
torch.autograd.backward(self, gradient, retain_graph, create_graph, inputs=inputs)
|
| 1168 |
+
File "/usr/local/lib/python3.8/dist-packages/torch/autograd/__init__.py", line 154, in backward
|
| 1169 |
+
Variable._execution_engine.run_backward(
|
| 1170 |
+
KeyboardInterrupt
|
| 1171 |
+
/work/espnet/espnet2/layers/stft.py:166: UserWarning: __floordiv__ is deprecated, and its behavior will change in a future version of pytorch. It currently rounds toward 0 (like the 'trunc' function NOT 'floor'). This results in incorrect rounding for negative values. To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor').
|
| 1172 |
+
olens = (ilens - self.n_fft) // self.hop_length + 1
|
| 1173 |
+
/work/espnet/espnet2/layers/stft.py:166: UserWarning: __floordiv__ is deprecated, and its behavior will change in a future version of pytorch. It currently rounds toward 0 (like the 'trunc' function NOT 'floor'). This results in incorrect rounding for negative values. To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor').
|
| 1174 |
+
olens = (ilens - self.n_fft) // self.hop_length + 1
|
| 1175 |
+
Process SpawnProcess-3:
|
| 1176 |
+
Traceback (most recent call last):
|
| 1177 |
+
File "/usr/lib/python3.8/multiprocessing/process.py", line 315, in _bootstrap
|
| 1178 |
+
self.run()
|
| 1179 |
+
File "/usr/lib/python3.8/multiprocessing/process.py", line 108, in run
|
| 1180 |
+
self._target(*self._args, **self._kwargs)
|
| 1181 |
+
File "/work/espnet/espnet2/tasks/abs_task.py", line 1315, in main_worker
|
| 1182 |
+
cls.trainer.run(
|
| 1183 |
+
File "/work/espnet/espnet2/train/trainer.py", line 286, in run
|
| 1184 |
+
all_steps_are_invalid = cls.train_one_epoch(
|
| 1185 |
+
File "/work/espnet/espnet2/train/gan_trainer.py", line 223, in train_one_epoch
|
| 1186 |
+
loss.backward()
|
| 1187 |
+
File "/usr/local/lib/python3.8/dist-packages/torch/_tensor.py", line 307, in backward
|
| 1188 |
+
torch.autograd.backward(self, gradient, retain_graph, create_graph, inputs=inputs)
|
| 1189 |
+
File "/usr/local/lib/python3.8/dist-packages/torch/autograd/__init__.py", line 154, in backward
|
| 1190 |
+
Variable._execution_engine.run_backward(
|
| 1191 |
+
KeyboardInterrupt
|
| 1192 |
+
/work/espnet/espnet2/layers/stft.py:166: UserWarning: __floordiv__ is deprecated, and its behavior will change in a future version of pytorch. It currently rounds toward 0 (like the 'trunc' function NOT 'floor'). This results in incorrect rounding for negative values. To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor').
|
| 1193 |
+
olens = (ilens - self.n_fft) // self.hop_length + 1
|
| 1194 |
+
/work/espnet/espnet2/layers/stft.py:166: UserWarning: __floordiv__ is deprecated, and its behavior will change in a future version of pytorch. It currently rounds toward 0 (like the 'trunc' function NOT 'floor'). This results in incorrect rounding for negative values. To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor').
|
| 1195 |
+
olens = (ilens - self.n_fft) // self.hop_length + 1
|
| 1196 |
+
Process SpawnProcess-4:
|
| 1197 |
+
Traceback (most recent call last):
|
| 1198 |
+
File "/usr/lib/python3.8/multiprocessing/process.py", line 315, in _bootstrap
|
| 1199 |
+
self.run()
|
| 1200 |
+
File "/usr/lib/python3.8/multiprocessing/process.py", line 108, in run
|
| 1201 |
+
self._target(*self._args, **self._kwargs)
|
| 1202 |
+
File "/work/espnet/espnet2/tasks/abs_task.py", line 1315, in main_worker
|
| 1203 |
+
cls.trainer.run(
|
| 1204 |
+
File "/work/espnet/espnet2/train/trainer.py", line 286, in run
|
| 1205 |
+
all_steps_are_invalid = cls.train_one_epoch(
|
| 1206 |
+
File "/work/espnet/espnet2/train/gan_trainer.py", line 223, in train_one_epoch
|
| 1207 |
+
loss.backward()
|
| 1208 |
+
File "/usr/local/lib/python3.8/dist-packages/torch/_tensor.py", line 307, in backward
|
| 1209 |
+
torch.autograd.backward(self, gradient, retain_graph, create_graph, inputs=inputs)
|
| 1210 |
+
File "/usr/local/lib/python3.8/dist-packages/torch/autograd/__init__.py", line 154, in backward
|
| 1211 |
+
Variable._execution_engine.run_backward(
|
| 1212 |
+
KeyboardInterrupt
|
exp/tts_train_jets_raw_phn_jaconv_pyopenjtalk/train.log
CHANGED
|
@@ -1,985 +1,13 @@
|
|
| 1 |
# python3 -m espnet2.bin.gan_tts_train --use_preprocessor true --token_type phn --token_list dump/token_list/phn_jaconv_pyopenjtalk/tokens.txt --non_linguistic_symbols none --cleaner jaconv --g2p pyopenjtalk --normalize global_mvn --resume true --fold_length 150 --fold_length 240000 --output_dir exp/tts_train_jets_raw_phn_jaconv_pyopenjtalk --config conf/tuning/train_jets.yaml --feats_extract fbank --feats_extract_conf n_fft=2048 --feats_extract_conf hop_length=300 --feats_extract_conf win_length=1200 --feats_extract_conf fs=24000 --feats_extract_conf fmin=80 --feats_extract_conf fmax=7600 --feats_extract_conf n_mels=80 --train_data_path_and_name_and_type dump/raw/jvs010_tr_no_dev/text,text,text --train_data_path_and_name_and_type dump/raw/jvs010_tr_no_dev/wav.scp,speech,sound --train_shape_file exp/tts_stats_raw_phn_jaconv_pyopenjtalk/train/text_shape.phn --train_shape_file exp/tts_stats_raw_phn_jaconv_pyopenjtalk/train/speech_shape --valid_data_path_and_name_and_type dump/raw/jvs010_dev/text,text,text --valid_data_path_and_name_and_type dump/raw/jvs010_dev/wav.scp,speech,sound --valid_shape_file exp/tts_stats_raw_phn_jaconv_pyopenjtalk/valid/text_shape.phn --valid_shape_file exp/tts_stats_raw_phn_jaconv_pyopenjtalk/valid/speech_shape --pitch_extract_conf fs=24000 --pitch_extract_conf n_fft=2048 --pitch_extract_conf hop_length=300 --pitch_extract_conf f0max=400 --pitch_extract_conf f0min=80 --pitch_normalize_conf stats_file=exp/tts_stats_raw_phn_jaconv_pyopenjtalk/train/pitch_stats.npz --energy_extract_conf fs=24000 --energy_extract_conf n_fft=2048 --energy_extract_conf hop_length=300 --energy_extract_conf win_length=1200 --energy_normalize_conf stats_file=exp/tts_stats_raw_phn_jaconv_pyopenjtalk/train/energy_stats.npz --normalize_conf stats_file=exp/tts_stats_raw_phn_jaconv_pyopenjtalk/train/feats_stats.npz --ngpu 4 --multiprocessing_distributed True
|
| 2 |
-
# Started at Tue Mar 4
|
| 3 |
#
|
| 4 |
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 5 |
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 6 |
/usr/bin/python3 /work/espnet/espnet2/bin/gan_tts_train.py --use_preprocessor true --token_type phn --token_list dump/token_list/phn_jaconv_pyopenjtalk/tokens.txt --non_linguistic_symbols none --cleaner jaconv --g2p pyopenjtalk --normalize global_mvn --resume true --fold_length 150 --fold_length 240000 --output_dir exp/tts_train_jets_raw_phn_jaconv_pyopenjtalk --config conf/tuning/train_jets.yaml --feats_extract fbank --feats_extract_conf n_fft=2048 --feats_extract_conf hop_length=300 --feats_extract_conf win_length=1200 --feats_extract_conf fs=24000 --feats_extract_conf fmin=80 --feats_extract_conf fmax=7600 --feats_extract_conf n_mels=80 --train_data_path_and_name_and_type dump/raw/jvs010_tr_no_dev/text,text,text --train_data_path_and_name_and_type dump/raw/jvs010_tr_no_dev/wav.scp,speech,sound --train_shape_file exp/tts_stats_raw_phn_jaconv_pyopenjtalk/train/text_shape.phn --train_shape_file exp/tts_stats_raw_phn_jaconv_pyopenjtalk/train/speech_shape --valid_data_path_and_name_and_type dump/raw/jvs010_dev/text,text,text --valid_data_path_and_name_and_type dump/raw/jvs010_dev/wav.scp,speech,sound --valid_shape_file exp/tts_stats_raw_phn_jaconv_pyopenjtalk/valid/text_shape.phn --valid_shape_file exp/tts_stats_raw_phn_jaconv_pyopenjtalk/valid/speech_shape --pitch_extract_conf fs=24000 --pitch_extract_conf n_fft=2048 --pitch_extract_conf hop_length=300 --pitch_extract_conf f0max=400 --pitch_extract_conf f0min=80 --pitch_normalize_conf stats_file=exp/tts_stats_raw_phn_jaconv_pyopenjtalk/train/pitch_stats.npz --energy_extract_conf fs=24000 --energy_extract_conf n_fft=2048 --energy_extract_conf hop_length=300 --energy_extract_conf win_length=1200 --energy_normalize_conf stats_file=exp/tts_stats_raw_phn_jaconv_pyopenjtalk/train/energy_stats.npz --normalize_conf stats_file=exp/tts_stats_raw_phn_jaconv_pyopenjtalk/train/feats_stats.npz --ngpu 4 --multiprocessing_distributed True
|
| 7 |
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 8 |
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 9 |
-
[92b100c97f43:0/4] 2025-03-04
|
| 10 |
-
[92b100c97f43:0/4] 2025-03-04
|
| 11 |
-
[92b100c97f43:0/4] 2025-03-04
|
| 12 |
-
[92b100c97f43:0/4] 2025-03-04
|
| 13 |
-
[92b100c97f43:0/4] 2025-03-04
|
| 14 |
-
[92b100c97f43:0/4] 2025-03-04 21:23:54,351 (abs_task:1157) INFO: pytorch.version=1.10.1+cu113, cuda.available=True, cudnn.version=8200, cudnn.benchmark=False, cudnn.deterministic=False
|
| 15 |
-
[92b100c97f43:0/4] 2025-03-04 21:23:54,361 (abs_task:1158) INFO: Model structure:
|
| 16 |
-
ESPnetGANTTSModel(
|
| 17 |
-
(feats_extract): LogMelFbank(
|
| 18 |
-
(stft): Stft(n_fft=2048, win_length=1200, hop_length=300, center=True, normalized=False, onesided=True)
|
| 19 |
-
(logmel): LogMel(sr=24000, n_fft=2048, n_mels=80, fmin=80, fmax=7600, htk=False)
|
| 20 |
-
)
|
| 21 |
-
(normalize): GlobalMVN(stats_file=exp/tts_stats_raw_phn_jaconv_pyopenjtalk/train/feats_stats.npz, norm_means=True, norm_vars=True)
|
| 22 |
-
(pitch_extract): Dio()
|
| 23 |
-
(pitch_normalize): GlobalMVN(stats_file=exp/tts_stats_raw_phn_jaconv_pyopenjtalk/train/pitch_stats.npz, norm_means=True, norm_vars=True)
|
| 24 |
-
(energy_extract): Energy(
|
| 25 |
-
(stft): Stft(n_fft=2048, win_length=1200, hop_length=300, center=True, normalized=False, onesided=True)
|
| 26 |
-
)
|
| 27 |
-
(energy_normalize): GlobalMVN(stats_file=exp/tts_stats_raw_phn_jaconv_pyopenjtalk/train/energy_stats.npz, norm_means=True, norm_vars=True)
|
| 28 |
-
(tts): JETS(
|
| 29 |
-
(generator): JETSGenerator(
|
| 30 |
-
(encoder): Encoder(
|
| 31 |
-
(embed): Sequential(
|
| 32 |
-
(0): Embedding(41, 256, padding_idx=0)
|
| 33 |
-
(1): ScaledPositionalEncoding(
|
| 34 |
-
(dropout): Dropout(p=0.2, inplace=False)
|
| 35 |
-
)
|
| 36 |
-
)
|
| 37 |
-
(encoders): MultiSequential(
|
| 38 |
-
(0): EncoderLayer(
|
| 39 |
-
(self_attn): MultiHeadedAttention(
|
| 40 |
-
(linear_q): Linear(in_features=256, out_features=256, bias=True)
|
| 41 |
-
(linear_k): Linear(in_features=256, out_features=256, bias=True)
|
| 42 |
-
(linear_v): Linear(in_features=256, out_features=256, bias=True)
|
| 43 |
-
(linear_out): Linear(in_features=256, out_features=256, bias=True)
|
| 44 |
-
(dropout): Dropout(p=0.2, inplace=False)
|
| 45 |
-
)
|
| 46 |
-
(feed_forward): MultiLayeredConv1d(
|
| 47 |
-
(w_1): Conv1d(256, 1024, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 48 |
-
(w_2): Conv1d(1024, 256, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 49 |
-
(dropout): Dropout(p=0.2, inplace=False)
|
| 50 |
-
)
|
| 51 |
-
(norm1): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
|
| 52 |
-
(norm2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
|
| 53 |
-
(dropout): Dropout(p=0.2, inplace=False)
|
| 54 |
-
)
|
| 55 |
-
(1): EncoderLayer(
|
| 56 |
-
(self_attn): MultiHeadedAttention(
|
| 57 |
-
(linear_q): Linear(in_features=256, out_features=256, bias=True)
|
| 58 |
-
(linear_k): Linear(in_features=256, out_features=256, bias=True)
|
| 59 |
-
(linear_v): Linear(in_features=256, out_features=256, bias=True)
|
| 60 |
-
(linear_out): Linear(in_features=256, out_features=256, bias=True)
|
| 61 |
-
(dropout): Dropout(p=0.2, inplace=False)
|
| 62 |
-
)
|
| 63 |
-
(feed_forward): MultiLayeredConv1d(
|
| 64 |
-
(w_1): Conv1d(256, 1024, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 65 |
-
(w_2): Conv1d(1024, 256, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 66 |
-
(dropout): Dropout(p=0.2, inplace=False)
|
| 67 |
-
)
|
| 68 |
-
(norm1): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
|
| 69 |
-
(norm2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
|
| 70 |
-
(dropout): Dropout(p=0.2, inplace=False)
|
| 71 |
-
)
|
| 72 |
-
(2): EncoderLayer(
|
| 73 |
-
(self_attn): MultiHeadedAttention(
|
| 74 |
-
(linear_q): Linear(in_features=256, out_features=256, bias=True)
|
| 75 |
-
(linear_k): Linear(in_features=256, out_features=256, bias=True)
|
| 76 |
-
(linear_v): Linear(in_features=256, out_features=256, bias=True)
|
| 77 |
-
(linear_out): Linear(in_features=256, out_features=256, bias=True)
|
| 78 |
-
(dropout): Dropout(p=0.2, inplace=False)
|
| 79 |
-
)
|
| 80 |
-
(feed_forward): MultiLayeredConv1d(
|
| 81 |
-
(w_1): Conv1d(256, 1024, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 82 |
-
(w_2): Conv1d(1024, 256, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 83 |
-
(dropout): Dropout(p=0.2, inplace=False)
|
| 84 |
-
)
|
| 85 |
-
(norm1): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
|
| 86 |
-
(norm2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
|
| 87 |
-
(dropout): Dropout(p=0.2, inplace=False)
|
| 88 |
-
)
|
| 89 |
-
(3): EncoderLayer(
|
| 90 |
-
(self_attn): MultiHeadedAttention(
|
| 91 |
-
(linear_q): Linear(in_features=256, out_features=256, bias=True)
|
| 92 |
-
(linear_k): Linear(in_features=256, out_features=256, bias=True)
|
| 93 |
-
(linear_v): Linear(in_features=256, out_features=256, bias=True)
|
| 94 |
-
(linear_out): Linear(in_features=256, out_features=256, bias=True)
|
| 95 |
-
(dropout): Dropout(p=0.2, inplace=False)
|
| 96 |
-
)
|
| 97 |
-
(feed_forward): MultiLayeredConv1d(
|
| 98 |
-
(w_1): Conv1d(256, 1024, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 99 |
-
(w_2): Conv1d(1024, 256, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 100 |
-
(dropout): Dropout(p=0.2, inplace=False)
|
| 101 |
-
)
|
| 102 |
-
(norm1): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
|
| 103 |
-
(norm2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
|
| 104 |
-
(dropout): Dropout(p=0.2, inplace=False)
|
| 105 |
-
)
|
| 106 |
-
)
|
| 107 |
-
(after_norm): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
|
| 108 |
-
)
|
| 109 |
-
(duration_predictor): DurationPredictor(
|
| 110 |
-
(conv): ModuleList(
|
| 111 |
-
(0): Sequential(
|
| 112 |
-
(0): Conv1d(256, 256, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 113 |
-
(1): ReLU()
|
| 114 |
-
(2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
|
| 115 |
-
(3): Dropout(p=0.1, inplace=False)
|
| 116 |
-
)
|
| 117 |
-
(1): Sequential(
|
| 118 |
-
(0): Conv1d(256, 256, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 119 |
-
(1): ReLU()
|
| 120 |
-
(2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
|
| 121 |
-
(3): Dropout(p=0.1, inplace=False)
|
| 122 |
-
)
|
| 123 |
-
)
|
| 124 |
-
(linear): Linear(in_features=256, out_features=1, bias=True)
|
| 125 |
-
)
|
| 126 |
-
(pitch_predictor): VariancePredictor(
|
| 127 |
-
(conv): ModuleList(
|
| 128 |
-
(0): Sequential(
|
| 129 |
-
(0): Conv1d(256, 256, kernel_size=(5,), stride=(1,), padding=(2,))
|
| 130 |
-
(1): ReLU()
|
| 131 |
-
(2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
|
| 132 |
-
(3): Dropout(p=0.5, inplace=False)
|
| 133 |
-
)
|
| 134 |
-
(1): Sequential(
|
| 135 |
-
(0): Conv1d(256, 256, kernel_size=(5,), stride=(1,), padding=(2,))
|
| 136 |
-
(1): ReLU()
|
| 137 |
-
(2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
|
| 138 |
-
(3): Dropout(p=0.5, inplace=False)
|
| 139 |
-
)
|
| 140 |
-
(2): Sequential(
|
| 141 |
-
(0): Conv1d(256, 256, kernel_size=(5,), stride=(1,), padding=(2,))
|
| 142 |
-
(1): ReLU()
|
| 143 |
-
(2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
|
| 144 |
-
(3): Dropout(p=0.5, inplace=False)
|
| 145 |
-
)
|
| 146 |
-
(3): Sequential(
|
| 147 |
-
(0): Conv1d(256, 256, kernel_size=(5,), stride=(1,), padding=(2,))
|
| 148 |
-
(1): ReLU()
|
| 149 |
-
(2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
|
| 150 |
-
(3): Dropout(p=0.5, inplace=False)
|
| 151 |
-
)
|
| 152 |
-
(4): Sequential(
|
| 153 |
-
(0): Conv1d(256, 256, kernel_size=(5,), stride=(1,), padding=(2,))
|
| 154 |
-
(1): ReLU()
|
| 155 |
-
(2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
|
| 156 |
-
(3): Dropout(p=0.5, inplace=False)
|
| 157 |
-
)
|
| 158 |
-
)
|
| 159 |
-
(linear): Linear(in_features=256, out_features=1, bias=True)
|
| 160 |
-
)
|
| 161 |
-
(pitch_embed): Sequential(
|
| 162 |
-
(0): Conv1d(1, 256, kernel_size=(1,), stride=(1,))
|
| 163 |
-
(1): Dropout(p=0.0, inplace=False)
|
| 164 |
-
)
|
| 165 |
-
(energy_predictor): VariancePredictor(
|
| 166 |
-
(conv): ModuleList(
|
| 167 |
-
(0): Sequential(
|
| 168 |
-
(0): Conv1d(256, 256, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 169 |
-
(1): ReLU()
|
| 170 |
-
(2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
|
| 171 |
-
(3): Dropout(p=0.5, inplace=False)
|
| 172 |
-
)
|
| 173 |
-
(1): Sequential(
|
| 174 |
-
(0): Conv1d(256, 256, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 175 |
-
(1): ReLU()
|
| 176 |
-
(2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
|
| 177 |
-
(3): Dropout(p=0.5, inplace=False)
|
| 178 |
-
)
|
| 179 |
-
)
|
| 180 |
-
(linear): Linear(in_features=256, out_features=1, bias=True)
|
| 181 |
-
)
|
| 182 |
-
(energy_embed): Sequential(
|
| 183 |
-
(0): Conv1d(1, 256, kernel_size=(1,), stride=(1,))
|
| 184 |
-
(1): Dropout(p=0.0, inplace=False)
|
| 185 |
-
)
|
| 186 |
-
(alignment_module): AlignmentModule(
|
| 187 |
-
(t_conv1): Conv1d(256, 256, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 188 |
-
(t_conv2): Conv1d(256, 256, kernel_size=(1,), stride=(1,))
|
| 189 |
-
(f_conv1): Conv1d(80, 256, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 190 |
-
(f_conv2): Conv1d(256, 256, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 191 |
-
(f_conv3): Conv1d(256, 256, kernel_size=(1,), stride=(1,))
|
| 192 |
-
)
|
| 193 |
-
(length_regulator): GaussianUpsampling()
|
| 194 |
-
(decoder): Encoder(
|
| 195 |
-
(embed): Sequential(
|
| 196 |
-
(0): ScaledPositionalEncoding(
|
| 197 |
-
(dropout): Dropout(p=0.2, inplace=False)
|
| 198 |
-
)
|
| 199 |
-
)
|
| 200 |
-
(encoders): MultiSequential(
|
| 201 |
-
(0): EncoderLayer(
|
| 202 |
-
(self_attn): MultiHeadedAttention(
|
| 203 |
-
(linear_q): Linear(in_features=256, out_features=256, bias=True)
|
| 204 |
-
(linear_k): Linear(in_features=256, out_features=256, bias=True)
|
| 205 |
-
(linear_v): Linear(in_features=256, out_features=256, bias=True)
|
| 206 |
-
(linear_out): Linear(in_features=256, out_features=256, bias=True)
|
| 207 |
-
(dropout): Dropout(p=0.2, inplace=False)
|
| 208 |
-
)
|
| 209 |
-
(feed_forward): MultiLayeredConv1d(
|
| 210 |
-
(w_1): Conv1d(256, 1024, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 211 |
-
(w_2): Conv1d(1024, 256, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 212 |
-
(dropout): Dropout(p=0.2, inplace=False)
|
| 213 |
-
)
|
| 214 |
-
(norm1): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
|
| 215 |
-
(norm2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
|
| 216 |
-
(dropout): Dropout(p=0.2, inplace=False)
|
| 217 |
-
)
|
| 218 |
-
(1): EncoderLayer(
|
| 219 |
-
(self_attn): MultiHeadedAttention(
|
| 220 |
-
(linear_q): Linear(in_features=256, out_features=256, bias=True)
|
| 221 |
-
(linear_k): Linear(in_features=256, out_features=256, bias=True)
|
| 222 |
-
(linear_v): Linear(in_features=256, out_features=256, bias=True)
|
| 223 |
-
(linear_out): Linear(in_features=256, out_features=256, bias=True)
|
| 224 |
-
(dropout): Dropout(p=0.2, inplace=False)
|
| 225 |
-
)
|
| 226 |
-
(feed_forward): MultiLayeredConv1d(
|
| 227 |
-
(w_1): Conv1d(256, 1024, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 228 |
-
(w_2): Conv1d(1024, 256, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 229 |
-
(dropout): Dropout(p=0.2, inplace=False)
|
| 230 |
-
)
|
| 231 |
-
(norm1): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
|
| 232 |
-
(norm2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
|
| 233 |
-
(dropout): Dropout(p=0.2, inplace=False)
|
| 234 |
-
)
|
| 235 |
-
(2): EncoderLayer(
|
| 236 |
-
(self_attn): MultiHeadedAttention(
|
| 237 |
-
(linear_q): Linear(in_features=256, out_features=256, bias=True)
|
| 238 |
-
(linear_k): Linear(in_features=256, out_features=256, bias=True)
|
| 239 |
-
(linear_v): Linear(in_features=256, out_features=256, bias=True)
|
| 240 |
-
(linear_out): Linear(in_features=256, out_features=256, bias=True)
|
| 241 |
-
(dropout): Dropout(p=0.2, inplace=False)
|
| 242 |
-
)
|
| 243 |
-
(feed_forward): MultiLayeredConv1d(
|
| 244 |
-
(w_1): Conv1d(256, 1024, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 245 |
-
(w_2): Conv1d(1024, 256, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 246 |
-
(dropout): Dropout(p=0.2, inplace=False)
|
| 247 |
-
)
|
| 248 |
-
(norm1): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
|
| 249 |
-
(norm2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
|
| 250 |
-
(dropout): Dropout(p=0.2, inplace=False)
|
| 251 |
-
)
|
| 252 |
-
(3): EncoderLayer(
|
| 253 |
-
(self_attn): MultiHeadedAttention(
|
| 254 |
-
(linear_q): Linear(in_features=256, out_features=256, bias=True)
|
| 255 |
-
(linear_k): Linear(in_features=256, out_features=256, bias=True)
|
| 256 |
-
(linear_v): Linear(in_features=256, out_features=256, bias=True)
|
| 257 |
-
(linear_out): Linear(in_features=256, out_features=256, bias=True)
|
| 258 |
-
(dropout): Dropout(p=0.2, inplace=False)
|
| 259 |
-
)
|
| 260 |
-
(feed_forward): MultiLayeredConv1d(
|
| 261 |
-
(w_1): Conv1d(256, 1024, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 262 |
-
(w_2): Conv1d(1024, 256, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 263 |
-
(dropout): Dropout(p=0.2, inplace=False)
|
| 264 |
-
)
|
| 265 |
-
(norm1): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
|
| 266 |
-
(norm2): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
|
| 267 |
-
(dropout): Dropout(p=0.2, inplace=False)
|
| 268 |
-
)
|
| 269 |
-
)
|
| 270 |
-
(after_norm): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
|
| 271 |
-
)
|
| 272 |
-
(generator): HiFiGANGenerator(
|
| 273 |
-
(input_conv): Conv1d(256, 512, kernel_size=(7,), stride=(1,), padding=(3,))
|
| 274 |
-
(upsamples): ModuleList(
|
| 275 |
-
(0): Sequential(
|
| 276 |
-
(0): LeakyReLU(negative_slope=0.1)
|
| 277 |
-
(1): ConvTranspose1d(512, 256, kernel_size=(16,), stride=(8,), padding=(4,))
|
| 278 |
-
)
|
| 279 |
-
(1): Sequential(
|
| 280 |
-
(0): LeakyReLU(negative_slope=0.1)
|
| 281 |
-
(1): ConvTranspose1d(256, 128, kernel_size=(16,), stride=(8,), padding=(4,))
|
| 282 |
-
)
|
| 283 |
-
(2): Sequential(
|
| 284 |
-
(0): LeakyReLU(negative_slope=0.1)
|
| 285 |
-
(1): ConvTranspose1d(128, 64, kernel_size=(4,), stride=(2,), padding=(1,))
|
| 286 |
-
)
|
| 287 |
-
(3): Sequential(
|
| 288 |
-
(0): LeakyReLU(negative_slope=0.1)
|
| 289 |
-
(1): ConvTranspose1d(64, 32, kernel_size=(4,), stride=(2,), padding=(1,))
|
| 290 |
-
)
|
| 291 |
-
)
|
| 292 |
-
(blocks): ModuleList(
|
| 293 |
-
(0): ResidualBlock(
|
| 294 |
-
(convs1): ModuleList(
|
| 295 |
-
(0): Sequential(
|
| 296 |
-
(0): LeakyReLU(negative_slope=0.1)
|
| 297 |
-
(1): Conv1d(256, 256, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 298 |
-
)
|
| 299 |
-
(1): Sequential(
|
| 300 |
-
(0): LeakyReLU(negative_slope=0.1)
|
| 301 |
-
(1): Conv1d(256, 256, kernel_size=(3,), stride=(1,), padding=(3,), dilation=(3,))
|
| 302 |
-
)
|
| 303 |
-
(2): Sequential(
|
| 304 |
-
(0): LeakyReLU(negative_slope=0.1)
|
| 305 |
-
(1): Conv1d(256, 256, kernel_size=(3,), stride=(1,), padding=(5,), dilation=(5,))
|
| 306 |
-
)
|
| 307 |
-
)
|
| 308 |
-
(convs2): ModuleList(
|
| 309 |
-
(0): Sequential(
|
| 310 |
-
(0): LeakyReLU(negative_slope=0.1)
|
| 311 |
-
(1): Conv1d(256, 256, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 312 |
-
)
|
| 313 |
-
(1): Sequential(
|
| 314 |
-
(0): LeakyReLU(negative_slope=0.1)
|
| 315 |
-
(1): Conv1d(256, 256, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 316 |
-
)
|
| 317 |
-
(2): Sequential(
|
| 318 |
-
(0): LeakyReLU(negative_slope=0.1)
|
| 319 |
-
(1): Conv1d(256, 256, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 320 |
-
)
|
| 321 |
-
)
|
| 322 |
-
)
|
| 323 |
-
(1): ResidualBlock(
|
| 324 |
-
(convs1): ModuleList(
|
| 325 |
-
(0): Sequential(
|
| 326 |
-
(0): LeakyReLU(negative_slope=0.1)
|
| 327 |
-
(1): Conv1d(256, 256, kernel_size=(7,), stride=(1,), padding=(3,))
|
| 328 |
-
)
|
| 329 |
-
(1): Sequential(
|
| 330 |
-
(0): LeakyReLU(negative_slope=0.1)
|
| 331 |
-
(1): Conv1d(256, 256, kernel_size=(7,), stride=(1,), padding=(9,), dilation=(3,))
|
| 332 |
-
)
|
| 333 |
-
(2): Sequential(
|
| 334 |
-
(0): LeakyReLU(negative_slope=0.1)
|
| 335 |
-
(1): Conv1d(256, 256, kernel_size=(7,), stride=(1,), padding=(15,), dilation=(5,))
|
| 336 |
-
)
|
| 337 |
-
)
|
| 338 |
-
(convs2): ModuleList(
|
| 339 |
-
(0): Sequential(
|
| 340 |
-
(0): LeakyReLU(negative_slope=0.1)
|
| 341 |
-
(1): Conv1d(256, 256, kernel_size=(7,), stride=(1,), padding=(3,))
|
| 342 |
-
)
|
| 343 |
-
(1): Sequential(
|
| 344 |
-
(0): LeakyReLU(negative_slope=0.1)
|
| 345 |
-
(1): Conv1d(256, 256, kernel_size=(7,), stride=(1,), padding=(3,))
|
| 346 |
-
)
|
| 347 |
-
(2): Sequential(
|
| 348 |
-
(0): LeakyReLU(negative_slope=0.1)
|
| 349 |
-
(1): Conv1d(256, 256, kernel_size=(7,), stride=(1,), padding=(3,))
|
| 350 |
-
)
|
| 351 |
-
)
|
| 352 |
-
)
|
| 353 |
-
(2): ResidualBlock(
|
| 354 |
-
(convs1): ModuleList(
|
| 355 |
-
(0): Sequential(
|
| 356 |
-
(0): LeakyReLU(negative_slope=0.1)
|
| 357 |
-
(1): Conv1d(256, 256, kernel_size=(11,), stride=(1,), padding=(5,))
|
| 358 |
-
)
|
| 359 |
-
(1): Sequential(
|
| 360 |
-
(0): LeakyReLU(negative_slope=0.1)
|
| 361 |
-
(1): Conv1d(256, 256, kernel_size=(11,), stride=(1,), padding=(15,), dilation=(3,))
|
| 362 |
-
)
|
| 363 |
-
(2): Sequential(
|
| 364 |
-
(0): LeakyReLU(negative_slope=0.1)
|
| 365 |
-
(1): Conv1d(256, 256, kernel_size=(11,), stride=(1,), padding=(25,), dilation=(5,))
|
| 366 |
-
)
|
| 367 |
-
)
|
| 368 |
-
(convs2): ModuleList(
|
| 369 |
-
(0): Sequential(
|
| 370 |
-
(0): LeakyReLU(negative_slope=0.1)
|
| 371 |
-
(1): Conv1d(256, 256, kernel_size=(11,), stride=(1,), padding=(5,))
|
| 372 |
-
)
|
| 373 |
-
(1): Sequential(
|
| 374 |
-
(0): LeakyReLU(negative_slope=0.1)
|
| 375 |
-
(1): Conv1d(256, 256, kernel_size=(11,), stride=(1,), padding=(5,))
|
| 376 |
-
)
|
| 377 |
-
(2): Sequential(
|
| 378 |
-
(0): LeakyReLU(negative_slope=0.1)
|
| 379 |
-
(1): Conv1d(256, 256, kernel_size=(11,), stride=(1,), padding=(5,))
|
| 380 |
-
)
|
| 381 |
-
)
|
| 382 |
-
)
|
| 383 |
-
(3): ResidualBlock(
|
| 384 |
-
(convs1): ModuleList(
|
| 385 |
-
(0): Sequential(
|
| 386 |
-
(0): LeakyReLU(negative_slope=0.1)
|
| 387 |
-
(1): Conv1d(128, 128, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 388 |
-
)
|
| 389 |
-
(1): Sequential(
|
| 390 |
-
(0): LeakyReLU(negative_slope=0.1)
|
| 391 |
-
(1): Conv1d(128, 128, kernel_size=(3,), stride=(1,), padding=(3,), dilation=(3,))
|
| 392 |
-
)
|
| 393 |
-
(2): Sequential(
|
| 394 |
-
(0): LeakyReLU(negative_slope=0.1)
|
| 395 |
-
(1): Conv1d(128, 128, kernel_size=(3,), stride=(1,), padding=(5,), dilation=(5,))
|
| 396 |
-
)
|
| 397 |
-
)
|
| 398 |
-
(convs2): ModuleList(
|
| 399 |
-
(0): Sequential(
|
| 400 |
-
(0): LeakyReLU(negative_slope=0.1)
|
| 401 |
-
(1): Conv1d(128, 128, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 402 |
-
)
|
| 403 |
-
(1): Sequential(
|
| 404 |
-
(0): LeakyReLU(negative_slope=0.1)
|
| 405 |
-
(1): Conv1d(128, 128, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 406 |
-
)
|
| 407 |
-
(2): Sequential(
|
| 408 |
-
(0): LeakyReLU(negative_slope=0.1)
|
| 409 |
-
(1): Conv1d(128, 128, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 410 |
-
)
|
| 411 |
-
)
|
| 412 |
-
)
|
| 413 |
-
(4): ResidualBlock(
|
| 414 |
-
(convs1): ModuleList(
|
| 415 |
-
(0): Sequential(
|
| 416 |
-
(0): LeakyReLU(negative_slope=0.1)
|
| 417 |
-
(1): Conv1d(128, 128, kernel_size=(7,), stride=(1,), padding=(3,))
|
| 418 |
-
)
|
| 419 |
-
(1): Sequential(
|
| 420 |
-
(0): LeakyReLU(negative_slope=0.1)
|
| 421 |
-
(1): Conv1d(128, 128, kernel_size=(7,), stride=(1,), padding=(9,), dilation=(3,))
|
| 422 |
-
)
|
| 423 |
-
(2): Sequential(
|
| 424 |
-
(0): LeakyReLU(negative_slope=0.1)
|
| 425 |
-
(1): Conv1d(128, 128, kernel_size=(7,), stride=(1,), padding=(15,), dilation=(5,))
|
| 426 |
-
)
|
| 427 |
-
)
|
| 428 |
-
(convs2): ModuleList(
|
| 429 |
-
(0): Sequential(
|
| 430 |
-
(0): LeakyReLU(negative_slope=0.1)
|
| 431 |
-
(1): Conv1d(128, 128, kernel_size=(7,), stride=(1,), padding=(3,))
|
| 432 |
-
)
|
| 433 |
-
(1): Sequential(
|
| 434 |
-
(0): LeakyReLU(negative_slope=0.1)
|
| 435 |
-
(1): Conv1d(128, 128, kernel_size=(7,), stride=(1,), padding=(3,))
|
| 436 |
-
)
|
| 437 |
-
(2): Sequential(
|
| 438 |
-
(0): LeakyReLU(negative_slope=0.1)
|
| 439 |
-
(1): Conv1d(128, 128, kernel_size=(7,), stride=(1,), padding=(3,))
|
| 440 |
-
)
|
| 441 |
-
)
|
| 442 |
-
)
|
| 443 |
-
(5): ResidualBlock(
|
| 444 |
-
(convs1): ModuleList(
|
| 445 |
-
(0): Sequential(
|
| 446 |
-
(0): LeakyReLU(negative_slope=0.1)
|
| 447 |
-
(1): Conv1d(128, 128, kernel_size=(11,), stride=(1,), padding=(5,))
|
| 448 |
-
)
|
| 449 |
-
(1): Sequential(
|
| 450 |
-
(0): LeakyReLU(negative_slope=0.1)
|
| 451 |
-
(1): Conv1d(128, 128, kernel_size=(11,), stride=(1,), padding=(15,), dilation=(3,))
|
| 452 |
-
)
|
| 453 |
-
(2): Sequential(
|
| 454 |
-
(0): LeakyReLU(negative_slope=0.1)
|
| 455 |
-
(1): Conv1d(128, 128, kernel_size=(11,), stride=(1,), padding=(25,), dilation=(5,))
|
| 456 |
-
)
|
| 457 |
-
)
|
| 458 |
-
(convs2): ModuleList(
|
| 459 |
-
(0): Sequential(
|
| 460 |
-
(0): LeakyReLU(negative_slope=0.1)
|
| 461 |
-
(1): Conv1d(128, 128, kernel_size=(11,), stride=(1,), padding=(5,))
|
| 462 |
-
)
|
| 463 |
-
(1): Sequential(
|
| 464 |
-
(0): LeakyReLU(negative_slope=0.1)
|
| 465 |
-
(1): Conv1d(128, 128, kernel_size=(11,), stride=(1,), padding=(5,))
|
| 466 |
-
)
|
| 467 |
-
(2): Sequential(
|
| 468 |
-
(0): LeakyReLU(negative_slope=0.1)
|
| 469 |
-
(1): Conv1d(128, 128, kernel_size=(11,), stride=(1,), padding=(5,))
|
| 470 |
-
)
|
| 471 |
-
)
|
| 472 |
-
)
|
| 473 |
-
(6): ResidualBlock(
|
| 474 |
-
(convs1): ModuleList(
|
| 475 |
-
(0): Sequential(
|
| 476 |
-
(0): LeakyReLU(negative_slope=0.1)
|
| 477 |
-
(1): Conv1d(64, 64, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 478 |
-
)
|
| 479 |
-
(1): Sequential(
|
| 480 |
-
(0): LeakyReLU(negative_slope=0.1)
|
| 481 |
-
(1): Conv1d(64, 64, kernel_size=(3,), stride=(1,), padding=(3,), dilation=(3,))
|
| 482 |
-
)
|
| 483 |
-
(2): Sequential(
|
| 484 |
-
(0): LeakyReLU(negative_slope=0.1)
|
| 485 |
-
(1): Conv1d(64, 64, kernel_size=(3,), stride=(1,), padding=(5,), dilation=(5,))
|
| 486 |
-
)
|
| 487 |
-
)
|
| 488 |
-
(convs2): ModuleList(
|
| 489 |
-
(0): Sequential(
|
| 490 |
-
(0): LeakyReLU(negative_slope=0.1)
|
| 491 |
-
(1): Conv1d(64, 64, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 492 |
-
)
|
| 493 |
-
(1): Sequential(
|
| 494 |
-
(0): LeakyReLU(negative_slope=0.1)
|
| 495 |
-
(1): Conv1d(64, 64, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 496 |
-
)
|
| 497 |
-
(2): Sequential(
|
| 498 |
-
(0): LeakyReLU(negative_slope=0.1)
|
| 499 |
-
(1): Conv1d(64, 64, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 500 |
-
)
|
| 501 |
-
)
|
| 502 |
-
)
|
| 503 |
-
(7): ResidualBlock(
|
| 504 |
-
(convs1): ModuleList(
|
| 505 |
-
(0): Sequential(
|
| 506 |
-
(0): LeakyReLU(negative_slope=0.1)
|
| 507 |
-
(1): Conv1d(64, 64, kernel_size=(7,), stride=(1,), padding=(3,))
|
| 508 |
-
)
|
| 509 |
-
(1): Sequential(
|
| 510 |
-
(0): LeakyReLU(negative_slope=0.1)
|
| 511 |
-
(1): Conv1d(64, 64, kernel_size=(7,), stride=(1,), padding=(9,), dilation=(3,))
|
| 512 |
-
)
|
| 513 |
-
(2): Sequential(
|
| 514 |
-
(0): LeakyReLU(negative_slope=0.1)
|
| 515 |
-
(1): Conv1d(64, 64, kernel_size=(7,), stride=(1,), padding=(15,), dilation=(5,))
|
| 516 |
-
)
|
| 517 |
-
)
|
| 518 |
-
(convs2): ModuleList(
|
| 519 |
-
(0): Sequential(
|
| 520 |
-
(0): LeakyReLU(negative_slope=0.1)
|
| 521 |
-
(1): Conv1d(64, 64, kernel_size=(7,), stride=(1,), padding=(3,))
|
| 522 |
-
)
|
| 523 |
-
(1): Sequential(
|
| 524 |
-
(0): LeakyReLU(negative_slope=0.1)
|
| 525 |
-
(1): Conv1d(64, 64, kernel_size=(7,), stride=(1,), padding=(3,))
|
| 526 |
-
)
|
| 527 |
-
(2): Sequential(
|
| 528 |
-
(0): LeakyReLU(negative_slope=0.1)
|
| 529 |
-
(1): Conv1d(64, 64, kernel_size=(7,), stride=(1,), padding=(3,))
|
| 530 |
-
)
|
| 531 |
-
)
|
| 532 |
-
)
|
| 533 |
-
(8): ResidualBlock(
|
| 534 |
-
(convs1): ModuleList(
|
| 535 |
-
(0): Sequential(
|
| 536 |
-
(0): LeakyReLU(negative_slope=0.1)
|
| 537 |
-
(1): Conv1d(64, 64, kernel_size=(11,), stride=(1,), padding=(5,))
|
| 538 |
-
)
|
| 539 |
-
(1): Sequential(
|
| 540 |
-
(0): LeakyReLU(negative_slope=0.1)
|
| 541 |
-
(1): Conv1d(64, 64, kernel_size=(11,), stride=(1,), padding=(15,), dilation=(3,))
|
| 542 |
-
)
|
| 543 |
-
(2): Sequential(
|
| 544 |
-
(0): LeakyReLU(negative_slope=0.1)
|
| 545 |
-
(1): Conv1d(64, 64, kernel_size=(11,), stride=(1,), padding=(25,), dilation=(5,))
|
| 546 |
-
)
|
| 547 |
-
)
|
| 548 |
-
(convs2): ModuleList(
|
| 549 |
-
(0): Sequential(
|
| 550 |
-
(0): LeakyReLU(negative_slope=0.1)
|
| 551 |
-
(1): Conv1d(64, 64, kernel_size=(11,), stride=(1,), padding=(5,))
|
| 552 |
-
)
|
| 553 |
-
(1): Sequential(
|
| 554 |
-
(0): LeakyReLU(negative_slope=0.1)
|
| 555 |
-
(1): Conv1d(64, 64, kernel_size=(11,), stride=(1,), padding=(5,))
|
| 556 |
-
)
|
| 557 |
-
(2): Sequential(
|
| 558 |
-
(0): LeakyReLU(negative_slope=0.1)
|
| 559 |
-
(1): Conv1d(64, 64, kernel_size=(11,), stride=(1,), padding=(5,))
|
| 560 |
-
)
|
| 561 |
-
)
|
| 562 |
-
)
|
| 563 |
-
(9): ResidualBlock(
|
| 564 |
-
(convs1): ModuleList(
|
| 565 |
-
(0): Sequential(
|
| 566 |
-
(0): LeakyReLU(negative_slope=0.1)
|
| 567 |
-
(1): Conv1d(32, 32, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 568 |
-
)
|
| 569 |
-
(1): Sequential(
|
| 570 |
-
(0): LeakyReLU(negative_slope=0.1)
|
| 571 |
-
(1): Conv1d(32, 32, kernel_size=(3,), stride=(1,), padding=(3,), dilation=(3,))
|
| 572 |
-
)
|
| 573 |
-
(2): Sequential(
|
| 574 |
-
(0): LeakyReLU(negative_slope=0.1)
|
| 575 |
-
(1): Conv1d(32, 32, kernel_size=(3,), stride=(1,), padding=(5,), dilation=(5,))
|
| 576 |
-
)
|
| 577 |
-
)
|
| 578 |
-
(convs2): ModuleList(
|
| 579 |
-
(0): Sequential(
|
| 580 |
-
(0): LeakyReLU(negative_slope=0.1)
|
| 581 |
-
(1): Conv1d(32, 32, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 582 |
-
)
|
| 583 |
-
(1): Sequential(
|
| 584 |
-
(0): LeakyReLU(negative_slope=0.1)
|
| 585 |
-
(1): Conv1d(32, 32, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 586 |
-
)
|
| 587 |
-
(2): Sequential(
|
| 588 |
-
(0): LeakyReLU(negative_slope=0.1)
|
| 589 |
-
(1): Conv1d(32, 32, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 590 |
-
)
|
| 591 |
-
)
|
| 592 |
-
)
|
| 593 |
-
(10): ResidualBlock(
|
| 594 |
-
(convs1): ModuleList(
|
| 595 |
-
(0): Sequential(
|
| 596 |
-
(0): LeakyReLU(negative_slope=0.1)
|
| 597 |
-
(1): Conv1d(32, 32, kernel_size=(7,), stride=(1,), padding=(3,))
|
| 598 |
-
)
|
| 599 |
-
(1): Sequential(
|
| 600 |
-
(0): LeakyReLU(negative_slope=0.1)
|
| 601 |
-
(1): Conv1d(32, 32, kernel_size=(7,), stride=(1,), padding=(9,), dilation=(3,))
|
| 602 |
-
)
|
| 603 |
-
(2): Sequential(
|
| 604 |
-
(0): LeakyReLU(negative_slope=0.1)
|
| 605 |
-
(1): Conv1d(32, 32, kernel_size=(7,), stride=(1,), padding=(15,), dilation=(5,))
|
| 606 |
-
)
|
| 607 |
-
)
|
| 608 |
-
(convs2): ModuleList(
|
| 609 |
-
(0): Sequential(
|
| 610 |
-
(0): LeakyReLU(negative_slope=0.1)
|
| 611 |
-
(1): Conv1d(32, 32, kernel_size=(7,), stride=(1,), padding=(3,))
|
| 612 |
-
)
|
| 613 |
-
(1): Sequential(
|
| 614 |
-
(0): LeakyReLU(negative_slope=0.1)
|
| 615 |
-
(1): Conv1d(32, 32, kernel_size=(7,), stride=(1,), padding=(3,))
|
| 616 |
-
)
|
| 617 |
-
(2): Sequential(
|
| 618 |
-
(0): LeakyReLU(negative_slope=0.1)
|
| 619 |
-
(1): Conv1d(32, 32, kernel_size=(7,), stride=(1,), padding=(3,))
|
| 620 |
-
)
|
| 621 |
-
)
|
| 622 |
-
)
|
| 623 |
-
(11): ResidualBlock(
|
| 624 |
-
(convs1): ModuleList(
|
| 625 |
-
(0): Sequential(
|
| 626 |
-
(0): LeakyReLU(negative_slope=0.1)
|
| 627 |
-
(1): Conv1d(32, 32, kernel_size=(11,), stride=(1,), padding=(5,))
|
| 628 |
-
)
|
| 629 |
-
(1): Sequential(
|
| 630 |
-
(0): LeakyReLU(negative_slope=0.1)
|
| 631 |
-
(1): Conv1d(32, 32, kernel_size=(11,), stride=(1,), padding=(15,), dilation=(3,))
|
| 632 |
-
)
|
| 633 |
-
(2): Sequential(
|
| 634 |
-
(0): LeakyReLU(negative_slope=0.1)
|
| 635 |
-
(1): Conv1d(32, 32, kernel_size=(11,), stride=(1,), padding=(25,), dilation=(5,))
|
| 636 |
-
)
|
| 637 |
-
)
|
| 638 |
-
(convs2): ModuleList(
|
| 639 |
-
(0): Sequential(
|
| 640 |
-
(0): LeakyReLU(negative_slope=0.1)
|
| 641 |
-
(1): Conv1d(32, 32, kernel_size=(11,), stride=(1,), padding=(5,))
|
| 642 |
-
)
|
| 643 |
-
(1): Sequential(
|
| 644 |
-
(0): LeakyReLU(negative_slope=0.1)
|
| 645 |
-
(1): Conv1d(32, 32, kernel_size=(11,), stride=(1,), padding=(5,))
|
| 646 |
-
)
|
| 647 |
-
(2): Sequential(
|
| 648 |
-
(0): LeakyReLU(negative_slope=0.1)
|
| 649 |
-
(1): Conv1d(32, 32, kernel_size=(11,), stride=(1,), padding=(5,))
|
| 650 |
-
)
|
| 651 |
-
)
|
| 652 |
-
)
|
| 653 |
-
)
|
| 654 |
-
(output_conv): Sequential(
|
| 655 |
-
(0): LeakyReLU(negative_slope=0.01)
|
| 656 |
-
(1): Conv1d(32, 1, kernel_size=(7,), stride=(1,), padding=(3,))
|
| 657 |
-
(2): Tanh()
|
| 658 |
-
)
|
| 659 |
-
)
|
| 660 |
-
)
|
| 661 |
-
(discriminator): HiFiGANMultiScaleMultiPeriodDiscriminator(
|
| 662 |
-
(msd): HiFiGANMultiScaleDiscriminator(
|
| 663 |
-
(discriminators): ModuleList(
|
| 664 |
-
(0): HiFiGANScaleDiscriminator(
|
| 665 |
-
(layers): ModuleList(
|
| 666 |
-
(0): Sequential(
|
| 667 |
-
(0): Conv1d(1, 128, kernel_size=(15,), stride=(1,), padding=(7,))
|
| 668 |
-
(1): LeakyReLU(negative_slope=0.1)
|
| 669 |
-
)
|
| 670 |
-
(1): Sequential(
|
| 671 |
-
(0): Conv1d(128, 128, kernel_size=(41,), stride=(2,), padding=(20,), groups=4)
|
| 672 |
-
(1): LeakyReLU(negative_slope=0.1)
|
| 673 |
-
)
|
| 674 |
-
(2): Sequential(
|
| 675 |
-
(0): Conv1d(128, 256, kernel_size=(41,), stride=(2,), padding=(20,), groups=16)
|
| 676 |
-
(1): LeakyReLU(negative_slope=0.1)
|
| 677 |
-
)
|
| 678 |
-
(3): Sequential(
|
| 679 |
-
(0): Conv1d(256, 512, kernel_size=(41,), stride=(4,), padding=(20,), groups=16)
|
| 680 |
-
(1): LeakyReLU(negative_slope=0.1)
|
| 681 |
-
)
|
| 682 |
-
(4): Sequential(
|
| 683 |
-
(0): Conv1d(512, 1024, kernel_size=(41,), stride=(4,), padding=(20,), groups=16)
|
| 684 |
-
(1): LeakyReLU(negative_slope=0.1)
|
| 685 |
-
)
|
| 686 |
-
(5): Sequential(
|
| 687 |
-
(0): Conv1d(1024, 1024, kernel_size=(41,), stride=(1,), padding=(20,), groups=16)
|
| 688 |
-
(1): LeakyReLU(negative_slope=0.1)
|
| 689 |
-
)
|
| 690 |
-
(6): Sequential(
|
| 691 |
-
(0): Conv1d(1024, 1024, kernel_size=(5,), stride=(1,), padding=(2,))
|
| 692 |
-
(1): LeakyReLU(negative_slope=0.1)
|
| 693 |
-
)
|
| 694 |
-
(7): Conv1d(1024, 1, kernel_size=(3,), stride=(1,), padding=(1,))
|
| 695 |
-
)
|
| 696 |
-
)
|
| 697 |
-
)
|
| 698 |
-
)
|
| 699 |
-
(mpd): HiFiGANMultiPeriodDiscriminator(
|
| 700 |
-
(discriminators): ModuleList(
|
| 701 |
-
(0): HiFiGANPeriodDiscriminator(
|
| 702 |
-
(convs): ModuleList(
|
| 703 |
-
(0): Sequential(
|
| 704 |
-
(0): Conv2d(1, 32, kernel_size=(5, 1), stride=(3, 1), padding=(2, 0))
|
| 705 |
-
(1): LeakyReLU(negative_slope=0.1)
|
| 706 |
-
)
|
| 707 |
-
(1): Sequential(
|
| 708 |
-
(0): Conv2d(32, 128, kernel_size=(5, 1), stride=(3, 1), padding=(2, 0))
|
| 709 |
-
(1): LeakyReLU(negative_slope=0.1)
|
| 710 |
-
)
|
| 711 |
-
(2): Sequential(
|
| 712 |
-
(0): Conv2d(128, 512, kernel_size=(5, 1), stride=(3, 1), padding=(2, 0))
|
| 713 |
-
(1): LeakyReLU(negative_slope=0.1)
|
| 714 |
-
)
|
| 715 |
-
(3): Sequential(
|
| 716 |
-
(0): Conv2d(512, 1024, kernel_size=(5, 1), stride=(3, 1), padding=(2, 0))
|
| 717 |
-
(1): LeakyReLU(negative_slope=0.1)
|
| 718 |
-
)
|
| 719 |
-
(4): Sequential(
|
| 720 |
-
(0): Conv2d(1024, 1024, kernel_size=(5, 1), stride=(1, 1), padding=(2, 0))
|
| 721 |
-
(1): LeakyReLU(negative_slope=0.1)
|
| 722 |
-
)
|
| 723 |
-
)
|
| 724 |
-
(output_conv): Conv2d(1024, 1, kernel_size=(2, 1), stride=(1, 1), padding=(1, 0))
|
| 725 |
-
)
|
| 726 |
-
(1): HiFiGANPeriodDiscriminator(
|
| 727 |
-
(convs): ModuleList(
|
| 728 |
-
(0): Sequential(
|
| 729 |
-
(0): Conv2d(1, 32, kernel_size=(5, 1), stride=(3, 1), padding=(2, 0))
|
| 730 |
-
(1): LeakyReLU(negative_slope=0.1)
|
| 731 |
-
)
|
| 732 |
-
(1): Sequential(
|
| 733 |
-
(0): Conv2d(32, 128, kernel_size=(5, 1), stride=(3, 1), padding=(2, 0))
|
| 734 |
-
(1): LeakyReLU(negative_slope=0.1)
|
| 735 |
-
)
|
| 736 |
-
(2): Sequential(
|
| 737 |
-
(0): Conv2d(128, 512, kernel_size=(5, 1), stride=(3, 1), padding=(2, 0))
|
| 738 |
-
(1): LeakyReLU(negative_slope=0.1)
|
| 739 |
-
)
|
| 740 |
-
(3): Sequential(
|
| 741 |
-
(0): Conv2d(512, 1024, kernel_size=(5, 1), stride=(3, 1), padding=(2, 0))
|
| 742 |
-
(1): LeakyReLU(negative_slope=0.1)
|
| 743 |
-
)
|
| 744 |
-
(4): Sequential(
|
| 745 |
-
(0): Conv2d(1024, 1024, kernel_size=(5, 1), stride=(1, 1), padding=(2, 0))
|
| 746 |
-
(1): LeakyReLU(negative_slope=0.1)
|
| 747 |
-
)
|
| 748 |
-
)
|
| 749 |
-
(output_conv): Conv2d(1024, 1, kernel_size=(2, 1), stride=(1, 1), padding=(1, 0))
|
| 750 |
-
)
|
| 751 |
-
(2): HiFiGANPeriodDiscriminator(
|
| 752 |
-
(convs): ModuleList(
|
| 753 |
-
(0): Sequential(
|
| 754 |
-
(0): Conv2d(1, 32, kernel_size=(5, 1), stride=(3, 1), padding=(2, 0))
|
| 755 |
-
(1): LeakyReLU(negative_slope=0.1)
|
| 756 |
-
)
|
| 757 |
-
(1): Sequential(
|
| 758 |
-
(0): Conv2d(32, 128, kernel_size=(5, 1), stride=(3, 1), padding=(2, 0))
|
| 759 |
-
(1): LeakyReLU(negative_slope=0.1)
|
| 760 |
-
)
|
| 761 |
-
(2): Sequential(
|
| 762 |
-
(0): Conv2d(128, 512, kernel_size=(5, 1), stride=(3, 1), padding=(2, 0))
|
| 763 |
-
(1): LeakyReLU(negative_slope=0.1)
|
| 764 |
-
)
|
| 765 |
-
(3): Sequential(
|
| 766 |
-
(0): Conv2d(512, 1024, kernel_size=(5, 1), stride=(3, 1), padding=(2, 0))
|
| 767 |
-
(1): LeakyReLU(negative_slope=0.1)
|
| 768 |
-
)
|
| 769 |
-
(4): Sequential(
|
| 770 |
-
(0): Conv2d(1024, 1024, kernel_size=(5, 1), stride=(1, 1), padding=(2, 0))
|
| 771 |
-
(1): LeakyReLU(negative_slope=0.1)
|
| 772 |
-
)
|
| 773 |
-
)
|
| 774 |
-
(output_conv): Conv2d(1024, 1, kernel_size=(2, 1), stride=(1, 1), padding=(1, 0))
|
| 775 |
-
)
|
| 776 |
-
(3): HiFiGANPeriodDiscriminator(
|
| 777 |
-
(convs): ModuleList(
|
| 778 |
-
(0): Sequential(
|
| 779 |
-
(0): Conv2d(1, 32, kernel_size=(5, 1), stride=(3, 1), padding=(2, 0))
|
| 780 |
-
(1): LeakyReLU(negative_slope=0.1)
|
| 781 |
-
)
|
| 782 |
-
(1): Sequential(
|
| 783 |
-
(0): Conv2d(32, 128, kernel_size=(5, 1), stride=(3, 1), padding=(2, 0))
|
| 784 |
-
(1): LeakyReLU(negative_slope=0.1)
|
| 785 |
-
)
|
| 786 |
-
(2): Sequential(
|
| 787 |
-
(0): Conv2d(128, 512, kernel_size=(5, 1), stride=(3, 1), padding=(2, 0))
|
| 788 |
-
(1): LeakyReLU(negative_slope=0.1)
|
| 789 |
-
)
|
| 790 |
-
(3): Sequential(
|
| 791 |
-
(0): Conv2d(512, 1024, kernel_size=(5, 1), stride=(3, 1), padding=(2, 0))
|
| 792 |
-
(1): LeakyReLU(negative_slope=0.1)
|
| 793 |
-
)
|
| 794 |
-
(4): Sequential(
|
| 795 |
-
(0): Conv2d(1024, 1024, kernel_size=(5, 1), stride=(1, 1), padding=(2, 0))
|
| 796 |
-
(1): LeakyReLU(negative_slope=0.1)
|
| 797 |
-
)
|
| 798 |
-
)
|
| 799 |
-
(output_conv): Conv2d(1024, 1, kernel_size=(2, 1), stride=(1, 1), padding=(1, 0))
|
| 800 |
-
)
|
| 801 |
-
(4): HiFiGANPeriodDiscriminator(
|
| 802 |
-
(convs): ModuleList(
|
| 803 |
-
(0): Sequential(
|
| 804 |
-
(0): Conv2d(1, 32, kernel_size=(5, 1), stride=(3, 1), padding=(2, 0))
|
| 805 |
-
(1): LeakyReLU(negative_slope=0.1)
|
| 806 |
-
)
|
| 807 |
-
(1): Sequential(
|
| 808 |
-
(0): Conv2d(32, 128, kernel_size=(5, 1), stride=(3, 1), padding=(2, 0))
|
| 809 |
-
(1): LeakyReLU(negative_slope=0.1)
|
| 810 |
-
)
|
| 811 |
-
(2): Sequential(
|
| 812 |
-
(0): Conv2d(128, 512, kernel_size=(5, 1), stride=(3, 1), padding=(2, 0))
|
| 813 |
-
(1): LeakyReLU(negative_slope=0.1)
|
| 814 |
-
)
|
| 815 |
-
(3): Sequential(
|
| 816 |
-
(0): Conv2d(512, 1024, kernel_size=(5, 1), stride=(3, 1), padding=(2, 0))
|
| 817 |
-
(1): LeakyReLU(negative_slope=0.1)
|
| 818 |
-
)
|
| 819 |
-
(4): Sequential(
|
| 820 |
-
(0): Conv2d(1024, 1024, kernel_size=(5, 1), stride=(1, 1), padding=(2, 0))
|
| 821 |
-
(1): LeakyReLU(negative_slope=0.1)
|
| 822 |
-
)
|
| 823 |
-
)
|
| 824 |
-
(output_conv): Conv2d(1024, 1, kernel_size=(2, 1), stride=(1, 1), padding=(1, 0))
|
| 825 |
-
)
|
| 826 |
-
)
|
| 827 |
-
)
|
| 828 |
-
)
|
| 829 |
-
(generator_adv_loss): GeneratorAdversarialLoss()
|
| 830 |
-
(discriminator_adv_loss): DiscriminatorAdversarialLoss()
|
| 831 |
-
(feat_match_loss): FeatureMatchLoss()
|
| 832 |
-
(mel_loss): MelSpectrogramLoss(
|
| 833 |
-
(wav_to_mel): LogMelFbank(
|
| 834 |
-
(stft): Stft(n_fft=1024, win_length=1024, hop_length=256, center=True, normalized=False, onesided=True)
|
| 835 |
-
(logmel): LogMel(sr=24000, n_fft=1024, n_mels=80, fmin=0, fmax=12000.0, htk=False)
|
| 836 |
-
)
|
| 837 |
-
)
|
| 838 |
-
(var_loss): VarianceLoss(
|
| 839 |
-
(mse_criterion): MSELoss()
|
| 840 |
-
(duration_criterion): DurationPredictorLoss(
|
| 841 |
-
(criterion): MSELoss()
|
| 842 |
-
)
|
| 843 |
-
)
|
| 844 |
-
(forwardsum_loss): ForwardSumLoss()
|
| 845 |
-
)
|
| 846 |
-
)
|
| 847 |
-
|
| 848 |
-
Model summary:
|
| 849 |
-
Class Name: ESPnetGANTTSModel
|
| 850 |
-
Total Number of model parameters: 83.28 M
|
| 851 |
-
Number of trainable parameters: 83.28 M (100.0%)
|
| 852 |
-
Size: 333.11 MB
|
| 853 |
-
Type: torch.float32
|
| 854 |
-
[92b100c97f43:0/4] 2025-03-04 21:23:54,361 (abs_task:1161) INFO: Optimizer:
|
| 855 |
-
AdamW (
|
| 856 |
-
Parameter Group 0
|
| 857 |
-
amsgrad: False
|
| 858 |
-
betas: [0.8, 0.99]
|
| 859 |
-
eps: 1e-09
|
| 860 |
-
initial_lr: 0.0002
|
| 861 |
-
lr: 0.0002
|
| 862 |
-
weight_decay: 0.0
|
| 863 |
-
)
|
| 864 |
-
[92b100c97f43:0/4] 2025-03-04 21:23:54,361 (abs_task:1162) INFO: Scheduler: <torch.optim.lr_scheduler.ExponentialLR object at 0x7f5660199550>
|
| 865 |
-
[92b100c97f43:0/4] 2025-03-04 21:23:54,361 (abs_task:1161) INFO: Optimizer2:
|
| 866 |
-
AdamW (
|
| 867 |
-
Parameter Group 0
|
| 868 |
-
amsgrad: False
|
| 869 |
-
betas: [0.8, 0.99]
|
| 870 |
-
eps: 1e-09
|
| 871 |
-
initial_lr: 0.0002
|
| 872 |
-
lr: 0.0002
|
| 873 |
-
weight_decay: 0.0
|
| 874 |
-
)
|
| 875 |
-
[92b100c97f43:0/4] 2025-03-04 21:23:54,361 (abs_task:1162) INFO: Scheduler2: <torch.optim.lr_scheduler.ExponentialLR object at 0x7f5747efa9d0>
|
| 876 |
-
[92b100c97f43:0/4] 2025-03-04 21:23:54,361 (abs_task:1171) INFO: Saving the configuration in exp/tts_train_jets_raw_phn_jaconv_pyopenjtalk/config.yaml
|
| 877 |
-
[92b100c97f43:0/4] 2025-03-04 21:23:54,698 (abs_task:1525) INFO: [train] dataset:
|
| 878 |
-
ESPnetDataset(
|
| 879 |
-
text: {"path": "dump/raw/jvs010_tr_no_dev/text", "type": "text"}
|
| 880 |
-
speech: {"path": "dump/raw/jvs010_tr_no_dev/wav.scp", "type": "sound"}
|
| 881 |
-
preprocess: <espnet2.train.preprocessor.CommonPreprocessor object at 0x7f5660199dc0>)
|
| 882 |
-
[92b100c97f43:0/4] 2025-03-04 21:23:54,698 (abs_task:1526) INFO: [train] Batch sampler: NumElementsBatchSampler(N-batch=4, batch_bins=6000000, sort_in_batch=descending, sort_batch=descending)
|
| 883 |
-
[92b100c97f43:0/4] 2025-03-04 21:23:54,699 (abs_task:1527) INFO: [train] mini-batch sizes summary: N-batch=4, mean=25.0, min=5, max=41
|
| 884 |
-
[92b100c97f43:0/4] 2025-03-04 21:23:54,719 (abs_task:1525) INFO: [valid] dataset:
|
| 885 |
-
ESPnetDataset(
|
| 886 |
-
text: {"path": "dump/raw/jvs010_dev/text", "type": "text"}
|
| 887 |
-
speech: {"path": "dump/raw/jvs010_dev/wav.scp", "type": "sound"}
|
| 888 |
-
preprocess: <espnet2.train.preprocessor.CommonPreprocessor object at 0x7f5660199520>)
|
| 889 |
-
[92b100c97f43:0/4] 2025-03-04 21:23:54,719 (abs_task:1526) INFO: [valid] Batch sampler: NumElementsBatchSampler(N-batch=1, batch_bins=6000000, sort_in_batch=descending, sort_batch=descending)
|
| 890 |
-
[92b100c97f43:0/4] 2025-03-04 21:23:54,719 (abs_task:1527) INFO: [valid] mini-batch sizes summary: N-batch=1, mean=15.0, min=15, max=15
|
| 891 |
-
[92b100c97f43:0/4] 2025-03-04 21:23:54,739 (abs_task:1525) INFO: [plot_att] dataset:
|
| 892 |
-
ESPnetDataset(
|
| 893 |
-
text: {"path": "dump/raw/jvs010_dev/text", "type": "text"}
|
| 894 |
-
speech: {"path": "dump/raw/jvs010_dev/wav.scp", "type": "sound"}
|
| 895 |
-
preprocess: <espnet2.train.preprocessor.CommonPreprocessor object at 0x7f5660155130>)
|
| 896 |
-
[92b100c97f43:0/4] 2025-03-04 21:23:54,739 (abs_task:1526) INFO: [plot_att] Batch sampler: UnsortedBatchSampler(N-batch=15, batch_size=1, key_file=exp/tts_stats_raw_phn_jaconv_pyopenjtalk/valid/text_shape.phn,
|
| 897 |
-
[92b100c97f43:0/4] 2025-03-04 21:23:54,739 (abs_task:1527) INFO: [plot_att] mini-batch sizes summary: N-batch=3, mean=1.0, min=1, max=1
|
| 898 |
-
92b100c97f43:1159464:1159464 [0] NCCL INFO Bootstrap : Using eth0:172.17.0.2<0>
|
| 899 |
-
92b100c97f43:1159464:1159464 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
|
| 900 |
-
|
| 901 |
-
92b100c97f43:1159464:1159464 [0] misc/ibvwrap.cc:63 NCCL WARN Failed to open libibverbs.so[.1]
|
| 902 |
-
92b100c97f43:1159464:1159464 [0] NCCL INFO NET/Socket : Using [0]eth0:172.17.0.2<0>
|
| 903 |
-
92b100c97f43:1159464:1159464 [0] NCCL INFO Using network Socket
|
| 904 |
-
NCCL version 2.10.3+cuda11.3
|
| 905 |
-
92b100c97f43:1159466:1159466 [2] NCCL INFO Bootstrap : Using eth0:172.17.0.2<0>
|
| 906 |
-
92b100c97f43:1159465:1159465 [1] NCCL INFO Bootstrap : Using eth0:172.17.0.2<0>
|
| 907 |
-
92b100c97f43:1159466:1159466 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
|
| 908 |
-
92b100c97f43:1159465:1159465 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
|
| 909 |
-
|
| 910 |
-
92b100c97f43:1159466:1159466 [2] misc/ibvwrap.cc:63 NCCL WARN Failed to open libibverbs.so[.1]
|
| 911 |
-
|
| 912 |
-
92b100c97f43:1159465:1159465 [1] misc/ibvwrap.cc:63 NCCL WARN Failed to open libibverbs.so[.1]
|
| 913 |
-
92b100c97f43:1159465:1159465 [1] NCCL INFO NET/Socket : Using [0]eth0:172.17.0.2<0>
|
| 914 |
-
92b100c97f43:1159466:1159466 [2] NCCL INFO NET/Socket : Using [0]eth0:172.17.0.2<0>
|
| 915 |
-
92b100c97f43:1159465:1159465 [1] NCCL INFO Using network Socket
|
| 916 |
-
92b100c97f43:1159466:1159466 [2] NCCL INFO Using network Socket
|
| 917 |
-
92b100c97f43:1159467:1159467 [3] NCCL INFO Bootstrap : Using eth0:172.17.0.2<0>
|
| 918 |
-
92b100c97f43:1159467:1159467 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
|
| 919 |
-
|
| 920 |
-
92b100c97f43:1159467:1159467 [3] misc/ibvwrap.cc:63 NCCL WARN Failed to open libibverbs.so[.1]
|
| 921 |
-
92b100c97f43:1159467:1159467 [3] NCCL INFO NET/Socket : Using [0]eth0:172.17.0.2<0>
|
| 922 |
-
92b100c97f43:1159467:1159467 [3] NCCL INFO Using network Socket
|
| 923 |
-
92b100c97f43:1159464:1159502 [0] NCCL INFO Channel 00/02 : 0 1 2 3
|
| 924 |
-
92b100c97f43:1159465:1159504 [1] NCCL INFO Trees [0] 2/-1/-1->1->0 [1] 2/-1/-1->1->0
|
| 925 |
-
92b100c97f43:1159467:1159505 [3] NCCL INFO Trees [0] -1/-1/-1->3->2 [1] -1/-1/-1->3->2
|
| 926 |
-
92b100c97f43:1159464:1159502 [0] NCCL INFO Channel 01/02 : 0 1 2 3
|
| 927 |
-
92b100c97f43:1159466:1159503 [2] NCCL INFO Trees [0] 3/-1/-1->2->1 [1] 3/-1/-1->2->1
|
| 928 |
-
92b100c97f43:1159465:1159504 [1] NCCL INFO Setting affinity for GPU 1 to ffff,ffffffff
|
| 929 |
-
92b100c97f43:1159464:1159502 [0] NCCL INFO Trees [0] 1/-1/-1->0->-1 [1] 1/-1/-1->0->-1
|
| 930 |
-
92b100c97f43:1159467:1159505 [3] NCCL INFO Setting affinity for GPU 3 to ffff,ffffffff
|
| 931 |
-
92b100c97f43:1159466:1159503 [2] NCCL INFO Setting affinity for GPU 2 to ffff,ffffffff
|
| 932 |
-
92b100c97f43:1159464:1159502 [0] NCCL INFO Setting affinity for GPU 0 to ffff,ffffffff
|
| 933 |
-
92b100c97f43:1159466:1159503 [2] NCCL INFO Channel 00 : 2[50] -> 3[60] via direct shared memory
|
| 934 |
-
92b100c97f43:1159467:1159505 [3] NCCL INFO Channel 00 : 3[60] -> 0[30] via direct shared memory
|
| 935 |
-
92b100c97f43:1159464:1159502 [0] NCCL INFO Channel 00 : 0[30] -> 1[40] via direct shared memory
|
| 936 |
-
92b100c97f43:1159465:1159504 [1] NCCL INFO Channel 00 : 1[40] -> 2[50] via direct shared memory
|
| 937 |
-
92b100c97f43:1159466:1159503 [2] NCCL INFO Channel 01 : 2[50] -> 3[60] via direct shared memory
|
| 938 |
-
92b100c97f43:1159467:1159505 [3] NCCL INFO Channel 01 : 3[60] -> 0[30] via direct shared memory
|
| 939 |
-
92b100c97f43:1159464:1159502 [0] NCCL INFO Channel 01 : 0[30] -> 1[40] via direct shared memory
|
| 940 |
-
92b100c97f43:1159465:1159504 [1] NCCL INFO Channel 01 : 1[40] -> 2[50] via direct shared memory
|
| 941 |
-
92b100c97f43:1159464:1159502 [0] NCCL INFO Connected all rings
|
| 942 |
-
92b100c97f43:1159466:1159503 [2] NCCL INFO Connected all rings
|
| 943 |
-
92b100c97f43:1159465:1159504 [1] NCCL INFO Connected all rings
|
| 944 |
-
92b100c97f43:1159467:1159505 [3] NCCL INFO Connected all rings
|
| 945 |
-
92b100c97f43:1159467:1159505 [3] NCCL INFO Channel 00 : 3[60] -> 2[50] via direct shared memory
|
| 946 |
-
92b100c97f43:1159467:1159505 [3] NCCL INFO Channel 01 : 3[60] -> 2[50] via direct shared memory
|
| 947 |
-
92b100c97f43:1159466:1159503 [2] NCCL INFO Channel 00 : 2[50] -> 1[40] via direct shared memory
|
| 948 |
-
92b100c97f43:1159465:1159504 [1] NCCL INFO Channel 00 : 1[40] -> 0[30] via direct shared memory
|
| 949 |
-
92b100c97f43:1159466:1159503 [2] NCCL INFO Channel 01 : 2[50] -> 1[40] via direct shared memory
|
| 950 |
-
92b100c97f43:1159465:1159504 [1] NCCL INFO Channel 01 : 1[40] -> 0[30] via direct shared memory
|
| 951 |
-
92b100c97f43:1159464:1159502 [0] NCCL INFO Connected all trees
|
| 952 |
-
92b100c97f43:1159464:1159502 [0] NCCL INFO threadThresholds 8/8/64 | 32/8/64 | 8/8/512
|
| 953 |
-
92b100c97f43:1159464:1159502 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
|
| 954 |
-
92b100c97f43:1159467:1159505 [3] NCCL INFO Connected all trees
|
| 955 |
-
92b100c97f43:1159467:1159505 [3] NCCL INFO threadThresholds 8/8/64 | 32/8/64 | 8/8/512
|
| 956 |
-
92b100c97f43:1159467:1159505 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
|
| 957 |
-
92b100c97f43:1159465:1159504 [1] NCCL INFO Connected all trees
|
| 958 |
-
92b100c97f43:1159465:1159504 [1] NCCL INFO threadThresholds 8/8/64 | 32/8/64 | 8/8/512
|
| 959 |
-
92b100c97f43:1159465:1159504 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
|
| 960 |
-
92b100c97f43:1159466:1159503 [2] NCCL INFO Connected all trees
|
| 961 |
-
92b100c97f43:1159466:1159503 [2] NCCL INFO threadThresholds 8/8/64 | 32/8/64 | 8/8/512
|
| 962 |
-
92b100c97f43:1159466:1159503 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
|
| 963 |
-
92b100c97f43:1159466:1159503 [2] NCCL INFO comm 0x7f35f80030d0 rank 2 nranks 4 cudaDev 2 busId 50 - Init COMPLETE
|
| 964 |
-
92b100c97f43:1159464:1159502 [0] NCCL INFO comm 0x7f55500030d0 rank 0 nranks 4 cudaDev 0 busId 30 - Init COMPLETE
|
| 965 |
-
92b100c97f43:1159464:1159464 [0] NCCL INFO Launch mode Parallel
|
| 966 |
-
92b100c97f43:1159465:1159504 [1] NCCL INFO comm 0x7f97600030d0 rank 1 nranks 4 cudaDev 1 busId 40 - Init COMPLETE
|
| 967 |
-
92b100c97f43:1159467:1159505 [3] NCCL INFO comm 0x7f66b80030d0 rank 3 nranks 4 cudaDev 3 busId 60 - Init COMPLETE
|
| 968 |
-
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 969 |
-
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 970 |
-
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 971 |
-
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 972 |
-
[92b100c97f43:0/4] 2025-03-04 21:23:55,188 (trainer:280) INFO: 1/130epoch started
|
| 973 |
-
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 974 |
-
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 975 |
-
[W reducer.cpp:1303] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator())
|
| 976 |
-
[W reducer.cpp:1303] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator())
|
| 977 |
-
[W reducer.cpp:1303] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator())
|
| 978 |
-
[W reducer.cpp:1303] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator())
|
| 979 |
-
/work/espnet/espnet2/layers/stft.py:166: UserWarning: __floordiv__ is deprecated, and its behavior will change in a future version of pytorch. It currently rounds toward 0 (like the 'trunc' function NOT 'floor'). This results in incorrect rounding for negative values. To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor').
|
| 980 |
-
olens = (ilens - self.n_fft) // self.hop_length + 1
|
| 981 |
-
/work/espnet/espnet2/layers/stft.py:166: UserWarning: __floordiv__ is deprecated, and its behavior will change in a future version of pytorch. It currently rounds toward 0 (like the 'trunc' function NOT 'floor'). This results in incorrect rounding for negative values. To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor').
|
| 982 |
-
olens = (ilens - self.n_fft) // self.hop_length + 1
|
| 983 |
-
[92b100c97f43:0/4] 2025-03-04 21:25:36,523 (gan_trainer:305) INFO: 1epoch:train:1-50batch: iter_time=0.048, generator_forward_time=0.723, generator_loss=139.743, generator_g_loss=110.582, generator_var_loss=5.224, generator_align_loss=23.937, generator_g_mel_loss=106.758, generator_g_adv_loss=2.179, generator_g_feat_match_loss=1.645, generator_var_dur_loss=0.584, generator_var_pitch_loss=2.400, generator_var_energy_loss=2.240, generator_align_forwardsum_loss=10.599, generator_align_bin_loss=1.369, generator_backward_time=0.254, generator_optim_step_time=0.034, optim0_lr0=2.000e-04, generator_train_time=1.113, discriminator_forward_time=0.544, discriminator_loss=2.766, discriminator_real_loss=1.518, discriminator_fake_loss=1.247, discriminator_backward_time=0.198, discriminator_optim_step_time=0.009, optim1_lr0=2.000e-04, discriminator_train_time=0.788, train_time=2.024
|
| 984 |
-
[92b100c97f43:0/4] 2025-03-04 21:27:08,245 (gan_trainer:305) INFO: 1epoch:train:51-100batch: iter_time=1.198e-04, generator_forward_time=0.634, generator_loss=111.648, generator_g_loss=85.935, generator_var_loss=2.179, generator_align_loss=23.534, generator_g_mel_loss=80.251, generator_g_adv_loss=2.332, generator_g_feat_match_loss=3.352, generator_var_dur_loss=0.089, generator_var_pitch_loss=0.924, generator_var_energy_loss=1.166, generator_align_forwardsum_loss=10.437, generator_align_bin_loss=1.330, generator_backward_time=0.258, generator_optim_step_time=0.034, optim0_lr0=2.000e-04, generator_train_time=1.027, discriminator_forward_time=0.548, discriminator_loss=2.396, discriminator_real_loss=1.381, discriminator_fake_loss=1.015, discriminator_backward_time=0.201, discriminator_optim_step_time=0.009, optim1_lr0=2.000e-04, discriminator_train_time=0.796, train_time=1.835
|
| 985 |
-
[92b100c97f43:0/4] 2025-03-04 21:28:38,897 (gan_trainer:305) INFO: 1epoch:train:101-150batch: iter_time=1.203e-04, generator_forward_time=0.624, generator_loss=112.406, generator_g_loss=87.597, generator_var_loss=1.890, generator_align_loss=22.919, generator_g_mel_loss=80.508, generator_g_adv_loss=2.744, generator_g_feat_match_loss=4.346, generator_var_dur_loss=0.058, generator_var_pitch_loss=0.808, generator_var_energy_loss=1.024, generator_align_forwardsum_loss=10.071, generator_align_bin_loss=1.389, generator_backward_time=0.257, generator_optim_step_time=0.033, optim0_lr0=2.000e-04, generator_train_time=1.015, discriminator_forward_time=0.539, discriminator_loss=2.084, discriminator_real_loss=1.319, discriminator_fake_loss=0.765, discriminator_backward_time=0.201, discriminator_optim_step_time=0.009, optim1_lr0=2.000e-04, discriminator_train_time=0.787, train_time=1.813
|
|
|
|
| 1 |
# python3 -m espnet2.bin.gan_tts_train --use_preprocessor true --token_type phn --token_list dump/token_list/phn_jaconv_pyopenjtalk/tokens.txt --non_linguistic_symbols none --cleaner jaconv --g2p pyopenjtalk --normalize global_mvn --resume true --fold_length 150 --fold_length 240000 --output_dir exp/tts_train_jets_raw_phn_jaconv_pyopenjtalk --config conf/tuning/train_jets.yaml --feats_extract fbank --feats_extract_conf n_fft=2048 --feats_extract_conf hop_length=300 --feats_extract_conf win_length=1200 --feats_extract_conf fs=24000 --feats_extract_conf fmin=80 --feats_extract_conf fmax=7600 --feats_extract_conf n_mels=80 --train_data_path_and_name_and_type dump/raw/jvs010_tr_no_dev/text,text,text --train_data_path_and_name_and_type dump/raw/jvs010_tr_no_dev/wav.scp,speech,sound --train_shape_file exp/tts_stats_raw_phn_jaconv_pyopenjtalk/train/text_shape.phn --train_shape_file exp/tts_stats_raw_phn_jaconv_pyopenjtalk/train/speech_shape --valid_data_path_and_name_and_type dump/raw/jvs010_dev/text,text,text --valid_data_path_and_name_and_type dump/raw/jvs010_dev/wav.scp,speech,sound --valid_shape_file exp/tts_stats_raw_phn_jaconv_pyopenjtalk/valid/text_shape.phn --valid_shape_file exp/tts_stats_raw_phn_jaconv_pyopenjtalk/valid/speech_shape --pitch_extract_conf fs=24000 --pitch_extract_conf n_fft=2048 --pitch_extract_conf hop_length=300 --pitch_extract_conf f0max=400 --pitch_extract_conf f0min=80 --pitch_normalize_conf stats_file=exp/tts_stats_raw_phn_jaconv_pyopenjtalk/train/pitch_stats.npz --energy_extract_conf fs=24000 --energy_extract_conf n_fft=2048 --energy_extract_conf hop_length=300 --energy_extract_conf win_length=1200 --energy_normalize_conf stats_file=exp/tts_stats_raw_phn_jaconv_pyopenjtalk/train/energy_stats.npz --normalize_conf stats_file=exp/tts_stats_raw_phn_jaconv_pyopenjtalk/train/feats_stats.npz --ngpu 4 --multiprocessing_distributed True
|
| 2 |
+
# Started at Tue Mar 4 22:09:50 JST 2025
|
| 3 |
#
|
| 4 |
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 5 |
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 6 |
/usr/bin/python3 /work/espnet/espnet2/bin/gan_tts_train.py --use_preprocessor true --token_type phn --token_list dump/token_list/phn_jaconv_pyopenjtalk/tokens.txt --non_linguistic_symbols none --cleaner jaconv --g2p pyopenjtalk --normalize global_mvn --resume true --fold_length 150 --fold_length 240000 --output_dir exp/tts_train_jets_raw_phn_jaconv_pyopenjtalk --config conf/tuning/train_jets.yaml --feats_extract fbank --feats_extract_conf n_fft=2048 --feats_extract_conf hop_length=300 --feats_extract_conf win_length=1200 --feats_extract_conf fs=24000 --feats_extract_conf fmin=80 --feats_extract_conf fmax=7600 --feats_extract_conf n_mels=80 --train_data_path_and_name_and_type dump/raw/jvs010_tr_no_dev/text,text,text --train_data_path_and_name_and_type dump/raw/jvs010_tr_no_dev/wav.scp,speech,sound --train_shape_file exp/tts_stats_raw_phn_jaconv_pyopenjtalk/train/text_shape.phn --train_shape_file exp/tts_stats_raw_phn_jaconv_pyopenjtalk/train/speech_shape --valid_data_path_and_name_and_type dump/raw/jvs010_dev/text,text,text --valid_data_path_and_name_and_type dump/raw/jvs010_dev/wav.scp,speech,sound --valid_shape_file exp/tts_stats_raw_phn_jaconv_pyopenjtalk/valid/text_shape.phn --valid_shape_file exp/tts_stats_raw_phn_jaconv_pyopenjtalk/valid/speech_shape --pitch_extract_conf fs=24000 --pitch_extract_conf n_fft=2048 --pitch_extract_conf hop_length=300 --pitch_extract_conf f0max=400 --pitch_extract_conf f0min=80 --pitch_normalize_conf stats_file=exp/tts_stats_raw_phn_jaconv_pyopenjtalk/train/pitch_stats.npz --energy_extract_conf fs=24000 --energy_extract_conf n_fft=2048 --energy_extract_conf hop_length=300 --energy_extract_conf win_length=1200 --energy_normalize_conf stats_file=exp/tts_stats_raw_phn_jaconv_pyopenjtalk/train/energy_stats.npz --normalize_conf stats_file=exp/tts_stats_raw_phn_jaconv_pyopenjtalk/train/feats_stats.npz --ngpu 4 --multiprocessing_distributed True
|
| 7 |
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (2.2.3) or chardet (3.0.4) doesn't match a supported version!
|
| 8 |
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
| 9 |
+
[92b100c97f43:0/4] 2025-03-04 22:09:57,737 (distributed_c10d:217) INFO: Added key: store_based_barrier_key:1 to store for rank: 0
|
| 10 |
+
[92b100c97f43:0/4] 2025-03-04 22:09:57,738 (distributed_c10d:251) INFO: Rank 0: Completed store-based barrier for key:store_based_barrier_key:1 with 4 nodes.
|
| 11 |
+
[92b100c97f43:0/4] 2025-03-04 22:09:57,793 (gan_tts:304) INFO: Vocabulary size: 41
|
| 12 |
+
[92b100c97f43:0/4] 2025-03-04 22:09:57,962 (encoder:172) INFO: encoder self-attention layer type = self-attention
|
| 13 |
+
[92b100c97f43:0/4] 2025-03-04 22:09:58,184 (encoder:172) INFO: encoder self-attention layer type = self-attention
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|