Upload Wavenet-B-Wavenet-C/config.yml with huggingface_hub
Browse files- Wavenet-B-Wavenet-C/config.yml +112 -0
Wavenet-B-Wavenet-C/config.yml
ADDED
|
@@ -0,0 +1,112 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
additional_config: null
|
| 2 |
+
allow_cache: true
|
| 3 |
+
batch_size: 1
|
| 4 |
+
collater_type: NARVCCollater
|
| 5 |
+
config: conf/aas_vc.melmelmel.v1.yaml
|
| 6 |
+
criterions:
|
| 7 |
+
ForwardSumLoss: {}
|
| 8 |
+
L1Loss: {}
|
| 9 |
+
StochasticDurationPredictorLoss: {}
|
| 10 |
+
dev_dp_input_dir: dump/ar-XA-Wavenet-B_dev/norm_self
|
| 11 |
+
dev_duration_dir: null
|
| 12 |
+
distributed: false
|
| 13 |
+
dp_train_start_steps: 0
|
| 14 |
+
duration_predictor_feat: mel
|
| 15 |
+
eval_interval_steps: 1000
|
| 16 |
+
feat_list:
|
| 17 |
+
mel: {}
|
| 18 |
+
fft_size: 1024
|
| 19 |
+
fmax: null
|
| 20 |
+
fmin: 0
|
| 21 |
+
format: hdf5
|
| 22 |
+
global_gain_scale: 1.0
|
| 23 |
+
grad_norm: 1.0
|
| 24 |
+
gradient_accumulate_steps: 8
|
| 25 |
+
hop_size: 256
|
| 26 |
+
init_checkpoint: ''
|
| 27 |
+
lambda_align: 2.0
|
| 28 |
+
log_interval_steps: 10
|
| 29 |
+
model_params:
|
| 30 |
+
adim: 384
|
| 31 |
+
aheads: 2
|
| 32 |
+
conformer_dec_kernel_size: 15
|
| 33 |
+
conformer_enc_kernel_size: 15
|
| 34 |
+
conformer_pos_enc_layer_type: rel_pos
|
| 35 |
+
conformer_self_attn_layer_type: rel_selfattn
|
| 36 |
+
decoder_normalize_before: true
|
| 37 |
+
decoder_reduction_factor: 1
|
| 38 |
+
decoder_type: conformer
|
| 39 |
+
dlayers: 4
|
| 40 |
+
dunits: 1536
|
| 41 |
+
duration_predictor_chans: 256
|
| 42 |
+
duration_predictor_input_dim: 80
|
| 43 |
+
duration_predictor_kernel_size: 3
|
| 44 |
+
duration_predictor_layers: 2
|
| 45 |
+
duration_predictor_type: stochastic
|
| 46 |
+
duration_predictor_use_encoder_outputs: false
|
| 47 |
+
elayers: 4
|
| 48 |
+
encoder_input_layer: linear
|
| 49 |
+
encoder_normalize_before: true
|
| 50 |
+
encoder_reduction_factor: 1
|
| 51 |
+
encoder_type: conformer
|
| 52 |
+
eunits: 1536
|
| 53 |
+
idim: 80
|
| 54 |
+
init_type: xavier_uniform
|
| 55 |
+
odim: 80
|
| 56 |
+
positionwise_conv_kernel_size: 1
|
| 57 |
+
positionwise_layer_type: linear
|
| 58 |
+
post_encoder_reduction_factor: 4
|
| 59 |
+
postnet_chans: 256
|
| 60 |
+
postnet_filts: 5
|
| 61 |
+
postnet_layers: 5
|
| 62 |
+
transformer_dec_attn_dropout_rate: 0.2
|
| 63 |
+
transformer_dec_dropout_rate: 0.2
|
| 64 |
+
transformer_dec_positional_dropout_rate: 0.2
|
| 65 |
+
transformer_enc_attn_dropout_rate: 0.2
|
| 66 |
+
transformer_enc_dropout_rate: 0.2
|
| 67 |
+
transformer_enc_positional_dropout_rate: 0.2
|
| 68 |
+
use_cnn_in_conformer: true
|
| 69 |
+
use_macaron_style_in_conformer: true
|
| 70 |
+
use_masking: true
|
| 71 |
+
model_type: AASVC
|
| 72 |
+
mp: false
|
| 73 |
+
num_mels: 80
|
| 74 |
+
num_save_intermediate_results: 4
|
| 75 |
+
num_workers: 0
|
| 76 |
+
optimizer_params:
|
| 77 |
+
lr: 8.0e-05
|
| 78 |
+
optimizer_type: Adam
|
| 79 |
+
outdir: exp/ar-XA-Wavenet-B_ar-XA-Wavenet-C_male_male
|
| 80 |
+
pin_memory: true
|
| 81 |
+
rank: 0
|
| 82 |
+
resume: /workspace/seq2seq-vc/egs/ArVoice/vc2/exp/ar-XA-Wavenet-B_ar-XA-Wavenet-C_male_male/checkpoint-10632steps.pkl
|
| 83 |
+
sampling_rate: 24000
|
| 84 |
+
save_interval_steps: 5000
|
| 85 |
+
scheduler: warmuplr
|
| 86 |
+
scheduler_params:
|
| 87 |
+
warmup_steps: 4000
|
| 88 |
+
src_dev_dumpdir: dump/ar-XA-Wavenet-B_dev/norm_self
|
| 89 |
+
src_feat: mel
|
| 90 |
+
src_feat_type: mel
|
| 91 |
+
src_train_dumpdir: dump/ar-XA-Wavenet-B_train/norm_self
|
| 92 |
+
train_dp_input_dir: dump/ar-XA-Wavenet-B_train/norm_self
|
| 93 |
+
train_duration_dir: null
|
| 94 |
+
train_max_steps: 50000
|
| 95 |
+
trainer_type: AASVCTrainer
|
| 96 |
+
trg_dev_dumpdir: dump/ar-XA-Wavenet-C_dev/norm_self
|
| 97 |
+
trg_feat: mel
|
| 98 |
+
trg_feat_type: mel
|
| 99 |
+
trg_stats: exp/ar-XA-Wavenet-B_ar-XA-Wavenet-C_male_male/stats.h5
|
| 100 |
+
trg_train_dumpdir: dump/ar-XA-Wavenet-C_train/norm_self
|
| 101 |
+
trim_frame_size: 2048
|
| 102 |
+
trim_hop_size: 512
|
| 103 |
+
trim_silence: false
|
| 104 |
+
trim_threshold_in_db: 60
|
| 105 |
+
verbose: 1
|
| 106 |
+
version: 0.1.0
|
| 107 |
+
vocoder:
|
| 108 |
+
checkpoint: ./downloads/arvoice-syn-wavenet-vocoder/checkpoint-400000steps.pkl
|
| 109 |
+
config: ./downloads/arvoice-syn-wavenet-vocoder/config.yml
|
| 110 |
+
stats: ./downloads/arvoice-syn-wavenet-vocoder/stats.h5
|
| 111 |
+
win_length: null
|
| 112 |
+
window: hann
|