taihan commited on
Commit
dd24060
·
verified ·
1 Parent(s): e4260d8

Upload 2 files

Browse files
Files changed (2) hide show
  1. config.yml +79 -0
  2. model.h5 +3 -0
config.yml ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # This is the hyperparameter configuration file for FastSpeech2 v1.
2
+ # Please make sure this is adjusted for the LibriTTS dataset. If you want to
3
+ # apply to the other dataset, you might need to carefully change some parameters.
4
+ # This configuration performs 150k iters (see train_max_steps below); in the original 200k-iter setup the best checkpoint was around 150k iters.
5
+
6
+ ###########################################################
7
+ # FEATURE EXTRACTION SETTING #
8
+ ###########################################################
9
+ hop_size: 300 # Hop size.
10
+ format: "npy"
11
+
12
+ ###########################################################
13
+ # NETWORK ARCHITECTURE SETTING #
14
+ ###########################################################
15
+ model_type: fastspeech2
16
+
17
+ fastspeech2_params:
18
+ dataset: "libritts"
19
+ n_speakers: 5 #20
20
+ encoder_hidden_size: 384
21
+ encoder_num_hidden_layers: 4
22
+ encoder_num_attention_heads: 2
23
+ encoder_attention_head_size: 192 # hidden_size // num_attention_heads
24
+ encoder_intermediate_size: 1024
25
+ encoder_intermediate_kernel_size: 3
26
+ encoder_hidden_act: "mish"
27
+ decoder_hidden_size: 384
28
+ decoder_num_hidden_layers: 4
29
+ decoder_num_attention_heads: 2
30
+ decoder_attention_head_size: 192 # hidden_size // num_attention_heads
31
+ decoder_intermediate_size: 1024
32
+ decoder_intermediate_kernel_size: 3
33
+ decoder_hidden_act: "mish"
34
+ variant_prediction_num_conv_layers: 2
35
+ variant_predictor_filter: 256
36
+ variant_predictor_kernel_size: 3
37
+ variant_predictor_dropout_rate: 0.5
38
+ num_mels: 80
39
+ hidden_dropout_prob: 0.2
40
+ attention_probs_dropout_prob: 0.1
41
+ max_position_embeddings: 2048
42
+ initializer_range: 0.02
43
+ output_attentions: False
44
+ output_hidden_states: False
45
+
46
+ ###########################################################
47
+ # DATA LOADER SETTING #
48
+ ###########################################################
49
+ batch_size: 32 # Batch size for each GPU with assuming that gradient_accumulation_steps == 1.
50
+ remove_short_samples: true # Whether to remove samples whose length is less than batch_max_steps.
51
+ allow_cache: true # Whether to allow cache in dataset. If true, it requires cpu memory.
52
+ mel_length_threshold: 48 # remove all targets with mel_length <= 48
53
+ is_shuffle: true # shuffle dataset after each epoch.
54
+ ###########################################################
55
+ # OPTIMIZER & SCHEDULER SETTING #
56
+ ###########################################################
57
+ optimizer_params:
58
+ initial_learning_rate: 0.0001
59
+ end_learning_rate: 0.00001
60
+ decay_steps: 120000 # < train_max_steps is recommended.
61
+ warmup_proportion: 0.02
62
+ weight_decay: 0.001
63
+
64
+ gradient_accumulation_steps: 1
65
+ var_train_expr: null # trainable variable expr (eg. 'embeddings|encoder|decoder' )
66
+ # must be separated by |. If var_train_expr is null then we
67
+ # train all variables.
68
+ ###########################################################
69
+ # INTERVAL SETTING #
70
+ ###########################################################
71
+ train_max_steps: 150000 # Number of training steps.
72
+ save_interval_steps: 5000 # Interval steps to save checkpoint.
73
+ eval_interval_steps: 5000 # Interval steps to evaluate the network.
74
+ log_interval_steps: 200 # Interval steps to record the training log.
75
+ ###########################################################
76
+ # OTHER SETTING #
77
+ ###########################################################
78
+ use_griffin: true # Use GL on evaluation or not.
79
+ num_save_intermediate_results: 1 # Number of batch to be saved as intermediate results.
model.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2477eadeb8b83a08ef10ade5561d5780ef6e593e7686afc89557b44ce8049f88
3
+ size 127715496