taihan commited on
Commit
dd24060
·
verified ·
1 Parent(s): e4260d8

Upload 2 files

Browse files
Files changed (2) hide show
  1. config.yml +79 -0
  2. model.h5 +3 -0
config.yml ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # This is the hyperparameter configuration file for FastSpeech2 v1.
2
+ # Please make sure this is adjusted for the LibriTTS dataset. If you want to
3
+ # apply to the other dataset, you might need to carefully change some parameters.
4
+ # This configuration performs 150k iters (see train_max_steps below); in the original 200k-iter setup the best checkpoint was around 150k iters.
5
+
6
+ ###########################################################
7
+ # FEATURE EXTRACTION SETTING #
8
+ ###########################################################
9
+ hop_size: 300 # Hop size.
10
+ format: "npy"
11
+
12
+ ###########################################################
13
+ # NETWORK ARCHITECTURE SETTING #
14
+ ###########################################################
15
+ model_type: fastspeech2
16
+
17
+ fastspeech2_params:
18
+ dataset: "libritts"
19
+ n_speakers: 5 #20
20
+ encoder_hidden_size: 384
21
+ encoder_num_hidden_layers: 4
22
+ encoder_num_attention_heads: 2
23
+ encoder_attention_head_size: 192 # hidden_size // num_attention_heads
24
+ encoder_intermediate_size: 1024
25
+ encoder_intermediate_kernel_size: 3
26
+ encoder_hidden_act: "mish"
27
+ decoder_hidden_size: 384
28
+ decoder_num_hidden_layers: 4
29
+ decoder_num_attention_heads: 2
30
+ decoder_attention_head_size: 192 # hidden_size // num_attention_heads
31
+ decoder_intermediate_size: 1024
32
+ decoder_intermediate_kernel_size: 3
33
+ decoder_hidden_act: "mish"
34
+ variant_prediction_num_conv_layers: 2
35
+ variant_predictor_filter: 256
36
+ variant_predictor_kernel_size: 3
37
+ variant_predictor_dropout_rate: 0.5
38
+ num_mels: 80
39
+ hidden_dropout_prob: 0.2
40
+ attention_probs_dropout_prob: 0.1
41
+ max_position_embeddings: 2048
42
+ initializer_range: 0.02
43
+ output_attentions: False
44
+ output_hidden_states: False
45
+
46
+ ###########################################################
47
+ # DATA LOADER SETTING #
48
+ ###########################################################
49
+ batch_size: 32 # Batch size for each GPU with assuming that gradient_accumulation_steps == 1.
50
+ remove_short_samples: true # Whether to remove samples whose length is less than batch_max_steps.
51
+ allow_cache: true # Whether to allow cache in dataset. If true, it requires cpu memory.
52
+ mel_length_threshold: 48 # remove all targets with mel_length <= 48
53
+ is_shuffle: true # shuffle dataset after each epoch.
54
+ ###########################################################
55
+ # OPTIMIZER & SCHEDULER SETTING #
56
+ ###########################################################
57
+ optimizer_params:
58
+ initial_learning_rate: 0.0001
59
+ end_learning_rate: 0.00001
60
+ decay_steps: 120000 # < train_max_steps is recommended.
61
+ warmup_proportion: 0.02
62
+ weight_decay: 0.001
63
+
64
+ gradient_accumulation_steps: 1
65
+ var_train_expr: null # trainable variable expr (eg. 'embeddings|encoder|decoder' )
66
+ # must be separated by |. If var_train_expr is null then we
67
+ # train all variables.
68
+ ###########################################################
69
+ # INTERVAL SETTING #
70
+ ###########################################################
71
+ train_max_steps: 150000 # Number of training steps.
72
+ save_interval_steps: 5000 # Interval steps to save checkpoint.
73
+ eval_interval_steps: 5000 # Interval steps to evaluate the network.
74
+ log_interval_steps: 200 # Interval steps to record the training log.
75
+ ###########################################################
76
+ # OTHER SETTING #
77
+ ###########################################################
78
+ use_griffin: true # Use GL on evaluation or not.
79
+ num_save_intermediate_results: 1 # Number of batch to be saved as intermediate results.
model.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2477eadeb8b83a08ef10ade5561d5780ef6e593e7686afc89557b44ce8049f88
3
+ size 127715496