5Hyeons committed on
Commit
cc17649
·
1 Parent(s): ce7820e

Add LibriTTS Vocos model

Browse files
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ *.txt filter=lfs diff=lfs merge=lfs -text
37
+ *.log filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,3 +1,26 @@
1
- ---
2
- license: cc-by-nc-4.0
3
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Vocos LibriTTS Model
2
+
3
+ This model was trained using the train-clean-100 and train-clean-360 subsets of the LibriTTS dataset.
4
+
5
+ ## Model Information
6
+
7
+ - **Model Architecture**: Vocos
8
+ - **Training Data**: LibriTTS train-clean-100 + train-clean-360
9
+ - **License**: MIT
10
+
11
+ ## Repository
12
+
13
+ The training and inference code can be found at: [StyleTTS2-Vocos](https://github.com/5Hyeons/StyleTTS2-Vocos)
14
+
15
+ ### Folder Structure
16
+ ```
17
+ StyleTTS2
18
+ ├── README.md
19
+ └── Vocos
20
+     └── LibriTTS
21
+         └── [checkpoint files]
22
+ ```
23
+
24
+ ## License
25
+
26
+ This model is released under the MIT License. This is one of the most permissive open-source licenses, allowing for both commercial and non-commercial use, modification, and distribution.
Vocos/LibriTTS/Data/OOD_texts2.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:83c1886c0f8c25b91d0069c2d75f5911e3bfa2766d97098779cb685cd9a23749
3
+ size 42723963
Vocos/LibriTTS/Data/train_libritts.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e9da2bab6aaa3dfaea1b70c7adc915bde6e04826c1b02fba9bf7b5ceffc8fa0d
3
+ size 31124170
Vocos/LibriTTS/Data/valid_libritts.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:67b42ece5bcb11f9078b79f26832ae17936b78d97a326b99978062600da3aaa2
3
+ size 319831
Vocos/LibriTTS/LibriTTS_vocos_first_stage.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:308e539a889a0152a89d946265ecd5993ff3ab21650f22c2d7515abf167f788f
3
+ size 2163605292
Vocos/LibriTTS/config_libritts_vocos.yml ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {ASR_config: Utils/ASR/config.yml, ASR_path: Utils/ASR/epoch_00080.pth, F0_path: Utils/JDC/bst.t7,
2
+ PLBERT_dir: Utils/PLBERT/, batch_size: 16, data_params: {OOD_data: Data/OOD_texts.txt,
3
+ min_length: 50, root_path: /data/LibriTTS, train_data: Data/train_libritts.txt,
4
+ val_data: Data/valid_libritts.txt}, device: cuda, epochs_1st: 50, epochs_2nd: 30,
5
+ first_stage_path: LibriTTS_vocos_first_stage.pth, load_only_params: false, log_dir: /data/ckpts/stts2/LibriTTS_vocos,
6
+ log_interval: 10, loss_params: {TMA_epoch: 5, diff_epoch: 10, joint_epoch: 15, lambda_F0: 1.0,
7
+ lambda_ce: 20.0, lambda_diff: 1.0, lambda_dur: 1.0, lambda_gen: 1.0, lambda_mel: 5.0,
8
+ lambda_mono: 1.0, lambda_norm: 1.0, lambda_s2s: 1.0, lambda_slm: 1.0, lambda_sty: 1.0},
9
+ max_len: 300, model_params: {decoder: {gen_istft_hop_size: 300, gen_istft_n_fft: 1200,
10
+ intermediate_dim: 1536, num_layers: 8, type: vocos}, diffusion: {dist: {estimate_sigma_data: true,
11
+ mean: -3.0, sigma_data: 0.23322181252793212, std: 1.0}, embedding_mask_proba: 0.1,
12
+ transformer: {head_features: 64, multiplier: 2, num_heads: 8, num_layers: 3}},
13
+ dim_in: 64, dropout: 0.2, hidden_dim: 512, max_conv_dim: 512, max_dur: 50, multispeaker: true,
14
+ n_layer: 3, n_mels: 80, n_token: 178, slm: {hidden: 768, initial_channel: 64,
15
+ model: microsoft/wavlm-base-plus, nlayers: 13, sr: 16000}, style_dim: 128},
16
+ optimizer_params: {bert_lr: 1.0e-05, ft_lr: 1.0e-05, lr: 0.0001}, preprocess_params: {
17
+ spect_params: {hop_length: 300, n_fft: 2048, win_length: 1200}, sr: 24000}, pretrained_model: '',
18
+ save_freq: 1, second_stage_load_pretrained: true, slmadv_params: {batch_percentage: 0.5,
19
+ iter: 20, max_len: 500, min_len: 400, scale: 0.01, sig: 1.5, thresh: 5}}
Vocos/LibriTTS/epoch_2nd_00029.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e8a5436301091400054f348d0aff74081d433c9402fc56bd71bad21f74e7996a
3
+ size 2480791363
Vocos/LibriTTS/tensorboard/events.out.tfevents.1740379572.gpu-1.2895635.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0cba9acbcf3eacdf270df91d837d7cb403b973c87924ff9aab5411322cb7cc0c
3
+ size 6041600
Vocos/LibriTTS/tensorboard/events.out.tfevents.1740450365.gpu-1.3390091.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eb0c308bfa92b0c6ff4bc86a9284bfdf6d287f12f4880e66cc992730c1b934a9
3
+ size 88
Vocos/LibriTTS/tensorboard/events.out.tfevents.1740450615.gpu-1.3400827.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e18f377d0ae08dd42391967d2d9c2a685abd7e98ef21c2de7440d3a85b52afaf
3
+ size 73350099
Vocos/LibriTTS/tensorboard/events.out.tfevents.1741224850.gpu-1.3455425.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:972ef9eb2d0d05e73a6373e5db26211d96f9cb7825585e66254f40212716d709
3
+ size 88
Vocos/LibriTTS/tensorboard/events.out.tfevents.1741224909.gpu-1.3456868.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:444728d61889724bd6265749d0a378b0ce99f78158a7b599baa91e05fccc0521
3
+ size 4741948
Vocos/LibriTTS/tensorboard/events.out.tfevents.1741249732.gpu-1.3929847.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5a31ec93df37c599f68259abd22392729eb5c081112642f1bd156de19a2e2686
3
+ size 78370138
Vocos/LibriTTS/train.log ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ebf58487b746afa5729e48134ab118e58c749d0805a25b0e3a2c4ea28f0354a2
3
+ size 15614787