Add LibriTTS Vocos model

Browse files

Files changed (15) hide show

.gitattributes +2 -0
README.md +26 -3
Vocos/LibriTTS/Data/OOD_texts2.txt +3 -0
Vocos/LibriTTS/Data/train_libritts.txt +3 -0
Vocos/LibriTTS/Data/valid_libritts.txt +3 -0
Vocos/LibriTTS/LibriTTS_vocos_first_stage.pth +3 -0
Vocos/LibriTTS/config_libritts_vocos.yml +19 -0
Vocos/LibriTTS/epoch_2nd_00029.pth +3 -0
Vocos/LibriTTS/tensorboard/events.out.tfevents.1740379572.gpu-1.2895635.0 +3 -0
Vocos/LibriTTS/tensorboard/events.out.tfevents.1740450365.gpu-1.3390091.0 +3 -0
Vocos/LibriTTS/tensorboard/events.out.tfevents.1740450615.gpu-1.3400827.0 +3 -0
Vocos/LibriTTS/tensorboard/events.out.tfevents.1741224850.gpu-1.3455425.0 +3 -0
Vocos/LibriTTS/tensorboard/events.out.tfevents.1741224909.gpu-1.3456868.0 +3 -0
Vocos/LibriTTS/tensorboard/events.out.tfevents.1741249732.gpu-1.3929847.0 +3 -0
Vocos/LibriTTS/train.log +3 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+*.txt filter=lfs diff=lfs merge=lfs -text
+*.log filter=lfs diff=lfs merge=lfs -text

README.md CHANGED Viewed

@@ -1,3 +1,26 @@
----
-license: cc-by-nc-4.0
----

+# Vocos LibriTTS Model
+This model was trained using the train-clean-100 and train-clean-360 subsets of the LibriTTS dataset.
+## Model Information
+- **Model Architecture**: Vocos
+- **Training Data**: LibriTTS train-clean-100 + train-clean-360
+- **License**: MIT
+## Repository
+The training and inference code can be found at: [StyleTTS2-Vocos](https://github.com/5Hyeons/StyleTTS2-Vocos)
+### Folder Structure
+```
+StyleTTS2
+├── README.md
+└── Vocos
+    └── LibriTTS
+        └── [checkpoint files]
+```
+## License
+This model is released under the MIT License, a permissive open-source license that allows commercial and non-commercial use, modification, and distribution.

Vocos/LibriTTS/Data/OOD_texts2.txt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:83c1886c0f8c25b91d0069c2d75f5911e3bfa2766d97098779cb685cd9a23749
+size 42723963

Vocos/LibriTTS/Data/train_libritts.txt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e9da2bab6aaa3dfaea1b70c7adc915bde6e04826c1b02fba9bf7b5ceffc8fa0d
+size 31124170

Vocos/LibriTTS/Data/valid_libritts.txt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:67b42ece5bcb11f9078b79f26832ae17936b78d97a326b99978062600da3aaa2
+size 319831

Vocos/LibriTTS/LibriTTS_vocos_first_stage.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:308e539a889a0152a89d946265ecd5993ff3ab21650f22c2d7515abf167f788f
+size 2163605292

Vocos/LibriTTS/config_libritts_vocos.yml ADDED Viewed

	@@ -0,0 +1,19 @@

+{ASR_config: Utils/ASR/config.yml, ASR_path: Utils/ASR/epoch_00080.pth, F0_path: Utils/JDC/bst.t7,
+  PLBERT_dir: Utils/PLBERT/, batch_size: 16, data_params: {OOD_data: Data/OOD_texts.txt,
+    min_length: 50, root_path: /data/LibriTTS, train_data: Data/train_libritts.txt,
+    val_data: Data/valid_libritts.txt}, device: cuda, epochs_1st: 50, epochs_2nd: 30,
+  first_stage_path: LibriTTS_vocos_first_stage.pth, load_only_params: false, log_dir: /data/ckpts/stts2/LibriTTS_vocos,
+  log_interval: 10, loss_params: {TMA_epoch: 5, diff_epoch: 10, joint_epoch: 15, lambda_F0: 1.0,
+    lambda_ce: 20.0, lambda_diff: 1.0, lambda_dur: 1.0, lambda_gen: 1.0, lambda_mel: 5.0,
+    lambda_mono: 1.0, lambda_norm: 1.0, lambda_s2s: 1.0, lambda_slm: 1.0, lambda_sty: 1.0},
+  max_len: 300, model_params: {decoder: {gen_istft_hop_size: 300, gen_istft_n_fft: 1200,
+      intermediate_dim: 1536, num_layers: 8, type: vocos}, diffusion: {dist: {estimate_sigma_data: true,
+        mean: -3.0, sigma_data: 0.23322181252793212, std: 1.0}, embedding_mask_proba: 0.1,
+      transformer: {head_features: 64, multiplier: 2, num_heads: 8, num_layers: 3}},
+    dim_in: 64, dropout: 0.2, hidden_dim: 512, max_conv_dim: 512, max_dur: 50, multispeaker: true,
+    n_layer: 3, n_mels: 80, n_token: 178, slm: {hidden: 768, initial_channel: 64,
+      model: microsoft/wavlm-base-plus, nlayers: 13, sr: 16000}, style_dim: 128},
+  optimizer_params: {bert_lr: 1.0e-05, ft_lr: 1.0e-05, lr: 0.0001}, preprocess_params: {
+    spect_params: {hop_length: 300, n_fft: 2048, win_length: 1200}, sr: 24000}, pretrained_model: '',
+  save_freq: 1, second_stage_load_pretrained: true, slmadv_params: {batch_percentage: 0.5,
+    iter: 20, max_len: 500, min_len: 400, scale: 0.01, sig: 1.5, thresh: 5}}

Vocos/LibriTTS/epoch_2nd_00029.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e8a5436301091400054f348d0aff74081d433c9402fc56bd71bad21f74e7996a
+size 2480791363

Vocos/LibriTTS/tensorboard/events.out.tfevents.1740379572.gpu-1.2895635.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0cba9acbcf3eacdf270df91d837d7cb403b973c87924ff9aab5411322cb7cc0c
+size 6041600

Vocos/LibriTTS/tensorboard/events.out.tfevents.1740450365.gpu-1.3390091.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:eb0c308bfa92b0c6ff4bc86a9284bfdf6d287f12f4880e66cc992730c1b934a9
+size 88

Vocos/LibriTTS/tensorboard/events.out.tfevents.1740450615.gpu-1.3400827.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e18f377d0ae08dd42391967d2d9c2a685abd7e98ef21c2de7440d3a85b52afaf
+size 73350099

Vocos/LibriTTS/tensorboard/events.out.tfevents.1741224850.gpu-1.3455425.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:972ef9eb2d0d05e73a6373e5db26211d96f9cb7825585e66254f40212716d709
+size 88

Vocos/LibriTTS/tensorboard/events.out.tfevents.1741224909.gpu-1.3456868.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:444728d61889724bd6265749d0a378b0ce99f78158a7b599baa91e05fccc0521
+size 4741948

Vocos/LibriTTS/tensorboard/events.out.tfevents.1741249732.gpu-1.3929847.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5a31ec93df37c599f68259abd22392729eb5c081112642f1bd156de19a2e2686
+size 78370138

Vocos/LibriTTS/train.log ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ebf58487b746afa5729e48134ab118e58c749d0805a25b0e3a2c4ea28f0354a2
+size 15614787