Edresson committed on Feb 2, 2024

Commit

a2877d4

1 Parent(s): a1d8f54

Update

Browse files

Files changed (30) hide show

Experiments/nohup.out +2 -2
Experiments/run/events.out.tfevents.1706462806.edresson-train-80.145564.0 +2 -2
Experiments/{runs/YourTTS-Baseline-PT-January-27-2024_12+00PM-e3c7cbd05/trainer_0_log.txt → run/events.out.tfevents.1706899297.edresson-train-80-3.1052.0} +2 -2
Experiments/runs/{YourTTS-Baseline-PT-January-27-2024_12+04PM-2bc0892f9 → YourTTS-Baseline-PT-February-02-2024_03+41PM-a1d8f544a}/best_model.pth +2 -2
Experiments/runs/{YourTTS-Baseline-PT-January-27-2024_12+04PM-2bc0892f9/best_model_85001.pth → YourTTS-Baseline-PT-February-02-2024_03+41PM-a1d8f544a/best_model_195001.pth} +2 -2
Experiments/runs/{YourTTS-Baseline-PT-January-27-2024_12+00PM-e3c7cbd05 → YourTTS-Baseline-PT-February-02-2024_03+41PM-a1d8f544a}/config.json +6 -6
Experiments/runs/{YourTTS-Baseline-PT-January-27-2024_12+00PM-e3c7cbd05 → YourTTS-Baseline-PT-February-02-2024_03+41PM-a1d8f544a}/language_ids.json +0 -0
Experiments/runs/{YourTTS-Baseline-PT-January-27-2024_12+00PM-e3c7cbd05 → YourTTS-Baseline-PT-February-02-2024_03+41PM-a1d8f544a}/speakers.pth +0 -0
Experiments/runs/{YourTTS-Baseline-PT-January-27-2024_12+05PM-165973116 → YourTTS-Baseline-PT-February-02-2024_03+41PM-a1d8f544a}/train_syntacc_baseline.py +3 -1
Experiments/runs/{YourTTS-Baseline-PT-January-27-2024_12+04PM-2bc0892f9 → YourTTS-Baseline-PT-February-02-2024_03+41PM-a1d8f544a}/trainer_0_log.txt +2 -2
Experiments/runs/YourTTS-Baseline-PT-January-27-2024_12+00PM-e3c7cbd05/best_model.pth +0 -3
Experiments/runs/YourTTS-Baseline-PT-January-27-2024_12+00PM-e3c7cbd05/best_model_124752.pth +0 -3
Experiments/runs/YourTTS-Baseline-PT-January-27-2024_12+00PM-e3c7cbd05/checkpoint_130000.pth +0 -3
Experiments/runs/YourTTS-Baseline-PT-January-27-2024_12+00PM-e3c7cbd05/checkpoint_135000.pth +0 -3
Experiments/runs/YourTTS-Baseline-PT-January-27-2024_12+00PM-e3c7cbd05/train_syntacc_baseline.py +0 -352
Experiments/runs/YourTTS-Baseline-PT-January-27-2024_12+04PM-2bc0892f9/config.json +0 -496
Experiments/runs/YourTTS-Baseline-PT-January-27-2024_12+04PM-2bc0892f9/language_ids.json +0 -15
Experiments/runs/YourTTS-Baseline-PT-January-27-2024_12+04PM-2bc0892f9/speakers.pth +0 -3
Experiments/runs/YourTTS-Baseline-PT-January-27-2024_12+04PM-2bc0892f9/train_syntacc_baseline.py +0 -352
Experiments/runs/YourTTS-Baseline-PT-January-27-2024_12+05PM-165973116/best_model.pth +0 -3
Experiments/runs/YourTTS-Baseline-PT-January-27-2024_12+05PM-165973116/best_model_87192.pth +0 -3
Experiments/runs/YourTTS-Baseline-PT-January-27-2024_12+05PM-165973116/checkpoint_130000.pth +0 -3
Experiments/runs/YourTTS-Baseline-PT-January-27-2024_12+05PM-165973116/checkpoint_135000.pth +0 -3
Experiments/runs/YourTTS-Baseline-PT-January-27-2024_12+05PM-165973116/config.json +0 -496
Experiments/runs/YourTTS-Baseline-PT-January-27-2024_12+05PM-165973116/language_ids.json +0 -15
Experiments/runs/YourTTS-Baseline-PT-January-27-2024_12+05PM-165973116/speakers.pth +0 -3
Experiments/runs/YourTTS-Baseline-PT-January-27-2024_12+05PM-165973116/trainer_0_log.txt +0 -3
Experiments/runs/YourTTS-Syntacc-PT_continue-January-28-2024_02+26PM-8a499b88c/{checkpoint_185000.pth → checkpoint_195000.pth} +1 -1
Experiments/runs/YourTTS-Syntacc-PT_continue-January-28-2024_02+26PM-8a499b88c/trainer_0_log.txt +2 -2
Experiments/train_syntacc_baseline.py +1 -1

Experiments/nohup.out CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5d10475e5d035b7e6fcf8289e9069f3dd25e6285616e228cbd23ff95b48dba11
-size 18092959

 version https://git-lfs.github.com/spec/v1
+oid sha256:4f3ff491be1a22770ad6be06a4ab637e3ee1fdd7ab56a46d56b6ee5ce294191a
+size 19098782

Experiments/run/events.out.tfevents.1706462806.edresson-train-80.145564.0 CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ce36d2c723c356665a705554a83e2b8142863730587e4f854c58a08781c9696c
-size 573377595

 version https://git-lfs.github.com/spec/v1
+oid sha256:edf473f639006f00be06083dcda982e19ad249445299bba3ccfa9d3c3be668c9
+size 603478571

Experiments/{runs/YourTTS-Baseline-PT-January-27-2024_12+00PM-e3c7cbd05/trainer_0_log.txt → run/events.out.tfevents.1706899297.edresson-train-80-3.1052.0} RENAMED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9eb020abfc0ef9798a6097596138d1567d58429ca6c2ce6e59b350acc5301cff
-size 1771305

 version https://git-lfs.github.com/spec/v1
+oid sha256:f92cb9921885f7784782d7c4cf4983bd9ebf92511857b363ad6c4a213d77e7fb
+size 1426573

Experiments/runs/{YourTTS-Baseline-PT-January-27-2024_12+04PM-2bc0892f9 → YourTTS-Baseline-PT-February-02-2024_03+41PM-a1d8f544a}/best_model.pth RENAMED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2a4a050e0d7a9c6c302b70b3f59dc195b12ad8922988de81bae55cbc1a89b9c8
-size 347719275

 version https://git-lfs.github.com/spec/v1
+oid sha256:5a8ca0385eb8c2d74471a308ead9447f46334969a793ff980a527783b55f6571
+size 347720178

Experiments/runs/{YourTTS-Baseline-PT-January-27-2024_12+04PM-2bc0892f9/best_model_85001.pth → YourTTS-Baseline-PT-February-02-2024_03+41PM-a1d8f544a/best_model_195001.pth} RENAMED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2a4a050e0d7a9c6c302b70b3f59dc195b12ad8922988de81bae55cbc1a89b9c8
-size 347719275

 version https://git-lfs.github.com/spec/v1
+oid sha256:5a8ca0385eb8c2d74471a308ead9447f46334969a793ff980a527783b55f6571
+size 347720178

Experiments/runs/{YourTTS-Baseline-PT-January-27-2024_12+00PM-e3c7cbd05 → YourTTS-Baseline-PT-February-02-2024_03+41PM-a1d8f544a}/config.json RENAMED Viewed

@@ -397,16 +397,16 @@
         ],
         "use_sdp": true,
         "noise_scale": 1.0,
-        "inference_noise_scale": 0.667,
         "length_scale": 1,
         "noise_scale_dp": 1.0,
-        "inference_noise_scale_dp": 1.0,
         "max_inference_len": null,
         "init_discriminator": true,
         "use_spectral_norm_disriminator": false,
         "use_speaker_embedding": false,
         "num_speakers": 0,
-        "speakers_file": "/raid/datasets/MUPE/Experiments/runs/YourTTS-Baseline-PT-January-27-2024_12+00PM-e3c7cbd05/speakers.pth",
         "d_vector_file": [
             "/raid/datasets/MUPE/dataset/mupe/H_ASP_speaker_embeddings_brpb.pth",
             "/raid/datasets/MUPE/dataset/mupe/H_ASP_speaker_embeddings_brba.pth",
@@ -429,7 +429,7 @@
         "use_language_embedding": true,
         "embedded_language_dim": 4,
         "num_languages": 0,
-        "language_ids_file": "/raid/datasets/MUPE/Experiments/runs/YourTTS-Baseline-PT-January-27-2024_12+00PM-e3c7cbd05/language_ids.json",
         "use_speaker_encoder_as_loss": false,
         "speaker_encoder_config_path": "https://github.com/coqui-ai/TTS/releases/download/speaker_encoder_model/config_se.json",
         "speaker_encoder_model_path": "https://github.com/coqui-ai/TTS/releases/download/speaker_encoder_model/model_se.pth.tar",
@@ -472,9 +472,9 @@
     "r": 1,
     "num_speakers": 0,
     "use_speaker_embedding": false,
-    "speakers_file": "/raid/datasets/MUPE/Experiments/runs/YourTTS-Baseline-PT-January-27-2024_12+00PM-e3c7cbd05/speakers.pth",
     "speaker_embedding_channels": 256,
-    "language_ids_file": "/raid/datasets/MUPE/Experiments/runs/YourTTS-Baseline-PT-January-27-2024_12+00PM-e3c7cbd05/language_ids.json",
     "use_language_embedding": true,
     "use_d_vector_file": true,
     "d_vector_file": [

         ],
         "use_sdp": true,
         "noise_scale": 1.0,
+        "inference_noise_scale": 0.33,
         "length_scale": 1,
         "noise_scale_dp": 1.0,
+        "inference_noise_scale_dp": 0.33,
         "max_inference_len": null,
         "init_discriminator": true,
         "use_spectral_norm_disriminator": false,
         "use_speaker_embedding": false,
         "num_speakers": 0,
+        "speakers_file": "/raid/datasets/MUPE/Experiments/runs/YourTTS-Baseline-PT-February-02-2024_03+41PM-a1d8f544a/speakers.pth",
         "d_vector_file": [
             "/raid/datasets/MUPE/dataset/mupe/H_ASP_speaker_embeddings_brpb.pth",
             "/raid/datasets/MUPE/dataset/mupe/H_ASP_speaker_embeddings_brba.pth",
         "use_language_embedding": true,
         "embedded_language_dim": 4,
         "num_languages": 0,
+        "language_ids_file": "/raid/datasets/MUPE/Experiments/runs/YourTTS-Baseline-PT-February-02-2024_03+41PM-a1d8f544a/language_ids.json",
         "use_speaker_encoder_as_loss": false,
         "speaker_encoder_config_path": "https://github.com/coqui-ai/TTS/releases/download/speaker_encoder_model/config_se.json",
         "speaker_encoder_model_path": "https://github.com/coqui-ai/TTS/releases/download/speaker_encoder_model/model_se.pth.tar",
     "r": 1,
     "num_speakers": 0,
     "use_speaker_embedding": false,
+    "speakers_file": "/raid/datasets/MUPE/Experiments/runs/YourTTS-Baseline-PT-February-02-2024_03+41PM-a1d8f544a/speakers.pth",
     "speaker_embedding_channels": 256,
+    "language_ids_file": "/raid/datasets/MUPE/Experiments/runs/YourTTS-Baseline-PT-February-02-2024_03+41PM-a1d8f544a/language_ids.json",
     "use_language_embedding": true,
     "use_d_vector_file": true,
     "d_vector_file": [

Experiments/runs/{YourTTS-Baseline-PT-January-27-2024_12+00PM-e3c7cbd05 → YourTTS-Baseline-PT-February-02-2024_03+41PM-a1d8f544a}/language_ids.json RENAMED Viewed

File without changes

Experiments/runs/{YourTTS-Baseline-PT-January-27-2024_12+00PM-e3c7cbd05 → YourTTS-Baseline-PT-February-02-2024_03+41PM-a1d8f544a}/speakers.pth RENAMED Viewed

File without changes

Experiments/runs/{YourTTS-Baseline-PT-January-27-2024_12+05PM-165973116 → YourTTS-Baseline-PT-February-02-2024_03+41PM-a1d8f544a}/train_syntacc_baseline.py RENAMED Viewed

@@ -28,7 +28,7 @@ RUN_NAME = "YourTTS-Baseline-PT"
 OUT_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), "runs")  # "/raid/coqui/Checkpoints/original-YourTTS/"
 # If you want to do transfer learning and speed up your training, you can set here the path to the available CML-TTS checkpoint, which can be downloaded here:  https://drive.google.com/u/2/uc?id=1yDCSJ1pFZQTHhL09GMbOrdjcPULApa0p
-RESTORE_PATH = "/raid/datasets/MUPE/Experiments/runs/YourTTS-Syntacc-PT-January-25-2024_02+59PM-0000000/checkpoint_85000.pth"  # Download the checkpoint here:  https://drive.google.com/u/2/uc?id=1yDCSJ1pFZQTHhL09GMbOrdjcPULApa0p
 # This parameter is useful for debugging: it skips the training epochs and just does the evaluation and produces the test sentences
 SKIP_TRAIN_EPOCH = False
@@ -221,6 +221,8 @@ audio_config = VitsAudioConfig(
 # Init VITSArgs setting the arguments that are needed for the YourTTS model
 model_args = VitsArgs(
     spec_segment_size=62,
     hidden_channels=192,
     hidden_channels_ffn_text_encoder=768,

 OUT_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), "runs")  # "/raid/coqui/Checkpoints/original-YourTTS/"
 # If you want to do transfer learning and speed up your training, you can set here the path to the available CML-TTS checkpoint, which can be downloaded here:  https://drive.google.com/u/2/uc?id=1yDCSJ1pFZQTHhL09GMbOrdjcPULApa0p
+RESTORE_PATH = "/raid/datasets/MUPE/Experiments/runs/YourTTS-Syntacc-PT_continue-January-28-2024_02+26PM-8a499b88c/checkpoint_195000.pth"  # Download the checkpoint here:  https://drive.google.com/u/2/uc?id=1yDCSJ1pFZQTHhL09GMbOrdjcPULApa0p
 # This parameter is useful for debugging: it skips the training epochs and just does the evaluation and produces the test sentences
 SKIP_TRAIN_EPOCH = False
 # Init VITSArgs setting the arguments that are needed for the YourTTS model
 model_args = VitsArgs(
+    inference_noise_scale=0.33,
+    inference_noise_scale_dp=0.33,
     spec_segment_size=62,
     hidden_channels=192,
     hidden_channels_ffn_text_encoder=768,

Experiments/runs/{YourTTS-Baseline-PT-January-27-2024_12+04PM-2bc0892f9 → YourTTS-Baseline-PT-February-02-2024_03+41PM-a1d8f544a}/trainer_0_log.txt RENAMED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:94c095ee47fd6e763ee0e129a7728cf80e5e4f21301e767ab0141c478d369b89
-size 128993

 version https://git-lfs.github.com/spec/v1
+oid sha256:800fa1ba79843ee3494b41dbc8ffa45c6f147a7eb369e72260cbc0a5ce75dd72
+size 135592

Experiments/runs/YourTTS-Baseline-PT-January-27-2024_12+00PM-e3c7cbd05/best_model.pth DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:c62e29c7a1dd4f701ab4998e0b1f569cfe7486cc7806f149c1ff857f172383e0
-size 1043220702

Experiments/runs/YourTTS-Baseline-PT-January-27-2024_12+00PM-e3c7cbd05/best_model_124752.pth DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:c62e29c7a1dd4f701ab4998e0b1f569cfe7486cc7806f149c1ff857f172383e0
-size 1043220702

Experiments/runs/YourTTS-Baseline-PT-January-27-2024_12+00PM-e3c7cbd05/checkpoint_130000.pth DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:a71ead47e605fc525b264ad882fd54630c15a42eb69aaf88993d26d5ea84ae3b
-size 1043220766

Experiments/runs/YourTTS-Baseline-PT-January-27-2024_12+00PM-e3c7cbd05/checkpoint_135000.pth DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:96e16ee83729813041c17f6edf8a702bdf59e7afe345cfad1fe65dd4ba0b1fce
-size 1043220766

Experiments/runs/YourTTS-Baseline-PT-January-27-2024_12+00PM-e3c7cbd05/train_syntacc_baseline.py DELETED Viewed

@@ -1,352 +0,0 @@
-import os
-import torch
-from trainer import Trainer, TrainerArgs
-from TTS.bin.compute_embeddings import compute_embeddings
-from TTS.bin.resample import resample_files
-from TTS.config.shared_configs import BaseDatasetConfig
-from TTS.tts.configs.vits_config import VitsConfig
-from TTS.tts.datasets import load_tts_samples
-from TTS.tts.models.vits import CharactersConfig, Vits, VitsArgs, VitsAudioConfig, VitsDataset
-from TTS.utils.downloaders import download_libri_tts
-from torch.utils.data import DataLoader
-from TTS.utils.samplers import PerfectBatchSampler
-torch.set_num_threads(24)
-# pylint: disable=W0105
-"""
-    This recipe replicates the first experiment proposed in the CML-TTS paper (https://arxiv.org/abs/2306.10097). It uses the YourTTS model.
-    YourTTS model is based on the VITS model however it uses external speaker embeddings extracted from a pre-trained speaker encoder and has small architecture changes.
-"""
-CURRENT_PATH = os.path.dirname(os.path.abspath(__file__))
-# Name of the run for the Trainer
-RUN_NAME = "YourTTS-Baseline-PT"
-# Path where you want to save the models outputs (configs, checkpoints and tensorboard logs)
-OUT_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), "runs")  # "/raid/coqui/Checkpoints/original-YourTTS/"
-# If you want to do transfer learning and speed up your training, you can set here the path to the available CML-TTS checkpoint, which can be downloaded here:  https://drive.google.com/u/2/uc?id=1yDCSJ1pFZQTHhL09GMbOrdjcPULApa0p
-RESTORE_PATH = "/raid/datasets/MUPE/Experiments/runs/YourTTS-Syntacc-PT-January-25-2024_02+59PM-0000000/checkpoint_85000.pth"  # Download the checkpoint here:  https://drive.google.com/u/2/uc?id=1yDCSJ1pFZQTHhL09GMbOrdjcPULApa0p
-# This parameter is useful for debugging: it skips the training epochs and just does the evaluation and produces the test sentences
-SKIP_TRAIN_EPOCH = False
-# Set here the batch size to be used in training and evaluation
-BATCH_SIZE = 26
-# Training Sampling rate and the target sampling rate for resampling the downloaded dataset (Note: If you change this you might need to redownload the dataset !!)
-# Note: If you add new datasets, please make sure that the dataset sampling rate and this parameter are matching, otherwise resample your audios
-SAMPLE_RATE = 16000
-DASHBOARD_LOGGER="tensorboard"
-LOGGER_URI = None
-DASHBOARD_LOGGER = "clearml"
-LOGGER_URI = "s3://coqui-ai-models/TTS/Checkpoints/YourTTS/MUPE/"
-# Max audio length in seconds to be used in training (every audio bigger than it will be ignored)
-MAX_AUDIO_LEN_IN_SECONDS = float("inf")
-# Define here the datasets config
-brpb_train_config = BaseDatasetConfig(
-    formatter="coqui",
-    dataset_name="mupe",
-    meta_file_train="metadata_coqui_brpb.csv",
-    path="/raid/datasets/MUPE/dataset/mupe/",
-    language="brpb"
-)
-brba_train_config = BaseDatasetConfig(
-    formatter="coqui",
-    dataset_name="mupe",
-    meta_file_train="metadata_coqui_brba.csv",
-    path="/raid/datasets/MUPE/dataset/mupe/",
-    language="brba"
-)
-brportugal_train_config = BaseDatasetConfig(
-    formatter="coqui",
-    dataset_name="mupe",
-    meta_file_train="metadata_coqui_brportugal.csv",
-    path="/raid/datasets/MUPE/dataset/mupe/",
-    language="brportugal"
-)
-brsp_train_config = BaseDatasetConfig(
-    formatter="coqui",
-    dataset_name="mupe",
-    meta_file_train="metadata_coqui_brsp.csv",
-    path="/raid/datasets/MUPE/dataset/mupe/",
-    language="brsp"
-)
-brpe_train_config = BaseDatasetConfig(
-    formatter="coqui",
-    dataset_name="mupe",
-    meta_file_train="metadata_coqui_brpe.csv",
-    path="/raid/datasets/MUPE/dataset/mupe/",
-    language="brpe"
-)
-brmg_train_config = BaseDatasetConfig(
-    formatter="coqui",
-    dataset_name="mupe",
-    meta_file_train="metadata_coqui_brmg.csv",
-    path="/raid/datasets/MUPE/dataset/mupe/",
-    language="brmg"
-)
-brrj_train_config = BaseDatasetConfig(
-    formatter="coqui",
-    dataset_name="mupe",
-    meta_file_train="metadata_coqui_brrj.csv",
-    path="/raid/datasets/MUPE/dataset/mupe/",
-    language="brrj"
-)
-brce_train_config = BaseDatasetConfig(
-    formatter="coqui",
-    dataset_name="mupe",
-    meta_file_train="metadata_coqui_brce.csv",
-    path="/raid/datasets/MUPE/dataset/mupe/",
-    language="brce"
-)
-brrs_train_config = BaseDatasetConfig(
-    formatter="coqui",
-    dataset_name="mupe",
-    meta_file_train="metadata_coqui_brrs.csv",
-    path="/raid/datasets/MUPE/dataset/mupe/",
-    language="brrs"
-)
-bralemanha_train_config = BaseDatasetConfig(
-    formatter="coqui",
-    dataset_name="mupe",
-    meta_file_train="metadata_coqui_bralemanha.csv",
-    path="/raid/datasets/MUPE/dataset/mupe/",
-    language="bralemanha"
-)
-brgo_train_config = BaseDatasetConfig(
-    formatter="coqui",
-    dataset_name="mupe",
-    meta_file_train="metadata_coqui_brgo.csv",
-    path="/raid/datasets/MUPE/dataset/mupe/",
-    language="brgo"
-)
-bral_train_config = BaseDatasetConfig(
-    formatter="coqui",
-    dataset_name="mupe",
-    meta_file_train="metadata_coqui_bral.csv",
-    path="/raid/datasets/MUPE/dataset/mupe/",
-    language="bral"
-)
-brpr_train_config = BaseDatasetConfig(
-    formatter="coqui",
-    dataset_name="mupe",
-    meta_file_train="metadata_coqui_brpr.csv",
-    path="/raid/datasets/MUPE/dataset/mupe/",
-    language="brpr"
-)
-bres_train_config = BaseDatasetConfig(
-    formatter="coqui",
-    dataset_name="mupe",
-    meta_file_train="metadata_coqui_bres.csv",
-    path="/raid/datasets/MUPE/dataset/mupe/",
-    language="bres"
-)
-brpi_train_config = BaseDatasetConfig(
-    formatter="coqui",
-    dataset_name="mupe",
-    meta_file_train="metadata_coqui_brpi.csv",
-    path="/raid/datasets/MUPE/dataset/mupe/",
-    language="brpi"
-)
-# bres_train_config, brpi_train_config  no files found
-DATASETS_CONFIG_LIST = [brpb_train_config,brba_train_config,brportugal_train_config,brsp_train_config,brpe_train_config,brmg_train_config,brrj_train_config,brce_train_config,brrs_train_config,bralemanha_train_config,brgo_train_config,bral_train_config,brpr_train_config]
-### Extract speaker embeddings
-SPEAKER_ENCODER_CHECKPOINT_PATH = (
-    "https://github.com/coqui-ai/TTS/releases/download/speaker_encoder_model/model_se.pth.tar"
-)
-SPEAKER_ENCODER_CONFIG_PATH = "https://github.com/coqui-ai/TTS/releases/download/speaker_encoder_model/config_se.json"
-D_VECTOR_FILES = []  # List of speaker embeddings/d-vectors to be used during the training
-# Iterate over all the dataset configs, checking whether the speaker embeddings have already been computed; if not, compute them
-for dataset_conf in DATASETS_CONFIG_LIST:
-    # Check if the embeddings weren't already computed, if not compute it
-    embeddings_file = os.path.join(dataset_conf.path, f"H_ASP_speaker_embeddings_{dataset_conf.language}.pth")
-    if not os.path.isfile(embeddings_file):
-        print(f">>> Computing the speaker embeddings for the {dataset_conf.dataset_name} dataset")
-        compute_embeddings(
-            SPEAKER_ENCODER_CHECKPOINT_PATH,
-            SPEAKER_ENCODER_CONFIG_PATH,
-            embeddings_file,
-            old_speakers_file=None,
-            config_dataset_path=None,
-            formatter_name=dataset_conf.formatter,
-            dataset_name=dataset_conf.dataset_name,
-            dataset_path=dataset_conf.path,
-            meta_file_train=dataset_conf.meta_file_train,
-            meta_file_val=dataset_conf.meta_file_val,
-            disable_cuda=False,
-            no_eval=False,
-        )
-    D_VECTOR_FILES.append(embeddings_file)
-# Audio config used in training.
-audio_config = VitsAudioConfig(
-    sample_rate=SAMPLE_RATE,
-    hop_length=256,
-    win_length=1024,
-    fft_size=1024,
-    mel_fmin=0.0,
-    mel_fmax=None,
-    num_mels=80,
-)
-# Init VITSArgs setting the arguments that are needed for the YourTTS model
-model_args = VitsArgs(
-    spec_segment_size=62,
-    hidden_channels=192,
-    hidden_channels_ffn_text_encoder=768,
-    num_heads_text_encoder=2,
-    num_layers_text_encoder=10,
-    kernel_size_text_encoder=3,
-    dropout_p_text_encoder=0.1,
-    d_vector_file=D_VECTOR_FILES,
-    use_d_vector_file=True,
-    d_vector_dim=512,
-    speaker_encoder_model_path=SPEAKER_ENCODER_CHECKPOINT_PATH,
-    speaker_encoder_config_path=SPEAKER_ENCODER_CONFIG_PATH,
-    resblock_type_decoder="2",  # In the paper, we accidentally trained the YourTTS using ResNet blocks type 2, if you like you can use the ResNet blocks type 1 like the VITS model
-    # Useful parameters to enable the Speaker Consistency Loss (SCL) described in the paper
-    use_speaker_encoder_as_loss=False,
-    # Useful parameters to enable multilingual training
-    use_language_embedding=True,
-    embedded_language_dim=4,
-    use_adaptive_weight_text_encoder=False,
-    use_perfect_class_batch_sampler=True,
-    perfect_class_batch_sampler_key="language"
-)
-# General training config, here you can change the batch size and others useful parameters
-config = VitsConfig(
-    output_path=OUT_PATH,
-    model_args=model_args,
-    run_name=RUN_NAME,
-    project_name="SYNTACC",
-    run_description="""
-            - YourTTS with SYNTACC text encoder
-        """,
-    dashboard_logger=DASHBOARD_LOGGER,
-    logger_uri=LOGGER_URI,
-    audio=audio_config,
-    batch_size=BATCH_SIZE,
-    batch_group_size=48,
-    eval_batch_size=BATCH_SIZE,
-    num_loader_workers=8,
-    eval_split_max_size=256,
-    print_step=50,
-    plot_step=100,
-    log_model_step=1000,
-    save_step=5000,
-    save_n_checkpoints=2,
-    save_checkpoints=True,
-    # target_loss="loss_1",
-    print_eval=False,
-    use_phonemes=False,
-    phonemizer="espeak",
-    phoneme_language="en",
-    compute_input_seq_cache=True,
-    add_blank=True,
-    text_cleaner="multilingual_cleaners",
-    characters=CharactersConfig(
-        characters_class="TTS.tts.models.vits.VitsCharacters",
-        pad="_",
-        eos="&",
-        bos="*",
-        blank=None,
-        characters="ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\u00a1\u00a3\u00b7\u00b8\u00c0\u00c1\u00c2\u00c3\u00c4\u00c5\u00c7\u00c8\u00c9\u00ca\u00cb\u00cc\u00cd\u00ce\u00cf\u00d1\u00d2\u00d3\u00d4\u00d5\u00d6\u00d9\u00da\u00db\u00dc\u00df\u00e0\u00e1\u00e2\u00e3\u00e4\u00e5\u00e7\u00e8\u00e9\u00ea\u00eb\u00ec\u00ed\u00ee\u00ef\u00f1\u00f2\u00f3\u00f4\u00f5\u00f6\u00f9\u00fa\u00fb\u00fc\u0101\u0104\u0105\u0106\u0107\u010b\u0119\u0141\u0142\u0143\u0144\u0152\u0153\u015a\u015b\u0161\u0178\u0179\u017a\u017b\u017c\u020e\u04e7\u05c2\u1b20",
-        punctuations="\u2014!'(),-.:;?\u00bf ",
-        phonemes="iy\u0268\u0289\u026fu\u026a\u028f\u028ae\u00f8\u0258\u0259\u0275\u0264o\u025b\u0153\u025c\u025e\u028c\u0254\u00e6\u0250a\u0276\u0251\u0252\u1d7b\u0298\u0253\u01c0\u0257\u01c3\u0284\u01c2\u0260\u01c1\u029bpbtd\u0288\u0256c\u025fk\u0261q\u0262\u0294\u0274\u014b\u0272\u0273n\u0271m\u0299r\u0280\u2c71\u027e\u027d\u0278\u03b2fv\u03b8\u00f0sz\u0283\u0292\u0282\u0290\u00e7\u029dx\u0263\u03c7\u0281\u0127\u0295h\u0266\u026c\u026e\u028b\u0279\u027bj\u0270l\u026d\u028e\u029f\u02c8\u02cc\u02d0\u02d1\u028dw\u0265\u029c\u02a2\u02a1\u0255\u0291\u027a\u0267\u025a\u02de\u026b'\u0303' ",
-        is_unique=True,
-        is_sorted=True,
-    ),
-    phoneme_cache_path=None,
-    precompute_num_workers=12,
-    start_by_longest=True,
-    datasets=DATASETS_CONFIG_LIST,
-    cudnn_benchmark=False,
-    max_audio_len=SAMPLE_RATE * MAX_AUDIO_LEN_IN_SECONDS,
-    mixed_precision=False,
-    test_sentences=[
-        #GUSTAVO: apenas pessoas do treino
-        ["Voc\u00ea ter\u00e1 a vista do topo da montanha que voc\u00ea escalar.", "EDILEINE_FONSECA", None, "brsp"],
-        ["Quem semeia ventos, colhe tempestades.", "JOSE_PAULO_DE_ARAUJO", None, "brpb"],
-        ["O olho do dono \u00e9 que engorda o gado.", "VITOR_RAFAEL_OLIVEIRA_ALVES", None, "brba"],
-        ["\u00c1gua mole em pedra dura, tanto bate at\u00e9 que fura.", "MARIA_AURORA_FELIX", None, "brportugal"],
-        ["Quem espera sempre alcan\u00e7a.", "ANTONIO_DE_AMORIM_COSTA", None, "brpe"],
-        ["Cada macaco no seu galho.", "ALCIDES_DE_LIMA", None, "brmg"],
-        ["Em terra de cego, quem tem um olho \u00e9 rei.", "ALUISIO_SOARES_DE_SOUSA", None, "brrj"],
-        ["A ocasi\u00e3o faz o ladr\u00e3o.", "FRANCISCO_JOSE_MOREIRA_MOTA", None, "brce"],
-        ["De gr\u00e3o em gr\u00e3o, a galinha enche o papo.", "EVALDO_ANDRADA_CORREA", None, "brrs"],
-        ["Mais vale um p\u00c1ssaro na m\u00e3o do que dois voando.", "DORIS_ALEXANDER", None, "bralemanha"],
-        ["Quem n\u00e3o arrisca, n\u00e3o petisca.", "DONALDO_LUIZ_DE_ALMEIDA", None, "brgo"],
-        ["A uni\u00e3o faz a for\u00e7a.", "GERONCIO_HENRIQUE_NETO", None, "bral"],
-        ["Em boca fechada n\u00e3o entra mosquito.", "MALU_NATEL_FREIRE_WEBER", None, "brpr"],
-        # ["Quem n\u00e3o tem dinheiro, n\u00e3o tem v\u00edcios.", "INES_VIEIRA_BOGEA", None, "bres"],
-        # ["Quando voc\u00ea n\u00e3o corre nenhum risco, voc\u00ea arrisca tudo.", "MARIA_ASSUNCAO_SOUSA", None, "brpi"]
-    ],
-    # Enable the weighted sampler
-    use_weighted_sampler=True,
-    # Ensures that all speakers are seen in the training batch equally no matter how many samples each speaker has
-    # weighted_sampler_attrs={"language": 1.0, "speaker_name": 1.0},
-    weighted_sampler_attrs={"language": 1.0},
-    weighted_sampler_multipliers={
-        # "speaker_name": {
-        # you can force the batching scheme to give a higher weight to a certain speaker and then this speaker will appears more frequently on the batch.
-        # It will speedup the speaker adaptation process. Considering the CML train dataset and "new_speaker" as the speaker name of the speaker that you want to adapt.
-        # The line above will make the balancer consider the "new_speaker" as 106 speakers so 1/4 of the number of speakers present on CML dataset.
-        # 'new_speaker': 106, # (CML tot. train speaker)/4 = (424/4) = 106
-        # }
-    },
-    # It defines the Speaker Consistency Loss (SCL) α to 9 like the YourTTS paper
-    speaker_encoder_loss_alpha=9.0,
-)
-# Load all the dataset samples and split them into training and evaluation sets
-train_samples, eval_samples = load_tts_samples(
-    config.datasets,
-    eval_split=True,
-    eval_split_max_size=config.eval_split_max_size,
-    eval_split_size=config.eval_split_size,
-)
-# Init the model
-model = Vits.init_from_config(config)
-# Init the trainer and 🚀
-trainer = Trainer(
-    TrainerArgs(restore_path=RESTORE_PATH, skip_train_epoch=SKIP_TRAIN_EPOCH, start_with_eval=True),
-    config,
-    output_path=OUT_PATH,
-    model=model,
-    train_samples=train_samples,
-    eval_samples=eval_samples,
-)
-trainer.fit()

Experiments/runs/YourTTS-Baseline-PT-January-27-2024_12+04PM-2bc0892f9/config.json DELETED Viewed

@@ -1,496 +0,0 @@
-{
-    "output_path": "/raid/datasets/MUPE/Experiments/runs",
-    "logger_uri": "s3://coqui-ai-models/TTS/Checkpoints/YourTTS/MUPE/",
-    "run_name": "YourTTS-Baseline-PT",
-    "project_name": "SYNTACC",
-    "run_description": "\n            - YourTTS with SYNTACC text encoder\n        ",
-    "print_step": 50,
-    "plot_step": 100,
-    "model_param_stats": false,
-    "wandb_entity": null,
-    "dashboard_logger": "clearml",
-    "save_on_interrupt": true,
-    "log_model_step": 1000,
-    "save_step": 5000,
-    "save_n_checkpoints": 2,
-    "save_checkpoints": true,
-    "save_all_best": false,
-    "save_best_after": 10000,
-    "target_loss": null,
-    "print_eval": false,
-    "test_delay_epochs": 0,
-    "run_eval": true,
-    "run_eval_steps": null,
-    "distributed_backend": "nccl",
-    "distributed_url": "tcp://localhost:54321",
-    "mixed_precision": false,
-    "precision": "fp16",
-    "epochs": 1000,
-    "batch_size": 26,
-    "eval_batch_size": 26,
-    "grad_clip": [
-        1000,
-        1000
-    ],
-    "scheduler_after_epoch": true,
-    "lr": 0.001,
-    "optimizer": "AdamW",
-    "optimizer_params": {
-        "betas": [
-            0.8,
-            0.99
-        ],
-        "eps": 1e-09,
-        "weight_decay": 0.01
-    },
-    "lr_scheduler": null,
-    "lr_scheduler_params": {},
-    "use_grad_scaler": false,
-    "allow_tf32": false,
-    "cudnn_enable": true,
-    "cudnn_deterministic": false,
-    "cudnn_benchmark": false,
-    "training_seed": 54321,
-    "model": "vits",
-    "num_loader_workers": 8,
-    "num_eval_loader_workers": 0,
-    "use_noise_augment": false,
-    "audio": {
-        "fft_size": 1024,
-        "sample_rate": 16000,
-        "win_length": 1024,
-        "hop_length": 256,
-        "num_mels": 80,
-        "mel_fmin": 0.0,
-        "mel_fmax": null
-    },
-    "use_phonemes": false,
-    "phonemizer": "espeak",
-    "phoneme_language": "en",
-    "compute_input_seq_cache": true,
-    "text_cleaner": "multilingual_cleaners",
-    "enable_eos_bos_chars": false,
-    "test_sentences_file": "",
-    "phoneme_cache_path": null,
-    "characters": {
-        "characters_class": "TTS.tts.models.vits.VitsCharacters",
-        "vocab_dict": null,
-        "pad": "_",
-        "eos": "&",
-        "bos": "*",
-        "blank": null,
-        "characters": "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\u00a1\u00a3\u00b7\u00b8\u00c0\u00c1\u00c2\u00c3\u00c4\u00c5\u00c7\u00c8\u00c9\u00ca\u00cb\u00cc\u00cd\u00ce\u00cf\u00d1\u00d2\u00d3\u00d4\u00d5\u00d6\u00d9\u00da\u00db\u00dc\u00df\u00e0\u00e1\u00e2\u00e3\u00e4\u00e5\u00e7\u00e8\u00e9\u00ea\u00eb\u00ec\u00ed\u00ee\u00ef\u00f1\u00f2\u00f3\u00f4\u00f5\u00f6\u00f9\u00fa\u00fb\u00fc\u0101\u0104\u0105\u0106\u0107\u010b\u0119\u0141\u0142\u0143\u0144\u0152\u0153\u015a\u015b\u0161\u0178\u0179\u017a\u017b\u017c\u020e\u04e7\u05c2\u1b20",
-        "punctuations": "\u2014!'(),-.:;?\u00bf ",
-        "phonemes": "iy\u0268\u0289\u026fu\u026a\u028f\u028ae\u00f8\u0258\u0259\u0275\u0264o\u025b\u0153\u025c\u025e\u028c\u0254\u00e6\u0250a\u0276\u0251\u0252\u1d7b\u0298\u0253\u01c0\u0257\u01c3\u0284\u01c2\u0260\u01c1\u029bpbtd\u0288\u0256c\u025fk\u0261q\u0262\u0294\u0274\u014b\u0272\u0273n\u0271m\u0299r\u0280\u2c71\u027e\u027d\u0278\u03b2fv\u03b8\u00f0sz\u0283\u0292\u0282\u0290\u00e7\u029dx\u0263\u03c7\u0281\u0127\u0295h\u0266\u026c\u026e\u028b\u0279\u027bj\u0270l\u026d\u028e\u029f\u02c8\u02cc\u02d0\u02d1\u028dw\u0265\u029c\u02a2\u02a1\u0255\u0291\u027a\u0267\u025a\u02de\u026b'\u0303' ",
-        "is_unique": true,
-        "is_sorted": true
-    },
-    "add_blank": true,
-    "batch_group_size": 48,
-    "loss_masking": null,
-    "min_audio_len": 1,
-    "max_audio_len": Infinity,
-    "min_text_len": 1,
-    "max_text_len": Infinity,
-    "compute_f0": false,
-    "compute_energy": false,
-    "compute_linear_spec": true,
-    "precompute_num_workers": 12,
-    "start_by_longest": true,
-    "shuffle": false,
-    "drop_last": false,
-    "datasets": [
-        {
-            "formatter": "coqui",
-            "dataset_name": "mupe",
-            "path": "/raid/datasets/MUPE/dataset/mupe/",
-            "meta_file_train": "metadata_coqui_brpb.csv",
-            "ignored_speakers": null,
-            "language": "brpb",
-            "phonemizer": "",
-            "meta_file_val": "",
-            "meta_file_attn_mask": ""
-        },
-        {
-            "formatter": "coqui",
-            "dataset_name": "mupe",
-            "path": "/raid/datasets/MUPE/dataset/mupe/",
-            "meta_file_train": "metadata_coqui_brba.csv",
-            "ignored_speakers": null,
-            "language": "brba",
-            "phonemizer": "",
-            "meta_file_val": "",
-            "meta_file_attn_mask": ""
-        },
-        {
-            "formatter": "coqui",
-            "dataset_name": "mupe",
-            "path": "/raid/datasets/MUPE/dataset/mupe/",
-            "meta_file_train": "metadata_coqui_brportugal.csv",
-            "ignored_speakers": null,
-            "language": "brportugal",
-            "phonemizer": "",
-            "meta_file_val": "",
-            "meta_file_attn_mask": ""
-        },
-        {
-            "formatter": "coqui",
-            "dataset_name": "mupe",
-            "path": "/raid/datasets/MUPE/dataset/mupe/",
-            "meta_file_train": "metadata_coqui_brsp.csv",
-            "ignored_speakers": null,
-            "language": "brsp",
-            "phonemizer": "",
-            "meta_file_val": "",
-            "meta_file_attn_mask": ""
-        },
-        {
-            "formatter": "coqui",
-            "dataset_name": "mupe",
-            "path": "/raid/datasets/MUPE/dataset/mupe/",
-            "meta_file_train": "metadata_coqui_brpe.csv",
-            "ignored_speakers": null,
-            "language": "brpe",
-            "phonemizer": "",
-            "meta_file_val": "",
-            "meta_file_attn_mask": ""
-        },
-        {
-            "formatter": "coqui",
-            "dataset_name": "mupe",
-            "path": "/raid/datasets/MUPE/dataset/mupe/",
-            "meta_file_train": "metadata_coqui_brmg.csv",
-            "ignored_speakers": null,
-            "language": "brmg",
-            "phonemizer": "",
-            "meta_file_val": "",
-            "meta_file_attn_mask": ""
-        },
-        {
-            "formatter": "coqui",
-            "dataset_name": "mupe",
-            "path": "/raid/datasets/MUPE/dataset/mupe/",
-            "meta_file_train": "metadata_coqui_brrj.csv",
-            "ignored_speakers": null,
-            "language": "brrj",
-            "phonemizer": "",
-            "meta_file_val": "",
-            "meta_file_attn_mask": ""
-        },
-        {
-            "formatter": "coqui",
-            "dataset_name": "mupe",
-            "path": "/raid/datasets/MUPE/dataset/mupe/",
-            "meta_file_train": "metadata_coqui_brce.csv",
-            "ignored_speakers": null,
-            "language": "brce",
-            "phonemizer": "",
-            "meta_file_val": "",
-            "meta_file_attn_mask": ""
-        },
-        {
-            "formatter": "coqui",
-            "dataset_name": "mupe",
-            "path": "/raid/datasets/MUPE/dataset/mupe/",
-            "meta_file_train": "metadata_coqui_brrs.csv",
-            "ignored_speakers": null,
-            "language": "brrs",
-            "phonemizer": "",
-            "meta_file_val": "",
-            "meta_file_attn_mask": ""
-        },
-        {
-            "formatter": "coqui",
-            "dataset_name": "mupe",
-            "path": "/raid/datasets/MUPE/dataset/mupe/",
-            "meta_file_train": "metadata_coqui_bralemanha.csv",
-            "ignored_speakers": null,
-            "language": "bralemanha",
-            "phonemizer": "",
-            "meta_file_val": "",
-            "meta_file_attn_mask": ""
-        },
-        {
-            "formatter": "coqui",
-            "dataset_name": "mupe",
-            "path": "/raid/datasets/MUPE/dataset/mupe/",
-            "meta_file_train": "metadata_coqui_brgo.csv",
-            "ignored_speakers": null,
-            "language": "brgo",
-            "phonemizer": "",
-            "meta_file_val": "",
-            "meta_file_attn_mask": ""
-        },
-        {
-            "formatter": "coqui",
-            "dataset_name": "mupe",
-            "path": "/raid/datasets/MUPE/dataset/mupe/",
-            "meta_file_train": "metadata_coqui_bral.csv",
-            "ignored_speakers": null,
-            "language": "bral",
-            "phonemizer": "",
-            "meta_file_val": "",
-            "meta_file_attn_mask": ""
-        },
-        {
-            "formatter": "coqui",
-            "dataset_name": "mupe",
-            "path": "/raid/datasets/MUPE/dataset/mupe/",
-            "meta_file_train": "metadata_coqui_brpr.csv",
-            "ignored_speakers": null,
-            "language": "brpr",
-            "phonemizer": "",
-            "meta_file_val": "",
-            "meta_file_attn_mask": ""
-        }
-    ],
-    "test_sentences": [
-        [
-            "Voc\u00ea ter\u00e1 a vista do topo da montanha que voc\u00ea escalar.",
-            "EDILEINE_FONSECA",
-            null,
-            "brsp"
-        ],
-        [
-            "Quem semeia ventos, colhe tempestades.",
-            "JOSE_PAULO_DE_ARAUJO",
-            null,
-            "brpb"
-        ],
-        [
-            "O olho do dono \u00e9 que engorda o gado.",
-            "VITOR_RAFAEL_OLIVEIRA_ALVES",
-            null,
-            "brba"
-        ],
-        [
-            "\u00c1gua mole em pedra dura, tanto bate at\u00e9 que fura.",
-            "MARIA_AURORA_FELIX",
-            null,
-            "brportugal"
-        ],
-        [
-            "Quem espera sempre alcan\u00e7a.",
-            "ANTONIO_DE_AMORIM_COSTA",
-            null,
-            "brpe"
-        ],
-        [
-            "Cada macaco no seu galho.",
-            "ALCIDES_DE_LIMA",
-            null,
-            "brmg"
-        ],
-        [
-            "Em terra de cego, quem tem um olho \u00e9 rei.",
-            "ALUISIO_SOARES_DE_SOUSA",
-            null,
-            "brrj"
-        ],
-        [
-            "A ocasi\u00e3o faz o ladr\u00e3o.",
-            "FRANCISCO_JOSE_MOREIRA_MOTA",
-            null,
-            "brce"
-        ],
-        [
-            "De gr\u00e3o em gr\u00e3o, a galinha enche o papo.",
-            "EVALDO_ANDRADA_CORREA",
-            null,
-            "brrs"
-        ],
-        [
-            "Mais vale um p\u00c1ssaro na m\u00e3o do que dois voando.",
-            "DORIS_ALEXANDER",
-            null,
-            "bralemanha"
-        ],
-        [
-            "Quem n\u00e3o arrisca, n\u00e3o petisca.",
-            "DONALDO_LUIZ_DE_ALMEIDA",
-            null,
-            "brgo"
-        ],
-        [
-            "A uni\u00e3o faz a for\u00e7a.",
-            "GERONCIO_HENRIQUE_NETO",
-            null,
-            "bral"
-        ],
-        [
-            "Em boca fechada n\u00e3o entra mosquito.",
-            "MALU_NATEL_FREIRE_WEBER",
-            null,
-            "brpr"
-        ]
-    ],
-    "eval_split_max_size": 256,
-    "eval_split_size": 0.01,
-    "use_speaker_weighted_sampler": false,
-    "speaker_weighted_sampler_alpha": 1.0,
-    "use_language_weighted_sampler": false,
-    "language_weighted_sampler_alpha": 1.0,
-    "use_length_weighted_sampler": false,
-    "length_weighted_sampler_alpha": 1.0,
-    "model_args": {
-        "num_chars": 266,
-        "out_channels": 513,
-        "spec_segment_size": 62,
-        "hidden_channels": 192,
-        "use_adaptive_weight_text_encoder": false,
-        "use_perfect_class_batch_sampler": true,
-        "perfect_class_batch_sampler_key": "language",
-        "hidden_channels_ffn_text_encoder": 768,
-        "num_heads_text_encoder": 2,
-        "num_layers_text_encoder": 10,
-        "kernel_size_text_encoder": 3,
-        "dropout_p_text_encoder": 0.1,
-        "dropout_p_duration_predictor": 0.5,
-        "kernel_size_posterior_encoder": 5,
-        "dilation_rate_posterior_encoder": 1,
-        "num_layers_posterior_encoder": 16,
-        "kernel_size_flow": 5,
-        "dilation_rate_flow": 1,
-        "num_layers_flow": 4,
-        "resblock_type_decoder": "2",
-        "resblock_kernel_sizes_decoder": [
-            3,
-            7,
-            11
-        ],
-        "resblock_dilation_sizes_decoder": [
-            [
-                1,
-                3,
-                5
-            ],
-            [
-                1,
-                3,
-                5
-            ],
-            [
-                1,
-                3,
-                5
-            ]
-        ],
-        "upsample_rates_decoder": [
-            8,
-            8,
-            2,
-            2
-        ],
-        "upsample_initial_channel_decoder": 512,
-        "upsample_kernel_sizes_decoder": [
-            16,
-            16,
-            4,
-            4
-        ],
-        "periods_multi_period_discriminator": [
-            2,
-            3,
-            5,
-            7,
-            11
-        ],
-        "use_sdp": true,
-        "noise_scale": 1.0,
-        "inference_noise_scale": 0.667,
-        "length_scale": 1,
-        "noise_scale_dp": 1.0,
-        "inference_noise_scale_dp": 1.0,
-        "max_inference_len": null,
-        "init_discriminator": true,
-        "use_spectral_norm_disriminator": false,
-        "use_speaker_embedding": false,
-        "num_speakers": 0,
-        "speakers_file": "/raid/datasets/MUPE/Experiments/runs/YourTTS-Baseline-PT-January-27-2024_12+04PM-2bc0892f9/speakers.pth",
-        "d_vector_file": [
-            "/raid/datasets/MUPE/dataset/mupe/H_ASP_speaker_embeddings_brpb.pth",
-            "/raid/datasets/MUPE/dataset/mupe/H_ASP_speaker_embeddings_brba.pth",
-            "/raid/datasets/MUPE/dataset/mupe/H_ASP_speaker_embeddings_brportugal.pth",
-            "/raid/datasets/MUPE/dataset/mupe/H_ASP_speaker_embeddings_brsp.pth",
-            "/raid/datasets/MUPE/dataset/mupe/H_ASP_speaker_embeddings_brpe.pth",
-            "/raid/datasets/MUPE/dataset/mupe/H_ASP_speaker_embeddings_brmg.pth",
-            "/raid/datasets/MUPE/dataset/mupe/H_ASP_speaker_embeddings_brrj.pth",
-            "/raid/datasets/MUPE/dataset/mupe/H_ASP_speaker_embeddings_brce.pth",
-            "/raid/datasets/MUPE/dataset/mupe/H_ASP_speaker_embeddings_brrs.pth",
-            "/raid/datasets/MUPE/dataset/mupe/H_ASP_speaker_embeddings_bralemanha.pth",
-            "/raid/datasets/MUPE/dataset/mupe/H_ASP_speaker_embeddings_brgo.pth",
-            "/raid/datasets/MUPE/dataset/mupe/H_ASP_speaker_embeddings_bral.pth",
-            "/raid/datasets/MUPE/dataset/mupe/H_ASP_speaker_embeddings_brpr.pth"
-        ],
-        "speaker_embedding_channels": 256,
-        "use_d_vector_file": true,
-        "d_vector_dim": 512,
-        "detach_dp_input": true,
-        "use_language_embedding": true,
-        "embedded_language_dim": 4,
-        "num_languages": 0,
-        "language_ids_file": "/raid/datasets/MUPE/Experiments/runs/YourTTS-Baseline-PT-January-27-2024_12+04PM-2bc0892f9/language_ids.json",
-        "use_speaker_encoder_as_loss": false,
-        "speaker_encoder_config_path": "https://github.com/coqui-ai/TTS/releases/download/speaker_encoder_model/config_se.json",
-        "speaker_encoder_model_path": "https://github.com/coqui-ai/TTS/releases/download/speaker_encoder_model/model_se.pth.tar",
-        "condition_dp_on_speaker": true,
-        "freeze_encoder": false,
-        "freeze_DP": false,
-        "freeze_PE": false,
-        "freeze_flow_decoder": false,
-        "freeze_waveform_decoder": false,
-        "encoder_sample_rate": null,
-        "interpolate_z": true,
-        "reinit_DP": false,
-        "reinit_text_encoder": false
-    },
-    "lr_gen": 0.0002,
-    "lr_disc": 0.0002,
-    "lr_scheduler_gen": "ExponentialLR",
-    "lr_scheduler_gen_params": {
-        "gamma": 0.999875,
-        "last_epoch": -1
-    },
-    "lr_scheduler_disc": "ExponentialLR",
-    "lr_scheduler_disc_params": {
-        "gamma": 0.999875,
-        "last_epoch": -1
-    },
-    "kl_loss_alpha": 1.0,
-    "disc_loss_alpha": 1.0,
-    "gen_loss_alpha": 1.0,
-    "feat_loss_alpha": 1.0,
-    "mel_loss_alpha": 45.0,
-    "dur_loss_alpha": 1.0,
-    "speaker_encoder_loss_alpha": 9.0,
-    "return_wav": true,
-    "use_weighted_sampler": true,
-    "weighted_sampler_attrs": {
-        "language": 1.0
-    },
-    "weighted_sampler_multipliers": {},
-    "r": 1,
-    "num_speakers": 0,
-    "use_speaker_embedding": false,
-    "speakers_file": "/raid/datasets/MUPE/Experiments/runs/YourTTS-Baseline-PT-January-27-2024_12+04PM-2bc0892f9/speakers.pth",
-    "speaker_embedding_channels": 256,
-    "language_ids_file": "/raid/datasets/MUPE/Experiments/runs/YourTTS-Baseline-PT-January-27-2024_12+04PM-2bc0892f9/language_ids.json",
-    "use_language_embedding": true,
-    "use_d_vector_file": true,
-    "d_vector_file": [
-        "/raid/datasets/MUPE/dataset/mupe/H_ASP_speaker_embeddings_brpb.pth",
-        "/raid/datasets/MUPE/dataset/mupe/H_ASP_speaker_embeddings_brba.pth",
-        "/raid/datasets/MUPE/dataset/mupe/H_ASP_speaker_embeddings_brportugal.pth",
-        "/raid/datasets/MUPE/dataset/mupe/H_ASP_speaker_embeddings_brsp.pth",
-        "/raid/datasets/MUPE/dataset/mupe/H_ASP_speaker_embeddings_brpe.pth",
-        "/raid/datasets/MUPE/dataset/mupe/H_ASP_speaker_embeddings_brmg.pth",
-        "/raid/datasets/MUPE/dataset/mupe/H_ASP_speaker_embeddings_brrj.pth",
-        "/raid/datasets/MUPE/dataset/mupe/H_ASP_speaker_embeddings_brce.pth",
-        "/raid/datasets/MUPE/dataset/mupe/H_ASP_speaker_embeddings_brrs.pth",
-        "/raid/datasets/MUPE/dataset/mupe/H_ASP_speaker_embeddings_bralemanha.pth",
-        "/raid/datasets/MUPE/dataset/mupe/H_ASP_speaker_embeddings_brgo.pth",
-        "/raid/datasets/MUPE/dataset/mupe/H_ASP_speaker_embeddings_bral.pth",
-        "/raid/datasets/MUPE/dataset/mupe/H_ASP_speaker_embeddings_brpr.pth"
-    ],
-    "d_vector_dim": 512
-}

Experiments/runs/YourTTS-Baseline-PT-January-27-2024_12+04PM-2bc0892f9/language_ids.json DELETED Viewed

@@ -1,15 +0,0 @@
-{
-    "bral": 0,
-    "bralemanha": 1,
-    "brba": 2,
-    "brce": 3,
-    "brgo": 4,
-    "brmg": 5,
-    "brpb": 6,
-    "brpe": 7,
-    "brportugal": 8,
-    "brpr": 9,
-    "brrj": 10,
-    "brrs": 11,
-    "brsp": 12
-}

Experiments/runs/YourTTS-Baseline-PT-January-27-2024_12+04PM-2bc0892f9/speakers.pth DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:d0b8d8013199105bfba41bbef0ac6c7fc44ecb3385a39980da80931496c039bf
-size 3296

Experiments/runs/YourTTS-Baseline-PT-January-27-2024_12+04PM-2bc0892f9/train_syntacc_baseline.py DELETED Viewed

@@ -1,352 +0,0 @@
-import os
-import torch
-from trainer import Trainer, TrainerArgs
-from TTS.bin.compute_embeddings import compute_embeddings
-from TTS.bin.resample import resample_files
-from TTS.config.shared_configs import BaseDatasetConfig
-from TTS.tts.configs.vits_config import VitsConfig
-from TTS.tts.datasets import load_tts_samples
-from TTS.tts.models.vits import CharactersConfig, Vits, VitsArgs, VitsAudioConfig, VitsDataset
-from TTS.utils.downloaders import download_libri_tts
-from torch.utils.data import DataLoader
-from TTS.utils.samplers import PerfectBatchSampler
-torch.set_num_threads(24)
-# pylint: disable=W0105
-"""
-    This recipe replicates the first experiment proposed in the CML-TTS paper (https://arxiv.org/abs/2306.10097). It uses the YourTTS model.
-    YourTTS model is based on the VITS model however it uses external speaker embeddings extracted from a pre-trained speaker encoder and has small architecture changes.
-"""
-CURRENT_PATH = os.path.dirname(os.path.abspath(__file__))
-# Name of the run for the Trainer
-RUN_NAME = "YourTTS-Baseline-PT"
-# Path where you want to save the models outputs (configs, checkpoints and tensorboard logs)
-OUT_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), "runs")  # "/raid/coqui/Checkpoints/original-YourTTS/"
-# If you want to do transfer learning and speed up your training, you can set here the path to the available CML-TTS checkpoint, which can be downloaded here:  https://drive.google.com/u/2/uc?id=1yDCSJ1pFZQTHhL09GMbOrdjcPULApa0p
-RESTORE_PATH = "/raid/datasets/MUPE/Experiments/runs/YourTTS-Syntacc-PT-January-25-2024_02+59PM-0000000/checkpoint_85000.pth"  # Download the checkpoint here:  https://drive.google.com/u/2/uc?id=1yDCSJ1pFZQTHhL09GMbOrdjcPULApa0p
-# This parameter is useful for debugging: it skips the training epochs and only runs the evaluation and produces the test sentences
-SKIP_TRAIN_EPOCH = False
-# Set here the batch size to be used in training and evaluation
-BATCH_SIZE = 26
-# Training Sampling rate and the target sampling rate for resampling the downloaded dataset (Note: If you change this you might need to redownload the dataset !!)
-# Note: If you add new datasets, please make sure that the dataset sampling rate and this parameter are matching, otherwise resample your audios
-SAMPLE_RATE = 16000
-DASHBOARD_LOGGER="tensorboard"
-LOGGER_URI = None
-DASHBOARD_LOGGER = "clearml"
-LOGGER_URI = "s3://coqui-ai-models/TTS/Checkpoints/YourTTS/MUPE/"
-# Max audio length in seconds to be used in training (every audio bigger than it will be ignored)
-MAX_AUDIO_LEN_IN_SECONDS = float("inf")
-# Define here the datasets config
-brpb_train_config = BaseDatasetConfig(
-    formatter="coqui",
-    dataset_name="mupe",
-    meta_file_train="metadata_coqui_brpb.csv",
-    path="/raid/datasets/MUPE/dataset/mupe/",
-    language="brpb"
-)
-brba_train_config = BaseDatasetConfig(
-    formatter="coqui",
-    dataset_name="mupe",
-    meta_file_train="metadata_coqui_brba.csv",
-    path="/raid/datasets/MUPE/dataset/mupe/",
-    language="brba"
-)
-brportugal_train_config = BaseDatasetConfig(
-    formatter="coqui",
-    dataset_name="mupe",
-    meta_file_train="metadata_coqui_brportugal.csv",
-    path="/raid/datasets/MUPE/dataset/mupe/",
-    language="brportugal"
-)
-brsp_train_config = BaseDatasetConfig(
-    formatter="coqui",
-    dataset_name="mupe",
-    meta_file_train="metadata_coqui_brsp.csv",
-    path="/raid/datasets/MUPE/dataset/mupe/",
-    language="brsp"
-)
-brpe_train_config = BaseDatasetConfig(
-    formatter="coqui",
-    dataset_name="mupe",
-    meta_file_train="metadata_coqui_brpe.csv",
-    path="/raid/datasets/MUPE/dataset/mupe/",
-    language="brpe"
-)
-brmg_train_config = BaseDatasetConfig(
-    formatter="coqui",
-    dataset_name="mupe",
-    meta_file_train="metadata_coqui_brmg.csv",
-    path="/raid/datasets/MUPE/dataset/mupe/",
-    language="brmg"
-)
-brrj_train_config = BaseDatasetConfig(
-    formatter="coqui",
-    dataset_name="mupe",
-    meta_file_train="metadata_coqui_brrj.csv",
-    path="/raid/datasets/MUPE/dataset/mupe/",
-    language="brrj"
-)
-brce_train_config = BaseDatasetConfig(
-    formatter="coqui",
-    dataset_name="mupe",
-    meta_file_train="metadata_coqui_brce.csv",
-    path="/raid/datasets/MUPE/dataset/mupe/",
-    language="brce"
-)
-brrs_train_config = BaseDatasetConfig(
-    formatter="coqui",
-    dataset_name="mupe",
-    meta_file_train="metadata_coqui_brrs.csv",
-    path="/raid/datasets/MUPE/dataset/mupe/",
-    language="brrs"
-)
-bralemanha_train_config = BaseDatasetConfig(
-    formatter="coqui",
-    dataset_name="mupe",
-    meta_file_train="metadata_coqui_bralemanha.csv",
-    path="/raid/datasets/MUPE/dataset/mupe/",
-    language="bralemanha"
-)
-brgo_train_config = BaseDatasetConfig(
-    formatter="coqui",
-    dataset_name="mupe",
-    meta_file_train="metadata_coqui_brgo.csv",
-    path="/raid/datasets/MUPE/dataset/mupe/",
-    language="brgo"
-)
-bral_train_config = BaseDatasetConfig(
-    formatter="coqui",
-    dataset_name="mupe",
-    meta_file_train="metadata_coqui_bral.csv",
-    path="/raid/datasets/MUPE/dataset/mupe/",
-    language="bral"
-)
-brpr_train_config = BaseDatasetConfig(
-    formatter="coqui",
-    dataset_name="mupe",
-    meta_file_train="metadata_coqui_brpr.csv",
-    path="/raid/datasets/MUPE/dataset/mupe/",
-    language="brpr"
-)
-bres_train_config = BaseDatasetConfig(
-    formatter="coqui",
-    dataset_name="mupe",
-    meta_file_train="metadata_coqui_bres.csv",
-    path="/raid/datasets/MUPE/dataset/mupe/",
-    language="bres"
-)
-brpi_train_config = BaseDatasetConfig(
-    formatter="coqui",
-    dataset_name="mupe",
-    meta_file_train="metadata_coqui_brpi.csv",
-    path="/raid/datasets/MUPE/dataset/mupe/",
-    language="brpi"
-)
-# bres_train_config and brpi_train_config are left out of the list below: no files were found for them
-DATASETS_CONFIG_LIST = [brpb_train_config,brba_train_config,brportugal_train_config,brsp_train_config,brpe_train_config,brmg_train_config,brrj_train_config,brce_train_config,brrs_train_config,bralemanha_train_config,brgo_train_config,bral_train_config,brpr_train_config]
-### Extract speaker embeddings
-SPEAKER_ENCODER_CHECKPOINT_PATH = (
-    "https://github.com/coqui-ai/TTS/releases/download/speaker_encoder_model/model_se.pth.tar"
-)
-SPEAKER_ENCODER_CONFIG_PATH = "https://github.com/coqui-ai/TTS/releases/download/speaker_encoder_model/config_se.json"
-D_VECTOR_FILES = []  # List of speaker embeddings/d-vectors to be used during the training
-# Iterate over all the dataset configs, checking whether the speaker embeddings are already computed; if not, compute them
-for dataset_conf in DATASETS_CONFIG_LIST:
-    # Check if the embeddings weren't already computed, if not compute it
-    embeddings_file = os.path.join(dataset_conf.path, f"H_ASP_speaker_embeddings_{dataset_conf.language}.pth")
-    if not os.path.isfile(embeddings_file):
-        print(f">>> Computing the speaker embeddings for the {dataset_conf.dataset_name} dataset")
-        compute_embeddings(
-            SPEAKER_ENCODER_CHECKPOINT_PATH,
-            SPEAKER_ENCODER_CONFIG_PATH,
-            embeddings_file,
-            old_speakers_file=None,
-            config_dataset_path=None,
-            formatter_name=dataset_conf.formatter,
-            dataset_name=dataset_conf.dataset_name,
-            dataset_path=dataset_conf.path,
-            meta_file_train=dataset_conf.meta_file_train,
-            meta_file_val=dataset_conf.meta_file_val,
-            disable_cuda=False,
-            no_eval=False,
-        )
-    D_VECTOR_FILES.append(embeddings_file)
-# Audio config used in training.
-audio_config = VitsAudioConfig(
-    sample_rate=SAMPLE_RATE,
-    hop_length=256,
-    win_length=1024,
-    fft_size=1024,
-    mel_fmin=0.0,
-    mel_fmax=None,
-    num_mels=80,
-)
-# Init VITSArgs setting the arguments that are needed for the YourTTS model
-model_args = VitsArgs(
-    spec_segment_size=62,
-    hidden_channels=192,
-    hidden_channels_ffn_text_encoder=768,
-    num_heads_text_encoder=2,
-    num_layers_text_encoder=10,
-    kernel_size_text_encoder=3,
-    dropout_p_text_encoder=0.1,
-    d_vector_file=D_VECTOR_FILES,
-    use_d_vector_file=True,
-    d_vector_dim=512,
-    speaker_encoder_model_path=SPEAKER_ENCODER_CHECKPOINT_PATH,
-    speaker_encoder_config_path=SPEAKER_ENCODER_CONFIG_PATH,
-    resblock_type_decoder="2",  # In the paper, we accidentally trained the YourTTS using ResNet blocks type 2, if you like you can use the ResNet blocks type 1 like the VITS model
-    # Useful parameters to enable the Speaker Consistency Loss (SCL) described in the paper
-    use_speaker_encoder_as_loss=False,
-    # Useful parameters to enable multilingual training
-    use_language_embedding=True,
-    embedded_language_dim=4,
-    use_adaptive_weight_text_encoder=False,
-    use_perfect_class_batch_sampler=True,
-    perfect_class_batch_sampler_key="language"
-)
-# General training config, here you can change the batch size and others useful parameters
-config = VitsConfig(
-    output_path=OUT_PATH,
-    model_args=model_args,
-    run_name=RUN_NAME,
-    project_name="SYNTACC",
-    run_description="""
-            - YourTTS with SYNTACC text encoder
-        """,
-    dashboard_logger=DASHBOARD_LOGGER,
-    logger_uri=LOGGER_URI,
-    audio=audio_config,
-    batch_size=BATCH_SIZE,
-    batch_group_size=48,
-    eval_batch_size=BATCH_SIZE,
-    num_loader_workers=8,
-    eval_split_max_size=256,
-    print_step=50,
-    plot_step=100,
-    log_model_step=1000,
-    save_step=5000,
-    save_n_checkpoints=2,
-    save_checkpoints=True,
-    # target_loss="loss_1",
-    print_eval=False,
-    use_phonemes=False,
-    phonemizer="espeak",
-    phoneme_language="en",
-    compute_input_seq_cache=True,
-    add_blank=True,
-    text_cleaner="multilingual_cleaners",
-    characters=CharactersConfig(
-        characters_class="TTS.tts.models.vits.VitsCharacters",
-        pad="_",
-        eos="&",
-        bos="*",
-        blank=None,
-        characters="ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\u00a1\u00a3\u00b7\u00b8\u00c0\u00c1\u00c2\u00c3\u00c4\u00c5\u00c7\u00c8\u00c9\u00ca\u00cb\u00cc\u00cd\u00ce\u00cf\u00d1\u00d2\u00d3\u00d4\u00d5\u00d6\u00d9\u00da\u00db\u00dc\u00df\u00e0\u00e1\u00e2\u00e3\u00e4\u00e5\u00e7\u00e8\u00e9\u00ea\u00eb\u00ec\u00ed\u00ee\u00ef\u00f1\u00f2\u00f3\u00f4\u00f5\u00f6\u00f9\u00fa\u00fb\u00fc\u0101\u0104\u0105\u0106\u0107\u010b\u0119\u0141\u0142\u0143\u0144\u0152\u0153\u015a\u015b\u0161\u0178\u0179\u017a\u017b\u017c\u020e\u04e7\u05c2\u1b20",
-        punctuations="\u2014!'(),-.:;?\u00bf ",
-        phonemes="iy\u0268\u0289\u026fu\u026a\u028f\u028ae\u00f8\u0258\u0259\u0275\u0264o\u025b\u0153\u025c\u025e\u028c\u0254\u00e6\u0250a\u0276\u0251\u0252\u1d7b\u0298\u0253\u01c0\u0257\u01c3\u0284\u01c2\u0260\u01c1\u029bpbtd\u0288\u0256c\u025fk\u0261q\u0262\u0294\u0274\u014b\u0272\u0273n\u0271m\u0299r\u0280\u2c71\u027e\u027d\u0278\u03b2fv\u03b8\u00f0sz\u0283\u0292\u0282\u0290\u00e7\u029dx\u0263\u03c7\u0281\u0127\u0295h\u0266\u026c\u026e\u028b\u0279\u027bj\u0270l\u026d\u028e\u029f\u02c8\u02cc\u02d0\u02d1\u028dw\u0265\u029c\u02a2\u02a1\u0255\u0291\u027a\u0267\u025a\u02de\u026b'\u0303' ",
-        is_unique=True,
-        is_sorted=True,
-    ),
-    phoneme_cache_path=None,
-    precompute_num_workers=12,
-    start_by_longest=True,
-    datasets=DATASETS_CONFIG_LIST,
-    cudnn_benchmark=False,
-    max_audio_len=SAMPLE_RATE * MAX_AUDIO_LEN_IN_SECONDS,
-    mixed_precision=False,
-    test_sentences=[
-        # GUSTAVO: only speakers from the training set
-        ["Voc\u00ea ter\u00e1 a vista do topo da montanha que voc\u00ea escalar.", "EDILEINE_FONSECA", None, "brsp"],
-        ["Quem semeia ventos, colhe tempestades.", "JOSE_PAULO_DE_ARAUJO", None, "brpb"],
-        ["O olho do dono \u00e9 que engorda o gado.", "VITOR_RAFAEL_OLIVEIRA_ALVES", None, "brba"],
-        ["\u00c1gua mole em pedra dura, tanto bate at\u00e9 que fura.", "MARIA_AURORA_FELIX", None, "brportugal"],
-        ["Quem espera sempre alcan\u00e7a.", "ANTONIO_DE_AMORIM_COSTA", None, "brpe"],
-        ["Cada macaco no seu galho.", "ALCIDES_DE_LIMA", None, "brmg"],
-        ["Em terra de cego, quem tem um olho \u00e9 rei.", "ALUISIO_SOARES_DE_SOUSA", None, "brrj"],
-        ["A ocasi\u00e3o faz o ladr\u00e3o.", "FRANCISCO_JOSE_MOREIRA_MOTA", None, "brce"],
-        ["De gr\u00e3o em gr\u00e3o, a galinha enche o papo.", "EVALDO_ANDRADA_CORREA", None, "brrs"],
-        ["Mais vale um p\u00c1ssaro na m\u00e3o do que dois voando.", "DORIS_ALEXANDER", None, "bralemanha"],
-        ["Quem n\u00e3o arrisca, n\u00e3o petisca.", "DONALDO_LUIZ_DE_ALMEIDA", None, "brgo"],
-        ["A uni\u00e3o faz a for\u00e7a.", "GERONCIO_HENRIQUE_NETO", None, "bral"],
-        ["Em boca fechada n\u00e3o entra mosquito.", "MALU_NATEL_FREIRE_WEBER", None, "brpr"],
-        # ["Quem n\u00e3o tem dinheiro, n\u00e3o tem v\u00edcios.", "INES_VIEIRA_BOGEA", None, "bres"],
-        # ["Quando voc\u00ea n\u00e3o corre nenhum risco, voc\u00ea arrisca tudo.", "MARIA_ASSUNCAO_SOUSA", None, "brpi"]
-    ],
-    # Enable the weighted sampler
-    use_weighted_sampler=True,
-    # Balances the training batches by language, no matter how many samples each language has (the commented-out variant below would also balance by speaker)
-    # weighted_sampler_attrs={"language": 1.0, "speaker_name": 1.0},
-    weighted_sampler_attrs={"language": 1.0},
-    weighted_sampler_multipliers={
-        # "speaker_name": {
-        # You can force the batching scheme to give a higher weight to a certain speaker, so that this speaker appears more frequently in the batch.
-        # This speeds up the speaker adaptation process. Consider the CML train dataset and "new_speaker" as the name of the speaker that you want to adapt.
-        # The line below makes the balancer treat "new_speaker" as 106 speakers, i.e. 1/4 of the number of speakers present in the CML dataset.
-        # 'new_speaker': 106, # (CML tot. train speaker)/4 = (424/4) = 106
-        # }
-    },
-    # It defines the Speaker Consistency Loss (SCL) α to 9 like the YourTTS paper
-    speaker_encoder_loss_alpha=9.0,
-)
-# Load all the dataset samples and split them into training and evaluation sets
-train_samples, eval_samples = load_tts_samples(
-    config.datasets,
-    eval_split=True,
-    eval_split_max_size=config.eval_split_max_size,
-    eval_split_size=config.eval_split_size,
-)
-# Init the model
-model = Vits.init_from_config(config)
-# Init the trainer and 🚀
-trainer = Trainer(
-    TrainerArgs(restore_path=RESTORE_PATH, skip_train_epoch=SKIP_TRAIN_EPOCH, start_with_eval=True),
-    config,
-    output_path=OUT_PATH,
-    model=model,
-    train_samples=train_samples,
-    eval_samples=eval_samples,
-)
-trainer.fit()

Experiments/runs/YourTTS-Baseline-PT-January-27-2024_12+05PM-165973116/best_model.pth DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:a082ddde12d21020f66a70cf05a74826488d10008a8379b699458d92509e85d1
-size 1043216142

Experiments/runs/YourTTS-Baseline-PT-January-27-2024_12+05PM-165973116/best_model_87192.pth DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:a082ddde12d21020f66a70cf05a74826488d10008a8379b699458d92509e85d1
-size 1043216142

Experiments/runs/YourTTS-Baseline-PT-January-27-2024_12+05PM-165973116/checkpoint_130000.pth DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:5a584eb832a857f9a11180b34a84b81117d8690ed1e5fa39e4ff711cf6ffd7f7
-size 1043220766

Experiments/runs/YourTTS-Baseline-PT-January-27-2024_12+05PM-165973116/checkpoint_135000.pth DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:367ac46477805942658a7a78e8cf473409537967f9382a46249a8d11521ed3f9
-size 1043220766

Experiments/runs/YourTTS-Baseline-PT-January-27-2024_12+05PM-165973116/config.json DELETED Viewed

@@ -1,496 +0,0 @@
-{
-    "output_path": "/raid/datasets/MUPE/Experiments/runs",
-    "logger_uri": "s3://coqui-ai-models/TTS/Checkpoints/YourTTS/MUPE/",
-    "run_name": "YourTTS-Baseline-PT",
-    "project_name": "SYNTACC",
-    "run_description": "\n            - YourTTS with SYNTACC text encoder\n        ",
-    "print_step": 50,
-    "plot_step": 100,
-    "model_param_stats": false,
-    "wandb_entity": null,
-    "dashboard_logger": "clearml",
-    "save_on_interrupt": true,
-    "log_model_step": 1000,
-    "save_step": 5000,
-    "save_n_checkpoints": 2,
-    "save_checkpoints": true,
-    "save_all_best": false,
-    "save_best_after": 10000,
-    "target_loss": null,
-    "print_eval": false,
-    "test_delay_epochs": 0,
-    "run_eval": true,
-    "run_eval_steps": null,
-    "distributed_backend": "nccl",
-    "distributed_url": "tcp://localhost:54321",
-    "mixed_precision": false,
-    "precision": "fp16",
-    "epochs": 1000,
-    "batch_size": 26,
-    "eval_batch_size": 26,
-    "grad_clip": [
-        1000,
-        1000
-    ],
-    "scheduler_after_epoch": true,
-    "lr": 0.001,
-    "optimizer": "AdamW",
-    "optimizer_params": {
-        "betas": [
-            0.8,
-            0.99
-        ],
-        "eps": 1e-09,
-        "weight_decay": 0.01
-    },
-    "lr_scheduler": null,
-    "lr_scheduler_params": {},
-    "use_grad_scaler": false,
-    "allow_tf32": false,
-    "cudnn_enable": true,
-    "cudnn_deterministic": false,
-    "cudnn_benchmark": false,
-    "training_seed": 54321,
-    "model": "vits",
-    "num_loader_workers": 8,
-    "num_eval_loader_workers": 0,
-    "use_noise_augment": false,
-    "audio": {
-        "fft_size": 1024,
-        "sample_rate": 16000,
-        "win_length": 1024,
-        "hop_length": 256,
-        "num_mels": 80,
-        "mel_fmin": 0.0,
-        "mel_fmax": null
-    },
-    "use_phonemes": false,
-    "phonemizer": "espeak",
-    "phoneme_language": "en",
-    "compute_input_seq_cache": true,
-    "text_cleaner": "multilingual_cleaners",
-    "enable_eos_bos_chars": false,
-    "test_sentences_file": "",
-    "phoneme_cache_path": null,
-    "characters": {
-        "characters_class": "TTS.tts.models.vits.VitsCharacters",
-        "vocab_dict": null,
-        "pad": "_",
-        "eos": "&",
-        "bos": "*",
-        "blank": null,
-        "characters": "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\u00a1\u00a3\u00b7\u00b8\u00c0\u00c1\u00c2\u00c3\u00c4\u00c5\u00c7\u00c8\u00c9\u00ca\u00cb\u00cc\u00cd\u00ce\u00cf\u00d1\u00d2\u00d3\u00d4\u00d5\u00d6\u00d9\u00da\u00db\u00dc\u00df\u00e0\u00e1\u00e2\u00e3\u00e4\u00e5\u00e7\u00e8\u00e9\u00ea\u00eb\u00ec\u00ed\u00ee\u00ef\u00f1\u00f2\u00f3\u00f4\u00f5\u00f6\u00f9\u00fa\u00fb\u00fc\u0101\u0104\u0105\u0106\u0107\u010b\u0119\u0141\u0142\u0143\u0144\u0152\u0153\u015a\u015b\u0161\u0178\u0179\u017a\u017b\u017c\u020e\u04e7\u05c2\u1b20",
-        "punctuations": "\u2014!'(),-.:;?\u00bf ",
-        "phonemes": "iy\u0268\u0289\u026fu\u026a\u028f\u028ae\u00f8\u0258\u0259\u0275\u0264o\u025b\u0153\u025c\u025e\u028c\u0254\u00e6\u0250a\u0276\u0251\u0252\u1d7b\u0298\u0253\u01c0\u0257\u01c3\u0284\u01c2\u0260\u01c1\u029bpbtd\u0288\u0256c\u025fk\u0261q\u0262\u0294\u0274\u014b\u0272\u0273n\u0271m\u0299r\u0280\u2c71\u027e\u027d\u0278\u03b2fv\u03b8\u00f0sz\u0283\u0292\u0282\u0290\u00e7\u029dx\u0263\u03c7\u0281\u0127\u0295h\u0266\u026c\u026e\u028b\u0279\u027bj\u0270l\u026d\u028e\u029f\u02c8\u02cc\u02d0\u02d1\u028dw\u0265\u029c\u02a2\u02a1\u0255\u0291\u027a\u0267\u025a\u02de\u026b'\u0303' ",
-        "is_unique": true,
-        "is_sorted": true
-    },
-    "add_blank": true,
-    "batch_group_size": 48,
-    "loss_masking": null,
-    "min_audio_len": 1,
-    "max_audio_len": Infinity,
-    "min_text_len": 1,
-    "max_text_len": Infinity,
-    "compute_f0": false,
-    "compute_energy": false,
-    "compute_linear_spec": true,
-    "precompute_num_workers": 12,
-    "start_by_longest": true,
-    "shuffle": false,
-    "drop_last": false,
-    "datasets": [
-        {
-            "formatter": "coqui",
-            "dataset_name": "mupe",
-            "path": "/raid/datasets/MUPE/dataset/mupe/",
-            "meta_file_train": "metadata_coqui_brpb.csv",
-            "ignored_speakers": null,
-            "language": "brpb",
-            "phonemizer": "",
-            "meta_file_val": "",
-            "meta_file_attn_mask": ""
-        },
-        {
-            "formatter": "coqui",
-            "dataset_name": "mupe",
-            "path": "/raid/datasets/MUPE/dataset/mupe/",
-            "meta_file_train": "metadata_coqui_brba.csv",
-            "ignored_speakers": null,
-            "language": "brba",
-            "phonemizer": "",
-            "meta_file_val": "",
-            "meta_file_attn_mask": ""
-        },
-        {
-            "formatter": "coqui",
-            "dataset_name": "mupe",
-            "path": "/raid/datasets/MUPE/dataset/mupe/",
-            "meta_file_train": "metadata_coqui_brportugal.csv",
-            "ignored_speakers": null,
-            "language": "brportugal",
-            "phonemizer": "",
-            "meta_file_val": "",
-            "meta_file_attn_mask": ""
-        },
-        {
-            "formatter": "coqui",
-            "dataset_name": "mupe",
-            "path": "/raid/datasets/MUPE/dataset/mupe/",
-            "meta_file_train": "metadata_coqui_brsp.csv",
-            "ignored_speakers": null,
-            "language": "brsp",
-            "phonemizer": "",
-            "meta_file_val": "",
-            "meta_file_attn_mask": ""
-        },
-        {
-            "formatter": "coqui",
-            "dataset_name": "mupe",
-            "path": "/raid/datasets/MUPE/dataset/mupe/",
-            "meta_file_train": "metadata_coqui_brpe.csv",
-            "ignored_speakers": null,
-            "language": "brpe",
-            "phonemizer": "",
-            "meta_file_val": "",
-            "meta_file_attn_mask": ""
-        },
-        {
-            "formatter": "coqui",
-            "dataset_name": "mupe",
-            "path": "/raid/datasets/MUPE/dataset/mupe/",
-            "meta_file_train": "metadata_coqui_brmg.csv",
-            "ignored_speakers": null,
-            "language": "brmg",
-            "phonemizer": "",
-            "meta_file_val": "",
-            "meta_file_attn_mask": ""
-        },
-        {
-            "formatter": "coqui",
-            "dataset_name": "mupe",
-            "path": "/raid/datasets/MUPE/dataset/mupe/",
-            "meta_file_train": "metadata_coqui_brrj.csv",
-            "ignored_speakers": null,
-            "language": "brrj",
-            "phonemizer": "",
-            "meta_file_val": "",
-            "meta_file_attn_mask": ""
-        },
-        {
-            "formatter": "coqui",
-            "dataset_name": "mupe",
-            "path": "/raid/datasets/MUPE/dataset/mupe/",
-            "meta_file_train": "metadata_coqui_brce.csv",
-            "ignored_speakers": null,
-            "language": "brce",
-            "phonemizer": "",
-            "meta_file_val": "",
-            "meta_file_attn_mask": ""
-        },
-        {
-            "formatter": "coqui",
-            "dataset_name": "mupe",
-            "path": "/raid/datasets/MUPE/dataset/mupe/",
-            "meta_file_train": "metadata_coqui_brrs.csv",
-            "ignored_speakers": null,
-            "language": "brrs",
-            "phonemizer": "",
-            "meta_file_val": "",
-            "meta_file_attn_mask": ""
-        },
-        {
-            "formatter": "coqui",
-            "dataset_name": "mupe",
-            "path": "/raid/datasets/MUPE/dataset/mupe/",
-            "meta_file_train": "metadata_coqui_bralemanha.csv",
-            "ignored_speakers": null,
-            "language": "bralemanha",
-            "phonemizer": "",
-            "meta_file_val": "",
-            "meta_file_attn_mask": ""
-        },
-        {
-            "formatter": "coqui",
-            "dataset_name": "mupe",
-            "path": "/raid/datasets/MUPE/dataset/mupe/",
-            "meta_file_train": "metadata_coqui_brgo.csv",
-            "ignored_speakers": null,
-            "language": "brgo",
-            "phonemizer": "",
-            "meta_file_val": "",
-            "meta_file_attn_mask": ""
-        },
-        {
-            "formatter": "coqui",
-            "dataset_name": "mupe",
-            "path": "/raid/datasets/MUPE/dataset/mupe/",
-            "meta_file_train": "metadata_coqui_bral.csv",
-            "ignored_speakers": null,
-            "language": "bral",
-            "phonemizer": "",
-            "meta_file_val": "",
-            "meta_file_attn_mask": ""
-        },
-        {
-            "formatter": "coqui",
-            "dataset_name": "mupe",
-            "path": "/raid/datasets/MUPE/dataset/mupe/",
-            "meta_file_train": "metadata_coqui_brpr.csv",
-            "ignored_speakers": null,
-            "language": "brpr",
-            "phonemizer": "",
-            "meta_file_val": "",
-            "meta_file_attn_mask": ""
-        }
-    ],
-    "test_sentences": [
-        [
-            "Voc\u00ea ter\u00e1 a vista do topo da montanha que voc\u00ea escalar.",
-            "EDILEINE_FONSECA",
-            null,
-            "brsp"
-        ],
-        [
-            "Quem semeia ventos, colhe tempestades.",
-            "JOSE_PAULO_DE_ARAUJO",
-            null,
-            "brpb"
-        ],
-        [
-            "O olho do dono \u00e9 que engorda o gado.",
-            "VITOR_RAFAEL_OLIVEIRA_ALVES",
-            null,
-            "brba"
-        ],
-        [
-            "\u00c1gua mole em pedra dura, tanto bate at\u00e9 que fura.",
-            "MARIA_AURORA_FELIX",
-            null,
-            "brportugal"
-        ],
-        [
-            "Quem espera sempre alcan\u00e7a.",
-            "ANTONIO_DE_AMORIM_COSTA",
-            null,
-            "brpe"
-        ],
-        [
-            "Cada macaco no seu galho.",
-            "ALCIDES_DE_LIMA",
-            null,
-            "brmg"
-        ],
-        [
-            "Em terra de cego, quem tem um olho \u00e9 rei.",
-            "ALUISIO_SOARES_DE_SOUSA",
-            null,
-            "brrj"
-        ],
-        [
-            "A ocasi\u00e3o faz o ladr\u00e3o.",
-            "FRANCISCO_JOSE_MOREIRA_MOTA",
-            null,
-            "brce"
-        ],
-        [
-            "De gr\u00e3o em gr\u00e3o, a galinha enche o papo.",
-            "EVALDO_ANDRADA_CORREA",
-            null,
-            "brrs"
-        ],
-        [
-            "Mais vale um p\u00c1ssaro na m\u00e3o do que dois voando.",
-            "DORIS_ALEXANDER",
-            null,
-            "bralemanha"
-        ],
-        [
-            "Quem n\u00e3o arrisca, n\u00e3o petisca.",
-            "DONALDO_LUIZ_DE_ALMEIDA",
-            null,
-            "brgo"
-        ],
-        [
-            "A uni\u00e3o faz a for\u00e7a.",
-            "GERONCIO_HENRIQUE_NETO",
-            null,
-            "bral"
-        ],
-        [
-            "Em boca fechada n\u00e3o entra mosquito.",
-            "MALU_NATEL_FREIRE_WEBER",
-            null,
-            "brpr"
-        ]
-    ],
-    "eval_split_max_size": 256,
-    "eval_split_size": 0.01,
-    "use_speaker_weighted_sampler": false,
-    "speaker_weighted_sampler_alpha": 1.0,
-    "use_language_weighted_sampler": false,
-    "language_weighted_sampler_alpha": 1.0,
-    "use_length_weighted_sampler": false,
-    "length_weighted_sampler_alpha": 1.0,
-    "model_args": {
-        "num_chars": 266,
-        "out_channels": 513,
-        "spec_segment_size": 62,
-        "hidden_channels": 192,
-        "use_adaptive_weight_text_encoder": false,
-        "use_perfect_class_batch_sampler": true,
-        "perfect_class_batch_sampler_key": "language",
-        "hidden_channels_ffn_text_encoder": 768,
-        "num_heads_text_encoder": 2,
-        "num_layers_text_encoder": 10,
-        "kernel_size_text_encoder": 3,
-        "dropout_p_text_encoder": 0.1,
-        "dropout_p_duration_predictor": 0.5,
-        "kernel_size_posterior_encoder": 5,
-        "dilation_rate_posterior_encoder": 1,
-        "num_layers_posterior_encoder": 16,
-        "kernel_size_flow": 5,
-        "dilation_rate_flow": 1,
-        "num_layers_flow": 4,
-        "resblock_type_decoder": "2",
-        "resblock_kernel_sizes_decoder": [
-            3,
-            7,
-            11
-        ],
-        "resblock_dilation_sizes_decoder": [
-            [
-                1,
-                3,
-                5
-            ],
-            [
-                1,
-                3,
-                5
-            ],
-            [
-                1,
-                3,
-                5
-            ]
-        ],
-        "upsample_rates_decoder": [
-            8,
-            8,
-            2,
-            2
-        ],
-        "upsample_initial_channel_decoder": 512,
-        "upsample_kernel_sizes_decoder": [
-            16,
-            16,
-            4,
-            4
-        ],
-        "periods_multi_period_discriminator": [
-            2,
-            3,
-            5,
-            7,
-            11
-        ],
-        "use_sdp": true,
-        "noise_scale": 1.0,
-        "inference_noise_scale": 0.667,
-        "length_scale": 1,
-        "noise_scale_dp": 1.0,
-        "inference_noise_scale_dp": 1.0,
-        "max_inference_len": null,
-        "init_discriminator": true,
-        "use_spectral_norm_disriminator": false,
-        "use_speaker_embedding": false,
-        "num_speakers": 0,
-        "speakers_file": "/raid/datasets/MUPE/Experiments/runs/YourTTS-Baseline-PT-January-27-2024_12+05PM-165973116/speakers.pth",
-        "d_vector_file": [
-            "/raid/datasets/MUPE/dataset/mupe/H_ASP_speaker_embeddings_brpb.pth",
-            "/raid/datasets/MUPE/dataset/mupe/H_ASP_speaker_embeddings_brba.pth",
-            "/raid/datasets/MUPE/dataset/mupe/H_ASP_speaker_embeddings_brportugal.pth",
-            "/raid/datasets/MUPE/dataset/mupe/H_ASP_speaker_embeddings_brsp.pth",
-            "/raid/datasets/MUPE/dataset/mupe/H_ASP_speaker_embeddings_brpe.pth",
-            "/raid/datasets/MUPE/dataset/mupe/H_ASP_speaker_embeddings_brmg.pth",
-            "/raid/datasets/MUPE/dataset/mupe/H_ASP_speaker_embeddings_brrj.pth",
-            "/raid/datasets/MUPE/dataset/mupe/H_ASP_speaker_embeddings_brce.pth",
-            "/raid/datasets/MUPE/dataset/mupe/H_ASP_speaker_embeddings_brrs.pth",
-            "/raid/datasets/MUPE/dataset/mupe/H_ASP_speaker_embeddings_bralemanha.pth",
-            "/raid/datasets/MUPE/dataset/mupe/H_ASP_speaker_embeddings_brgo.pth",
-            "/raid/datasets/MUPE/dataset/mupe/H_ASP_speaker_embeddings_bral.pth",
-            "/raid/datasets/MUPE/dataset/mupe/H_ASP_speaker_embeddings_brpr.pth"
-        ],
-        "speaker_embedding_channels": 256,
-        "use_d_vector_file": true,
-        "d_vector_dim": 512,
-        "detach_dp_input": true,
-        "use_language_embedding": true,
-        "embedded_language_dim": 4,
-        "num_languages": 0,
-        "language_ids_file": "/raid/datasets/MUPE/Experiments/runs/YourTTS-Baseline-PT-January-27-2024_12+05PM-165973116/language_ids.json",
-        "use_speaker_encoder_as_loss": false,
-        "speaker_encoder_config_path": "https://github.com/coqui-ai/TTS/releases/download/speaker_encoder_model/config_se.json",
-        "speaker_encoder_model_path": "https://github.com/coqui-ai/TTS/releases/download/speaker_encoder_model/model_se.pth.tar",
-        "condition_dp_on_speaker": true,
-        "freeze_encoder": false,
-        "freeze_DP": false,
-        "freeze_PE": false,
-        "freeze_flow_decoder": false,
-        "freeze_waveform_decoder": false,
-        "encoder_sample_rate": null,
-        "interpolate_z": true,
-        "reinit_DP": false,
-        "reinit_text_encoder": false
-    },
-    "lr_gen": 0.0002,
-    "lr_disc": 0.0002,
-    "lr_scheduler_gen": "ExponentialLR",
-    "lr_scheduler_gen_params": {
-        "gamma": 0.999875,
-        "last_epoch": -1
-    },
-    "lr_scheduler_disc": "ExponentialLR",
-    "lr_scheduler_disc_params": {
-        "gamma": 0.999875,
-        "last_epoch": -1
-    },
-    "kl_loss_alpha": 1.0,
-    "disc_loss_alpha": 1.0,
-    "gen_loss_alpha": 1.0,
-    "feat_loss_alpha": 1.0,
-    "mel_loss_alpha": 45.0,
-    "dur_loss_alpha": 1.0,
-    "speaker_encoder_loss_alpha": 9.0,
-    "return_wav": true,
-    "use_weighted_sampler": true,
-    "weighted_sampler_attrs": {
-        "language": 1.0
-    },
-    "weighted_sampler_multipliers": {},
-    "r": 1,
-    "num_speakers": 0,
-    "use_speaker_embedding": false,
-    "speakers_file": "/raid/datasets/MUPE/Experiments/runs/YourTTS-Baseline-PT-January-27-2024_12+05PM-165973116/speakers.pth",
-    "speaker_embedding_channels": 256,
-    "language_ids_file": "/raid/datasets/MUPE/Experiments/runs/YourTTS-Baseline-PT-January-27-2024_12+05PM-165973116/language_ids.json",
-    "use_language_embedding": true,
-    "use_d_vector_file": true,
-    "d_vector_file": [
-        "/raid/datasets/MUPE/dataset/mupe/H_ASP_speaker_embeddings_brpb.pth",
-        "/raid/datasets/MUPE/dataset/mupe/H_ASP_speaker_embeddings_brba.pth",
-        "/raid/datasets/MUPE/dataset/mupe/H_ASP_speaker_embeddings_brportugal.pth",
-        "/raid/datasets/MUPE/dataset/mupe/H_ASP_speaker_embeddings_brsp.pth",
-        "/raid/datasets/MUPE/dataset/mupe/H_ASP_speaker_embeddings_brpe.pth",
-        "/raid/datasets/MUPE/dataset/mupe/H_ASP_speaker_embeddings_brmg.pth",
-        "/raid/datasets/MUPE/dataset/mupe/H_ASP_speaker_embeddings_brrj.pth",
-        "/raid/datasets/MUPE/dataset/mupe/H_ASP_speaker_embeddings_brce.pth",
-        "/raid/datasets/MUPE/dataset/mupe/H_ASP_speaker_embeddings_brrs.pth",
-        "/raid/datasets/MUPE/dataset/mupe/H_ASP_speaker_embeddings_bralemanha.pth",
-        "/raid/datasets/MUPE/dataset/mupe/H_ASP_speaker_embeddings_brgo.pth",
-        "/raid/datasets/MUPE/dataset/mupe/H_ASP_speaker_embeddings_bral.pth",
-        "/raid/datasets/MUPE/dataset/mupe/H_ASP_speaker_embeddings_brpr.pth"
-    ],
-    "d_vector_dim": 512
-}

Experiments/runs/YourTTS-Baseline-PT-January-27-2024_12+05PM-165973116/language_ids.json DELETED Viewed

@@ -1,15 +0,0 @@
-{
-    "bral": 0,
-    "bralemanha": 1,
-    "brba": 2,
-    "brce": 3,
-    "brgo": 4,
-    "brmg": 5,
-    "brpb": 6,
-    "brpe": 7,
-    "brportugal": 8,
-    "brpr": 9,
-    "brrj": 10,
-    "brrs": 11,
-    "brsp": 12
-}

Experiments/runs/YourTTS-Baseline-PT-January-27-2024_12+05PM-165973116/speakers.pth DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:d0b8d8013199105bfba41bbef0ac6c7fc44ecb3385a39980da80931496c039bf
-size 3296

Experiments/runs/YourTTS-Baseline-PT-January-27-2024_12+05PM-165973116/trainer_0_log.txt DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:5ddf81cb4061c7e47bd824c3ebb109cc02bc31ab79ee21e4e69d60d32aca454b
-size 1794644

Experiments/runs/YourTTS-Syntacc-PT_continue-January-28-2024_02+26PM-8a499b88c/{checkpoint_185000.pth → checkpoint_195000.pth} RENAMED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:00fabae247abd9845b02ea35b314b4aab9714d3a2a63948b160c115008dc96da
 size 1044066458

 version https://git-lfs.github.com/spec/v1
+oid sha256:8c552bdeff67502deab77d3f587269e090fac00dc991bcfba8dedfa21594d471
 size 1044066458

Experiments/runs/YourTTS-Syntacc-PT_continue-January-28-2024_02+26PM-8a499b88c/trainer_0_log.txt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:433a8e8d660ca8de05748c6b575c29657381d5c2c504b073249e9f2cb833c25f
-size 3244264

 version https://git-lfs.github.com/spec/v1
+oid sha256:327601981f984533599c289f977acc81f9d7479999f14235302e6ad1a171d710
+size 3401880

Experiments/train_syntacc_baseline.py CHANGED Viewed

@@ -28,7 +28,7 @@ RUN_NAME = "YourTTS-Baseline-PT"
 OUT_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), "runs")  # "/raid/coqui/Checkpoints/original-YourTTS/"
 # If you want to do transfer learning and speed up your training, you can set here the path to the available CML-TTS checkpoint, which can be downloaded here:  https://drive.google.com/u/2/uc?id=1yDCSJ1pFZQTHhL09GMbOrdjcPULApa0p
-RESTORE_PATH = "/raid/datasets/MUPE/Experiments/runs/YourTTS-Syntacc-PT-January-25-2024_02+59PM-0000000/checkpoint_85000.pth"  # Download the checkpoint here:  https://drive.google.com/u/2/uc?id=1yDCSJ1pFZQTHhL09GMbOrdjcPULApa0p
 # This parameter is useful for debugging: it skips the training epochs and only runs the evaluation and produces the test sentences
 SKIP_TRAIN_EPOCH = False

 OUT_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), "runs")  # "/raid/coqui/Checkpoints/original-YourTTS/"
 # If you want to do transfer learning and speed up your training, you can set here the path to the available CML-TTS checkpoint, which can be downloaded here:  https://drive.google.com/u/2/uc?id=1yDCSJ1pFZQTHhL09GMbOrdjcPULApa0p
+RESTORE_PATH = "/raid/datasets/MUPE/Experiments/runs/YourTTS-Syntacc-PT_continue-January-28-2024_02+26PM-8a499b88c/checkpoint_195000.pth"  # Download the checkpoint here:  https://drive.google.com/u/2/uc?id=1yDCSJ1pFZQTHhL09GMbOrdjcPULApa0p
 # This parameter is useful for debugging: it skips the training epochs and only runs the evaluation and produces the test sentences
 SKIP_TRAIN_EPOCH = False