Upload folder using huggingface_hub

Browse files

Files changed (17) hide show

.gitattributes +1 -0
best_model.pth +3 -0
best_model_1009517.pth +3 -0
checkpoint_1111000.pth +3 -0
checkpoint_1112000.pth +3 -0
checkpoint_1113000.pth +3 -0
checkpoint_1113117.pth +3 -0
events.out.tfevents.1693935825.ip-172-16-76-92.ec2.internal.52882.0 +3 -0
train_vits.py +92 -0
trainer_0_log.txt +3 -0
trainer_1_log.txt +0 -0
trainer_2_log.txt +0 -0
trainer_3_log.txt +0 -0
trainer_4_log.txt +0 -0
trainer_5_log.txt +0 -0
trainer_6_log.txt +0 -0
trainer_7_log.txt +0 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+trainer_0_log.txt filter=lfs diff=lfs merge=lfs -text

best_model.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d6d473e8956c52204d2c1dfc2b43299a6ed3f3064dba5ad39682cbbfc629e8d4
+size 997871045

best_model_1009517.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d6d473e8956c52204d2c1dfc2b43299a6ed3f3064dba5ad39682cbbfc629e8d4
+size 997871045

checkpoint_1111000.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6e42647d7d4f6a03d61e08961e753fa5366d693c49eb35a734edb5caae8a1ddd
+size 997871109

checkpoint_1112000.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ae36115905bed58d939e8f79733ce590d2dce0b357e933a8c88782ea173debf5
+size 997871109

checkpoint_1113000.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a5b8b41c60301c234f7031bcdf80cae317859aaf6815ce7983d4248a98678c66
+size 997871109

checkpoint_1113117.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c96fda4ad9778107ba5e80c5cf3306f5a050b588ad850cb9c9b31014de70bf1f
+size 997871109

events.out.tfevents.1693935825.ip-172-16-76-92.ec2.internal.52882.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8e085546e402a09aa9a8ae7d884d1cd03586e3a88a316c36ab8aa199a263c12c
+size 10044566854

train_vits.py ADDED Viewed

	@@ -0,0 +1,92 @@

+import os
+from trainer import Trainer, TrainerArgs
+from TTS.tts.configs.shared_configs import BaseDatasetConfig
+from TTS.tts.configs.vits_config import VitsConfig
+from TTS.tts.datasets import load_tts_samples
+from TTS.tts.models.vits import Vits, VitsAudioConfig
+from TTS.tts.utils.text.tokenizer import TTSTokenizer
+from TTS.utils.audio import AudioProcessor
+# Recipe script: builds a VitsConfig and starts a training run through `trainer.Trainer`.
+##########################################
+# output_path is the directory containing this script; it is reused below for the
+# phoneme cache path, the config's output_path and the Trainer's output path.
+##########################################
+output_path = os.path.dirname(os.path.abspath(__file__))
+dataset_config = BaseDatasetConfig(
+##########################################
+# Change `path` below to your dataset directory (currently a machine-specific absolute path).
+##########################################
+    formatter="ljspeech", meta_file_train="metadata.csv", path="/home/ec2-user/SageMaker/tts-sage/recipes/ljspeech/vits_tts/adam"
+)
+audio_config = VitsAudioConfig(
+    sample_rate=48000, win_length=1024, hop_length=256, num_mels=80, mel_fmin=0, mel_fmax=None
+)
+config = VitsConfig(
+    audio=audio_config,
+    run_name="tts-adam-48k",
+    batch_size=7,
+    eval_batch_size=12,
+    batch_group_size=4,
+#    Alternative value kept for reference: num_loader_workers=8
+    num_loader_workers=4,
+    num_eval_loader_workers=4,
+    run_eval=True,
+    test_delay_epochs=-1,
+    epochs=100000,
+    save_step=1000,
+	  save_checkpoints=True,  # NOTE(review): this line and the next two are indented with a tab plus spaces, unlike the rest
+	  save_n_checkpoints=4,
+	  save_best_after=1000,
+    # Alternative value kept for reference: text_cleaner="english_cleaners"
+    text_cleaner="multilingual_cleaners",
+    use_phonemes=True,
+    phoneme_language="en-us",
+    phoneme_cache_path=os.path.join(output_path, "phoneme_cache"),  # cache directory sits next to this script
+    compute_input_seq_cache=True,
+    print_step=25,
+    print_eval=True,
+    mixed_precision=True,
+    output_path=output_path,
+    datasets=[dataset_config],
+    cudnn_benchmark=False,
+)
+# INITIALIZE THE AUDIO PROCESSOR
+# The audio processor is used for feature extraction and audio I/O.
+# It mainly serves the dataloader and the training loggers.
+ap = AudioProcessor.init_from_config(config)
+# INITIALIZE THE TOKENIZER
+# The tokenizer converts text to sequences of token IDs.
+# `config` is rebound here: it is updated with the default characters if none are defined in it.
+tokenizer, config = TTSTokenizer.init_from_config(config)
+# LOAD DATA SAMPLES
+# Each sample is a list of `[text, audio_file_path, speaker_name]`.
+# You can define your own sample loader that returns the list of samples,
+# or define a custom formatter and pass it to `load_tts_samples`.
+# See `TTS.tts.datasets.load_tts_samples` for more details.
+train_samples, eval_samples = load_tts_samples(
+    dataset_config,
+    eval_split=True,
+    eval_split_max_size=config.eval_split_max_size,
+    eval_split_size=config.eval_split_size,
+)
+# Initialize the model from the config, audio processor and tokenizer; no speaker manager is supplied.
+model = Vits(config, ap, tokenizer, speaker_manager=None)
+# Initialize the trainer with default TrainerArgs, then start training.
+trainer = Trainer(
+    TrainerArgs(),
+    config,
+    output_path,
+    model=model,
+    train_samples=train_samples,
+    eval_samples=eval_samples,
+)
+trainer.fit()

trainer_0_log.txt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:865cd234a1230128eeb78c0d507eba4019732b0bbdc5360225011581c43dd60f
+size 18198855

trainer_1_log.txt ADDED Viewed

File without changes

trainer_2_log.txt ADDED Viewed

File without changes

trainer_3_log.txt ADDED Viewed

File without changes

trainer_4_log.txt ADDED Viewed

File without changes

trainer_5_log.txt ADDED Viewed

File without changes

trainer_6_log.txt ADDED Viewed

File without changes

trainer_7_log.txt ADDED Viewed

File without changes