Spaces:

Tamazight-NLP
/

TTS

Running

App Files Community

ayymen committed on Nov 25, 2024

Commit

15e7b85

1 Parent(s): e707d6a

Enable voice cloning

Browse files

Files changed (1) hide show

app.py +19 -18

app.py CHANGED Viewed

@@ -1,6 +1,6 @@
 import gradio as gr
 import tempfile
-from TTS.utils.synthesizer import Synthesizer
 from huggingface_hub import hf_hub_download
 import torch
@@ -20,41 +20,42 @@ my_examples = [
 my_inputs = [
   gr.Textbox(lines=5, label="Input Text"),
   gr.Checkbox(label="Split Sentences (each sentence will be generated separately)", value=True)
 ]
-my_outputs = gr.Audio(type="filepath", label="Output Audio")
-def tts(text: str, split_sentences: bool = True):
-    best_model_path = hf_hub_download(repo_id=REPO_ID, filename="best_model.pth")
-    config_path = hf_hub_download(repo_id=REPO_ID, filename="config.json")
-    # init synthesizer
-    synthesizer = Synthesizer(
-        best_model_path,
-        config_path,
-        use_cuda=CUDA
-    )
     # replace oov characters
     text = text.replace("\n", ". ")
     text = text.replace("(", ",")
     text = text.replace(")", ",")
     text = text.replace(";", ",")
-    # create audio file
-    wavs = synthesizer.tts(text, split_sentences=split_sentences)
     with tempfile.NamedTemporaryFile(suffix = ".wav", delete = False) as fp:
-        synthesizer.save_wav(wavs, fp)
     return fp.name
 iface = gr.Interface(
     fn=tts,
     inputs=my_inputs,
     outputs=my_outputs,
     title=my_title,
-    description = my_description,
-    examples = my_examples,
     cache_examples=True
 )
 iface.launch()

 import gradio as gr
 import tempfile
+from TTS.api import TTS
 from huggingface_hub import hf_hub_download
 import torch
 # Input components of the Gradio form. They are listed in the same order as
 # the parameters of tts(): text, speaker_wav, split_sentences.
 my_inputs = [
   gr.Textbox(lines=5, label="Input Text"),
+  gr.Audio(type="filepath", label="Speaker audio for voice cloning (optional)"),
   gr.Checkbox(label="Split Sentences (each sentence will be generated separately)", value=True)
 ]
 # Output component; type="filepath" matches tts() returning the path of a WAV file.
+my_outputs = gr.Audio(type="filepath", label="Output Audio", autoplay=True)
 # Download the checkpoint and its config from the REPO_ID Hub repository and
 # build the TTS model once at module level, so tts() reuses the loaded model
 # instead of loading it on every call. CUDA selects the device.
+best_model_path = hf_hub_download(repo_id=REPO_ID, filename="best_model.pth")
+config_path = hf_hub_download(repo_id=REPO_ID, filename="config.json")
+api = TTS(model_path=best_model_path, config_path=config_path).to("cuda" if CUDA else "cpu")
+# load voice conversion model
+api.load_vc_model_by_name("voice_conversion_models/multilingual/vctk/freevc24", gpu=CUDA)
# Characters treated as out-of-vocabulary for the model, mapped to punctuation
# it can read. None of the replacements contains a key of this table, so one
# translate() pass gives the same result as chaining the four replace() calls.
_OOV_REPLACEMENTS = str.maketrans({"\n": ". ", "(": ",", ")": ",", ";": ","})


def _normalize_text(text: str) -> str:
    """Return *text* with out-of-vocabulary characters replaced."""
    return text.translate(_OOV_REPLACEMENTS)


def tts(text: str, speaker_wav: str = None, split_sentences: bool = True):
    """Synthesize *text* to a WAV file and return the path of that file.

    Args:
        text: Text to speak. Newlines become ". " and the characters
            "(", ")" and ";" become "," before synthesis.
        speaker_wav: Optional path to a reference recording. When given
            (truthy), the audio is produced with ``api.tts_with_vc_to_file``
            using this recording as the speaker; otherwise
            ``api.tts_to_file`` is used.
        split_sentences: Forwarded unchanged to the TTS call.

    Returns:
        Path of the generated ``.wav`` file. The file is intentionally not
        deleted here, because the caller needs it to exist after the return.

    Raises:
        Whatever the underlying TTS call raises; the temporary file is
        removed first so a failed request leaves nothing behind on disk.
    """
    import contextlib
    import os

    text = _normalize_text(text)

    # Only reserve a unique path here and close our own handle right away, so
    # the TTS library is the sole writer of the file.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
        out_path = fp.name

    try:
        if speaker_wav:
            api.tts_with_vc_to_file(
                text,
                speaker_wav=speaker_wav,
                file_path=out_path,
                split_sentences=split_sentences,
            )
        else:
            api.tts_to_file(
                text,
                file_path=out_path,
                split_sentences=split_sentences,
            )
    except Exception:
        # delete=False means nobody else cleans this up; do not leak an
        # empty/partial file when synthesis fails, then re-raise unchanged.
        with contextlib.suppress(OSError):
            os.unlink(out_path)
        raise
    return out_path
 # Wrap tts() in a Gradio Interface using the input/output components and the
 # title, description and examples defined earlier in the file, then start it.
 iface = gr.Interface(
     fn=tts,
     inputs=my_inputs,
     outputs=my_outputs,
     title=my_title,
+    description=my_description,
+    examples=my_examples,
     # NOTE(review): with cache_examples=True Gradio presumably runs tts() on
     # my_examples ahead of time and serves the stored outputs - confirm that
     # each example row matches the current three-input signature.
     cache_examples=True
 )
 iface.launch()