Spaces:

Scralius
/

StyleTTS2_French

Runtime error

App Files Files Community

Scralius committed on Oct 28, 2024

Commit

3e23daa

1 Parent(s): 381bccd

Add TTS

Browse files

Files changed (5) hide show

.gitignore +5 -0
README.md +2 -0
app.py +48 -5
inference.py +1 -0
requirements.txt +2 -1

.gitignore ADDED Viewed

	@@ -0,0 +1,5 @@

+/Models/SIWIS_local
+/__pycache__
+*.pyc
+/Models/SIWIS

README.md CHANGED Viewed

@@ -8,4 +8,6 @@ sdk_version: 5.4.0
 app_file: app.py
 pinned: false
 license: mit
 ---

 app_file: app.py
 pinned: false
 license: mit
+models:
+- Scralius/StyleTTS2_SIWIS_French
 ---

app.py CHANGED Viewed

@@ -1,9 +1,52 @@
 from inference import STTS2
 import gradio as gr
-def greet(name):
-    return "Hello " + name + "!!"
-demo = gr.Interface(fn=greet, inputs="text", outputs="text")
-demo.launch()

 from inference import STTS2
 import gradio as gr
+from huggingface_hub import snapshot_download
+import numpy as np
+def synthesise(text, progress=gr.Progress()):
+    if text.strip() == "":
+        raise gr.Error("Please enter some text.")
+    if len(text) > 300:
+        raise gr.Error("Please enter text less than 300 characters.")
+    audio = stts2.inference(text, ref_s, alpha=0.9, beta=0.9, diffusion_steps=16, embedding_scale=1)
+    audio = (audio * 32767).astype(np.int16)
+    return 24000, audio
+if __name__ == '__main__':
+    snapshot_download(repo_id="Scralius/StyleTTS2_SIWIS_French", local_dir="Models/SIWIS")
+    stts2 = STTS2(config_path='Models/SIWIS_local/config.yml', model_folder='Models/SIWIS_local')
+    ref_s = stts2.compute_style('Models/SIWIS_local/reference_audio.wav')
+    demo = gr.Interface(
+        fn=synthesise,
+        inputs=[
+            gr.Textbox(label='Enter Text:', lines=5, max_lines=10, placeholder="Type your text here..."),
+        ],
+        outputs=[
+            gr.Audio(
+                label="Generated Audio:",
+                autoplay=False,
+                streaming=False,
+                type="numpy",
+            ),
+        ],
+        allow_flagging='never',
+        title="French StyleTTS2 demo",
+        description=(
+            "This application uses a Text-to-Speech (TTS) model trained from scratch using StyleTTS2 on the SIWIS dataset. "
+            "Enter some text in the input box below and let the model read it out loud with a natural and expressive voice. "
+            "The model is capable of generating high-quality speech."
+        ),
+        theme="origin",
+        examples=[
+            ["Voici une démonstration de synthèse vocale. Ce modèle est capable de lire du texte avec une voix naturelle. Essayez d'entrer votre propre texte pour voir comment cela fonctionne."],
+            ["La technologie de synthèse vocale a beaucoup évolué ces dernières années. Elle est maintenant utilisée dans de nombreux domaines, y compris les assistants virtuels, les livres audio et les systèmes de navigation."],
+            ["Le modèle que nous utilisons ici a été entraîné sur le dataset SIWIS, qui contient des enregistrements de voix en français. Cela permet au modèle de générer une prononciation précise et naturelle."]
+        ]
+    )
+    demo.launch()

inference.py CHANGED Viewed

@@ -1,5 +1,6 @@
 import nltk
 nltk.download('punkt')
 import torch
 torch.manual_seed(0)
 torch.backends.cudnn.benchmark = False

 import nltk
 nltk.download('punkt')
+nltk.download('punkt_tab')
 import torch
 torch.manual_seed(0)
 torch.backends.cudnn.benchmark = False

requirements.txt CHANGED Viewed

@@ -15,4 +15,5 @@ tqdm
 typing
 typing-extensions
 git+https://github.com/resemble-ai/monotonic_align.git
-phonemizer

 typing
 typing-extensions
 git+https://github.com/resemble-ai/monotonic_align.git
+phonemizer
+cached-path