xtts

Running on Zero

App Files Files Community

hasanbasbunar committed 12 days ago

Commit

c3afa70

verified ·

1 Parent(s): 07d3ed0

Update app.py

Browse files

Files changed (1) hide show

app.py +50 -20

app.py CHANGED Viewed

@@ -1,6 +1,17 @@
 import sys
 import io, os, stat
 import subprocess
 import random
 from zipfile import ZipFile
 import uuid
@@ -8,9 +19,12 @@ import time
 import torch
 import torchaudio
 #download for mecab
-os.system('python -m unidic download')
 # By using XTTS you agree to CPML license https://coqui.ai/cpml
 os.environ["COQUI_TOS_AGREED"] = "1"
@@ -30,8 +44,9 @@ from pydub import AudioSegment
 from TTS.api import TTS
 from TTS.tts.configs.xtts_config import XttsConfig
-from TTS.tts.models.xtts import Xtts
 from TTS.utils.generic_utils import get_user_data_dir
 HF_TOKEN = os.environ.get("HF_TOKEN")
@@ -61,14 +76,18 @@ config = XttsConfig()
 config.load_json(os.path.join(model_path, "config.json"))
 model = Xtts.init_from_config(config)
 model.load_checkpoint(
     config,
     checkpoint_path=os.path.join(model_path, "model.pth"),
     vocab_path=os.path.join(model_path, "vocab.json"),
     eval=True,
-    use_deepspeed=True,
 )
-model.cuda()
 # This is for debugging purposes only
 DEVICE_ASSERT_DETECTED = 0
@@ -77,6 +96,8 @@ DEVICE_ASSERT_LANG = None
 supported_languages = config.languages
 def predict(
     prompt,
     language,
@@ -87,6 +108,10 @@ def predict(
     no_lang_auto_detect,
     agree,
 ):
     if agree == True:
         if language not in supported_languages:
             gr.Warning(
@@ -389,11 +414,13 @@ def predict(
                     None,
                     None,
                     None,
                 )
         return (
-            gr.make_waveform(
-                audio="output.wav",
-            ),
             "output.wav",
             metrics_text,
             speaker_wav,
@@ -428,10 +455,10 @@ links = """
 |                                 |                                         |
 | ------------------------------- | --------------------------------------- |
-| 🐸💬 **CoquiTTS**                | <a style="display:inline-block" href='https://github.com/coqui-ai/TTS'><img src='https://img.shields.io/github/stars/coqui-ai/TTS?style=social' /></a>|
-| 💼 **Documentation**            | [ReadTheDocs](https://tts.readthedocs.io/en/latest/)
-| 👩‍💻 **Questions**                | [GitHub Discussions](https://github.com/coqui-ai/TTS/discussions) |
-| 🗯 **Community**         | [![Dicord](https://img.shields.io/discord/1037326658807533628?color=%239B59B6&label=chat%20on%20discord)](https://discord.gg/5eXr5seRrv)  |
 """
@@ -456,7 +483,7 @@ examples = [
     [
         "Lorsque j'avais six ans j'ai vu, une fois, une magnifique image",
         "fr",
-        "examples/male.wav",
         None,
         False,
         False,
@@ -476,7 +503,7 @@ examples = [
     [
         "Cuando tenía seis años, vi una vez una imagen magnífica",
         "es",
-        "examples/male.wav",
         None,
         False,
         False,
@@ -496,7 +523,7 @@ examples = [
     [
         "Kiedy miałem sześć lat, zobaczyłem pewnego razu wspaniały obrazek",
         "pl",
-        "examples/male.wav",
         None,
         False,
         False,
@@ -536,7 +563,7 @@ examples = [
     [
         "Toen ik een jaar of zes was, zag ik op een keer een prachtige plaat",
         "nl",
-        "examples/male.wav",
         None,
         False,
         False,
@@ -586,7 +613,7 @@ examples = [
         [
         "Egyszer hat éves koromban láttam egy csodálatos képet",
         "hu",
-        "examples/male.wav",
         None,
         False,
         True,
@@ -649,14 +676,14 @@ with gr.Blocks(analytics_enabled=False) as demo:
             )
             ref_gr = gr.Audio(
                 label="Reference Audio",
-                info="Click on the ✎ button to upload your own target speaker audio",
                 type="filepath",
                 value="examples/female.wav",
             )
             mic_gr = gr.Audio(
-                source="microphone",
                 type="filepath",
-                info="Use your microphone to record audio",
                 label="Use Microphone for Reference",
             )
             use_mic_gr = gr.Checkbox(
@@ -700,4 +727,7 @@ with gr.Blocks(analytics_enabled=False) as demo:
     tts_button.click(predict, [input_text_gr, language_gr, ref_gr, mic_gr, use_mic_gr, clean_ref_gr, auto_det_lang_gr, tos_gr], outputs=[video_gr, audio_gr, out_text_gr, ref_audio_gr])
 demo.queue()
-demo.launch(debug=True, show_api=True)

 import sys
 import io, os, stat
 import subprocess
+try:
+    import pandas
+    if int(pandas.__version__.split('.')[0]) < 2:
+        print("Upgrading pandas for Gradio 6 compatibility...")
+        subprocess.check_call([sys.executable, "-m", "pip", "install", "pandas>=2.0", "numpy<2.0"])
+        print("Pandas upgraded. Restarting script...")
+        os.execv(sys.executable, ['python'] + sys.argv)
+except Exception as e:
+    print(f"Pandas update check failed: {e}")
 import random
 from zipfile import ZipFile
 import uuid
 import torch
 import torchaudio
+# --- ZeroGPU addition ---
+import spaces
+# ----------------------
 #download for mecab
+# os.system('python -m unidic download')
 # By using XTTS you agree to CPML license https://coqui.ai/cpml
 os.environ["COQUI_TOS_AGREED"] = "1"
 from TTS.api import TTS
 from TTS.tts.configs.xtts_config import XttsConfig
+from TTS.tts.models.xtts import Xtts, XttsAudioConfig, XttsArgs
 from TTS.utils.generic_utils import get_user_data_dir
+from TTS.config.shared_configs import BaseDatasetConfig
 HF_TOKEN = os.environ.get("HF_TOKEN")
 config.load_json(os.path.join(model_path, "config.json"))
 model = Xtts.init_from_config(config)
+torch.serialization.add_safe_globals([XttsConfig, XttsAudioConfig, BaseDatasetConfig, XttsArgs])
+# --- CRITICAL CHANGE: disable DeepSpeed and remove the global model.cuda() call ---
 model.load_checkpoint(
     config,
     checkpoint_path=os.path.join(model_path, "model.pth"),
     vocab_path=os.path.join(model_path, "vocab.json"),
     eval=True,
+    use_deepspeed=False, # DeepSpeed crash sur CPU-init de ZeroGPU, on le désactive.
 )
+# model.cuda() # REMOVED: do not move the model to the GPU at global startup
+# -----------------------------------------------------------------------------------
 # This is for debugging purposes only
 DEVICE_ASSERT_DETECTED = 0
 supported_languages = config.languages
+# --- ZeroGPU decorator added ---
+@spaces.GPU
 def predict(
     prompt,
     language,
     no_lang_auto_detect,
     agree,
 ):
+    # --- Dynamic GPU loading ---
+    model.cuda()
+    # --------------------------------
     if agree == True:
         if language not in supported_languages:
             gr.Warning(
                     None,
                     None,
                     None,
+                    None,
                 )
         return (
+            None,
+            # gr.make_waveform(
+            #     audio="output.wav",
+            # ),
             "output.wav",
             metrics_text,
             speaker_wav,
 |                                 |                                         |
 | ------------------------------- | --------------------------------------- |
+| 🐸💬 **CoquiTTS** | <a style="display:inline-block" href='https://github.com/coqui-ai/TTS'><img src='https://img.shields.io/github/stars/coqui-ai/TTS?style=social' /></a>|
+| 💼 **Documentation** | [ReadTheDocs](https://tts.readthedocs.io/en/latest/)
+| 👩‍💻 **Questions** | [GitHub Discussions](https://github.com/coqui-ai/TTS/discussions) |
+| 🗯 **Community** | [![Dicord](https://img.shields.io/discord/1037326658807533628?color=%239B59B6&label=chat%20on%20discord)](https://discord.gg/5eXr5seRrv)  |
 """
     [
         "Lorsque j'avais six ans j'ai vu, une fois, une magnifique image",
         "fr",
+        "examples/male_fixed.wav",
         None,
         False,
         False,
     [
         "Cuando tenía seis años, vi una vez una imagen magnífica",
         "es",
+        "examples/male_fixed.wav",
         None,
         False,
         False,
     [
         "Kiedy miałem sześć lat, zobaczyłem pewnego razu wspaniały obrazek",
         "pl",
+        "examples/male_fixed.wav",
         None,
         False,
         False,
     [
         "Toen ik een jaar of zes was, zag ik op een keer een prachtige plaat",
         "nl",
+        "examples/male_fixed.wav",
         None,
         False,
         False,
         [
         "Egyszer hat éves koromban láttam egy csodálatos képet",
         "hu",
+        "examples/male_fixed.wav",
         None,
         False,
         True,
             )
             ref_gr = gr.Audio(
                 label="Reference Audio",
+                # info="Click on the ✎ button to upload your own target speaker audio",
                 type="filepath",
                 value="examples/female.wav",
             )
             mic_gr = gr.Audio(
+                sources=["microphone"],
                 type="filepath",
+                # info="Use your microphone to record audio",
                 label="Use Microphone for Reference",
             )
             use_mic_gr = gr.Checkbox(
     tts_button.click(predict, [input_text_gr, language_gr, ref_gr, mic_gr, use_mic_gr, clean_ref_gr, auto_det_lang_gr, tos_gr], outputs=[video_gr, audio_gr, out_text_gr, ref_audio_gr])
 demo.queue()
+demo.launch(
+    debug=True,
+    # show_api=True
+)