Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,6 +1,17 @@
|
|
| 1 |
import sys
|
| 2 |
import io, os, stat
|
| 3 |
import subprocess
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
import random
|
| 5 |
from zipfile import ZipFile
|
| 6 |
import uuid
|
|
@@ -8,9 +19,12 @@ import time
|
|
| 8 |
import torch
|
| 9 |
import torchaudio
|
| 10 |
|
|
|
|
|
|
|
|
|
|
| 11 |
|
| 12 |
#download for mecab
|
| 13 |
-
os.system('python -m unidic download')
|
| 14 |
|
| 15 |
# By using XTTS you agree to CPML license https://coqui.ai/cpml
|
| 16 |
os.environ["COQUI_TOS_AGREED"] = "1"
|
|
@@ -30,8 +44,9 @@ from pydub import AudioSegment
|
|
| 30 |
|
| 31 |
from TTS.api import TTS
|
| 32 |
from TTS.tts.configs.xtts_config import XttsConfig
|
| 33 |
-
from TTS.tts.models.xtts import Xtts
|
| 34 |
from TTS.utils.generic_utils import get_user_data_dir
|
|
|
|
| 35 |
|
| 36 |
HF_TOKEN = os.environ.get("HF_TOKEN")
|
| 37 |
|
|
@@ -61,14 +76,18 @@ config = XttsConfig()
|
|
| 61 |
config.load_json(os.path.join(model_path, "config.json"))
|
| 62 |
|
| 63 |
model = Xtts.init_from_config(config)
|
|
|
|
|
|
|
|
|
|
| 64 |
model.load_checkpoint(
|
| 65 |
config,
|
| 66 |
checkpoint_path=os.path.join(model_path, "model.pth"),
|
| 67 |
vocab_path=os.path.join(model_path, "vocab.json"),
|
| 68 |
eval=True,
|
| 69 |
-
use_deepspeed=
|
| 70 |
)
|
| 71 |
-
model.cuda()
|
|
|
|
| 72 |
|
| 73 |
# This is for debugging purposes only
|
| 74 |
DEVICE_ASSERT_DETECTED = 0
|
|
@@ -77,6 +96,8 @@ DEVICE_ASSERT_LANG = None
|
|
| 77 |
|
| 78 |
supported_languages = config.languages
|
| 79 |
|
|
|
|
|
|
|
| 80 |
def predict(
|
| 81 |
prompt,
|
| 82 |
language,
|
|
@@ -87,6 +108,10 @@ def predict(
|
|
| 87 |
no_lang_auto_detect,
|
| 88 |
agree,
|
| 89 |
):
|
|
|
|
|
|
|
|
|
|
|
|
|
| 90 |
if agree == True:
|
| 91 |
if language not in supported_languages:
|
| 92 |
gr.Warning(
|
|
@@ -389,11 +414,13 @@ def predict(
|
|
| 389 |
None,
|
| 390 |
None,
|
| 391 |
None,
|
|
|
|
| 392 |
)
|
| 393 |
return (
|
| 394 |
-
|
| 395 |
-
|
| 396 |
-
|
|
|
|
| 397 |
"output.wav",
|
| 398 |
metrics_text,
|
| 399 |
speaker_wav,
|
|
@@ -428,10 +455,10 @@ links = """
|
|
| 428 |
|
| 429 |
| | |
|
| 430 |
| ------------------------------- | --------------------------------------- |
|
| 431 |
-
| 🐸💬 **CoquiTTS**
|
| 432 |
-
| 💼 **Documentation**
|
| 433 |
-
| 👩💻 **Questions**
|
| 434 |
-
| 🗯 **Community**
|
| 435 |
|
| 436 |
|
| 437 |
"""
|
|
@@ -456,7 +483,7 @@ examples = [
|
|
| 456 |
[
|
| 457 |
"Lorsque j'avais six ans j'ai vu, une fois, une magnifique image",
|
| 458 |
"fr",
|
| 459 |
-
"examples/
|
| 460 |
None,
|
| 461 |
False,
|
| 462 |
False,
|
|
@@ -476,7 +503,7 @@ examples = [
|
|
| 476 |
[
|
| 477 |
"Cuando tenía seis años, vi una vez una imagen magnífica",
|
| 478 |
"es",
|
| 479 |
-
"examples/
|
| 480 |
None,
|
| 481 |
False,
|
| 482 |
False,
|
|
@@ -496,7 +523,7 @@ examples = [
|
|
| 496 |
[
|
| 497 |
"Kiedy miałem sześć lat, zobaczyłem pewnego razu wspaniały obrazek",
|
| 498 |
"pl",
|
| 499 |
-
"examples/
|
| 500 |
None,
|
| 501 |
False,
|
| 502 |
False,
|
|
@@ -536,7 +563,7 @@ examples = [
|
|
| 536 |
[
|
| 537 |
"Toen ik een jaar of zes was, zag ik op een keer een prachtige plaat",
|
| 538 |
"nl",
|
| 539 |
-
"examples/
|
| 540 |
None,
|
| 541 |
False,
|
| 542 |
False,
|
|
@@ -586,7 +613,7 @@ examples = [
|
|
| 586 |
[
|
| 587 |
"Egyszer hat éves koromban láttam egy csodálatos képet",
|
| 588 |
"hu",
|
| 589 |
-
"examples/
|
| 590 |
None,
|
| 591 |
False,
|
| 592 |
True,
|
|
@@ -649,14 +676,14 @@ with gr.Blocks(analytics_enabled=False) as demo:
|
|
| 649 |
)
|
| 650 |
ref_gr = gr.Audio(
|
| 651 |
label="Reference Audio",
|
| 652 |
-
info="Click on the ✎ button to upload your own target speaker audio",
|
| 653 |
type="filepath",
|
| 654 |
value="examples/female.wav",
|
| 655 |
)
|
| 656 |
mic_gr = gr.Audio(
|
| 657 |
-
|
| 658 |
type="filepath",
|
| 659 |
-
info="Use your microphone to record audio",
|
| 660 |
label="Use Microphone for Reference",
|
| 661 |
)
|
| 662 |
use_mic_gr = gr.Checkbox(
|
|
@@ -700,4 +727,7 @@ with gr.Blocks(analytics_enabled=False) as demo:
|
|
| 700 |
tts_button.click(predict, [input_text_gr, language_gr, ref_gr, mic_gr, use_mic_gr, clean_ref_gr, auto_det_lang_gr, tos_gr], outputs=[video_gr, audio_gr, out_text_gr, ref_audio_gr])
|
| 701 |
|
| 702 |
demo.queue()
|
| 703 |
-
demo.launch(
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import sys
|
| 2 |
import io, os, stat
|
| 3 |
import subprocess
|
| 4 |
+
|
| 5 |
+
try:
|
| 6 |
+
import pandas
|
| 7 |
+
if int(pandas.__version__.split('.')[0]) < 2:
|
| 8 |
+
print("Upgrading pandas for Gradio 6 compatibility...")
|
| 9 |
+
subprocess.check_call([sys.executable, "-m", "pip", "install", "pandas>=2.0", "numpy<2.0"])
|
| 10 |
+
print("Pandas upgraded. Restarting script...")
|
| 11 |
+
os.execv(sys.executable, ['python'] + sys.argv)
|
| 12 |
+
except Exception as e:
|
| 13 |
+
print(f"Pandas update check failed: {e}")
|
| 14 |
+
|
| 15 |
import random
|
| 16 |
from zipfile import ZipFile
|
| 17 |
import uuid
|
|
|
|
| 19 |
import torch
|
| 20 |
import torchaudio
|
| 21 |
|
| 22 |
+
# --- ZeroGPU support (added) ---
|
| 23 |
+
import spaces
|
| 24 |
+
# ----------------------
|
| 25 |
|
| 26 |
#download for mecab
|
| 27 |
+
# os.system('python -m unidic download')
|
| 28 |
|
| 29 |
# By using XTTS you agree to CPML license https://coqui.ai/cpml
|
| 30 |
os.environ["COQUI_TOS_AGREED"] = "1"
|
|
|
|
| 44 |
|
| 45 |
from TTS.api import TTS
|
| 46 |
from TTS.tts.configs.xtts_config import XttsConfig
|
| 47 |
+
from TTS.tts.models.xtts import Xtts, XttsAudioConfig, XttsArgs
|
| 48 |
from TTS.utils.generic_utils import get_user_data_dir
|
| 49 |
+
from TTS.config.shared_configs import BaseDatasetConfig
|
| 50 |
|
| 51 |
HF_TOKEN = os.environ.get("HF_TOKEN")
|
| 52 |
|
|
|
|
| 76 |
config.load_json(os.path.join(model_path, "config.json"))
|
| 77 |
|
| 78 |
model = Xtts.init_from_config(config)
|
| 79 |
+
torch.serialization.add_safe_globals([XttsConfig, XttsAudioConfig, BaseDatasetConfig, XttsArgs])
|
| 80 |
+
|
| 81 |
+
# --- CRITICAL CHANGE: disable DeepSpeed and remove the module-level model.cuda() ---
|
| 82 |
model.load_checkpoint(
|
| 83 |
config,
|
| 84 |
checkpoint_path=os.path.join(model_path, "model.pth"),
|
| 85 |
vocab_path=os.path.join(model_path, "vocab.json"),
|
| 86 |
eval=True,
|
| 87 |
+
use_deepspeed=False, # DeepSpeed crashes during ZeroGPU's CPU-side init, so it is disabled.
|
| 88 |
)
|
| 89 |
+
# model.cuda() # REMOVED: do not move the model to the GPU at module start-up
|
| 90 |
+
# -----------------------------------------------------------------------------------
|
| 91 |
|
| 92 |
# This is for debugging purposes only
|
| 93 |
DEVICE_ASSERT_DETECTED = 0
|
|
|
|
| 96 |
|
| 97 |
supported_languages = config.languages
|
| 98 |
|
| 99 |
+
# --- ZeroGPU decorator (added) ---
|
| 100 |
+
@spaces.GPU
|
| 101 |
def predict(
|
| 102 |
prompt,
|
| 103 |
language,
|
|
|
|
| 108 |
no_lang_auto_detect,
|
| 109 |
agree,
|
| 110 |
):
|
| 111 |
+
# --- Dynamic GPU loading: move the model to the GPU for this call ---
|
| 112 |
+
model.cuda()
|
| 113 |
+
# --------------------------------
|
| 114 |
+
|
| 115 |
if agree == True:
|
| 116 |
if language not in supported_languages:
|
| 117 |
gr.Warning(
|
|
|
|
| 414 |
None,
|
| 415 |
None,
|
| 416 |
None,
|
| 417 |
+
None,
|
| 418 |
)
|
| 419 |
return (
|
| 420 |
+
None,
|
| 421 |
+
# gr.make_waveform(
|
| 422 |
+
# audio="output.wav",
|
| 423 |
+
# ),
|
| 424 |
"output.wav",
|
| 425 |
metrics_text,
|
| 426 |
speaker_wav,
|
|
|
|
| 455 |
|
| 456 |
| | |
|
| 457 |
| ------------------------------- | --------------------------------------- |
|
| 458 |
+
| 🐸💬 **CoquiTTS** | <a style="display:inline-block" href='https://github.com/coqui-ai/TTS'><img src='https://img.shields.io/github/stars/coqui-ai/TTS?style=social' /></a>|
|
| 459 |
+
| 💼 **Documentation** | [ReadTheDocs](https://tts.readthedocs.io/en/latest/)
|
| 460 |
+
| 👩💻 **Questions** | [GitHub Discussions](https://github.com/coqui-ai/TTS/discussions) |
|
| 461 |
+
| 🗯 **Community** | [Discord](https://discord.gg/5eXr5seRrv) |
|
| 462 |
|
| 463 |
|
| 464 |
"""
|
|
|
|
| 483 |
[
|
| 484 |
"Lorsque j'avais six ans j'ai vu, une fois, une magnifique image",
|
| 485 |
"fr",
|
| 486 |
+
"examples/male_fixed.wav",
|
| 487 |
None,
|
| 488 |
False,
|
| 489 |
False,
|
|
|
|
| 503 |
[
|
| 504 |
"Cuando tenía seis años, vi una vez una imagen magnífica",
|
| 505 |
"es",
|
| 506 |
+
"examples/male_fixed.wav",
|
| 507 |
None,
|
| 508 |
False,
|
| 509 |
False,
|
|
|
|
| 523 |
[
|
| 524 |
"Kiedy miałem sześć lat, zobaczyłem pewnego razu wspaniały obrazek",
|
| 525 |
"pl",
|
| 526 |
+
"examples/male_fixed.wav",
|
| 527 |
None,
|
| 528 |
False,
|
| 529 |
False,
|
|
|
|
| 563 |
[
|
| 564 |
"Toen ik een jaar of zes was, zag ik op een keer een prachtige plaat",
|
| 565 |
"nl",
|
| 566 |
+
"examples/male_fixed.wav",
|
| 567 |
None,
|
| 568 |
False,
|
| 569 |
False,
|
|
|
|
| 613 |
[
|
| 614 |
"Egyszer hat éves koromban láttam egy csodálatos képet",
|
| 615 |
"hu",
|
| 616 |
+
"examples/male_fixed.wav",
|
| 617 |
None,
|
| 618 |
False,
|
| 619 |
True,
|
|
|
|
| 676 |
)
|
| 677 |
ref_gr = gr.Audio(
|
| 678 |
label="Reference Audio",
|
| 679 |
+
# info="Click on the ✎ button to upload your own target speaker audio",
|
| 680 |
type="filepath",
|
| 681 |
value="examples/female.wav",
|
| 682 |
)
|
| 683 |
mic_gr = gr.Audio(
|
| 684 |
+
sources=["microphone"],
|
| 685 |
type="filepath",
|
| 686 |
+
# info="Use your microphone to record audio",
|
| 687 |
label="Use Microphone for Reference",
|
| 688 |
)
|
| 689 |
use_mic_gr = gr.Checkbox(
|
|
|
|
| 727 |
tts_button.click(predict, [input_text_gr, language_gr, ref_gr, mic_gr, use_mic_gr, clean_ref_gr, auto_det_lang_gr, tos_gr], outputs=[video_gr, audio_gr, out_text_gr, ref_audio_gr])
|
| 728 |
|
| 729 |
demo.queue()
|
| 730 |
+
demo.launch(
|
| 731 |
+
debug=True,
|
| 732 |
+
# show_api=True
|
| 733 |
+
)
|