hasanbasbunar committed on
Commit
c3afa70
·
verified ·
1 Parent(s): 07d3ed0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +50 -20
app.py CHANGED
@@ -1,6 +1,17 @@
1
  import sys
2
  import io, os, stat
3
  import subprocess
 
 
 
 
 
 
 
 
 
 
 
4
  import random
5
  from zipfile import ZipFile
6
  import uuid
@@ -8,9 +19,12 @@ import time
8
  import torch
9
  import torchaudio
10
 
 
 
 
11
 
12
  #download for mecab
13
- os.system('python -m unidic download')
14
 
15
  # By using XTTS you agree to CPML license https://coqui.ai/cpml
16
  os.environ["COQUI_TOS_AGREED"] = "1"
@@ -30,8 +44,9 @@ from pydub import AudioSegment
30
 
31
  from TTS.api import TTS
32
  from TTS.tts.configs.xtts_config import XttsConfig
33
- from TTS.tts.models.xtts import Xtts
34
  from TTS.utils.generic_utils import get_user_data_dir
 
35
 
36
  HF_TOKEN = os.environ.get("HF_TOKEN")
37
 
@@ -61,14 +76,18 @@ config = XttsConfig()
61
  config.load_json(os.path.join(model_path, "config.json"))
62
 
63
  model = Xtts.init_from_config(config)
 
 
 
64
  model.load_checkpoint(
65
  config,
66
  checkpoint_path=os.path.join(model_path, "model.pth"),
67
  vocab_path=os.path.join(model_path, "vocab.json"),
68
  eval=True,
69
- use_deepspeed=True,
70
  )
71
- model.cuda()
 
72
 
73
  # This is for debugging purposes only
74
  DEVICE_ASSERT_DETECTED = 0
@@ -77,6 +96,8 @@ DEVICE_ASSERT_LANG = None
77
 
78
  supported_languages = config.languages
79
 
 
 
80
  def predict(
81
  prompt,
82
  language,
@@ -87,6 +108,10 @@ def predict(
87
  no_lang_auto_detect,
88
  agree,
89
  ):
 
 
 
 
90
  if agree == True:
91
  if language not in supported_languages:
92
  gr.Warning(
@@ -389,11 +414,13 @@ def predict(
389
  None,
390
  None,
391
  None,
 
392
  )
393
  return (
394
- gr.make_waveform(
395
- audio="output.wav",
396
- ),
 
397
  "output.wav",
398
  metrics_text,
399
  speaker_wav,
@@ -428,10 +455,10 @@ links = """
428
 
429
  | | |
430
  | ------------------------------- | --------------------------------------- |
431
- | 🐸💬 **CoquiTTS** | <a style="display:inline-block" href='https://github.com/coqui-ai/TTS'><img src='https://img.shields.io/github/stars/coqui-ai/TTS?style=social' /></a>|
432
- | 💼 **Documentation** | [ReadTheDocs](https://tts.readthedocs.io/en/latest/)
433
- | 👩‍💻 **Questions** | [GitHub Discussions](https://github.com/coqui-ai/TTS/discussions) |
434
- | 🗯 **Community** | [![Dicord](https://img.shields.io/discord/1037326658807533628?color=%239B59B6&label=chat%20on%20discord)](https://discord.gg/5eXr5seRrv) |
435
 
436
 
437
  """
@@ -456,7 +483,7 @@ examples = [
456
  [
457
  "Lorsque j'avais six ans j'ai vu, une fois, une magnifique image",
458
  "fr",
459
- "examples/male.wav",
460
  None,
461
  False,
462
  False,
@@ -476,7 +503,7 @@ examples = [
476
  [
477
  "Cuando tenía seis años, vi una vez una imagen magnífica",
478
  "es",
479
- "examples/male.wav",
480
  None,
481
  False,
482
  False,
@@ -496,7 +523,7 @@ examples = [
496
  [
497
  "Kiedy miałem sześć lat, zobaczyłem pewnego razu wspaniały obrazek",
498
  "pl",
499
- "examples/male.wav",
500
  None,
501
  False,
502
  False,
@@ -536,7 +563,7 @@ examples = [
536
  [
537
  "Toen ik een jaar of zes was, zag ik op een keer een prachtige plaat",
538
  "nl",
539
- "examples/male.wav",
540
  None,
541
  False,
542
  False,
@@ -586,7 +613,7 @@ examples = [
586
  [
587
  "Egyszer hat éves koromban láttam egy csodálatos képet",
588
  "hu",
589
- "examples/male.wav",
590
  None,
591
  False,
592
  True,
@@ -649,14 +676,14 @@ with gr.Blocks(analytics_enabled=False) as demo:
649
  )
650
  ref_gr = gr.Audio(
651
  label="Reference Audio",
652
- info="Click on the ✎ button to upload your own target speaker audio",
653
  type="filepath",
654
  value="examples/female.wav",
655
  )
656
  mic_gr = gr.Audio(
657
- source="microphone",
658
  type="filepath",
659
- info="Use your microphone to record audio",
660
  label="Use Microphone for Reference",
661
  )
662
  use_mic_gr = gr.Checkbox(
@@ -700,4 +727,7 @@ with gr.Blocks(analytics_enabled=False) as demo:
700
  tts_button.click(predict, [input_text_gr, language_gr, ref_gr, mic_gr, use_mic_gr, clean_ref_gr, auto_det_lang_gr, tos_gr], outputs=[video_gr, audio_gr, out_text_gr, ref_audio_gr])
701
 
702
  demo.queue()
703
- demo.launch(debug=True, show_api=True)
 
 
 
 
1
  import sys
2
  import io, os, stat
3
  import subprocess
4
+
5
+ try:
6
+ import pandas
7
+ if int(pandas.__version__.split('.')[0]) < 2:
8
+ print("Upgrading pandas for Gradio 6 compatibility...")
9
+ subprocess.check_call([sys.executable, "-m", "pip", "install", "pandas>=2.0", "numpy<2.0"])
10
+ print("Pandas upgraded. Restarting script...")
11
+ os.execv(sys.executable, ['python'] + sys.argv)
12
+ except Exception as e:
13
+ print(f"Pandas update check failed: {e}")
14
+
15
  import random
16
  from zipfile import ZipFile
17
  import uuid
 
19
  import torch
20
  import torchaudio
21
 
22
+ # --- ZeroGPU ADDITION ---
23
+ import spaces
24
+ # ----------------------
25
 
26
  #download for mecab
27
+ # os.system('python -m unidic download')
28
 
29
  # By using XTTS you agree to CPML license https://coqui.ai/cpml
30
  os.environ["COQUI_TOS_AGREED"] = "1"
 
44
 
45
  from TTS.api import TTS
46
  from TTS.tts.configs.xtts_config import XttsConfig
47
+ from TTS.tts.models.xtts import Xtts, XttsAudioConfig, XttsArgs
48
  from TTS.utils.generic_utils import get_user_data_dir
49
+ from TTS.config.shared_configs import BaseDatasetConfig
50
 
51
  HF_TOKEN = os.environ.get("HF_TOKEN")
52
 
 
76
  config.load_json(os.path.join(model_path, "config.json"))
77
 
78
  model = Xtts.init_from_config(config)
79
+ torch.serialization.add_safe_globals([XttsConfig, XttsAudioConfig, BaseDatasetConfig, XttsArgs])
80
+
81
+ # --- CRITICAL CHANGE: disable DeepSpeed and remove the global model.cuda() call ---
82
  model.load_checkpoint(
83
  config,
84
  checkpoint_path=os.path.join(model_path, "model.pth"),
85
  vocab_path=os.path.join(model_path, "vocab.json"),
86
  eval=True,
87
+ use_deepspeed=False, # DeepSpeed crashes during ZeroGPU's CPU-side init, so it is disabled.
88
  )
89
+ # model.cuda() # REMOVED: do not move the model to the GPU at global startup
90
+ # -----------------------------------------------------------------------------------
91
 
92
  # This is for debugging purposes only
93
  DEVICE_ASSERT_DETECTED = 0
 
96
 
97
  supported_languages = config.languages
98
 
99
+ # --- ZeroGPU DECORATOR ADDED ---
100
+ @spaces.GPU
101
  def predict(
102
  prompt,
103
  language,
 
108
  no_lang_auto_detect,
109
  agree,
110
  ):
111
+ # --- DYNAMIC GPU LOADING ---
112
+ model.cuda()
113
+ # --------------------------------
114
+
115
  if agree == True:
116
  if language not in supported_languages:
117
  gr.Warning(
 
414
  None,
415
  None,
416
  None,
417
+ None,
418
  )
419
  return (
420
+ None,
421
+ # gr.make_waveform(
422
+ # audio="output.wav",
423
+ # ),
424
  "output.wav",
425
  metrics_text,
426
  speaker_wav,
 
455
 
456
  | | |
457
  | ------------------------------- | --------------------------------------- |
458
+ | 🐸💬 **CoquiTTS** | <a style="display:inline-block" href='https://github.com/coqui-ai/TTS'><img src='https://img.shields.io/github/stars/coqui-ai/TTS?style=social' /></a>|
459
+ | 💼 **Documentation** | [ReadTheDocs](https://tts.readthedocs.io/en/latest/)
460
+ | 👩‍💻 **Questions** | [GitHub Discussions](https://github.com/coqui-ai/TTS/discussions) |
461
+ | 🗯 **Community** | [![Dicord](https://img.shields.io/discord/1037326658807533628?color=%239B59B6&label=chat%20on%20discord)](https://discord.gg/5eXr5seRrv) |
462
 
463
 
464
  """
 
483
  [
484
  "Lorsque j'avais six ans j'ai vu, une fois, une magnifique image",
485
  "fr",
486
+ "examples/male_fixed.wav",
487
  None,
488
  False,
489
  False,
 
503
  [
504
  "Cuando tenía seis años, vi una vez una imagen magnífica",
505
  "es",
506
+ "examples/male_fixed.wav",
507
  None,
508
  False,
509
  False,
 
523
  [
524
  "Kiedy miałem sześć lat, zobaczyłem pewnego razu wspaniały obrazek",
525
  "pl",
526
+ "examples/male_fixed.wav",
527
  None,
528
  False,
529
  False,
 
563
  [
564
  "Toen ik een jaar of zes was, zag ik op een keer een prachtige plaat",
565
  "nl",
566
+ "examples/male_fixed.wav",
567
  None,
568
  False,
569
  False,
 
613
  [
614
  "Egyszer hat éves koromban láttam egy csodálatos képet",
615
  "hu",
616
+ "examples/male_fixed.wav",
617
  None,
618
  False,
619
  True,
 
676
  )
677
  ref_gr = gr.Audio(
678
  label="Reference Audio",
679
+ # info="Click on the ✎ button to upload your own target speaker audio",
680
  type="filepath",
681
  value="examples/female.wav",
682
  )
683
  mic_gr = gr.Audio(
684
+ sources=["microphone"],
685
  type="filepath",
686
+ # info="Use your microphone to record audio",
687
  label="Use Microphone for Reference",
688
  )
689
  use_mic_gr = gr.Checkbox(
 
727
  tts_button.click(predict, [input_text_gr, language_gr, ref_gr, mic_gr, use_mic_gr, clean_ref_gr, auto_det_lang_gr, tos_gr], outputs=[video_gr, audio_gr, out_text_gr, ref_audio_gr])
728
 
729
  demo.queue()
730
+ demo.launch(
731
+ debug=True,
732
+ # show_api=True
733
+ )