immad84 committed on
Commit
a279c11
·
verified ·
1 Parent(s): 7a10ff5

update app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -24
app.py CHANGED
@@ -5,7 +5,7 @@ import torch
5
  import gradio as gr
6
  from TTS.api import TTS
7
 
8
- # Patch torch.load for compatibility
9
  old_torch_load = torch.load
10
  def patched_torch_load(*args, **kwargs):
11
  kwargs["weights_only"] = False
@@ -15,13 +15,17 @@ torch.load = patched_torch_load
15
  # Accept Coqui TOS
16
  os.environ["COQUI_TOS_AGREED"] = "1"
17
 
18
- # Model name
 
 
 
 
 
 
 
19
  MODEL = "tts_models/multilingual/multi-dataset/xtts_v2"
20
  print("Loading model:", MODEL)
21
-
22
- # Detect if GPU available (Hugging Face ZeroGPU = CPU only)
23
- use_gpu = torch.cuda.is_available()
24
- tts = TTS(MODEL, gpu=use_gpu)
25
 
26
  # Supported languages
27
  LANGS = [
@@ -29,32 +33,31 @@ LANGS = [
29
  "cs", "ar", "zh-cn", "ja", "ko", "hu", "hi"
30
  ]
31
 
32
- # Default speaker if no wav provided
33
- DEFAULT_SPEAKER = tts.speakers[0] if tts.speakers else None
34
- print("Default speaker:", DEFAULT_SPEAKER)
35
-
36
- def generate_audio(text, language, speaker_wav):
37
  if not text or len(text.strip()) < 2:
38
  return None
39
 
40
  out_path = tempfile.mktemp(suffix=".wav")
41
 
42
- if speaker_wav:
43
- # Use reference WAV for voice cloning
44
- tts.tts_to_file(
45
- text=text,
46
- speaker_wav=speaker_wav,
47
- language=language,
48
- file_path=out_path
49
- )
50
  else:
51
- # Use built-in default voice
 
 
 
 
 
52
  tts.tts_to_file(
53
  text=text,
54
- speaker=DEFAULT_SPEAKER,
55
  language=language,
56
  file_path=out_path
57
  )
 
 
58
 
59
  return out_path
60
 
@@ -63,10 +66,11 @@ demo = gr.Interface(
63
  inputs=[
64
  gr.Textbox(lines=3, label="Text"),
65
  gr.Dropdown(LANGS, value="en", label="Language"),
66
- gr.Audio(label="Speaker reference (optional, WAV)", type="filepath")
67
  ],
68
- outputs=gr.Audio(type="filepath", label="Generated speech"),
69
- title="XTTS-v2 (Multilingual + Voice Cloning)",
 
70
  allow_flagging="never",
71
  )
72
 
 
5
  import gradio as gr
6
  from TTS.api import TTS
7
 
8
+ # Patch torch.load for compatibility with older Coqui checkpoints
9
  old_torch_load = torch.load
10
  def patched_torch_load(*args, **kwargs):
11
  kwargs["weights_only"] = False
 
15
  # Accept Coqui TOS
16
  os.environ["COQUI_TOS_AGREED"] = "1"
17
 
18
+ # Ensure speakers folder exists
19
+ SPEAKER_DIR = "speakers"
20
+ os.makedirs(SPEAKER_DIR, exist_ok=True)
21
+
22
+ # Get device
23
+ device = "cuda" if torch.cuda.is_available() else "cpu"
24
+
25
+ # Model
26
  MODEL = "tts_models/multilingual/multi-dataset/xtts_v2"
27
  print("Loading model:", MODEL)
28
+ tts = TTS(MODEL).to(device)
 
 
 
29
 
30
  # Supported languages
31
  LANGS = [
 
33
  "cs", "ar", "zh-cn", "ja", "ko", "hu", "hi"
34
  ]
35
 
36
+ def generate_audio(text, language, speaker_file):
 
 
 
 
37
  if not text or len(text.strip()) < 2:
38
  return None
39
 
40
  out_path = tempfile.mktemp(suffix=".wav")
41
 
42
+ # Resolve speaker path (either from upload or from speakers folder)
43
+ speaker_path = None
44
+ if speaker_file:
45
+ speaker_path = speaker_file
 
 
 
 
46
  else:
47
+ # Default to first speaker file in folder if exists
48
+ files = [f for f in os.listdir(SPEAKER_DIR) if f.lower().endswith(".wav")]
49
+ if files:
50
+ speaker_path = os.path.join(SPEAKER_DIR, files[0])
51
+
52
+ if speaker_path:
53
  tts.tts_to_file(
54
  text=text,
55
+ speaker_wav=speaker_path,
56
  language=language,
57
  file_path=out_path
58
  )
59
+ else:
60
+ return None
61
 
62
  return out_path
63
 
 
66
  inputs=[
67
  gr.Textbox(lines=3, label="Text"),
68
  gr.Dropdown(LANGS, value="en", label="Language"),
69
+ gr.Audio(label="Upload speaker reference (optional)", type="filepath")
70
  ],
71
+ outputs=gr.Audio(type="filepath", label="Generated Speech"),
72
+ title="XTTS-v2 Voice Cloning",
73
+ description=f"Drop WAV files into `{SPEAKER_DIR}` folder for reusable speaker voices.",
74
  allow_flagging="never",
75
  )
76