asbgig committed on
Commit
8d9fcd0
·
verified ·
1 Parent(s): 1c8e78d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +83 -57
app.py CHANGED
@@ -1,15 +1,24 @@
1
- # app.py — TalkClone (HF Space, one-column, footer hidden, binds to $PORT)
2
 
3
  import os, re, tempfile
4
  import numpy as np
5
  import soundfile as sf
6
  import gradio as gr
7
 
8
- # Accept Coqui license non-interactively (required on Spaces)
9
  os.environ.setdefault("COQUI_TOS_AGREED", "1")
10
 
11
  MODEL_NAME = "tts_models/multilingual/multi-dataset/xtts_v2"
12
 
 
 
 
 
 
 
 
 
 
13
  _tts = None
14
  def get_tts():
15
  """Lazy-load TTS; try GPU if available, else CPU."""
@@ -18,6 +27,11 @@ def get_tts():
18
  return _tts
19
  try:
20
  import torch
 
 
 
 
 
21
  use_gpu = torch.cuda.is_available()
22
  except Exception:
23
  use_gpu = False
@@ -28,12 +42,6 @@ def get_tts():
28
  _tts = TTS(MODEL_NAME)
29
  return _tts
30
 
31
- LANGS = [
32
- ("English", "en"), ("Urdu", "ur"), ("Hindi", "hi"), ("Arabic", "ar"),
33
- ("French", "fr"), ("German", "de"), ("Spanish", "es"), ("Italian", "it"),
34
- ("Portuguese", "pt"), ("Turkish", "tr"),
35
- ]
36
-
37
  def clean_text(t: str) -> str:
38
  return " ".join((t or "").strip().split())
39
 
@@ -45,28 +53,45 @@ def synth_to_file_safe(tts, txt, out_path, wav_path, lang, speed):
45
  tts.tts_to_file(text=txt, file_path=out_path,
46
  speaker_wav=wav_path, language=lang)
47
 
48
- def tts_clone(text, ref_audio, language_code, speed, split_sentences, progress=gr.Progress(track_tqdm=True)):
49
  if ref_audio is None:
50
- raise gr.Error("Please upload a reference voice sample (10–60 seconds, clean speech).")
51
  text = clean_text(text)
52
  if not text:
53
  raise gr.Error("Please enter some text.")
54
 
 
 
 
 
 
55
  wav_path = ref_audio
 
 
56
  chunks = [text]
57
  if split_sentences:
58
- chunks = [s.strip() for s in re.split(r'(?<=[.!?؟۔])\s+', text) if s.strip()]
 
 
 
 
 
 
 
 
59
 
60
  tts = get_tts()
61
  out_wavs = []
62
  with tempfile.TemporaryDirectory() as td:
 
63
  for i, chunk in enumerate(chunks, 1):
64
- progress((i-1)/max(len(chunks),1), desc=f"Synthesizing {i}/{len(chunks)}")
65
  part_path = os.path.join(td, f"part_{i}.wav")
66
- synth_to_file_safe(tts, chunk, part_path, wav_path, language_code, speed)
67
  data, sr = sf.read(part_path)
68
  out_wavs.append((data, sr))
69
 
 
70
  if len(out_wavs) == 1:
71
  final_data, sr = out_wavs[0]
72
  else:
@@ -77,58 +102,59 @@ def tts_clone(text, ref_audio, language_code, speed, split_sentences, progress=g
77
  sf.write(final_path, final_data, sr)
78
  return final_path
79
 
80
- # One-column & hide footer/API/settings
81
- HIDE_CSS = """
82
- .gradio-container { max-width: 880px !important; margin: 0 auto; }
83
- footer, .footer, #footer, [data-testid="block-analytics"], [data-testid="embed-info"] { display:none !important; }
84
- a[href*="gradio.live"], a[href*="gradio.app"], a[href*="hf.space"] { display:none !important; }
85
- button[aria-label="Settings"] { display:none !important; }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
86
  """
87
 
88
  with gr.Blocks(
89
  title="TalkClone - Voice Cloning & TTS",
90
- css=HIDE_CSS,
91
  analytics_enabled=False
92
  ) as demo:
93
- gr.Markdown("## TalkClone — Turn Text into Speech from a Reference Voice")
94
- gr.Markdown(
95
- "Upload a short **reference voice** (10–60s), choose **language**, enter **text**, click **Generate**.\n"
96
- "**Tip:** Long texts are split by sentence for reliability; shorter sentences synthesize faster."
97
- )
98
-
99
- ref_audio = gr.Audio(label="Reference Voice (WAV/MP3)", type="filepath")
100
- # Use codes to avoid tuple issues in some Gradio builds
101
- language = gr.Dropdown(choices=[code for _, code in LANGS], value="en", label="Language")
102
- text = gr.Textbox(label="Text", lines=6, placeholder="Type or paste your text here…")
103
- speed = gr.Slider(0.7, 1.3, value=1.0, step=0.05, label="Speed")
104
- split = gr.Checkbox(value=True, label="Auto split long text by sentence")
105
- submit = gr.Button("Generate", variant="primary")
106
-
107
- output = gr.Audio(label="Cloned Speech", type="filepath", interactive=False)
108
- download = gr.File(label="Download audio")
109
-
110
- def run_and_return(text, ref_audio, language, speed, split):
111
- path = tts_clone(text, ref_audio, language, speed, split)
112
- return path, path
113
-
114
- submit.click(run_and_return,
115
- inputs=[text, ref_audio, language, speed, split],
116
- outputs=[output, download])
117
 
118
  if __name__ == "__main__":
119
  port = int(os.environ.get("PORT", "7860"))
120
  try:
121
- demo.queue().launch(
122
- server_name="0.0.0.0",
123
- server_port=port,
124
- show_error=True,
125
- show_api=False,
126
- )
127
  except TypeError:
128
- # For very old/new Gradio where queue() signature differs
129
- demo.launch(
130
- server_name="0.0.0.0",
131
- server_port=port,
132
- show_error=True,
133
- show_api=False,
134
- )
 
1
+ # app.py — TalkClone (HF Space, 1-column, custom styles, CPU-friendly)
2
 
3
  import os, re, tempfile
4
  import numpy as np
5
  import soundfile as sf
6
  import gradio as gr
7
 
8
+ # Agree to Coqui CPML non-interactively on Spaces
9
  os.environ.setdefault("COQUI_TOS_AGREED", "1")
10
 
11
  MODEL_NAME = "tts_models/multilingual/multi-dataset/xtts_v2"
12
 
13
+ # The dropdown shows language names; LANG_MAP translates the selected name to its language code
14
+ LANGS = [
15
+ ("English","en"), ("Urdu","ur"), ("Hindi","hi"), ("Arabic","ar"),
16
+ ("French","fr"), ("German","de"), ("Spanish","es"), ("Italian","it"),
17
+ ("Portuguese","pt"), ("Turkish","tr"),
18
+ ]
19
+ LANG_LABELS = [name for name, _ in LANGS]
20
+ LANG_MAP = {name: code for name, code in LANGS}
21
+
22
  _tts = None
23
  def get_tts():
24
  """Lazy-load TTS; try GPU if available, else CPU."""
 
27
  return _tts
28
  try:
29
  import torch
30
+ # Use up to 4 CPU threads, fewer if the host has fewer (CPU Basic usually has 2 vCPU)
31
+ try:
32
+ torch.set_num_threads(max(1, min(4, os.cpu_count() or 2)))
33
+ except Exception:
34
+ pass
35
  use_gpu = torch.cuda.is_available()
36
  except Exception:
37
  use_gpu = False
 
42
  _tts = TTS(MODEL_NAME)
43
  return _tts
44
 
 
 
 
 
 
 
45
  def clean_text(t: str) -> str:
46
  return " ".join((t or "").strip().split())
47
 
 
53
  tts.tts_to_file(text=txt, file_path=out_path,
54
  speaker_wav=wav_path, language=lang)
55
 
56
+ def tts_clone(text, ref_audio, lang_label, speed, split_sentences, progress=gr.Progress(track_tqdm=True)):
57
  if ref_audio is None:
58
+ raise gr.Error("Upload a reference voice (10–60s, clean speech).")
59
  text = clean_text(text)
60
  if not text:
61
  raise gr.Error("Please enter some text.")
62
 
63
+ # Limit extremely long jobs on CPU Basic
64
+ if len(text) > 1400 and not split_sentences:
65
+ raise gr.Error("Text is very long. Enable 'Auto split' or paste a shorter chunk on CPU.")
66
+
67
+ lang = LANG_MAP.get(lang_label, "en")
68
  wav_path = ref_audio
69
+
70
+ # Split on sentence-ending punctuation, then cut any sentence longer than 220 chars into 200-char pieces
71
  chunks = [text]
72
  if split_sentences:
73
+ rough = [s.strip() for s in re.split(r'(?<=[.!?؟۔])\s+', text) if s.strip()]
74
+ chunks = []
75
+ for s in rough:
76
+ if len(s) <= 220:
77
+ chunks.append(s)
78
+ else:
79
+ # hard cut every 200 characters (not word-aware, so a piece may end mid-word)
80
+ for i in range(0, len(s), 200):
81
+ chunks.append(s[i:i+200])
82
 
83
  tts = get_tts()
84
  out_wavs = []
85
  with tempfile.TemporaryDirectory() as td:
86
+ total = max(len(chunks), 1)
87
  for i, chunk in enumerate(chunks, 1):
88
+ progress((i-1)/total, desc=f"Synthesizing {i}/{total}")
89
  part_path = os.path.join(td, f"part_{i}.wav")
90
+ synth_to_file_safe(tts, chunk, part_path, wav_path, lang, speed)
91
  data, sr = sf.read(part_path)
92
  out_wavs.append((data, sr))
93
 
94
+ # Concatenate
95
  if len(out_wavs) == 1:
96
  final_data, sr = out_wavs[0]
97
  else:
 
102
  sf.write(final_path, final_data, sr)
103
  return final_path
104
 
105
+ # ==== Styles (1 column + colors + hide HF/Gradio UI chrome) ====
106
+ CUSTOM_CSS = """
107
+ .gradio-container { max-width: 860px !important; margin: 0 auto; }
108
+
109
+ #wrap, #ref, #lang, #txt, #spd, #split, #out_audio, #dl {
110
+ background: #f8fafc !important; /* slate-50 */
111
+ border: 1px solid #e5e7eb !important; /* gray-200 */
112
+ border-radius: 14px !important;
113
+ padding: 14px !important;
114
+ }
115
+
116
+ /* Primary button color */
117
+ #gen button, #gen { background: #10b981 !important; color: #fff !important; }
118
+ #gen button:hover { filter: brightness(0.95); }
119
+
120
+ /* Hide footer/API/Settings & obvious Space links */
121
+ footer, .footer, #footer,
122
+ a[href*="gradio.live"], a[href*="gradio.app"], a[href*="/api"], a[href*="hf.space"],
123
+ button[aria-label="Settings"],
124
+ [data-testid="block-analytics"], [data-testid="embed-info"] { display: none !important; }
125
  """
126
 
127
  with gr.Blocks(
128
  title="TalkClone - Voice Cloning & TTS",
129
+ css=CUSTOM_CSS,
130
  analytics_enabled=False
131
  ) as demo:
132
+ with gr.Column(elem_id="wrap"):
133
+ gr.Markdown("## TalkClone — Text-to-Speech with Voice Cloning")
134
+ gr.Markdown("Upload a short **reference voice** (10–60s), choose **language**, enter **text**, then **Generate**. "
135
+ "On CPU Basic, keep text short or enable **Auto split** for speed.")
136
+
137
+ ref_audio = gr.Audio(label="Reference Voice (WAV/MP3)", type="filepath", elem_id="ref")
138
+ language = gr.Dropdown(choices=LANG_LABELS, value="English", label="Language", elem_id="lang")
139
+ text = gr.Textbox(label="Text", lines=6, placeholder="Type or paste your text here…", elem_id="txt")
140
+ speed = gr.Slider(0.7, 1.3, value=1.0, step=0.05, label="Speed", elem_id="spd")
141
+ split = gr.Checkbox(value=True, label="Auto split long text by sentence", elem_id="split")
142
+ submit = gr.Button("Generate", variant="primary", elem_id="gen")
143
+
144
+ output = gr.Audio(label="Cloned Speech", type="filepath", interactive=False, elem_id="out_audio")
145
+ download = gr.File(label="Download audio", elem_id="dl")
146
+
147
+ def run_and_return(text, ref_audio, language, speed, split):
148
+ p = tts_clone(text, ref_audio, language, speed, split)
149
+ return p, p
150
+
151
+ submit.click(run_and_return,
152
+ inputs=[text, ref_audio, language, speed, split],
153
+ outputs=[output, download])
 
 
154
 
155
  if __name__ == "__main__":
156
  port = int(os.environ.get("PORT", "7860"))
157
  try:
158
+ demo.queue().launch(server_name="0.0.0.0", server_port=port, show_error=True, show_api=False)
 
 
 
 
 
159
  except TypeError:
160
+ demo.launch(server_name="0.0.0.0", server_port=port, show_error=True, show_api=False)