asbgig committed on
Commit
9aaaf3c
·
verified ·
1 Parent(s): 256aa14

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +43 -22
app.py CHANGED
@@ -1,6 +1,6 @@
1
- # app.py — TalkClone (HF Space, 1-column, custom styles, CPU-friendly)
2
 
3
- import os, re, tempfile
4
  import numpy as np
5
  import soundfile as sf
6
  import gradio as gr
@@ -10,11 +10,25 @@ os.environ.setdefault("COQUI_TOS_AGREED", "1")
10
 
11
  MODEL_NAME = "tts_models/multilingual/multi-dataset/xtts_v2"
12
 
13
- # Show labels, send codes
14
  LANGS = [
15
- ("English","en"), ("Urdu","ur"), ("Hindi","hi"), ("Arabic","ar"),
16
- ("French","fr"), ("German","de"), ("Spanish","es"), ("Italian","it"),
17
- ("Portuguese","pt"), ("Turkish","tr"),
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  ]
19
  LANG_LABELS = [name for name, _ in LANGS]
20
  LANG_MAP = {name: code for name, code in LANGS}
@@ -27,7 +41,6 @@ def get_tts():
27
  return _tts
28
  try:
29
  import torch
30
- # Use all available CPU threads on Basic (usually 2 vCPU)
31
  try:
32
  torch.set_num_threads(max(1, min(4, os.cpu_count() or 2)))
33
  except Exception:
@@ -35,6 +48,7 @@ def get_tts():
35
  use_gpu = torch.cuda.is_available()
36
  except Exception:
37
  use_gpu = False
 
38
  from TTS.api import TTS
39
  try:
40
  _tts = TTS(MODEL_NAME, gpu=use_gpu)
@@ -60,28 +74,29 @@ def tts_clone(text, ref_audio, lang_label, speed, split_sentences, progress=gr.P
60
  if not text:
61
  raise gr.Error("Please enter some text.")
62
 
63
- # Limit extremely long jobs on CPU Basic
64
  if len(text) > 1400 and not split_sentences:
65
  raise gr.Error("Text is very long. Enable 'Auto split' or paste a shorter chunk on CPU.")
66
 
67
  lang = LANG_MAP.get(lang_label, "en")
68
  wav_path = ref_audio
69
 
70
- # Sentence split + also break very long sentences into ~180–220 chars
71
  chunks = [text]
72
  if split_sentences:
73
- rough = [s.strip() for s in re.split(r'(?<=[.!?؟۔])\s+', text) if s.strip()]
74
  chunks = []
75
  for s in rough:
76
  if len(s) <= 220:
77
  chunks.append(s)
78
  else:
79
- # soft wrap long lines
80
  for i in range(0, len(s), 200):
81
  chunks.append(s[i:i+200])
82
 
83
  tts = get_tts()
84
  out_wavs = []
 
 
85
  with tempfile.TemporaryDirectory() as td:
86
  total = max(len(chunks), 1)
87
  for i, chunk in enumerate(chunks, 1):
@@ -91,16 +106,19 @@ def tts_clone(text, ref_audio, lang_label, speed, split_sentences, progress=gr.P
91
  data, sr = sf.read(part_path)
92
  out_wavs.append((data, sr))
93
 
94
- # Concatenate
95
- if len(out_wavs) == 1:
96
- final_data, sr = out_wavs[0]
97
- else:
98
- sr = out_wavs[0][1]
99
- final_data = np.concatenate([d for d, _ in out_wavs], axis=0)
100
 
101
- final_path = os.path.join(td, "output.wav")
102
- sf.write(final_path, final_data, sr)
103
- return final_path
 
 
 
104
 
105
  # ==== Styles (1 column + colors + hide HF/Gradio UI chrome) ====
106
  CUSTOM_CSS = """
@@ -113,6 +131,9 @@ CUSTOM_CSS = """
113
  padding: 14px !important;
114
  }
115
 
 
 
 
116
  /* Primary button color */
117
  #gen button, #gen { background: #10b981 !important; color: #fff !important; }
118
  #gen button:hover { filter: brightness(0.95); }
@@ -132,10 +153,10 @@ with gr.Blocks(
132
  with gr.Column(elem_id="wrap"):
133
  gr.Markdown("## TalkClone — Text-to-Speech with Voice Cloning")
134
  gr.Markdown("Upload a short **reference voice** (10–60s), choose **language**, enter **text**, then **Generate**. "
135
- "On CPU Basic, keep text short or enable **Auto split** for speed.")
136
 
137
  ref_audio = gr.Audio(label="Reference Voice (WAV/MP3)", type="filepath", elem_id="ref")
138
- language = gr.Dropdown(choices=LANG_LABELS, value="English", label="Language", elem_id="lang")
139
  text = gr.Textbox(label="Text", lines=6, placeholder="Type or paste your text here…", elem_id="txt")
140
  speed = gr.Slider(0.7, 1.3, value=1.0, step=0.05, label="Speed", elem_id="spd")
141
  split = gr.Checkbox(value=True, label="Auto split long text by sentence", elem_id="split")
 
1
+ # app.py — TalkClone (HF Space, 1-column, persistent output, CPU-friendly)
2
 
3
+ import os, re, tempfile, shutil
4
  import numpy as np
5
  import soundfile as sf
6
  import gradio as gr
 
10
 
11
  MODEL_NAME = "tts_models/multilingual/multi-dataset/xtts_v2"
12
 
13
+ # Show labels, send codes (XTTS v2 supported only)
14
  LANGS = [
15
+ ("English", "en"),
16
+ ("Spanish", "es"),
17
+ ("French", "fr"),
18
+ ("German", "de"),
19
+ ("Italian", "it"),
20
+ ("Portuguese", "pt"),
21
+ ("Polish", "pl"),
22
+ ("Turkish", "tr"),
23
+ ("Russian", "ru"),
24
+ ("Dutch", "nl"),
25
+ ("Czech", "cs"),
26
+ ("Arabic", "ar"),
27
+ ("Chinese (Simplified)", "zh-cn"),
28
+ ("Hungarian", "hu"),
29
+ ("Korean", "ko"),
30
+ ("Japanese","ja"),
31
+ ("Hindi", "hi"),
32
  ]
33
  LANG_LABELS = [name for name, _ in LANGS]
34
  LANG_MAP = {name: code for name, code in LANGS}
 
41
  return _tts
42
  try:
43
  import torch
 
44
  try:
45
  torch.set_num_threads(max(1, min(4, os.cpu_count() or 2)))
46
  except Exception:
 
48
  use_gpu = torch.cuda.is_available()
49
  except Exception:
50
  use_gpu = False
51
+
52
  from TTS.api import TTS
53
  try:
54
  _tts = TTS(MODEL_NAME, gpu=use_gpu)
 
74
  if not text:
75
  raise gr.Error("Please enter some text.")
76
 
77
+ # Limit extremely long jobs on free CPU
78
  if len(text) > 1400 and not split_sentences:
79
  raise gr.Error("Text is very long. Enable 'Auto split' or paste a shorter chunk on CPU.")
80
 
81
  lang = LANG_MAP.get(lang_label, "en")
82
  wav_path = ref_audio
83
 
84
+ # Sentence split + also break very long sentences into ~200 chars
85
  chunks = [text]
86
  if split_sentences:
87
+ rough = [s.strip() for s in re.split(r'(?<=[.!?؟。.。،،]|[\u0964\u0965])\s+', text) if s.strip()]
88
  chunks = []
89
  for s in rough:
90
  if len(s) <= 220:
91
  chunks.append(s)
92
  else:
 
93
  for i in range(0, len(s), 200):
94
  chunks.append(s[i:i+200])
95
 
96
  tts = get_tts()
97
  out_wavs = []
98
+
99
+ # Use a temp dir for parts, but write the FINAL file to a persistent temp path
100
  with tempfile.TemporaryDirectory() as td:
101
  total = max(len(chunks), 1)
102
  for i, chunk in enumerate(chunks, 1):
 
106
  data, sr = sf.read(part_path)
107
  out_wavs.append((data, sr))
108
 
109
+ # Concatenate and save to a persistent temp file that survives function return
110
+ if len(out_wavs) == 1:
111
+ final_data, sr = out_wavs[0]
112
+ else:
113
+ sr = out_wavs[0][1]
114
+ final_data = np.concatenate([d for d, _ in out_wavs], axis=0)
115
 
116
+ persistent_tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
117
+ persistent_tmp_path = persistent_tmp.name
118
+ persistent_tmp.close() # path remains; we write to it next
119
+ sf.write(persistent_tmp_path, final_data, sr)
120
+
121
+ return persistent_tmp_path
122
 
123
  # ==== Styles (1 column + colors + hide HF/Gradio UI chrome) ====
124
  CUSTOM_CSS = """
 
131
  padding: 14px !important;
132
  }
133
 
134
+ /* Make the component surfaces non-white */
135
+ #ref, #out_audio, #dl { background: #eef2ff !important; } /* indigo-50-ish */
136
+
137
  /* Primary button color */
138
  #gen button, #gen { background: #10b981 !important; color: #fff !important; }
139
  #gen button:hover { filter: brightness(0.95); }
 
153
  with gr.Column(elem_id="wrap"):
154
  gr.Markdown("## TalkClone — Text-to-Speech with Voice Cloning")
155
  gr.Markdown("Upload a short **reference voice** (10–60s), choose **language**, enter **text**, then **Generate**. "
156
+ "On free CPU, keep text short or enable **Auto split** for speed.")
157
 
158
  ref_audio = gr.Audio(label="Reference Voice (WAV/MP3)", type="filepath", elem_id="ref")
159
+ language = gr.Dropdown(choices=[name for name, _ in LANGS], value="English", label="Language", elem_id="lang")
160
  text = gr.Textbox(label="Text", lines=6, placeholder="Type or paste your text here…", elem_id="txt")
161
  speed = gr.Slider(0.7, 1.3, value=1.0, step=0.05, label="Speed", elem_id="spd")
162
  split = gr.Checkbox(value=True, label="Auto split long text by sentence", elem_id="split")