Update app.py
Browse files
app.py
CHANGED
|
@@ -10,55 +10,40 @@ os.environ.setdefault("COQUI_TOS_AGREED", "1")
|
|
| 10 |
|
| 11 |
MODEL_NAME = "tts_models/multilingual/multi-dataset/xtts_v2"

# Lazy-load TTS so the Space starts quickly and fails less often
_tts = None
def get_tts():
    """Return the module-cached Coqui TTS instance, constructing it on first call.

    Uses GPU when torch reports CUDA availability; any failure probing torch
    falls back to CPU. Returns the shared ``TTS`` object.
    """
    global _tts
    if _tts is not None:
        return _tts
    # Try GPU if torch+CUDA is present; otherwise fall back to CPU.
    try:
        import torch
        use_gpu = torch.cuda.is_available()
    except Exception:
        use_gpu = False
    from TTS.api import TTS
    try:
        # Some versions accept gpu=…
        _tts = TTS(MODEL_NAME, gpu=use_gpu)
    except TypeError:
        # Older/newer TTS builds without the gpu kwarg.
        _tts = TTS(MODEL_NAME)
    return _tts
|
| 33 |
|
| 34 |
LANGS = [
|
| 35 |
-
("English", "en"),
|
| 36 |
-
("
|
| 37 |
-
("
|
| 38 |
-
("Arabic", "ar"),
|
| 39 |
-
("French", "fr"),
|
| 40 |
-
("German", "de"),
|
| 41 |
-
("Spanish", "es"),
|
| 42 |
-
("Italian", "it"),
|
| 43 |
-
("Portuguese", "pt"),
|
| 44 |
-
("Turkish", "tr"),
|
| 45 |
]
|
| 46 |
|
| 47 |
def clean_text(t: str) -> str:
    """Collapse all whitespace runs in *t* to single spaces; None/empty -> ''."""
    return " ".join((t or "").strip().split())
|
| 49 |
|
| 50 |
def synth_to_file_safe(tts, txt, out_path, wav_path, lang, speed):
|
| 51 |
-
# XTTS variants differ on "speed" support
|
| 52 |
try:
|
| 53 |
-
tts.tts_to_file(
|
| 54 |
-
|
| 55 |
-
speaker_wav=wav_path, language=lang, speed=speed
|
| 56 |
-
)
|
| 57 |
except TypeError:
|
| 58 |
-
tts.tts_to_file(
|
| 59 |
-
|
| 60 |
-
speaker_wav=wav_path, language=lang
|
| 61 |
-
)
|
| 62 |
|
| 63 |
def tts_clone(text, ref_audio, language_code, speed, split_sentences, progress=gr.Progress(track_tqdm=True)):
|
| 64 |
if ref_audio is None:
|
|
@@ -70,11 +55,9 @@ def tts_clone(text, ref_audio, language_code, speed, split_sentences, progress=g
|
|
| 70 |
wav_path = ref_audio
|
| 71 |
chunks = [text]
|
| 72 |
if split_sentences:
|
| 73 |
-
# Split on sentence boundaries including Urdu/Arabic punctuation
|
| 74 |
chunks = [s.strip() for s in re.split(r'(?<=[.!?؟۔])\s+', text) if s.strip()]
|
| 75 |
|
| 76 |
tts = get_tts()
|
| 77 |
-
|
| 78 |
out_wavs = []
|
| 79 |
with tempfile.TemporaryDirectory() as td:
|
| 80 |
for i, chunk in enumerate(chunks, 1):
|
|
@@ -94,14 +77,11 @@ def tts_clone(text, ref_audio, language_code, speed, split_sentences, progress=g
|
|
| 94 |
sf.write(final_path, final_data, sr)
|
| 95 |
return final_path
|
| 96 |
|
| 97 |
-
# Custom CSS for the Gradio UI (presumably passed as Blocks(css=...) — confirm).
HIDE_CSS = """
/* compact one-column center */
.gradio-container { max-width: 880px !important; margin: 0 auto; }
/* hide footer & badges & embed/info areas */
footer, .footer, #footer, [data-testid="block-analytics"], [data-testid="embed-info"] { display:none !important; }
a[href*="gradio.live"], a[href*="gradio.app"], a[href*="hf.space"] { display:none !important; }
/* hide settings button in many themes */
button[aria-label="Settings"] { display:none !important; }
"""
|
| 107 |
|
|
@@ -117,7 +97,8 @@ with gr.Blocks(
|
|
| 117 |
)
|
| 118 |
|
| 119 |
ref_audio = gr.Audio(label="Reference Voice (WAV/MP3)", type="filepath")
|
| 120 |
-
|
|
|
|
| 121 |
text = gr.Textbox(label="Text", lines=6, placeholder="Type or paste your text here…")
|
| 122 |
speed = gr.Slider(0.7, 1.3, value=1.0, step=0.05, label="Speed")
|
| 123 |
split = gr.Checkbox(value=True, label="Auto split long text by sentence")
|
|
@@ -130,18 +111,24 @@ with gr.Blocks(
|
|
| 130 |
path = tts_clone(text, ref_audio, language, speed, split)
|
| 131 |
return path, path
|
| 132 |
|
| 133 |
-
submit.click(
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
outputs=[output, download]
|
| 137 |
-
)
|
| 138 |
|
| 139 |
if __name__ == "__main__":
|
| 140 |
-
# IMPORTANT on Spaces: bind to the port Spaces gives you
|
| 141 |
port = int(os.environ.get("PORT", "7860"))
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
|
| 11 |
MODEL_NAME = "tts_models/multilingual/multi-dataset/xtts_v2"

_tts = None


def get_tts():
    """Lazy-load TTS; try GPU if available, else CPU.

    The constructed ``TTS`` object is cached in module-level ``_tts`` so the
    model is loaded at most once per process.
    """
    global _tts
    if _tts is None:
        try:
            import torch
            gpu_ok = torch.cuda.is_available()
        except Exception:
            gpu_ok = False
        from TTS.api import TTS
        try:
            _tts = TTS(MODEL_NAME, gpu=gpu_ok)
        except TypeError:
            # This TTS build does not accept a gpu kwarg.
            _tts = TTS(MODEL_NAME)
    return _tts
|
| 30 |
|
| 31 |
# (display name, XTTS language code) pairs offered in the language dropdown.
LANGS = [
    ("English", "en"),
    ("Urdu", "ur"),
    ("Hindi", "hi"),
    ("Arabic", "ar"),
    ("French", "fr"),
    ("German", "de"),
    ("Spanish", "es"),
    ("Italian", "it"),
    ("Portuguese", "pt"),
    ("Turkish", "tr"),
]
|
| 36 |
|
| 37 |
def clean_text(t: str) -> str:
    """Return *t* with every whitespace run collapsed to a single space.

    A falsy input (``None`` or ``""``) yields the empty string.
    """
    words = (t or "").split()
    return " ".join(words)
|
| 39 |
|
| 40 |
def synth_to_file_safe(tts, txt, out_path, wav_path, lang, speed):
    """Synthesize *txt* into *out_path*, cloning the voice in *wav_path*.

    XTTS variants differ on whether ``tts_to_file`` accepts ``speed``; when
    the first call rejects it with ``TypeError``, retry without the kwarg.
    """
    common = dict(text=txt, file_path=out_path, speaker_wav=wav_path, language=lang)
    try:
        tts.tts_to_file(speed=speed, **common)
    except TypeError:
        tts.tts_to_file(**common)
|
|
|
|
|
|
|
| 47 |
|
| 48 |
def tts_clone(text, ref_audio, language_code, speed, split_sentences, progress=gr.Progress(track_tqdm=True)):
|
| 49 |
if ref_audio is None:
|
|
|
|
| 55 |
wav_path = ref_audio
|
| 56 |
chunks = [text]
|
| 57 |
if split_sentences:
|
|
|
|
| 58 |
chunks = [s.strip() for s in re.split(r'(?<=[.!?؟۔])\s+', text) if s.strip()]
|
| 59 |
|
| 60 |
tts = get_tts()
|
|
|
|
| 61 |
out_wavs = []
|
| 62 |
with tempfile.TemporaryDirectory() as td:
|
| 63 |
for i, chunk in enumerate(chunks, 1):
|
|
|
|
| 77 |
sf.write(final_path, final_data, sr)
|
| 78 |
return final_path
|
| 79 |
|
| 80 |
+
# One-column & hide footer/API/settings
# Selectors target Gradio's footer, analytics/embed test-ids, external badge
# links, and the settings button (presumably injected via Blocks(css=...) —
# confirm against the Blocks constructor below).
HIDE_CSS = """
.gradio-container { max-width: 880px !important; margin: 0 auto; }
footer, .footer, #footer, [data-testid="block-analytics"], [data-testid="embed-info"] { display:none !important; }
a[href*="gradio.live"], a[href*="gradio.app"], a[href*="hf.space"] { display:none !important; }
button[aria-label="Settings"] { display:none !important; }
"""
|
| 87 |
|
|
|
|
| 97 |
)
|
| 98 |
|
| 99 |
ref_audio = gr.Audio(label="Reference Voice (WAV/MP3)", type="filepath")
|
| 100 |
+
# Use codes to avoid tuple issues in some Gradio builds
|
| 101 |
+
language = gr.Dropdown(choices=[code for _, code in LANGS], value="en", label="Language")
|
| 102 |
text = gr.Textbox(label="Text", lines=6, placeholder="Type or paste your text here…")
|
| 103 |
speed = gr.Slider(0.7, 1.3, value=1.0, step=0.05, label="Speed")
|
| 104 |
split = gr.Checkbox(value=True, label="Auto split long text by sentence")
|
|
|
|
| 111 |
path = tts_clone(text, ref_audio, language, speed, split)
|
| 112 |
return path, path
|
| 113 |
|
| 114 |
+
submit.click(run_and_return,
|
| 115 |
+
inputs=[text, ref_audio, language, speed, split],
|
| 116 |
+
outputs=[output, download])
|
|
|
|
|
|
|
| 117 |
|
| 118 |
if __name__ == "__main__":
    # IMPORTANT on Spaces: bind to the port Spaces gives you.
    port = int(os.environ.get("PORT", "7860"))
    launch_kwargs = dict(
        server_name="0.0.0.0",
        server_port=port,
        show_error=True,
        show_api=False,
    )
    try:
        demo.queue().launch(**launch_kwargs)
    except TypeError:
        # For very old/new Gradio where queue() signature differs
        demo.launch(**launch_kwargs)
|