Spaces:

StaticFace
/

TTS

Sleeping

App Files Community

StaticFace committed on Feb 6

Commit

4b61f47

verified ·

1 Parent(s): b7aaf3f

Update app.py

Browse files

Files changed (1) hide show

app.py +42 -13

app.py CHANGED Viewed

@@ -1,4 +1,17 @@
 import os
 import sys
 import tempfile
 import gradio as gr
@@ -8,9 +21,6 @@ from huggingface_hub import snapshot_download
 MODEL_REPO = "KevinAHM/pocket-tts-onnx"
-os.environ.setdefault("TOKENIZERS_PARALLELISM", "false")
-os.environ.setdefault("OMP_NUM_THREADS", "2")
 repo_dir = snapshot_download(repo_id=MODEL_REPO)
 os.chdir(repo_dir)
 sys.path.insert(0, repo_dir)
@@ -19,49 +29,68 @@ from pocket_tts_onnx import PocketTTSOnnx
 tts_cache = {}
-def get_tts(temperature: float, lsd_steps: int):
-    key = (float(temperature), int(lsd_steps))
     if key not in tts_cache:
-        tts_cache[key] = PocketTTSOnnx(temperature=float(temperature), lsd_steps=int(lsd_steps))
     return tts_cache[key]
-def synthesize(ref_audio_path, text, temperature, lsd_steps):
     text = (text or "").strip()
     if not ref_audio_path:
         raise gr.Error("Upload a reference audio file.")
     if not text:
         raise gr.Error("Enter some text.")
-    tts = get_tts(temperature, lsd_steps)
     audio = tts.generate(text=text, voice=ref_audio_path)
-    sr = getattr(tts, "sample_rate", 24000)
     audio_np = np.asarray(audio)
     if audio_np.ndim > 1:
         audio_np = audio_np.squeeze()
     out_path = os.path.join(tempfile.gettempdir(), "pocket_tts_out.wav")
     sf.write(out_path, audio_np, sr)
-    return out_path
 with gr.Blocks() as demo:
     gr.Markdown("# Pocket TTS ONNX (KevinAHM)\nUpload reference audio + text → get playable output audio.")
     with gr.Row():
         ref_audio = gr.Audio(label="Reference Audio", type="filepath")
         text = gr.Textbox(label="Text", lines=6, value="Hello, this is a test of voice cloning.")
     with gr.Row():
         temperature = gr.Slider(0.1, 1.2, value=0.7, step=0.05, label="Temperature")
-        lsd_steps = gr.Slider(1, 10, value=10, step=1, label="LSD Steps")
     generate = gr.Button("Generate", variant="primary")
     out_audio = gr.Audio(label="Output Audio", type="filepath")
     generate.click(
         fn=synthesize,
-        inputs=[ref_audio, text, temperature, lsd_steps],
-        outputs=[out_audio],
         api_name="generate",
     )
 if __name__ == "__main__":
     demo.launch()

 import os
+CPU_THREADS = 16
+os.environ["TOKENIZERS_PARALLELISM"] = "false"
+os.environ["OMP_NUM_THREADS"] = str(CPU_THREADS)
+os.environ["MKL_NUM_THREADS"] = str(CPU_THREADS)
+os.environ["OPENBLAS_NUM_THREADS"] = str(CPU_THREADS)
+os.environ["NUMEXPR_NUM_THREADS"] = str(CPU_THREADS)
+os.environ["ORT_INTRA_OP_NUM_THREADS"] = str(CPU_THREADS)
+os.environ["ORT_INTER_OP_NUM_THREADS"] = "1"
 import sys
 import tempfile
 import gradio as gr
 MODEL_REPO = "KevinAHM/pocket-tts-onnx"
 repo_dir = snapshot_download(repo_id=MODEL_REPO)
 os.chdir(repo_dir)
 sys.path.insert(0, repo_dir)
 tts_cache = {}
+def get_tts(precision: str, temperature: float, lsd_steps: int):
+    key = (precision, float(temperature), int(lsd_steps))
     if key not in tts_cache:
+        tts_cache[key] = PocketTTSOnnx(
+            precision=precision,
+            temperature=float(temperature),
+            lsd_steps=int(lsd_steps),
+            device="cpu",
+        )
     return tts_cache[key]
+def synthesize(ref_audio_path, text, precision, temperature, lsd_steps):
     text = (text or "").strip()
     if not ref_audio_path:
         raise gr.Error("Upload a reference audio file.")
     if not text:
         raise gr.Error("Enter some text.")
+    tts = get_tts(precision, temperature, int(lsd_steps))
     audio = tts.generate(text=text, voice=ref_audio_path)
+    sr = getattr(tts, "SAMPLE_RATE", 24000)
     audio_np = np.asarray(audio)
     if audio_np.ndim > 1:
         audio_np = audio_np.squeeze()
     out_path = os.path.join(tempfile.gettempdir(), "pocket_tts_out.wav")
     sf.write(out_path, audio_np, sr)
+    info = (
+        f"CPU_THREADS = {CPU_THREADS}\n"
+        f"precision = {precision}\n"
+        f"temperature = {tts.temperature}\n"
+        f"lsd_steps (effective) = {tts.lsd_steps}\n"
+        f"sample_rate = {sr}"
+    )
+    return out_path, info
 with gr.Blocks() as demo:
     gr.Markdown("# Pocket TTS ONNX (KevinAHM)\nUpload reference audio + text → get playable output audio.")
+    info_box = gr.Textbox(label="Runtime Info", value=f"CPU_THREADS = {CPU_THREADS}", lines=5)
     with gr.Row():
         ref_audio = gr.Audio(label="Reference Audio", type="filepath")
         text = gr.Textbox(label="Text", lines=6, value="Hello, this is a test of voice cloning.")
     with gr.Row():
+        precision = gr.Dropdown(["int8", "fp32"], value="int8", label="Precision")
         temperature = gr.Slider(0.1, 1.2, value=0.7, step=0.05, label="Temperature")
+        lsd_steps = gr.Slider(1, 20, value=10, step=1, label="LSD Steps")
     generate = gr.Button("Generate", variant="primary")
     out_audio = gr.Audio(label="Output Audio", type="filepath")
     generate.click(
         fn=synthesize,
+        inputs=[ref_audio, text, precision, temperature, lsd_steps],
+        outputs=[out_audio, info_box],
         api_name="generate",
     )
 if __name__ == "__main__":
+    demo.queue(concurrency_count=1, max_size=16)
     demo.launch()