Spaces:

StaticFace
/

TTS

Sleeping

App Files Community

StaticFace committed on Feb 6

Commit

29082ed

verified ·

1 Parent(s): f000001

Update app.py

Browse files

Files changed (1) hide show

app.py +9 -19

app.py CHANGED Viewed

@@ -11,28 +11,19 @@ MODEL_REPO = "KevinAHM/pocket-tts-onnx"
 os.environ.setdefault("TOKENIZERS_PARALLELISM", "false")
 os.environ.setdefault("OMP_NUM_THREADS", "2")
-_repo_dir = snapshot_download(
-    repo_id=MODEL_REPO,
-    allow_patterns=[
-        "pocket_tts_onnx.py",
-        "onnx/*",
-        "tokenizer.model",
-        "text_conditioner.onnx",
-        "reference_sample.wav",
-        "requirements.txt",
-    ],
-)
-sys.path.insert(0, _repo_dir)
 from pocket_tts_onnx import PocketTTSOnnx
-_tts_cache = {}
 def get_tts(temperature: float, lsd_steps: int):
     key = (float(temperature), int(lsd_steps))
-    if key not in _tts_cache:
-        _tts_cache[key] = PocketTTSOnnx(temperature=float(temperature), lsd_steps=int(lsd_steps))
-    return _tts_cache[key]
 def synthesize(ref_audio_path, text, temperature, lsd_steps):
     text = (text or "").strip()
@@ -47,7 +38,6 @@ def synthesize(ref_audio_path, text, temperature, lsd_steps):
     sr = getattr(tts, "sample_rate", 24000)
     audio_np = np.asarray(audio)
     if audio_np.ndim > 1:
         audio_np = audio_np.squeeze()
@@ -56,7 +46,7 @@ def synthesize(ref_audio_path, text, temperature, lsd_steps):
     return out_path
 with gr.Blocks() as demo:
-    gr.Markdown("# Pocket TTS ONNX (Voice Cloning)\nUpload a short reference voice sample, type text, and generate audio.")
     with gr.Row():
         ref_audio = gr.Audio(label="Reference Audio", type="filepath")
         text = gr.Textbox(label="Text", lines=6, value="Hello, this is a test of voice cloning.")
@@ -64,7 +54,7 @@ with gr.Blocks() as demo:
         temperature = gr.Slider(0.1, 1.2, value=0.7, step=0.05, label="Temperature")
         lsd_steps = gr.Slider(1, 10, value=10, step=1, label="LSD Steps")
     generate = gr.Button("Generate", variant="primary")
-    out_audio = gr.Audio(label="Output", type="filepath")
     generate.click(
         fn=synthesize,

 os.environ.setdefault("TOKENIZERS_PARALLELISM", "false")
 os.environ.setdefault("OMP_NUM_THREADS", "2")
+repo_dir = snapshot_download(repo_id=MODEL_REPO)
+os.chdir(repo_dir)
+sys.path.insert(0, repo_dir)
 from pocket_tts_onnx import PocketTTSOnnx
+tts_cache = {}
 def get_tts(temperature: float, lsd_steps: int):
     key = (float(temperature), int(lsd_steps))
+    if key not in tts_cache:
+        tts_cache[key] = PocketTTSOnnx(temperature=float(temperature), lsd_steps=int(lsd_steps))
+    return tts_cache[key]
 def synthesize(ref_audio_path, text, temperature, lsd_steps):
     text = (text or "").strip()
     sr = getattr(tts, "sample_rate", 24000)
     audio_np = np.asarray(audio)
     if audio_np.ndim > 1:
         audio_np = audio_np.squeeze()
     return out_path
 with gr.Blocks() as demo:
+    gr.Markdown("# Pocket TTS ONNX (KevinAHM)\nUpload reference audio + text → get playable output audio.")
     with gr.Row():
         ref_audio = gr.Audio(label="Reference Audio", type="filepath")
         text = gr.Textbox(label="Text", lines=6, value="Hello, this is a test of voice cloning.")
         temperature = gr.Slider(0.1, 1.2, value=0.7, step=0.05, label="Temperature")
         lsd_steps = gr.Slider(1, 10, value=10, step=1, label="LSD Steps")
     generate = gr.Button("Generate", variant="primary")
+    out_audio = gr.Audio(label="Output Audio", type="filepath")
     generate.click(
         fn=synthesize,