StaticFace committed on
Commit
ffccc5e
·
verified ·
1 Parent(s): a77e673

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +77 -0
app.py ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ import tempfile
4
+ import gradio as gr
5
+ import numpy as np
6
+ import soundfile as sf
7
+ from huggingface_hub import snapshot_download
8
+
9
# Hub repository that provides the ONNX model files and the
# `pocket_tts_onnx` inference module imported further down.
MODEL_REPO = "KevinAHM/pocket-tts-onnx"

# Only fill in defaults: values already present in the environment win.
# These are set before `pocket_tts_onnx` is imported below.
for _env_name, _env_default in (
    ("TOKENIZERS_PARALLELISM", "false"),
    ("OMP_NUM_THREADS", "2"),
):
    os.environ.setdefault(_env_name, _env_default)

# Restrict the snapshot to the files this app actually needs.
_repo_files = [
    "pocket_tts_onnx.py",
    "onnx/*",
    "tokenizer.model",
    "text_conditioner.onnx",
    "reference_sample.wav",
    "requirements.txt",
]
_repo_dir = snapshot_download(repo_id=MODEL_REPO, allow_patterns=_repo_files)

# `pocket_tts_onnx.py` is part of the downloaded snapshot, so the snapshot
# directory has to be importable before the import that follows.
sys.path.insert(0, _repo_dir)

from pocket_tts_onnx import PocketTTSOnnx  # noqa: E402  (must follow sys.path setup)

# Engine cache keyed by (temperature, lsd_steps); filled lazily by get_tts().
_tts_cache = dict()
30
+
31
# Upper bound on simultaneously cached engines. The UI sliders allow hundreds
# of distinct (temperature, lsd_steps) pairs; caching one engine per pair
# without a limit would let memory grow for the whole process lifetime.
_TTS_CACHE_MAX_ENTRIES = 4


def get_tts(temperature: float, lsd_steps: int):
    """Return a cached ``PocketTTSOnnx`` engine for the given settings.

    Engines are keyed by ``(float(temperature), int(lsd_steps))`` and kept in
    the module-level ``_tts_cache``. The cache is bounded: once it holds
    ``_TTS_CACHE_MAX_ENTRIES`` engines, the least recently used one is
    dropped before a new engine is created.

    Args:
        temperature: Sampling temperature forwarded to ``PocketTTSOnnx``.
        lsd_steps: Step count forwarded to ``PocketTTSOnnx`` as ``lsd_steps``.

    Returns:
        The ``PocketTTSOnnx`` instance configured with these settings.
    """
    key = (float(temperature), int(lsd_steps))

    if key in _tts_cache:
        # Re-insert on a hit so dict insertion order tracks recency of use.
        engine = _tts_cache.pop(key)
    else:
        # Evict first so the old and new engines are not both held at once.
        while len(_tts_cache) >= _TTS_CACHE_MAX_ENTRIES:
            oldest_key = next(iter(_tts_cache))
            del _tts_cache[oldest_key]
        engine = PocketTTSOnnx(temperature=key[0], lsd_steps=key[1])

    _tts_cache[key] = engine
    return engine
36
+
37
def synthesize(ref_audio_path, text, temperature, lsd_steps):
    """Generate speech for ``text`` using ``ref_audio_path`` as the voice.

    Args:
        ref_audio_path: Filesystem path of the reference audio (the UI passes
            the ``gr.Audio`` value with ``type="filepath"``).
        text: Text to speak; surrounding whitespace is stripped.
        temperature: Sampling temperature used to select the TTS engine.
        lsd_steps: ``lsd_steps`` setting used to select the TTS engine.

    Returns:
        Path to a newly written WAV file containing the generated audio.

    Raises:
        gr.Error: If no reference audio was supplied or the text is empty.
    """
    text = (text or "").strip()
    if not ref_audio_path:
        raise gr.Error("Upload a reference audio file.")
    if not text:
        raise gr.Error("Enter some text.")

    tts = get_tts(temperature, lsd_steps)
    audio = tts.generate(text=text, voice=ref_audio_path)

    # Fall back to 24 kHz when the engine does not expose a sample rate.
    sr = getattr(tts, "sample_rate", 24000)

    audio_np = np.asarray(audio)
    if audio_np.ndim > 1:
        # Drop singleton dimensions before writing the samples.
        audio_np = audio_np.squeeze()

    # A unique file per call: a single fixed name in the temp directory would
    # be overwritten by any concurrent or subsequent request, so one user
    # could be served another request's audio.
    fd, out_path = tempfile.mkstemp(prefix="pocket_tts_", suffix=".wav")
    os.close(fd)  # soundfile reopens the file by path
    sf.write(out_path, audio_np, sr)
    return out_path
57
+
58
# Gradio UI: reference audio + text on the first row, generation settings on
# the second, then the trigger button and the output player.
with gr.Blocks() as demo:
    gr.Markdown(
        "# Pocket TTS ONNX (Voice Cloning)\n"
        "Upload a short reference voice sample, type text, and generate audio."
    )

    with gr.Row():
        ref_audio = gr.Audio(type="filepath", label="Reference Audio")
        text = gr.Textbox(
            value="Hello, this is a test of voice cloning.",
            lines=6,
            label="Text",
        )

    with gr.Row():
        temperature = gr.Slider(
            minimum=0.1,
            maximum=1.2,
            step=0.05,
            value=0.7,
            label="Temperature",
        )
        lsd_steps = gr.Slider(
            minimum=1,
            maximum=10,
            step=1,
            value=10,
            label="LSD Steps",
        )

    generate = gr.Button("Generate", variant="primary")
    out_audio = gr.Audio(type="filepath", label="Output")

    # Wire the button to synthesize(); also exposed as the "generate" API route.
    _click_inputs = [ref_audio, text, temperature, lsd_steps]
    _click_outputs = [out_audio]
    generate.click(
        fn=synthesize,
        inputs=_click_inputs,
        outputs=_click_outputs,
        api_name="generate",
    )


if __name__ == "__main__":
    demo.launch()