StaticFace committed on
Commit
4b61f47
·
verified ·
1 Parent(s): b7aaf3f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +42 -13
app.py CHANGED
@@ -1,4 +1,17 @@
1
  import os
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  import sys
3
  import tempfile
4
  import gradio as gr
@@ -8,9 +21,6 @@ from huggingface_hub import snapshot_download
8
 
9
  MODEL_REPO = "KevinAHM/pocket-tts-onnx"
10
 
11
- os.environ.setdefault("TOKENIZERS_PARALLELISM", "false")
12
- os.environ.setdefault("OMP_NUM_THREADS", "2")
13
-
14
  repo_dir = snapshot_download(repo_id=MODEL_REPO)
15
  os.chdir(repo_dir)
16
  sys.path.insert(0, repo_dir)
@@ -19,49 +29,68 @@ from pocket_tts_onnx import PocketTTSOnnx
19
 
20
  tts_cache = {}
21
 
22
- def get_tts(temperature: float, lsd_steps: int):
23
- key = (float(temperature), int(lsd_steps))
24
  if key not in tts_cache:
25
- tts_cache[key] = PocketTTSOnnx(temperature=float(temperature), lsd_steps=int(lsd_steps))
 
 
 
 
 
26
  return tts_cache[key]
27
 
28
- def synthesize(ref_audio_path, text, temperature, lsd_steps):
29
  text = (text or "").strip()
30
  if not ref_audio_path:
31
  raise gr.Error("Upload a reference audio file.")
32
  if not text:
33
  raise gr.Error("Enter some text.")
34
 
35
- tts = get_tts(temperature, lsd_steps)
36
 
37
  audio = tts.generate(text=text, voice=ref_audio_path)
38
 
39
- sr = getattr(tts, "sample_rate", 24000)
40
  audio_np = np.asarray(audio)
41
  if audio_np.ndim > 1:
42
  audio_np = audio_np.squeeze()
43
 
44
  out_path = os.path.join(tempfile.gettempdir(), "pocket_tts_out.wav")
45
  sf.write(out_path, audio_np, sr)
46
- return out_path
 
 
 
 
 
 
 
 
47
 
48
  with gr.Blocks() as demo:
49
  gr.Markdown("# Pocket TTS ONNX (KevinAHM)\nUpload reference audio + text → get playable output audio.")
 
 
50
  with gr.Row():
51
  ref_audio = gr.Audio(label="Reference Audio", type="filepath")
52
  text = gr.Textbox(label="Text", lines=6, value="Hello, this is a test of voice cloning.")
 
53
  with gr.Row():
 
54
  temperature = gr.Slider(0.1, 1.2, value=0.7, step=0.05, label="Temperature")
55
- lsd_steps = gr.Slider(1, 10, value=10, step=1, label="LSD Steps")
 
56
  generate = gr.Button("Generate", variant="primary")
57
  out_audio = gr.Audio(label="Output Audio", type="filepath")
58
 
59
  generate.click(
60
  fn=synthesize,
61
- inputs=[ref_audio, text, temperature, lsd_steps],
62
- outputs=[out_audio],
63
  api_name="generate",
64
  )
65
 
66
  if __name__ == "__main__":
 
67
  demo.launch()
 
1
  import os
2
+
3
# Thread budget handed to every numeric backend configured below.
CPU_THREADS = 16

# All runtime knobs are exported in a single call, in the same key order as
# one-by-one assignments would use. This sits ahead of the remaining imports;
# presumably the libraries read these variables at import/initialisation
# time -- TODO confirm for each backend.
os.environ.update(
    {
        "TOKENIZERS_PARALLELISM": "false",
        **dict.fromkeys(
            (
                "OMP_NUM_THREADS",
                "MKL_NUM_THREADS",
                "OPENBLAS_NUM_THREADS",
                "NUMEXPR_NUM_THREADS",
                "ORT_INTRA_OP_NUM_THREADS",
            ),
            str(CPU_THREADS),
        ),
        "ORT_INTER_OP_NUM_THREADS": "1",
    }
)
14
+
15
  import sys
16
  import tempfile
17
  import gradio as gr
 
21
 
22
  MODEL_REPO = "KevinAHM/pocket-tts-onnx"
23
 
 
 
 
24
  repo_dir = snapshot_download(repo_id=MODEL_REPO)
25
  os.chdir(repo_dir)
26
  sys.path.insert(0, repo_dir)
 
29
 
30
  tts_cache = {}
31
 
32
def get_tts(precision: str, temperature: float, lsd_steps: int, max_cached: int = 4):
    """Return a ``PocketTTSOnnx`` engine for the given settings, reusing cached ones.

    Engines are stored in the module-level ``tts_cache`` dict under the key
    ``(precision, float(temperature), int(lsd_steps))``. The cache is bounded:
    when a new engine has to be built and the cache is full, the least
    recently used engines are dropped first, so moving the UI sliders around
    cannot keep an ever-growing number of engines alive.

    Args:
        precision: Forwarded to ``PocketTTSOnnx(precision=...)``.
        temperature: Forwarded as ``float(temperature)``.
        lsd_steps: Forwarded as ``int(lsd_steps)``.
        max_cached: Maximum number of engines kept in ``tts_cache``
            (values below 1 are treated as 1).

    Returns:
        The cached or newly constructed ``PocketTTSOnnx`` instance.
    """
    key = (precision, float(temperature), int(lsd_steps))

    # Pop on lookup so that re-inserting below moves the entry to the end of
    # the dict; insertion order then doubles as recency order.
    engine = tts_cache.pop(key, None)
    if engine is None:
        # Make room *before* building the new engine so the number of live
        # engines never exceeds the limit.
        keep = max(1, int(max_cached)) - 1
        while len(tts_cache) > keep:
            del tts_cache[next(iter(tts_cache))]
        engine = PocketTTSOnnx(
            precision=precision,
            temperature=float(temperature),
            lsd_steps=int(lsd_steps),
            device="cpu",
        )

    tts_cache[key] = engine
    return engine
42
 
43
def synthesize(ref_audio_path, text, precision, temperature, lsd_steps):
    """Generate speech for ``text`` using ``ref_audio_path`` as the voice reference.

    Args:
        ref_audio_path: Path of the reference audio file; passed to the
            engine as ``voice``.
        text: Text to synthesize; surrounding whitespace is stripped and
            ``None`` is treated as empty.
        precision: Engine precision, forwarded to ``get_tts``.
        temperature: Sampling temperature, forwarded to ``get_tts``.
        lsd_steps: LSD step count, converted to ``int`` and forwarded to
            ``get_tts``.

    Returns:
        A ``(out_path, info)`` tuple: the path of the written WAV file and a
        multi-line string describing the settings that were used.

    Raises:
        gr.Error: If no reference audio was supplied or the text is empty.
    """
    text = (text or "").strip()
    if not ref_audio_path:
        raise gr.Error("Upload a reference audio file.")
    if not text:
        raise gr.Error("Enter some text.")

    tts = get_tts(precision, temperature, int(lsd_steps))

    audio = tts.generate(text=text, voice=ref_audio_path)

    # Fall back to 24000 when the engine does not expose SAMPLE_RATE.
    sr = getattr(tts, "SAMPLE_RATE", 24000)
    audio_np = np.asarray(audio)
    if audio_np.ndim > 1:
        # Drop singleton dimensions before writing.
        audio_np = audio_np.squeeze()

    # Write to a unique file per request. A fixed name in the shared temp
    # directory would be overwritten by the next request and is predictable
    # to other local users. The handle is closed before writing so the path
    # can be reopened by soundfile on every platform.
    with tempfile.NamedTemporaryFile(
        prefix="pocket_tts_", suffix=".wav", delete=False
    ) as tmp:
        out_path = tmp.name
    sf.write(out_path, audio_np, sr)

    info = (
        f"CPU_THREADS = {CPU_THREADS}\n"
        f"precision = {precision}\n"
        f"temperature = {tts.temperature}\n"
        f"lsd_steps (effective) = {tts.lsd_steps}\n"
        f"sample_rate = {sr}"
    )
    return out_path, info
70
 
71
  with gr.Blocks() as demo:
72
  gr.Markdown("# Pocket TTS ONNX (KevinAHM)\nUpload reference audio + text → get playable output audio.")
73
+ info_box = gr.Textbox(label="Runtime Info", value=f"CPU_THREADS = {CPU_THREADS}", lines=5)
74
+
75
  with gr.Row():
76
  ref_audio = gr.Audio(label="Reference Audio", type="filepath")
77
  text = gr.Textbox(label="Text", lines=6, value="Hello, this is a test of voice cloning.")
78
+
79
  with gr.Row():
80
+ precision = gr.Dropdown(["int8", "fp32"], value="int8", label="Precision")
81
  temperature = gr.Slider(0.1, 1.2, value=0.7, step=0.05, label="Temperature")
82
+ lsd_steps = gr.Slider(1, 20, value=10, step=1, label="LSD Steps")
83
+
84
  generate = gr.Button("Generate", variant="primary")
85
  out_audio = gr.Audio(label="Output Audio", type="filepath")
86
 
87
  generate.click(
88
  fn=synthesize,
89
+ inputs=[ref_audio, text, precision, temperature, lsd_steps],
90
+ outputs=[out_audio, info_box],
91
  api_name="generate",
92
  )
93
 
94
  if __name__ == "__main__":
95
+ demo.queue(concurrency_count=1, max_size=16)
96
  demo.launch()