StaticFace committed on
Commit
29082ed
·
verified ·
1 Parent(s): f000001

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -19
app.py CHANGED
@@ -11,28 +11,19 @@ MODEL_REPO = "KevinAHM/pocket-tts-onnx"
11
  os.environ.setdefault("TOKENIZERS_PARALLELISM", "false")
12
  os.environ.setdefault("OMP_NUM_THREADS", "2")
13
 
14
- _repo_dir = snapshot_download(
15
- repo_id=MODEL_REPO,
16
- allow_patterns=[
17
- "pocket_tts_onnx.py",
18
- "onnx/*",
19
- "tokenizer.model",
20
- "text_conditioner.onnx",
21
- "reference_sample.wav",
22
- "requirements.txt",
23
- ],
24
- )
25
- sys.path.insert(0, _repo_dir)
26
 
27
  from pocket_tts_onnx import PocketTTSOnnx
28
 
29
- _tts_cache = {}
30
 
31
  def get_tts(temperature: float, lsd_steps: int):
32
  key = (float(temperature), int(lsd_steps))
33
- if key not in _tts_cache:
34
- _tts_cache[key] = PocketTTSOnnx(temperature=float(temperature), lsd_steps=int(lsd_steps))
35
- return _tts_cache[key]
36
 
37
  def synthesize(ref_audio_path, text, temperature, lsd_steps):
38
  text = (text or "").strip()
@@ -47,7 +38,6 @@ def synthesize(ref_audio_path, text, temperature, lsd_steps):
47
 
48
  sr = getattr(tts, "sample_rate", 24000)
49
  audio_np = np.asarray(audio)
50
-
51
  if audio_np.ndim > 1:
52
  audio_np = audio_np.squeeze()
53
 
@@ -56,7 +46,7 @@ def synthesize(ref_audio_path, text, temperature, lsd_steps):
56
  return out_path
57
 
58
  with gr.Blocks() as demo:
59
- gr.Markdown("# Pocket TTS ONNX (Voice Cloning)\nUpload a short reference voice sample, type text, and generate audio.")
60
  with gr.Row():
61
  ref_audio = gr.Audio(label="Reference Audio", type="filepath")
62
  text = gr.Textbox(label="Text", lines=6, value="Hello, this is a test of voice cloning.")
@@ -64,7 +54,7 @@ with gr.Blocks() as demo:
64
  temperature = gr.Slider(0.1, 1.2, value=0.7, step=0.05, label="Temperature")
65
  lsd_steps = gr.Slider(1, 10, value=10, step=1, label="LSD Steps")
66
  generate = gr.Button("Generate", variant="primary")
67
- out_audio = gr.Audio(label="Output", type="filepath")
68
 
69
  generate.click(
70
  fn=synthesize,
 
11
  os.environ.setdefault("TOKENIZERS_PARALLELISM", "false")
12
  os.environ.setdefault("OMP_NUM_THREADS", "2")
13
 
14
+ repo_dir = snapshot_download(repo_id=MODEL_REPO)
15
+ os.chdir(repo_dir)
16
+ sys.path.insert(0, repo_dir)
 
 
 
 
 
 
 
 
 
17
 
18
  from pocket_tts_onnx import PocketTTSOnnx
19
 
20
+ tts_cache = {}
21
 
22
  def get_tts(temperature: float, lsd_steps: int):
23
  key = (float(temperature), int(lsd_steps))
24
+ if key not in tts_cache:
25
+ tts_cache[key] = PocketTTSOnnx(temperature=float(temperature), lsd_steps=int(lsd_steps))
26
+ return tts_cache[key]
27
 
28
  def synthesize(ref_audio_path, text, temperature, lsd_steps):
29
  text = (text or "").strip()
 
38
 
39
  sr = getattr(tts, "sample_rate", 24000)
40
  audio_np = np.asarray(audio)
 
41
  if audio_np.ndim > 1:
42
  audio_np = audio_np.squeeze()
43
 
 
46
  return out_path
47
 
48
  with gr.Blocks() as demo:
49
+ gr.Markdown("# Pocket TTS ONNX (KevinAHM)\nUpload reference audio + text get playable output audio.")
50
  with gr.Row():
51
  ref_audio = gr.Audio(label="Reference Audio", type="filepath")
52
  text = gr.Textbox(label="Text", lines=6, value="Hello, this is a test of voice cloning.")
 
54
  temperature = gr.Slider(0.1, 1.2, value=0.7, step=0.05, label="Temperature")
55
  lsd_steps = gr.Slider(1, 10, value=10, step=1, label="LSD Steps")
56
  generate = gr.Button("Generate", variant="primary")
57
+ out_audio = gr.Audio(label="Output Audio", type="filepath")
58
 
59
  generate.click(
60
  fn=synthesize,