Percy3822 committed on
Commit
a9c381a
·
verified ·
1 Parent(s): e930fdf

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +40 -18
app.py CHANGED
@@ -2,6 +2,8 @@
2
  import asyncio
3
  import json
4
  import os
 
 
5
  import time
6
  from pathlib import Path
7
  from typing import Dict, Optional, Tuple
@@ -51,16 +53,30 @@ def _safe_unlink(path: Path):
51
  pass
52
 
53
  # -------------------------
54
- # Piper CLI integration
55
  # -------------------------
56
 
57
- PIPER_BIN = os.getenv("PIPER_BIN", "piper")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58
 
59
  HF_REPO_BASE = "https://huggingface.co/rhasspy/piper-voices/resolve"
60
- HF_REV = os.getenv("PIPER_VOICES_REV", "main") # set a commit hash here if you want pinning
61
 
62
- # sanity thresholds (bytes) to detect corrupt downloads
63
- MIN_ONNX_BYTES = int(os.getenv("MIN_ONNX_BYTES", "5000000")) # >= ~5MB (real models are 10s–100s MB)
64
  MIN_JSON_BYTES = int(os.getenv("MIN_JSON_BYTES", "1000")) # >= 1KB
65
 
66
  # (lang, country, family, quality, basename)
@@ -103,16 +119,16 @@ def _file_ok(p: Path, min_bytes: int) -> bool:
103
  return False
104
 
105
  def _read_sr_from_cfg(cfg_path: Path) -> int:
106
- import json as _json
107
  try:
108
  with open(cfg_path, "r", encoding="utf-8") as f:
109
- j = _json.load(f)
110
  sr = int(j.get("sample_rate", 22050))
 
111
  return sr if sr in (16000, 22050, 24000, 44100, 48000) else 22050
112
  except Exception:
113
  return 22050
114
 
115
- def ensure_voice(voice_id: str) -> Dict[str, Path | int]:
116
  """Ensure voice .onnx and .onnx.json exist locally with sane sizes. Returns paths and SR."""
117
  # Aliases
118
  if voice_id.lower() in ("en-us", "en_us", "english"):
@@ -146,6 +162,10 @@ def ensure_voice(voice_id: str) -> Dict[str, Path | int]:
146
  sr = _read_sr_from_cfg(cfg)
147
  return {"model": model, "config": cfg, "sr": sr}
148
 
 
 
 
 
149
  def build_piper_cmd(
150
  text: str, voice_id: str, to_stdout: bool,
151
  out_path: Optional[Path] = None,
@@ -153,7 +173,7 @@ def build_piper_cmd(
153
  ) -> list:
154
  vc = ensure_voice(voice_id)
155
  cmd = [
156
- PIPER_BIN,
157
  "-m", str(vc["model"]),
158
  "-c", str(vc["config"]),
159
  "-q",
@@ -162,12 +182,12 @@ def build_piper_cmd(
162
  "--noise_w", str(noise_w),
163
  ]
164
  if to_stdout:
165
- # Stream RAW PCM (16-bit little-endian). Simpler to play on clients.
166
  cmd += ["--raw", "-f", "-"]
167
  else:
168
  if out_path is None:
169
  raise ValueError("out_path required when to_stdout=False")
170
- # File output: Piper writes WAV by default.
171
  cmd += ["-f", str(out_path)]
172
  return cmd
173
 
@@ -177,7 +197,8 @@ async def piper_to_file(text, voice, out_path, length_scale, noise_scale, noise_
177
  proc = await asyncio.create_subprocess_exec(
178
  *cmd, stdin=asyncio.subprocess.PIPE, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
179
  )
180
- proc.stdin.write((text + "\n").encode("utf-8")) # newline to terminate
 
181
  await proc.stdin.drain()
182
  proc.stdin.close()
183
  await proc.wait()
@@ -185,9 +206,8 @@ async def piper_to_file(text, voice, out_path, length_scale, noise_scale, noise_
185
  stderr = (await proc.stderr.read()).decode("utf-8", "ignore")
186
  raise RuntimeError(f"Piper failed (code {proc.returncode}).\n{stderr}")
187
 
188
- async def piper_stream_raw(text, voice, ws: WebSocket, length_scale, noise_scale, noise_w, sr: int):
189
  """Stream RAW PCM frames over WS; send stderr as 'log' events; signal 'done'."""
190
- # We already announced 'ready' with the correct sr in the init step.
191
  cmd = build_piper_cmd(text, voice, to_stdout=True,
192
  length_scale=length_scale, noise_scale=noise_scale, noise_w=noise_w)
193
  proc = await asyncio.create_subprocess_exec(
@@ -372,6 +392,7 @@ async def speak_wav_get(
372
  background_tasks.add_task(_safe_unlink, out_path)
373
  return FileResponse(out_path, media_type="audio/wav", filename=out_path.name)
374
 
 
375
  @app.get("/debug/voices")
376
  def debug_voices(redownload: bool = Query(False, description="Force re-download bad/missing files")):
377
  out = {"dir": str(VOICES_DIR), "voices": []}
@@ -403,12 +424,13 @@ def debug_voices(redownload: bool = Query(False, description="Force re-download
403
  info["redownload_error"] = str(e)
404
  return out
405
 
 
406
  @app.websocket("/ws/tts")
407
  async def ws_tts(ws: WebSocket):
408
  await ws.accept()
409
  voice = DEFAULT_VOICE
410
  length_scale, noise_scale, noise_w = 1.08, 0.35, 0.90
411
- voice_sr = 22050 # will be overwritten by ensure_voice
412
 
413
  try:
414
  while True:
@@ -430,15 +452,15 @@ async def ws_tts(ws: WebSocket):
430
  await ws.send_text(json.dumps({"event": "error", "detail": str(e)}))
431
  await ws.close()
432
  return
433
- # announce the correct SR so the client opens the audio device properly
434
  await ws.send_text(json.dumps({"event": "ready", "sr": voice_sr, "channels": DEFAULT_CH}))
435
  elif ev == "speak":
436
  text = (data.get("text") or "").strip()
437
  if not text:
438
  await ws.send_text(json.dumps({"event": "error", "detail": "empty text"}))
439
  continue
440
- await piper_stream_raw(text, voice, ws, length_scale, noise_scale, noise_w, sr=voice_sr)
441
- # ignore other events
442
  except WebSocketDisconnect:
443
  return
444
  except Exception as e:
 
2
  import asyncio
3
  import json
4
  import os
5
+ import sys
6
+ import shutil
7
  import time
8
  from pathlib import Path
9
  from typing import Dict, Optional, Tuple
 
53
  pass
54
 
55
  # -------------------------
56
+ # Piper command resolution
57
  # -------------------------
58
 
59
def resolve_piper_cmd():
    """Return the argv prefix (list of strings) used to launch Piper.

    Resolution order:
      1. The ``PIPER_BIN`` environment variable, parsed with shell-style
         quoting so a quoted path containing spaces stays one argument
         (e.g. ``"/opt/my tools/piper" --cuda``).
      2. A ``piper`` executable found on ``PATH``.
      3. ``<current interpreter> -m piper`` when no console script is found.

    A ``PIPER_BIN`` that is empty or only whitespace is ignored, so the
    returned list is never empty.
    """
    import shlex

    env = os.getenv("PIPER_BIN")
    if env:
        try:
            if os.name == "nt":
                # POSIX-mode shlex treats backslashes as escapes and would
                # mangle Windows paths; non-POSIX mode keeps them but leaves
                # the surrounding quotes on each token, so strip those.
                parts = [tok.strip('"') for tok in shlex.split(env, posix=False)]
            else:
                parts = shlex.split(env)
        except ValueError:
            # Unbalanced quotes: fall back to plain whitespace splitting
            # rather than failing at import time.
            parts = env.split()
        if parts:
            return parts
    path = shutil.which("piper")
    if path:
        return [path]
    # fallback to module runner if console script isn't on PATH
    return [sys.executable, "-m", "piper"]
68
+
69
+ PIPER_CMD = resolve_piper_cmd()
70
+
71
+ # -------------------------
72
+ # Voice download & checks
73
+ # -------------------------
74
 
75
  HF_REPO_BASE = "https://huggingface.co/rhasspy/piper-voices/resolve"
76
+ HF_REV = os.getenv("PIPER_VOICES_REV", "main") # optionally pin a commit hash
77
 
78
+ # sanity thresholds (bytes)
79
+ MIN_ONNX_BYTES = int(os.getenv("MIN_ONNX_BYTES", "5000000")) # >= ~5MB (real models are much larger)
80
  MIN_JSON_BYTES = int(os.getenv("MIN_JSON_BYTES", "1000")) # >= 1KB
81
 
82
  # (lang, country, family, quality, basename)
 
119
  return False
120
 
121
  def _read_sr_from_cfg(cfg_path: Path) -> int:
 
122
  try:
123
  with open(cfg_path, "r", encoding="utf-8") as f:
124
+ j = json.load(f)
125
  sr = int(j.get("sample_rate", 22050))
126
+ # keep it reasonable
127
  return sr if sr in (16000, 22050, 24000, 44100, 48000) else 22050
128
  except Exception:
129
  return 22050
130
 
131
+ def ensure_voice(voice_id: str) -> Dict[str, object]:
132
  """Ensure voice .onnx and .onnx.json exist locally with sane sizes. Returns paths and SR."""
133
  # Aliases
134
  if voice_id.lower() in ("en-us", "en_us", "english"):
 
162
  sr = _read_sr_from_cfg(cfg)
163
  return {"model": model, "config": cfg, "sr": sr}
164
 
165
+ # -------------------------
166
+ # Piper exec helpers
167
+ # -------------------------
168
+
169
  def build_piper_cmd(
170
  text: str, voice_id: str, to_stdout: bool,
171
  out_path: Optional[Path] = None,
 
173
  ) -> list:
174
  vc = ensure_voice(voice_id)
175
  cmd = [
176
+ *PIPER_CMD,
177
  "-m", str(vc["model"]),
178
  "-c", str(vc["config"]),
179
  "-q",
 
182
  "--noise_w", str(noise_w),
183
  ]
184
  if to_stdout:
185
+ # Stream RAW PCM (16-bit little-endian)
186
  cmd += ["--raw", "-f", "-"]
187
  else:
188
  if out_path is None:
189
  raise ValueError("out_path required when to_stdout=False")
190
+ # File output: piper writes a WAV
191
  cmd += ["-f", str(out_path)]
192
  return cmd
193
 
 
197
  proc = await asyncio.create_subprocess_exec(
198
  *cmd, stdin=asyncio.subprocess.PIPE, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
199
  )
200
+ # terminate the utterance with newline
201
+ proc.stdin.write((text + "\n").encode("utf-8"))
202
  await proc.stdin.drain()
203
  proc.stdin.close()
204
  await proc.wait()
 
206
  stderr = (await proc.stderr.read()).decode("utf-8", "ignore")
207
  raise RuntimeError(f"Piper failed (code {proc.returncode}).\n{stderr}")
208
 
209
+ async def piper_stream_raw(text, voice, ws: WebSocket, length_scale, noise_scale, noise_w):
210
  """Stream RAW PCM frames over WS; send stderr as 'log' events; signal 'done'."""
 
211
  cmd = build_piper_cmd(text, voice, to_stdout=True,
212
  length_scale=length_scale, noise_scale=noise_scale, noise_w=noise_w)
213
  proc = await asyncio.create_subprocess_exec(
 
392
  background_tasks.add_task(_safe_unlink, out_path)
393
  return FileResponse(out_path, media_type="audio/wav", filename=out_path.name)
394
 
395
+ # --- Diagnostics: inspect/refresh downloaded voices ---
396
  @app.get("/debug/voices")
397
  def debug_voices(redownload: bool = Query(False, description="Force re-download bad/missing files")):
398
  out = {"dir": str(VOICES_DIR), "voices": []}
 
424
  info["redownload_error"] = str(e)
425
  return out
426
 
427
+ # --- Live streaming WS ---
428
  @app.websocket("/ws/tts")
429
  async def ws_tts(ws: WebSocket):
430
  await ws.accept()
431
  voice = DEFAULT_VOICE
432
  length_scale, noise_scale, noise_w = 1.08, 0.35, 0.90
433
+ voice_sr = 22050 # will be set from config on init
434
 
435
  try:
436
  while True:
 
452
  await ws.send_text(json.dumps({"event": "error", "detail": str(e)}))
453
  await ws.close()
454
  return
455
+ # Announce the correct SR so the client opens the device properly
456
  await ws.send_text(json.dumps({"event": "ready", "sr": voice_sr, "channels": DEFAULT_CH}))
457
  elif ev == "speak":
458
  text = (data.get("text") or "").strip()
459
  if not text:
460
  await ws.send_text(json.dumps({"event": "error", "detail": "empty text"}))
461
  continue
462
+ await piper_stream_raw(text, voice, ws, length_scale, noise_scale, noise_w)
463
+ # ignore others
464
  except WebSocketDisconnect:
465
  return
466
  except Exception as e: