piper

Sleeping

App Files Files Community

Percy3822 committed on Sep 5, 2025

Commit

a9c381a

verified ·

1 Parent(s): e930fdf

Update app.py

Browse files

Files changed (1) hide show

app.py +40 -18

app.py CHANGED Viewed

@@ -2,6 +2,8 @@
 import asyncio
 import json
 import os
 import time
 from pathlib import Path
 from typing import Dict, Optional, Tuple
@@ -51,16 +53,30 @@ def _safe_unlink(path: Path):
         pass
 # -------------------------
-# Piper CLI integration
 # -------------------------
-PIPER_BIN = os.getenv("PIPER_BIN", "piper")
 HF_REPO_BASE = "https://huggingface.co/rhasspy/piper-voices/resolve"
-HF_REV       = os.getenv("PIPER_VOICES_REV", "main")  # set a commit hash here if you want pinning
-# sanity thresholds (bytes) to detect corrupt downloads
-MIN_ONNX_BYTES = int(os.getenv("MIN_ONNX_BYTES", "5000000"))  # >= ~5MB (real models are 10s–100s MB)
 MIN_JSON_BYTES = int(os.getenv("MIN_JSON_BYTES", "1000"))     # >= 1KB
 # (lang, country, family, quality, basename)
@@ -103,16 +119,16 @@ def _file_ok(p: Path, min_bytes: int) -> bool:
         return False
 def _read_sr_from_cfg(cfg_path: Path) -> int:
-    import json as _json
     try:
         with open(cfg_path, "r", encoding="utf-8") as f:
-            j = _json.load(f)
         sr = int(j.get("sample_rate", 22050))
         return sr if sr in (16000, 22050, 24000, 44100, 48000) else 22050
     except Exception:
         return 22050
-def ensure_voice(voice_id: str) -> Dict[str, Path | int]:
     """Ensure voice .onnx and .onnx.json exist locally with sane sizes. Returns paths and SR."""
     # Aliases
     if voice_id.lower() in ("en-us", "en_us", "english"):
@@ -146,6 +162,10 @@ def ensure_voice(voice_id: str) -> Dict[str, Path | int]:
     sr = _read_sr_from_cfg(cfg)
     return {"model": model, "config": cfg, "sr": sr}
 def build_piper_cmd(
     text: str, voice_id: str, to_stdout: bool,
     out_path: Optional[Path] = None,
@@ -153,7 +173,7 @@ def build_piper_cmd(
 ) -> list:
     vc = ensure_voice(voice_id)
     cmd = [
-        PIPER_BIN,
         "-m", str(vc["model"]),
         "-c", str(vc["config"]),
         "-q",
@@ -162,12 +182,12 @@ def build_piper_cmd(
         "--noise_w",      str(noise_w),
     ]
     if to_stdout:
-        # Stream RAW PCM (16-bit little-endian). Simpler to play on clients.
         cmd += ["--raw", "-f", "-"]
     else:
         if out_path is None:
             raise ValueError("out_path required when to_stdout=False")
-        # File output: Piper writes WAV by default.
         cmd += ["-f", str(out_path)]
     return cmd
@@ -177,7 +197,8 @@ async def piper_to_file(text, voice, out_path, length_scale, noise_scale, noise_
     proc = await asyncio.create_subprocess_exec(
         *cmd, stdin=asyncio.subprocess.PIPE, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
     )
-    proc.stdin.write((text + "\n").encode("utf-8"))  # newline to terminate
     await proc.stdin.drain()
     proc.stdin.close()
     await proc.wait()
@@ -185,9 +206,8 @@ async def piper_to_file(text, voice, out_path, length_scale, noise_scale, noise_
         stderr = (await proc.stderr.read()).decode("utf-8", "ignore")
         raise RuntimeError(f"Piper failed (code {proc.returncode}).\n{stderr}")
-async def piper_stream_raw(text, voice, ws: WebSocket, length_scale, noise_scale, noise_w, sr: int):
     """Stream RAW PCM frames over WS; send stderr as 'log' events; signal 'done'."""
-    # We already announced 'ready' with the correct sr in the init step.
     cmd = build_piper_cmd(text, voice, to_stdout=True,
                           length_scale=length_scale, noise_scale=noise_scale, noise_w=noise_w)
     proc = await asyncio.create_subprocess_exec(
@@ -372,6 +392,7 @@ async def speak_wav_get(
         background_tasks.add_task(_safe_unlink, out_path)
     return FileResponse(out_path, media_type="audio/wav", filename=out_path.name)
 @app.get("/debug/voices")
 def debug_voices(redownload: bool = Query(False, description="Force re-download bad/missing files")):
     out = {"dir": str(VOICES_DIR), "voices": []}
@@ -403,12 +424,13 @@ def debug_voices(redownload: bool = Query(False, description="Force re-download
                 info["redownload_error"] = str(e)
     return out
 @app.websocket("/ws/tts")
 async def ws_tts(ws: WebSocket):
     await ws.accept()
     voice = DEFAULT_VOICE
     length_scale, noise_scale, noise_w = 1.08, 0.35, 0.90
-    voice_sr = 22050  # will be overwritten by ensure_voice
     try:
         while True:
@@ -430,15 +452,15 @@ async def ws_tts(ws: WebSocket):
                     await ws.send_text(json.dumps({"event": "error", "detail": str(e)}))
                     await ws.close()
                     return
-                # announce the correct SR so the client opens the audio device properly
                 await ws.send_text(json.dumps({"event": "ready", "sr": voice_sr, "channels": DEFAULT_CH}))
             elif ev == "speak":
                 text = (data.get("text") or "").strip()
                 if not text:
                     await ws.send_text(json.dumps({"event": "error", "detail": "empty text"}))
                     continue
-                await piper_stream_raw(text, voice, ws, length_scale, noise_scale, noise_w, sr=voice_sr)
-            # ignore other events
     except WebSocketDisconnect:
         return
     except Exception as e:

 import asyncio
 import json
 import os
+import sys
+import shutil
 import time
 from pathlib import Path
 from typing import Dict, Optional, Tuple
         pass
 # -------------------------
+# Piper command resolution
 # -------------------------
+def resolve_piper_cmd():
+    env = os.getenv("PIPER_BIN")
+    if env:
+        return env.split()
+    path = shutil.which("piper")
+    if path:
+        return [path]
+    # fallback to module runner if console script isn't on PATH
+    return [sys.executable, "-m", "piper"]
+PIPER_CMD = resolve_piper_cmd()
+# -------------------------
+# Voice download & checks
+# -------------------------
 HF_REPO_BASE = "https://huggingface.co/rhasspy/piper-voices/resolve"
+HF_REV       = os.getenv("PIPER_VOICES_REV", "main")  # optionally pin a commit hash
+# sanity thresholds (bytes)
+MIN_ONNX_BYTES = int(os.getenv("MIN_ONNX_BYTES", "5000000"))  # >= ~5MB (real models are much larger)
 MIN_JSON_BYTES = int(os.getenv("MIN_JSON_BYTES", "1000"))     # >= 1KB
 # (lang, country, family, quality, basename)
         return False
 def _read_sr_from_cfg(cfg_path: Path) -> int:
     try:
         with open(cfg_path, "r", encoding="utf-8") as f:
+            j = json.load(f)
         sr = int(j.get("sample_rate", 22050))
+        # keep it reasonable
         return sr if sr in (16000, 22050, 24000, 44100, 48000) else 22050
     except Exception:
         return 22050
+def ensure_voice(voice_id: str) -> Dict[str, object]:
     """Ensure voice .onnx and .onnx.json exist locally with sane sizes. Returns paths and SR."""
     # Aliases
     if voice_id.lower() in ("en-us", "en_us", "english"):
     sr = _read_sr_from_cfg(cfg)
     return {"model": model, "config": cfg, "sr": sr}
+# -------------------------
+# Piper exec helpers
+# -------------------------
 def build_piper_cmd(
     text: str, voice_id: str, to_stdout: bool,
     out_path: Optional[Path] = None,
 ) -> list:
     vc = ensure_voice(voice_id)
     cmd = [
+        *PIPER_CMD,
         "-m", str(vc["model"]),
         "-c", str(vc["config"]),
         "-q",
         "--noise_w",      str(noise_w),
     ]
     if to_stdout:
+        # Stream RAW PCM (16-bit little-endian)
         cmd += ["--raw", "-f", "-"]
     else:
         if out_path is None:
             raise ValueError("out_path required when to_stdout=False")
+        # File output: piper writes a WAV
         cmd += ["-f", str(out_path)]
     return cmd
     proc = await asyncio.create_subprocess_exec(
         *cmd, stdin=asyncio.subprocess.PIPE, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
     )
+    # terminate the utterance with newline
+    proc.stdin.write((text + "\n").encode("utf-8"))
     await proc.stdin.drain()
     proc.stdin.close()
     await proc.wait()
         stderr = (await proc.stderr.read()).decode("utf-8", "ignore")
         raise RuntimeError(f"Piper failed (code {proc.returncode}).\n{stderr}")
+async def piper_stream_raw(text, voice, ws: WebSocket, length_scale, noise_scale, noise_w):
     """Stream RAW PCM frames over WS; send stderr as 'log' events; signal 'done'."""
     cmd = build_piper_cmd(text, voice, to_stdout=True,
                           length_scale=length_scale, noise_scale=noise_scale, noise_w=noise_w)
     proc = await asyncio.create_subprocess_exec(
         background_tasks.add_task(_safe_unlink, out_path)
     return FileResponse(out_path, media_type="audio/wav", filename=out_path.name)
+# --- Diagnostics: inspect/refresh downloaded voices ---
 @app.get("/debug/voices")
 def debug_voices(redownload: bool = Query(False, description="Force re-download bad/missing files")):
     out = {"dir": str(VOICES_DIR), "voices": []}
                 info["redownload_error"] = str(e)
     return out
+# --- Live streaming WS ---
 @app.websocket("/ws/tts")
 async def ws_tts(ws: WebSocket):
     await ws.accept()
     voice = DEFAULT_VOICE
     length_scale, noise_scale, noise_w = 1.08, 0.35, 0.90
+    voice_sr = 22050  # will be set from config on init
     try:
         while True:
                     await ws.send_text(json.dumps({"event": "error", "detail": str(e)}))
                     await ws.close()
                     return
+                # Announce the correct SR so the client opens the device properly
                 await ws.send_text(json.dumps({"event": "ready", "sr": voice_sr, "channels": DEFAULT_CH}))
             elif ev == "speak":
                 text = (data.get("text") or "").strip()
                 if not text:
                     await ws.send_text(json.dumps({"event": "error", "detail": "empty text"}))
                     continue
+                await piper_stream_raw(text, voice, ws, length_scale, noise_scale, noise_w)
+            # ignore others
     except WebSocketDisconnect:
         return
     except Exception as e: