piper

Sleeping

App Files Files Community

Percy3822 committed on Sep 5, 2025

Commit

468186c

verified ·

1 Parent(s): ad3982a

Update app.py

Browse files

Files changed (1) hide show

app.py +47 -7

app.py CHANGED Viewed

@@ -68,6 +68,18 @@ def resolve_piper_cmd():
 PIPER_CMD = resolve_piper_cmd()
 # -------------------------
 # Voice download & checks
 # -------------------------
@@ -87,7 +99,6 @@ VOICE_MAP: Dict[str, Tuple[str, str, str, str, str]] = {
 }
 DEFAULT_VOICE = os.getenv("DEFAULT_VOICE", "en_US-libritts-high")
-DEFAULT_CH    = 1  # mono
 _http = None
 def http():
@@ -179,6 +190,7 @@ def build_piper_cmd(
         "--length-scale", str(length_scale),
         "--noise-scale",  str(noise_scale),
         "--noise-w",      str(noise_w),
     ]
     if to_stdout:
         # Stream RAW PCM (16-bit little-endian)
@@ -186,7 +198,7 @@ def build_piper_cmd(
     else:
         if out_path is None:
             raise ValueError("out_path required when to_stdout=False")
-        # File output: piper writes a WAV
         cmd += ["-f", str(out_path)]
     return cmd
@@ -205,8 +217,20 @@ async def piper_to_file(text, voice, out_path, length_scale, noise_scale, noise_
         stderr = (await proc.stderr.read()).decode("utf-8", "ignore")
         raise RuntimeError(f"Piper failed (code {proc.returncode}).\n{stderr}")
-async def piper_stream_raw(text, voice, ws: WebSocket, length_scale, noise_scale, noise_w):
-    """Stream RAW PCM frames over WS; send stderr as 'log' events; signal 'done'."""
     cmd = build_piper_cmd(text, voice, to_stdout=True,
                           length_scale=length_scale, noise_scale=noise_scale, noise_w=noise_w)
     proc = await asyncio.create_subprocess_exec(
@@ -234,13 +258,29 @@ async def piper_stream_raw(text, voice, ws: WebSocket, length_scale, noise_scale
     stderr_task = asyncio.create_task(pump_stderr())
     total = 0
     try:
         while True:
             chunk = await proc.stdout.read(4096)
             if not chunk:
                 break
-            total += len(chunk)
-            await ws.send_bytes(chunk)
         await proc.wait()
         await stderr_task
@@ -468,7 +508,7 @@ async def ws_tts(ws: WebSocket):
                 if not text:
                     await ws.send_text(json.dumps({"event": "error", "detail": "empty text"}))
                     continue
-                await piper_stream_raw(text, voice, ws, length_scale, noise_scale, noise_w)
             # ignore others
     except WebSocketDisconnect:
         return

 PIPER_CMD = resolve_piper_cmd()
+# -------------------------
+# Streaming / synthesis tuning
+# -------------------------
+# Inter-sentence silence inserted by Piper between sentences (seconds)
+SENTENCE_SILENCE = float(os.getenv("PIPER_SENTENCE_SILENCE", "0.05"))  # 50 ms
+# Server-to-client WS frame size target in milliseconds (steady pacing)
+STREAM_BATCH_MS  = int(os.getenv("STREAM_BATCH_MS", "100"))            # ~100 ms frames
+DEFAULT_CH = 1  # mono
 # -------------------------
 # Voice download & checks
 # -------------------------
 }
 DEFAULT_VOICE = os.getenv("DEFAULT_VOICE", "en_US-libritts-high")
 _http = None
 def http():
         "--length-scale", str(length_scale),
         "--noise-scale",  str(noise_scale),
         "--noise-w",      str(noise_w),
+        "--sentence-silence", str(SENTENCE_SILENCE),
     ]
     if to_stdout:
         # Stream RAW PCM (16-bit little-endian)
     else:
         if out_path is None:
             raise ValueError("out_path required when to_stdout=False")
+        # File output: Piper writes a WAV
         cmd += ["-f", str(out_path)]
     return cmd
         stderr = (await proc.stderr.read()).decode("utf-8", "ignore")
         raise RuntimeError(f"Piper failed (code {proc.returncode}).\n{stderr}")
+async def piper_stream_raw(
+    text: str,
+    voice: str,
+    ws: WebSocket,
+    sr: int,
+    channels: int,
+    length_scale: float,
+    noise_scale: float,
+    noise_w: float,
+):
+    """
+    Stream RAW PCM frames over WS at steady ~STREAM_BATCH_MS cadence.
+    Send stderr as 'log' events; signal 'done' at completion.
+    """
     cmd = build_piper_cmd(text, voice, to_stdout=True,
                           length_scale=length_scale, noise_scale=noise_scale, noise_w=noise_w)
     proc = await asyncio.create_subprocess_exec(
     stderr_task = asyncio.create_task(pump_stderr())
     total = 0
+    # framing
+    bytes_per_ms = max(1, int(sr * channels * 2 / 1000))
+    frame_bytes = max(bytes_per_ms, int(STREAM_BATCH_MS * bytes_per_ms))
+    buf = bytearray()
     try:
         while True:
             chunk = await proc.stdout.read(4096)
             if not chunk:
+                # flush remainder
+                if buf:
+                    await ws.send_bytes(bytes(buf))
+                    total += len(buf)
+                    buf.clear()
                 break
+            buf.extend(chunk)
+            # send in steady frames
+            while len(buf) >= frame_bytes:
+                await ws.send_bytes(buf[:frame_bytes])
+                total += frame_bytes
+                del buf[:frame_bytes]
         await proc.wait()
         await stderr_task
                 if not text:
                     await ws.send_text(json.dumps({"event": "error", "detail": "empty text"}))
                     continue
+                await piper_stream_raw(text, voice, ws, voice_sr, DEFAULT_CH, length_scale, noise_scale, noise_w)
             # ignore others
     except WebSocketDisconnect:
         return