Update app.py
Browse files
app.py
CHANGED
|
@@ -68,6 +68,18 @@ def resolve_piper_cmd():
|
|
| 68 |
|
| 69 |
PIPER_CMD = resolve_piper_cmd()
|
| 70 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 71 |
# -------------------------
|
| 72 |
# Voice download & checks
|
| 73 |
# -------------------------
|
|
@@ -87,7 +99,6 @@ VOICE_MAP: Dict[str, Tuple[str, str, str, str, str]] = {
|
|
| 87 |
}
|
| 88 |
|
| 89 |
DEFAULT_VOICE = os.getenv("DEFAULT_VOICE", "en_US-libritts-high")
|
| 90 |
-
DEFAULT_CH = 1 # mono
|
| 91 |
|
| 92 |
_http = None
|
| 93 |
def http():
|
|
@@ -179,6 +190,7 @@ def build_piper_cmd(
|
|
| 179 |
"--length-scale", str(length_scale),
|
| 180 |
"--noise-scale", str(noise_scale),
|
| 181 |
"--noise-w", str(noise_w),
|
|
|
|
| 182 |
]
|
| 183 |
if to_stdout:
|
| 184 |
# Stream RAW PCM (16-bit little-endian)
|
|
@@ -186,7 +198,7 @@ def build_piper_cmd(
|
|
| 186 |
else:
|
| 187 |
if out_path is None:
|
| 188 |
raise ValueError("out_path required when to_stdout=False")
|
| 189 |
-
# File output:
|
| 190 |
cmd += ["-f", str(out_path)]
|
| 191 |
return cmd
|
| 192 |
|
|
@@ -205,8 +217,20 @@ async def piper_to_file(text, voice, out_path, length_scale, noise_scale, noise_
|
|
| 205 |
stderr = (await proc.stderr.read()).decode("utf-8", "ignore")
|
| 206 |
raise RuntimeError(f"Piper failed (code {proc.returncode}).\n{stderr}")
|
| 207 |
|
| 208 |
-
async def piper_stream_raw(
|
| 209 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 210 |
cmd = build_piper_cmd(text, voice, to_stdout=True,
|
| 211 |
length_scale=length_scale, noise_scale=noise_scale, noise_w=noise_w)
|
| 212 |
proc = await asyncio.create_subprocess_exec(
|
|
@@ -234,13 +258,29 @@ async def piper_stream_raw(text, voice, ws: WebSocket, length_scale, noise_scale
|
|
| 234 |
stderr_task = asyncio.create_task(pump_stderr())
|
| 235 |
total = 0
|
| 236 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 237 |
try:
|
| 238 |
while True:
|
| 239 |
chunk = await proc.stdout.read(4096)
|
| 240 |
if not chunk:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 241 |
break
|
| 242 |
-
|
| 243 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 244 |
|
| 245 |
await proc.wait()
|
| 246 |
await stderr_task
|
|
@@ -468,7 +508,7 @@ async def ws_tts(ws: WebSocket):
|
|
| 468 |
if not text:
|
| 469 |
await ws.send_text(json.dumps({"event": "error", "detail": "empty text"}))
|
| 470 |
continue
|
| 471 |
-
await piper_stream_raw(text, voice, ws, length_scale, noise_scale, noise_w)
|
| 472 |
# ignore others
|
| 473 |
except WebSocketDisconnect:
|
| 474 |
return
|
|
|
|
| 68 |
|
| 69 |
PIPER_CMD = resolve_piper_cmd()
|
| 70 |
|
| 71 |
+
# -------------------------
|
| 72 |
+
# Streaming / synthesis tuning
|
| 73 |
+
# -------------------------
|
| 74 |
+
|
| 75 |
+
# Inter-sentence silence: pause Piper inserts between sentences (seconds)
|
| 76 |
+
SENTENCE_SILENCE = float(os.getenv("PIPER_SENTENCE_SILENCE", "0.05")) # 50 ms
|
| 77 |
+
|
| 78 |
+
# Target audio duration per server-to-client WS frame, in milliseconds (steady pacing)
|
| 79 |
+
STREAM_BATCH_MS = int(os.getenv("STREAM_BATCH_MS", "100")) # ~100 ms frames
|
| 80 |
+
|
| 81 |
+
DEFAULT_CH = 1 # mono
|
| 82 |
+
|
| 83 |
# -------------------------
|
| 84 |
# Voice download & checks
|
| 85 |
# -------------------------
|
|
|
|
| 99 |
}
|
| 100 |
|
| 101 |
DEFAULT_VOICE = os.getenv("DEFAULT_VOICE", "en_US-libritts-high")
|
|
|
|
| 102 |
|
| 103 |
_http = None
|
| 104 |
def http():
|
|
|
|
| 190 |
"--length-scale", str(length_scale),
|
| 191 |
"--noise-scale", str(noise_scale),
|
| 192 |
"--noise-w", str(noise_w),
|
| 193 |
+
"--sentence-silence", str(SENTENCE_SILENCE),
|
| 194 |
]
|
| 195 |
if to_stdout:
|
| 196 |
# Stream RAW PCM (16-bit little-endian)
|
|
|
|
| 198 |
else:
|
| 199 |
if out_path is None:
|
| 200 |
raise ValueError("out_path required when to_stdout=False")
|
| 201 |
+
# File output: Piper writes a WAV
|
| 202 |
cmd += ["-f", str(out_path)]
|
| 203 |
return cmd
|
| 204 |
|
|
|
|
| 217 |
stderr = (await proc.stderr.read()).decode("utf-8", "ignore")
|
| 218 |
raise RuntimeError(f"Piper failed (code {proc.returncode}).\n{stderr}")
|
| 219 |
|
| 220 |
+
async def piper_stream_raw(
|
| 221 |
+
text: str,
|
| 222 |
+
voice: str,
|
| 223 |
+
ws: WebSocket,
|
| 224 |
+
sr: int,
|
| 225 |
+
channels: int,
|
| 226 |
+
length_scale: float,
|
| 227 |
+
noise_scale: float,
|
| 228 |
+
noise_w: float,
|
| 229 |
+
):
|
| 230 |
+
"""
|
| 231 |
+
Stream RAW PCM over WS in fixed-size frames of ~STREAM_BATCH_MS ms of audio each (the final frame may be shorter).
|
| 232 |
+
Send stderr as 'log' events; signal 'done' at completion.
|
| 233 |
+
"""
|
| 234 |
cmd = build_piper_cmd(text, voice, to_stdout=True,
|
| 235 |
length_scale=length_scale, noise_scale=noise_scale, noise_w=noise_w)
|
| 236 |
proc = await asyncio.create_subprocess_exec(
|
|
|
|
| 258 |
stderr_task = asyncio.create_task(pump_stderr())
|
| 259 |
total = 0
|
| 260 |
|
| 261 |
+
# framing: 16-bit PCM is 2 bytes per sample per channel; size each WS frame to ~STREAM_BATCH_MS of audio
|
| 262 |
+
bytes_per_ms = max(1, int(sr * channels * 2 / 1000))
|
| 263 |
+
frame_bytes = max(bytes_per_ms, int(STREAM_BATCH_MS * bytes_per_ms))
|
| 264 |
+
buf = bytearray()
|
| 265 |
+
|
| 266 |
try:
|
| 267 |
while True:
|
| 268 |
chunk = await proc.stdout.read(4096)
|
| 269 |
if not chunk:
|
| 270 |
+
# flush remainder
|
| 271 |
+
if buf:
|
| 272 |
+
await ws.send_bytes(bytes(buf))
|
| 273 |
+
total += len(buf)
|
| 274 |
+
buf.clear()
|
| 275 |
break
|
| 276 |
+
|
| 277 |
+
buf.extend(chunk)
|
| 278 |
+
|
| 279 |
+
# send in steady frames
|
| 280 |
+
while len(buf) >= frame_bytes:
|
| 281 |
+
await ws.send_bytes(buf[:frame_bytes])
|
| 282 |
+
total += frame_bytes
|
| 283 |
+
del buf[:frame_bytes]
|
| 284 |
|
| 285 |
await proc.wait()
|
| 286 |
await stderr_task
|
|
|
|
| 508 |
if not text:
|
| 509 |
await ws.send_text(json.dumps({"event": "error", "detail": "empty text"}))
|
| 510 |
continue
|
| 511 |
+
await piper_stream_raw(text, voice, ws, voice_sr, DEFAULT_CH, length_scale, noise_scale, noise_w)
|
| 512 |
# ignore others
|
| 513 |
except WebSocketDisconnect:
|
| 514 |
return
|