Percy3822 committed on
Commit
468186c
·
verified ·
1 Parent(s): ad3982a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +47 -7
app.py CHANGED
@@ -68,6 +68,18 @@ def resolve_piper_cmd():
68
 
69
  PIPER_CMD = resolve_piper_cmd()
70
 
 
 
 
 
 
 
 
 
 
 
 
 
71
  # -------------------------
72
  # Voice download & checks
73
  # -------------------------
@@ -87,7 +99,6 @@ VOICE_MAP: Dict[str, Tuple[str, str, str, str, str]] = {
87
  }
88
 
89
  DEFAULT_VOICE = os.getenv("DEFAULT_VOICE", "en_US-libritts-high")
90
- DEFAULT_CH = 1 # mono
91
 
92
  _http = None
93
  def http():
@@ -179,6 +190,7 @@ def build_piper_cmd(
179
  "--length-scale", str(length_scale),
180
  "--noise-scale", str(noise_scale),
181
  "--noise-w", str(noise_w),
 
182
  ]
183
  if to_stdout:
184
  # Stream RAW PCM (16-bit little-endian)
@@ -186,7 +198,7 @@ def build_piper_cmd(
186
  else:
187
  if out_path is None:
188
  raise ValueError("out_path required when to_stdout=False")
189
- # File output: piper writes a WAV
190
  cmd += ["-f", str(out_path)]
191
  return cmd
192
 
@@ -205,8 +217,20 @@ async def piper_to_file(text, voice, out_path, length_scale, noise_scale, noise_
205
  stderr = (await proc.stderr.read()).decode("utf-8", "ignore")
206
  raise RuntimeError(f"Piper failed (code {proc.returncode}).\n{stderr}")
207
 
208
- async def piper_stream_raw(text, voice, ws: WebSocket, length_scale, noise_scale, noise_w):
209
- """Stream RAW PCM frames over WS; send stderr as 'log' events; signal 'done'."""
 
 
 
 
 
 
 
 
 
 
 
 
210
  cmd = build_piper_cmd(text, voice, to_stdout=True,
211
  length_scale=length_scale, noise_scale=noise_scale, noise_w=noise_w)
212
  proc = await asyncio.create_subprocess_exec(
@@ -234,13 +258,29 @@ async def piper_stream_raw(text, voice, ws: WebSocket, length_scale, noise_scale
234
  stderr_task = asyncio.create_task(pump_stderr())
235
  total = 0
236
 
 
 
 
 
 
237
  try:
238
  while True:
239
  chunk = await proc.stdout.read(4096)
240
  if not chunk:
 
 
 
 
 
241
  break
242
- total += len(chunk)
243
- await ws.send_bytes(chunk)
 
 
 
 
 
 
244
 
245
  await proc.wait()
246
  await stderr_task
@@ -468,7 +508,7 @@ async def ws_tts(ws: WebSocket):
468
  if not text:
469
  await ws.send_text(json.dumps({"event": "error", "detail": "empty text"}))
470
  continue
471
- await piper_stream_raw(text, voice, ws, length_scale, noise_scale, noise_w)
472
  # ignore others
473
  except WebSocketDisconnect:
474
  return
 
68
 
69
  PIPER_CMD = resolve_piper_cmd()
70
 
71
+ # -------------------------
72
+ # Streaming / synthesis tuning
73
+ # -------------------------
74
+
75
+ # Inter-sentence silence added by Piper between sentences (seconds)
76
+ SENTENCE_SILENCE = float(os.getenv("PIPER_SENTENCE_SILENCE", "0.05")) # 50 ms
77
+
78
+ # Server-to-client WS frame size target in milliseconds (steady pacing)
79
+ STREAM_BATCH_MS = int(os.getenv("STREAM_BATCH_MS", "100")) # ~100 ms frames
80
+
81
+ DEFAULT_CH = 1 # mono
82
+
83
  # -------------------------
84
  # Voice download & checks
85
  # -------------------------
 
99
  }
100
 
101
  DEFAULT_VOICE = os.getenv("DEFAULT_VOICE", "en_US-libritts-high")
 
102
 
103
  _http = None
104
  def http():
 
190
  "--length-scale", str(length_scale),
191
  "--noise-scale", str(noise_scale),
192
  "--noise-w", str(noise_w),
193
+ "--sentence-silence", str(SENTENCE_SILENCE),
194
  ]
195
  if to_stdout:
196
  # Stream RAW PCM (16-bit little-endian)
 
198
  else:
199
  if out_path is None:
200
  raise ValueError("out_path required when to_stdout=False")
201
+ # File output: Piper writes a WAV
202
  cmd += ["-f", str(out_path)]
203
  return cmd
204
 
 
217
  stderr = (await proc.stderr.read()).decode("utf-8", "ignore")
218
  raise RuntimeError(f"Piper failed (code {proc.returncode}).\n{stderr}")
219
 
220
+ async def piper_stream_raw(
221
+ text: str,
222
+ voice: str,
223
+ ws: WebSocket,
224
+ sr: int,
225
+ channels: int,
226
+ length_scale: float,
227
+ noise_scale: float,
228
+ noise_w: float,
229
+ ):
230
+ """
231
+ Stream RAW PCM frames over WS at steady ~STREAM_BATCH_MS cadence.
232
+ Send stderr as 'log' events; signal 'done' at completion.
233
+ """
234
  cmd = build_piper_cmd(text, voice, to_stdout=True,
235
  length_scale=length_scale, noise_scale=noise_scale, noise_w=noise_w)
236
  proc = await asyncio.create_subprocess_exec(
 
258
  stderr_task = asyncio.create_task(pump_stderr())
259
  total = 0
260
 
261
+ # framing
262
+ bytes_per_ms = max(1, int(sr * channels * 2 / 1000))
263
+ frame_bytes = max(bytes_per_ms, int(STREAM_BATCH_MS * bytes_per_ms))
264
+ buf = bytearray()
265
+
266
  try:
267
  while True:
268
  chunk = await proc.stdout.read(4096)
269
  if not chunk:
270
+ # flush remainder
271
+ if buf:
272
+ await ws.send_bytes(bytes(buf))
273
+ total += len(buf)
274
+ buf.clear()
275
  break
276
+
277
+ buf.extend(chunk)
278
+
279
+ # send in steady frames
280
+ while len(buf) >= frame_bytes:
281
+ await ws.send_bytes(buf[:frame_bytes])
282
+ total += frame_bytes
283
+ del buf[:frame_bytes]
284
 
285
  await proc.wait()
286
  await stderr_task
 
508
  if not text:
509
  await ws.send_text(json.dumps({"event": "error", "detail": "empty text"}))
510
  continue
511
+ await piper_stream_raw(text, voice, ws, voice_sr, DEFAULT_CH, length_scale, noise_scale, noise_w)
512
  # ignore others
513
  except WebSocketDisconnect:
514
  return