ShadowHunter222 committed on
Commit
4979fb4
·
verified ·
1 Parent(s): 6937fbe

Upload 9 files

Browse files
Files changed (5) hide show
  1. app.py +155 -425
  2. chatterbox_wrapper.py +2 -0
  3. config.py +3 -7
  4. her_prompt.wav +3 -0
  5. ivr_female_prompt.wav +3 -0
app.py CHANGED
@@ -1,16 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import asyncio
2
- import http.client
3
  import io
4
  import json
5
  import logging
6
  import queue as stdlib_queue
7
  import threading
8
  import time
 
9
  import urllib.parse
 
10
  import uuid
11
  from concurrent.futures import ThreadPoolExecutor
12
- from dataclasses import dataclass
13
- from typing import Any, Generator, Optional
14
 
15
  import numpy as np
16
  import soundfile as sf
@@ -95,36 +111,38 @@ async def cors_middleware(request: Request, call_next):
95
 
96
  async def _resolve_voice(
97
  voice_ref: Optional[UploadFile],
98
- voice_name: Optional[str],
99
  wrapper: ChatterboxWrapper,
100
  ) -> VoiceProfile:
101
- """Return a VoiceProfile from uploaded audio or built-in voice selection."""
102
- if voice_ref is None or voice_ref.filename == "":
 
 
 
 
 
 
 
 
103
  try:
104
- return wrapper.get_builtin_voice(voice_name)
 
 
105
  except ValueError as e:
106
  raise HTTPException(status_code=400, detail=str(e))
 
 
 
 
 
 
 
107
 
108
- audio_bytes = await voice_ref.read()
109
- if len(audio_bytes) > Config.MAX_VOICE_UPLOAD_BYTES:
110
- raise HTTPException(status_code=413, detail="Voice file too large (max 10 MB)")
111
- if len(audio_bytes) == 0:
112
- raise HTTPException(status_code=400, detail="Empty voice file")
113
-
114
- loop = asyncio.get_running_loop()
115
  try:
116
- return await loop.run_in_executor(
117
- tts_executor, wrapper.encode_voice_from_bytes, audio_bytes
118
- )
119
- except ValueError as e:
120
  raise HTTPException(status_code=400, detail=str(e))
121
- except Exception as e:
122
- logger.error(f"Voice encoding failed: {e}")
123
- raise HTTPException(
124
- status_code=400,
125
- detail=f"Could not process voice file: {str(e)}. "
126
- f"Supported formats: WAV, MP3, MPEG, M4A, OGG, FLAC, WebM."
127
- )
128
 
129
 
130
  # ═══════════════════════════════════════════════════════════════════
@@ -152,165 +170,32 @@ def _encode_mp3_chunk(audio: np.ndarray) -> bytes:
152
  return data
153
 
154
 
155
- @dataclass(frozen=True)
156
- class _ChunkPacket:
157
- index: int
158
- data: bytes
159
- lane: str
160
- produced_at: float
161
 
162
 
163
- def _internal_headers(
164
- *,
165
- content_type: Optional[str] = "application/json",
166
- accept: str = "audio/mpeg",
167
- ) -> dict[str, str]:
168
- headers: dict[str, str] = {"Accept": accept, "Connection": "keep-alive"}
169
- if content_type:
170
- headers["Content-Type"] = content_type
171
  if Config.INTERNAL_SHARED_SECRET:
172
  headers["X-Internal-Secret"] = Config.INTERNAL_SHARED_SECRET
173
  return headers
174
 
175
 
176
- class _HelperHttpClient:
177
- """Small persistent HTTP client for helper server keep-alive calls."""
178
-
179
- def __init__(self, base_url: str, default_timeout: float):
180
- parsed = urllib.parse.urlparse((base_url or "").strip())
181
- if parsed.scheme not in {"http", "https"} or not parsed.hostname:
182
- raise ValueError(f"Invalid helper URL: {base_url!r}")
183
-
184
- self._scheme = parsed.scheme
185
- self._host = parsed.hostname
186
- self._port = parsed.port
187
- self._base_path = (parsed.path or "").rstrip("/")
188
- self._default_timeout = max(1.0, float(default_timeout))
189
- self._conn: Optional[http.client.HTTPConnection] = None
190
-
191
- def __enter__(self):
192
- return self
193
-
194
- def __exit__(self, exc_type, exc, tb):
195
- self.close()
196
-
197
- def close(self):
198
- if self._conn is not None:
199
- try:
200
- self._conn.close()
201
- except Exception:
202
- pass
203
- self._conn = None
204
-
205
- def _target(self, path: str, query: Optional[str] = None) -> str:
206
- normalized = path if path.startswith("/") else f"/{path}"
207
- target = f"{self._base_path}{normalized}"
208
- if query:
209
- target = f"{target}?{query}"
210
- return target
211
-
212
- def _make_connection(self, timeout_sec: float) -> http.client.HTTPConnection:
213
- if self._scheme == "https":
214
- return http.client.HTTPSConnection(self._host, self._port, timeout=timeout_sec)
215
- return http.client.HTTPConnection(self._host, self._port, timeout=timeout_sec)
216
-
217
- def _ensure_connection(self, timeout_sec: float) -> http.client.HTTPConnection:
218
- if self._conn is None:
219
- self._conn = self._make_connection(timeout_sec)
220
- else:
221
- self._conn.timeout = timeout_sec
222
- return self._conn
223
-
224
- def _request(
225
- self,
226
- method: str,
227
- path: str,
228
- *,
229
- body: Optional[bytes] = None,
230
- headers: Optional[dict[str, str]] = None,
231
- timeout_sec: Optional[float] = None,
232
- query: Optional[str] = None,
233
- ) -> tuple[int, bytes, dict[str, str]]:
234
- timeout = max(1.0, float(timeout_sec or self._default_timeout))
235
- target = self._target(path, query=query)
236
- req_headers = headers or {}
237
-
238
- conn = self._ensure_connection(timeout)
239
- try:
240
- conn.request(method=method, url=target, body=body, headers=req_headers)
241
- resp = conn.getresponse()
242
- payload = resp.read()
243
- resp_headers = {k.lower(): v for k, v in resp.getheaders()}
244
- except Exception:
245
- # Force reconnect on next attempt if socket is stale/reset.
246
- self.close()
247
- raise
248
-
249
- if resp.status >= 400:
250
- snippet = payload[:256].decode("utf-8", errors="replace")
251
- raise RuntimeError(
252
- f"helper {method} {target} returned {resp.status}: {snippet}"
253
- )
254
- return resp.status, payload, resp_headers
255
-
256
- def request_chunk(self, payload: dict[str, Any], timeout_sec: float) -> bytes:
257
- _, data, _ = self._request(
258
- "POST",
259
- "/internal/chunk/synthesize",
260
- body=json.dumps(payload).encode("utf-8"),
261
- headers=_internal_headers(content_type="application/json", accept="audio/mpeg"),
262
- timeout_sec=timeout_sec,
263
- )
264
- return data
265
-
266
- def register_voice(self, stream_id: str, audio_bytes: bytes, timeout_sec: float) -> str:
267
- query = urllib.parse.urlencode({"stream_id": stream_id})
268
- _, data, _ = self._request(
269
- "POST",
270
- "/internal/voice/register",
271
- query=query,
272
- body=audio_bytes,
273
- headers=_internal_headers(
274
- content_type="application/octet-stream",
275
- accept="application/json",
276
- ),
277
- timeout_sec=timeout_sec,
278
- )
279
- payload = json.loads(data.decode("utf-8"))
280
- voice_key = (payload.get("voice_key") or "").strip()
281
- if not voice_key:
282
- raise RuntimeError("helper voice registration returned no voice_key")
283
- return voice_key
284
-
285
- def cancel_stream(self, stream_id: str, timeout_sec: float = 3.0):
286
- self._request(
287
- "POST",
288
- f"/internal/chunk/cancel/{stream_id}",
289
- body=b"",
290
- headers=_internal_headers(),
291
- timeout_sec=timeout_sec,
292
- )
293
-
294
- def complete_stream(self, stream_id: str, timeout_sec: float = 3.0):
295
- self._request(
296
- "POST",
297
- f"/internal/chunk/complete/{stream_id}",
298
- body=b"",
299
- headers=_internal_headers(),
300
- timeout_sec=timeout_sec,
301
- )
302
-
303
-
304
  def _helper_request_chunk(
305
  helper_base_url: str,
306
  payload: dict,
307
  timeout_sec: float,
308
- helper_client: Optional[_HelperHttpClient] = None,
309
  ) -> bytes:
310
- if helper_client is not None:
311
- return helper_client.request_chunk(payload, timeout_sec=timeout_sec)
312
- with _HelperHttpClient(helper_base_url, default_timeout=timeout_sec) as helper_client_single:
313
- return helper_client_single.request_chunk(payload, timeout_sec=timeout_sec)
 
 
 
 
 
 
314
 
315
 
316
  def _helper_register_voice(
@@ -318,45 +203,44 @@ def _helper_register_voice(
318
  stream_id: str,
319
  audio_bytes: bytes,
320
  timeout_sec: float,
321
- helper_client: Optional[_HelperHttpClient] = None,
322
  ) -> str:
323
  """Register reference voice on helper once, return voice_key for chunk calls."""
324
- if helper_client is not None:
325
- return helper_client.register_voice(
326
- stream_id=stream_id,
327
- audio_bytes=audio_bytes,
328
- timeout_sec=timeout_sec,
329
- )
330
- with _HelperHttpClient(helper_base_url, default_timeout=timeout_sec) as helper_client_single:
331
- return helper_client_single.register_voice(
332
- stream_id=stream_id,
333
- audio_bytes=audio_bytes,
334
- timeout_sec=timeout_sec,
335
- )
 
 
 
 
 
 
336
 
337
 
338
  def _helper_cancel_stream(helper_base_url: str, stream_id: str):
339
  """Best-effort cancellation signal to helper."""
340
  try:
341
- with _HelperHttpClient(helper_base_url, default_timeout=3.0) as helper_client:
342
- helper_client.cancel_stream(stream_id=stream_id, timeout_sec=3.0)
 
 
 
 
 
 
 
343
  except Exception:
344
  pass
345
 
346
 
347
- def _helper_complete_stream(helper_base_url: str, stream_id: str):
348
- """Best-effort stream completion cleanup on helper.
349
-
350
- Falls back to cancel for backwards compatibility if helper does not expose
351
- the completion endpoint yet.
352
- """
353
- try:
354
- with _HelperHttpClient(helper_base_url, default_timeout=3.0) as helper_client:
355
- helper_client.complete_stream(stream_id=stream_id, timeout_sec=3.0)
356
- except Exception:
357
- _helper_cancel_stream(helper_base_url, stream_id)
358
-
359
-
360
  # ═══════════════════════════════════════════════════════════════════
361
  # Endpoints
362
  # ═══════════════════════════════════════════════════════════════════
@@ -364,19 +248,12 @@ def _helper_complete_stream(helper_base_url: str, stream_id: str):
364
  @app.get("/health")
365
  async def health(warm_up: bool = False):
366
  wrapper: ChatterboxWrapper = getattr(app.state, "wrapper", None)
367
- with _internal_cancel_lock:
368
- _purge_internal_stream_state_locked()
369
- cancelled_count = len(_internal_cancelled_streams)
370
- voice_state_count = len(_internal_stream_voice_keys)
371
-
372
  status = {
373
  "status": "healthy" if wrapper else "loading",
374
  "model_loaded": wrapper is not None,
375
  "model_dtype": Config.MODEL_DTYPE,
376
  "streaming_supported": True,
377
  "voice_cache_entries": wrapper._voice_cache.size if wrapper else 0,
378
- "internal_cancelled_streams": cancelled_count,
379
- "internal_stream_voice_states": voice_state_count,
380
  }
381
  if warm_up and wrapper:
382
  try:
@@ -388,23 +265,40 @@ async def health(warm_up: bool = False):
388
  return status
389
 
390
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
391
 
392
  @app.get("/voices")
393
  async def list_voices():
 
394
  wrapper: ChatterboxWrapper = getattr(app.state, "wrapper", None)
395
  if not wrapper:
396
  raise HTTPException(503, "Model not loaded")
397
-
398
- voices = wrapper.list_builtin_voices()
399
  return {
400
- "count": len(voices),
401
- "default_voice": wrapper.default_voice_name,
402
- "voices": voices,
403
- "usage": {
404
- "form_field": "voice_name",
405
- "json_field": "voice",
406
- "note": "If voice_ref is uploaded, it overrides voice_name.",
407
- },
408
  }
409
 
410
 
@@ -454,51 +348,9 @@ async def text_to_speech(
454
  # ═══════════════════════════════════════════════════════════════════
455
 
456
  _active_streams: dict[str, threading.Event] = {}
457
- # stream_id -> expires_at epoch seconds
458
- _internal_cancelled_streams: dict[str, float] = {}
459
  _internal_cancel_lock = threading.Lock()
460
- # stream_id -> (voice_keys, expires_at)
461
- _internal_stream_voice_keys: dict[str, tuple[set[str], float]] = {}
462
-
463
- # stream_id -> helper base URLs (used to cancel helpers quickly on /tts/stop)
464
- _stream_helper_routes: dict[str, set[str]] = {}
465
- _stream_routes_lock = threading.Lock()
466
-
467
-
468
- def _purge_internal_stream_state_locked(now: Optional[float] = None):
469
- now_ts = now if now is not None else time.time()
470
-
471
- expired_cancel_ids = [
472
- sid for sid, expires_at in _internal_cancelled_streams.items()
473
- if expires_at <= now_ts
474
- ]
475
- for sid in expired_cancel_ids:
476
- _internal_cancelled_streams.pop(sid, None)
477
-
478
- expired_voice_state_ids = [
479
- sid for sid, (_, expires_at) in _internal_stream_voice_keys.items()
480
- if expires_at <= now_ts
481
- ]
482
- for sid in expired_voice_state_ids:
483
- _internal_stream_voice_keys.pop(sid, None)
484
-
485
-
486
- def _touch_internal_stream_voice_keys_locked(stream_id: str):
487
- if not stream_id:
488
- return
489
- entry = _internal_stream_voice_keys.get(stream_id)
490
- if entry is None:
491
- return
492
- keys, _ = entry
493
- _internal_stream_voice_keys[stream_id] = (
494
- keys,
495
- time.time() + max(1, Config.INTERNAL_STREAM_STATE_TTL_SEC),
496
- )
497
-
498
-
499
- def _clear_internal_stream_state_locked(stream_id: str):
500
- _internal_cancelled_streams.pop(stream_id, None)
501
- _internal_stream_voice_keys.pop(stream_id, None)
502
 
503
 
504
  # ═══════════════════════════════════════════════════════════════════
@@ -598,7 +450,7 @@ def _pipeline_stream_generator(
598
  _active_streams.pop(stream_id, None)
599
 
600
 
601
- def _parallel_two_way_stream_generator(
602
  wrapper: ChatterboxWrapper,
603
  text: str,
604
  local_voice: VoiceProfile,
@@ -608,43 +460,26 @@ def _parallel_two_way_stream_generator(
608
  stream_id: str,
609
  helper_base_url: str,
610
  ) -> Generator[bytes, None, None]:
611
- """Additive 2-way split streamer (primary + helper).
612
-
613
- Routing pattern:
614
- - chunk 0,2,4... -> primary (local)
615
- - chunk 1,3,5... -> helper
616
- """
617
  cancel_event = threading.Event()
618
  _active_streams[stream_id] = cancel_event
619
 
620
- helper_base_url = (helper_base_url or "").strip()
621
- helper_route_set = {helper_base_url} if helper_base_url else set()
622
- if helper_route_set:
623
- with _stream_routes_lock:
624
- _stream_helper_routes[stream_id] = set(helper_route_set)
625
-
626
  clean_text = text_processor.sanitize(text.strip()[: Config.MAX_TEXT_LENGTH])
627
  chunks = text_processor.split_for_streaming(clean_text)
628
  total_chunks = len(chunks)
629
  if total_chunks == 0:
630
- with _stream_routes_lock:
631
- _stream_helper_routes.pop(stream_id, None)
632
  _active_streams.pop(stream_id, None)
633
  return
634
 
635
  lock = threading.Lock()
636
  cond = threading.Condition(lock)
637
- ready: dict[int, _ChunkPacket] = {}
638
  first_error: Optional[Exception] = None
639
  workers_done = 0
640
- expected_workers = 2
641
- stream_completed = False
642
 
643
- def _publish(packet: _ChunkPacket):
644
  with cond:
645
- # First write wins for an index to avoid duplicate retry races.
646
- if packet.index not in ready:
647
- ready[packet.index] = packet
648
  cond.notify_all()
649
 
650
  def _set_error(err: Exception):
@@ -669,46 +504,23 @@ def _parallel_two_way_stream_generator(
669
  )
670
  return _encode_mp3_chunk(audio)
671
 
672
- def _local_worker():
673
  try:
674
  for idx in range(0, total_chunks, 2):
675
  if cancel_event.is_set():
676
  break
677
  data = _synth_local(chunks[idx])
678
- _publish(
679
- _ChunkPacket(
680
- index=idx,
681
- data=data,
682
- lane="primary",
683
- produced_at=time.perf_counter(),
684
- )
685
- )
686
  except Exception as e:
687
  _set_error(e)
688
  finally:
689
  _worker_done()
690
 
691
- def _helper_worker():
692
- helper_available = bool(helper_base_url)
693
  helper_voice_key: Optional[str] = None
694
- helper_timeout = max(1.0, Config.HELPER_TIMEOUT_SEC)
695
- helper_client: Optional[_HelperHttpClient] = None
696
-
697
  try:
698
- if helper_available:
699
- try:
700
- helper_client = _HelperHttpClient(
701
- helper_base_url,
702
- default_timeout=helper_timeout,
703
- )
704
- except Exception as conn_err:
705
- helper_available = False
706
- logger.warning(
707
- f"[{stream_id}] helper keep-alive init failed ({conn_err}); "
708
- "using local fallback for helper lane"
709
- )
710
-
711
- if helper_available and helper_voice_bytes:
712
  attempts = 2 if Config.HELPER_RETRY_ONCE else 1
713
  last_err: Optional[Exception] = None
714
  for _ in range(attempts):
@@ -717,25 +529,19 @@ def _parallel_two_way_stream_generator(
717
  helper_base_url=helper_base_url,
718
  stream_id=stream_id,
719
  audio_bytes=helper_voice_bytes,
720
- timeout_sec=helper_timeout,
721
- helper_client=helper_client,
722
  )
723
  last_err = None
724
  break
725
  except Exception as reg_err:
726
  last_err = reg_err
727
  continue
728
-
729
  if last_err is not None:
730
  helper_available = False
731
  logger.warning(
732
- f"[{stream_id}] helper voice registration failed; "
733
- "falling back to local synthesis for helper lane"
734
  )
735
- elif not helper_available:
736
- logger.info(
737
- f"[{stream_id}] helper URL not configured; using local fallback"
738
- )
739
 
740
  for idx in range(1, total_chunks, 2):
741
  if cancel_event.is_set():
@@ -760,17 +566,9 @@ def _parallel_two_way_stream_generator(
760
  helper_data = _helper_request_chunk(
761
  helper_base_url=helper_base_url,
762
  payload=payload,
763
- timeout_sec=helper_timeout,
764
- helper_client=helper_client,
765
- )
766
- _publish(
767
- _ChunkPacket(
768
- index=idx,
769
- data=helper_data,
770
- lane="helper",
771
- produced_at=time.perf_counter(),
772
- )
773
  )
 
774
  last_err = None
775
  break
776
  except Exception as helper_err:
@@ -782,31 +580,22 @@ def _parallel_two_way_stream_generator(
782
 
783
  helper_available = False
784
  logger.warning(
785
- f"[{stream_id}] helper failed at chunk {idx}; "
786
- "falling back to local synthesis for remaining helper chunks"
787
  )
788
 
789
- # Local fallback for helper lane
790
  data = _synth_local(chunks[idx])
791
- _publish(
792
- _ChunkPacket(
793
- index=idx,
794
- data=data,
795
- lane="helper-local-fallback",
796
- produced_at=time.perf_counter(),
797
- )
798
- )
799
  except Exception as e:
800
  _set_error(e)
801
  finally:
802
- if helper_client is not None:
803
- helper_client.close()
804
  _worker_done()
805
 
806
- local_thread = threading.Thread(target=_local_worker, daemon=True)
807
- helper_thread = threading.Thread(target=_helper_worker, daemon=True)
808
- local_thread.start()
809
- helper_thread.start()
810
 
811
  next_idx = 0
812
  try:
@@ -816,7 +605,7 @@ def _parallel_two_way_stream_generator(
816
  next_idx not in ready
817
  and first_error is None
818
  and not cancel_event.is_set()
819
- and workers_done < expected_workers
820
  ):
821
  cond.wait(timeout=0.1)
822
 
@@ -824,12 +613,11 @@ def _parallel_two_way_stream_generator(
824
  break
825
 
826
  if next_idx in ready:
827
- packet = ready.pop(next_idx)
828
- buffered_chunks = len(ready)
829
  elif first_error is not None:
830
  logger.error(f"[{stream_id}] Parallel stream error: {first_error}")
831
  break
832
- elif workers_done >= expected_workers:
833
  logger.error(
834
  f"[{stream_id}] Parallel stream ended with missing chunk index {next_idx}"
835
  )
@@ -837,39 +625,13 @@ def _parallel_two_way_stream_generator(
837
  else:
838
  continue
839
 
840
- logger.debug(
841
- "[%s] stitch emit chunk %s/%s from %s (buffered=%s)",
842
- stream_id,
843
- next_idx + 1,
844
- total_chunks,
845
- packet.lane,
846
- buffered_chunks,
847
- )
848
- yield packet.data
849
  next_idx += 1
850
- stream_completed = (
851
- next_idx >= total_chunks
852
- and first_error is None
853
- and not cancel_event.is_set()
854
- )
855
  finally:
856
  cancel_event.set()
857
-
858
- # For fast stop/cancel, signal helpers first; for normal completion, wait for
859
- # workers to flush and then ask helpers to clear stream state.
860
- if not stream_completed:
861
- for base_url in helper_route_set:
862
- _helper_cancel_stream(base_url, stream_id)
863
-
864
- local_thread.join(timeout=1.0)
865
- helper_thread.join(timeout=1.0)
866
-
867
- if stream_completed:
868
- for base_url in helper_route_set:
869
- _helper_complete_stream(base_url, stream_id)
870
-
871
- with _stream_routes_lock:
872
- _stream_helper_routes.pop(stream_id, None)
873
  _active_streams.pop(stream_id, None)
874
 
875
 
@@ -923,7 +685,7 @@ async def parallel_stream_text_to_speech(
923
  repetition_penalty: float = Form(Config.REPETITION_PENALTY),
924
  helper_url: Optional[str] = Form(None),
925
  ):
926
- """Additive 2-way split stream mode (primary + helper)."""
927
  wrapper: ChatterboxWrapper = getattr(app.state, "wrapper", None)
928
  if not wrapper:
929
  raise HTTPException(503, "Model not loaded")
@@ -949,13 +711,16 @@ async def parallel_stream_text_to_speech(
949
  logger.error(f"Parallel voice encoding failed: {e}")
950
  raise HTTPException(400, "Could not process voice file for parallel mode")
951
  else:
 
 
952
  try:
953
  selected_voice_id = wrapper.resolve_voice_id(voice_name)
954
  local_voice = wrapper.get_builtin_voice(selected_voice_id)
955
  except ValueError as e:
956
  raise HTTPException(status_code=400, detail=str(e))
957
 
958
- # Ensure helper uses the same selected built-in voice.
 
959
  if selected_voice_id != wrapper.default_voice_name:
960
  helper_voice_bytes = wrapper.get_builtin_voice_bytes(selected_voice_id)
961
  if not helper_voice_bytes:
@@ -968,12 +733,12 @@ async def parallel_stream_text_to_speech(
968
  if not resolved_helper:
969
  raise HTTPException(
970
  400,
971
- "No helper configured. Set CB_HELPER_BASE_URL or pass helper_url.",
972
  )
973
 
974
  stream_id = uuid.uuid4().hex[:12]
975
  return StreamingResponse(
976
- _parallel_two_way_stream_generator(
977
  wrapper=wrapper,
978
  text=text,
979
  local_voice=local_voice,
@@ -988,7 +753,7 @@ async def parallel_stream_text_to_speech(
988
  "Content-Disposition": "attachment; filename=tts_parallel_stream.mp3",
989
  "Transfer-Encoding": "chunked",
990
  "X-Stream-Id": stream_id,
991
- "X-Streaming-Type": "parallel-2way",
992
  "Cache-Control": "no-cache",
993
  },
994
  )
@@ -1051,13 +816,8 @@ async def internal_voice_register(http_request: Request):
1051
  stream_id = (http_request.query_params.get("stream_id") or "").strip()
1052
  if stream_id:
1053
  with _internal_cancel_lock:
1054
- _purge_internal_stream_state_locked()
1055
- keys, _ = _internal_stream_voice_keys.get(stream_id, (set(), 0.0))
1056
  keys.add(voice_key)
1057
- _internal_stream_voice_keys[stream_id] = (
1058
- keys,
1059
- time.time() + max(1, Config.INTERNAL_STREAM_STATE_TTL_SEC),
1060
- )
1061
 
1062
  return {"status": "registered", "voice_key": voice_key}
1063
 
@@ -1074,10 +834,8 @@ async def internal_chunk_synthesize(
1074
  raise HTTPException(403, "Forbidden")
1075
 
1076
  with _internal_cancel_lock:
1077
- _purge_internal_stream_state_locked()
1078
  if request.stream_id in _internal_cancelled_streams:
1079
  raise HTTPException(409, "Stream already cancelled")
1080
- _touch_internal_stream_voice_keys_locked(request.stream_id)
1081
 
1082
  wrapper: ChatterboxWrapper = getattr(app.state, "wrapper", None)
1083
  if not wrapper:
@@ -1129,28 +887,11 @@ async def internal_chunk_cancel(stream_id: str, http_request: Request):
1129
  raise HTTPException(403, "Forbidden")
1130
 
1131
  with _internal_cancel_lock:
1132
- _purge_internal_stream_state_locked()
1133
- _internal_cancelled_streams[stream_id] = (
1134
- time.time() + max(1, Config.INTERNAL_CANCEL_TTL_SEC)
1135
- )
1136
  _internal_stream_voice_keys.pop(stream_id, None)
1137
  return {"status": "cancelled", "stream_id": stream_id}
1138
 
1139
 
1140
- @app.post("/internal/chunk/complete/{stream_id}")
1141
- async def internal_chunk_complete(stream_id: str, http_request: Request):
1142
- """Best-effort immediate cleanup after stream completes normally."""
1143
- if Config.INTERNAL_SHARED_SECRET:
1144
- provided = http_request.headers.get("X-Internal-Secret", "")
1145
- if provided != Config.INTERNAL_SHARED_SECRET:
1146
- raise HTTPException(403, "Forbidden")
1147
-
1148
- with _internal_cancel_lock:
1149
- _purge_internal_stream_state_locked()
1150
- _clear_internal_stream_state_locked(stream_id)
1151
- return {"status": "completed", "stream_id": stream_id}
1152
-
1153
-
1154
  @app.post("/v1/audio/speech")
1155
  async def openai_compatible_tts(request: TTSJsonRequest):
1156
  """OpenAI-compatible streaming endpoint (JSON body, no file upload).
@@ -1195,10 +936,6 @@ async def stop_stream(stream_id: str):
1195
  event = _active_streams.get(stream_id)
1196
  if event:
1197
  event.set()
1198
- with _stream_routes_lock:
1199
- helper_routes = set(_stream_helper_routes.pop(stream_id, set()))
1200
- for helper_url in helper_routes:
1201
- _helper_cancel_stream(helper_url, stream_id)
1202
  logger.info(f"Stream {stream_id} cancelled by client")
1203
  return {"status": "stopped", "stream_id": stream_id}
1204
  return {"status": "not_found", "stream_id": stream_id}
@@ -1207,16 +944,9 @@ async def stop_stream(stream_id: str):
1207
  @app.post("/tts/stop")
1208
  async def stop_all_streams():
1209
  """Emergency stop: cancel ALL active TTS streams."""
1210
- active_items = list(_active_streams.items())
1211
- count = len(active_items)
1212
- with _stream_routes_lock:
1213
- stream_routes = {sid: set(urls) for sid, urls in _stream_helper_routes.items()}
1214
- _stream_helper_routes.clear()
1215
-
1216
- for sid, event in active_items:
1217
  event.set()
1218
- for helper_url in stream_routes.get(sid, set()):
1219
- _helper_cancel_stream(helper_url, sid)
1220
  _active_streams.clear()
1221
  logger.info(f"Stopped all streams ({count} active)")
1222
  return {"status": "stopped_all", "count": count}
 
1
+ """
2
+ Chatterbox Turbo TTS -- FastAPI Server
3
+ ======================================
4
+ Production-ready API with true real-time MP3 streaming,
5
+ in-memory voice cloning, and fully non-blocking inference.
6
+
7
+ Endpoints:
8
+ GET /health -> health check + optional warmup
9
+ GET /info -> model info, supported tags, parameters
10
+ POST /tts -> full audio response (WAV/MP3/FLAC)
11
+ POST /tts/stream -> chunked MP3 streaming (MediaSource-ready)
12
+ POST /tts/true-stream -> alias for /tts/stream (Kokoro compat)
13
+ POST /tts/stop/{stream_id}-> cancel a specific active stream
14
+ POST /tts/stop -> cancel ALL active streams
15
+ POST /v1/audio/speech -> OpenAI-compatible streaming
16
+ """
17
  import asyncio
 
18
  import io
19
  import json
20
  import logging
21
  import queue as stdlib_queue
22
  import threading
23
  import time
24
+ import urllib.error
25
  import urllib.parse
26
+ import urllib.request
27
  import uuid
28
  from concurrent.futures import ThreadPoolExecutor
29
+ from typing import Generator, Optional
 
30
 
31
  import numpy as np
32
  import soundfile as sf
 
111
 
112
async def _resolve_voice(
    voice_ref: Optional[UploadFile],
    voice_name: str,
    wrapper: ChatterboxWrapper,
) -> VoiceProfile:
    """Return a VoiceProfile from uploaded audio or a built-in voice name.

    An uploaded reference file takes priority over ``voice_name``.

    Args:
        voice_ref: Optional uploaded reference audio. Ignored when it is
            ``None`` or has an empty filename.
        voice_name: Built-in voice name passed to
            ``wrapper.get_builtin_voice`` when no file was uploaded.
        wrapper: Model wrapper used to encode uploads and look up built-ins.

    Raises:
        HTTPException: 413 when the upload exceeds
            ``Config.MAX_VOICE_UPLOAD_BYTES``; 400 when the upload is empty,
            cannot be encoded, or the built-in voice lookup fails.
    """
    # 1) If a file was uploaded, encode it (highest priority).
    if voice_ref is not None and voice_ref.filename:
        audio_bytes = await voice_ref.read()
        if len(audio_bytes) > Config.MAX_VOICE_UPLOAD_BYTES:
            # Derive the advertised limit from the configured value so the
            # message stays correct if the limit is changed.
            max_mb = Config.MAX_VOICE_UPLOAD_BYTES // (1024 * 1024)
            raise HTTPException(
                status_code=413,
                detail=f"Voice file too large (max {max_mb} MB)",
            )
        if not audio_bytes:
            raise HTTPException(status_code=400, detail="Empty voice file")

        loop = asyncio.get_running_loop()
        try:
            # Encoding runs on the executor so the event loop is not blocked.
            return await loop.run_in_executor(
                tts_executor, wrapper.encode_voice_from_bytes, audio_bytes
            )
        except ValueError as e:
            raise HTTPException(status_code=400, detail=str(e)) from e
        except Exception as e:
            logger.error(f"Voice encoding failed: {e}")
            raise HTTPException(
                status_code=400,
                detail=f"Could not process voice file: {str(e)}. "
                       "Supported formats: WAV, MP3, MPEG, M4A, OGG, FLAC, WebM."
            ) from e

    # 2) Otherwise resolve by built-in voice name.
    try:
        return wrapper.get_builtin_voice(voice_name)
    except (ValueError, KeyError) as e:
        raise HTTPException(status_code=400, detail=str(e)) from e
 
 
 
 
 
 
 
146
 
147
 
148
  # ═══════════════════════════════════════════════════════════════════
 
170
  return data
171
 
172
 
173
+ def _build_helper_endpoint(base_url: str, path: str) -> str:
174
+ return f"{base_url.rstrip('/')}{path}"
 
 
 
 
175
 
176
 
177
def _internal_headers() -> dict[str, str]:
    """Build the HTTP headers sent with JSON requests to the helper.

    The ``X-Internal-Secret`` header is included only when
    ``Config.INTERNAL_SHARED_SECRET`` is set to a truthy value.
    """
    shared_secret = Config.INTERNAL_SHARED_SECRET
    base_headers = {"Content-Type": "application/json", "Accept": "audio/mpeg"}
    if not shared_secret:
        return base_headers
    return {**base_headers, "X-Internal-Secret": shared_secret}
182
 
183
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
184
def _helper_request_chunk(
    helper_base_url: str,
    payload: dict,
    timeout_sec: float,
) -> bytes:
    """POST a chunk synthesis request to the helper and return the raw response body.

    ``payload`` is serialised as UTF-8 JSON and sent to
    ``/internal/chunk/synthesize`` under ``helper_base_url``. Errors raised
    by ``urllib.request.urlopen`` propagate to the caller.
    """
    endpoint = _build_helper_endpoint(helper_base_url, "/internal/chunk/synthesize")
    encoded_payload = json.dumps(payload).encode("utf-8")
    request = urllib.request.Request(
        url=endpoint,
        data=encoded_payload,
        headers=_internal_headers(),
        method="POST",
    )
    response = urllib.request.urlopen(request, timeout=timeout_sec)
    with response:
        return response.read()
199
 
200
 
201
  def _helper_register_voice(
 
203
  stream_id: str,
204
  audio_bytes: bytes,
205
  timeout_sec: float,
 
206
  ) -> str:
207
  """Register reference voice on helper once, return voice_key for chunk calls."""
208
+ query = urllib.parse.urlencode({"stream_id": stream_id})
209
+ url = _build_helper_endpoint(helper_base_url, f"/internal/voice/register?{query}")
210
+ headers = {"Content-Type": "application/octet-stream", "Accept": "application/json"}
211
+ if Config.INTERNAL_SHARED_SECRET:
212
+ headers["X-Internal-Secret"] = Config.INTERNAL_SHARED_SECRET
213
+
214
+ req = urllib.request.Request(
215
+ url=url,
216
+ data=audio_bytes,
217
+ headers=headers,
218
+ method="POST",
219
+ )
220
+ with urllib.request.urlopen(req, timeout=timeout_sec) as resp:
221
+ data = json.loads(resp.read().decode("utf-8"))
222
+ voice_key = (data.get("voice_key") or "").strip()
223
+ if not voice_key:
224
+ raise RuntimeError("helper voice registration returned no voice_key")
225
+ return voice_key
226
 
227
 
228
def _helper_cancel_stream(helper_base_url: str, stream_id: str):
    """Send a best-effort cancellation signal for ``stream_id`` to the helper.

    POSTs an empty body to ``/internal/chunk/cancel/{stream_id}`` with a
    3-second timeout. Failures never propagate to the caller; they are
    only logged at debug level.

    Args:
        helper_base_url: Base URL of the helper server.
        stream_id: Identifier of the stream to cancel.
    """
    try:
        # Percent-encode the id so characters such as "/", "?" or "#"
        # cannot change which helper endpoint is addressed.
        safe_stream_id = urllib.parse.quote(stream_id, safe="")
        url = _build_helper_endpoint(
            helper_base_url, f"/internal/chunk/cancel/{safe_stream_id}"
        )
        req = urllib.request.Request(
            url=url,
            data=b"",
            headers=_internal_headers(),
            method="POST",
        )
        with urllib.request.urlopen(req, timeout=3.0):
            pass
    except Exception as exc:
        # Deliberately best-effort: record the failure but never raise.
        logger.debug("[%s] helper cancel request failed: %s", stream_id, exc)
242
 
243
 
 
 
 
 
 
 
 
 
 
 
 
 
 
244
  # ═══════════════════════════════════════════════════════════════════
245
  # Endpoints
246
  # ═══════════════════════════════════════════════════════════════════
 
248
  @app.get("/health")
249
  async def health(warm_up: bool = False):
250
  wrapper: ChatterboxWrapper = getattr(app.state, "wrapper", None)
 
 
 
 
 
251
  status = {
252
  "status": "healthy" if wrapper else "loading",
253
  "model_loaded": wrapper is not None,
254
  "model_dtype": Config.MODEL_DTYPE,
255
  "streaming_supported": True,
256
  "voice_cache_entries": wrapper._voice_cache.size if wrapper else 0,
 
 
257
  }
258
  if warm_up and wrapper:
259
  try:
 
265
  return status
266
 
267
 
268
@app.get("/info")
async def info():
    """Return static model, parameter, voice-cloning and parallel-mode metadata.

    Every value comes from ``Config``; nothing here depends on the model
    being loaded.
    """
    return {
        "model": Config.MODEL_ID,
        "dtype": Config.MODEL_DTYPE,
        "sample_rate": Config.SAMPLE_RATE,
        "paralinguistic_tags": list(Config.PARALINGUISTIC_TAGS),
        "tag_usage": "Insert tags directly in text, e.g. 'That is so funny! [laugh] Anyway…'",
        "parameters": {
            "max_new_tokens": {"default": Config.MAX_NEW_TOKENS, "range": "64–2048"},
            "repetition_penalty": {"default": Config.REPETITION_PENALTY, "range": "1.0–2.0"},
        },
        "voice_cloning": {
            "description": "Upload 3–30s reference WAV/MP3 as 'voice_ref' field",
            "max_upload_mb": Config.MAX_VOICE_UPLOAD_BYTES // (1024 * 1024),
        },
        "parallel_mode": {
            "enabled": Config.ENABLE_PARALLEL_MODE,
            "helper_configured": bool(Config.HELPER_BASE_URL),
            # NOTE(review): this exposes the configured helper URL to any
            # caller of /info -- confirm that is intended.
            "helper_base_url": Config.HELPER_BASE_URL or None,
            "supports_voice_ref": True,
        },
    }
291
+
292
 
293
  @app.get("/voices")
294
  async def list_voices():
295
+ """Return all built-in voices available for selection."""
296
  wrapper: ChatterboxWrapper = getattr(app.state, "wrapper", None)
297
  if not wrapper:
298
  raise HTTPException(503, "Model not loaded")
 
 
299
  return {
300
+ "default": wrapper.default_voice_name,
301
+ "voices": wrapper.list_builtin_voices(),
 
 
 
 
 
 
302
  }
303
 
304
 
 
348
  # ═══════════════════════════════════════════════════════════════════
349
 
350
  _active_streams: dict[str, threading.Event] = {}
351
+ _internal_cancelled_streams: set[str] = set()
 
352
  _internal_cancel_lock = threading.Lock()
353
+ _internal_stream_voice_keys: dict[str, set[str]] = {}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
354
 
355
 
356
  # ═══════════════════════════════════════════════════════════════════
 
450
  _active_streams.pop(stream_id, None)
451
 
452
 
453
+ def _parallel_odd_even_stream_generator(
454
  wrapper: ChatterboxWrapper,
455
  text: str,
456
  local_voice: VoiceProfile,
 
460
  stream_id: str,
461
  helper_base_url: str,
462
  ) -> Generator[bytes, None, None]:
463
+ """Additive odd/even split streamer (primary handles odd, helper handles even)."""
 
 
 
 
 
464
  cancel_event = threading.Event()
465
  _active_streams[stream_id] = cancel_event
466
 
 
 
 
 
 
 
467
  clean_text = text_processor.sanitize(text.strip()[: Config.MAX_TEXT_LENGTH])
468
  chunks = text_processor.split_for_streaming(clean_text)
469
  total_chunks = len(chunks)
470
  if total_chunks == 0:
 
 
471
  _active_streams.pop(stream_id, None)
472
  return
473
 
474
  lock = threading.Lock()
475
  cond = threading.Condition(lock)
476
+ ready: dict[int, bytes] = {}
477
  first_error: Optional[Exception] = None
478
  workers_done = 0
 
 
479
 
480
+ def _publish(idx: int, data: bytes):
481
  with cond:
482
+ ready[idx] = data
 
 
483
  cond.notify_all()
484
 
485
  def _set_error(err: Exception):
 
504
  )
505
  return _encode_mp3_chunk(audio)
506
 
507
+ def _odd_worker():
508
  try:
509
  for idx in range(0, total_chunks, 2):
510
  if cancel_event.is_set():
511
  break
512
  data = _synth_local(chunks[idx])
513
+ _publish(idx, data)
 
 
 
 
 
 
 
514
  except Exception as e:
515
  _set_error(e)
516
  finally:
517
  _worker_done()
518
 
519
+ def _even_worker():
520
+ helper_available = True
521
  helper_voice_key: Optional[str] = None
 
 
 
522
  try:
523
+ if helper_voice_bytes:
 
 
 
 
 
 
 
 
 
 
 
 
 
524
  attempts = 2 if Config.HELPER_RETRY_ONCE else 1
525
  last_err: Optional[Exception] = None
526
  for _ in range(attempts):
 
529
  helper_base_url=helper_base_url,
530
  stream_id=stream_id,
531
  audio_bytes=helper_voice_bytes,
532
+ timeout_sec=max(1.0, Config.HELPER_TIMEOUT_SEC),
 
533
  )
534
  last_err = None
535
  break
536
  except Exception as reg_err:
537
  last_err = reg_err
538
  continue
 
539
  if last_err is not None:
540
  helper_available = False
541
  logger.warning(
542
+ f"[{stream_id}] Helper voice registration failed; "
543
+ "falling back to local synthesis for even chunks"
544
  )
 
 
 
 
545
 
546
  for idx in range(1, total_chunks, 2):
547
  if cancel_event.is_set():
 
566
  helper_data = _helper_request_chunk(
567
  helper_base_url=helper_base_url,
568
  payload=payload,
569
+ timeout_sec=max(1.0, Config.HELPER_TIMEOUT_SEC),
 
 
 
 
 
 
 
 
 
570
  )
571
+ _publish(idx, helper_data)
572
  last_err = None
573
  break
574
  except Exception as helper_err:
 
580
 
581
  helper_available = False
582
  logger.warning(
583
+ f"[{stream_id}] Helper failed at chunk {idx}; "
584
+ "falling back to local synthesis for remaining even chunks"
585
  )
586
 
587
+ # Local fallback for even chunks
588
  data = _synth_local(chunks[idx])
589
+ _publish(idx, data)
 
 
 
 
 
 
 
590
  except Exception as e:
591
  _set_error(e)
592
  finally:
 
 
593
  _worker_done()
594
 
595
+ odd_thread = threading.Thread(target=_odd_worker, daemon=True)
596
+ even_thread = threading.Thread(target=_even_worker, daemon=True)
597
+ odd_thread.start()
598
+ even_thread.start()
599
 
600
  next_idx = 0
601
  try:
 
605
  next_idx not in ready
606
  and first_error is None
607
  and not cancel_event.is_set()
608
+ and workers_done < 2
609
  ):
610
  cond.wait(timeout=0.1)
611
 
 
613
  break
614
 
615
  if next_idx in ready:
616
+ data = ready.pop(next_idx)
 
617
  elif first_error is not None:
618
  logger.error(f"[{stream_id}] Parallel stream error: {first_error}")
619
  break
620
+ elif workers_done >= 2:
621
  logger.error(
622
  f"[{stream_id}] Parallel stream ended with missing chunk index {next_idx}"
623
  )
 
625
  else:
626
  continue
627
 
628
+ yield data
 
 
 
 
 
 
 
 
629
  next_idx += 1
 
 
 
 
 
630
  finally:
631
  cancel_event.set()
632
+ _helper_cancel_stream(helper_base_url, stream_id)
633
+ odd_thread.join(timeout=1.0)
634
+ even_thread.join(timeout=1.0)
 
 
 
 
 
 
 
 
 
 
 
 
 
635
  _active_streams.pop(stream_id, None)
636
 
637
 
 
685
  repetition_penalty: float = Form(Config.REPETITION_PENALTY),
686
  helper_url: Optional[str] = Form(None),
687
  ):
688
+ """Additive odd/even split stream mode (primary + helper)."""
689
  wrapper: ChatterboxWrapper = getattr(app.state, "wrapper", None)
690
  if not wrapper:
691
  raise HTTPException(503, "Model not loaded")
 
711
  logger.error(f"Parallel voice encoding failed: {e}")
712
  raise HTTPException(400, "Could not process voice file for parallel mode")
713
  else:
714
+ # Built-in voice selected by name — resolve locally and prepare
715
+ # bytes for helper registration so helpers cache the same hash.
716
  try:
717
  selected_voice_id = wrapper.resolve_voice_id(voice_name)
718
  local_voice = wrapper.get_builtin_voice(selected_voice_id)
719
  except ValueError as e:
720
  raise HTTPException(status_code=400, detail=str(e))
721
 
722
+ # Only send bytes to helper if a non-default voice was selected,
723
+ # because the helper's own default is already loaded.
724
  if selected_voice_id != wrapper.default_voice_name:
725
  helper_voice_bytes = wrapper.get_builtin_voice_bytes(selected_voice_id)
726
  if not helper_voice_bytes:
 
733
  if not resolved_helper:
734
  raise HTTPException(
735
  400,
736
+ "Helper URL not configured. Set CB_HELPER_BASE_URL or pass helper_url.",
737
  )
738
 
739
  stream_id = uuid.uuid4().hex[:12]
740
  return StreamingResponse(
741
+ _parallel_odd_even_stream_generator(
742
  wrapper=wrapper,
743
  text=text,
744
  local_voice=local_voice,
 
753
  "Content-Disposition": "attachment; filename=tts_parallel_stream.mp3",
754
  "Transfer-Encoding": "chunked",
755
  "X-Stream-Id": stream_id,
756
+ "X-Streaming-Type": "parallel-odd-even",
757
  "Cache-Control": "no-cache",
758
  },
759
  )
 
816
  stream_id = (http_request.query_params.get("stream_id") or "").strip()
817
  if stream_id:
818
  with _internal_cancel_lock:
819
+ keys = _internal_stream_voice_keys.setdefault(stream_id, set())
 
820
  keys.add(voice_key)
 
 
 
 
821
 
822
  return {"status": "registered", "voice_key": voice_key}
823
 
 
834
  raise HTTPException(403, "Forbidden")
835
 
836
  with _internal_cancel_lock:
 
837
  if request.stream_id in _internal_cancelled_streams:
838
  raise HTTPException(409, "Stream already cancelled")
 
839
 
840
  wrapper: ChatterboxWrapper = getattr(app.state, "wrapper", None)
841
  if not wrapper:
 
887
  raise HTTPException(403, "Forbidden")
888
 
889
  with _internal_cancel_lock:
890
+ _internal_cancelled_streams.add(stream_id)
 
 
 
891
  _internal_stream_voice_keys.pop(stream_id, None)
892
  return {"status": "cancelled", "stream_id": stream_id}
893
 
894
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
895
  @app.post("/v1/audio/speech")
896
  async def openai_compatible_tts(request: TTSJsonRequest):
897
  """OpenAI-compatible streaming endpoint (JSON body, no file upload).
 
936
  event = _active_streams.get(stream_id)
937
  if event:
938
  event.set()
 
 
 
 
939
  logger.info(f"Stream {stream_id} cancelled by client")
940
  return {"status": "stopped", "stream_id": stream_id}
941
  return {"status": "not_found", "stream_id": stream_id}
 
944
  @app.post("/tts/stop")
945
  async def stop_all_streams():
946
  """Emergency stop: cancel ALL active TTS streams."""
947
+ count = len(_active_streams)
948
+ for sid, event in list(_active_streams.items()):
 
 
 
 
 
949
  event.set()
 
 
950
  _active_streams.clear()
951
  logger.info(f"Stopped all streams ({count} active)")
952
  return {"status": "stopped_all", "count": count}
chatterbox_wrapper.py CHANGED
@@ -50,6 +50,7 @@ _SUPPORTED_AUDIO_EXTENSIONS = {
50
 
51
 
52
  def _slugify(text: str) -> str:
 
53
  buf = []
54
  prev_underscore = False
55
  for ch in text.strip().lower():
@@ -64,6 +65,7 @@ def _slugify(text: str) -> str:
64
  return slug or "voice"
65
 
66
 
 
67
  # ═══════════════════════════════════════════════════════════════════
68
  # Data Structures
69
  # ═══════════════════════════════════════════════════════════════════
 
50
 
51
 
52
  def _slugify(text: str) -> str:
53
+ """Convert a display name to a safe, lowercase identifier."""
54
  buf = []
55
  prev_underscore = False
56
  for ch in text.strip().lower():
 
65
  return slug or "voice"
66
 
67
 
68
+
69
  # ═══════════════════════════════════════════════════════════════════
70
  # Data Structures
71
  # ═══════════════════════════════════════════════════════════════════
config.py CHANGED
@@ -77,14 +77,11 @@ class Config:
77
  # Smaller chunks = faster TTFB (first audio arrives sooner)
78
  # ~200 chars ≈ 1–2 sentences ≈ fastest first-chunk on 2 vCPU
79
  MAX_CHUNK_CHARS: int = int(os.getenv("CB_MAX_CHUNK_CHARS", "100"))
80
- # Additive parallel mode (2-way split: primary + helper).
81
  ENABLE_PARALLEL_MODE: bool = _get_bool("CB_ENABLE_PARALLEL_MODE", True)
82
  HELPER_BASE_URL: str = os.getenv("CB_HELPER_BASE_URL", "https://shadowhunter222-chab2.hf.space").strip()
83
  HELPER_TIMEOUT_SEC: float = float(os.getenv("CB_HELPER_TIMEOUT_SEC", "45"))
84
  HELPER_RETRY_ONCE: bool = _get_bool("CB_HELPER_RETRY_ONCE", True)
85
- # Internal housekeeping TTLs to avoid retaining stream metadata indefinitely.
86
- INTERNAL_CANCEL_TTL_SEC: int = int(os.getenv("CB_INTERNAL_CANCEL_TTL_SEC", "120"))
87
- INTERNAL_STREAM_STATE_TTL_SEC: int = int(os.getenv("CB_INTERNAL_STREAM_STATE_TTL_SEC", "600"))
88
  # Optional shared secret for internal chunk endpoints.
89
  INTERNAL_SHARED_SECRET: str = os.getenv("CB_INTERNAL_SHARED_SECRET", "").strip()
90
 
@@ -93,14 +90,13 @@ class Config:
93
  PORT: int = int(os.getenv("CB_PORT", "7860"))
94
 
95
  ALLOWED_ORIGINS: list = [
96
- "https://toolboxesai.com",
97
  "https://www.toolboxesai.com",
98
  "www.toolboxesai.com",
99
- "toolboxesai.com",
100
  "http://localhost:8788", "http://127.0.0.1:8788",
101
  "http://localhost:5502", "http://127.0.0.1:5502",
102
  "http://localhost:5501", "http://127.0.0.1:5501",
103
  "http://localhost:5500", "http://127.0.0.1:5500",
104
  "http://localhost:5173", "http://127.0.0.1:5173",
105
  "http://localhost:7860", "http://127.0.0.1:7860",
106
- ]
 
77
  # Smaller chunks = faster TTFB (first audio arrives sooner)
78
  # ~200 chars ≈ 1–2 sentences ≈ fastest first-chunk on 2 vCPU
79
  MAX_CHUNK_CHARS: int = int(os.getenv("CB_MAX_CHUNK_CHARS", "100"))
80
+ # Additive parallel mode (odd/even split across primary/helper).
81
  ENABLE_PARALLEL_MODE: bool = _get_bool("CB_ENABLE_PARALLEL_MODE", True)
82
  HELPER_BASE_URL: str = os.getenv("CB_HELPER_BASE_URL", "https://shadowhunter222-chab2.hf.space").strip()
83
  HELPER_TIMEOUT_SEC: float = float(os.getenv("CB_HELPER_TIMEOUT_SEC", "45"))
84
  HELPER_RETRY_ONCE: bool = _get_bool("CB_HELPER_RETRY_ONCE", True)
 
 
 
85
  # Optional shared secret for internal chunk endpoints.
86
  INTERNAL_SHARED_SECRET: str = os.getenv("CB_INTERNAL_SHARED_SECRET", "").strip()
87
 
 
90
  PORT: int = int(os.getenv("CB_PORT", "7860"))
91
 
92
  ALLOWED_ORIGINS: list = [
 
93
  "https://www.toolboxesai.com",
94
  "www.toolboxesai.com",
95
+ "https://toolboxesai.com",
96
  "http://localhost:8788", "http://127.0.0.1:8788",
97
  "http://localhost:5502", "http://127.0.0.1:5502",
98
  "http://localhost:5501", "http://127.0.0.1:5501",
99
  "http://localhost:5500", "http://127.0.0.1:5500",
100
  "http://localhost:5173", "http://127.0.0.1:5173",
101
  "http://localhost:7860", "http://127.0.0.1:7860",
102
+ ]
her_prompt.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8eaabbeafe26ad6f78b56dcc32608763eeb69485db074c7136c6818f04a93ced
3
+ size 725328
ivr_female_prompt.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:64953bf94657c4334532319fd4f20e9859c31af4445940916b04f129ef1f89e6
3
+ size 2779278