ShadowHunter222 committed on
Commit
9985a87
·
verified ·
1 Parent(s): 7998e8d

Upload 4 files

Browse files
Files changed (3) hide show
  1. app.py +46 -106
  2. chatterbox_wrapper.py +6 -198
  3. config.py +2 -22
app.py CHANGED
@@ -1,3 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import asyncio
2
  import io
3
  import json
@@ -219,26 +235,6 @@ def _helper_cancel_stream(helper_base_url: str, stream_id: str):
219
  pass
220
 
221
 
222
- def _helper_complete_stream(helper_base_url: str, stream_id: str):
223
- """Best-effort stream completion cleanup on helper.
224
-
225
- Falls back to cancel for backwards compatibility if helper does not expose
226
- the completion endpoint yet.
227
- """
228
- try:
229
- url = _build_helper_endpoint(helper_base_url, f"/internal/chunk/complete/{stream_id}")
230
- req = urllib.request.Request(
231
- url=url,
232
- data=b"",
233
- headers=_internal_headers(),
234
- method="POST",
235
- )
236
- with urllib.request.urlopen(req, timeout=3.0):
237
- pass
238
- except Exception:
239
- _helper_cancel_stream(helper_base_url, stream_id)
240
-
241
-
242
  # ═══════════════════════════════════════════════════════════════════
243
  # Endpoints
244
  # ═══════════════════════════════════════════════════════════════════
@@ -246,19 +242,12 @@ def _helper_complete_stream(helper_base_url: str, stream_id: str):
246
  @app.get("/health")
247
  async def health(warm_up: bool = False):
248
  wrapper: ChatterboxWrapper = getattr(app.state, "wrapper", None)
249
- with _internal_cancel_lock:
250
- _purge_internal_stream_state_locked()
251
- cancelled_count = len(_internal_cancelled_streams)
252
- voice_state_count = len(_internal_stream_voice_keys)
253
-
254
  status = {
255
  "status": "healthy" if wrapper else "loading",
256
  "model_loaded": wrapper is not None,
257
  "model_dtype": Config.MODEL_DTYPE,
258
  "streaming_supported": True,
259
  "voice_cache_entries": wrapper._voice_cache.size if wrapper else 0,
260
- "internal_cancelled_streams": cancelled_count,
261
- "internal_stream_voice_states": voice_state_count,
262
  }
263
  if warm_up and wrapper:
264
  try:
@@ -270,6 +259,31 @@ async def health(warm_up: bool = False):
270
  return status
271
 
272
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
273
  # ── POST /tts ─────────────────────────────────────────────────────
274
 
275
  @app.post("/tts", response_class=Response)
@@ -315,47 +329,9 @@ async def text_to_speech(
315
  # ═══════════════════════════════════════════════════════════════════
316
 
317
  _active_streams: dict[str, threading.Event] = {}
318
- # stream_id -> expires_at epoch seconds
319
- _internal_cancelled_streams: dict[str, float] = {}
320
  _internal_cancel_lock = threading.Lock()
321
- # stream_id -> (voice_keys, expires_at)
322
- _internal_stream_voice_keys: dict[str, tuple[set[str], float]] = {}
323
-
324
-
325
- def _purge_internal_stream_state_locked(now: Optional[float] = None):
326
- now_ts = now if now is not None else time.time()
327
-
328
- expired_cancel_ids = [
329
- sid for sid, expires_at in _internal_cancelled_streams.items()
330
- if expires_at <= now_ts
331
- ]
332
- for sid in expired_cancel_ids:
333
- _internal_cancelled_streams.pop(sid, None)
334
-
335
- expired_voice_state_ids = [
336
- sid for sid, (_, expires_at) in _internal_stream_voice_keys.items()
337
- if expires_at <= now_ts
338
- ]
339
- for sid in expired_voice_state_ids:
340
- _internal_stream_voice_keys.pop(sid, None)
341
-
342
-
343
- def _touch_internal_stream_voice_keys_locked(stream_id: str):
344
- if not stream_id:
345
- return
346
- entry = _internal_stream_voice_keys.get(stream_id)
347
- if entry is None:
348
- return
349
- keys, _ = entry
350
- _internal_stream_voice_keys[stream_id] = (
351
- keys,
352
- time.time() + max(1, Config.INTERNAL_STREAM_STATE_TTL_SEC),
353
- )
354
-
355
-
356
- def _clear_internal_stream_state_locked(stream_id: str):
357
- _internal_cancelled_streams.pop(stream_id, None)
358
- _internal_stream_voice_keys.pop(stream_id, None)
359
 
360
 
361
  # ═══════════════════════════════════════════════════════════════════
@@ -481,7 +457,6 @@ def _parallel_odd_even_stream_generator(
481
  ready: dict[int, bytes] = {}
482
  first_error: Optional[Exception] = None
483
  workers_done = 0
484
- stream_completed = False
485
 
486
  def _publish(idx: int, data: bytes):
487
  with cond:
@@ -633,17 +608,9 @@ def _parallel_odd_even_stream_generator(
633
 
634
  yield data
635
  next_idx += 1
636
- stream_completed = (
637
- next_idx >= total_chunks
638
- and first_error is None
639
- and not cancel_event.is_set()
640
- )
641
  finally:
642
  cancel_event.set()
643
- if stream_completed:
644
- _helper_complete_stream(helper_base_url, stream_id)
645
- else:
646
- _helper_cancel_stream(helper_base_url, stream_id)
647
  odd_thread.join(timeout=1.0)
648
  even_thread.join(timeout=1.0)
649
  _active_streams.pop(stream_id, None)
@@ -810,13 +777,8 @@ async def internal_voice_register(http_request: Request):
810
  stream_id = (http_request.query_params.get("stream_id") or "").strip()
811
  if stream_id:
812
  with _internal_cancel_lock:
813
- _purge_internal_stream_state_locked()
814
- keys, _ = _internal_stream_voice_keys.get(stream_id, (set(), 0.0))
815
  keys.add(voice_key)
816
- _internal_stream_voice_keys[stream_id] = (
817
- keys,
818
- time.time() + max(1, Config.INTERNAL_STREAM_STATE_TTL_SEC),
819
- )
820
 
821
  return {"status": "registered", "voice_key": voice_key}
822
 
@@ -833,10 +795,8 @@ async def internal_chunk_synthesize(
833
  raise HTTPException(403, "Forbidden")
834
 
835
  with _internal_cancel_lock:
836
- _purge_internal_stream_state_locked()
837
  if request.stream_id in _internal_cancelled_streams:
838
  raise HTTPException(409, "Stream already cancelled")
839
- _touch_internal_stream_voice_keys_locked(request.stream_id)
840
 
841
  wrapper: ChatterboxWrapper = getattr(app.state, "wrapper", None)
842
  if not wrapper:
@@ -845,9 +805,6 @@ async def internal_chunk_synthesize(
845
  voice_profile = wrapper.default_voice
846
  if request.voice_key:
847
  cached_voice = wrapper._voice_cache.get(request.voice_key)
848
- if cached_voice is None:
849
- # Built-in voices are permanent in wrapper registry even if TTL cache entry expired.
850
- cached_voice = wrapper.get_builtin_voice_by_hash(request.voice_key)
851
  if cached_voice is None:
852
  raise HTTPException(409, "Voice key expired or not found")
853
  voice_profile = cached_voice
@@ -888,28 +845,11 @@ async def internal_chunk_cancel(stream_id: str, http_request: Request):
888
  raise HTTPException(403, "Forbidden")
889
 
890
  with _internal_cancel_lock:
891
- _purge_internal_stream_state_locked()
892
- _internal_cancelled_streams[stream_id] = (
893
- time.time() + max(1, Config.INTERNAL_CANCEL_TTL_SEC)
894
- )
895
  _internal_stream_voice_keys.pop(stream_id, None)
896
  return {"status": "cancelled", "stream_id": stream_id}
897
 
898
 
899
- @app.post("/internal/chunk/complete/{stream_id}")
900
- async def internal_chunk_complete(stream_id: str, http_request: Request):
901
- """Best-effort immediate cleanup after stream completes normally."""
902
- if Config.INTERNAL_SHARED_SECRET:
903
- provided = http_request.headers.get("X-Internal-Secret", "")
904
- if provided != Config.INTERNAL_SHARED_SECRET:
905
- raise HTTPException(403, "Forbidden")
906
-
907
- with _internal_cancel_lock:
908
- _purge_internal_stream_state_locked()
909
- _clear_internal_stream_state_locked(stream_id)
910
- return {"status": "completed", "stream_id": stream_id}
911
-
912
-
913
  @app.post("/v1/audio/speech")
914
  async def openai_compatible_tts(request: TTSJsonRequest):
915
  """OpenAI-compatible streaming endpoint (JSON body, no file upload).
 
1
+ """
2
+ Chatterbox Turbo TTS -- FastAPI Server
3
+ ======================================
4
+ Production-ready API with true real-time MP3 streaming,
5
+ in-memory voice cloning, and fully non-blocking inference.
6
+
7
+ Endpoints:
8
+ GET /health -> health check + optional warmup
9
+ GET /info -> model info, supported tags, parameters
10
+ POST /tts -> full audio response (WAV/MP3/FLAC)
11
+ POST /tts/stream -> chunked MP3 streaming (MediaSource-ready)
12
+ POST /tts/true-stream -> alias for /tts/stream (Kokoro compat)
13
+ POST /tts/stop/{stream_id}-> cancel a specific active stream
14
+ POST /tts/stop -> cancel ALL active streams
15
+ POST /v1/audio/speech -> OpenAI-compatible streaming
16
+ """
17
  import asyncio
18
  import io
19
  import json
 
235
  pass
236
 
237
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
238
  # ═══════════════════════════════════════════════════════════════════
239
  # Endpoints
240
  # ═══════════════════════════════════════════════════════════════════
 
242
  @app.get("/health")
243
  async def health(warm_up: bool = False):
244
  wrapper: ChatterboxWrapper = getattr(app.state, "wrapper", None)
 
 
 
 
 
245
  status = {
246
  "status": "healthy" if wrapper else "loading",
247
  "model_loaded": wrapper is not None,
248
  "model_dtype": Config.MODEL_DTYPE,
249
  "streaming_supported": True,
250
  "voice_cache_entries": wrapper._voice_cache.size if wrapper else 0,
 
 
251
  }
252
  if warm_up and wrapper:
253
  try:
 
259
  return status
260
 
261
 
262
+ @app.get("/info")
263
+ async def info():
264
+ return {
265
+ "model": Config.MODEL_ID,
266
+ "dtype": Config.MODEL_DTYPE,
267
+ "sample_rate": Config.SAMPLE_RATE,
268
+ "paralinguistic_tags": list(Config.PARALINGUISTIC_TAGS),
269
+ "tag_usage": "Insert tags directly in text, e.g. 'That is so funny! [laugh] Anyway…'",
270
+ "parameters": {
271
+ "max_new_tokens": {"default": Config.MAX_NEW_TOKENS, "range": "64–2048"},
272
+ "repetition_penalty": {"default": Config.REPETITION_PENALTY, "range": "1.0–2.0"},
273
+ },
274
+ "voice_cloning": {
275
+ "description": "Upload 3–30s reference WAV/MP3 as 'voice_ref' field",
276
+ "max_upload_mb": Config.MAX_VOICE_UPLOAD_BYTES // (1024 * 1024),
277
+ },
278
+ "parallel_mode": {
279
+ "enabled": Config.ENABLE_PARALLEL_MODE,
280
+ "helper_configured": bool(Config.HELPER_BASE_URL),
281
+ "helper_base_url": Config.HELPER_BASE_URL or None,
282
+ "supports_voice_ref": True,
283
+ },
284
+ }
285
+
286
+
287
  # ── POST /tts ─────────────────────────────────────────────────────
288
 
289
  @app.post("/tts", response_class=Response)
 
329
  # ═══════════════════════════════════════════════════════════════════
330
 
331
  _active_streams: dict[str, threading.Event] = {}
332
+ _internal_cancelled_streams: set[str] = set()
 
333
  _internal_cancel_lock = threading.Lock()
334
+ _internal_stream_voice_keys: dict[str, set[str]] = {}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
335
 
336
 
337
  # ═══════════════════════════════════════════════════════════════════
 
457
  ready: dict[int, bytes] = {}
458
  first_error: Optional[Exception] = None
459
  workers_done = 0
 
460
 
461
  def _publish(idx: int, data: bytes):
462
  with cond:
 
608
 
609
  yield data
610
  next_idx += 1
 
 
 
 
 
611
  finally:
612
  cancel_event.set()
613
+ _helper_cancel_stream(helper_base_url, stream_id)
 
 
 
614
  odd_thread.join(timeout=1.0)
615
  even_thread.join(timeout=1.0)
616
  _active_streams.pop(stream_id, None)
 
777
  stream_id = (http_request.query_params.get("stream_id") or "").strip()
778
  if stream_id:
779
  with _internal_cancel_lock:
780
+ keys = _internal_stream_voice_keys.setdefault(stream_id, set())
 
781
  keys.add(voice_key)
 
 
 
 
782
 
783
  return {"status": "registered", "voice_key": voice_key}
784
 
 
795
  raise HTTPException(403, "Forbidden")
796
 
797
  with _internal_cancel_lock:
 
798
  if request.stream_id in _internal_cancelled_streams:
799
  raise HTTPException(409, "Stream already cancelled")
 
800
 
801
  wrapper: ChatterboxWrapper = getattr(app.state, "wrapper", None)
802
  if not wrapper:
 
805
  voice_profile = wrapper.default_voice
806
  if request.voice_key:
807
  cached_voice = wrapper._voice_cache.get(request.voice_key)
 
 
 
808
  if cached_voice is None:
809
  raise HTTPException(409, "Voice key expired or not found")
810
  voice_profile = cached_voice
 
845
  raise HTTPException(403, "Forbidden")
846
 
847
  with _internal_cancel_lock:
848
+ _internal_cancelled_streams.add(stream_id)
 
 
 
849
  _internal_stream_voice_keys.pop(stream_id, None)
850
  return {"status": "cancelled", "stream_id": stream_id}
851
 
852
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
853
  @app.post("/v1/audio/speech")
854
  async def openai_compatible_tts(request: TTSJsonRequest):
855
  """OpenAI-compatible streaming endpoint (JSON body, no file upload).
chatterbox_wrapper.py CHANGED
@@ -27,7 +27,6 @@ import tempfile
27
  import time
28
  from collections import OrderedDict
29
  from dataclasses import dataclass
30
- from pathlib import Path
31
  from typing import Callable, Generator, Optional
32
 
33
  import librosa
@@ -49,21 +48,6 @@ _SUPPORTED_AUDIO_EXTENSIONS = {
49
  }
50
 
51
 
52
- def _slugify(text: str) -> str:
53
- buf = []
54
- prev_underscore = False
55
- for ch in text.strip().lower():
56
- if ch.isalnum():
57
- buf.append(ch)
58
- prev_underscore = False
59
- else:
60
- if not prev_underscore:
61
- buf.append("_")
62
- prev_underscore = True
63
- slug = "".join(buf).strip("_")
64
- return slug or "voice"
65
-
66
-
67
  # ═══════════════════════════════════════════════════════════════════
68
  # Data Structures
69
  # ═══════════════════════════════════════════════════════════════════
@@ -219,15 +203,8 @@ class ChatterboxWrapper:
219
  ttl_seconds=self.cfg.VOICE_CACHE_TTL_SEC,
220
  )
221
 
222
- self._builtin_voice_profiles: dict[str, VoiceProfile] = {}
223
- self._builtin_voice_bytes: dict[str, bytes] = {}
224
- self._builtin_voice_by_hash: dict[str, VoiceProfile] = {}
225
- self._voice_alias_to_id: dict[str, str] = {}
226
- self._builtin_voice_catalog: list[dict] = []
227
- self._default_voice_id: str = "default"
228
-
229
- logger.info("Loading built-in voices (HF default + local samples) …")
230
- self.default_voice = self._load_builtin_voices()
231
 
232
  logger.info("βœ… ChatterboxWrapper ready")
233
 
@@ -283,185 +260,16 @@ class ChatterboxWrapper:
283
  opts.enable_mem_reuse = True
284
  return opts
285
 
286
- # ─── Built-in voices (HF default + local samples) ────────────
287
 
288
- def _download_hf_default_voice_bytes(self) -> bytes:
289
  path = hf_hub_download(
290
  self.cfg.DEFAULT_VOICE_REPO,
291
  filename=self.cfg.DEFAULT_VOICE_FILE,
292
  cache_dir=self.cfg.MODELS_DIR,
293
  )
294
- return Path(path).read_bytes()
295
-
296
- def _list_local_voice_paths(self) -> list[Path]:
297
- wrapper_dir = Path(__file__).resolve().parent
298
-
299
- # Support both module-level and repo-root deployment layouts.
300
- candidates = []
301
- for d in (wrapper_dir, Path.cwd().resolve(), wrapper_dir.parent):
302
- try:
303
- resolved = d.resolve()
304
- except Exception:
305
- continue
306
- if resolved.is_dir() and resolved not in candidates:
307
- candidates.append(resolved)
308
-
309
- voices: list[Path] = []
310
- seen_real_paths: set[str] = set()
311
- for root in candidates:
312
- try:
313
- entries = sorted(root.iterdir(), key=lambda x: x.name.lower())
314
- except Exception:
315
- continue
316
-
317
- for p in entries:
318
- if not p.is_file():
319
- continue
320
- if p.suffix.lower() not in _SUPPORTED_AUDIO_EXTENSIONS:
321
- continue
322
- real_path = str(p.resolve())
323
- if real_path in seen_real_paths:
324
- continue
325
- seen_real_paths.add(real_path)
326
- voices.append(p)
327
-
328
- return voices
329
-
330
- def _make_unique_voice_id(self, preferred: str) -> str:
331
- base = _slugify(preferred)
332
- candidate = base
333
- idx = 2
334
- while candidate in self._builtin_voice_profiles:
335
- candidate = f"{base}_{idx}"
336
- idx += 1
337
- return candidate
338
-
339
- def _register_builtin_voice(
340
- self,
341
- *,
342
- preferred_id: str,
343
- display_name: str,
344
- source: str,
345
- source_ref: str,
346
- audio_bytes: bytes,
347
- is_default: bool = False,
348
- ) -> str:
349
- if not audio_bytes:
350
- raise ValueError("Voice file is empty")
351
-
352
- voice_id = self._make_unique_voice_id(preferred_id)
353
- audio_hash = hashlib.md5(audio_bytes).hexdigest()
354
-
355
- profile = self._voice_cache.get(audio_hash)
356
- if profile is None:
357
- audio = _load_audio_bytes(audio_bytes, sr=self.cfg.SAMPLE_RATE)
358
- profile = self._encode_audio_array(audio, audio_hash=audio_hash)
359
- self._voice_cache.put(audio_hash, profile)
360
- else:
361
- # Keep hash attached to cached profile for metadata/voice-key usage.
362
- profile.audio_hash = audio_hash
363
-
364
- self._builtin_voice_profiles[voice_id] = profile
365
- self._builtin_voice_bytes[voice_id] = audio_bytes
366
- if audio_hash:
367
- self._builtin_voice_by_hash[audio_hash] = profile
368
-
369
- aliases: list[str] = []
370
- for alias in (voice_id, _slugify(Path(display_name).stem)):
371
- if alias not in self._voice_alias_to_id:
372
- self._voice_alias_to_id[alias] = voice_id
373
- aliases.append(alias)
374
-
375
- if is_default:
376
- self._default_voice_id = voice_id
377
- self._voice_alias_to_id["default"] = voice_id
378
- if "default" not in aliases:
379
- aliases.append("default")
380
-
381
- self._builtin_voice_catalog.append(
382
- {
383
- "id": voice_id,
384
- "display_name": display_name,
385
- "source": source,
386
- "source_ref": source_ref,
387
- "aliases": aliases,
388
- "voice_key": audio_hash,
389
- }
390
- )
391
- return voice_id
392
-
393
- def _load_builtin_voices(self) -> VoiceProfile:
394
- # 1) HF default voice (kept as true default fallback)
395
- hf_bytes = self._download_hf_default_voice_bytes()
396
- self._register_builtin_voice(
397
- preferred_id="default_hf_voice",
398
- display_name=self.cfg.DEFAULT_VOICE_FILE,
399
- source="huggingface",
400
- source_ref=f"{self.cfg.DEFAULT_VOICE_REPO}:{self.cfg.DEFAULT_VOICE_FILE}",
401
- audio_bytes=hf_bytes,
402
- is_default=True,
403
- )
404
-
405
- # 2) Local voice samples placed next to app files
406
- for path in self._list_local_voice_paths():
407
- # Avoid duplicate entry if someone also copied default_voice.wav locally.
408
- if path.name == self.cfg.DEFAULT_VOICE_FILE:
409
- continue
410
- try:
411
- self._register_builtin_voice(
412
- preferred_id=path.stem,
413
- display_name=path.name,
414
- source="local",
415
- source_ref=str(path.name),
416
- audio_bytes=path.read_bytes(),
417
- is_default=False,
418
- )
419
- except Exception as e:
420
- logger.warning(f"Skipping local voice {path.name}: {e}")
421
-
422
- default_profile = self._builtin_voice_profiles.get(self._default_voice_id)
423
- if default_profile is None:
424
- raise RuntimeError("Default built-in voice could not be initialized")
425
-
426
- logger.info(
427
- f"Built-in voices loaded: {len(self._builtin_voice_catalog)} "
428
- f"(default={self._default_voice_id})"
429
- )
430
- return default_profile
431
-
432
- def list_builtin_voices(self) -> list[dict]:
433
- """Return metadata for startup-preloaded voices."""
434
- return [dict(v) for v in self._builtin_voice_catalog]
435
-
436
- @property
437
- def default_voice_name(self) -> str:
438
- return self._default_voice_id
439
-
440
- def resolve_voice_id(self, voice_name: Optional[str]) -> str:
441
- if voice_name is None:
442
- return self._default_voice_id
443
- key = _slugify(str(voice_name))
444
- if not key:
445
- return self._default_voice_id
446
- voice_id = self._voice_alias_to_id.get(key)
447
- if voice_id is None:
448
- available = ", ".join(sorted(self._voice_alias_to_id.keys()))
449
- raise ValueError(f"Unknown voice '{voice_name}'. Available: {available}")
450
- return voice_id
451
-
452
- def get_builtin_voice(self, voice_name: Optional[str]) -> VoiceProfile:
453
- voice_id = self.resolve_voice_id(voice_name)
454
- profile = self._builtin_voice_profiles[voice_id]
455
- if profile.audio_hash:
456
- self._voice_cache.put(profile.audio_hash, profile)
457
- return profile
458
-
459
- def get_builtin_voice_bytes(self, voice_name: Optional[str]) -> Optional[bytes]:
460
- voice_id = self.resolve_voice_id(voice_name)
461
- return self._builtin_voice_bytes.get(voice_id)
462
-
463
- def get_builtin_voice_by_hash(self, audio_hash: str) -> Optional[VoiceProfile]:
464
- return self._builtin_voice_by_hash.get((audio_hash or "").strip())
465
 
466
  # ─── Voice encoding ──────────────────────────────────────────
467
 
 
27
  import time
28
  from collections import OrderedDict
29
  from dataclasses import dataclass
 
30
  from typing import Callable, Generator, Optional
31
 
32
  import librosa
 
48
  }
49
 
50
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
  # ═══════════════════════════════════════════════════════════════════
52
  # Data Structures
53
  # ═══════════════════════════════════════════════════════════════════
 
203
  ttl_seconds=self.cfg.VOICE_CACHE_TTL_SEC,
204
  )
205
 
206
+ logger.info("Encoding default reference voice …")
207
+ self.default_voice = self._load_default_voice()
 
 
 
 
 
 
 
208
 
209
  logger.info("βœ… ChatterboxWrapper ready")
210
 
 
260
  opts.enable_mem_reuse = True
261
  return opts
262
 
263
+ # ─── Default voice ────────────────────────────────────────────
264
 
265
+ def _load_default_voice(self) -> VoiceProfile:
266
  path = hf_hub_download(
267
  self.cfg.DEFAULT_VOICE_REPO,
268
  filename=self.cfg.DEFAULT_VOICE_FILE,
269
  cache_dir=self.cfg.MODELS_DIR,
270
  )
271
+ audio, _ = librosa.load(path, sr=self.cfg.SAMPLE_RATE)
272
+ return self._encode_audio_array(audio, audio_hash="__default__")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
273
 
274
  # ─── Voice encoding ──────────────────────────────────────────
275
 
config.py CHANGED
@@ -17,12 +17,6 @@ def _get_bool(name: str, default: bool) -> bool:
17
  return raw.strip().lower() in {"1", "true", "yes", "on"}
18
 
19
 
20
- def _get_csv(name: str, default: str) -> tuple[str, ...]:
21
- raw = os.getenv(name, default)
22
- items = [x.strip() for x in raw.split(",")]
23
- return tuple(x for x in items if x)
24
-
25
-
26
  class Config:
27
  # ── Model ────────────────────────────────────────────────────
28
  MODEL_ID: str = os.getenv("CB_MODEL_ID", "ResembleAI/chatterbox-turbo-ONNX")
@@ -73,12 +67,6 @@ class Config:
73
  # (not a model — just a reference WAV, safe to use from any source).
74
  DEFAULT_VOICE_REPO: str = "onnx-community/chatterbox-ONNX"
75
  DEFAULT_VOICE_FILE: str = "default_voice.wav"
76
- DEFAULT_VOICE_REPOS: tuple[str, ...] = _get_csv(
77
- "CB_DEFAULT_VOICE_REPOS",
78
- DEFAULT_VOICE_REPO,
79
- )
80
- PRELOAD_BUILTIN_VOICES: bool = _get_bool("CB_PRELOAD_BUILTIN_VOICES", True)
81
- MAX_PRELOAD_BUILTIN_VOICES: int = int(os.getenv("CB_MAX_PRELOAD_BUILTIN_VOICES", "64"))
82
  MAX_VOICE_UPLOAD_BYTES: int = 10 * 1024 * 1024 # 10 MB
83
  MIN_REF_DURATION_SEC: float = 1.5
84
  MAX_REF_DURATION_SEC: float = 30.0
@@ -89,19 +77,11 @@ class Config:
89
  # Smaller chunks = faster TTFB (first audio arrives sooner)
90
  # ~200 chars ≈ 1–2 sentences ≈ fastest first-chunk on 2 vCPU
91
  MAX_CHUNK_CHARS: int = int(os.getenv("CB_MAX_CHUNK_CHARS", "100"))
92
- # Additive parallel mode (3-way split: primary + helper1 + helper2).
93
  ENABLE_PARALLEL_MODE: bool = _get_bool("CB_ENABLE_PARALLEL_MODE", True)
94
- HELPER_BASE_URL: str = os.getenv("CB_HELPER_BASE_URL", "https://shadowhunter222-chab2.hf.space").strip()
95
- HELPER1_BASE_URL: str = os.getenv(
96
- "CB_HELPER1_BASE_URL",
97
- HELPER_BASE_URL,
98
- ).strip()
99
- HELPER2_BASE_URL: str = os.getenv("CB_HELPER2_BASE_URL", "https://shadowhunter222-chab3.hf.space").strip()
100
  HELPER_TIMEOUT_SEC: float = float(os.getenv("CB_HELPER_TIMEOUT_SEC", "45"))
101
  HELPER_RETRY_ONCE: bool = _get_bool("CB_HELPER_RETRY_ONCE", True)
102
- # Internal housekeeping TTLs to avoid retaining stream metadata indefinitely.
103
- INTERNAL_CANCEL_TTL_SEC: int = int(os.getenv("CB_INTERNAL_CANCEL_TTL_SEC", "120"))
104
- INTERNAL_STREAM_STATE_TTL_SEC: int = int(os.getenv("CB_INTERNAL_STREAM_STATE_TTL_SEC", "600"))
105
  # Optional shared secret for internal chunk endpoints.
106
  INTERNAL_SHARED_SECRET: str = os.getenv("CB_INTERNAL_SHARED_SECRET", "").strip()
107
 
 
17
  return raw.strip().lower() in {"1", "true", "yes", "on"}
18
 
19
 
 
 
 
 
 
 
20
  class Config:
21
  # ── Model ────────────────────────────────────────────────────
22
  MODEL_ID: str = os.getenv("CB_MODEL_ID", "ResembleAI/chatterbox-turbo-ONNX")
 
67
  # (not a model — just a reference WAV, safe to use from any source).
68
  DEFAULT_VOICE_REPO: str = "onnx-community/chatterbox-ONNX"
69
  DEFAULT_VOICE_FILE: str = "default_voice.wav"
 
 
 
 
 
 
70
  MAX_VOICE_UPLOAD_BYTES: int = 10 * 1024 * 1024 # 10 MB
71
  MIN_REF_DURATION_SEC: float = 1.5
72
  MAX_REF_DURATION_SEC: float = 30.0
 
77
  # Smaller chunks = faster TTFB (first audio arrives sooner)
78
  # ~200 chars ≈ 1–2 sentences ≈ fastest first-chunk on 2 vCPU
79
  MAX_CHUNK_CHARS: int = int(os.getenv("CB_MAX_CHUNK_CHARS", "100"))
80
+ # Additive parallel mode (odd/even split across primary/helper).
81
  ENABLE_PARALLEL_MODE: bool = _get_bool("CB_ENABLE_PARALLEL_MODE", True)
82
+ HELPER_BASE_URL: str = os.getenv("CB_HELPER_BASE_URL", "").strip()
 
 
 
 
 
83
  HELPER_TIMEOUT_SEC: float = float(os.getenv("CB_HELPER_TIMEOUT_SEC", "45"))
84
  HELPER_RETRY_ONCE: bool = _get_bool("CB_HELPER_RETRY_ONCE", True)
 
 
 
85
  # Optional shared secret for internal chunk endpoints.
86
  INTERNAL_SHARED_SECRET: str = os.getenv("CB_INTERNAL_SHARED_SECRET", "").strip()
87