Spaces:

ShadowHunter222
/

Chab

Running

App Files Files Community

ShadowHunter222 committed on Apr 15

Commit

4979fb4

verified ·

1 Parent(s): 6937fbe

Upload 9 files

Browse files

Files changed (5) hide show

app.py +155 -425
chatterbox_wrapper.py +2 -0
config.py +3 -7
her_prompt.wav +3 -0
ivr_female_prompt.wav +3 -0

app.py CHANGED Viewed

@@ -1,16 +1,32 @@
 import asyncio
-import http.client
 import io
 import json
 import logging
 import queue as stdlib_queue
 import threading
 import time
 import urllib.parse
 import uuid
 from concurrent.futures import ThreadPoolExecutor
-from dataclasses import dataclass
-from typing import Any, Generator, Optional
 import numpy as np
 import soundfile as sf
@@ -95,36 +111,38 @@ async def cors_middleware(request: Request, call_next):
 async def _resolve_voice(
     voice_ref: Optional[UploadFile],
-    voice_name: Optional[str],
     wrapper: ChatterboxWrapper,
 ) -> VoiceProfile:
-    """Return a VoiceProfile from uploaded audio or built-in voice selection."""
-    if voice_ref is None or voice_ref.filename == "":
         try:
-            return wrapper.get_builtin_voice(voice_name)
         except ValueError as e:
             raise HTTPException(status_code=400, detail=str(e))
-    audio_bytes = await voice_ref.read()
-    if len(audio_bytes) > Config.MAX_VOICE_UPLOAD_BYTES:
-        raise HTTPException(status_code=413, detail="Voice file too large (max 10 MB)")
-    if len(audio_bytes) == 0:
-        raise HTTPException(status_code=400, detail="Empty voice file")
-    loop = asyncio.get_running_loop()
     try:
-        return await loop.run_in_executor(
-            tts_executor, wrapper.encode_voice_from_bytes, audio_bytes
-        )
-    except ValueError as e:
         raise HTTPException(status_code=400, detail=str(e))
-    except Exception as e:
-        logger.error(f"Voice encoding failed: {e}")
-        raise HTTPException(
-            status_code=400,
-            detail=f"Could not process voice file: {str(e)}. "
-                   f"Supported formats: WAV, MP3, MPEG, M4A, OGG, FLAC, WebM."
-        )
 # ═══════════════════════════════════════════════════════════════════
@@ -152,165 +170,32 @@ def _encode_mp3_chunk(audio: np.ndarray) -> bytes:
     return data
-@dataclass(frozen=True)
-class _ChunkPacket:
-    index: int
-    data: bytes
-    lane: str
-    produced_at: float
-def _internal_headers(
-    *,
-    content_type: Optional[str] = "application/json",
-    accept: str = "audio/mpeg",
-) -> dict[str, str]:
-    headers: dict[str, str] = {"Accept": accept, "Connection": "keep-alive"}
-    if content_type:
-        headers["Content-Type"] = content_type
     if Config.INTERNAL_SHARED_SECRET:
         headers["X-Internal-Secret"] = Config.INTERNAL_SHARED_SECRET
     return headers
-class _HelperHttpClient:
-    """Small persistent HTTP client for helper server keep-alive calls."""
-    def __init__(self, base_url: str, default_timeout: float):
-        parsed = urllib.parse.urlparse((base_url or "").strip())
-        if parsed.scheme not in {"http", "https"} or not parsed.hostname:
-            raise ValueError(f"Invalid helper URL: {base_url!r}")
-        self._scheme = parsed.scheme
-        self._host = parsed.hostname
-        self._port = parsed.port
-        self._base_path = (parsed.path or "").rstrip("/")
-        self._default_timeout = max(1.0, float(default_timeout))
-        self._conn: Optional[http.client.HTTPConnection] = None
-    def __enter__(self):
-        return self
-    def __exit__(self, exc_type, exc, tb):
-        self.close()
-    def close(self):
-        if self._conn is not None:
-            try:
-                self._conn.close()
-            except Exception:
-                pass
-            self._conn = None
-    def _target(self, path: str, query: Optional[str] = None) -> str:
-        normalized = path if path.startswith("/") else f"/{path}"
-        target = f"{self._base_path}{normalized}"
-        if query:
-            target = f"{target}?{query}"
-        return target
-    def _make_connection(self, timeout_sec: float) -> http.client.HTTPConnection:
-        if self._scheme == "https":
-            return http.client.HTTPSConnection(self._host, self._port, timeout=timeout_sec)
-        return http.client.HTTPConnection(self._host, self._port, timeout=timeout_sec)
-    def _ensure_connection(self, timeout_sec: float) -> http.client.HTTPConnection:
-        if self._conn is None:
-            self._conn = self._make_connection(timeout_sec)
-        else:
-            self._conn.timeout = timeout_sec
-        return self._conn
-    def _request(
-        self,
-        method: str,
-        path: str,
-        *,
-        body: Optional[bytes] = None,
-        headers: Optional[dict[str, str]] = None,
-        timeout_sec: Optional[float] = None,
-        query: Optional[str] = None,
-    ) -> tuple[int, bytes, dict[str, str]]:
-        timeout = max(1.0, float(timeout_sec or self._default_timeout))
-        target = self._target(path, query=query)
-        req_headers = headers or {}
-        conn = self._ensure_connection(timeout)
-        try:
-            conn.request(method=method, url=target, body=body, headers=req_headers)
-            resp = conn.getresponse()
-            payload = resp.read()
-            resp_headers = {k.lower(): v for k, v in resp.getheaders()}
-        except Exception:
-            # Force reconnect on next attempt if socket is stale/reset.
-            self.close()
-            raise
-        if resp.status >= 400:
-            snippet = payload[:256].decode("utf-8", errors="replace")
-            raise RuntimeError(
-                f"helper {method} {target} returned {resp.status}: {snippet}"
-            )
-        return resp.status, payload, resp_headers
-    def request_chunk(self, payload: dict[str, Any], timeout_sec: float) -> bytes:
-        _, data, _ = self._request(
-            "POST",
-            "/internal/chunk/synthesize",
-            body=json.dumps(payload).encode("utf-8"),
-            headers=_internal_headers(content_type="application/json", accept="audio/mpeg"),
-            timeout_sec=timeout_sec,
-        )
-        return data
-    def register_voice(self, stream_id: str, audio_bytes: bytes, timeout_sec: float) -> str:
-        query = urllib.parse.urlencode({"stream_id": stream_id})
-        _, data, _ = self._request(
-            "POST",
-            "/internal/voice/register",
-            query=query,
-            body=audio_bytes,
-            headers=_internal_headers(
-                content_type="application/octet-stream",
-                accept="application/json",
-            ),
-            timeout_sec=timeout_sec,
-        )
-        payload = json.loads(data.decode("utf-8"))
-        voice_key = (payload.get("voice_key") or "").strip()
-        if not voice_key:
-            raise RuntimeError("helper voice registration returned no voice_key")
-        return voice_key
-    def cancel_stream(self, stream_id: str, timeout_sec: float = 3.0):
-        self._request(
-            "POST",
-            f"/internal/chunk/cancel/{stream_id}",
-            body=b"",
-            headers=_internal_headers(),
-            timeout_sec=timeout_sec,
-        )
-    def complete_stream(self, stream_id: str, timeout_sec: float = 3.0):
-        self._request(
-            "POST",
-            f"/internal/chunk/complete/{stream_id}",
-            body=b"",
-            headers=_internal_headers(),
-            timeout_sec=timeout_sec,
-        )
 def _helper_request_chunk(
     helper_base_url: str,
     payload: dict,
     timeout_sec: float,
-    helper_client: Optional[_HelperHttpClient] = None,
 ) -> bytes:
-    if helper_client is not None:
-        return helper_client.request_chunk(payload, timeout_sec=timeout_sec)
-    with _HelperHttpClient(helper_base_url, default_timeout=timeout_sec) as helper_client_single:
-        return helper_client_single.request_chunk(payload, timeout_sec=timeout_sec)
 def _helper_register_voice(
@@ -318,45 +203,44 @@ def _helper_register_voice(
     stream_id: str,
     audio_bytes: bytes,
     timeout_sec: float,
-    helper_client: Optional[_HelperHttpClient] = None,
 ) -> str:
     """Register reference voice on helper once, return voice_key for chunk calls."""
-    if helper_client is not None:
-        return helper_client.register_voice(
-            stream_id=stream_id,
-            audio_bytes=audio_bytes,
-            timeout_sec=timeout_sec,
-        )
-    with _HelperHttpClient(helper_base_url, default_timeout=timeout_sec) as helper_client_single:
-        return helper_client_single.register_voice(
-            stream_id=stream_id,
-            audio_bytes=audio_bytes,
-            timeout_sec=timeout_sec,
-        )
 def _helper_cancel_stream(helper_base_url: str, stream_id: str):
     """Best-effort cancellation signal to helper."""
     try:
-        with _HelperHttpClient(helper_base_url, default_timeout=3.0) as helper_client:
-            helper_client.cancel_stream(stream_id=stream_id, timeout_sec=3.0)
     except Exception:
         pass
-def _helper_complete_stream(helper_base_url: str, stream_id: str):
-    """Best-effort stream completion cleanup on helper.
-    Falls back to cancel for backwards compatibility if helper does not expose
-    the completion endpoint yet.
-    """
-    try:
-        with _HelperHttpClient(helper_base_url, default_timeout=3.0) as helper_client:
-            helper_client.complete_stream(stream_id=stream_id, timeout_sec=3.0)
-    except Exception:
-        _helper_cancel_stream(helper_base_url, stream_id)
 # ═══════════════════════════════════════════════════════════════════
 # Endpoints
 # ═══════════════════════════════════════════════════════════════════
@@ -364,19 +248,12 @@ def _helper_complete_stream(helper_base_url: str, stream_id: str):
 @app.get("/health")
 async def health(warm_up: bool = False):
     wrapper: ChatterboxWrapper = getattr(app.state, "wrapper", None)
-    with _internal_cancel_lock:
-        _purge_internal_stream_state_locked()
-        cancelled_count = len(_internal_cancelled_streams)
-        voice_state_count = len(_internal_stream_voice_keys)
     status = {
         "status": "healthy" if wrapper else "loading",
         "model_loaded": wrapper is not None,
         "model_dtype": Config.MODEL_DTYPE,
         "streaming_supported": True,
         "voice_cache_entries": wrapper._voice_cache.size if wrapper else 0,
-        "internal_cancelled_streams": cancelled_count,
-        "internal_stream_voice_states": voice_state_count,
     }
     if warm_up and wrapper:
         try:
@@ -388,23 +265,40 @@ async def health(warm_up: bool = False):
     return status
 @app.get("/voices")
 async def list_voices():
     wrapper: ChatterboxWrapper = getattr(app.state, "wrapper", None)
     if not wrapper:
         raise HTTPException(503, "Model not loaded")
-    voices = wrapper.list_builtin_voices()
     return {
-        "count": len(voices),
-        "default_voice": wrapper.default_voice_name,
-        "voices": voices,
-        "usage": {
-            "form_field": "voice_name",
-            "json_field": "voice",
-            "note": "If voice_ref is uploaded, it overrides voice_name.",
-        },
     }
@@ -454,51 +348,9 @@ async def text_to_speech(
 # ═══════════════════════════════════════════════════════════════════
 _active_streams: dict[str, threading.Event] = {}
-# stream_id -> expires_at epoch seconds
-_internal_cancelled_streams: dict[str, float] = {}
 _internal_cancel_lock = threading.Lock()
-# stream_id -> (voice_keys, expires_at)
-_internal_stream_voice_keys: dict[str, tuple[set[str], float]] = {}
-# stream_id -> helper base URLs (used to cancel helpers quickly on /tts/stop)
-_stream_helper_routes: dict[str, set[str]] = {}
-_stream_routes_lock = threading.Lock()
-def _purge_internal_stream_state_locked(now: Optional[float] = None):
-    now_ts = now if now is not None else time.time()
-    expired_cancel_ids = [
-        sid for sid, expires_at in _internal_cancelled_streams.items()
-        if expires_at <= now_ts
-    ]
-    for sid in expired_cancel_ids:
-        _internal_cancelled_streams.pop(sid, None)
-    expired_voice_state_ids = [
-        sid for sid, (_, expires_at) in _internal_stream_voice_keys.items()
-        if expires_at <= now_ts
-    ]
-    for sid in expired_voice_state_ids:
-        _internal_stream_voice_keys.pop(sid, None)
-def _touch_internal_stream_voice_keys_locked(stream_id: str):
-    if not stream_id:
-        return
-    entry = _internal_stream_voice_keys.get(stream_id)
-    if entry is None:
-        return
-    keys, _ = entry
-    _internal_stream_voice_keys[stream_id] = (
-        keys,
-        time.time() + max(1, Config.INTERNAL_STREAM_STATE_TTL_SEC),
-    )
-def _clear_internal_stream_state_locked(stream_id: str):
-    _internal_cancelled_streams.pop(stream_id, None)
-    _internal_stream_voice_keys.pop(stream_id, None)
 # ═══════════════════════════════════════════════════════════════════
@@ -598,7 +450,7 @@ def _pipeline_stream_generator(
         _active_streams.pop(stream_id, None)
-def _parallel_two_way_stream_generator(
     wrapper: ChatterboxWrapper,
     text: str,
     local_voice: VoiceProfile,
@@ -608,43 +460,26 @@ def _parallel_two_way_stream_generator(
     stream_id: str,
     helper_base_url: str,
 ) -> Generator[bytes, None, None]:
-    """Additive 2-way split streamer (primary + helper).
-    Routing pattern:
-      - chunk 0,2,4...  -> primary (local)
-      - chunk 1,3,5...  -> helper
-    """
     cancel_event = threading.Event()
     _active_streams[stream_id] = cancel_event
-    helper_base_url = (helper_base_url or "").strip()
-    helper_route_set = {helper_base_url} if helper_base_url else set()
-    if helper_route_set:
-        with _stream_routes_lock:
-            _stream_helper_routes[stream_id] = set(helper_route_set)
     clean_text = text_processor.sanitize(text.strip()[: Config.MAX_TEXT_LENGTH])
     chunks = text_processor.split_for_streaming(clean_text)
     total_chunks = len(chunks)
     if total_chunks == 0:
-        with _stream_routes_lock:
-            _stream_helper_routes.pop(stream_id, None)
         _active_streams.pop(stream_id, None)
         return
     lock = threading.Lock()
     cond = threading.Condition(lock)
-    ready: dict[int, _ChunkPacket] = {}
     first_error: Optional[Exception] = None
     workers_done = 0
-    expected_workers = 2
-    stream_completed = False
-    def _publish(packet: _ChunkPacket):
         with cond:
-            # First write wins for an index to avoid duplicate retry races.
-            if packet.index not in ready:
-                ready[packet.index] = packet
             cond.notify_all()
     def _set_error(err: Exception):
@@ -669,46 +504,23 @@ def _parallel_two_way_stream_generator(
         )
         return _encode_mp3_chunk(audio)
-    def _local_worker():
         try:
             for idx in range(0, total_chunks, 2):
                 if cancel_event.is_set():
                     break
                 data = _synth_local(chunks[idx])
-                _publish(
-                    _ChunkPacket(
-                        index=idx,
-                        data=data,
-                        lane="primary",
-                        produced_at=time.perf_counter(),
-                    )
-                )
         except Exception as e:
             _set_error(e)
         finally:
             _worker_done()
-    def _helper_worker():
-        helper_available = bool(helper_base_url)
         helper_voice_key: Optional[str] = None
-        helper_timeout = max(1.0, Config.HELPER_TIMEOUT_SEC)
-        helper_client: Optional[_HelperHttpClient] = None
         try:
-            if helper_available:
-                try:
-                    helper_client = _HelperHttpClient(
-                        helper_base_url,
-                        default_timeout=helper_timeout,
-                    )
-                except Exception as conn_err:
-                    helper_available = False
-                    logger.warning(
-                        f"[{stream_id}] helper keep-alive init failed ({conn_err}); "
-                        "using local fallback for helper lane"
-                    )
-            if helper_available and helper_voice_bytes:
                 attempts = 2 if Config.HELPER_RETRY_ONCE else 1
                 last_err: Optional[Exception] = None
                 for _ in range(attempts):
@@ -717,25 +529,19 @@ def _parallel_two_way_stream_generator(
                             helper_base_url=helper_base_url,
                             stream_id=stream_id,
                             audio_bytes=helper_voice_bytes,
-                            timeout_sec=helper_timeout,
-                            helper_client=helper_client,
                         )
                         last_err = None
                         break
                     except Exception as reg_err:
                         last_err = reg_err
                         continue
                 if last_err is not None:
                     helper_available = False
                     logger.warning(
-                        f"[{stream_id}] helper voice registration failed; "
-                        "falling back to local synthesis for helper lane"
                     )
-            elif not helper_available:
-                logger.info(
-                    f"[{stream_id}] helper URL not configured; using local fallback"
-                )
             for idx in range(1, total_chunks, 2):
                 if cancel_event.is_set():
@@ -760,17 +566,9 @@ def _parallel_two_way_stream_generator(
                             helper_data = _helper_request_chunk(
                                 helper_base_url=helper_base_url,
                                 payload=payload,
-                                timeout_sec=helper_timeout,
-                                helper_client=helper_client,
-                            )
-                            _publish(
-                                _ChunkPacket(
-                                    index=idx,
-                                    data=helper_data,
-                                    lane="helper",
-                                    produced_at=time.perf_counter(),
-                                )
                             )
                             last_err = None
                             break
                         except Exception as helper_err:
@@ -782,31 +580,22 @@ def _parallel_two_way_stream_generator(
                     helper_available = False
                     logger.warning(
-                        f"[{stream_id}] helper failed at chunk {idx}; "
-                        "falling back to local synthesis for remaining helper chunks"
                     )
-                # Local fallback for helper lane
                 data = _synth_local(chunks[idx])
-                _publish(
-                    _ChunkPacket(
-                        index=idx,
-                        data=data,
-                        lane="helper-local-fallback",
-                        produced_at=time.perf_counter(),
-                    )
-                )
         except Exception as e:
             _set_error(e)
         finally:
-            if helper_client is not None:
-                helper_client.close()
             _worker_done()
-    local_thread = threading.Thread(target=_local_worker, daemon=True)
-    helper_thread = threading.Thread(target=_helper_worker, daemon=True)
-    local_thread.start()
-    helper_thread.start()
     next_idx = 0
     try:
@@ -816,7 +605,7 @@ def _parallel_two_way_stream_generator(
                     next_idx not in ready
                     and first_error is None
                     and not cancel_event.is_set()
-                    and workers_done < expected_workers
                 ):
                     cond.wait(timeout=0.1)
@@ -824,12 +613,11 @@ def _parallel_two_way_stream_generator(
                     break
                 if next_idx in ready:
-                    packet = ready.pop(next_idx)
-                    buffered_chunks = len(ready)
                 elif first_error is not None:
                     logger.error(f"[{stream_id}] Parallel stream error: {first_error}")
                     break
-                elif workers_done >= expected_workers:
                     logger.error(
                         f"[{stream_id}] Parallel stream ended with missing chunk index {next_idx}"
                     )
@@ -837,39 +625,13 @@ def _parallel_two_way_stream_generator(
                 else:
                     continue
-            logger.debug(
-                "[%s] stitch emit chunk %s/%s from %s (buffered=%s)",
-                stream_id,
-                next_idx + 1,
-                total_chunks,
-                packet.lane,
-                buffered_chunks,
-            )
-            yield packet.data
             next_idx += 1
-        stream_completed = (
-            next_idx >= total_chunks
-            and first_error is None
-            and not cancel_event.is_set()
-        )
     finally:
         cancel_event.set()
-        # For fast stop/cancel, signal helpers first; for normal completion, wait for
-        # workers to flush and then ask helpers to clear stream state.
-        if not stream_completed:
-            for base_url in helper_route_set:
-                _helper_cancel_stream(base_url, stream_id)
-        local_thread.join(timeout=1.0)
-        helper_thread.join(timeout=1.0)
-        if stream_completed:
-            for base_url in helper_route_set:
-                _helper_complete_stream(base_url, stream_id)
-        with _stream_routes_lock:
-            _stream_helper_routes.pop(stream_id, None)
         _active_streams.pop(stream_id, None)
@@ -923,7 +685,7 @@ async def parallel_stream_text_to_speech(
     repetition_penalty: float = Form(Config.REPETITION_PENALTY),
     helper_url: Optional[str] = Form(None),
 ):
-    """Additive 2-way split stream mode (primary + helper)."""
     wrapper: ChatterboxWrapper = getattr(app.state, "wrapper", None)
     if not wrapper:
         raise HTTPException(503, "Model not loaded")
@@ -949,13 +711,16 @@ async def parallel_stream_text_to_speech(
             logger.error(f"Parallel voice encoding failed: {e}")
             raise HTTPException(400, "Could not process voice file for parallel mode")
     else:
         try:
             selected_voice_id = wrapper.resolve_voice_id(voice_name)
             local_voice = wrapper.get_builtin_voice(selected_voice_id)
         except ValueError as e:
             raise HTTPException(status_code=400, detail=str(e))
-        # Ensure helper uses the same selected built-in voice.
         if selected_voice_id != wrapper.default_voice_name:
             helper_voice_bytes = wrapper.get_builtin_voice_bytes(selected_voice_id)
             if not helper_voice_bytes:
@@ -968,12 +733,12 @@ async def parallel_stream_text_to_speech(
     if not resolved_helper:
         raise HTTPException(
             400,
-            "No helper configured. Set CB_HELPER_BASE_URL or pass helper_url.",
         )
     stream_id = uuid.uuid4().hex[:12]
     return StreamingResponse(
-        _parallel_two_way_stream_generator(
             wrapper=wrapper,
             text=text,
             local_voice=local_voice,
@@ -988,7 +753,7 @@ async def parallel_stream_text_to_speech(
             "Content-Disposition": "attachment; filename=tts_parallel_stream.mp3",
             "Transfer-Encoding": "chunked",
             "X-Stream-Id": stream_id,
-            "X-Streaming-Type": "parallel-2way",
             "Cache-Control": "no-cache",
         },
     )
@@ -1051,13 +816,8 @@ async def internal_voice_register(http_request: Request):
     stream_id = (http_request.query_params.get("stream_id") or "").strip()
     if stream_id:
         with _internal_cancel_lock:
-            _purge_internal_stream_state_locked()
-            keys, _ = _internal_stream_voice_keys.get(stream_id, (set(), 0.0))
             keys.add(voice_key)
-            _internal_stream_voice_keys[stream_id] = (
-                keys,
-                time.time() + max(1, Config.INTERNAL_STREAM_STATE_TTL_SEC),
-            )
     return {"status": "registered", "voice_key": voice_key}
@@ -1074,10 +834,8 @@ async def internal_chunk_synthesize(
             raise HTTPException(403, "Forbidden")
     with _internal_cancel_lock:
-        _purge_internal_stream_state_locked()
         if request.stream_id in _internal_cancelled_streams:
             raise HTTPException(409, "Stream already cancelled")
-        _touch_internal_stream_voice_keys_locked(request.stream_id)
     wrapper: ChatterboxWrapper = getattr(app.state, "wrapper", None)
     if not wrapper:
@@ -1129,28 +887,11 @@ async def internal_chunk_cancel(stream_id: str, http_request: Request):
             raise HTTPException(403, "Forbidden")
     with _internal_cancel_lock:
-        _purge_internal_stream_state_locked()
-        _internal_cancelled_streams[stream_id] = (
-            time.time() + max(1, Config.INTERNAL_CANCEL_TTL_SEC)
-        )
         _internal_stream_voice_keys.pop(stream_id, None)
     return {"status": "cancelled", "stream_id": stream_id}
-@app.post("/internal/chunk/complete/{stream_id}")
-async def internal_chunk_complete(stream_id: str, http_request: Request):
-    """Best-effort immediate cleanup after stream completes normally."""
-    if Config.INTERNAL_SHARED_SECRET:
-        provided = http_request.headers.get("X-Internal-Secret", "")
-        if provided != Config.INTERNAL_SHARED_SECRET:
-            raise HTTPException(403, "Forbidden")
-    with _internal_cancel_lock:
-        _purge_internal_stream_state_locked()
-        _clear_internal_stream_state_locked(stream_id)
-    return {"status": "completed", "stream_id": stream_id}
 @app.post("/v1/audio/speech")
 async def openai_compatible_tts(request: TTSJsonRequest):
     """OpenAI-compatible streaming endpoint (JSON body, no file upload).
@@ -1195,10 +936,6 @@ async def stop_stream(stream_id: str):
     event = _active_streams.get(stream_id)
     if event:
         event.set()
-        with _stream_routes_lock:
-            helper_routes = set(_stream_helper_routes.pop(stream_id, set()))
-        for helper_url in helper_routes:
-            _helper_cancel_stream(helper_url, stream_id)
         logger.info(f"Stream {stream_id} cancelled by client")
         return {"status": "stopped", "stream_id": stream_id}
     return {"status": "not_found", "stream_id": stream_id}
@@ -1207,16 +944,9 @@ async def stop_stream(stream_id: str):
 @app.post("/tts/stop")
 async def stop_all_streams():
     """Emergency stop: cancel ALL active TTS streams."""
-    active_items = list(_active_streams.items())
-    count = len(active_items)
-    with _stream_routes_lock:
-        stream_routes = {sid: set(urls) for sid, urls in _stream_helper_routes.items()}
-        _stream_helper_routes.clear()
-    for sid, event in active_items:
         event.set()
-        for helper_url in stream_routes.get(sid, set()):
-            _helper_cancel_stream(helper_url, sid)
     _active_streams.clear()
     logger.info(f"Stopped all streams ({count} active)")
     return {"status": "stopped_all", "count": count}

+"""
+Chatterbox Turbo TTS -- FastAPI Server
+======================================
+Production-ready API with true real-time MP3 streaming,
+in-memory voice cloning, and fully non-blocking inference.
+Endpoints:
+  GET  /health              -> health check + optional warmup
+  GET  /info                -> model info, supported tags, parameters
+  POST /tts                 -> full audio response (WAV/MP3/FLAC)
+  POST /tts/stream          -> chunked MP3 streaming (MediaSource-ready)
+  POST /tts/true-stream     -> alias for /tts/stream (Kokoro compat)
+  POST /tts/stop/{stream_id}-> cancel a specific active stream
+  POST /tts/stop            -> cancel ALL active streams
+  POST /v1/audio/speech     -> OpenAI-compatible streaming
+"""
 import asyncio
 import io
 import json
 import logging
 import queue as stdlib_queue
 import threading
 import time
+import urllib.error
 import urllib.parse
+import urllib.request
 import uuid
 from concurrent.futures import ThreadPoolExecutor
+from typing import Generator, Optional
 import numpy as np
 import soundfile as sf
 async def _resolve_voice(
     voice_ref: Optional[UploadFile],
     voice_name: str,
     wrapper: ChatterboxWrapper,
 ) -> VoiceProfile:
     """Return a VoiceProfile from uploaded audio or a built-in voice name.

     An uploaded file (one with a non-empty filename) takes priority: its
     bytes are read in full and encoded on ``tts_executor`` so the event loop
     is not blocked. Otherwise ``voice_name`` is handed to
     ``wrapper.get_builtin_voice``.

     Raises:
         HTTPException: 413 when the upload exceeds
             ``Config.MAX_VOICE_UPLOAD_BYTES``; 400 for an empty upload, an
             upload that cannot be encoded, or a voice name the wrapper
             rejects.
     """
     # 1) If a file was uploaded, encode it (highest priority)
     if voice_ref is not None and voice_ref.filename:
         audio_bytes = await voice_ref.read()
         if len(audio_bytes) > Config.MAX_VOICE_UPLOAD_BYTES:
             # Report the configured limit instead of a hard-coded "10 MB",
             # so the message stays correct when the limit is changed.
             limit_mb = Config.MAX_VOICE_UPLOAD_BYTES / (1024 * 1024)
             raise HTTPException(
                 status_code=413,
                 detail=f"Voice file too large (max {limit_mb:g} MB)",
             )
         if not audio_bytes:
             raise HTTPException(status_code=400, detail="Empty voice file")
         loop = asyncio.get_running_loop()
         try:
             return await loop.run_in_executor(
                 tts_executor, wrapper.encode_voice_from_bytes, audio_bytes
             )
         except ValueError as e:
             raise HTTPException(status_code=400, detail=str(e)) from e
         except Exception as e:
             # Any other decoding failure is reported as a client error (400).
             logger.error(f"Voice encoding failed: {e}")
             raise HTTPException(
                 status_code=400,
                 detail=f"Could not process voice file: {str(e)}. "
                        f"Supported formats: WAV, MP3, MPEG, M4A, OGG, FLAC, WebM."
             ) from e

     # 2) Resolve by built-in voice name
     try:
         return wrapper.get_builtin_voice(voice_name)
     except (ValueError, KeyError) as e:
         raise HTTPException(status_code=400, detail=str(e)) from e
 # ═══════════════════════════════════════════════════════════════════
     return data
+def _build_helper_endpoint(base_url: str, path: str) -> str:
+    return f"{base_url.rstrip('/')}{path}"
def _internal_headers() -> dict[str, str]:
    """Build the headers sent with JSON requests to the helper.

    Always sets a JSON ``Content-Type`` and an ``audio/mpeg`` ``Accept``;
    ``X-Internal-Secret`` is added only when
    ``Config.INTERNAL_SHARED_SECRET`` is non-empty.
    """
    secret = Config.INTERNAL_SHARED_SECRET
    base = {"Content-Type": "application/json", "Accept": "audio/mpeg"}
    return {**base, "X-Internal-Secret": secret} if secret else base
 def _helper_request_chunk(
     helper_base_url: str,
     payload: dict,
     timeout_sec: float,
 ) -> bytes:
     """POST ``payload`` as JSON to the helper's synthesize endpoint.

     Returns the raw response body. Errors raised by ``urlopen`` (timeouts,
     HTTP error statuses, connection failures) propagate to the caller.
     """
     request = urllib.request.Request(
         url=_build_helper_endpoint(helper_base_url, "/internal/chunk/synthesize"),
         data=json.dumps(payload).encode("utf-8"),
         headers=_internal_headers(),
         method="POST",
     )
     with urllib.request.urlopen(request, timeout=timeout_sec) as response:
         audio = response.read()
     return audio
 def _helper_register_voice(
     helper_base_url: str,
     stream_id: str,
     audio_bytes: bytes,
     timeout_sec: float,
 ) -> str:
     """Register reference voice on helper once, return voice_key for chunk calls.

     Sends ``audio_bytes`` as an ``application/octet-stream`` POST to
     ``/internal/voice/register`` with ``stream_id`` in the query string,
     and reads ``voice_key`` from the JSON reply.

     ``helper_base_url`` is an explicit parameter because the body builds the
     URL from it; it is listed first to mirror the sibling helper functions.

     Raises:
         RuntimeError: if the reply is not a JSON object or has no
             ``voice_key``.
         Exception: anything raised by ``urlopen`` or JSON decoding
             propagates unchanged.
     """
     query = urllib.parse.urlencode({"stream_id": stream_id})
     url = _build_helper_endpoint(helper_base_url, f"/internal/voice/register?{query}")
     # Raw audio body, so the JSON headers from _internal_headers() do not apply.
     headers = {"Content-Type": "application/octet-stream", "Accept": "application/json"}
     if Config.INTERNAL_SHARED_SECRET:
         headers["X-Internal-Secret"] = Config.INTERNAL_SHARED_SECRET
     req = urllib.request.Request(
         url=url,
         data=audio_bytes,
         headers=headers,
         method="POST",
     )
     with urllib.request.urlopen(req, timeout=timeout_sec) as resp:
         data = json.loads(resp.read().decode("utf-8"))
     if not isinstance(data, dict):
         # A list/str/number reply has no .get(); fail with a clear message.
         raise RuntimeError("helper voice registration returned no voice_key")
     voice_key = str(data.get("voice_key") or "").strip()
     if not voice_key:
         raise RuntimeError("helper voice registration returned no voice_key")
     return voice_key
 def _helper_cancel_stream(helper_base_url: str, stream_id: str):
     """Best-effort cancellation signal to helper.

     POSTs an empty body to ``/internal/chunk/cancel/{stream_id}`` with a
     3-second timeout. Never raises: any failure is logged at debug level
     and otherwise ignored, since cancellation must not break the caller's
     own cleanup.
     """
     try:
         # Percent-encode the id so characters such as "/" or "?" cannot
         # alter the request path.
         safe_id = urllib.parse.quote(stream_id, safe="")
         url = _build_helper_endpoint(helper_base_url, f"/internal/chunk/cancel/{safe_id}")
         req = urllib.request.Request(
             url=url,
             data=b"",
             headers=_internal_headers(),
             method="POST",
         )
         with urllib.request.urlopen(req, timeout=3.0):
             pass
     except Exception as e:
         logger.debug(f"[{stream_id}] Helper cancel failed (ignored): {e}")
 # ═══════════════════════════════════════════════════════════════════
 # Endpoints
 # ═══════════════════════════════════════════════════════════════════
 @app.get("/health")
 async def health(warm_up: bool = False):
     wrapper: ChatterboxWrapper = getattr(app.state, "wrapper", None)
     status = {
         "status": "healthy" if wrapper else "loading",
         "model_loaded": wrapper is not None,
         "model_dtype": Config.MODEL_DTYPE,
         "streaming_supported": True,
         "voice_cache_entries": wrapper._voice_cache.size if wrapper else 0,
     }
     if warm_up and wrapper:
         try:
     return status
@app.get("/info")
async def info():
    """Describe the served model, tunable parameters and optional features.

    Every value is read from ``Config``; nothing is computed from the loaded
    model, so the handler does not touch ``app.state``.
    """
    generation_params = {
        "max_new_tokens": {"default": Config.MAX_NEW_TOKENS, "range": "64–2048"},
        "repetition_penalty": {"default": Config.REPETITION_PENALTY, "range": "1.0–2.0"},
    }
    cloning = {
        "description": "Upload 3–30s reference WAV/MP3 as 'voice_ref' field",
        # Whole megabytes, rounded down.
        "max_upload_mb": Config.MAX_VOICE_UPLOAD_BYTES // (1024 * 1024),
    }
    parallel = {
        "enabled": Config.ENABLE_PARALLEL_MODE,
        "helper_configured": bool(Config.HELPER_BASE_URL),
        # An empty helper URL is reported as null rather than "".
        "helper_base_url": Config.HELPER_BASE_URL or None,
        "supports_voice_ref": True,
    }
    payload = {
        "model": Config.MODEL_ID,
        "dtype": Config.MODEL_DTYPE,
        "sample_rate": Config.SAMPLE_RATE,
        "paralinguistic_tags": list(Config.PARALINGUISTIC_TAGS),
        "tag_usage": "Insert tags directly in text, e.g. 'That is so funny! [laugh] Anyway…'",
        "parameters": generation_params,
        "voice_cloning": cloning,
        "parallel_mode": parallel,
    }
    return payload
 @app.get("/voices")
 async def list_voices():
     """List the built-in voices and name the default one.

     Responds with HTTP 503 while no wrapper is attached to ``app.state``.
     """
     loaded: ChatterboxWrapper = getattr(app.state, "wrapper", None)
     if loaded:
         default_name = loaded.default_voice_name
         available = loaded.list_builtin_voices()
         return {"default": default_name, "voices": available}
     raise HTTPException(503, "Model not loaded")
 # ═══════════════════════════════════════════════════════════════════
 _active_streams: dict[str, threading.Event] = {}
+_internal_cancelled_streams: set[str] = set()
 _internal_cancel_lock = threading.Lock()
+_internal_stream_voice_keys: dict[str, set[str]] = {}
 # ═══════════════════════════════════════════════════════════════════
         _active_streams.pop(stream_id, None)
+def _parallel_odd_even_stream_generator(
     wrapper: ChatterboxWrapper,
     text: str,
     local_voice: VoiceProfile,
     stream_id: str,
     helper_base_url: str,
 ) -> Generator[bytes, None, None]:
+    """Additive odd/even split streamer (primary handles odd, helper handles even)."""
     cancel_event = threading.Event()
     _active_streams[stream_id] = cancel_event
     clean_text = text_processor.sanitize(text.strip()[: Config.MAX_TEXT_LENGTH])
     chunks = text_processor.split_for_streaming(clean_text)
     total_chunks = len(chunks)
     if total_chunks == 0:
         _active_streams.pop(stream_id, None)
         return
     lock = threading.Lock()
     cond = threading.Condition(lock)
+    ready: dict[int, bytes] = {}
     first_error: Optional[Exception] = None
     workers_done = 0
+    def _publish(idx: int, data: bytes):
         with cond:
+            ready[idx] = data
             cond.notify_all()
     def _set_error(err: Exception):
         )
         return _encode_mp3_chunk(audio)
+    def _odd_worker():
         try:
             for idx in range(0, total_chunks, 2):
                 if cancel_event.is_set():
                     break
                 data = _synth_local(chunks[idx])
+                _publish(idx, data)
         except Exception as e:
             _set_error(e)
         finally:
             _worker_done()
+    def _even_worker():
+        helper_available = True
         helper_voice_key: Optional[str] = None
         try:
+            if helper_voice_bytes:
                 attempts = 2 if Config.HELPER_RETRY_ONCE else 1
                 last_err: Optional[Exception] = None
                 for _ in range(attempts):
                             helper_base_url=helper_base_url,
                             stream_id=stream_id,
                             audio_bytes=helper_voice_bytes,
+                            timeout_sec=max(1.0, Config.HELPER_TIMEOUT_SEC),
                         )
                         last_err = None
                         break
                     except Exception as reg_err:
                         last_err = reg_err
                         continue
                 if last_err is not None:
                     helper_available = False
                     logger.warning(
+                        f"[{stream_id}] Helper voice registration failed; "
+                        "falling back to local synthesis for even chunks"
                     )
             for idx in range(1, total_chunks, 2):
                 if cancel_event.is_set():
                             helper_data = _helper_request_chunk(
                                 helper_base_url=helper_base_url,
                                 payload=payload,
+                                timeout_sec=max(1.0, Config.HELPER_TIMEOUT_SEC),
                             )
+                            _publish(idx, helper_data)
                             last_err = None
                             break
                         except Exception as helper_err:
                     helper_available = False
                     logger.warning(
+                        f"[{stream_id}] Helper failed at chunk {idx}; "
+                        "falling back to local synthesis for remaining even chunks"
                     )
+                # Local fallback for even chunks
                 data = _synth_local(chunks[idx])
+                _publish(idx, data)
         except Exception as e:
             _set_error(e)
         finally:
             _worker_done()
+    odd_thread = threading.Thread(target=_odd_worker, daemon=True)
+    even_thread = threading.Thread(target=_even_worker, daemon=True)
+    odd_thread.start()
+    even_thread.start()
     next_idx = 0
     try:
                     next_idx not in ready
                     and first_error is None
                     and not cancel_event.is_set()
+                    and workers_done < 2
                 ):
                     cond.wait(timeout=0.1)
                     break
                 if next_idx in ready:
+                    data = ready.pop(next_idx)
                 elif first_error is not None:
                     logger.error(f"[{stream_id}] Parallel stream error: {first_error}")
                     break
+                elif workers_done >= 2:
                     logger.error(
                         f"[{stream_id}] Parallel stream ended with missing chunk index {next_idx}"
                     )
                 else:
                     continue
+            yield data
             next_idx += 1
     finally:
         cancel_event.set()
+        _helper_cancel_stream(helper_base_url, stream_id)
+        odd_thread.join(timeout=1.0)
+        even_thread.join(timeout=1.0)
         _active_streams.pop(stream_id, None)
     repetition_penalty: float = Form(Config.REPETITION_PENALTY),
     helper_url: Optional[str] = Form(None),
 ):
+    """Additive odd/even split stream mode (primary + helper)."""
     wrapper: ChatterboxWrapper = getattr(app.state, "wrapper", None)
     if not wrapper:
         raise HTTPException(503, "Model not loaded")
             logger.error(f"Parallel voice encoding failed: {e}")
             raise HTTPException(400, "Could not process voice file for parallel mode")
     else:
+        # Built-in voice selected by name — resolve locally and prepare
+        # bytes for helper registration so helpers cache the same hash.
         try:
             selected_voice_id = wrapper.resolve_voice_id(voice_name)
             local_voice = wrapper.get_builtin_voice(selected_voice_id)
         except ValueError as e:
             raise HTTPException(status_code=400, detail=str(e))
+        # Only send bytes to helper if a non-default voice was selected,
+        # because the helper's own default is already loaded.
         if selected_voice_id != wrapper.default_voice_name:
             helper_voice_bytes = wrapper.get_builtin_voice_bytes(selected_voice_id)
             if not helper_voice_bytes:
     if not resolved_helper:
         raise HTTPException(
             400,
+            "Helper URL not configured. Set CB_HELPER_BASE_URL or pass helper_url.",
         )
     stream_id = uuid.uuid4().hex[:12]
     return StreamingResponse(
+        _parallel_odd_even_stream_generator(
             wrapper=wrapper,
             text=text,
             local_voice=local_voice,
             "Content-Disposition": "attachment; filename=tts_parallel_stream.mp3",
             "Transfer-Encoding": "chunked",
             "X-Stream-Id": stream_id,
+            "X-Streaming-Type": "parallel-odd-even",
             "Cache-Control": "no-cache",
         },
     )
     stream_id = (http_request.query_params.get("stream_id") or "").strip()
     if stream_id:
         with _internal_cancel_lock:
+            keys = _internal_stream_voice_keys.setdefault(stream_id, set())
             keys.add(voice_key)
     return {"status": "registered", "voice_key": voice_key}
             raise HTTPException(403, "Forbidden")
     with _internal_cancel_lock:
         if request.stream_id in _internal_cancelled_streams:
             raise HTTPException(409, "Stream already cancelled")
     wrapper: ChatterboxWrapper = getattr(app.state, "wrapper", None)
     if not wrapper:
             raise HTTPException(403, "Forbidden")
     with _internal_cancel_lock:
+        _internal_cancelled_streams.add(stream_id)
         _internal_stream_voice_keys.pop(stream_id, None)
     return {"status": "cancelled", "stream_id": stream_id}
 @app.post("/v1/audio/speech")
 async def openai_compatible_tts(request: TTSJsonRequest):
     """OpenAI-compatible streaming endpoint (JSON body, no file upload).
     event = _active_streams.get(stream_id)
     if event:
         event.set()
         logger.info(f"Stream {stream_id} cancelled by client")
         return {"status": "stopped", "stream_id": stream_id}
     return {"status": "not_found", "stream_id": stream_id}
 @app.post("/tts/stop")
 async def stop_all_streams():
     """Emergency stop: cancel ALL active TTS streams.

     Takes one snapshot of ``_active_streams``, sets every cancel event in it
     and removes exactly those entries. The reported ``count`` is the number
     of streams that were actually signalled.
     """
     snapshot = list(_active_streams.items())
     for sid, event in snapshot:
         event.set()
         # Remove only what was signalled; a blanket clear() could drop a
         # stream registered after the snapshot without cancelling it.
         _active_streams.pop(sid, None)
     count = len(snapshot)
     logger.info(f"Stopped all streams ({count} active)")
     return {"status": "stopped_all", "count": count}

chatterbox_wrapper.py CHANGED Viewed

@@ -50,6 +50,7 @@ _SUPPORTED_AUDIO_EXTENSIONS = {
 def _slugify(text: str) -> str:
     buf = []
     prev_underscore = False
     for ch in text.strip().lower():
@@ -64,6 +65,7 @@ def _slugify(text: str) -> str:
     return slug or "voice"
 # ═══════════════════════════════════════════════════════════════════
 # Data Structures
 # ═══════════════════════════════════════════════════════════════════

 def _slugify(text: str) -> str:
+    """Convert a display name to a safe, lowercase identifier."""
     buf = []
     prev_underscore = False
     for ch in text.strip().lower():
     return slug or "voice"
 # ═══════════════════════════════════════════════════════════════════
 # Data Structures
 # ═══════════════════════════════════════════════════════════════════

config.py CHANGED Viewed

@@ -77,14 +77,11 @@ class Config:
     # Smaller chunks = faster TTFB (first audio arrives sooner)
     # ~200 chars ≈ 1–2 sentences ≈ fastest first-chunk on 2 vCPU
     MAX_CHUNK_CHARS: int = int(os.getenv("CB_MAX_CHUNK_CHARS", "100"))
-    # Additive parallel mode (2-way split: primary + helper).
     ENABLE_PARALLEL_MODE: bool = _get_bool("CB_ENABLE_PARALLEL_MODE", True)
     HELPER_BASE_URL: str = os.getenv("CB_HELPER_BASE_URL", "https://shadowhunter222-chab2.hf.space").strip()
     HELPER_TIMEOUT_SEC: float = float(os.getenv("CB_HELPER_TIMEOUT_SEC", "45"))
     HELPER_RETRY_ONCE: bool = _get_bool("CB_HELPER_RETRY_ONCE", True)
-    # Internal housekeeping TTLs to avoid retaining stream metadata indefinitely.
-    INTERNAL_CANCEL_TTL_SEC: int = int(os.getenv("CB_INTERNAL_CANCEL_TTL_SEC", "120"))
-    INTERNAL_STREAM_STATE_TTL_SEC: int = int(os.getenv("CB_INTERNAL_STREAM_STATE_TTL_SEC", "600"))
     # Optional shared secret for internal chunk endpoints.
     INTERNAL_SHARED_SECRET: str = os.getenv("CB_INTERNAL_SHARED_SECRET", "").strip()
@@ -93,14 +90,13 @@ class Config:
     PORT: int = int(os.getenv("CB_PORT", "7860"))
     ALLOWED_ORIGINS: list = [
-        "https://toolboxesai.com",
         "https://www.toolboxesai.com",
         "www.toolboxesai.com",
-        "toolboxesai.com",
         "http://localhost:8788",  "http://127.0.0.1:8788",
         "http://localhost:5502",  "http://127.0.0.1:5502",
         "http://localhost:5501",  "http://127.0.0.1:5501",
         "http://localhost:5500",  "http://127.0.0.1:5500",
         "http://localhost:5173",  "http://127.0.0.1:5173",
         "http://localhost:7860",  "http://127.0.0.1:7860",
-    ]

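     # Smaller chunks = faster TTFB (time to first byte: first audio arrives sooner).
     # ~200 chars ≈ 1–2 sentences ≈ fastest first-chunk on 2 vCPU.
     # NOTE(review): the default below is 100 chars, not ~200 — confirm which value is intended.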
     MAX_CHUNK_CHARS: int = int(os.getenv("CB_MAX_CHUNK_CHARS", "100"))
+    # Additive parallel mode (odd/even split across primary/helper).
     ENABLE_PARALLEL_MODE: bool = _get_bool("CB_ENABLE_PARALLEL_MODE", True)
     HELPER_BASE_URL: str = os.getenv("CB_HELPER_BASE_URL", "https://shadowhunter222-chab2.hf.space").strip()
     HELPER_TIMEOUT_SEC: float = float(os.getenv("CB_HELPER_TIMEOUT_SEC", "45"))
     HELPER_RETRY_ONCE: bool = _get_bool("CB_HELPER_RETRY_ONCE", True)
     # Optional shared secret for internal chunk endpoints.
     INTERNAL_SHARED_SECRET: str = os.getenv("CB_INTERNAL_SHARED_SECRET", "").strip()
     PORT: int = int(os.getenv("CB_PORT", "7860"))
     ALLOWED_ORIGINS: list = [
         "https://www.toolboxesai.com",
         "www.toolboxesai.com",
+        "https://toolboxesai.com",
         "http://localhost:8788",  "http://127.0.0.1:8788",
         "http://localhost:5502",  "http://127.0.0.1:5502",
         "http://localhost:5501",  "http://127.0.0.1:5501",
         "http://localhost:5500",  "http://127.0.0.1:5500",
         "http://localhost:5173",  "http://127.0.0.1:5173",
         "http://localhost:7860",  "http://127.0.0.1:7860",
+    ]

her_prompt.wav ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8eaabbeafe26ad6f78b56dcc32608763eeb69485db074c7136c6818f04a93ced
+size 725328

ivr_female_prompt.wav ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:64953bf94657c4334532319fd4f20e9859c31af4445940916b04f129ef1f89e6
+size 2779278