Spaces:

Percy3822
/

Brain

Paused

App Files Community

Percy3822 committed on Sep 5, 2025

Commit

3816405

verified ·

1 Parent(s): cf40d2a

Update app.py

Browse files

Files changed (1) hide show

app.py +54 -40

app.py CHANGED Viewed

@@ -1,9 +1,12 @@
-import os, json, time, asyncio, tempfile, struct
 from typing import AsyncGenerator, Dict, Any, Optional
 from fastapi import FastAPI, Request, Query, UploadFile
 from fastapi.responses import JSONResponse, StreamingResponse, FileResponse
-import websockets  # NEW
 # === Directories ===
 BASE_DIR  = os.environ.get("BASE_DIR", "/tmp/brain_app")
@@ -23,7 +26,7 @@ BASE_WPM       = int(os.environ.get("BASE_WPM", "165"))
 NOISE_SCALE    = float(os.environ.get("NOISE_SCALE", "0.33"))
 NOISE_W        = float(os.environ.get("NOISE_W", "0.92"))
-app = FastAPI(title="Brain Space (STT→TTS coordinator)", version="3.2.0")
 log_queue: "asyncio.Queue[Dict[str, Any]]" = asyncio.Queue()
 def write_event(event: Dict[str, Any]) -> None:
@@ -42,6 +45,39 @@ def rate_to_length_scale(rate_wpm: Optional[int]) -> float:
     r = max(80, min(320, rate_wpm))
     return round(base / float(r), 3)
 # ---------- Health ----------
 @app.get("/health")
 def health():
@@ -72,10 +108,11 @@ async def stream_logs() -> StreamingResponse:
     return StreamingResponse(gen(), media_type="text/event-stream",
                              headers={"Cache-Control":"no-cache","Connection":"keep-alive"})
-# ---------- TTS proxy (file-mode, not live) ----------
 async def _proxy_tts_wav_stream(text: str, voice: str, rate_wpm: Optional[int],
                                 noise_scale: float, noise_w: float) -> StreamingResponse:
-    import httpx
     length_scale = rate_to_length_scale(rate_wpm) if rate_wpm is not None else rate_to_length_scale(BASE_WPM)
     params = {
         "text": text,
@@ -122,26 +159,8 @@ async def tts_say_wav_post(req: Request):
     write_event({"type":"tts_post","len":len(text),"voice":voice,"rate_wpm":rate_wpm})
     return await _proxy_tts_wav_stream(text, voice, rate_wpm, noise_s, noise_wgt)
-# ---------- NEW: true LIVE streaming via TTS WebSocket ----------
-def _wav_header(sr: int, ch: int, bits: int = 16) -> bytes:  # removed by this commit; meant to build a 44-byte PCM WAV header
-    byte_rate = sr * ch * (bits // 8)  # audio bytes per second
-    block_align = ch * (bits // 8)  # bytes per sample frame across all channels
-    # streaming data size; many players accept 0xFFFFFFFF as "unknown length"
-    data_size = 0xFFFFFFFF
-    riff_size = 36 + data_size  # NOTE(review): 0x100000023 exceeds 32 bits, so the "<I" pack of riff_size below raises struct.error
-    return b"".join([
-        b"RIFF", struct.pack("<I", riff_size), b"WAVE",  # RIFF chunk: tag, size, form type
-        b"fmt ", struct.pack("<I", 16), struct.pack("<H", 1),  # fmt chunk: tag, 16-byte body, format 1 (PCM)
-        struct.pack("<H", ch), struct.pack("<I", sr),  # channel count, sample rate
-        struct.pack("<I", byte_rate), struct.pack("<H", block_align),
-        struct.pack("<H", bits),  # bits per sample
-        b"data", struct.pack("<I", data_size),  # data chunk tag and placeholder size
-    ])
-def _tts_ws_url() -> str:  # removed by this commit; derives the TTS WebSocket URL from TTS_BASE
-    base = TTS_BASE.rstrip("/")  # drop trailing slashes before appending the path
-    return base.replace("http://", "ws://").replace("https://", "wss://") + "/ws/tts"  # str.replace rewrites every occurrence, not only the leading scheme
 @app.get("/tts/say.stream.wav")
 async def tts_say_stream_wav(
     text: str = Query(..., description="Text to synthesize (live)"),
@@ -163,6 +182,7 @@ async def tts_say_stream_wav(
         try:
             ws_url = _tts_ws_url()
             ws = await websockets.connect(ws_url, ping_interval=None, max_size=8_000_000)
             # init
             await ws.send(json.dumps({
                 "event": "init",
@@ -171,12 +191,13 @@ async def tts_say_stream_wav(
                 "noise_scale": noise_scale,
                 "noise_w": noise_w,
             }))
             sr, ch = 22050, 1
-            # wait for ready -> send header immediately so client can start
             while True:
                 m = await ws.recv()
                 if isinstance(m, (bytes, bytearray)):
-                    # ignore stray audio until we know sr/ch
                     continue
                 try:
                     evt = json.loads(m)
@@ -198,36 +219,29 @@ async def tts_say_stream_wav(
             while True:
                 try:
                     msg = await ws.recv()
-                except websockets.exceptions.ConnectionClosedOK:
-                    break
-                except websockets.exceptions.ConnectionClosedError:
                     break
                 if isinstance(msg, (bytes, bytearray)):
-                    # raw PCM16 frame from TTS; just yield
                     if msg:
                         yield msg
                     continue
-                # control event
                 try:
                     evt = json.loads(msg)
                 except Exception:
                     continue
-                kind = evt.get("event")
-                if kind in ("done", "end"):
                     break
-                if kind == "error":
-                    # propagate as bytes (don’t raise; avoid chunk abort)
-                    detail = evt.get("detail", "tts error")
-                    yield f'ERROR: {detail}'.encode("utf-8")
                     break
                 # ignore logs
         except Exception as e:
-            # Log, but don't raise (raising here aborts chunked stream & causes 'incomplete chunked read')
             write_event({"type":"tts_stream_err","err":str(e)})
-            # a tiny trailing pad keeps some clients happy
             yield b""
         finally:
             try:

+# brain_app.py — Brain Space: STT → TTS coordinator + LIVE TTS streaming proxy
+import os, json, time, asyncio, tempfile
 from typing import AsyncGenerator, Dict, Any, Optional
 from fastapi import FastAPI, Request, Query, UploadFile
 from fastapi.responses import JSONResponse, StreamingResponse, FileResponse
+import httpx
+import websockets
 # === Directories ===
 BASE_DIR  = os.environ.get("BASE_DIR", "/tmp/brain_app")
 NOISE_SCALE    = float(os.environ.get("NOISE_SCALE", "0.33"))
 NOISE_W        = float(os.environ.get("NOISE_W", "0.92"))
+app = FastAPI(title="Brain Space (STT→TTS coordinator)", version="3.1.0")
 log_queue: "asyncio.Queue[Dict[str, Any]]" = asyncio.Queue()
 def write_event(event: Dict[str, Any]) -> None:
     r = max(80, min(320, rate_wpm))
     return round(base / float(r), 3)
+def _tts_ws_url() -> str:
+    """
+    Build the TTS WebSocket URL from TTS_BASE.
+
+    Trailing slashes are stripped from TTS_BASE, then:
+      * https://host -> wss://host/ws/tts
+      * http://host  -> ws://host/ws/tts
+      * any other value is kept as-is and gets "/ws/tts" appended unless it
+        already ends with that path.
+
+    e.g. https://Percy3822-ActualTTS.hf.space -> wss://Percy3822-ActualTTS.hf.space/ws/tts
+
+    Returns:
+        The WebSocket URL as a string. A falsy (None/empty) TTS_BASE yields
+        the bare path "/ws/tts".
+    """
+    base = (TTS_BASE or "").rstrip("/")  # tolerate a falsy TTS_BASE; drop trailing slashes
+    if base.startswith("https://"):
+        return "wss://" + base[len("https://"):] + "/ws/tts"  # NOTE(review): appended even if base already ends with /ws/tts
+    if base.startswith("http://"):
+        return "ws://" + base[len("http://"):] + "/ws/tts"  # NOTE(review): same unconditional append as the https branch
+    return (base + "/ws/tts") if not base.endswith("/ws/tts") else base  # other schemes: only this branch avoids a doubled suffix
+def _wav_header(sr: int, ch: int) -> bytes:
+    """Minimal PCM16 WAV header with large data size for streaming.
+
+    Args:
+        sr: Sample rate (samples per second), written to the 4-byte field.
+        ch: Channel count, written to the 2-byte field.
+
+    Returns:
+        The 44-byte RIFF/WAVE header: RIFF chunk descriptor, 16-byte "fmt "
+        chunk, then the "data" tag and size. No audio bytes are included.
+
+    Raises:
+        OverflowError: If a value is negative or too large for its field
+            (int.to_bytes is used in its default unsigned mode).
+    """
+    bits = 16  # bits per sample (PCM16)
+    byte_rate   = sr * ch * (bits // 8)  # audio bytes per second
+    block_align = ch * (bits // 8)  # bytes per sample frame across all channels
+    data_size   = 0x7FFFFFFF  # placeholder length; the real size is not known up front
+    riff_size   = (36 + data_size) & 0xFFFFFFFF  # 36 = header bytes after the RIFF size field; mask keeps the value within 32 bits
+    return (
+        b"RIFF" +
+        riff_size.to_bytes(4, "little") +
+        b"WAVE" +
+        b"fmt " + (16).to_bytes(4, "little") +  # fmt chunk tag and its 16-byte body length
+        (1).to_bytes(2, "little") +        # audio format 1 = PCM
+        (ch).to_bytes(2, "little") +  # channel count
+        (sr).to_bytes(4, "little") +  # sample rate
+        (byte_rate).to_bytes(4, "little") +
+        (block_align).to_bytes(2, "little") +
+        (bits).to_bytes(2, "little") +  # bits per sample
+        b"data" + data_size.to_bytes(4, "little")  # data chunk tag and placeholder size
+    )
 # ---------- Health ----------
 @app.get("/health")
 def health():
     return StreamingResponse(gen(), media_type="text/event-stream",
                              headers={"Cache-Control":"no-cache","Connection":"keep-alive"})
+# ---------- TTS proxy streaming (/tts/say.wav) ----------
+# GET:  /tts/say.wav?text=...&voice=...&rate_wpm=165
+# POST: JSON {"text": "...", "voice": "...", "rate_wpm": 165}
 async def _proxy_tts_wav_stream(text: str, voice: str, rate_wpm: Optional[int],
                                 noise_scale: float, noise_w: float) -> StreamingResponse:
     length_scale = rate_to_length_scale(rate_wpm) if rate_wpm is not None else rate_to_length_scale(BASE_WPM)
     params = {
         "text": text,
     write_event({"type":"tts_post","len":len(text),"voice":voice,"rate_wpm":rate_wpm})
     return await _proxy_tts_wav_stream(text, voice, rate_wpm, noise_s, noise_wgt)
+# ---------- LIVE TTS WS → HTTP WAV streaming ----------
+# GET: /tts/say.stream.wav?text=...&voice=...&rate_wpm=165
 @app.get("/tts/say.stream.wav")
 async def tts_say_stream_wav(
     text: str = Query(..., description="Text to synthesize (live)"),
         try:
             ws_url = _tts_ws_url()
             ws = await websockets.connect(ws_url, ping_interval=None, max_size=8_000_000)
             # init
             await ws.send(json.dumps({
                 "event": "init",
                 "noise_scale": noise_scale,
                 "noise_w": noise_w,
             }))
             sr, ch = 22050, 1
+            # wait for ready -> send WAV header immediately
             while True:
                 m = await ws.recv()
                 if isinstance(m, (bytes, bytearray)):
+                    # ignore until we know sr/ch
                     continue
                 try:
                     evt = json.loads(m)
             while True:
                 try:
                     msg = await ws.recv()
+                except websockets.exceptions.ConnectionClosed:
                     break
                 if isinstance(msg, (bytes, bytearray)):
                     if msg:
                         yield msg
                     continue
                 try:
                     evt = json.loads(msg)
                 except Exception:
                     continue
+                k = evt.get("event")
+                if k in ("done", "end"):
                     break
+                if k == "error":
+                    d = evt.get("detail", "tts error")
+                    yield f'ERROR: {d}'.encode("utf-8")
                     break
                 # ignore logs
         except Exception as e:
             write_event({"type":"tts_stream_err","err":str(e)})
             yield b""
         finally:
             try: