Spaces:

Percy3822
/

Brain

Paused

App Files Files Community

Percy3822 committed on Sep 5, 2025

Commit

0bb83c8

verified ·

1 Parent(s): b9cabc3

Update app.py

Browse files

Files changed (1) hide show

app.py +34 -98

app.py CHANGED Viewed

@@ -1,4 +1,3 @@
-# brain_app.py — Brain Space: STT → TTS proxy streamer
 import os, json, time, asyncio, tempfile
 from typing import AsyncGenerator, Dict, Any, Optional
 from fastapi import FastAPI, Request, Query, UploadFile
@@ -14,8 +13,7 @@ for p in (BASE_DIR, FILES_DIR, LOGS_DIR):
 # === External Spaces ===
 TTS_BASE  = os.environ.get("TTS_BASE",  "https://Percy3822-ActualTTS.hf.space")
-# TODO: set your STT Space base here (example):
-STT_BASE  = os.environ.get("STT_BASE",  "https://YOUR-STT-SPACE.hf.space")
 # === TTS defaults ===
 DEFAULT_VOICE  = os.environ.get("DEFAULT_VOICE", "en_US-amy-medium")
@@ -23,8 +21,7 @@ BASE_WPM       = int(os.environ.get("BASE_WPM", "165"))
 NOISE_SCALE    = float(os.environ.get("NOISE_SCALE", "0.33"))
 NOISE_W        = float(os.environ.get("NOISE_W", "0.92"))
-# === App ===
-app = FastAPI(title="Brain Space (STT→TTS coordinator)", version="3.0.0")
 log_queue: "asyncio.Queue[Dict[str, Any]]" = asyncio.Queue()
 def write_event(event: Dict[str, Any]) -> None:
@@ -43,6 +40,7 @@ def rate_to_length_scale(rate_wpm: Optional[int]) -> float:
     r = max(80, min(320, rate_wpm))
     return round(base / float(r), 3)
 @app.get("/health")
 def health():
     return {
@@ -55,7 +53,7 @@ def health():
         "defaults": {"voice": DEFAULT_VOICE, "rate_wpm": BASE_WPM}
     }
-# ========== SSE logs (optional) ==========
 @app.get("/stream/logs")
 async def stream_logs() -> StreamingResponse:
     async def gen() -> AsyncGenerator[bytes, None]:
@@ -72,34 +70,11 @@ async def stream_logs() -> StreamingResponse:
     return StreamingResponse(gen(), media_type="text/event-stream",
                              headers={"Cache-Control":"no-cache","Connection":"keep-alive"})
-# ---------- Helpers ----------
-async def _download_to_temp(url: str) -> str:
-    import httpx, os
-    _, ext = os.path.splitext(url.split("?")[0])
-    if not ext: ext = ".wav"
-    fd, tmp_path = tempfile.mkstemp(prefix="mic_", suffix=ext)
-    os.close(fd)
-    async with httpx.AsyncClient(timeout=300) as client:
-        r = await client.get(url)
-        r.raise_for_status()
-        with open(tmp_path, "wb") as f:
-            f.write(r.content)
-    return tmp_path
-async def _call_stt_transcribe_file(path: str) -> Dict[str, Any]:
-    """POST multipart 'audio' to STT /stt/transcribe and return its JSON."""
-    import httpx
-    stt_url = f"{STT_BASE}/stt/transcribe"
-    files = {"audio": (os.path.basename(path), open(path, "rb"), "audio/wav")}
-    async with httpx.AsyncClient(timeout=300) as client:
-        r = await client.post(stt_url, files=files)
-        ok = r.status_code == 200
-        data = r.json() if ok else {"ok": False, "error": f"STT {r.status_code}"}
-    return data
 async def _proxy_tts_wav_stream(text: str, voice: str, rate_wpm: Optional[int],
                                 noise_scale: float, noise_w: float) -> StreamingResponse:
-    """Proxy stream from TTS /speak.wav based on text."""
     import httpx
     length_scale = rate_to_length_scale(rate_wpm) if rate_wpm is not None else rate_to_length_scale(BASE_WPM)
     params = {
@@ -113,6 +88,7 @@ async def _proxy_tts_wav_stream(text: str, voice: str, rate_wpm: Optional[int],
         async with httpx.AsyncClient(timeout=None) as client:
             async with client.stream("GET", f"{TTS_BASE}/speak.wav", params=params) as resp:
                 if resp.status_code != 200:
                     yield (await resp.aread())
                     return
                 async for chunk in resp.aiter_bytes():
@@ -120,77 +96,37 @@ async def _proxy_tts_wav_stream(text: str, voice: str, rate_wpm: Optional[int],
                         yield chunk
     return StreamingResponse(gen(), media_type="audio/wav", headers={"Cache-Control":"no-cache"})
-# ========== The simple end-to-end endpoint ==========
-@app.post("/demo/relay.wav")
-async def demo_relay_wav(
-    req: Request,
     voice: str = Query(DEFAULT_VOICE),
     rate_wpm: Optional[int] = Query(BASE_WPM),
     noise_scale: float = Query(NOISE_SCALE),
     noise_w: float = Query(NOISE_W),
 ):
-    """
-    Accept 5s mic recording from client (multipart 'audio' or JSON {file_url}),
-    send to STT Space for transcription, then IMMEDIATELY proxy stream TTS WAV
-    that speaks back what was heard.
-    """
-    tmp_path = None
-    try:
-        # Ingest audio
-        ctype = (req.headers.get("content-type") or "").lower()
-        if "multipart/form-data" in ctype:
-            form = await req.form()
-            up: UploadFile = form.get("audio")
-            if not up:
-                return JSONResponse({"ok": False, "error": "Missing 'audio' file"}, status_code=400)
-            import os, tempfile
-            suffix = os.path.splitext(up.filename or "")[1] or ".wav"
-            fd, tmp_path = tempfile.mkstemp(prefix="mic_", suffix=suffix)
-            os.close(fd)
-            with open(tmp_path, "wb") as f:
-                f.write(await up.read())
-        else:
-            # JSON with {file_url}
-            try:
-                body = await req.json()
-            except Exception:
-                body = {}
-            url = (body or {}).get("file_url")
-            if not url:
-                return JSONResponse({"ok": False, "error": "Provide multipart 'audio' or JSON {file_url}"},
-                                    status_code=400)
-            tmp_path = await _download_to_temp(url)
-        # STT
-        stt = await _call_stt_transcribe_file(tmp_path)
-        if not stt.get("ok"):
-            write_event({"type":"relay","ok":False,"stage":"stt","err":stt.get("error")})
-            return JSONResponse({"ok": False, "error": f"STT failed: {stt.get('error')}"}, status_code=502)
-        text = (stt.get("text") or "").strip()
-        if not text:
-            write_event({"type":"relay","ok":False,"stage":"stt","err":"empty transcript"})
-            return JSONResponse({"ok": False, "error": "No speech detected"}, status_code=422)
-        # Brain reply (for demo we just echo; you can replace with actual brain logic later)
-        reply_text = f"I heard: {text}"[:800]
-        write_event({"type":"relay","ok":True,"heard_len":len(text),"voice":voice,"rate_wpm":rate_wpm})
-        # TTS proxy stream (immediate)
-        return await _proxy_tts_wav_stream(reply_text, voice, rate_wpm, noise_scale, noise_w)
-    except Exception as e:
-        write_event({"type":"relay","ok":False,"err":str(e)})
-        return JSONResponse({"ok": False, "error": str(e)}, status_code=500)
-    finally:
-        try:
-            if tmp_path and os.path.exists(tmp_path):
-                os.unlink(tmp_path)
-        except Exception:
-            pass
-# Optional: serve saved files if you decide to persist later
 @app.get("/files/{name}")
 def get_file(name: str):
     path = os.path.join(FILES_DIR, name)
@@ -200,4 +136,4 @@ def get_file(name: str):
 if __name__ == "__main__":
     import uvicorn
-    uvicorn.run("brain_app:app", host="0.0.0.0", port=7861, reload=False)

 import os, json, time, asyncio, tempfile
 from typing import AsyncGenerator, Dict, Any, Optional
 from fastapi import FastAPI, Request, Query, UploadFile
 # === External Spaces ===
 TTS_BASE  = os.environ.get("TTS_BASE",  "https://Percy3822-ActualTTS.hf.space")
+STT_BASE  = os.environ.get("STT_BASE",  "https://Percy3822-ActualSTT.hf.space")  # set to your STT Space
 # === TTS defaults ===
 DEFAULT_VOICE  = os.environ.get("DEFAULT_VOICE", "en_US-amy-medium")
 NOISE_SCALE    = float(os.environ.get("NOISE_SCALE", "0.33"))
 NOISE_W        = float(os.environ.get("NOISE_W", "0.92"))
+app = FastAPI(title="Brain Space (STT→TTS coordinator)", version="3.1.0")
 log_queue: "asyncio.Queue[Dict[str, Any]]" = asyncio.Queue()
 def write_event(event: Dict[str, Any]) -> None:
     r = max(80, min(320, rate_wpm))
     return round(base / float(r), 3)
+# ---------- Health ----------
 @app.get("/health")
 def health():
     return {
         "defaults": {"voice": DEFAULT_VOICE, "rate_wpm": BASE_WPM}
     }
+# ---------- SSE logs (optional) ----------
 @app.get("/stream/logs")
 async def stream_logs() -> StreamingResponse:
     async def gen() -> AsyncGenerator[bytes, None]:
     return StreamingResponse(gen(), media_type="text/event-stream",
                              headers={"Cache-Control":"no-cache","Connection":"keep-alive"})
+# ---------- TTS proxy streaming (/tts/say.wav) ----------
+# GET:  /tts/say.wav?text=...&voice=...&rate_wpm=165
+# POST: JSON {"text": "...", "voice": "...", "rate_wpm": 165}
 async def _proxy_tts_wav_stream(text: str, voice: str, rate_wpm: Optional[int],
                                 noise_scale: float, noise_w: float) -> StreamingResponse:
     import httpx
     length_scale = rate_to_length_scale(rate_wpm) if rate_wpm is not None else rate_to_length_scale(BASE_WPM)
     params = {
         async with httpx.AsyncClient(timeout=None) as client:
             async with client.stream("GET", f"{TTS_BASE}/speak.wav", params=params) as resp:
                 if resp.status_code != 200:
+                    # bubble up exact error body from TTS
                     yield (await resp.aread())
                     return
                 async for chunk in resp.aiter_bytes():
                         yield chunk
     return StreamingResponse(gen(), media_type="audio/wav", headers={"Cache-Control":"no-cache"})
+@app.get("/tts/say.wav")  # GET variant: every synthesis option arrives as a query parameter
+async def tts_say_wav_get(
+    text: str = Query(..., description="Text to synthesize"),  # the only parameter without a default
     voice: str = Query(DEFAULT_VOICE),
     rate_wpm: Optional[int] = Query(BASE_WPM),
     noise_scale: float = Query(NOISE_SCALE),
     noise_w: float = Query(NOISE_W),
 ):
+    write_event({"type":"tts_get","len":len(text),"voice":voice,"rate_wpm":rate_wpm})  # event carries the text length, not the text itself
+    return await _proxy_tts_wav_stream(text, voice, rate_wpm, noise_scale, noise_w)  # returns the StreamingResponse built by the proxy helper
+@app.post("/tts/say.wav")
+async def tts_say_wav_post(req: Request):
+    try:
+        body = await req.json()
+    except Exception:
+        return JSONResponse({"ok": False, "error": "Invalid JSON body"}, status_code=400)
+    text = (body.get("text") or "").strip()
+    if not text:
+        return JSONResponse({"ok": False, "error": "Missing text"}, status_code=400)
+    voice     = (body.get("voice") or DEFAULT_VOICE).strip()
+    rate_wpm  = int(body.get("rate_wpm", BASE_WPM)) if body.get("rate_wpm") is not None else BASE_WPM
+    noise_s   = float(body.get("noise_scale", NOISE_SCALE))
+    noise_wgt = float(body.get("noise_w", NOISE_W))
+    write_event({"type":"tts_post","len":len(text),"voice":voice,"rate_wpm":rate_wpm})
+    return await _proxy_tts_wav_stream(text, voice, rate_wpm, noise_s, noise_wgt)
+# ---------- (Optional) simple relay demo kept for later ----------
+# You can keep your /demo/relay.wav here if you still want the file-upload STT→TTS demo.
+# ---------- Optional: serve saved files later ----------
 @app.get("/files/{name}")
 def get_file(name: str):
     path = os.path.join(FILES_DIR, name)
 if __name__ == "__main__":
     import uvicorn
+    uvicorn.run("brain_app:app", host="0.0.0.0", port=7861, reload=False)