Spaces:

Percy3822
/

Brain

Paused

App Files Files Community

Percy3822 committed on Sep 5, 2025

Commit

ab5c984

verified ·

1 Parent(s): 2dda111

Update app.py

Browse files

Files changed (1) hide show

app.py +81 -4

app.py CHANGED Viewed

@@ -26,7 +26,7 @@ STT_COMPUTE = os.environ.get("STT_COMPUTE", "int8") # "int8"|"int8_float16"|"flo
 STT_MAXLEN_S = float(os.environ.get("STT_MAXLEN_S", "600"))  # refuse extremely long uploads
 # ========== App ==========
-app = FastAPI(title="Brain Space (TTS+STT)", version="2.0.0")
 # In-memory queue to fan-out logs to /stream/logs clients
 log_queue: "asyncio.Queue[Dict[str, Any]]" = asyncio.Queue()
@@ -249,7 +249,6 @@ async def _download_to_temp(url: str) -> str:
 def _transcribe_path(path: str, language: Optional[str] = None) -> Dict[str, Any]:
     model = _stt_model()
-    # NOTE: sticking to CPU-friendly settings; adjust if you move to GPU
     segments, info = model.transcribe(
         path,
         language=language,             # "en" or None for auto
@@ -263,8 +262,8 @@ def _transcribe_path(path: str, language: Optional[str] = None) -> Dict[str, Any
     for seg in segments:
         out_segments.append({"start": seg.start, "end": seg.end, "text": seg.text})
         txt_parts.append(seg.text)
-        # guard against absurdly long files if decoder doesn't report duration
-        if STT_MAXLEN_S and len(out_segments) > 0 and dur and seg.end and float(seg.end) > STT_MAXLEN_S:
             break
     text = "".join(txt_parts).strip()
     return {"text": text, "language": getattr(info, "language", language or "unknown"), "duration": dur, "segments": out_segments}
@@ -319,6 +318,84 @@ async def stt_transcribe(
         except Exception:
             pass
 # ========== Optional direct runner ==========
 if __name__ == "__main__":
     import uvicorn

 STT_MAXLEN_S = float(os.environ.get("STT_MAXLEN_S", "600"))  # refuse extremely long uploads
 # ========== App ==========
+app = FastAPI(title="Brain Space (TTS+STT)", version="2.1.0")
 # In-memory queue to fan-out logs to /stream/logs clients
 log_queue: "asyncio.Queue[Dict[str, Any]]" = asyncio.Queue()
 def _transcribe_path(path: str, language: Optional[str] = None) -> Dict[str, Any]:
     model = _stt_model()
     segments, info = model.transcribe(
         path,
         language=language,             # "en" or None for auto
     for seg in segments:
         out_segments.append({"start": seg.start, "end": seg.end, "text": seg.text})
         txt_parts.append(seg.text)
+        # guard against absurdly long files
+        if STT_MAXLEN_S and dur and seg.end and float(seg.end) > STT_MAXLEN_S:
             break
     text = "".join(txt_parts).strip()
     return {"text": text, "language": getattr(info, "language", language or "unknown"), "duration": dur, "segments": out_segments}
         except Exception:
             pass
+# --- End-to-end: STT -> Brain -> TTS (streamed WAV) ---
+@app.post("/demo/echo.wav")
+async def demo_echo_wav(
+    req: Request,
+    voice: str = Query(DEFAULT_VOICE, description="Voice id (TTS)"),
+    rate_wpm: Optional[int] = Query(None, description="Words-per-minute -> length_scale"),
+    length_scale: Optional[float] = Query(None, description="Override prosody"),
+    noise_scale: float = Query(NOISE_SCALE),
+    noise_w: float = Query(NOISE_W),
+    save: bool = Query(False, description="Also save output WAV under /files"),
+):
+    """
+    POST either:
+      - multipart/form-data with 'audio' file
+      - or JSON: { "file_url": "https://..." }
+    Returns: streaming audio/wav that says what it heard.
+    """
+    tmp_path = None
+    try:
+        # --- Ingest audio (multipart or JSON URL) ---
+        content_type = req.headers.get("content-type", "").lower()
+        if "multipart/form-data" in content_type:
+            form = await req.form()
+            up = form.get("audio")  # UploadFile
+            if not up:
+                return JSONResponse({"ok": False, "error": "Missing 'audio' file"}, status_code=400)
+            suffix = os.path.splitext(getattr(up, "filename", "") or "")[1] or ".wav"
+            fd, tmp_path = tempfile.mkstemp(prefix="demo_echo_", suffix=suffix)
+            os.close(fd)
+            with open(tmp_path, "wb") as f:
+                f.write(await up.read())
+        else:
+            # JSON with file_url
+            try:
+                body = await req.json()
+            except Exception:
+                body = {}
+            url = (body or {}).get("file_url")
+            if not url:
+                return JSONResponse({"ok": False, "error": "Provide multipart 'audio' or JSON {file_url}"}, status_code=400)
+            tmp_path = await _download_to_temp(url)
+        # --- STT ---
+        stt_res = _transcribe_path(tmp_path, language=None)
+        text = (stt_res.get("text") or "").strip()
+        if not text:
+            write_event({"type": "demo_echo", "data": {"ok": False, "error": "No speech detected"}})
+            return JSONResponse({"ok": False, "error": "No speech detected"}, status_code=422)
+        # --- Brain reply (simple confirmation) ---
+        reply_text = f"I heard: {text}"
+        reply_text = reply_text[:800]  # safety bound
+        # Prosody parameters
+        ls = float(length_scale) if length_scale is not None else rate_to_length_scale(rate_wpm if rate_wpm is not None else BASE_WPM)
+        write_event({"type": "demo_echo", "data": {"ok": True, "heard_len": len(text), "voice": voice, "ls": ls, "save": save}})
+        # --- TTS (stream WAV back to the caller) ---
+        return await _proxy_tts_wav_stream(
+            text=reply_text,
+            voice=voice,
+            length_scale=ls,
+            noise_scale=noise_scale,
+            noise_w=noise_w,
+            save_local=save
+        )
+    except Exception as e:
+        write_event({"type": "demo_echo", "data": {"ok": False, "error": str(e)}})
+        return JSONResponse({"ok": False, "error": str(e)}, status_code=500)
+    finally:
+        try:
+            if tmp_path and os.path.exists(tmp_path):
+                os.unlink(tmp_path)
+        except Exception:
+            pass
 # ========== Optional direct runner ==========
 if __name__ == "__main__":
     import uvicorn