Percy3822 committed on
Commit
a20b7d7
·
verified ·
1 Parent(s): 0bb83c8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +96 -8
app.py CHANGED
@@ -1,8 +1,10 @@
1
- import os, json, time, asyncio, tempfile
2
  from typing import AsyncGenerator, Dict, Any, Optional
3
  from fastapi import FastAPI, Request, Query, UploadFile
4
  from fastapi.responses import JSONResponse, StreamingResponse, FileResponse
5
 
 
 
6
  # === Directories ===
7
  BASE_DIR = os.environ.get("BASE_DIR", "/tmp/brain_app")
8
  FILES_DIR = os.path.join(BASE_DIR, "files")
@@ -21,7 +23,7 @@ BASE_WPM = int(os.environ.get("BASE_WPM", "165"))
21
  NOISE_SCALE = float(os.environ.get("NOISE_SCALE", "0.33"))
22
  NOISE_W = float(os.environ.get("NOISE_W", "0.92"))
23
 
24
- app = FastAPI(title="Brain Space (STT→TTS coordinator)", version="3.1.0")
25
  log_queue: "asyncio.Queue[Dict[str, Any]]" = asyncio.Queue()
26
 
27
  def write_event(event: Dict[str, Any]) -> None:
@@ -70,9 +72,7 @@ async def stream_logs() -> StreamingResponse:
70
  return StreamingResponse(gen(), media_type="text/event-stream",
71
  headers={"Cache-Control":"no-cache","Connection":"keep-alive"})
72
 
73
- # ---------- TTS proxy streaming (/tts/say.wav) ----------
74
- # GET: /tts/say.wav?text=...&voice=...&rate_wpm=165
75
- # POST: JSON {"text": "...", "voice": "...", "rate_wpm": 165}
76
  async def _proxy_tts_wav_stream(text: str, voice: str, rate_wpm: Optional[int],
77
  noise_scale: float, noise_w: float) -> StreamingResponse:
78
  import httpx
@@ -88,7 +88,6 @@ async def _proxy_tts_wav_stream(text: str, voice: str, rate_wpm: Optional[int],
88
  async with httpx.AsyncClient(timeout=None) as client:
89
  async with client.stream("GET", f"{TTS_BASE}/speak.wav", params=params) as resp:
90
  if resp.status_code != 200:
91
- # bubble up exact error body from TTS
92
  yield (await resp.aread())
93
  return
94
  async for chunk in resp.aiter_bytes():
@@ -123,8 +122,97 @@ async def tts_say_wav_post(req: Request):
123
  write_event({"type":"tts_post","len":len(text),"voice":voice,"rate_wpm":rate_wpm})
124
  return await _proxy_tts_wav_stream(text, voice, rate_wpm, noise_s, noise_wgt)
125
 
126
- # ---------- (Optional) simple relay demo kept for later ----------
127
- # You can keep your /demo/relay.wav here if you still want the file-upload STT→TTS demo.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
128
 
129
  # ---------- Optional: serve saved files later ----------
130
  @app.get("/files/{name}")
 
1
+ import os, json, time, asyncio, tempfile, struct
2
  from typing import AsyncGenerator, Dict, Any, Optional
3
  from fastapi import FastAPI, Request, Query, UploadFile
4
  from fastapi.responses import JSONResponse, StreamingResponse, FileResponse
5
 
6
+ import websockets # NEW
7
+
8
  # === Directories ===
9
  BASE_DIR = os.environ.get("BASE_DIR", "/tmp/brain_app")
10
  FILES_DIR = os.path.join(BASE_DIR, "files")
 
23
  NOISE_SCALE = float(os.environ.get("NOISE_SCALE", "0.33"))
24
  NOISE_W = float(os.environ.get("NOISE_W", "0.92"))
25
 
26
+ app = FastAPI(title="Brain Space (STT→TTS coordinator)", version="3.2.0")
27
  log_queue: "asyncio.Queue[Dict[str, Any]]" = asyncio.Queue()
28
 
29
  def write_event(event: Dict[str, Any]) -> None:
 
72
  return StreamingResponse(gen(), media_type="text/event-stream",
73
  headers={"Cache-Control":"no-cache","Connection":"keep-alive"})
74
 
75
+ # ---------- TTS proxy (file-mode, not live) ----------
 
 
76
  async def _proxy_tts_wav_stream(text: str, voice: str, rate_wpm: Optional[int],
77
  noise_scale: float, noise_w: float) -> StreamingResponse:
78
  import httpx
 
88
  async with httpx.AsyncClient(timeout=None) as client:
89
  async with client.stream("GET", f"{TTS_BASE}/speak.wav", params=params) as resp:
90
  if resp.status_code != 200:
 
91
  yield (await resp.aread())
92
  return
93
  async for chunk in resp.aiter_bytes():
 
122
  write_event({"type":"tts_post","len":len(text),"voice":voice,"rate_wpm":rate_wpm})
123
  return await _proxy_tts_wav_stream(text, voice, rate_wpm, noise_s, noise_wgt)
124
 
125
+ # ---------- NEW: true LIVE streaming via TTS WebSocket ----------
126
+ def _wav_header(sr: int, ch: int, bits: int = 16) -> bytes:
127
+ byte_rate = sr * ch * (bits // 8)
128
+ block_align = ch * (bits // 8)
129
+ # streaming data size; many players accept 0xFFFFFFFF as "unknown length"
130
+ data_size = 0xFFFFFFFF
131
+ riff_size = 36 + data_size
132
+ return b"".join([
133
+ b"RIFF", struct.pack("<I", riff_size), b"WAVE",
134
+ b"fmt ", struct.pack("<I", 16), struct.pack("<H", 1),
135
+ struct.pack("<H", ch), struct.pack("<I", sr),
136
+ struct.pack("<I", byte_rate), struct.pack("<H", block_align),
137
+ struct.pack("<H", bits),
138
+ b"data", struct.pack("<I", data_size),
139
+ ])
140
+
141
def _tts_ws_url() -> str:
    """Derive the TTS WebSocket endpoint from the configured HTTP base URL.

    Maps http:// -> ws:// and https:// -> wss://, strips any trailing
    slash, and appends the fixed /ws/tts path.
    """
    url = TTS_BASE.rstrip("/")
    for http_scheme, ws_scheme in (("https://", "wss://"), ("http://", "ws://")):
        url = url.replace(http_scheme, ws_scheme)
    return url + "/ws/tts"
144
+
145
@app.get("/tts/say.stream.wav")
async def tts_say_stream_wav(
    text: str = Query(..., description="Text to synthesize (live)"),
    voice: str = Query(DEFAULT_VOICE),
    rate_wpm: Optional[int] = Query(BASE_WPM),
    length_scale: Optional[float] = Query(None),
    noise_scale: float = Query(NOISE_SCALE),
    noise_w: float = Query(NOISE_W),
):
    """
    LIVE streaming proxy: TTS WS (raw PCM16) -> HTTP chunked WAV.
    Starts emitting audio as soon as the TTS starts producing frames.

    Protocol (as implemented below): send an "init" JSON message, wait for a
    "ready" JSON reply carrying sample rate / channel count, emit a WAV
    header, send a "speak" message, then forward every binary frame from the
    socket as raw PCM until a "done"/"end" event or the connection closes.

    NOTE(review): an explicit `length_scale` query param overrides the value
    derived from `rate_wpm`; `rate_wpm=0`/None falls back to BASE_WPM.
    NOTE(review): on a TTS-side "error" event, an "ERROR: ..." text payload
    is yielded into what the client believes is an audio/wav stream —
    presumably intentional for debugging; confirm clients tolerate it.
    """
    # Prefer the caller's explicit length_scale; otherwise convert WPM.
    ls = float(length_scale) if length_scale is not None else rate_to_length_scale(rate_wpm or BASE_WPM)
    write_event({"type":"tts_stream_get","len":len(text),"voice":voice,"ls":ls})

    async def gen():
        # Async generator consumed by StreamingResponse: each `yield` becomes
        # one HTTP chunk, so audio reaches the client as it is produced.
        ws_url = _tts_ws_url()
        # ping_interval=None disables keepalive pings (synthesis can stall the
        # socket); max_size bounds a single inbound frame at ~8 MB.
        async with websockets.connect(ws_url, ping_interval=None, max_size=8_000_000) as ws:
            # init
            await ws.send(json.dumps({
                "event": "init",
                "voice": voice,
                "length_scale": ls,
                "noise_scale": noise_scale,
                "noise_w": noise_w,
            }))
            # Defaults used only if "ready" omits sr/channels.
            sr, ch = 22050, 1
            # wait for ready -> send header
            while True:
                m = await ws.recv()
                if isinstance(m, (bytes, bytearray)):
                    # unlikely before ready; ignore
                    continue
                try:
                    evt = json.loads(m)
                except Exception:
                    # Non-JSON text frame: skip and keep waiting for "ready".
                    continue
                if evt.get("event") == "ready":
                    sr = int(evt.get("sr", 22050))
                    ch = int(evt.get("channels", 1))
                    # Header must precede any PCM bytes in the WAV stream.
                    yield _wav_header(sr, ch)
                    break
                if evt.get("event") == "error":
                    yield f'ERROR: {evt.get("detail","tts init error")}'.encode("utf-8")
                    return

            # speak
            await ws.send(json.dumps({"event": "speak", "text": text}))

            # pump frames
            while True:
                try:
                    msg = await ws.recv()
                except websockets.exceptions.ConnectionClosed:
                    # Server hung up: treat as end-of-audio, not an error.
                    break
                if isinstance(msg, (bytes, bytearray)):
                    # Binary frame = raw PCM16; forward verbatim.
                    yield msg
                else:
                    try:
                        evt = json.loads(msg)
                    except Exception:
                        # Unparseable control frame: ignore and keep pumping.
                        continue
                    if evt.get("event") in ("done", "end"):
                        break
                    if evt.get("event") == "error":
                        yield f'ERROR: {evt.get("detail","tts error")}'.encode("utf-8")
                        break

    return StreamingResponse(gen(), media_type="audio/wav",
        headers={"Cache-Control":"no-cache","Connection":"keep-alive"})
216
 
217
  # ---------- Optional: serve saved files later ----------
218
  @app.get("/files/{name}")