Percy3822 commited on
Commit
3816405
·
verified ·
1 Parent(s): cf40d2a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +54 -40
app.py CHANGED
@@ -1,9 +1,12 @@
1
- import os, json, time, asyncio, tempfile, struct
 
 
2
  from typing import AsyncGenerator, Dict, Any, Optional
3
  from fastapi import FastAPI, Request, Query, UploadFile
4
  from fastapi.responses import JSONResponse, StreamingResponse, FileResponse
5
 
6
- import websockets # NEW
 
7
 
8
  # === Directories ===
9
  BASE_DIR = os.environ.get("BASE_DIR", "/tmp/brain_app")
@@ -23,7 +26,7 @@ BASE_WPM = int(os.environ.get("BASE_WPM", "165"))
23
  NOISE_SCALE = float(os.environ.get("NOISE_SCALE", "0.33"))
24
  NOISE_W = float(os.environ.get("NOISE_W", "0.92"))
25
 
26
- app = FastAPI(title="Brain Space (STT→TTS coordinator)", version="3.2.0")
27
  log_queue: "asyncio.Queue[Dict[str, Any]]" = asyncio.Queue()
28
 
29
  def write_event(event: Dict[str, Any]) -> None:
@@ -42,6 +45,39 @@ def rate_to_length_scale(rate_wpm: Optional[int]) -> float:
42
  r = max(80, min(320, rate_wpm))
43
  return round(base / float(r), 3)
44
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
  # ---------- Health ----------
46
  @app.get("/health")
47
  def health():
@@ -72,10 +108,11 @@ async def stream_logs() -> StreamingResponse:
72
  return StreamingResponse(gen(), media_type="text/event-stream",
73
  headers={"Cache-Control":"no-cache","Connection":"keep-alive"})
74
 
75
- # ---------- TTS proxy (file-mode, not live) ----------
 
 
76
  async def _proxy_tts_wav_stream(text: str, voice: str, rate_wpm: Optional[int],
77
  noise_scale: float, noise_w: float) -> StreamingResponse:
78
- import httpx
79
  length_scale = rate_to_length_scale(rate_wpm) if rate_wpm is not None else rate_to_length_scale(BASE_WPM)
80
  params = {
81
  "text": text,
@@ -122,26 +159,8 @@ async def tts_say_wav_post(req: Request):
122
  write_event({"type":"tts_post","len":len(text),"voice":voice,"rate_wpm":rate_wpm})
123
  return await _proxy_tts_wav_stream(text, voice, rate_wpm, noise_s, noise_wgt)
124
 
125
- # ---------- NEW: true LIVE streaming via TTS WebSocket ----------
126
- def _wav_header(sr: int, ch: int, bits: int = 16) -> bytes:
127
- byte_rate = sr * ch * (bits // 8)
128
- block_align = ch * (bits // 8)
129
- # streaming data size; many players accept 0xFFFFFFFF as "unknown length"
130
- data_size = 0xFFFFFFFF
131
- riff_size = 36 + data_size
132
- return b"".join([
133
- b"RIFF", struct.pack("<I", riff_size), b"WAVE",
134
- b"fmt ", struct.pack("<I", 16), struct.pack("<H", 1),
135
- struct.pack("<H", ch), struct.pack("<I", sr),
136
- struct.pack("<I", byte_rate), struct.pack("<H", block_align),
137
- struct.pack("<H", bits),
138
- b"data", struct.pack("<I", data_size),
139
- ])
140
-
141
- def _tts_ws_url() -> str:
142
- base = TTS_BASE.rstrip("/")
143
- return base.replace("http://", "ws://").replace("https://", "wss://") + "/ws/tts"
144
-
145
  @app.get("/tts/say.stream.wav")
146
  async def tts_say_stream_wav(
147
  text: str = Query(..., description="Text to synthesize (live)"),
@@ -163,6 +182,7 @@ async def tts_say_stream_wav(
163
  try:
164
  ws_url = _tts_ws_url()
165
  ws = await websockets.connect(ws_url, ping_interval=None, max_size=8_000_000)
 
166
  # init
167
  await ws.send(json.dumps({
168
  "event": "init",
@@ -171,12 +191,13 @@ async def tts_say_stream_wav(
171
  "noise_scale": noise_scale,
172
  "noise_w": noise_w,
173
  }))
 
174
  sr, ch = 22050, 1
175
- # wait for ready -> send header immediately so client can start
176
  while True:
177
  m = await ws.recv()
178
  if isinstance(m, (bytes, bytearray)):
179
- # ignore stray audio until we know sr/ch
180
  continue
181
  try:
182
  evt = json.loads(m)
@@ -198,36 +219,29 @@ async def tts_say_stream_wav(
198
  while True:
199
  try:
200
  msg = await ws.recv()
201
- except websockets.exceptions.ConnectionClosedOK:
202
- break
203
- except websockets.exceptions.ConnectionClosedError:
204
  break
205
 
206
  if isinstance(msg, (bytes, bytearray)):
207
- # raw PCM16 frame from TTS; just yield
208
  if msg:
209
  yield msg
210
  continue
211
 
212
- # control event
213
  try:
214
  evt = json.loads(msg)
215
  except Exception:
216
  continue
217
- kind = evt.get("event")
218
- if kind in ("done", "end"):
219
  break
220
- if kind == "error":
221
- # propagate as bytes (don’t raise; avoid chunk abort)
222
- detail = evt.get("detail", "tts error")
223
- yield f'ERROR: {detail}'.encode("utf-8")
224
  break
225
  # ignore logs
226
 
227
  except Exception as e:
228
- # Log, but don't raise (raising here aborts chunked stream & causes 'incomplete chunked read')
229
  write_event({"type":"tts_stream_err","err":str(e)})
230
- # a tiny trailing pad keeps some clients happy
231
  yield b""
232
  finally:
233
  try:
 
1
+ # brain_app.py Brain Space: STT → TTS coordinator + LIVE TTS streaming proxy
2
+
3
+ import os, json, time, asyncio, tempfile
4
  from typing import AsyncGenerator, Dict, Any, Optional
5
  from fastapi import FastAPI, Request, Query, UploadFile
6
  from fastapi.responses import JSONResponse, StreamingResponse, FileResponse
7
 
8
+ import httpx
9
+ import websockets
10
 
11
  # === Directories ===
12
  BASE_DIR = os.environ.get("BASE_DIR", "/tmp/brain_app")
 
26
  NOISE_SCALE = float(os.environ.get("NOISE_SCALE", "0.33"))
27
  NOISE_W = float(os.environ.get("NOISE_W", "0.92"))
28
 
29
+ app = FastAPI(title="Brain Space (STT→TTS coordinator)", version="3.1.0")
30
  log_queue: "asyncio.Queue[Dict[str, Any]]" = asyncio.Queue()
31
 
32
  def write_event(event: Dict[str, Any]) -> None:
 
45
  r = max(80, min(320, rate_wpm))
46
  return round(base / float(r), 3)
47
 
48
+ def _tts_ws_url() -> str:
49
+ """
50
+ Build the TTS WebSocket URL from TTS_BASE.
51
+ e.g. https://Percy3822-ActualTTS.hf.space -> wss://Percy3822-ActualTTS.hf.space/ws/tts
52
+ """
53
+ base = (TTS_BASE or "").rstrip("/")
54
+ if base.startswith("https://"):
55
+ return "wss://" + base[len("https://"):] + "/ws/tts"
56
+ if base.startswith("http://"):
57
+ return "ws://" + base[len("http://"):] + "/ws/tts"
58
+ return (base + "/ws/tts") if not base.endswith("/ws/tts") else base
59
+
60
+ def _wav_header(sr: int, ch: int) -> bytes:
61
+ """Minimal PCM16 WAV header with large data size for streaming."""
62
+ bits = 16
63
+ byte_rate = sr * ch * (bits // 8)
64
+ block_align = ch * (bits // 8)
65
+ data_size = 0x7FFFFFFF
66
+ riff_size = (36 + data_size) & 0xFFFFFFFF
67
+ return (
68
+ b"RIFF" +
69
+ riff_size.to_bytes(4, "little") +
70
+ b"WAVE" +
71
+ b"fmt " + (16).to_bytes(4, "little") +
72
+ (1).to_bytes(2, "little") + # PCM
73
+ (ch).to_bytes(2, "little") +
74
+ (sr).to_bytes(4, "little") +
75
+ (byte_rate).to_bytes(4, "little") +
76
+ (block_align).to_bytes(2, "little") +
77
+ (bits).to_bytes(2, "little") +
78
+ b"data" + data_size.to_bytes(4, "little")
79
+ )
80
+
81
  # ---------- Health ----------
82
  @app.get("/health")
83
  def health():
 
108
  return StreamingResponse(gen(), media_type="text/event-stream",
109
  headers={"Cache-Control":"no-cache","Connection":"keep-alive"})
110
 
111
+ # ---------- TTS proxy streaming (/tts/say.wav) ----------
112
+ # GET: /tts/say.wav?text=...&voice=...&rate_wpm=165
113
+ # POST: JSON {"text": "...", "voice": "...", "rate_wpm": 165}
114
  async def _proxy_tts_wav_stream(text: str, voice: str, rate_wpm: Optional[int],
115
  noise_scale: float, noise_w: float) -> StreamingResponse:
 
116
  length_scale = rate_to_length_scale(rate_wpm) if rate_wpm is not None else rate_to_length_scale(BASE_WPM)
117
  params = {
118
  "text": text,
 
159
  write_event({"type":"tts_post","len":len(text),"voice":voice,"rate_wpm":rate_wpm})
160
  return await _proxy_tts_wav_stream(text, voice, rate_wpm, noise_s, noise_wgt)
161
 
162
+ # ---------- LIVE TTS WS HTTP WAV streaming ----------
163
+ # GET: /tts/say.stream.wav?text=...&voice=...&rate_wpm=165
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
164
  @app.get("/tts/say.stream.wav")
165
  async def tts_say_stream_wav(
166
  text: str = Query(..., description="Text to synthesize (live)"),
 
182
  try:
183
  ws_url = _tts_ws_url()
184
  ws = await websockets.connect(ws_url, ping_interval=None, max_size=8_000_000)
185
+
186
  # init
187
  await ws.send(json.dumps({
188
  "event": "init",
 
191
  "noise_scale": noise_scale,
192
  "noise_w": noise_w,
193
  }))
194
+
195
  sr, ch = 22050, 1
196
+ # wait for ready -> send WAV header immediately
197
  while True:
198
  m = await ws.recv()
199
  if isinstance(m, (bytes, bytearray)):
200
+ # ignore until we know sr/ch
201
  continue
202
  try:
203
  evt = json.loads(m)
 
219
  while True:
220
  try:
221
  msg = await ws.recv()
222
+ except websockets.exceptions.ConnectionClosed:
 
 
223
  break
224
 
225
  if isinstance(msg, (bytes, bytearray)):
 
226
  if msg:
227
  yield msg
228
  continue
229
 
 
230
  try:
231
  evt = json.loads(msg)
232
  except Exception:
233
  continue
234
+ k = evt.get("event")
235
+ if k in ("done", "end"):
236
  break
237
+ if k == "error":
238
+ d = evt.get("detail", "tts error")
239
+ yield f'ERROR: {d}'.encode("utf-8")
 
240
  break
241
  # ignore logs
242
 
243
  except Exception as e:
 
244
  write_event({"type":"tts_stream_err","err":str(e)})
 
245
  yield b""
246
  finally:
247
  try: