Percy3822 committed on
Commit
fa99ff3
·
verified ·
1 Parent(s): 976e3b5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +107 -170
app.py CHANGED
@@ -1,13 +1,10 @@
1
  import asyncio
2
  import json
3
  import os
4
- import re
5
- import shlex
6
- import subprocess
7
  import tarfile
8
  import time
9
  from pathlib import Path
10
- from typing import Optional, Dict, Any
11
 
12
  import uvicorn
13
  from fastapi import FastAPI, WebSocket, WebSocketDisconnect, Request
@@ -17,7 +14,7 @@ from fastapi.responses import JSONResponse, FileResponse, PlainTextResponse
17
  # Writable directory picker
18
  # -------------------------
19
 
20
- def pick_writable_dir(*candidates: Path) -> Path:
21
  errs = []
22
  for p in candidates:
23
  if not p:
@@ -25,171 +22,129 @@ def pick_writable_dir(*candidates: Path) -> Path:
25
  try:
26
  p.mkdir(parents=True, exist_ok=True)
27
  probe = p / ".probe"
28
- with open(probe, "wb") as f:
29
- f.write(b"ok")
30
  probe.unlink(missing_ok=True)
31
  return p
32
  except Exception as e:
33
  errs.append(f"{p}: {type(e)._name_}({e})")
34
- raise RuntimeError("No writable dir. Tried:\n" + "\n".join(errs))
35
 
36
- # Honors env overrides; then tries common writable places on HF Spaces
37
  ENV_DIR = os.getenv("TTS_DATA_DIR")
38
- VOICES_DIR = None
39
- FILES_DIR = None
40
-
41
- def init_dirs():
42
- global VOICES_DIR, FILES_DIR
43
- cand_voices = []
44
- if ENV_DIR:
45
- cand_voices.append(Path(ENV_DIR) / "voices")
46
- cand_voices += [
47
- Path("/home/user/.cache/actualtts/voices"),
48
- Path("/home/user/voices"),
49
- Path("/tmp/actualtts/voices"),
50
- Path("/dev/shm/actualtts_voices"),
51
- ]
52
- VOICES_DIR = pick_writable_dir(*cand_voices)
53
-
54
- cand_files = []
55
- if ENV_DIR:
56
- cand_files.append(Path(ENV_DIR) / "files")
57
- cand_files += [
58
- Path("/home/user/.cache/actualtts/files"),
59
- Path("/tmp/actualtts/files"),
60
- Path("/dev/shm/actualtts_files"),
61
- ]
62
- FILES_DIR = pick_writable_dir(*cand_files)
63
-
64
- init_dirs()
65
 
66
  # -------------------------
67
  # Piper CLI integration
68
  # -------------------------
69
 
70
- # Piper binary is preinstalled in CPU Spaces images that have Piper CLI.
71
- # If your image differs, set PIPER_BIN env to the correct path.
72
  PIPER_BIN = os.getenv("PIPER_BIN", "piper")
73
 
74
- # A small catalog of good CPU voices (VITS-based) hosted on HF.
75
  HF_VOICES: Dict[str, str] = {
76
  "en_US-libritts-high": "https://huggingface.co/rhasspy/piper-voices/resolve/main/en/en_US-libritts-high.onnx.tar.gz",
77
  "en_US-lessac-high": "https://huggingface.co/rhasspy/piper-voices/resolve/main/en/en_US-lessac-high.onnx.tar.gz",
78
  "en_US-amy-medium": "https://huggingface.co/rhasspy/piper-voices/resolve/main/en/en_US-amy-medium.onnx.tar.gz",
79
  }
80
-
81
  DEFAULT_VOICE = os.getenv("DEFAULT_VOICE", "en_US-libritts-high")
82
- DEFAULT_SR = 22050 # Piper typically outputs 22050 Hz
83
- DEFAULT_CHANNELS = 1
84
-
85
- _HTTP_CLIENT = None # lazy import only if needed
86
 
87
-
88
- def _http():
89
- global _HTTP_CLIENT
90
- if _HTTP_CLIENT is None:
91
  import requests
92
- _HTTP_CLIENT = requests.Session()
93
- _HTTP_CLIENT.headers.update({"User-Agent": "ActualTTS/CPU"})
94
- return _HTTP_CLIENT
95
-
96
 
97
  def ensure_voice(voice_id: str) -> Dict[str, Path]:
98
- """
99
- Ensure the given voice is present locally. If missing, download and extract.
100
- Returns {"model": Path, "config": Path}
101
- """
 
102
  vdir = VOICES_DIR / voice_id
103
  model = vdir / f"{voice_id}.onnx"
104
- config = vdir / f"{voice_id}.onnx.json"
105
-
106
- if model.exists() and config.exists():
107
- return {"model": model, "config": config}
108
 
109
  url = HF_VOICES.get(voice_id)
110
  if not url:
111
- # Try a heuristic: accept 'en-us' as a generic alias
112
- if voice_id.lower() in ("en-us", "en_us", "english"):
113
- voice_id = "en_US-libritts-high"
114
- url = HF_VOICES[voice_id]
115
- vdir = VOICES_DIR / voice_id
116
- model = vdir / f"{voice_id}.onnx"
117
- config = vdir / f"{voice_id}.onnx.json"
118
- else:
119
- raise RuntimeError(f"Unknown/unsupported voice '{voice_id}'. Known: {list(HF_VOICES)}")
120
 
121
  vdir.mkdir(parents=True, exist_ok=True)
122
  tar_path = vdir / f"{voice_id}.onnx.tar.gz"
123
 
124
- # Download
125
- r = _http().get(url, timeout=120, stream=True)
126
  r.raise_for_status()
127
  with open(tar_path, "wb") as f:
128
  for chunk in r.iter_content(1 << 16):
129
  if chunk:
130
  f.write(chunk)
131
 
132
- # Extract
133
  with tarfile.open(tar_path, "r:gz") as tf:
134
  tf.extractall(vdir)
135
-
136
  tar_path.unlink(missing_ok=True)
137
 
138
- if not model.exists() or not config.exists():
139
- raise RuntimeError(f"Voice files not found after extraction: {voice_id}")
140
-
141
- return {"model": model, "config": config}
142
-
143
 
144
  def build_piper_cmd(text: str, voice_id: str, to_stdout: bool, out_path: Optional[Path] = None,
145
- length_scale: float = 1.10, noise_scale: float = 0.35, noise_w: float = 0.90) -> list:
146
  vc = ensure_voice(voice_id)
147
- args = [
148
  PIPER_BIN,
149
  "-m", str(vc["model"]),
150
  "-c", str(vc["config"]),
151
- "-q", # quieter logs
152
  "--length_scale", str(length_scale),
153
- "--noise_scale", str(noise_scale),
154
- "--noise_w", str(noise_w),
155
  ]
156
  if to_stdout:
157
- args += ["-f", "-"]
158
  else:
159
  if out_path is None:
160
- raise ValueError("out_path is required when to_stdout=False")
161
- args += ["-f", str(out_path)]
162
- return args
163
 
164
-
165
- async def run_piper_to_file(text: str, voice_id: str, out_path: Path,
166
- length_scale: float, noise_scale: float, noise_w: float) -> None:
167
- cmd = build_piper_cmd(text, voice_id, to_stdout=False, out_path=out_path,
168
  length_scale=length_scale, noise_scale=noise_scale, noise_w=noise_w)
169
  proc = await asyncio.create_subprocess_exec(
170
  *cmd, stdin=asyncio.subprocess.PIPE, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
171
  )
172
- # write the text
173
  await proc.stdin.write(text.encode("utf-8"))
174
  await proc.stdin.drain()
175
  proc.stdin.close()
176
  await proc.wait()
177
  if proc.returncode != 0:
178
  stderr = (await proc.stderr.read()).decode("utf-8", "ignore")
179
- raise RuntimeError(f"Piper failed (code {proc.returncode}). Stderr:\n{stderr}")
180
 
 
 
 
 
181
 
182
- async def run_piper_stream(text: str, voice_id: str, websocket: WebSocket,
183
- length_scale: float, noise_scale: float, noise_w: float) -> None:
184
- """
185
- Stream binary PCM16 via WS while Piper renders to stdout.
186
- """
187
- cmd = build_piper_cmd(text, voice_id, to_stdout=True,
188
  length_scale=length_scale, noise_scale=noise_scale, noise_w=noise_w)
189
-
190
- # Tell client we're ready (so it can open its audio device early)
191
- await websocket.send_text(json.dumps({"event": "ready", "sr": DEFAULT_SR, "channels": DEFAULT_CHANNELS}))
192
-
193
  proc = await asyncio.create_subprocess_exec(
194
  *cmd, stdin=asyncio.subprocess.PIPE, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
195
  )
@@ -197,39 +152,44 @@ async def run_piper_stream(text: str, voice_id: str, websocket: WebSocket,
197
  await proc.stdin.drain()
198
  proc.stdin.close()
199
 
 
 
 
200
  try:
201
  while True:
202
  chunk = await proc.stdout.read(4096)
203
  if not chunk:
204
  break
205
- # Piper emits WAV unless given raw flag; CLI doesn't expose raw PCM easily.
206
- # However, Piper's default stdout with -f - is WAV. Many clients can accept WAV frames incrementally.
207
- # Your client plays raw PCM; so we keep WAV to avoid truncation and let the client handle it,
208
- # or we can strip the first 44 bytes (WAV header) once and then stream the rest as PCM16.
209
- # We'll do header-strip-once below:
210
-
211
- # Detect/strip WAV header (44 bytes) exactly once per stream:
212
- if len(chunk) >= 44 and chunk[0:4] == b"RIFF" and chunk[8:12] == b"WAVE":
213
- # Skip the 44-byte header (simple WAV)
214
- chunk = chunk[44:]
215
- if not chunk:
216
- continue
217
 
218
- await websocket.send_bytes(chunk)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
219
 
220
  await proc.wait()
221
  if proc.returncode != 0:
222
  stderr = (await proc.stderr.read()).decode("utf-8", "ignore")
223
- await websocket.send_text(json.dumps({"event": "error", "detail": stderr}))
224
  else:
225
- await websocket.send_text(json.dumps({"event": "done"}))
226
  except WebSocketDisconnect:
227
  try:
228
  proc.kill()
229
  except Exception:
230
  pass
231
 
232
-
233
  # ---------------
234
  # FastAPI wiring
235
  # ---------------
@@ -238,31 +198,26 @@ app = FastAPI(title="ActualTTS (CPU)")
238
 
239
  @app.get("/health")
240
  def health():
241
- def list_voices():
242
- out = []
243
- for child in VOICES_DIR.iterdir():
244
- if not child.is_dir():
245
- continue
246
- name = child.name
247
- if (child / f"{name}.onnx").exists() and (child / f"{name}.onnx.json").exists():
248
- out.append(name)
249
- return out
250
-
251
  return {
252
  "ok": True,
253
  "engine": "piper-tts (CLI, CPU)",
254
  "default_voice": DEFAULT_VOICE,
255
  "voice_dir": str(VOICES_DIR),
256
- "available_voices": list_voices(),
257
  "files_dir": str(FILES_DIR),
258
  }
259
 
260
-
261
  @app.get("/")
262
  def root():
263
  return PlainTextResponse("ActualTTS (CPU) — use POST /speak or WS /ws/tts")
264
 
265
-
266
  @app.get("/file/{name}")
267
  def get_file(name: str):
268
  path = FILES_DIR / name
@@ -270,20 +225,18 @@ def get_file(name: str):
270
  return JSONResponse({"ok": False, "error": "not found"}, status_code=404)
271
  return FileResponse(path)
272
 
273
-
274
  @app.post("/speak")
275
  async def speak(request: Request):
276
  """
277
- JSON body:
278
- {
279
- "text": "Hello world",
280
- "voice": "en_US-libritts-high",
281
- "length_scale": 1.10,
282
- "noise_scale": 0.35,
283
- "noise_w": 0.90
284
- }
285
- Returns:
286
- {"ok": true, "audio_url": "/file/tts-<ts>.wav"}
287
  """
288
  try:
289
  body = await request.json()
@@ -295,33 +248,26 @@ async def speak(request: Request):
295
  return JSONResponse({"detail": "Missing text"}, status_code=400)
296
 
297
  voice = (body.get("voice") or DEFAULT_VOICE).strip()
298
- length_scale = float(body.get("length_scale", 1.10))
299
- noise_scale = float(body.get("noise_scale", 0.35))
300
- noise_w = float(body.get("noise_w", 0.90))
301
 
302
- # prepare output
303
  ts = int(time.time() * 1000)
304
  out_path = FILES_DIR / f"tts-{ts}.wav"
305
 
306
  try:
307
- # ensure voice (download if needed)
308
  ensure_voice(voice)
309
- # run piper CLI to a wav file
310
- await run_piper_to_file(text, voice, out_path, length_scale, noise_scale, noise_w)
311
  except Exception as e:
312
  return JSONResponse({"ok": False, "error": str(e)}, status_code=500)
313
 
314
  return {"ok": True, "audio_url": f"/file/{out_path.name}"}
315
 
316
-
317
  @app.websocket("/ws/tts")
318
  async def ws_tts(ws: WebSocket):
319
  await ws.accept()
320
  voice = DEFAULT_VOICE
321
- length_scale = 1.10
322
- noise_scale = 0.35
323
- noise_w = 0.90
324
- text_to_speak: Optional[str] = None
325
 
326
  try:
327
  while True:
@@ -330,34 +276,27 @@ async def ws_tts(ws: WebSocket):
330
  data = json.loads(msg)
331
  except Exception:
332
  continue
333
-
334
  ev = data.get("event")
335
  if ev == "init":
336
  voice = (data.get("voice") or voice).strip()
337
- # allow optional tuning via WS
338
  if "length_scale" in data: length_scale = float(data["length_scale"])
339
  if "noise_scale" in data: noise_scale = float(data["noise_scale"])
340
  if "noise_w" in data: noise_w = float(data["noise_w"])
341
- # ensure voice now so we can send ready immediately
342
  try:
343
  ensure_voice(voice)
344
  except Exception as e:
345
  await ws.send_text(json.dumps({"event": "error", "detail": str(e)}))
346
  await ws.close()
347
  return
348
- await ws.send_text(json.dumps({"event": "ready", "sr": DEFAULT_SR, "channels": DEFAULT_CHANNELS}))
349
-
350
  elif ev == "speak":
351
- text_to_speak = (data.get("text") or "").strip()
352
- if not text_to_speak:
353
  await ws.send_text(json.dumps({"event": "error", "detail": "empty text"}))
354
  continue
355
- # stream via stdout
356
- await run_piper_stream(text_to_speak, voice, ws, length_scale, noise_scale, noise_w)
357
- else:
358
- # ignore unknown events
359
- pass
360
-
361
  except WebSocketDisconnect:
362
  return
363
  except Exception as e:
@@ -370,7 +309,5 @@ async def ws_tts(ws: WebSocket):
370
  except Exception:
371
  pass
372
 
373
-
374
  if __name__ == "__main__":
375
- # For local debug; Spaces uses Entrypoint/Cmd
376
  uvicorn.run("app:app", host="0.0.0.0", port=7860, reload=False)
 
1
  import asyncio
2
  import json
3
  import os
 
 
 
4
  import tarfile
5
  import time
6
  from pathlib import Path
7
+ from typing import Optional, Dict
8
 
9
  import uvicorn
10
  from fastapi import FastAPI, WebSocket, WebSocketDisconnect, Request
 
14
  # Writable directory picker
15
  # -------------------------
16
 
17
def pick_writable_dir(candidates):
    """Return the first candidate directory that can be created and written to.

    Each candidate is created if missing (including parents) and then probed
    by writing and deleting a small ``.probe`` file inside it. Falsy entries
    (e.g. ``None``) are skipped.

    Args:
        candidates: Iterable of ``pathlib.Path`` objects, tried in order.

    Returns:
        The first candidate for which the probe succeeded.

    Raises:
        RuntimeError: If no candidate is writable; the message lists every
            candidate tried together with the error it produced.
    """
    errs = []
    for p in candidates:
        if not p:
            continue
        try:
            p.mkdir(parents=True, exist_ok=True)
            probe = p / ".probe"
            probe.write_bytes(b"ok")
            probe.unlink(missing_ok=True)
            return p
        except Exception as e:
            # Must be the dunder attribute: `type(e)._name_` does not exist and
            # would raise AttributeError here, aborting the search on the
            # first failing candidate instead of moving on to the next one.
            errs.append(f"{p}: {type(e).__name__}({e})")
    raise RuntimeError("No writable dir. Tried:\n " + "\n ".join(errs))
31
 
32
# Only pick truly safe/writable locations on HF Spaces
ENV_DIR = os.getenv("TTS_DATA_DIR")

# NOTE(review): the TTS_DATA_DIR-based location is listed last in both lists,
# so it is only reached when the /tmp and /dev/shm candidates fail the
# writability probe — confirm this ordering is intended.
VOICE_CANDIDATES = [
    Path("/tmp/actualtts/voices"),
    Path("/dev/shm/actualtts_voices"),
    Path(ENV_DIR) / "voices" if ENV_DIR else None,
]
FILE_CANDIDATES = [
    Path("/tmp/actualtts/files"),
    Path("/dev/shm/actualtts_files"),
    Path(ENV_DIR) / "files" if ENV_DIR else None,
]

# Drop the None placeholder (present when TTS_DATA_DIR is unset) before probing.
VOICES_DIR = pick_writable_dir(list(filter(None, VOICE_CANDIDATES)))
FILES_DIR = pick_writable_dir(list(filter(None, FILE_CANDIDATES)))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
 
48
  # -------------------------
49
  # Piper CLI integration
50
  # -------------------------
51
 
 
 
52
  PIPER_BIN = os.getenv("PIPER_BIN", "piper")
53
 
54
+ # Good human-y CPU voices hosted by Rhasspy (VITS-based)
55
  HF_VOICES: Dict[str, str] = {
56
  "en_US-libritts-high": "https://huggingface.co/rhasspy/piper-voices/resolve/main/en/en_US-libritts-high.onnx.tar.gz",
57
  "en_US-lessac-high": "https://huggingface.co/rhasspy/piper-voices/resolve/main/en/en_US-lessac-high.onnx.tar.gz",
58
  "en_US-amy-medium": "https://huggingface.co/rhasspy/piper-voices/resolve/main/en/en_US-amy-medium.onnx.tar.gz",
59
  }
 
60
  DEFAULT_VOICE = os.getenv("DEFAULT_VOICE", "en_US-libritts-high")
61
+ DEFAULT_SR = 22050
62
+ DEFAULT_CH = 1
 
 
63
 
64
_http = None


def http():
    """Return the shared ``requests.Session``, creating it on the first call.

    The session is stored in the module-level ``_http`` and reused by later
    calls. It carries a fixed ``User-Agent`` header.
    """
    global _http
    if _http is not None:
        return _http

    # Imported on first call rather than at module import time.
    import requests

    _http = requests.Session()
    _http.headers.update({"User-Agent": "ActualTTS/CPU"})
    return _http
 
72
 
73
def ensure_voice(voice_id: str) -> Dict[str, Path]:
    """Make sure a Piper voice is present locally, downloading it if needed.

    The voice lives in ``VOICES_DIR / <voice_id>`` as ``<voice_id>.onnx`` plus
    ``<voice_id>.onnx.json``. If both files already exist nothing is
    downloaded. Otherwise the archive listed in ``HF_VOICES`` is streamed to
    disk, extracted into the voice directory and removed again.

    Args:
        voice_id: Voice name. The generic tags ``en-us``, ``en_us`` and
            ``english`` (case-insensitive) map to ``en_US-libritts-high``.

    Returns:
        ``{"model": Path, "config": Path}`` for the voice files.

    Raises:
        RuntimeError: If the voice is not in ``HF_VOICES``, if the archive
            contains an entry that would land outside the voice directory,
            or if the expected files are missing after extraction.
        Exception: Download and archive errors from ``requests`` / ``tarfile``
            propagate unchanged.
    """
    # Alias common generic tags
    if voice_id.lower() in ("en-us", "en_us", "english"):
        voice_id = "en_US-libritts-high"

    vdir = VOICES_DIR / voice_id
    model = vdir / f"{voice_id}.onnx"
    cfg = vdir / f"{voice_id}.onnx.json"
    if model.exists() and cfg.exists():
        return {"model": model, "config": cfg}

    url = HF_VOICES.get(voice_id)
    if not url:
        raise RuntimeError(f"Unknown voice '{voice_id}'. Known: {list(HF_VOICES)}")

    vdir.mkdir(parents=True, exist_ok=True)
    tar_path = vdir / f"{voice_id}.onnx.tar.gz"

    try:
        # The context manager releases the streamed connection even when the
        # download fails part-way.
        with http().get(url, timeout=180, stream=True) as r:
            r.raise_for_status()
            with open(tar_path, "wb") as f:
                for chunk in r.iter_content(1 << 16):
                    if chunk:
                        f.write(chunk)

        with tarfile.open(tar_path, "r:gz") as tf:
            if hasattr(tarfile, "data_filter"):
                # Extraction filters reject absolute paths, "..", and links
                # that point outside the destination.
                tf.extractall(vdir, filter="data")
            else:
                # Older Python without extraction filters: check every member
                # by hand before extracting anything.
                base = vdir.resolve()
                for member in tf.getmembers():
                    dest = (base / member.name).resolve()
                    escapes = dest != base and base not in dest.parents
                    if escapes or member.issym() or member.islnk():
                        raise RuntimeError(
                            f"Unsafe entry '{member.name}' in archive for '{voice_id}'"
                        )
                tf.extractall(vdir)
    finally:
        # Never leave a partial or already-extracted archive behind.
        tar_path.unlink(missing_ok=True)

    if not model.exists() or not cfg.exists():
        raise RuntimeError(f"Voice files missing after extraction for '{voice_id}'")
    return {"model": model, "config": cfg}
 
 
106
 
107
def build_piper_cmd(text: str, voice_id: str, to_stdout: bool, out_path: Optional[Path] = None,
                    length_scale: float = 1.08, noise_scale: float = 0.35, noise_w: float = 0.90) -> list:
    """Build the argument list for one Piper CLI invocation.

    ``text`` is not placed on the command line; it is accepted for call-site
    symmetry only and is unused here.

    Args:
        text: Text to synthesize (unused in this function).
        voice_id: Voice name, resolved (and downloaded if needed) through
            ``ensure_voice``.
        to_stdout: When true, Piper is told to write to ``-``.
        out_path: Output file; required when ``to_stdout`` is false.
        length_scale: Value passed to ``--length_scale``.
        noise_scale: Value passed to ``--noise_scale``.
        noise_w: Value passed to ``--noise_w``.

    Returns:
        The command as a list of strings, starting with ``PIPER_BIN``.

    Raises:
        ValueError: If ``to_stdout`` is false and ``out_path`` is ``None``.
    """
    voice_files = ensure_voice(voice_id)

    if to_stdout:
        destination = "-"  # write WAV to stdout
    elif out_path is None:
        raise ValueError("out_path required when to_stdout=False")
    else:
        destination = str(out_path)

    return [
        PIPER_BIN,
        "-m", str(voice_files["model"]),
        "-c", str(voice_files["config"]),
        "-q",
        "--length_scale", str(length_scale),
        "--noise_scale", str(noise_scale),
        "--noise_w", str(noise_w),
        "-f", destination,
    ]
126
 
127
async def piper_to_file(text, voice, out_path, length_scale, noise_scale, noise_w):
    """Run the Piper CLI once and have it write its output to ``out_path``.

    The text is sent to the process on stdin, encoded as UTF-8.

    Args:
        text: Text to synthesize.
        voice: Voice name, forwarded to ``build_piper_cmd``.
        out_path: Destination file for the audio.
        length_scale: Forwarded to ``build_piper_cmd``.
        noise_scale: Forwarded to ``build_piper_cmd``.
        noise_w: Forwarded to ``build_piper_cmd``.

    Raises:
        RuntimeError: If Piper exits with a non-zero status; the message
            contains the exit code and the captured stderr.
    """
    cmd = build_piper_cmd(text, voice, to_stdout=False, out_path=out_path,
                          length_scale=length_scale, noise_scale=noise_scale, noise_w=noise_w)
    proc = await asyncio.create_subprocess_exec(
        *cmd, stdin=asyncio.subprocess.PIPE, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
    )
    # communicate() writes the input, closes stdin and drains stdout/stderr
    # while waiting. StreamWriter.write() is a plain method, so awaiting its
    # None result raises TypeError, and wait() alone can deadlock once a piped
    # stdout/stderr buffer fills up.
    _, stderr_bytes = await proc.communicate(text.encode("utf-8"))
    if proc.returncode != 0:
        stderr = stderr_bytes.decode("utf-8", "ignore")
        raise RuntimeError(f"Piper failed (code {proc.returncode}).\n{stderr}")
140
 
141
+ async def piper_stream_stdout(text, voice, ws: WebSocket, length_scale, noise_scale, noise_w):
142
+ """Stream WAV from Piper stdout over WS, stripping the WAV header once even if split."""
143
+ # Notify client early so it can open audio device
144
+ await ws.send_text(json.dumps({"event": "ready", "sr": DEFAULT_SR, "channels": DEFAULT_CH}))
145
 
146
+ cmd = build_piper_cmd(text, voice, to_stdout=True,
 
 
 
 
 
147
  length_scale=length_scale, noise_scale=noise_scale, noise_w=noise_w)
 
 
 
 
148
  proc = await asyncio.create_subprocess_exec(
149
  *cmd, stdin=asyncio.subprocess.PIPE, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
150
  )
 
152
  await proc.stdin.drain()
153
  proc.stdin.close()
154
 
155
+ header_needed = True
156
+ header_buf = bytearray()
157
+
158
  try:
159
  while True:
160
  chunk = await proc.stdout.read(4096)
161
  if not chunk:
162
  break
 
 
 
 
 
 
 
 
 
 
 
 
163
 
164
+ if header_needed:
165
+ header_buf.extend(chunk)
166
+ if len(header_buf) < 44:
167
+ # keep reading until we have at least a WAV header
168
+ continue
169
+ # If it really is a WAV stream, strip the 44-byte header
170
+ if header_buf[:4] == b"RIFF" and header_buf[8:12] == b"WAVE":
171
+ payload = header_buf[44:]
172
+ else:
173
+ payload = bytes(header_buf) # not a WAV? just pass through
174
+ header_buf.clear()
175
+ header_needed = False
176
+ if payload:
177
+ await ws.send_bytes(payload)
178
+ else:
179
+ await ws.send_bytes(chunk)
180
 
181
  await proc.wait()
182
  if proc.returncode != 0:
183
  stderr = (await proc.stderr.read()).decode("utf-8", "ignore")
184
+ await ws.send_text(json.dumps({"event": "error", "detail": stderr}))
185
  else:
186
+ await ws.send_text(json.dumps({"event": "done"}))
187
  except WebSocketDisconnect:
188
  try:
189
  proc.kill()
190
  except Exception:
191
  pass
192
 
 
193
  # ---------------
194
  # FastAPI wiring
195
  # ---------------
 
198
 
199
@app.get("/health")
def health():
    """Report service status, the data directories and the installed voices.

    A voice counts as installed when its sub-directory of ``VOICES_DIR``
    contains both ``<name>.onnx`` and ``<name>.onnx.json``.
    """
    installed = [
        entry.name
        for entry in VOICES_DIR.iterdir()
        if entry.is_dir()
        and (entry / f"{entry.name}.onnx").exists()
        and (entry / f"{entry.name}.onnx.json").exists()
    ]
    return {
        "ok": True,
        "engine": "piper-tts (CLI, CPU)",
        "default_voice": DEFAULT_VOICE,
        "voice_dir": str(VOICES_DIR),
        "available_voices": installed,
        "files_dir": str(FILES_DIR),
    }
216
 
 
217
@app.get("/")
def root():
    """Return a plain-text hint naming the service's two endpoints."""
    usage = "ActualTTS (CPU) — use POST /speak or WS /ws/tts"
    return PlainTextResponse(usage)
220
 
 
221
  @app.get("/file/{name}")
222
  def get_file(name: str):
223
  path = FILES_DIR / name
 
225
  return JSONResponse({"ok": False, "error": "not found"}, status_code=404)
226
  return FileResponse(path)
227
 
 
228
  @app.post("/speak")
229
  async def speak(request: Request):
230
  """
231
+ Body (JSON):
232
+ {
233
+ "text": "Hello",
234
+ "voice": "en_US-libritts-high",
235
+ "length_scale": 1.08,
236
+ "noise_scale": 0.35,
237
+ "noise_w": 0.90
238
+ }
239
+ => {"ok": true, "audio_url": "/file/tts-XXXX.wav"}
 
240
  """
241
  try:
242
  body = await request.json()
 
248
  return JSONResponse({"detail": "Missing text"}, status_code=400)
249
 
250
  voice = (body.get("voice") or DEFAULT_VOICE).strip()
251
+ length_scale = float(body.get("length_scale", 1.08))
252
+ noise_scale = float(body.get("noise_scale", 0.35))
253
+ noise_w = float(body.get("noise_w", 0.90))
254
 
 
255
  ts = int(time.time() * 1000)
256
  out_path = FILES_DIR / f"tts-{ts}.wav"
257
 
258
  try:
 
259
  ensure_voice(voice)
260
+ await piper_to_file(text, voice, out_path, length_scale, noise_scale, noise_w)
 
261
  except Exception as e:
262
  return JSONResponse({"ok": False, "error": str(e)}, status_code=500)
263
 
264
  return {"ok": True, "audio_url": f"/file/{out_path.name}"}
265
 
 
266
  @app.websocket("/ws/tts")
267
  async def ws_tts(ws: WebSocket):
268
  await ws.accept()
269
  voice = DEFAULT_VOICE
270
+ length_scale, noise_scale, noise_w = 1.08, 0.35, 0.90
 
 
 
271
 
272
  try:
273
  while True:
 
276
  data = json.loads(msg)
277
  except Exception:
278
  continue
 
279
  ev = data.get("event")
280
  if ev == "init":
281
  voice = (data.get("voice") or voice).strip()
 
282
  if "length_scale" in data: length_scale = float(data["length_scale"])
283
  if "noise_scale" in data: noise_scale = float(data["noise_scale"])
284
  if "noise_w" in data: noise_w = float(data["noise_w"])
 
285
  try:
286
  ensure_voice(voice)
287
  except Exception as e:
288
  await ws.send_text(json.dumps({"event": "error", "detail": str(e)}))
289
  await ws.close()
290
  return
291
+ # 'ready' is sent inside piper_stream_stdout too, but send an early ping-less notice:
292
+ await ws.send_text(json.dumps({"event": "ready", "sr": DEFAULT_SR, "channels": DEFAULT_CH}))
293
  elif ev == "speak":
294
+ text = (data.get("text") or "").strip()
295
+ if not text:
296
  await ws.send_text(json.dumps({"event": "error", "detail": "empty text"}))
297
  continue
298
+ await piper_stream_stdout(text, voice, ws, length_scale, noise_scale, noise_w)
299
+ # ignore other events
 
 
 
 
300
  except WebSocketDisconnect:
301
  return
302
  except Exception as e:
 
309
  except Exception:
310
  pass
311
 
 
312
  if __name__ == "__main__":
 
313
  uvicorn.run("app:app", host="0.0.0.0", port=7860, reload=False)