Update app.py
Browse files
app.py
CHANGED
|
@@ -4,14 +4,14 @@ import json
|
|
| 4 |
import os
|
| 5 |
import time
|
| 6 |
from pathlib import Path
|
| 7 |
-
from typing import
|
| 8 |
|
| 9 |
import uvicorn
|
| 10 |
from fastapi import FastAPI, WebSocket, WebSocketDisconnect, Request, BackgroundTasks, Query
|
| 11 |
from fastapi.responses import JSONResponse, FileResponse, PlainTextResponse
|
| 12 |
|
| 13 |
# -------------------------
|
| 14 |
-
# Writable
|
| 15 |
# -------------------------
|
| 16 |
|
| 17 |
def pick_writable_dir(candidates):
|
|
@@ -29,7 +29,6 @@ def pick_writable_dir(candidates):
|
|
| 29 |
errs.append(f"{p}: {type(e).__name__}({e})")
|
| 30 |
raise RuntimeError("No writable dir. Tried:\n " + "\n ".join(errs))
|
| 31 |
|
| 32 |
-
# Only pick truly safe/writable locations on HF Spaces
|
| 33 |
ENV_DIR = os.getenv("TTS_DATA_DIR")
|
| 34 |
VOICE_CANDIDATES = [
|
| 35 |
Path("/tmp/actualtts/voices"),
|
|
@@ -57,24 +56,22 @@ def _safe_unlink(path: Path):
|
|
| 57 |
|
| 58 |
PIPER_BIN = os.getenv("PIPER_BIN", "piper")
|
| 59 |
|
| 60 |
-
# Hugging Face layout (no tarballs). You can pin a commit hash via env for stability.
|
| 61 |
HF_REPO_BASE = "https://huggingface.co/rhasspy/piper-voices/resolve"
|
| 62 |
-
HF_REV
|
| 63 |
|
| 64 |
-
#
|
| 65 |
-
MIN_ONNX_BYTES = int(os.getenv("MIN_ONNX_BYTES", "5000000")) # >= ~5MB
|
| 66 |
MIN_JSON_BYTES = int(os.getenv("MIN_JSON_BYTES", "1000")) # >= 1KB
|
| 67 |
|
| 68 |
-
#
|
| 69 |
VOICE_MAP: Dict[str, Tuple[str, str, str, str, str]] = {
|
| 70 |
-
"en_US-libritts-high": ("en", "en_US", "libritts", "high",
|
| 71 |
-
"en_US-lessac-high": ("en", "en_US", "lessac", "high",
|
| 72 |
"en_US-amy-medium": ("en", "en_US", "amy", "medium", "en_US-amy-medium"),
|
| 73 |
}
|
| 74 |
|
| 75 |
DEFAULT_VOICE = os.getenv("DEFAULT_VOICE", "en_US-libritts-high")
|
| 76 |
-
|
| 77 |
-
DEFAULT_CH = 1
|
| 78 |
|
| 79 |
_http = None
|
| 80 |
def http():
|
|
@@ -105,34 +102,40 @@ def _file_ok(p: Path, min_bytes: int) -> bool:
|
|
| 105 |
except Exception:
|
| 106 |
return False
|
| 107 |
|
| 108 |
-
def
|
| 109 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 110 |
# Aliases
|
| 111 |
if voice_id.lower() in ("en-us", "en_us", "english"):
|
| 112 |
voice_id = "en_US-libritts-high"
|
| 113 |
-
|
| 114 |
if voice_id not in VOICE_MAP:
|
| 115 |
raise RuntimeError(f"Unknown voice '{voice_id}'. Known: {list(VOICE_MAP)}")
|
| 116 |
|
| 117 |
lang, country, family, quality, base = VOICE_MAP[voice_id]
|
| 118 |
-
vdir
|
| 119 |
model = vdir / f"{base}.onnx"
|
| 120 |
cfg = vdir / f"{base}.onnx.json"
|
| 121 |
|
| 122 |
vdir.mkdir(parents=True, exist_ok=True)
|
| 123 |
|
| 124 |
-
|
| 125 |
-
prefix = f"{HF_REPO_BASE}/{HF_REV}/{lang}/{country}/{family}/{quality}/{base}"
|
| 126 |
model_url = f"{prefix}.onnx"
|
| 127 |
cfg_url = f"{prefix}.onnx.json"
|
| 128 |
|
| 129 |
-
# Download/verify
|
| 130 |
if not _file_ok(model, MIN_ONNX_BYTES):
|
| 131 |
_download(model_url, model)
|
| 132 |
if not _file_ok(cfg, MIN_JSON_BYTES):
|
| 133 |
_download(cfg_url, cfg)
|
| 134 |
|
| 135 |
-
# Final sanity
|
| 136 |
if not _file_ok(model, MIN_ONNX_BYTES):
|
| 137 |
sz = model.stat().st_size if model.exists() else 0
|
| 138 |
raise RuntimeError(f"Downloaded .onnx too small ({sz} bytes) for '{voice_id}'")
|
|
@@ -140,10 +143,14 @@ def ensure_voice(voice_id: str) -> Dict[str, Path]:
|
|
| 140 |
sz = cfg.stat().st_size if cfg.exists() else 0
|
| 141 |
raise RuntimeError(f"Downloaded .onnx.json too small ({sz} bytes) for '{voice_id}'")
|
| 142 |
|
| 143 |
-
|
|
|
|
| 144 |
|
| 145 |
-
def build_piper_cmd(
|
| 146 |
-
|
|
|
|
|
|
|
|
|
|
| 147 |
vc = ensure_voice(voice_id)
|
| 148 |
cmd = [
|
| 149 |
PIPER_BIN,
|
|
@@ -155,12 +162,12 @@ def build_piper_cmd(text: str, voice_id: str, to_stdout: bool, out_path: Optiona
|
|
| 155 |
"--noise_w", str(noise_w),
|
| 156 |
]
|
| 157 |
if to_stdout:
|
| 158 |
-
# Stream RAW PCM (
|
| 159 |
cmd += ["--raw", "-f", "-"]
|
| 160 |
else:
|
| 161 |
if out_path is None:
|
| 162 |
raise ValueError("out_path required when to_stdout=False")
|
| 163 |
-
#
|
| 164 |
cmd += ["-f", str(out_path)]
|
| 165 |
return cmd
|
| 166 |
|
|
@@ -170,8 +177,7 @@ async def piper_to_file(text, voice, out_path, length_scale, noise_scale, noise_
|
|
| 170 |
proc = await asyncio.create_subprocess_exec(
|
| 171 |
*cmd, stdin=asyncio.subprocess.PIPE, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
|
| 172 |
)
|
| 173 |
-
|
| 174 |
-
proc.stdin.write((text + "\n").encode("utf-8"))
|
| 175 |
await proc.stdin.drain()
|
| 176 |
proc.stdin.close()
|
| 177 |
await proc.wait()
|
|
@@ -179,22 +185,20 @@ async def piper_to_file(text, voice, out_path, length_scale, noise_scale, noise_
|
|
| 179 |
stderr = (await proc.stderr.read()).decode("utf-8", "ignore")
|
| 180 |
raise RuntimeError(f"Piper failed (code {proc.returncode}).\n{stderr}")
|
| 181 |
|
| 182 |
-
async def
|
| 183 |
-
"""Stream RAW PCM
|
| 184 |
-
|
| 185 |
-
|
| 186 |
cmd = build_piper_cmd(text, voice, to_stdout=True,
|
| 187 |
length_scale=length_scale, noise_scale=noise_scale, noise_w=noise_w)
|
| 188 |
proc = await asyncio.create_subprocess_exec(
|
| 189 |
*cmd, stdin=asyncio.subprocess.PIPE, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
|
| 190 |
)
|
| 191 |
|
| 192 |
-
#
|
| 193 |
proc.stdin.write((text + "\n").encode("utf-8"))
|
| 194 |
await proc.stdin.drain()
|
| 195 |
proc.stdin.close()
|
| 196 |
|
| 197 |
-
# Forward stderr lines to client (debug visibility)
|
| 198 |
async def pump_stderr():
|
| 199 |
try:
|
| 200 |
while True:
|
|
@@ -209,15 +213,14 @@ async def piper_stream_stdout(text, voice, ws: WebSocket, length_scale, noise_sc
|
|
| 209 |
pass
|
| 210 |
|
| 211 |
stderr_task = asyncio.create_task(pump_stderr())
|
|
|
|
| 212 |
|
| 213 |
-
total_bytes = 0
|
| 214 |
try:
|
| 215 |
-
# RAW PCM passthrough
|
| 216 |
while True:
|
| 217 |
chunk = await proc.stdout.read(4096)
|
| 218 |
if not chunk:
|
| 219 |
break
|
| 220 |
-
|
| 221 |
await ws.send_bytes(chunk)
|
| 222 |
|
| 223 |
await proc.wait()
|
|
@@ -226,9 +229,9 @@ async def piper_stream_stdout(text, voice, ws: WebSocket, length_scale, noise_sc
|
|
| 226 |
if proc.returncode != 0:
|
| 227 |
rem = await proc.stderr.read()
|
| 228 |
detail = rem.decode("utf-8", "ignore").strip()
|
| 229 |
-
await ws.send_text(json.dumps({"event": "error", "detail": detail or f
|
| 230 |
else:
|
| 231 |
-
if
|
| 232 |
await ws.send_text(json.dumps({"event": "error", "detail": "No audio produced"}))
|
| 233 |
else:
|
| 234 |
await ws.send_text(json.dumps({"event": "done"}))
|
|
@@ -242,9 +245,9 @@ async def piper_stream_stdout(text, voice, ws: WebSocket, length_scale, noise_sc
|
|
| 242 |
except Exception:
|
| 243 |
pass
|
| 244 |
|
| 245 |
-
#
|
| 246 |
-
# FastAPI
|
| 247 |
-
#
|
| 248 |
|
| 249 |
app = FastAPI(title="ActualTTS (CPU)")
|
| 250 |
|
|
@@ -256,11 +259,10 @@ def health():
|
|
| 256 |
if not child.is_dir():
|
| 257 |
continue
|
| 258 |
name = child.name
|
| 259 |
-
# Only list voices that have sane-sized files
|
| 260 |
base = child / f"{name}.onnx"
|
| 261 |
cfg = child / f"{name}.onnx.json"
|
| 262 |
if _file_ok(base, MIN_ONNX_BYTES) and _file_ok(cfg, MIN_JSON_BYTES):
|
| 263 |
-
voices.append(name)
|
| 264 |
return {
|
| 265 |
"ok": True,
|
| 266 |
"engine": "piper-tts (CLI, CPU)",
|
|
@@ -284,9 +286,10 @@ def get_file(name: str):
|
|
| 284 |
@app.post("/speak")
|
| 285 |
async def speak(request: Request):
|
| 286 |
"""
|
| 287 |
-
|
| 288 |
-
|
| 289 |
-
|
|
|
|
| 290 |
"""
|
| 291 |
try:
|
| 292 |
body = await request.json()
|
|
@@ -297,7 +300,7 @@ async def speak(request: Request):
|
|
| 297 |
if not text:
|
| 298 |
return JSONResponse({"detail": "Missing text"}, status_code=400)
|
| 299 |
|
| 300 |
-
voice
|
| 301 |
length_scale = float(body.get("length_scale", 1.08))
|
| 302 |
noise_scale = float(body.get("noise_scale", 0.35))
|
| 303 |
noise_w = float(body.get("noise_w", 0.90))
|
|
@@ -313,7 +316,6 @@ async def speak(request: Request):
|
|
| 313 |
|
| 314 |
return {"ok": True, "audio_url": f"/file/{out_path.name}"}
|
| 315 |
|
| 316 |
-
# --- Direct-file endpoints (audio/wav response) ---
|
| 317 |
@app.post("/speak.wav")
|
| 318 |
async def speak_wav_post(request: Request, background_tasks: BackgroundTasks):
|
| 319 |
"""POST JSON -> returns audio/wav directly"""
|
|
@@ -350,7 +352,7 @@ async def speak_wav_get(
|
|
| 350 |
length_scale: float = 1.08,
|
| 351 |
noise_scale: float = 0.35,
|
| 352 |
noise_w: float = 0.90,
|
| 353 |
-
background_tasks: BackgroundTasks =
|
| 354 |
):
|
| 355 |
"""GET query -> returns audio/wav directly"""
|
| 356 |
text = (text or "").strip()
|
|
@@ -366,10 +368,10 @@ async def speak_wav_get(
|
|
| 366 |
except Exception as e:
|
| 367 |
return JSONResponse({"ok": False, "error": str(e)}, status_code=500)
|
| 368 |
|
| 369 |
-
background_tasks
|
| 370 |
-
|
|
|
|
| 371 |
|
| 372 |
-
# --- Diagnostics: inspect/refresh downloaded voices ---
|
| 373 |
@app.get("/debug/voices")
|
| 374 |
def debug_voices(redownload: bool = Query(False, description="Force re-download bad/missing files")):
|
| 375 |
out = {"dir": str(VOICES_DIR), "voices": []}
|
|
@@ -383,6 +385,7 @@ def debug_voices(redownload: bool = Query(False, description="Force re-download
|
|
| 383 |
"model_exists": model.exists(), "cfg_exists": cfg.exists(),
|
| 384 |
"model_size": (model.stat().st_size if model.exists() else 0),
|
| 385 |
"cfg_size": (cfg.stat().st_size if cfg.exists() else 0),
|
|
|
|
| 386 |
}
|
| 387 |
out["voices"].append(info)
|
| 388 |
|
|
@@ -395,6 +398,7 @@ def debug_voices(redownload: bool = Query(False, description="Force re-download
|
|
| 395 |
info["redownloaded"] = True
|
| 396 |
info["model_size"] = (model.stat().st_size if model.exists() else 0)
|
| 397 |
info["cfg_size"] = (cfg.stat().st_size if cfg.exists() else 0)
|
|
|
|
| 398 |
except Exception as e:
|
| 399 |
info["redownload_error"] = str(e)
|
| 400 |
return out
|
|
@@ -404,6 +408,7 @@ async def ws_tts(ws: WebSocket):
|
|
| 404 |
await ws.accept()
|
| 405 |
voice = DEFAULT_VOICE
|
| 406 |
length_scale, noise_scale, noise_w = 1.08, 0.35, 0.90
|
|
|
|
| 407 |
|
| 408 |
try:
|
| 409 |
while True:
|
|
@@ -419,18 +424,20 @@ async def ws_tts(ws: WebSocket):
|
|
| 419 |
if "noise_scale" in data: noise_scale = float(data["noise_scale"])
|
| 420 |
if "noise_w" in data: noise_w = float(data["noise_w"])
|
| 421 |
try:
|
| 422 |
-
ensure_voice(voice)
|
|
|
|
| 423 |
except Exception as e:
|
| 424 |
await ws.send_text(json.dumps({"event": "error", "detail": str(e)}))
|
| 425 |
await ws.close()
|
| 426 |
return
|
| 427 |
-
|
|
|
|
| 428 |
elif ev == "speak":
|
| 429 |
text = (data.get("text") or "").strip()
|
| 430 |
if not text:
|
| 431 |
await ws.send_text(json.dumps({"event": "error", "detail": "empty text"}))
|
| 432 |
continue
|
| 433 |
-
await
|
| 434 |
# ignore other events
|
| 435 |
except WebSocketDisconnect:
|
| 436 |
return
|
|
|
|
| 4 |
import os
|
| 5 |
import time
|
| 6 |
from pathlib import Path
|
| 7 |
+
from typing import Dict, Optional, Tuple
|
| 8 |
|
| 9 |
import uvicorn
|
| 10 |
from fastapi import FastAPI, WebSocket, WebSocketDisconnect, Request, BackgroundTasks, Query
|
| 11 |
from fastapi.responses import JSONResponse, FileResponse, PlainTextResponse
|
| 12 |
|
| 13 |
# -------------------------
|
| 14 |
+
# Writable directories
|
| 15 |
# -------------------------
|
| 16 |
|
| 17 |
def pick_writable_dir(candidates):
|
|
|
|
| 29 |
errs.append(f"{p}: {type(e).__name__}({e})")
|
| 30 |
raise RuntimeError("No writable dir. Tried:\n " + "\n ".join(errs))
|
| 31 |
|
|
|
|
| 32 |
ENV_DIR = os.getenv("TTS_DATA_DIR")
|
| 33 |
VOICE_CANDIDATES = [
|
| 34 |
Path("/tmp/actualtts/voices"),
|
|
|
|
| 56 |
|
| 57 |
PIPER_BIN = os.getenv("PIPER_BIN", "piper")
|
| 58 |
|
|
|
|
| 59 |
HF_REPO_BASE = "https://huggingface.co/rhasspy/piper-voices/resolve"
|
| 60 |
+
HF_REV = os.getenv("PIPER_VOICES_REV", "main") # set a commit hash here if you want pinning
|
| 61 |
|
| 62 |
+
# sanity thresholds (bytes) to detect corrupt downloads
|
| 63 |
+
MIN_ONNX_BYTES = int(os.getenv("MIN_ONNX_BYTES", "5000000")) # >= ~5MB (real models are 10s–100s MB)
|
| 64 |
MIN_JSON_BYTES = int(os.getenv("MIN_JSON_BYTES", "1000")) # >= 1KB
|
| 65 |
|
| 66 |
+
# (lang, country, family, quality, basename)
|
| 67 |
VOICE_MAP: Dict[str, Tuple[str, str, str, str, str]] = {
|
| 68 |
+
"en_US-libritts-high": ("en", "en_US", "libritts", "high", "en_US-libritts-high"),
|
| 69 |
+
"en_US-lessac-high": ("en", "en_US", "lessac", "high", "en_US-lessac-high"),
|
| 70 |
"en_US-amy-medium": ("en", "en_US", "amy", "medium", "en_US-amy-medium"),
|
| 71 |
}
|
| 72 |
|
| 73 |
DEFAULT_VOICE = os.getenv("DEFAULT_VOICE", "en_US-libritts-high")
|
| 74 |
+
DEFAULT_CH = 1 # mono
|
|
|
|
| 75 |
|
| 76 |
_http = None
|
| 77 |
def http():
|
|
|
|
| 102 |
except Exception:
|
| 103 |
return False
|
| 104 |
|
| 105 |
+
def _read_sr_from_cfg(cfg_path: Path) -> int:
    """Return the sample rate (Hz) declared in a Piper voice config file.

    Piper ``.onnx.json`` configs keep the rate nested under
    ``audio.sample_rate``; a top-level ``sample_rate`` key is still honoured
    as a fallback so older/hand-written configs keep working.

    Args:
        cfg_path: Path to the voice's ``.onnx.json`` file.

    Returns:
        The declared rate if it is one of 16000, 22050, 24000, 44100 or
        48000; otherwise 22050. 22050 is also returned when the file is
        missing, unreadable, not valid JSON, or not a JSON object.
    """
    import json as _json

    default_sr = 22050
    allowed_rates = (16000, 22050, 24000, 44100, 48000)

    try:
        with open(cfg_path, "r", encoding="utf-8") as f:
            data = _json.load(f)

        # A config whose top level is a list/string/number carries no rate.
        if not isinstance(data, dict):
            return default_sr

        # Preferred location: {"audio": {"sample_rate": ...}}.
        audio = data.get("audio")
        raw = audio.get("sample_rate") if isinstance(audio, dict) else None

        # Fallback: flat {"sample_rate": ...} layout.
        if raw is None:
            raw = data.get("sample_rate", default_sr)

        sr = int(raw)
    except Exception:
        # Deliberately best-effort: a bad config must never break playback
        # setup, so any read/parse/convert failure yields the default rate.
        return default_sr

    return sr if sr in allowed_rates else default_sr
|
| 114 |
+
|
| 115 |
+
def ensure_voice(voice_id: str) -> Dict[str, Path | int]:
|
| 116 |
+
"""Ensure voice .onnx and .onnx.json exist locally with sane sizes. Returns paths and SR."""
|
| 117 |
# Aliases
|
| 118 |
if voice_id.lower() in ("en-us", "en_us", "english"):
|
| 119 |
voice_id = "en_US-libritts-high"
|
|
|
|
| 120 |
if voice_id not in VOICE_MAP:
|
| 121 |
raise RuntimeError(f"Unknown voice '{voice_id}'. Known: {list(VOICE_MAP)}")
|
| 122 |
|
| 123 |
lang, country, family, quality, base = VOICE_MAP[voice_id]
|
| 124 |
+
vdir = VOICES_DIR / voice_id
|
| 125 |
model = vdir / f"{base}.onnx"
|
| 126 |
cfg = vdir / f"{base}.onnx.json"
|
| 127 |
|
| 128 |
vdir.mkdir(parents=True, exist_ok=True)
|
| 129 |
|
| 130 |
+
prefix = f"{HF_REPO_BASE}/{HF_REV}/{lang}/{country}/{family}/{quality}/{base}"
|
|
|
|
| 131 |
model_url = f"{prefix}.onnx"
|
| 132 |
cfg_url = f"{prefix}.onnx.json"
|
| 133 |
|
|
|
|
| 134 |
if not _file_ok(model, MIN_ONNX_BYTES):
|
| 135 |
_download(model_url, model)
|
| 136 |
if not _file_ok(cfg, MIN_JSON_BYTES):
|
| 137 |
_download(cfg_url, cfg)
|
| 138 |
|
|
|
|
| 139 |
if not _file_ok(model, MIN_ONNX_BYTES):
|
| 140 |
sz = model.stat().st_size if model.exists() else 0
|
| 141 |
raise RuntimeError(f"Downloaded .onnx too small ({sz} bytes) for '{voice_id}'")
|
|
|
|
| 143 |
sz = cfg.stat().st_size if cfg.exists() else 0
|
| 144 |
raise RuntimeError(f"Downloaded .onnx.json too small ({sz} bytes) for '{voice_id}'")
|
| 145 |
|
| 146 |
+
sr = _read_sr_from_cfg(cfg)
|
| 147 |
+
return {"model": model, "config": cfg, "sr": sr}
|
| 148 |
|
| 149 |
+
def build_piper_cmd(
|
| 150 |
+
text: str, voice_id: str, to_stdout: bool,
|
| 151 |
+
out_path: Optional[Path] = None,
|
| 152 |
+
length_scale: float = 1.08, noise_scale: float = 0.35, noise_w: float = 0.90
|
| 153 |
+
) -> list:
|
| 154 |
vc = ensure_voice(voice_id)
|
| 155 |
cmd = [
|
| 156 |
PIPER_BIN,
|
|
|
|
| 162 |
"--noise_w", str(noise_w),
|
| 163 |
]
|
| 164 |
if to_stdout:
|
| 165 |
+
# Stream RAW PCM (16-bit little-endian). Simpler to play on clients.
|
| 166 |
cmd += ["--raw", "-f", "-"]
|
| 167 |
else:
|
| 168 |
if out_path is None:
|
| 169 |
raise ValueError("out_path required when to_stdout=False")
|
| 170 |
+
# File output: Piper writes WAV by default.
|
| 171 |
cmd += ["-f", str(out_path)]
|
| 172 |
return cmd
|
| 173 |
|
|
|
|
| 177 |
proc = await asyncio.create_subprocess_exec(
|
| 178 |
*cmd, stdin=asyncio.subprocess.PIPE, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
|
| 179 |
)
|
| 180 |
+
proc.stdin.write((text + "\n").encode("utf-8")) # newline to terminate
|
|
|
|
| 181 |
await proc.stdin.drain()
|
| 182 |
proc.stdin.close()
|
| 183 |
await proc.wait()
|
|
|
|
| 185 |
stderr = (await proc.stderr.read()).decode("utf-8", "ignore")
|
| 186 |
raise RuntimeError(f"Piper failed (code {proc.returncode}).\n{stderr}")
|
| 187 |
|
| 188 |
+
async def piper_stream_raw(text, voice, ws: WebSocket, length_scale, noise_scale, noise_w, sr: int):
|
| 189 |
+
"""Stream RAW PCM frames over WS; send stderr as 'log' events; signal 'done'."""
|
| 190 |
+
# We already announced 'ready' with the correct sr in the init step.
|
|
|
|
| 191 |
cmd = build_piper_cmd(text, voice, to_stdout=True,
|
| 192 |
length_scale=length_scale, noise_scale=noise_scale, noise_w=noise_w)
|
| 193 |
proc = await asyncio.create_subprocess_exec(
|
| 194 |
*cmd, stdin=asyncio.subprocess.PIPE, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
|
| 195 |
)
|
| 196 |
|
| 197 |
+
# Send text and close stdin
|
| 198 |
proc.stdin.write((text + "\n").encode("utf-8"))
|
| 199 |
await proc.stdin.drain()
|
| 200 |
proc.stdin.close()
|
| 201 |
|
|
|
|
| 202 |
async def pump_stderr():
|
| 203 |
try:
|
| 204 |
while True:
|
|
|
|
| 213 |
pass
|
| 214 |
|
| 215 |
stderr_task = asyncio.create_task(pump_stderr())
|
| 216 |
+
total = 0
|
| 217 |
|
|
|
|
| 218 |
try:
|
|
|
|
| 219 |
while True:
|
| 220 |
chunk = await proc.stdout.read(4096)
|
| 221 |
if not chunk:
|
| 222 |
break
|
| 223 |
+
total += len(chunk)
|
| 224 |
await ws.send_bytes(chunk)
|
| 225 |
|
| 226 |
await proc.wait()
|
|
|
|
| 229 |
if proc.returncode != 0:
|
| 230 |
rem = await proc.stderr.read()
|
| 231 |
detail = rem.decode("utf-8", "ignore").strip()
|
| 232 |
+
await ws.send_text(json.dumps({"event": "error", "detail": detail or f"piper exited {proc.returncode}"}))
|
| 233 |
else:
|
| 234 |
+
if total == 0:
|
| 235 |
await ws.send_text(json.dumps({"event": "error", "detail": "No audio produced"}))
|
| 236 |
else:
|
| 237 |
await ws.send_text(json.dumps({"event": "done"}))
|
|
|
|
| 245 |
except Exception:
|
| 246 |
pass
|
| 247 |
|
| 248 |
+
# -------------------------
|
| 249 |
+
# FastAPI app & routes
|
| 250 |
+
# -------------------------
|
| 251 |
|
| 252 |
app = FastAPI(title="ActualTTS (CPU)")
|
| 253 |
|
|
|
|
| 259 |
if not child.is_dir():
|
| 260 |
continue
|
| 261 |
name = child.name
|
|
|
|
| 262 |
base = child / f"{name}.onnx"
|
| 263 |
cfg = child / f"{name}.onnx.json"
|
| 264 |
if _file_ok(base, MIN_ONNX_BYTES) and _file_ok(cfg, MIN_JSON_BYTES):
|
| 265 |
+
voices.append({"id": name, "sr": _read_sr_from_cfg(cfg)})
|
| 266 |
return {
|
| 267 |
"ok": True,
|
| 268 |
"engine": "piper-tts (CLI, CPU)",
|
|
|
|
| 286 |
@app.post("/speak")
|
| 287 |
async def speak(request: Request):
|
| 288 |
"""
|
| 289 |
+
POST JSON:
|
| 290 |
+
{ "text": "Hello", "voice": "en_US-libritts-high",
|
| 291 |
+
"length_scale": 1.08, "noise_scale": 0.35, "noise_w": 0.90 }
|
| 292 |
+
Returns: { "ok": true, "audio_url": "/file/tts-XXXX.wav" }
|
| 293 |
"""
|
| 294 |
try:
|
| 295 |
body = await request.json()
|
|
|
|
| 300 |
if not text:
|
| 301 |
return JSONResponse({"detail": "Missing text"}, status_code=400)
|
| 302 |
|
| 303 |
+
voice = (body.get("voice") or DEFAULT_VOICE).strip()
|
| 304 |
length_scale = float(body.get("length_scale", 1.08))
|
| 305 |
noise_scale = float(body.get("noise_scale", 0.35))
|
| 306 |
noise_w = float(body.get("noise_w", 0.90))
|
|
|
|
| 316 |
|
| 317 |
return {"ok": True, "audio_url": f"/file/{out_path.name}"}
|
| 318 |
|
|
|
|
| 319 |
@app.post("/speak.wav")
|
| 320 |
async def speak_wav_post(request: Request, background_tasks: BackgroundTasks):
|
| 321 |
"""POST JSON -> returns audio/wav directly"""
|
|
|
|
| 352 |
length_scale: float = 1.08,
|
| 353 |
noise_scale: float = 0.35,
|
| 354 |
noise_w: float = 0.90,
|
| 355 |
+
background_tasks: BackgroundTasks = None,
|
| 356 |
):
|
| 357 |
"""GET query -> returns audio/wav directly"""
|
| 358 |
text = (text or "").strip()
|
|
|
|
| 368 |
except Exception as e:
|
| 369 |
return JSONResponse({"ok": False, "error": str(e)}, status_code=500)
|
| 370 |
|
| 371 |
+
if background_tasks:
|
| 372 |
+
background_tasks.add_task(_safe_unlink, out_path)
|
| 373 |
+
return FileResponse(out_path, media_type="audio/wav", filename=out_path.name)
|
| 374 |
|
|
|
|
| 375 |
@app.get("/debug/voices")
|
| 376 |
def debug_voices(redownload: bool = Query(False, description="Force re-download bad/missing files")):
|
| 377 |
out = {"dir": str(VOICES_DIR), "voices": []}
|
|
|
|
| 385 |
"model_exists": model.exists(), "cfg_exists": cfg.exists(),
|
| 386 |
"model_size": (model.stat().st_size if model.exists() else 0),
|
| 387 |
"cfg_size": (cfg.stat().st_size if cfg.exists() else 0),
|
| 388 |
+
"sr": _read_sr_from_cfg(cfg) if cfg.exists() else None,
|
| 389 |
}
|
| 390 |
out["voices"].append(info)
|
| 391 |
|
|
|
|
| 398 |
info["redownloaded"] = True
|
| 399 |
info["model_size"] = (model.stat().st_size if model.exists() else 0)
|
| 400 |
info["cfg_size"] = (cfg.stat().st_size if cfg.exists() else 0)
|
| 401 |
+
info["sr"] = _read_sr_from_cfg(cfg) if cfg.exists() else None
|
| 402 |
except Exception as e:
|
| 403 |
info["redownload_error"] = str(e)
|
| 404 |
return out
|
|
|
|
| 408 |
await ws.accept()
|
| 409 |
voice = DEFAULT_VOICE
|
| 410 |
length_scale, noise_scale, noise_w = 1.08, 0.35, 0.90
|
| 411 |
+
voice_sr = 22050 # will be overwritten by ensure_voice
|
| 412 |
|
| 413 |
try:
|
| 414 |
while True:
|
|
|
|
| 424 |
if "noise_scale" in data: noise_scale = float(data["noise_scale"])
|
| 425 |
if "noise_w" in data: noise_w = float(data["noise_w"])
|
| 426 |
try:
|
| 427 |
+
info = ensure_voice(voice)
|
| 428 |
+
voice_sr = int(info.get("sr", 22050))
|
| 429 |
except Exception as e:
|
| 430 |
await ws.send_text(json.dumps({"event": "error", "detail": str(e)}))
|
| 431 |
await ws.close()
|
| 432 |
return
|
| 433 |
+
# announce the correct SR so the client opens the audio device properly
|
| 434 |
+
await ws.send_text(json.dumps({"event": "ready", "sr": voice_sr, "channels": DEFAULT_CH}))
|
| 435 |
elif ev == "speak":
|
| 436 |
text = (data.get("text") or "").strip()
|
| 437 |
if not text:
|
| 438 |
await ws.send_text(json.dumps({"event": "error", "detail": "empty text"}))
|
| 439 |
continue
|
| 440 |
+
await piper_stream_raw(text, voice, ws, length_scale, noise_scale, noise_w, sr=voice_sr)
|
| 441 |
# ignore other events
|
| 442 |
except WebSocketDisconnect:
|
| 443 |
return
|