import asyncio
import json
import os
import shutil
import sys
import time
import uuid
from pathlib import Path
from typing import Dict, Optional, Tuple

import uvicorn
from fastapi import FastAPI, WebSocket, WebSocketDisconnect, Request, BackgroundTasks, Query, Header
from fastapi.responses import JSONResponse, FileResponse, PlainTextResponse
| |
|
| | |
| | |
| | |
| |
|
def pick_writable_dir(candidates):
    """Return the first candidate directory that can be created and written to.

    Each candidate is probed by creating it (parents included) and writing,
    then removing, a tiny marker file. Falsy entries are skipped.

    Raises:
        RuntimeError: if no candidate is usable; the message lists every failure.
    """
    failures = []
    for candidate in candidates:
        if not candidate:
            continue
        try:
            candidate.mkdir(parents=True, exist_ok=True)
            marker = candidate / ".probe"
            marker.write_bytes(b"ok")
            marker.unlink(missing_ok=True)
        except Exception as exc:
            failures.append(f"{candidate}: {type(exc).__name__}({exc})")
        else:
            return candidate
    raise RuntimeError("No writable dir. Tried:\n " + "\n ".join(failures))
| |
|
# Storage locations. Tmpfs-style paths are preferred; $TTS_DATA_DIR (if set)
# supplies a final fallback candidate for both voices and generated files.
ENV_DIR = os.getenv("TTS_DATA_DIR")
VOICE_CANDIDATES = [
    Path("/tmp/actualtts/voices"),
    Path("/dev/shm/actualtts_voices"),
    Path(ENV_DIR) / "voices" if ENV_DIR else None,
]
FILE_CANDIDATES = [
    Path("/tmp/actualtts/files"),
    Path("/dev/shm/actualtts_files"),
    Path(ENV_DIR) / "files" if ENV_DIR else None,
]

# First writable candidate wins; raises at import time if none is usable.
VOICES_DIR = pick_writable_dir([p for p in VOICE_CANDIDATES if p])
FILES_DIR = pick_writable_dir([p for p in FILE_CANDIDATES if p])
| |
|
| | def _safe_unlink(path: Path): |
| | try: |
| | path.unlink(missing_ok=True) |
| | except Exception: |
| | pass |
| |
|
| | |
| | |
| | |
| |
|
def resolve_piper_cmd():
    """Locate the piper CLI as an argv prefix list.

    Resolution order: $PIPER_BIN (split on whitespace, so extra flags may be
    baked in), then a `piper` binary on PATH, then `python -m piper` with the
    current interpreter.
    """
    override = os.getenv("PIPER_BIN")
    if override:
        return override.split()
    located = shutil.which("piper")
    return [located] if located else [sys.executable, "-m", "piper"]
| |
|
# Resolved once at import; an argv prefix list usable with subprocess.
PIPER_CMD = resolve_piper_cmd()

# Pause inserted between sentences, in seconds (piper --sentence-silence).
SENTENCE_SILENCE = float(os.getenv("PIPER_SENTENCE_SILENCE", "0.05"))

# Size of each paced WebSocket audio batch, in milliseconds of audio.
STREAM_BATCH_MS = int(os.getenv("STREAM_BATCH_MS", "100"))

# Mono output.
DEFAULT_CH = 1

# How much audio to accumulate before streaming starts, and a cap on how long
# to wait (after the first audio byte) for that buffer to fill.
PREBUFFER_MS = int(os.getenv("PREBUFFER_MS", "6000"))
PREBUFFER_MAX_WAIT_MS = int(os.getenv("PREBUFFER_MAX_WAIT_MS", "15000"))

# Hard limit on input text length, enforced by all endpoints.
MAX_TEXT_CHARS = int(os.getenv("MAX_TEXT_CHARS", "800"))

# Optional shared secret; an empty value disables authentication entirely.
AUTH_SHARED_SECRET = (os.getenv("AUTH_SHARED_SECRET") or "").strip()
| |
|
def _auth_ok(x_auth: Optional[str]) -> bool:
    """Accept the request when no shared secret is configured, or when the
    supplied X-Auth header value matches it exactly."""
    if not AUTH_SHARED_SECRET:
        return True
    return x_auth == AUTH_SHARED_SECRET
| |
|
| | |
| | |
| | |
| |
|
# Hugging Face repo hosting piper voice assets; revision pinned via env var.
HF_REPO_BASE = "https://huggingface.co/rhasspy/piper-voices/resolve"
HF_REV = os.getenv("PIPER_VOICES_REV", "main")

# Sanity floors for downloaded artifacts — guards against an HTML error page
# being saved in place of a real .onnx/.json file.
MIN_ONNX_BYTES = int(os.getenv("MIN_ONNX_BYTES", "5000000"))
MIN_JSON_BYTES = int(os.getenv("MIN_JSON_BYTES", "1000"))

# voice_id -> (lang, country, family, quality, file base name) path segments
# used to build the Hugging Face download URL.
VOICE_MAP: Dict[str, Tuple[str, str, str, str, str]] = {
    "en_US-libritts-high": ("en", "en_US", "libritts", "high", "en_US-libritts-high"),
    "en_US-lessac-high": ("en", "en_US", "lessac", "high", "en_US-lessac-high"),
    "en_US-amy-medium": ("en", "en_US", "amy", "medium", "en_US-amy-medium"),
}

DEFAULT_VOICE = os.getenv("DEFAULT_VOICE", "en_US-libritts-high")
| |
|
# Shared HTTP session, created lazily so `requests` is only imported on the
# first actual download.
_http = None
def http():
    """Return the process-wide requests.Session, building it on first use."""
    global _http
    if _http is None:
        import requests
        session = requests.Session()
        session.headers.update({"User-Agent": "ActualTTS/CPU"})
        _http = session
    return _http
| |
|
def _download(url: str, dest: Path, timeout: int = 300):
    """Stream *url* into *dest* atomically via a `.part` temp file.

    Raises on HTTP errors, and rejects `text/html` payloads (typically a
    Hugging Face error page rather than the asset itself).
    """
    dest.parent.mkdir(parents=True, exist_ok=True)
    with http().get(url, timeout=timeout, stream=True, headers={"Accept": "application/octet-stream"}) as resp:
        resp.raise_for_status()
        content_type = (resp.headers.get("content-type") or "").lower()
        if "text/html" in content_type:
            raise RuntimeError(f"Bad content-type for {url}: {content_type}")
        partial = dest.with_suffix(dest.suffix + ".part")
        with open(partial, "wb") as out:
            # 64 KiB chunks; skip keep-alive heartbeats (empty chunks).
            for piece in resp.iter_content(1 << 16):
                if piece:
                    out.write(piece)
        # Atomic rename: dest is only ever complete or absent.
        partial.replace(dest)
| |
|
| | def _file_ok(p: Path, min_bytes: int) -> bool: |
| | try: |
| | return p.exists() and p.stat().st_size >= min_bytes |
| | except Exception: |
| | return False |
| |
|
| | def _read_sr_from_cfg(cfg_path: Path) -> int: |
| | try: |
| | with open(cfg_path, "r", encoding="utf-8") as f: |
| | j = json.load(f) |
| | sr = int(j.get("sample_rate", 22050)) |
| | |
| | return sr if sr in (16000, 22050, 24000, 44100, 48000) else 22050 |
| | except Exception: |
| | return 22050 |
| |
|
def ensure_voice(voice_id: str) -> Dict[str, object]:
    """Ensure voice .onnx and .onnx.json exist locally with sane sizes. Returns paths and SR."""
    # Friendly aliases for the default English voice.
    if voice_id.lower() in ("en-us", "en_us", "english"):
        voice_id = "en_US-libritts-high"
    if voice_id not in VOICE_MAP:
        raise RuntimeError(f"Unknown voice '{voice_id}'. Known: {list(VOICE_MAP)}")

    lang, country, family, quality, base = VOICE_MAP[voice_id]
    voice_dir = VOICES_DIR / voice_id
    model_path = voice_dir / f"{base}.onnx"
    cfg_path = voice_dir / f"{base}.onnx.json"
    voice_dir.mkdir(parents=True, exist_ok=True)

    # Assets live under <repo>/<rev>/<lang>/<country>/<family>/<quality>/.
    url_prefix = f"{HF_REPO_BASE}/{HF_REV}/{lang}/{country}/{family}/{quality}/{base}"
    if not _file_ok(model_path, MIN_ONNX_BYTES):
        _download(f"{url_prefix}.onnx", model_path)
    if not _file_ok(cfg_path, MIN_JSON_BYTES):
        _download(f"{url_prefix}.onnx.json", cfg_path)

    # Re-validate after the (possible) downloads: an undersized file is most
    # likely a truncated transfer or an error page.
    if not _file_ok(model_path, MIN_ONNX_BYTES):
        sz = model_path.stat().st_size if model_path.exists() else 0
        raise RuntimeError(f"Downloaded .onnx too small ({sz} bytes) for '{voice_id}'")
    if not _file_ok(cfg_path, MIN_JSON_BYTES):
        sz = cfg_path.stat().st_size if cfg_path.exists() else 0
        raise RuntimeError(f"Downloaded .onnx.json too small ({sz} bytes) for '{voice_id}'")

    return {"model": model_path, "config": cfg_path, "sr": _read_sr_from_cfg(cfg_path)}
| |
|
| | |
| | |
| | |
| |
|
def build_piper_cmd(
    text: str, voice_id: str, to_stdout: bool,
    out_path: Optional[Path] = None,
    length_scale: float = 1.08, noise_scale: float = 0.35, noise_w: float = 0.90
) -> list:
    """Assemble the piper argv for *voice_id*, provisioning assets as needed.

    NOTE: *text* is intentionally unused here — callers feed it to the
    subprocess via stdin, never on the command line.

    With to_stdout=True piper emits raw PCM on stdout; otherwise it writes a
    WAV to *out_path* (required, else ValueError).
    """
    assets = ensure_voice(voice_id)
    argv = list(PIPER_CMD)
    argv += ["-m", str(assets["model"])]
    argv += ["-c", str(assets["config"])]
    argv += ["--length-scale", str(length_scale)]
    argv += ["--noise-scale", str(noise_scale)]
    argv += ["--noise-w", str(noise_w)]
    argv += ["--sentence-silence", str(SENTENCE_SILENCE)]
    if to_stdout:
        argv += ["-f", "-", "--output-raw"]
        return argv
    if out_path is None:
        raise ValueError("out_path required when to_stdout=False")
    argv += ["-f", str(out_path)]
    return argv
| |
|
async def piper_to_file(text, voice, out_path, length_scale, noise_scale, noise_w):
    """Synthesize *text* with piper, writing a WAV file to *out_path*.

    Raises:
        RuntimeError: on a non-zero piper exit code, including its stderr.
    """
    cmd = build_piper_cmd(text, voice, to_stdout=False, out_path=out_path,
                          length_scale=length_scale, noise_scale=noise_scale, noise_w=noise_w)
    proc = await asyncio.create_subprocess_exec(
        *cmd, stdin=asyncio.subprocess.PIPE, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
    )
    # communicate() feeds stdin and drains stdout/stderr concurrently. The
    # previous write/drain/close/wait sequence never read the PIPEs, so a
    # chatty piper could fill the stderr (or stdout) buffer and deadlock
    # against our wait().
    _, stderr = await proc.communicate((text + "\n").encode("utf-8"))
    if proc.returncode != 0:
        raise RuntimeError(f"Piper failed (code {proc.returncode}).\n{stderr.decode('utf-8', 'ignore')}")
| |
|
async def piper_stream_raw(
    text: str,
    voice: str,
    ws: WebSocket,
    sr: int,
    channels: int,
    length_scale: float,
    noise_scale: float,
    noise_w: float,
    prebuffer_ms: int,
    prebuffer_max_wait_ms: int,
):
    """
    Synthesize immediately; stream in *batched, clock-paced* frames:
      - Accumulate audio until `prebuffer_ms` (or `prebuffer_max_wait_ms` elapses).
      - Then send fixed batches of STREAM_BATCH_MS at a steady cadence.
    """
    cmd = build_piper_cmd(text, voice, to_stdout=True,
                          length_scale=length_scale, noise_scale=noise_scale, noise_w=noise_w)
    proc = await asyncio.create_subprocess_exec(
        *cmd, stdin=asyncio.subprocess.PIPE, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
    )

    # The text goes via stdin (one line); it never appears on the argv.
    proc.stdin.write((text + "\n").encode("utf-8"))
    await proc.stdin.drain()
    proc.stdin.close()

    async def pump_stderr():
        # Forward piper's stderr lines to the client as "log" events, which
        # also keeps the stderr pipe drained so the child never stalls on it.
        try:
            while True:
                line = await proc.stderr.readline()
                if not line:
                    break
                try:
                    await ws.send_text(json.dumps({"event": "log", "stderr": line.decode("utf-8", "ignore").rstrip()}))
                except Exception:
                    break
        except Exception:
            pass

    stderr_task = asyncio.create_task(pump_stderr())
    total = 0  # audio bytes actually sent to the client

    # 16-bit PCM: 2 bytes per sample per channel.
    bytes_per_ms = max(1, int(sr * channels * 2 / 1000))
    batch_bytes = max(bytes_per_ms, int(STREAM_BATCH_MS * bytes_per_ms))
    target_prebuffer_bytes = max(0, int(prebuffer_ms) * bytes_per_ms)

    buf = bytearray()
    started_streaming = False
    first_audio_ts = None  # wall-clock time of the first audio byte
    pace_start_t = None    # wall-clock anchor for the send cadence
    batches_sent = 0

    try:
        while True:
            chunk = await proc.stdout.read(8192)
            if chunk:
                if first_audio_ts is None:
                    first_audio_ts = time.time()
                buf.extend(chunk)

                # Hold audio back until the prebuffer target is met, or the
                # max wait (measured from the first audio byte) expires.
                if not started_streaming:
                    enough = (len(buf) >= target_prebuffer_bytes) if target_prebuffer_bytes > 0 else True
                    waited = False
                    if first_audio_ts is not None and prebuffer_max_wait_ms > 0:
                        waited = ((time.time() - first_audio_ts) * 1000.0) >= prebuffer_max_wait_ms
                    if enough or waited:
                        started_streaming = True
                        pace_start_t = time.time()
                        batches_sent = 0

                # Drain full batches at real-time pace: batch N is due at
                # pace_start_t + N * STREAM_BATCH_MS.
                if started_streaming:
                    while len(buf) >= batch_bytes:
                        due_t = pace_start_t + (batches_sent * STREAM_BATCH_MS) / 1000.0
                        sleep_s = due_t - time.time()
                        if sleep_s > 0:
                            await asyncio.sleep(sleep_s)
                        await ws.send_bytes(buf[:batch_bytes])
                        del buf[:batch_bytes]
                        total += batch_bytes
                        batches_sent += 1
                continue

            # EOF from piper — flush whatever is still buffered.
            # Short utterance that finished before the prebuffer filled:
            # start the pacing clock now.
            if not started_streaming and len(buf) > 0:
                started_streaming = True
                pace_start_t = time.time()
                batches_sent = 0

            # Paced flush of the remaining full batches.
            while len(buf) >= batch_bytes:
                due_t = pace_start_t + (batches_sent * STREAM_BATCH_MS) / 1000.0
                sleep_s = due_t - time.time()
                if sleep_s > 0:
                    await asyncio.sleep(sleep_s)
                await ws.send_bytes(buf[:batch_bytes])
                del buf[:batch_bytes]
                total += batch_bytes
                batches_sent += 1

            # Final partial batch goes out immediately (no pacing).
            if len(buf) > 0:
                await ws.send_bytes(bytes(buf))
                total += len(buf)
                buf.clear()
            break

        await proc.wait()
        await stderr_task

        if proc.returncode != 0:
            # NOTE(review): pump_stderr normally consumes stderr to EOF, so
            # this read likely returns b"" and the detail falls back to the
            # exit-code message — confirm intended.
            rem = await proc.stderr.read()
            detail = rem.decode("utf-8", "ignore").strip()
            await ws.send_text(json.dumps({"event": "error", "detail": detail or f"piper exited {proc.returncode}"}))
        else:
            if total == 0:
                await ws.send_text(json.dumps({"event": "error", "detail": "No audio produced"}))
            else:
                await ws.send_text(json.dumps({"event": "done"}))
    except WebSocketDisconnect:
        # Client went away mid-stream: kill the synth and reap the log pump.
        try:
            proc.kill()
        except Exception:
            pass
        try:
            await stderr_task
        except Exception:
            pass
| |
|
| | |
| | |
| | |
| |
|
# FastAPI application instance; all routes below attach to it.
app = FastAPI(title="ActualTTS (CPU)")
| |
|
@app.get("/health")
def health():
    """Liveness/info endpoint: lists fully-provisioned voices on disk plus
    the numpy/onnxruntime versions (None when not importable)."""
    voices = []
    if VOICES_DIR.exists():
        for entry in VOICES_DIR.iterdir():
            if not entry.is_dir():
                continue
            # A voice counts as available only when both assets pass the
            # minimum-size sanity checks.
            model = entry / f"{entry.name}.onnx"
            cfg = entry / f"{entry.name}.onnx.json"
            if _file_ok(model, MIN_ONNX_BYTES) and _file_ok(cfg, MIN_JSON_BYTES):
                voices.append({"id": entry.name, "sr": _read_sr_from_cfg(cfg)})

    try:
        import numpy
        import onnxruntime
        numpy_version = numpy.__version__
        onnxruntime_version = onnxruntime.__version__
    except Exception:
        numpy_version = onnxruntime_version = None

    return {
        "ok": True,
        "engine": "piper-tts (CLI, CPU)",
        "default_voice": DEFAULT_VOICE,
        "voice_dir": str(VOICES_DIR),
        "available_voices": voices,
        "files_dir": str(FILES_DIR),
        "numpy": numpy_version,
        "onnxruntime": onnxruntime_version,
    }
| |
|
@app.get("/")
def root():
    """Plain-text index pointing at the available endpoints."""
    banner = "ActualTTS (CPU) — use POST /speak, GET/POST /speak.wav, or WS /ws/tts"
    return PlainTextResponse(banner)
| |
|
@app.post("/provision")
async def provision(request: Request, x_auth: Optional[str] = Header(None)):
    """
    POST JSON: { "voice": "en_US-amy-medium" }
    Downloads voice assets if missing. Returns {ok, voice, sr}.
    """
    if not _auth_ok(x_auth):
        return JSONResponse({"ok": False, "error": "unauthorized"}, status_code=401)
    try:
        payload = await request.json()
    except Exception:
        return JSONResponse({"ok": False, "error": "invalid json"}, status_code=400)
    requested = (payload.get("voice") or DEFAULT_VOICE).strip()
    try:
        assets = ensure_voice(requested)
        return {"ok": True, "voice": requested, "sr": int(assets.get("sr", 22050))}
    except Exception as exc:
        return JSONResponse({"ok": False, "error": str(exc)}, status_code=500)
| |
|
@app.get("/file/{name}")
def get_file(name: str):
    """Serve a previously generated audio file from FILES_DIR.

    Only bare filenames are accepted: any path separator or '..' component is
    rejected, so a crafted name cannot traverse outside FILES_DIR.
    """
    # Path-traversal guard: the requested name must be exactly its own
    # final path component (no '/', '\\', or '..').
    if not name or name != Path(name).name or name in (".", ".."):
        return JSONResponse({"ok": False, "error": "not found"}, status_code=404)
    path = FILES_DIR / name
    if not path.exists():
        return JSONResponse({"ok": False, "error": "not found"}, status_code=404)
    return FileResponse(path)
| |
|
| | def _validate_text(text: str) -> Optional[str]: |
| | if not text: |
| | return "Missing text" |
| | if len(text) > MAX_TEXT_CHARS: |
| | return f"text too long (>{MAX_TEXT_CHARS} chars)" |
| | return None |
| |
|
@app.post("/speak")
async def speak(request: Request, x_auth: Optional[str] = Header(None)):
    """
    POST JSON:
      { "text": "Hello", "voice": "en_US-libritts-high",
        "length_scale": 1.08, "noise_scale": 0.35, "noise_w": 0.90 }
    Returns: { "ok": true, "audio_url": "/file/tts-XXXX.wav" }
    """
    if not _auth_ok(x_auth):
        return JSONResponse({"ok": False, "error": "unauthorized"}, status_code=401)
    try:
        body = await request.json()
    except Exception:
        return JSONResponse({"detail": "Invalid JSON"}, status_code=400)

    text = (body.get("text") or "").strip()
    err = _validate_text(text)
    if err:
        return JSONResponse({"detail": err}, status_code=400)

    voice = (body.get("voice") or DEFAULT_VOICE).strip()
    length_scale = float(body.get("length_scale", 1.08))
    noise_scale = float(body.get("noise_scale", 0.35))
    noise_w = float(body.get("noise_w", 0.90))

    # A millisecond timestamp alone can collide under concurrent requests and
    # silently overwrite another caller's file; add a short random suffix.
    ts = int(time.time() * 1000)
    out_path = FILES_DIR / f"tts-{ts}-{uuid.uuid4().hex[:8]}.wav"

    try:
        ensure_voice(voice)
        await piper_to_file(text, voice, out_path, length_scale, noise_scale, noise_w)
    except Exception as e:
        return JSONResponse({"ok": False, "error": str(e)}, status_code=500)

    return {"ok": True, "audio_url": f"/file/{out_path.name}"}
| |
|
@app.post("/speak.wav")
async def speak_wav_post(request: Request, background_tasks: BackgroundTasks, x_auth: Optional[str] = Header(None)):
    """POST JSON -> returns audio/wav directly; the temp WAV is deleted after the response is sent."""
    if not _auth_ok(x_auth):
        return JSONResponse({"ok": False, "error": "unauthorized"}, status_code=401)
    try:
        body = await request.json()
    except Exception:
        return JSONResponse({"detail": "Invalid JSON"}, status_code=400)

    text = (body.get("text") or "").strip()
    err = _validate_text(text)
    if err:
        return JSONResponse({"detail": err}, status_code=400)

    voice = (body.get("voice") or DEFAULT_VOICE).strip()
    length_scale = float(body.get("length_scale", 1.08))
    noise_scale = float(body.get("noise_scale", 0.35))
    noise_w = float(body.get("noise_w", 0.90))

    # A millisecond timestamp alone can collide under concurrent requests;
    # a random suffix guarantees a unique, never-overwritten file.
    ts = int(time.time() * 1000)
    out_path = FILES_DIR / f"tts-{ts}-{uuid.uuid4().hex[:8]}.wav"

    try:
        ensure_voice(voice)
        await piper_to_file(text, voice, out_path, length_scale, noise_scale, noise_w)
    except Exception as e:
        return JSONResponse({"ok": False, "error": str(e)}, status_code=500)

    # Clean up the temp WAV only after the response body has been streamed.
    background_tasks.add_task(_safe_unlink, out_path)
    return FileResponse(out_path, media_type="audio/wav", filename=out_path.name, background=background_tasks)
| |
|
@app.get("/speak.wav")
async def speak_wav_get(
    text: str,
    voice: str = DEFAULT_VOICE,
    length_scale: float = 1.08,
    noise_scale: float = 0.35,
    noise_w: float = 0.90,
    background_tasks: BackgroundTasks = None,
    x_auth: Optional[str] = Header(None),
):
    """GET query -> returns audio/wav directly; the temp WAV is deleted after send."""
    if not _auth_ok(x_auth):
        return JSONResponse({"ok": False, "error": "unauthorized"}, status_code=401)

    text = (text or "").strip()
    err = _validate_text(text)
    if err:
        return JSONResponse({"detail": err}, status_code=400)

    # A millisecond timestamp alone can collide under concurrent requests;
    # a random suffix guarantees a unique, never-overwritten file.
    ts = int(time.time() * 1000)
    out_path = FILES_DIR / f"tts-{ts}-{uuid.uuid4().hex[:8]}.wav"

    try:
        ensure_voice(voice.strip())
        await piper_to_file(text, voice.strip(), out_path, float(length_scale), float(noise_scale), float(noise_w))
    except Exception as e:
        return JSONResponse({"ok": False, "error": str(e)}, status_code=500)

    # FastAPI runs the injected BackgroundTasks after the response is sent.
    if background_tasks:
        background_tasks.add_task(_safe_unlink, out_path)
    return FileResponse(out_path, media_type="audio/wav", filename=out_path.name)
| |
|
| | |
@app.get("/debug/voices")
def debug_voices(redownload: bool = Query(False, description="Force re-download bad/missing files")):
    """Report on-disk state of every known voice; optionally re-fetch bad ones."""
    out = {"dir": str(VOICES_DIR), "voices": []}
    for vid, (_lang, _country, _family, _quality, base) in VOICE_MAP.items():
        vdir = VOICES_DIR / vid
        model = vdir / f"{base}.onnx"
        cfg = vdir / f"{base}.onnx.json"
        info = {
            "id": vid,
            "model": str(model), "cfg": str(cfg),
            "model_exists": model.exists(), "cfg_exists": cfg.exists(),
            "model_size": (model.stat().st_size if model.exists() else 0),
            "cfg_size": (cfg.stat().st_size if cfg.exists() else 0),
            "sr": _read_sr_from_cfg(cfg) if cfg.exists() else None,
        }
        out["voices"].append(info)

        # Per-voice repair: wipe the voice dir and re-download when either
        # asset is missing or below its minimum-size floor, then refresh the
        # fields reported above in place.
        if redownload and (not _file_ok(model, MIN_ONNX_BYTES) or not _file_ok(cfg, MIN_JSON_BYTES)):
            try:
                vdir.mkdir(parents=True, exist_ok=True)
                for p in vdir.glob("*"):
                    p.unlink(missing_ok=True)
                ensure_voice(vid)
                info["redownloaded"] = True
                info["model_size"] = (model.stat().st_size if model.exists() else 0)
                info["cfg_size"] = (cfg.stat().st_size if cfg.exists() else 0)
                info["sr"] = _read_sr_from_cfg(cfg) if cfg.exists() else None
            except Exception as e:
                info["redownload_error"] = str(e)
    return out
| |
|
| | |
@app.websocket("/ws/tts")
async def ws_tts(ws: WebSocket):
    """WebSocket protocol driven by JSON control messages from the client.

    "init"  -> optional auth token, voice selection and tuning parameters;
               the server replies {"event": "ready", "sr": ..., "channels": ...}.
    "speak" -> text to synthesize; the server streams raw PCM frames and then
               a {"event": "done"} (or "error") message.
    """
    await ws.accept()
    # Per-connection defaults; an "init" message may override any of them.
    voice = DEFAULT_VOICE
    length_scale, noise_scale, noise_w = 1.08, 0.35, 0.90
    voice_sr = 22050
    prebuffer_ms = PREBUFFER_MS
    prebuffer_max_wait_ms = PREBUFFER_MAX_WAIT_MS

    try:
        while True:
            msg = await ws.receive_text()
            try:
                data = json.loads(msg)
            except Exception:
                # Ignore non-JSON frames silently.
                continue
            ev = data.get("event")
            if ev == "init":
                # Auth is only enforced when a shared secret is configured.
                token = (data.get("token") or "")
                if AUTH_SHARED_SECRET and token != AUTH_SHARED_SECRET:
                    await ws.send_text(json.dumps({"event": "error", "detail": "unauthorized"}))
                    await ws.close(); return

                voice = (data.get("voice") or voice).strip()

                if "length_scale" in data: length_scale = float(data["length_scale"])
                if "noise_scale" in data: noise_scale = float(data["noise_scale"])
                if "noise_w" in data: noise_w = float(data["noise_w"])

                # Convenience: derive length_scale from a words-per-minute
                # hint (165 is treated as neutral), clamped to [0.70, 1.40].
                if "length_scale" not in data and "rate_wpm" in data:
                    try:
                        rate_wpm = int(data.get("rate_wpm", 165))
                        length_scale = max(0.70, min(1.40, 165.0 / max(100, rate_wpm)))
                    except Exception:
                        pass

                # Optional per-connection buffering overrides (floored at 0).
                if "prebuffer_ms" in data:
                    try: prebuffer_ms = max(0, int(data["prebuffer_ms"]))
                    except Exception: pass
                if "prebuffer_max_wait_ms" in data:
                    try: prebuffer_max_wait_ms = max(0, int(data["prebuffer_max_wait_ms"]))
                    except Exception: pass

                # Provision voice assets now; failure is fatal for this socket.
                try:
                    info = ensure_voice(voice)
                    voice_sr = int(info.get("sr", 22050))
                except Exception as e:
                    await ws.send_text(json.dumps({"event": "error", "detail": str(e)}))
                    await ws.close()
                    return

                await ws.send_text(json.dumps({"event": "ready", "sr": voice_sr, "channels": DEFAULT_CH}))
            elif ev == "speak":
                text = (data.get("text") or "").strip()
                if not text:
                    await ws.send_text(json.dumps({"event": "error", "detail": "empty text"}))
                    continue
                if len(text) > MAX_TEXT_CHARS:
                    await ws.send_text(json.dumps({"event":"error","detail": f"text too long (>{MAX_TEXT_CHARS})"}))
                    continue
                await piper_stream_raw(
                    text, voice, ws, voice_sr, DEFAULT_CH, length_scale, noise_scale, noise_w,
                    prebuffer_ms, prebuffer_max_wait_ms
                )

    except WebSocketDisconnect:
        return
    except Exception as e:
        # Last-resort handler: report then close, both best-effort.
        try:
            await ws.send_text(json.dumps({"event": "error", "detail": str(e)}))
        except Exception:
            pass
        try:
            await ws.close()
        except Exception:
            pass
| |
|
if __name__ == "__main__":
    # Bind on all interfaces; port configurable via $PORT (default 7860).
    uvicorn.run(app, host="0.0.0.0", port=int(os.getenv("PORT", "7860")), reload=False)
| |
|