piper

Sleeping

App Files Files Community

Percy3822 committed on Sep 4, 2025

Commit

565b0ed

verified ·

1 Parent(s): 579def3

Create app.py

Browse files

Files changed (1) hide show

app.py +196 -0

app.py ADDED Viewed

	@@ -0,0 +1,196 @@

+import os, json, time, uuid, asyncio, subprocess, shlex
+from pathlib import Path
+from typing import List, Dict, Optional
+from fastapi import FastAPI, WebSocket, WebSocketDisconnect, Body
+from fastapi.responses import JSONResponse, FileResponse
+from fastapi.middleware.cors import CORSMiddleware
+from fastapi.staticfiles import StaticFiles
+VOICE_DIR = Path(os.environ.get("VOICE_DIR", "/home/user/voices"))
+FILES_DIR = Path(os.environ.get("FILES_DIR", "/tmp/tts_app/files"))
+for d in (VOICE_DIR, FILES_DIR):
+    d.mkdir(parents=True, exist_ok=True)
+def list_voices() -> List[str]:
+    # voice = pair of .onnx and .onnx.json with same base
+    voices = []
+    for onnx in VOICE_DIR.glob("*.onnx"):
+        cfg = onnx.with_suffix(onnx.suffix + ".json")
+        if cfg.exists():
+            voices.append(onnx.stem)  # e.g. "en_US-amy-medium.onnx" -> "en_US-amy-medium"
+    # strip trailing ".onnx" from stem if present
+    cleaned = []
+    for v in voices:
+        cleaned.append(v.replace(".onnx", ""))  # tolerate different stems
+    return sorted(set(cleaned))
+def voice_paths(voice: str):
+    # accept either "en_US-amy-medium" or "en_US-amy-medium.onnx"
+    base = voice.replace(".onnx", "")
+    onnx = VOICE_DIR / f"{base}.onnx"
+    cfg = VOICE_DIR / f"{base}.onnx.json"
+    if not onnx.exists() or not cfg.exists():
+        raise FileNotFoundError(f"Voice '{voice}' not found. Available: {list_voices()}")
+    return onnx, cfg
+def read_sample_rate(cfg_path: Path) -> int:
+    try:
+        cfg = json.loads(cfg_path.read_text(encoding="utf-8"))
+        # common piper key
+        sr = cfg.get("audio", {}).get("sample_rate") or cfg.get("sample_rate") or 22050
+        return int(sr)
+    except Exception:
+        return 22050
+app = FastAPI(title="TTS (Piper CLI)")
+# Allow browser clients
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=[""], allow_credentials=True, allow_methods=[""], allow_headers=["*"],
+)
+# serve generated wavs
+app.mount("/file", StaticFiles(directory=str(FILES_DIR)), name="file")
+@app.get("/health")
+def health():
+    voices = list_voices()
+    return {
+        "ok": True,
+        "engine": "piper-tts (CLI, CPU)",
+        "available_voices": voices,
+        "default_voice": voices[0] if voices else None,
+        "files_dir": str(FILES_DIR),
+        "voice_dir": str(VOICE_DIR),
+    }
+@app.post("/speak")
+async def speak(body: Dict = Body(...)):
+    """
+    HTTP fallback: synthesize full WAV, then return URL.
+    body = { "text": "...", "voice": "en_US-amy-medium" }
+    """
+    text = (body.get("text") or "").strip()
+    voice = (body.get("voice") or (list_voices()[0] if list_voices() else None))
+    if not text:
+        return JSONResponse({"ok": False, "error": "No text"}, status_code=400)
+    if not voice:
+        return JSONResponse({"ok": False, "error": "No voices available"}, status_code=500)
+    try:
+        onnx, cfg = voice_paths(voice)
+    except FileNotFoundError as e:
+        return JSONResponse({"ok": False, "error": str(e)}, status_code=400)
+    out_name = f"tts-{int(time.time()*1000)}-{uuid.uuid4().hex[:8]}.wav"
+    out_path = FILES_DIR / out_name
+    cmd = f"piper --model {shlex.quote(str(onnx))} --config {shlex.quote(str(cfg))} --output_file {shlex.quote(str(out_path))}"
+    try:
+        p = subprocess.run(
+            shlex.split(cmd),
+            input=text.encode("utf-8"),
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+            check=True,
+        )
+    except subprocess.CalledProcessError as e:
+        return JSONResponse({"ok": False, "error": "Piper failed", "detail": e.stderr.decode("utf-8","ignore")}, status_code=500)
+    return {"ok": True, "audio_url": f"/file/{out_name}"}
+@app.websocket("/ws/tts")
+async def ws_tts(ws: WebSocket):
+    await ws.accept()
+    try:
+        # 1) init from client
+        init = await ws.receive_json()
+        if init.get("type") != "init":
+            await ws.send_json({"event": "error", "error": "Expected init"})
+            await ws.close()
+            return
+        voice = init.get("voice") or (list_voices()[0] if list_voices() else None)
+        if not voice:
+            await ws.send_json({"event": "error", "error": "No voices available"})
+            await ws.close()
+            return
+        try:
+            onnx, cfg = voice_paths(voice)
+        except FileNotFoundError as e:
+            await ws.send_json({"event": "error", "error": str(e)})
+            await ws.close()
+            return
+        sr = read_sample_rate(cfg)
+        await ws.send_json({"event": "ready", "sr": sr})
+        # 2) wait speak message
+        msg = await ws.receive_json()
+        if msg.get("type") != "speak" or not msg.get("text"):
+            await ws.send_json({"event": "error", "error": "Expected speak with text"})
+            await ws.close()
+            return
+        text = msg["text"]
+        # 3) start Piper in raw streaming mode, read stdout chunks and forward as binary PCM
+        cmd = [
+            "piper",
+            "--model", str(onnx),
+            "--config", str(cfg),
+            "--output_raw", "-",         # 16-bit PCM little-endian
+            "--sentence_silence", "0.2",
+        ]
+        proc = await asyncio.create_subprocess_exec(
+            *cmd,
+            stdin=asyncio.subprocess.PIPE,
+            stdout=asyncio.subprocess.PIPE,
+            stderr=asyncio.subprocess.PIPE,
+        )
+        # feed JSONL with text
+        payload = json.dumps({"text": text}) + "\n"
+        proc.stdin.write(payload.encode("utf-8"))
+        await proc.stdin.drain()
+        proc.stdin.close()
+        async def pump_stdout():
+            try:
+                while True:
+                    chunk = await proc.stdout.read(4096)
+                    if not chunk:
+                        break
+                    # send as binary frame
+                    await ws.send_bytes(chunk)
+            except Exception as e:
+                # swallow to allow clean close
+                pass
+        async def wait_and_emit():
+            rc = await proc.wait()
+            # flush any stderr as info event (not fatal)
+            try:
+                err = await proc.stderr.read()
+            except Exception:
+                err = b""
+            await ws.send_json({"event": "done", "rc": rc, "stderr": err.decode("utf-8","ignore")})
+        await asyncio.gather(pump_stdout(), wait_and_emit())
+    except WebSocketDisconnect:
+        pass
+    except Exception as e:
+        try:
+            await ws.send_json({"event":"error","error":repr(e)})
+        except Exception:
+            pass
+    finally:
+        try:
+            await ws.close()
+        except Exception:
+            pass