piper

Sleeping

App Files Files Community

Percy3822 committed on Sep 4, 2025

Commit

b447642

verified ·

1 Parent(s): d62389c

Update app.py

Browse files

Files changed (1) hide show

app.py +133 -77

app.py CHANGED Viewed

@@ -1,96 +1,152 @@
-import os, json, time, asyncio, subprocess, wave, io
-from pathlib import Path
-from fastapi import FastAPI, WebSocket, Body
 from fastapi.middleware.cors import CORSMiddleware
-from fastapi.responses import JSONResponse
-VOICE_DIR = Path(os.environ.get("VOICE_DIR","/home/user/voices"))
-FILES_DIR = Path(os.environ.get("FILES_DIR","/home/user/files"))
-DEFAULT_VOICE = os.environ.get("DEFAULT_VOICE","en_US-amy-medium")
-VOICE_ONNX = VOICE_DIR / f"{DEFAULT_VOICE}.onnx"
-VOICE_CFG = VOICE_DIR / f"{DEFAULT_VOICE}.onnx.json"
-app = FastAPI(title="TTS (Piper CLI)")
 app.add_middleware(
-    CORSMiddleware, allow_origins=[""], allow_methods=[""],
-    allow_headers=["*"], allow_credentials=True,
 )
-with open(VOICE_CFG, "r", encoding="utf-8") as f:
-    cfg = json.load(f)
-SAMPLE_RATE = int(cfg.get("audio",{}).get("sample_rate", 22050))
 @app.get("/health")
 def health():
     return {
-        "ok": True, "engine": "piper-cli",
         "default_voice": DEFAULT_VOICE,
-        "available_voices": [DEFAULT_VOICE] if VOICE_ONNX.exists() else [],
-        "sample_rate": SAMPLE_RATE, "files_dir": str(FILES_DIR)
     }
-def _spawn_piper(text: str):
-    cmd = [
-        "piper",
-        "--model", str(VOICE_ONNX),
-        "--config", str(VOICE_CFG),
-        "--output_raw", "-",   # 16-bit PCM, little-endian to stdout
-    ]
-    p = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-    p.stdin.write((text.strip() + "\n").encode("utf-8"))
-    p.stdin.close()
-    return p
 @app.websocket("/ws/tts")
 async def ws_tts(ws: WebSocket):
     await ws.accept()
-    # init
-    init = await ws.receive_text()
     try:
-        data = json.loads(init)
-    except:
-        data = {}
-    # send ready with SR
-    await ws.send_text(json.dumps({"event":"ready","sr":SAMPLE_RATE}))
-    # wait speak
-    speak = await ws.receive_text()
-    msg = json.loads(speak)
-    text = msg.get("text","")
-    p = _spawn_piper(text)
-    loop = asyncio.get_event_loop()
-    async def stream_stdout():
-        while True:
-            chunk = await loop.run_in_executor(None, p.stdout.read, 4096)
-            if not chunk: break
-            await ws.send_bytes(chunk)
-    await stream_stdout()
-    rc = p.wait(timeout=60)
-    await ws.send_text(json.dumps({"event":"done","rc":rc}))
-    await ws.close()
-@app.post("/speak")
-async def speak(body: dict = Body(...)):
-    text = body.get("text","").strip()
-    if not text:
-        return JSONResponse({"ok":False,"error":"empty text"}, 400)
-    p = _spawn_piper(text)
-    raw = p.stdout.read()
-    p.wait(timeout=60)
-    # Wrap RAW PCM to WAV
-    buf = io.BytesIO()
-    with wave.open(buf, "wb") as wf:
-        wf.setnchannels(1); wf.setsampwidth(2); wf.setframerate(SAMPLE_RATE)
-        wf.writeframes(raw)
-    wav = buf.getvalue()
-    fname = f"tts-{int(time.time()*1000)}.wav"
-    path = FILES_DIR / fname
-    FILES_DIR.mkdir(parents=True, exist_ok=True)
-    with open(path, "wb") as f:
-        f.write(wav)
-    return {"ok": True, "audio_url": f"/file/{fname}"}

+import os, io, time, json, asyncio, tempfile, wave, uuid, shlex
+from typing import Optional
+from fastapi import FastAPI, WebSocket, WebSocketDisconnect, Body
+from fastapi.responses import JSONResponse, FileResponse, PlainTextResponse
 from fastapi.middleware.cors import CORSMiddleware
+import anyio
+# Service title passed to FastAPI.
+APP_NAME = "ActualTTS (espeak-ng)"
+# Synthesised WAV files are written under BASE_DIR/files; the directory is
+# created at import time.
+BASE_DIR = "/tmp/actual_tts"
+FILES_DIR = os.path.join(BASE_DIR, "files")
+os.makedirs(FILES_DIR, exist_ok=True)
+DEFAULT_VOICE = "en-us"  # espeak voice id
+DEFAULT_RATE_WPM = 170   # speaking speed
+app = FastAPI(title=APP_NAME)
+# CORS: wildcard configuration - every origin, method and header is allowed,
+# not only a local client.
 app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_methods=["*"],
+    allow_headers=["*"],
 )
+@app.get("/", response_class=PlainTextResponse)
+def root():
+    """Answer the bare root path with a plain-text OK."""
+    body = "OK"
+    return body
 @app.get("/health")
 def health():
+    """Report the engine name, default voice, output directory and a usage tip."""
+    status = {"ok": True, "engine": "espeak-ng"}
+    status["default_voice"] = DEFAULT_VOICE
+    status["files_dir"] = FILES_DIR
+    status["tip"] = "WebSocket /ws/tts (init, then speak), or POST /speak"
+    return status
+def espeak_cmd(text: str, voice: str = DEFAULT_VOICE, rate_wpm: int = DEFAULT_RATE_WPM):
+    """Build the espeak-ng argument list that synthesises *text* to stdout.
+
+    Args:
+        text: Text to speak; passed as a single positional argument.
+        voice: espeak-ng voice id, given to ``-v``.
+        rate_wpm: Speaking speed in words per minute, given to ``-s``.
+
+    Returns:
+        An argv list intended to be executed directly, without a shell.
+    """
+    # --stdout makes espeak-ng write a valid WAV to stdout
+    # -v voice, -s speed(WPM)
+    # The list is exec'd as-is (no shell), so no quoting is needed. The "--"
+    # ends option parsing: without it, request text that begins with "-"
+    # would be read by espeak-ng as a command-line option instead of spoken.
+    return ["espeak-ng", "--stdout", "-v", voice, "-s", str(rate_wpm), "--", text]
+async def synth_to_file(text: str, voice: str, rate_wpm: int) -> str:
+    """Run espeak-ng once, capture its WAV, write to a file, return path.
+
+    Args:
+        text: Text to synthesise.
+        voice: espeak-ng voice id.
+        rate_wpm: Speaking speed in words per minute.
+
+    Returns:
+        Path of the WAV file written inside FILES_DIR.
+
+    Raises:
+        RuntimeError: If espeak-ng exits non-zero or produces no output.
+    """
+    # A millisecond timestamp alone collides when two requests arrive in the
+    # same millisecond, letting one response overwrite the other; the random
+    # suffix makes every output name unique.
+    out_name = f"tts-{int(time.time()*1000)}-{uuid.uuid4().hex}.wav"
+    out_path = os.path.join(FILES_DIR, out_name)
+    proc = await asyncio.create_subprocess_exec(
+        *espeak_cmd(text, voice, rate_wpm),
+        stdout=asyncio.subprocess.PIPE,
+        stderr=asyncio.subprocess.PIPE,
+    )
+    # communicate() drains both pipes and waits for the process to exit.
+    wav_bytes, err = await proc.communicate()
+    if proc.returncode != 0 or not wav_bytes:
+        raise RuntimeError(f"espeak-ng failed rc={proc.returncode}, err={err.decode('utf-8','ignore')}")
+    with open(out_path, "wb") as f:
+        f.write(wav_bytes)
+    return out_path
+@app.post("/speak")
+async def speak_post(payload: dict = Body(...)):
+    """Synthesise the posted text to a WAV file and return its download URL.
+
+    Request body keys: ``text`` (required), ``voice`` and ``rate_wpm``
+    (optional, falling back to the module defaults).
+
+    Returns:
+        ``{"ok": True, "audio_url": "/file/<name>"}`` on success; a JSON
+        error with status 400 for invalid input or 500 if synthesis fails.
+    """
+    text = payload.get("text", "")
+    voice: str = payload.get("voice") or DEFAULT_VOICE
+    # A non-string "text" (number, null, list) used to crash on .strip() and
+    # surface as an unhandled 500; treat it the same as missing text.
+    if not isinstance(text, str) or not text.strip():
+        return JSONResponse({"ok": False, "error": "no text"}, status_code=400)
+    # A non-numeric rate is a client error, not a server failure.
+    try:
+        rate_wpm: int = int(payload.get("rate_wpm", DEFAULT_RATE_WPM))
+    except (TypeError, ValueError):
+        return JSONResponse({"ok": False, "error": "invalid rate_wpm"}, status_code=400)
+    try:
+        path = await synth_to_file(text, voice, rate_wpm)
+    except Exception as e:
+        return JSONResponse({"ok": False, "error": "Synthesis failed", "detail": str(e)}, status_code=500)
+    rel = f"/file/{os.path.basename(path)}"
+    return {"ok": True, "audio_url": rel}
+@app.get("/file/{fname}")
+async def get_file(fname: str):
+    """Serve a previously synthesised WAV file from FILES_DIR.
+
+    Args:
+        fname: File name taken from the URL path.
+
+    Returns:
+        The file as ``audio/wav``, or a JSON 404 if it does not exist or
+        does not resolve to a file directly inside FILES_DIR.
+    """
+    # fname is client-controlled: resolve it and require the result to sit
+    # directly inside FILES_DIR so "..", separators or symlinks cannot be
+    # used to read files elsewhere on disk.
+    root_dir = os.path.realpath(FILES_DIR)
+    path = os.path.realpath(os.path.join(root_dir, fname))
+    if os.path.dirname(path) != root_dir or not os.path.isfile(path):
+        return JSONResponse({"ok": False, "error": "not found"}, status_code=404)
+    return FileResponse(path, media_type="audio/wav")
+# --------- WebSocket Streaming TTS ----------
+# Protocol:
+#   Client sends: {"event":"init","voice":"en-us","rate_wpm":170}
+#   Server replies: {"event":"ready","sr":22050}
+#   Client sends: {"event":"speak","text":"..."}
+#   Server streams: binary frames with WAV bytes as they are produced
+#                   then {"event":"done"}
 @app.websocket("/ws/tts")
 async def ws_tts(ws: WebSocket):
+    """Stream one espeak-ng utterance over a WebSocket.
+
+    The client first sends an ``init`` message (optional ``voice`` and
+    ``rate_wpm``), receives ``ready``, then sends ``speak`` with ``text``.
+    The espeak-ng stdout is forwarded as binary frames in chunks of up to
+    4096 bytes, followed by a ``done`` event. Failures are reported to the
+    client as ``error`` events.
+    """
     await ws.accept()
+    voice = DEFAULT_VOICE
+    rate_wpm = DEFAULT_RATE_WPM
     try:
+        # Expect init first
+        first = await ws.receive_text()
+        msg = json.loads(first)
+        if msg.get("event") != "init":
+            await ws.send_text(json.dumps({"event":"error","detail":"first message must be {'event':'init',...}"}))
+            await ws.close(code=1002)
+            return
+        if "voice" in msg and msg["voice"]:
+            voice = msg["voice"]
+        if "rate_wpm" in msg:
+            try:
+                rate_wpm = int(msg["rate_wpm"])
+            except (TypeError, ValueError):
+                # Deliberate best-effort: an unparsable rate keeps the default.
+                pass
+        # Tell client our sample-rate. espeak-ng emits 22050 Hz PCM.
+        await ws.send_text(json.dumps({"event":"ready","sr":22050}))
+        # Wait for speak
+        nxt = await ws.receive_text()
+        data = json.loads(nxt)
+        if data.get("event") != "speak" or not data.get("text"):
+            await ws.send_text(json.dumps({"event":"error","detail":"need {'event':'speak','text':...}"}))
+            await ws.close()
+            return
+        text = data["text"]
+        # Spawn espeak-ng and stream its stdout as binary chunks
+        proc = await asyncio.create_subprocess_exec(
+            *espeak_cmd(text, voice, rate_wpm),
+            stdout=asyncio.subprocess.PIPE,
+            stderr=asyncio.subprocess.PIPE,
+        )
+        # espeak-ng writes a full WAV header then data. We just forward in chunks.
+        try:
+            while True:
+                chunk = await proc.stdout.read(4096)
+                if not chunk:
+                    break
+                await ws.send_bytes(chunk)
+            rc = await proc.wait()
+            if rc != 0:
+                err = (await proc.stderr.read()).decode("utf-8","ignore")
+                await ws.send_text(json.dumps({"event":"error","detail":f"espeak-ng failed rc={rc}: {err[:200]}" }))
+            await ws.send_text(json.dumps({"event":"done"}))
+        finally:
+            # Only a still-running child needs killing (client went away or
+            # a send failed). kill() on a process that has already exited
+            # can raise ProcessLookupError, which used to fall into the
+            # generic handler below and emit a bogus error event after "done".
+            if proc.returncode is None:
+                try:
+                    proc.kill()
+                except ProcessLookupError:
+                    pass  # exited between the check and the kill
+                await proc.wait()  # reap the child
+    except WebSocketDisconnect:
+        pass
+    except Exception as e:
+        # Best-effort error report; each step is capped at 0.1 s.
+        with anyio.move_on_after(0.1):
+            await ws.send_text(json.dumps({"event":"error","detail":str(e)}))
+        with anyio.move_on_after(0.1):
+            await ws.close()