piper

Sleeping

App Files Community

Percy3822 committed on Sep 5, 2025

Commit

8df8ab9

verified ·

1 Parent(s): ac493b7

Update app.py

Browse files

Files changed (1) hide show

app.py +68 -308

app.py CHANGED Viewed

@@ -1,41 +1,28 @@
-import os
-import io
-import re
-import time
-import json
-import shutil
-import pathlib
-import asyncio
-import tempfile
-import subprocess
-from typing import Optional, List
-import aiofiles
-import requests
-from fastapi import FastAPI, WebSocket, WebSocketDisconnect, Body
-from fastapi.responses import JSONResponse, FileResponse, PlainTextResponse
 from fastapi.middleware.cors import CORSMiddleware
-# --------------------------
 # CONFIG & PATHS (robust writable base)
-# --------------------------
-import os, pathlib, tempfile
 def _first_writable(candidates):
     for p in candidates:
         try:
             path = pathlib.Path(p).resolve()
             path.mkdir(parents=True, exist_ok=True)
-            # sanity: try to create a tiny temp inside
-            (path / ".write_test").write_text("ok", encoding="utf-8")
-            (path / ".write_test").unlink(missing_ok=True)
             return path
         except Exception:
             continue
-    # last resort: use Python's temp dir
-    return pathlib.Path(tempfile.gettempdir()).resolve() / "tts_app"
-# allow override via env var, else try common writable roots in HF Spaces
 _env_base = os.environ.get("TTS_BASE_DIR", "").strip()
 _candidates = []
 if _env_base:
@@ -43,178 +30,32 @@ if _env_base:
 _candidates += ["/data/tts_app", "/tmp/tts_app", "/home/user/tts_app"]
 BASE_DIR = _first_writable(_candidates)
-BASE_DIR.mkdir(parents=True, exist_ok=True)
 FILES_DIR = BASE_DIR / "files"
 VOICES_DIR = BASE_DIR / "voices"
 FILES_DIR.mkdir(parents=True, exist_ok=True)
 VOICES_DIR.mkdir(parents=True, exist_ok=True)
-# Known, reliable Piper model URLs (tiny selections). You can add more later.
-# These are Rhasspy / Piper HF mirror direct archives that contain .onnx + .onnx.json.
-KNOWN_VOICES = {
-    # Natural & lightweight
-    "en_US-amy-medium": "https://huggingface.co/rhasspy/piper-voices/resolve/main/en/en_US/amy/medium/en_US-amy-medium.tar.gz?download=true",
-    "en_US-lessac-high": "https://huggingface.co/rhasspy/piper-voices/resolve/main/en/en_US/lessac/high/en_US-lessac-high.tar.gz?download=true",
-    "en_US-libritts-high": "https://huggingface.co/rhasspy/piper-voices/resolve/main/en/en_US/libritts/high/en_US-libritts-high.tar.gz?download=true",
-}
-# Sensible defaults
-DEFAULT_VOICE_ORDER = ["en_US-lessac-high", "en_US-libritts-high", "en_US-amy-medium"]
-DEFAULT_RATE_WPM = 170
-# --------------------------
-# UTILS
-# --------------------------
-def voice_files(voice_name: str):
-    """Return model + config paths if present; else None."""
-    vdir = VOICES_DIR / voice_name
-    model = vdir / f"{voice_name}.onnx"
-    cfg = vdir / f"{voice_name}.onnx.json"
-    if model.exists() and cfg.exists():
-        return str(model), str(cfg)
-    return None
-def list_available_voices() -> List[str]:
-    out = []
-    for p in VOICES_DIR.iterdir():
-        if not p.is_dir():
-            continue
-        name = p.name
-        if voice_files(name):
-            out.append(name)
-    return sorted(out)
-def ensure_piper():
-    if not os.path.isfile(PIPER_BIN):
-        raise RuntimeError("piper CLI not found in container PATH")
-def untar_to_dir(tar_path: str, target_dir: pathlib.Path):
-    import tarfile
-    target_dir.mkdir(parents=True, exist_ok=True)
-    with tarfile.open(tar_path, "r:gz") as tar:
-        tar.extractall(path=target_dir)
-def download_voice_archive(url: str, dest_dir: pathlib.Path) -> str:
-    dest_dir.mkdir(parents=True, exist_ok=True)
-    with tempfile.NamedTemporaryFile(delete=False, suffix=".tar.gz") as tmp:
-        r = requests.get(url, stream=True, timeout=120)
-        r.raise_for_status()
-        for chunk in r.iter_content(chunk_size=1 << 20):
-            if chunk:
-                tmp.write(chunk)
-        tmp.flush()
-        return tmp.name
-def find_first_usable_voice(preferred: Optional[str] = None) -> Optional[str]:
-    avail = list_available_voices()
-    if preferred and preferred in avail:
-        return preferred
-    for v in DEFAULT_VOICE_ORDER:
-        if v in avail:
-            return v
-    return avail[0] if avail else None
-def normalize_text(text: str) -> str:
-    t = (text or "").strip()
-    if not t:
-        return t
-    # Ensure final punctuation so Piper doesn’t “trail off”
-    if t[-1] not in ".!?":
-        t += "."
-    return t
-async def run_piper_to_wav(text: str, voice: str, rate_wpm: int) -> str:
-    """Run piper CLI once, produce a single WAV file path."""
-    ensure_piper()
-    files = voice_files(voice)
-    if not files:
-        raise RuntimeError(f"Voice '{voice}' not provisioned")
-    model, cfg = files
-    out_path = FILES_DIR / f"tts-{int(time.time()*1000)}.wav"
-    cmd = [
-        PIPER_BIN,
-        "--model", model,
-        "--config", cfg,
-        "--output_file", str(out_path),
-        "--length_scale", "1.08",
-        "--noise_scale", "0.33",
-        "--noise_w", "0.9",
-    ]
-    # Rate via SSML prosody hack: wrap text
-    # Piper respects SSML rate attributes for some voices; if not, length_scale above helps.
-    ssml = f"<speak><prosody rate='{rate_wpm}wpm'>{text}</prosody></speak>"
-    proc = await asyncio.create_subprocess_exec(
-        *cmd,
-        stdin=asyncio.subprocess.PIPE,
-        stdout=asyncio.subprocess.DEVNULL,
-        stderr=asyncio.subprocess.PIPE,
-    )
-    _, err = await proc.communicate(input=ssml.encode("utf-8"))
-    if proc.returncode != 0:
-        raise RuntimeError(f"piper failed: {err.decode('utf-8', 'ignore')}")
-    return str(out_path)
-async def chunked_ws_speak(ws: WebSocket, text: str, voice: str, rate_wpm: int):
-    """
-    Chunk text into sentences; synth each chunk to WAV sequentially,
-    stream WAV bytes in small blocks so the client can play immediately.
-    """
-    # Sentence-ish split
-    parts = [p.strip() for p in re.split(r'(?<=[\.\!\?])\s+|\n+', text) if p.strip()]
-    if not parts:
-        parts = [text]
-    # Let the client know sample rate; we don’t know in advance reliably, so we announce late.
-    for idx, sent in enumerate(parts, 1):
-        sent = normalize_text(sent)
-        try:
-            wav_path = await run_piper_to_wav(sent, voice, rate_wpm)
-        except Exception as e:
-            await ws.send_text(json.dumps({"event": "error", "detail": str(e)}))
-            return
-        # Peek WAV header to extract SR/channels
-        import wave
-        with wave.open(wav_path, "rb") as w:
-            sr = w.getframerate()
-            ch = w.getnchannels()
-            sampw = w.getsampwidth()
-            frames = w.getnframes()
-        if idx == 1:
-            await ws.send_text(json.dumps({"event": "ready", "sr": sr, "channels": ch}))
-        # Stream bytes in ~32 KiB blocks
-        async with aiofiles.open(wav_path, "rb") as f:
-            while True:
-                chunk = await f.read(32768)
-                if not chunk:
-                    break
-                await ws.send_bytes(chunk)
-                await asyncio.sleep(0)  # yield
-        # optional: delete temp wav to save space
-        try:
-            os.remove(wav_path)
-        except:
-            pass
-    await ws.send_text(json.dumps({"event": "done"}))
-# --------------------------
-# FASTAPI APP
-# --------------------------
-app = FastAPI(title="ActualTTS (Piper CLI, CPU)", version="1.0.0")
 app.add_middleware(
     CORSMiddleware,
-    allow_origins=["*"], allow_credentials=True,
-    allow_methods=[""], allow_headers=[""],
 )
-@app.get("/", response_class=PlainTextResponse)
-def root():
-    return "ActualTTS: use /health, POST /speak, WS /ws/tts, POST /provision"
 @app.get("/health")
 def health():
@@ -223,133 +64,52 @@ def health():
         "engine": "piper-tts (CLI, CPU)",
         "default_voice": None,
         "voice_dir": str(VOICES_DIR),
-        "available_voices": list_available_voices(),
         "files_dir": str(FILES_DIR),
     }
-@app.post("/provision")
-def provision(voice: str = Body(..., embed=True)):
-    """
-    Download and extract a Piper voice into VOICES_DIR.
-    Body: { "voice": "en_US-amy-medium" }
-    """
-    url = KNOWN_VOICES.get(voice)
-    if not url:
-        return JSONResponse({"ok": False, "error": f"Unknown voice '{voice}'"}, status_code=400)
-    try:
-        tar_path = download_voice_archive(url, VOICES_DIR)
-        target = VOICES_DIR / voice
-        untar_to_dir(tar_path, target)
-        os.unlink(tar_path)
-        if not voice_files(voice):
-            return JSONResponse({"ok": False, "error": "Provision finished but model files not found"}, status_code=500)
-        return {"ok": True, "voice": voice, "available_voices": list_available_voices()}
-    except Exception as e:
-        return JSONResponse({"ok": False, "error": str(e)}, status_code=500)
 @app.post("/speak")
-async def speak(
-    text: str = Body(..., embed=True),
-    voice: Optional[str] = Body(None, embed=True),
-    rate_wpm: int = Body(DEFAULT_RATE_WPM, embed=True),
-):
-    text = normalize_text(text)
-    v = voice or find_first_usable_voice()
-    if not v:
-        return JSONResponse(
-            {"ok": False, "error": "No voices available. POST /provision {voice: ...} first."},
-            status_code=400,
-        )
-    try:
-        wav_path = await run_piper_to_wav(text, v, rate_wpm)
-    except Exception as e:
-        return JSONResponse({"ok": False, "error": str(e)}, status_code=500)
-    rel = "/file/" + os.path.basename(wav_path)
-    return {"ok": True, "audio_url": rel, "voice": v}
-@app.get("/file/{name}")
-async def get_file(name: str):
-    path = FILES_DIR / name
-    if not path.exists():
-        return JSONResponse({"ok": False, "error": "not found"}, status_code=404)
-    return FileResponse(path, media_type="audio/wav")
-@app.websocket("/ws/tts")
-async def ws_tts(ws: WebSocket):
-    await ws.accept()
-    voice = None
-    rate = DEFAULT_RATE_WPM
-    try:
-        # Expect an init first
-        init = await ws.receive_text()
-        try:
-            msg = json.loads(init)
-        except Exception:
-            await ws.send_text(json.dumps({"event": "error", "detail": "bad init json"}))
-            await ws.close()
-            return
-        if not (isinstance(msg, dict) and msg.get("event") == "init"):
-            await ws.send_text(json.dumps({"event": "error", "detail": "first message must be {'event':'init'}"}))
-            await ws.close()
-            return
-        # Resolve voice
-        v_in = msg.get("voice")
-        if v_in:
-            if not voice_files(v_in):
-                await ws.send_text(json.dumps({"event": "error", "detail": f"Voice '{v_in}' not provisioned"}))
-                await ws.close()
-                return
-            voice = v_in
-        else:
-            voice = find_first_usable_voice()
-            if not voice:
-                await ws.send_text(json.dumps({"event": "error", "detail": "No voices. POST /provision first."}))
-                await ws.close()
-                return
-        try:
-            rate = int(msg.get("rate_wpm", DEFAULT_RATE_WPM))
-        except Exception:
-            rate = DEFAULT_RATE_WPM
-        # Next message must be speak
-        nxt = await ws.receive_text()
-        try:
-            m2 = json.loads(nxt)
-        except Exception:
-            await ws.send_text(json.dumps({"event": "error", "detail": "bad speak json"}))
-            await ws.close()
-            return
-        if m2.get("event") != "speak":
-            await ws.send_text(json.dumps({"event": "error", "detail": "expected {'event':'speak','text':...}"}))
-            await ws.close()
-            return
-        text = normalize_text(m2.get("text", ""))
-        if not text:
-            await ws.send_text(json.dumps({"event": "error", "detail": "empty text"}))
-            await ws.close()
-            return
-        # Stream
-        await chunked_ws_speak(ws, text, voice=voice, rate_wpm=rate)
-    except WebSocketDisconnect:
-        return
-    except Exception as e:
-        try:
-            await ws.send_text(json.dumps({"event": "error", "detail": str(e)}))
-        finally:
-            try:
-                await ws.close()
-            except:
-                pass
-# For HF Spaces (uvicorn entry)
-def start():
-    import uvicorn
-    uvicorn.run(app, host="0.0.0.0", port=7860)
-if __name__ == "__main__":
-    start()

+import os, pathlib, tempfile, shutil, uuid
+from fastapi import FastAPI, HTTPException, Request
+from fastapi.responses import JSONResponse
 from fastapi.middleware.cors import CORSMiddleware
+from pydantic import BaseModel
+import subprocess
+# ----------------------------------
 # CONFIG & PATHS (robust writable base)
+# ----------------------------------
 def _first_writable(candidates):
     for p in candidates:
         try:
             path = pathlib.Path(p).resolve()
             path.mkdir(parents=True, exist_ok=True)
+            test_file = path / ".write_test"
+            test_file.write_text("ok", encoding="utf-8")
+            test_file.unlink(missing_ok=True)
             return path
         except Exception:
             continue
+    fallback = pathlib.Path(tempfile.gettempdir()) / "tts_app"
+    fallback.mkdir(parents=True, exist_ok=True)
+    return fallback.resolve()
 _env_base = os.environ.get("TTS_BASE_DIR", "").strip()
 _candidates = []
 if _env_base:
 _candidates += ["/data/tts_app", "/tmp/tts_app", "/home/user/tts_app"]
 BASE_DIR = _first_writable(_candidates)
 FILES_DIR = BASE_DIR / "files"
 VOICES_DIR = BASE_DIR / "voices"
 FILES_DIR.mkdir(parents=True, exist_ok=True)
 VOICES_DIR.mkdir(parents=True, exist_ok=True)
+# ----------------------------------
+# FASTAPI SETUP
+# ----------------------------------
+app = FastAPI()
 app.add_middleware(
     CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
 )
+# ----------------------------------
+# UTILITIES
+# ----------------------------------
+class TTSRequest(BaseModel):
+    text: str
+    voice: str = "en_US-libritts-high"
+    length_scale: float = 1.0
+    noise_scale: float = 0.33
+    noise_w: float = 0.8
 @app.get("/health")
 def health():
         "engine": "piper-tts (CLI, CPU)",
         "default_voice": None,
         "voice_dir": str(VOICES_DIR),
+        "available_voices": sorted([v.name for v in VOICES_DIR.glob("/*.onnx")]),
         "files_dir": str(FILES_DIR),
     }
 @app.post("/speak")
+def speak(body: TTSRequest):
+    if not body.text.strip():
+        raise HTTPException(status_code=400, detail="Empty text")
+    text_id = str(uuid.uuid4())[:8]
+    txt_path = FILES_DIR / f"{text_id}.txt"
+    wav_path = FILES_DIR / f"{text_id}.wav"
+    txt_path.write_text(body.text.strip(), encoding="utf-8")
+    # --------------------------
+    # Run piper
+    # --------------------------
+    voice = body.voice
+    voice_path = VOICES_DIR / voice / "model.onnx"
+    if not voice_path.exists():
+        return JSONResponse(content={"ok": False, "error": f"Voice not found: {voice}"})
+    cmd = [
+        "piper",
+        "--model", str(voice_path),
+        "--output_file", str(wav_path),
+        "--length_scale", str(body.length_scale),
+        "--noise_scale", str(body.noise_scale),
+        "--noise_w", str(body.noise_w),
+    ]
+    try:
+        with open(txt_path, "r", encoding="utf-8") as f:
+            subprocess.run(cmd, stdin=f, check=True)
+    except subprocess.CalledProcessError as e:
+        return JSONResponse(content={"ok": False, "error": str(e)})
+    return {
+        "ok": True,
+        "audio_url": f"/files/{wav_path.name}",
+    }
+@app.get("/files/{filename}")
+def get_file(filename: str):
+    f = FILES_DIR / filename
+    if not f.exists():
+        raise HTTPException(status_code=404, detail="File not found")
+    return JSONResponse(content={"url": f"/files/{filename}"})