piper

Sleeping

App Files Files Community

Percy3822 committed on Oct 10, 2025

Commit

5cd7d81

verified ·

1 Parent(s): 5ea3089

Update app.py

Browse files

Files changed (1) hide show

app.py +81 -14

app.py CHANGED Viewed

@@ -1,4 +1,7 @@
 # app.py
 import asyncio
 import json
 import os
@@ -9,7 +12,7 @@ from pathlib import Path
 from typing import Dict, Optional, Tuple
 import uvicorn
-from fastapi import FastAPI, WebSocket, WebSocketDisconnect, Request, BackgroundTasks, Query
 from fastapi.responses import JSONResponse, FileResponse, PlainTextResponse
 # -------------------------
@@ -28,7 +31,7 @@ def pick_writable_dir(candidates):
             probe.unlink(missing_ok=True)
             return p
         except Exception as e:
-            errs.append(f"{p}: {type(e).__name__}({e})")
     raise RuntimeError("No writable dir. Tried:\n  " + "\n  ".join(errs))
 ENV_DIR = os.getenv("TTS_DATA_DIR")
@@ -80,6 +83,15 @@ STREAM_BATCH_MS  = int(os.getenv("STREAM_BATCH_MS", "100"))            # ~100 ms
 DEFAULT_CH = 1  # mono
 # -------------------------
 # Voice download & checks
 # -------------------------
@@ -88,7 +100,7 @@ HF_REPO_BASE = "https://huggingface.co/rhasspy/piper-voices/resolve"
 HF_REV       = os.getenv("PIPER_VOICES_REV", "main")  # optionally pin a commit hash
 # sanity thresholds (bytes)
-MIN_ONNX_BYTES = int(os.getenv("MIN_ONNX_BYTES", "5000000"))  # >= ~5MB (real models are much larger)
 MIN_JSON_BYTES = int(os.getenv("MIN_JSON_BYTES", "1000"))     # >= 1KB
 # (lang, country, family, quality, basename)
@@ -325,8 +337,8 @@ def health():
     # optional environment versions
     try:
         import numpy, onnxruntime as ort
-        numpy_version = numpy.__version__
-        onnxruntime_version = ort.__version__
     except Exception:
         numpy_version = onnxruntime_version = None
@@ -345,6 +357,25 @@ def health():
 def root():
     return PlainTextResponse("ActualTTS (CPU) — use POST /speak, GET/POST /speak.wav, or WS /ws/tts")
 @app.get("/file/{name}")
 def get_file(name: str):
     path = FILES_DIR / name
@@ -352,22 +383,32 @@ def get_file(name: str):
         return JSONResponse({"ok": False, "error": "not found"}, status_code=404)
     return FileResponse(path)
 @app.post("/speak")
-async def speak(request: Request):
     """
     POST JSON:
       { "text": "Hello", "voice": "en_US-libritts-high",
         "length_scale": 1.08, "noise_scale": 0.35, "noise_w": 0.90 }
     Returns: { "ok": true, "audio_url": "/file/tts-XXXX.wav" }
     """
     try:
         body = await request.json()
     except Exception:
         return JSONResponse({"detail": "Invalid JSON"}, status_code=400)
     text = (body.get("text") or "").strip()
-    if not text:
-        return JSONResponse({"detail": "Missing text"}, status_code=400)
     voice        = (body.get("voice") or DEFAULT_VOICE).strip()
     length_scale = float(body.get("length_scale", 1.08))
@@ -386,16 +427,19 @@ async def speak(request: Request):
     return {"ok": True, "audio_url": f"/file/{out_path.name}"}
 @app.post("/speak.wav")
-async def speak_wav_post(request: Request, background_tasks: BackgroundTasks):
     """POST JSON -> returns audio/wav directly"""
     try:
         body = await request.json()
     except Exception:
         return JSONResponse({"detail": "Invalid JSON"}, status_code=400)
     text = (body.get("text") or "").strip()
-    if not text:
-        return JSONResponse({"detail": "Missing text"}, status_code=400)
     voice        = (body.get("voice") or DEFAULT_VOICE).strip()
     length_scale = float(body.get("length_scale", 1.08))
@@ -422,11 +466,16 @@ async def speak_wav_get(
     noise_scale: float = 0.35,
     noise_w: float = 0.90,
     background_tasks: BackgroundTasks = None,
 ):
     """GET query -> returns audio/wav directly"""
     text = (text or "").strip()
-    if not text:
-        return JSONResponse({"detail": "Missing text"}, status_code=400)
     ts = int(time.time() * 1000)
     out_path = FILES_DIR / f"tts-{ts}.wav"
@@ -490,10 +539,25 @@ async def ws_tts(ws: WebSocket):
                 continue
             ev = data.get("event")
             if ev == "init":
                 voice = (data.get("voice") or voice).strip()
                 if "length_scale" in data: length_scale = float(data["length_scale"])
                 if "noise_scale"  in data: noise_scale  = float(data["noise_scale"])
                 if "noise_w"      in data: noise_w      = float(data["noise_w"])
                 try:
                     info = ensure_voice(voice)
                     voice_sr = int(info.get("sr", 22050))
@@ -508,6 +572,9 @@ async def ws_tts(ws: WebSocket):
                 if not text:
                     await ws.send_text(json.dumps({"event": "error", "detail": "empty text"}))
                     continue
                 await piper_stream_raw(text, voice, ws, voice_sr, DEFAULT_CH, length_scale, noise_scale, noise_w)
             # ignore others
     except WebSocketDisconnect:
@@ -523,4 +590,4 @@ async def ws_tts(ws: WebSocket):
             pass
 if __name__ == "__main__":
-    uvicorn.run(app, host="0.0.0.0", port=int(os.getenv("PORT", "7860")), reload=False)

+# ---
 # app.py
+# ```python
 import asyncio
 import json
 import os
 from typing import Dict, Optional, Tuple
 import uvicorn
+from fastapi import FastAPI, WebSocket, WebSocketDisconnect, Request, BackgroundTasks, Query, Header
 from fastapi.responses import JSONResponse, FileResponse, PlainTextResponse
 # -------------------------
             probe.unlink(missing_ok=True)
             return p
         except Exception as e:
+            errs.append(f"{p}: {type(e).__name__}({e})")
     raise RuntimeError("No writable dir. Tried:\n  " + "\n  ".join(errs))
 ENV_DIR = os.getenv("TTS_DATA_DIR")
 DEFAULT_CH = 1  # mono
+# Input clamp (basic DoS protection)
+MAX_TEXT_CHARS = int(os.getenv("MAX_TEXT_CHARS", "800"))
+# Optional shared secret (x-auth header) for internal/protected calls
+AUTH_SHARED_SECRET = (os.getenv("AUTH_SHARED_SECRET") or "").strip()
+def _auth_ok(x_auth: Optional[str]) -> bool:
+    return (not AUTH_SHARED_SECRET) or (x_auth == AUTH_SHARED_SECRET)
 # -------------------------
 # Voice download & checks
 # -------------------------
 HF_REV       = os.getenv("PIPER_VOICES_REV", "main")  # optionally pin a commit hash
 # sanity thresholds (bytes)
+MIN_ONNX_BYTES = int(os.getenv("MIN_ONNX_BYTES", "5000000"))  # >= ~5MB
 MIN_JSON_BYTES = int(os.getenv("MIN_JSON_BYTES", "1000"))     # >= 1KB
 # (lang, country, family, quality, basename)
     # optional environment versions
     try:
         import numpy, onnxruntime as ort
+        numpy_version = numpy.__version__
+        onnxruntime_version = ort.__version__
     except Exception:
         numpy_version = onnxruntime_version = None
 def root():
     return PlainTextResponse("ActualTTS (CPU) — use POST /speak, GET/POST /speak.wav, or WS /ws/tts")
+@app.post("/provision")
+async def provision(request: Request, x_auth: Optional[str] = Header(None)):
+    """
+    POST JSON: { "voice": "en_US-amy-medium" }
+    Downloads voice assets if missing. Returns {ok, voice, sr}.
+    """
+    if not _auth_ok(x_auth):
+        return JSONResponse({"ok": False, "error": "unauthorized"}, status_code=401)
+    try:
+        body = await request.json()
+    except Exception:
+        return JSONResponse({"ok": False, "error": "invalid json"}, status_code=400)
+    voice = (body.get("voice") or DEFAULT_VOICE).strip()
+    try:
+        info = ensure_voice(voice)
+        return {"ok": True, "voice": voice, "sr": int(info.get("sr", 22050))}
+    except Exception as e:
+        return JSONResponse({"ok": False, "error": str(e)}, status_code=500)
 @app.get("/file/{name}")
 def get_file(name: str):
     path = FILES_DIR / name
         return JSONResponse({"ok": False, "error": "not found"}, status_code=404)
     return FileResponse(path)
+def _validate_text(text: str) -> Optional[str]:
+    if not text:
+        return "Missing text"
+    if len(text) > MAX_TEXT_CHARS:
+        return f"text too long (>{MAX_TEXT_CHARS} chars)"
+    return None
 @app.post("/speak")
+async def speak(request: Request, x_auth: Optional[str] = Header(None)):
     """
     POST JSON:
       { "text": "Hello", "voice": "en_US-libritts-high",
         "length_scale": 1.08, "noise_scale": 0.35, "noise_w": 0.90 }
     Returns: { "ok": true, "audio_url": "/file/tts-XXXX.wav" }
     """
+    if not _auth_ok(x_auth):
+        return JSONResponse({"ok": False, "error": "unauthorized"}, status_code=401)
     try:
         body = await request.json()
     except Exception:
         return JSONResponse({"detail": "Invalid JSON"}, status_code=400)
     text = (body.get("text") or "").strip()
+    err = _validate_text(text)
+    if err:
+        return JSONResponse({"detail": err}, status_code=400)
     voice        = (body.get("voice") or DEFAULT_VOICE).strip()
     length_scale = float(body.get("length_scale", 1.08))
     return {"ok": True, "audio_url": f"/file/{out_path.name}"}
 @app.post("/speak.wav")
+async def speak_wav_post(request: Request, background_tasks: BackgroundTasks, x_auth: Optional[str] = Header(None)):
     """POST JSON -> returns audio/wav directly"""
+    if not _auth_ok(x_auth):
+        return JSONResponse({"ok": False, "error": "unauthorized"}, status_code=401)
     try:
         body = await request.json()
     except Exception:
         return JSONResponse({"detail": "Invalid JSON"}, status_code=400)
     text = (body.get("text") or "").strip()
+    err = _validate_text(text)
+    if err:
+        return JSONResponse({"detail": err}, status_code=400)
     voice        = (body.get("voice") or DEFAULT_VOICE).strip()
     length_scale = float(body.get("length_scale", 1.08))
     noise_scale: float = 0.35,
     noise_w: float = 0.90,
     background_tasks: BackgroundTasks = None,
+    x_auth: Optional[str] = Header(None),
 ):
     """GET query -> returns audio/wav directly"""
+    if not _auth_ok(x_auth):
+        return JSONResponse({"ok": False, "error": "unauthorized"}, status_code=401)
     text = (text or "").strip()
+    err = _validate_text(text)
+    if err:
+        return JSONResponse({"detail": err}, status_code=400)
     ts = int(time.time() * 1000)
     out_path = FILES_DIR / f"tts-{ts}.wav"
                 continue
             ev = data.get("event")
             if ev == "init":
+                # optional shared-secret over WS: sent in the 'token' field of the init message
+                token = (data.get("token") or "")
+                if AUTH_SHARED_SECRET and token != AUTH_SHARED_SECRET:
+                    await ws.send_text(json.dumps({"event": "error", "detail": "unauthorized"}))
+                    await ws.close(); return
                 voice = (data.get("voice") or voice).strip()
+                # Accept explicit params first
                 if "length_scale" in data: length_scale = float(data["length_scale"])
                 if "noise_scale"  in data: noise_scale  = float(data["noise_scale"])
                 if "noise_w"      in data: noise_w      = float(data["noise_w"])
+                # Optional: map rate_wpm → length_scale if user didn't set a custom length_scale
+                if "length_scale" not in data and "rate_wpm" in data:
+                    try:
+                        rate_wpm = int(data.get("rate_wpm", 165))
+                        # crude monotonic mapping: faster WPM → smaller length_scale
+                        length_scale = max(0.70, min(1.40, 165.0 / max(100, rate_wpm)))
+                    except Exception:
+                        pass
                 try:
                     info = ensure_voice(voice)
                     voice_sr = int(info.get("sr", 22050))
                 if not text:
                     await ws.send_text(json.dumps({"event": "error", "detail": "empty text"}))
                     continue
+                if len(text) > MAX_TEXT_CHARS:
+                    await ws.send_text(json.dumps({"event":"error","detail": f"text too long (>{MAX_TEXT_CHARS})"}))
+                    continue
                 await piper_stream_raw(text, voice, ws, voice_sr, DEFAULT_CH, length_scale, noise_scale, noise_w)
             # ignore others
     except WebSocketDisconnect:
             pass
 if __name__ == "__main__":
+    uvicorn.run(app, host="0.0.0.0", port=int(os.getenv("PORT", "7860")), reload=False)