Update app.py
Browse files
app.py
CHANGED
|
@@ -1,91 +1,117 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
from pathlib import Path
|
| 4 |
-
import
|
|
|
|
|
|
|
|
|
|
| 5 |
import subprocess
|
| 6 |
-
import
|
| 7 |
-
import shutil
|
| 8 |
|
| 9 |
app = FastAPI()
|
| 10 |
|
| 11 |
-
# ==========
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
try:
|
| 17 |
-
path.mkdir(parents=True, exist_ok=True)
|
| 18 |
-
test_file = path / ".write_test"
|
| 19 |
-
test_file.write_text("ok")
|
| 20 |
-
test_file.unlink()
|
| 21 |
-
VOICES_DIR = path
|
| 22 |
-
break
|
| 23 |
-
except Exception:
|
| 24 |
-
continue
|
| 25 |
-
else:
|
| 26 |
-
raise RuntimeError("❌ Could not find any writable directory for VOICES_DIR.")
|
| 27 |
-
|
| 28 |
-
FILES_DIR = VOICES_DIR.parent / "files"
|
| 29 |
FILES_DIR.mkdir(parents=True, exist_ok=True)
|
|
|
|
|
|
|
| 30 |
|
| 31 |
-
# ==========
|
| 32 |
-
VOICE_TAGS = [
|
| 33 |
-
"en_US-libritts-high", # Humanlike, CPU-friendly (VITS-based)
|
| 34 |
-
"en_US-amy-medium", # Classic Piper Amy
|
| 35 |
-
"en_US-lessac-high", # LJSpeech-derived VITS
|
| 36 |
-
]
|
| 37 |
-
|
| 38 |
@app.get("/health")
|
| 39 |
-
def health():
|
| 40 |
return {
|
| 41 |
"ok": True,
|
| 42 |
"engine": "piper-tts (CLI, CPU)",
|
|
|
|
| 43 |
"voice_dir": str(VOICES_DIR),
|
|
|
|
| 44 |
"files_dir": str(FILES_DIR),
|
| 45 |
-
"default_voice": None,
|
| 46 |
-
"available_voices": VOICE_TAGS,
|
| 47 |
}
|
| 48 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 49 |
@app.post("/speak")
|
| 50 |
-
async def speak(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
try:
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
|
| 83 |
except Exception as e:
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
path = FILES_DIR / filename
|
| 89 |
-
if path.exists():
|
| 90 |
-
return FileResponse(path)
|
| 91 |
-
raise HTTPException(status_code=404, detail="File not found")
|
|
|
|
| 1 |
+
# app.py
|
| 2 |
+
import os, io, time, uuid, shutil, tempfile
|
| 3 |
from pathlib import Path
|
| 4 |
+
from fastapi import FastAPI, Request
|
| 5 |
+
from fastapi.responses import FileResponse, StreamingResponse, JSONResponse
|
| 6 |
+
from fastapi.websockets import WebSocket
|
| 7 |
+
from pydantic import BaseModel
|
| 8 |
import subprocess
|
| 9 |
+
import wave
|
|
|
|
| 10 |
|
| 11 |
app = FastAPI()
|
| 12 |
|
| 13 |
+
# ========== CONFIG ==========
|
| 14 |
+
# All storage locations are anchored to the directory containing this file,
# so the service behaves the same regardless of the current working directory.
# NOTE: the special module attribute is ``__file__`` (double underscores);
# the single-underscore spelling is an undefined name and fails at import.
ROOT_DIR = Path(__file__).parent.resolve()
VOICES_DIR = ROOT_DIR / "voices"  # Piper ``*.onnx`` voice models are looked up here
FILES_DIR = ROOT_DIR / "files"    # generated WAV files are written here

# Create both directories up front so request handlers can assume they exist.
VOICES_DIR.mkdir(parents=True, exist_ok=True)
FILES_DIR.mkdir(parents=True, exist_ok=True)

DEFAULT_VOICE = "en_US-libritts-high"  # Replace with actual voice file
DEFAULT_SR = 22050
|
| 21 |
|
| 22 |
+
# ========== HEALTH ==========
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
@app.get("/health")
async def health():
    """Return a status document describing the engine and its directories.

    The list of available voices is computed on every call from the
    ``*.onnx`` files found directly inside ``VOICES_DIR`` (file stem only).
    """
    installed_voices = []
    for model_file in VOICES_DIR.glob("*.onnx"):
        installed_voices.append(model_file.stem)

    status = {
        "ok": True,
        "engine": "piper-tts (CLI, CPU)",
        "default_voice": DEFAULT_VOICE,
        "voice_dir": str(VOICES_DIR),
        "available_voices": installed_voices,
        "files_dir": str(FILES_DIR),
    }
    return status
|
| 33 |
|
| 34 |
+
# ========== SPEAK (HTTP) ==========
|
| 35 |
+
class SpeakRequest(BaseModel):
    """Request body for ``POST /speak``.

    Only ``text`` is required; every other field falls back to the default
    declared below.
    """

    # Text to synthesize.
    text: str

    # Voice name; the handler looks for ``<voice>.onnx`` inside VOICES_DIR.
    voice: str = DEFAULT_VOICE

    # NOTE(review): accepted in the payload, but the /speak handler does not
    # pass it to the piper command line - confirm whether it is still needed.
    rate_wpm: int = 170

    # The three values below are forwarded verbatim to piper's
    # ``--length_scale``, ``--noise_scale`` and ``--noise_w`` options.
    length_scale: float = 1.0
    noise_scale: float = 0.33
    noise_w: float = 0.5
|
| 42 |
+
|
| 43 |
@app.post("/speak")
async def speak(req: SpeakRequest):
    """Synthesize ``req.text`` with the Piper CLI and return the WAV file.

    Args:
        req: Validated request body. ``voice`` selects ``<voice>.onnx`` in
            ``VOICES_DIR``; ``length_scale``, ``noise_scale`` and ``noise_w``
            are forwarded to piper. ``rate_wpm`` is not forwarded.

    Returns:
        A ``FileResponse`` (``audio/wav``) for the generated file on success,
        a 404 ``JSONResponse`` when the voice is unknown or its name is not a
        plain file stem, or a 500 ``JSONResponse`` when piper cannot be run
        or exits with an error.
    """
    import asyncio
    import functools

    # ``voice`` comes straight from the client. Accept only a bare file stem
    # so values such as "../../etc/x" cannot point outside VOICES_DIR.
    if Path(req.voice).name != req.voice:
        return JSONResponse({"error": "Voice not found."}, status_code=404)

    voice_path = VOICES_DIR / f"{req.voice}.onnx"
    if not voice_path.exists():
        return JSONResponse({"error": "Voice not found."}, status_code=404)

    out_path = FILES_DIR / f"{uuid.uuid4().hex}.wav"
    cmd = [
        "piper",
        "--model", str(voice_path),
        "--output_file", str(out_path),
        "--text", req.text,
        "--length_scale", str(req.length_scale),
        "--noise_scale", str(req.noise_scale),
        "--noise_w", str(req.noise_w),
    ]

    # subprocess.run blocks until piper exits; run it in the default executor
    # so other requests keep being served while synthesis is in progress.
    loop = asyncio.get_running_loop()
    try:
        await loop.run_in_executor(
            None, functools.partial(subprocess.run, cmd, check=True)
        )
    except (subprocess.CalledProcessError, OSError) as exc:
        # CalledProcessError: piper exited non-zero.
        # OSError: the piper binary is missing or could not be started.
        print(f"[TTS Error] {exc}")
        # Do not leave a partial output file behind.
        try:
            out_path.unlink()
        except FileNotFoundError:
            pass
        return JSONResponse({"error": "Synthesis failed."}, status_code=500)

    return FileResponse(out_path, media_type="audio/wav")
|
| 63 |
+
|
| 64 |
+
# ========== STREAM (WebSocket) ==========
|
| 65 |
+
@app.websocket("/ws/tts")
async def tts_stream(websocket: WebSocket):
    """Serve text-to-speech requests over a WebSocket.

    Protocol: the client sends text frames. A frame that starts with ``{``
    and contains ``text`` is parsed as a JSON object with the keys ``text``,
    ``voice``, ``length_scale``, ``noise_scale`` and ``noise_w`` (all but
    ``text`` default per message). The synthesized WAV is sent back as one
    binary frame. Any other frame is ignored.

    Any failure while handling a message (malformed JSON, invalid voice name,
    piper error) is logged and ends the session, as before. A client
    disconnect ends the session silently.
    """
    import asyncio
    import functools
    import json

    from fastapi.websockets import WebSocketDisconnect

    await websocket.accept()
    loop = asyncio.get_running_loop()

    try:
        while True:
            data = await websocket.receive_text()

            if not (data.startswith("{") and "text" in data):
                continue

            payload = json.loads(data)
            text = payload.get("text", "")
            voice = payload.get("voice", DEFAULT_VOICE)
            length_scale = float(payload.get("length_scale", 1.0))
            noise_scale = float(payload.get("noise_scale", 0.33))
            noise_w = float(payload.get("noise_w", 0.5))

            # ``voice`` is client-supplied: accept only a bare file stem so
            # it cannot be used to reference files outside VOICES_DIR.
            if not isinstance(voice, str) or Path(voice).name != voice:
                raise ValueError(f"Invalid voice name: {voice!r}")

            # Reserve a temp path and close our handle immediately so piper
            # is the only writer; the file is removed even if synthesis fails.
            fd, tmp_name = tempfile.mkstemp(suffix=".wav")
            os.close(fd)
            try:
                cmd = [
                    "piper",
                    "--model", str(VOICES_DIR / f"{voice}.onnx"),
                    "--output_file", tmp_name,
                    "--text", text,
                    "--length_scale", str(length_scale),
                    "--noise_scale", str(noise_scale),
                    "--noise_w", str(noise_w),
                ]
                # Run the blocking piper call and the file read off the
                # event loop so other connections stay responsive.
                await loop.run_in_executor(
                    None, functools.partial(subprocess.run, cmd, check=True)
                )
                audio = await loop.run_in_executor(
                    None, Path(tmp_name).read_bytes
                )
                await websocket.send_bytes(audio)
            finally:
                os.unlink(tmp_name)

    except WebSocketDisconnect:
        # The peer already closed the connection; there is nothing to close.
        return
    except Exception as e:
        print(f"[TTS WS Error] {e}")
        try:
            await websocket.close()
        except RuntimeError:
            # The connection was already closed underneath us.
            pass
|
|
|
|
|
|
|
|
|
|
|
|