Percy3822 committed on
Commit
565b0ed
·
verified ·
1 Parent(s): 579def3

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +196 -0
app.py ADDED
@@ -0,0 +1,196 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os, json, time, uuid, asyncio, subprocess, shlex
2
+ from pathlib import Path
3
+ from typing import List, Dict, Optional
4
+
5
+ from fastapi import FastAPI, WebSocket, WebSocketDisconnect, Body
6
+ from fastapi.responses import JSONResponse, FileResponse
7
+ from fastapi.middleware.cors import CORSMiddleware
8
+ from fastapi.staticfiles import StaticFiles
9
+
10
# Where the Piper voice models (.onnx + .onnx.json) are looked up; override with $VOICE_DIR.
VOICE_DIR = Path(os.getenv("VOICE_DIR", "/home/user/voices"))
# Where synthesized WAV files are written; override with $FILES_DIR.
FILES_DIR = Path(os.getenv("FILES_DIR", "/tmp/tts_app/files"))

# Create both directories at import time so later code can rely on them existing.
for _directory in (VOICE_DIR, FILES_DIR):
    _directory.mkdir(parents=True, exist_ok=True)
15
+
16
def list_voices() -> List[str]:
    """Return the sorted, de-duplicated voice names found in VOICE_DIR.

    A voice is a ``<name>.onnx`` file that has a sibling ``<name>.onnx.json``
    file; models without that sibling are skipped.
    """
    names = {
        # e.g. "en_US-amy-medium.onnx" -> "en_US-amy-medium"; any ".onnx"
        # still left in the stem is dropped as well.
        model.stem.replace(".onnx", "")
        for model in VOICE_DIR.glob("*.onnx")
        if model.with_name(model.name + ".json").exists()
    }
    return sorted(names)
28
+
29
def voice_paths(voice: str):
    """Resolve a voice name to its model and config files inside VOICE_DIR.

    Accepts either "en_US-amy-medium" or "en_US-amy-medium.onnx".

    Returns:
        A ``(onnx_path, config_path)`` tuple of ``Path`` objects.

    Raises:
        FileNotFoundError: if the name is not a plain file name, or if either
            the ``.onnx`` model or its ``.onnx.json`` config is missing.
    """
    base = voice.replace(".onnx", "")
    # The name comes straight from HTTP/WebSocket clients. Anything carrying a
    # directory component (e.g. "../../x") would resolve outside VOICE_DIR, so
    # it is treated exactly like an unknown voice.
    is_plain_name = Path(base).name == base
    onnx = VOICE_DIR / f"{base}.onnx"
    cfg = VOICE_DIR / f"{base}.onnx.json"
    if not is_plain_name or not onnx.exists() or not cfg.exists():
        raise FileNotFoundError(f"Voice '{voice}' not found. Available: {list_voices()}")
    return onnx, cfg
37
+
38
def read_sample_rate(cfg_path: Path, default: int = 22050) -> int:
    """Read the output sample rate from a voice config JSON file.

    The value is taken from ``audio.sample_rate`` when present, otherwise from
    a top-level ``sample_rate`` key.

    Args:
        cfg_path: Path to the voice's ``.onnx.json`` config file.
        default: Value returned when the file is unreadable, is not valid
            JSON, or holds no usable sample rate.

    Returns:
        The sample rate as an ``int``, or ``default``.
    """
    try:
        cfg = json.loads(cfg_path.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # Missing/unreadable file, undecodable bytes, or malformed JSON.
        return default
    if not isinstance(cfg, dict):
        return default

    # "audio" may be absent or null; only look inside it when it is a mapping
    # so a usable top-level "sample_rate" is not lost.
    audio = cfg.get("audio")
    nested = audio.get("sample_rate") if isinstance(audio, dict) else None
    sr = nested or cfg.get("sample_rate") or default
    try:
        return int(sr)
    except (TypeError, ValueError, OverflowError):
        return default
46
+
47
app = FastAPI(title="TTS (Piper CLI)")

# Allow browser clients from any origin. The wildcard must be the literal "*":
# an empty-string entry matches no origin and no method, which makes every
# cross-origin request (and its preflight) fail.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Serve generated WAV files from FILES_DIR under /file/<name>.
app.mount("/file", StaticFiles(directory=str(FILES_DIR)), name="file")
57
+
58
@app.get("/health")
def health():
    """Report service status, the installed voices and the configured directories."""
    available = list_voices()
    # The first voice in sorted order doubles as the default; None when no voice is installed.
    default = available[0] if available else None
    return {
        "ok": True,
        "engine": "piper-tts (CLI, CPU)",
        "available_voices": available,
        "default_voice": default,
        "files_dir": str(FILES_DIR),
        "voice_dir": str(VOICE_DIR),
    }
69
+
70
@app.post("/speak")
async def speak(body: Dict = Body(...)):
    """
    HTTP fallback: synthesize a full WAV with the Piper CLI, then return its URL.

    body = { "text": "...", "voice": "en_US-amy-medium" }

    "voice" is optional; the first installed voice is used when it is omitted.

    Responses:
        200 {"ok": True, "audio_url": "/file/<name>.wav"} on success.
        400 when the text is empty or the voice is invalid/unknown.
        500 when no voice is installed or Piper cannot be run / exits non-zero.
    """
    raw_text = body.get("text")
    # A non-string "text" is treated like a missing one instead of crashing on .strip().
    text = raw_text.strip() if isinstance(raw_text, str) else ""
    if not text:
        return JSONResponse({"ok": False, "error": "No text"}, status_code=400)

    voice = body.get("voice")
    if not voice:
        voices = list_voices()
        voice = voices[0] if voices else None
    if not voice:
        return JSONResponse({"ok": False, "error": "No voices available"}, status_code=500)
    if not isinstance(voice, str):
        return JSONResponse({"ok": False, "error": "Voice must be a string"}, status_code=400)

    try:
        onnx, cfg = voice_paths(voice)
    except FileNotFoundError as e:
        return JSONResponse({"ok": False, "error": str(e)}, status_code=400)

    # Timestamp plus a random suffix keeps concurrent requests from colliding.
    out_name = f"tts-{int(time.time()*1000)}-{uuid.uuid4().hex[:8]}.wav"
    out_path = FILES_DIR / out_name

    # Argument list, no shell: nothing needs quoting or re-splitting.
    cmd = [
        "piper",
        "--model", str(onnx),
        "--config", str(cfg),
        "--output_file", str(out_path),
    ]
    try:
        # Run Piper through asyncio so synthesis does not block the event loop
        # (and with it every other request) while the child process works.
        proc = await asyncio.create_subprocess_exec(
            *cmd,
            stdin=asyncio.subprocess.PIPE,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )
    except OSError as e:
        # Typically: the "piper" executable is not installed or not on PATH.
        return JSONResponse({"ok": False, "error": "Piper failed", "detail": str(e)}, status_code=500)

    # communicate() feeds stdin and drains both pipes, so the child cannot
    # stall on a full pipe buffer.
    _, stderr = await proc.communicate(text.encode("utf-8"))
    if proc.returncode != 0:
        return JSONResponse({"ok": False, "error": "Piper failed", "detail": stderr.decode("utf-8", "ignore")}, status_code=500)

    return {"ok": True, "audio_url": f"/file/{out_name}"}
104
+
105
@app.websocket("/ws/tts")
async def ws_tts(ws: WebSocket):
    """Stream synthesized speech to a WebSocket client as raw PCM.

    Protocol:
        1. client sends  {"type": "init", "voice": "<optional voice name>"}
        2. server sends  {"event": "ready", "sr": <sample rate>}
        3. client sends  {"type": "speak", "text": "..."}
        4. server sends  binary frames with Piper's stdout, then
                         {"event": "done", "rc": <exit code>, "stderr": "..."}

    Any protocol or runtime problem is reported as
    {"event": "error", "error": "..."}. The socket is always closed at the
    end, and a still-running Piper process is killed.
    """
    await ws.accept()
    proc = None
    try:
        # 1) init from client
        init = await ws.receive_json()
        if not isinstance(init, dict) or init.get("type") != "init":
            await ws.send_json({"event": "error", "error": "Expected init"})
            return

        voice = init.get("voice")
        if not voice:
            voices = list_voices()
            voice = voices[0] if voices else None
        if not voice:
            await ws.send_json({"event": "error", "error": "No voices available"})
            return

        try:
            onnx, cfg = voice_paths(voice)
        except FileNotFoundError as e:
            await ws.send_json({"event": "error", "error": str(e)})
            return

        sr = read_sample_rate(cfg)
        await ws.send_json({"event": "ready", "sr": sr})

        # 2) wait speak message
        msg = await ws.receive_json()
        if not isinstance(msg, dict) or msg.get("type") != "speak" or not msg.get("text"):
            await ws.send_json({"event": "error", "error": "Expected speak with text"})
            return

        text = msg["text"]

        # 3) start Piper in raw streaming mode, read stdout chunks and forward as binary PCM
        cmd = [
            "piper",
            "--model", str(onnx),
            "--config", str(cfg),
            "--output_raw", "-",  # 16-bit PCM little-endian
            "--sentence_silence", "0.2",
        ]
        proc = await asyncio.create_subprocess_exec(
            *cmd,
            stdin=asyncio.subprocess.PIPE,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )

        # feed JSONL with text
        # NOTE(review): the /speak endpoint pipes plain text to Piper, while
        # this path sends a JSON line; confirm the installed piper parses JSON
        # input with these flags, otherwise the JSON itself would be spoken.
        payload = json.dumps({"text": text}) + "\n"
        proc.stdin.write(payload.encode("utf-8"))
        await proc.stdin.drain()
        proc.stdin.close()

        async def pump_stdout():
            # Forward audio to the client as it is produced. A send failure is
            # allowed to propagate: swallowing it would stop draining stdout
            # and leave Piper blocked on a full pipe forever.
            while True:
                chunk = await proc.stdout.read(4096)
                if not chunk:
                    break
                await ws.send_bytes(chunk)

        # Drain stdout and stderr together so neither pipe can fill up and
        # stall the child; only once both hit EOF is the exit code collected.
        # This also guarantees "done" is sent after the last audio chunk.
        _, err = await asyncio.gather(pump_stdout(), proc.stderr.read())
        rc = await proc.wait()
        # stderr is passed along as information only; it is not treated as fatal.
        await ws.send_json({"event": "done", "rc": rc, "stderr": err.decode("utf-8", "ignore")})

    except WebSocketDisconnect:
        pass
    except Exception as e:
        try:
            await ws.send_json({"event": "error", "error": repr(e)})
        except Exception:
            # Best effort: the socket may already be gone.
            pass
    finally:
        # Never leave a Piper process behind when the client disconnects or an
        # error interrupts the stream.
        if proc is not None and proc.returncode is None:
            try:
                proc.kill()
            except ProcessLookupError:
                pass  # exited between the check and the kill
            await proc.wait()
        try:
            await ws.close()
        except Exception:
            # Best effort: closing an already-closed socket raises.
            pass