Percy3822 committed on
Commit
8df8ab9
·
verified ·
1 Parent(s): ac493b7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +68 -308
app.py CHANGED
@@ -1,41 +1,28 @@
1
- import os
2
- import io
3
- import re
4
- import time
5
- import json
6
- import shutil
7
- import pathlib
8
- import asyncio
9
- import tempfile
10
- import subprocess
11
- from typing import Optional, List
12
-
13
- import aiofiles
14
- import requests
15
- from fastapi import FastAPI, WebSocket, WebSocketDisconnect, Body
16
- from fastapi.responses import JSONResponse, FileResponse, PlainTextResponse
17
  from fastapi.middleware.cors import CORSMiddleware
 
 
18
 
19
- # --------------------------
20
  # CONFIG & PATHS (robust writable base)
21
- # --------------------------
22
- import os, pathlib, tempfile
23
-
24
  def _first_writable(candidates):
25
  for p in candidates:
26
  try:
27
  path = pathlib.Path(p).resolve()
28
  path.mkdir(parents=True, exist_ok=True)
29
- # sanity: try to create a tiny temp inside
30
- (path / ".write_test").write_text("ok", encoding="utf-8")
31
- (path / ".write_test").unlink(missing_ok=True)
32
  return path
33
  except Exception:
34
  continue
35
- # last resort: use Python's temp dir
36
- return pathlib.Path(tempfile.gettempdir()).resolve() / "tts_app"
 
37
 
38
- # allow override via env var, else try common writable roots in HF Spaces
39
  _env_base = os.environ.get("TTS_BASE_DIR", "").strip()
40
  _candidates = []
41
  if _env_base:
@@ -43,178 +30,32 @@ if _env_base:
43
  _candidates += ["/data/tts_app", "/tmp/tts_app", "/home/user/tts_app"]
44
 
45
  BASE_DIR = _first_writable(_candidates)
46
- BASE_DIR.mkdir(parents=True, exist_ok=True)
47
-
48
  FILES_DIR = BASE_DIR / "files"
49
  VOICES_DIR = BASE_DIR / "voices"
50
  FILES_DIR.mkdir(parents=True, exist_ok=True)
51
  VOICES_DIR.mkdir(parents=True, exist_ok=True)
52
# Piper voices that can be provisioned on demand. Each entry maps a voice name
# to a direct-download archive URL in the rhasspy/piper-voices repository on
# Hugging Face; per the original note, each archive contains the voice's .onnx
# model and its .onnx.json config. More voices can be added later.
_PIPER_VOICES_URL = "https://huggingface.co/rhasspy/piper-voices/resolve/main"
KNOWN_VOICES = {
    name: f"{_PIPER_VOICES_URL}/{subdir}/{name}.tar.gz?download=true"
    for name, subdir in (
        # Natural & lightweight
        ("en_US-amy-medium", "en/en_US/amy/medium"),
        ("en_US-lessac-high", "en/en_US/lessac/high"),
        ("en_US-libritts-high", "en/en_US/libritts/high"),
    )
}

# Sensible defaults: voice preference order and speaking rate (words/minute).
DEFAULT_VOICE_ORDER = [
    "en_US-lessac-high",
    "en_US-libritts-high",
    "en_US-amy-medium",
]
DEFAULT_RATE_WPM = 170
64
-
65
- # --------------------------
66
- # UTILS
67
- # --------------------------
68
def voice_files(voice_name: str):
    """Return ``(model_path, config_path)`` for a provisioned voice, else ``None``.

    A voice counts as provisioned when both ``<voice>.onnx`` and
    ``<voice>.onnx.json`` are regular files in ``VOICES_DIR/<voice>``.

    Args:
        voice_name: Voice identifier, e.g. ``"en_US-amy-medium"``.

    Returns:
        A tuple of two ``str`` paths, or ``None`` when the name is not a plain
        directory name or either file is missing.
    """
    # Voice names can come straight from clients; accept only a single path
    # component so a value such as "../x" cannot address files outside
    # VOICES_DIR.
    if (
        not voice_name
        or voice_name in (".", "..")
        or "/" in voice_name
        or "\\" in voice_name
    ):
        return None
    vdir = VOICES_DIR / voice_name
    model = vdir / f"{voice_name}.onnx"
    cfg = vdir / f"{voice_name}.onnx.json"
    # is_file() rather than exists(): a directory with a matching name is not
    # a usable model.
    if model.is_file() and cfg.is_file():
        return str(model), str(cfg)
    return None
76
-
77
def list_available_voices() -> List[str]:
    """Return the sorted names of every provisioned voice under ``VOICES_DIR``.

    Only sub-directories for which ``voice_files`` finds both model and config
    are reported.
    """
    return sorted(
        entry.name
        for entry in VOICES_DIR.iterdir()
        if entry.is_dir() and voice_files(entry.name)
    )
86
-
87
# Location of the piper executable. No definition of PIPER_BIN is visible in
# this module, so without this line every call ends in NameError instead of
# the intended check. Resolve it once with a PATH lookup; the bare name is a
# placeholder that fails the isfile() check below when piper is not installed.
PIPER_BIN = shutil.which("piper") or "piper"


def ensure_piper():
    """Verify that the piper executable is available.

    Raises:
        RuntimeError: If ``PIPER_BIN`` does not point at an existing file.
    """
    if not os.path.isfile(PIPER_BIN):
        raise RuntimeError("piper CLI not found in container PATH")
90
-
91
def untar_to_dir(tar_path: str, target_dir: pathlib.Path):
    """Extract a gzip-compressed tar archive into ``target_dir``.

    The archive comes from the network, so every member path is checked
    before anything is written.

    Args:
        tar_path: Path of the ``.tar.gz`` file.
        target_dir: Destination directory; created if missing.

    Raises:
        ValueError: If any member would be written outside ``target_dir``
            (absolute path or ``..`` traversal).
    """
    import tarfile

    target_dir.mkdir(parents=True, exist_ok=True)
    root = target_dir.resolve()
    with tarfile.open(tar_path, "r:gz") as tar:
        # Reject the whole archive up front so a bad member never leaves a
        # half-extracted voice behind.
        for member in tar.getmembers():
            dest = (root / member.name).resolve()
            if dest != root and root not in dest.parents:
                raise ValueError(f"unsafe path in archive: {member.name!r}")
        if hasattr(tarfile, "data_filter"):
            # Interpreters with extraction filters: also blocks unsafe links,
            # device nodes and permission bits.
            tar.extractall(path=root, filter="data")
        else:
            # NOTE: without extraction filters only the path check above
            # applies; link members are not vetted.
            tar.extractall(path=root)
96
-
97
def download_voice_archive(url: str, dest_dir: pathlib.Path) -> str:
    """Download ``url`` into a temporary ``.tar.gz`` file and return its path.

    ``dest_dir`` is created if missing. The archive itself is written to a
    named temporary file (not inside ``dest_dir``); the caller owns that file
    and must delete it when done.

    Args:
        url: Archive URL to fetch.
        dest_dir: Directory that should exist once the download finishes.

    Returns:
        Path of the downloaded temporary file.

    Raises:
        requests.RequestException: On connection errors or a non-2xx status.
    """
    dest_dir.mkdir(parents=True, exist_ok=True)
    tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".tar.gz")
    try:
        # Context managers close both the file and the streamed HTTP response,
        # releasing the connection even if the transfer is interrupted.
        with tmp, requests.get(url, stream=True, timeout=120) as resp:
            resp.raise_for_status()
            for chunk in resp.iter_content(chunk_size=1 << 20):
                if chunk:
                    tmp.write(chunk)
    except Exception:
        # delete=False means nothing else removes a partial download.
        try:
            os.unlink(tmp.name)
        except OSError:
            pass
        raise
    return tmp.name
107
-
108
def find_first_usable_voice(preferred: Optional[str] = None) -> Optional[str]:
    """Pick a provisioned voice to use.

    Resolution order: ``preferred`` if it is provisioned, then the first
    provisioned entry of ``DEFAULT_VOICE_ORDER``, then the alphabetically
    first provisioned voice. Returns ``None`` when nothing is provisioned.
    """
    installed = list_available_voices()
    if preferred and preferred in installed:
        return preferred
    ranked = next((name for name in DEFAULT_VOICE_ORDER if name in installed), None)
    if ranked is not None:
        return ranked
    if installed:
        return installed[0]
    return None
116
-
117
def normalize_text(text: str) -> str:
    """Trim ``text`` and make sure non-empty text ends with ``.``, ``!`` or ``?``.

    ``None`` or whitespace-only input yields an empty string. A final period
    is appended when missing so Piper doesn't "trail off".
    """
    cleaned = (text or "").strip()
    if cleaned and not cleaned.endswith((".", "!", "?")):
        cleaned += "."
    return cleaned
125
-
126
async def run_piper_to_wav(text: str, voice: str, rate_wpm: int) -> str:
    """Run the piper CLI once and return the path of the WAV file it wrote.

    Args:
        text: Text to synthesize; it is wrapped in an SSML ``<prosody>``
            element carrying ``rate_wpm`` and fed to piper on stdin.
        voice: Name of a provisioned voice (see ``voice_files``).
        rate_wpm: Requested speaking rate in words per minute.

    Returns:
        Path (``str``) of a new WAV file inside ``FILES_DIR``.

    Raises:
        RuntimeError: If piper is unavailable, the voice is not provisioned,
            or piper exits with a non-zero status.
    """
    import uuid

    ensure_piper()
    files = voice_files(voice)
    if not files:
        raise RuntimeError(f"Voice '{voice}' not provisioned")
    model, cfg = files
    # A random name instead of a millisecond timestamp: two requests landing
    # in the same millisecond would otherwise write to the same file.
    out_path = FILES_DIR / f"tts-{uuid.uuid4().hex}.wav"
    cmd = [
        PIPER_BIN,
        "--model", model,
        "--config", cfg,
        "--output_file", str(out_path),
        "--length_scale", "1.08",
        "--noise_scale", "0.33",
        "--noise_w", "0.9",
    ]
    # Rate via SSML prosody wrapper; --length_scale above is the fallback.
    # NOTE(review): whether piper interprets SSML is not established by this
    # file - confirm against the piper version in use.
    ssml = f"<speak><prosody rate='{rate_wpm}wpm'>{text}</prosody></speak>"

    proc = await asyncio.create_subprocess_exec(
        *cmd,
        stdin=asyncio.subprocess.PIPE,
        stdout=asyncio.subprocess.DEVNULL,
        stderr=asyncio.subprocess.PIPE,
    )
    _, err = await proc.communicate(input=ssml.encode("utf-8"))
    if proc.returncode != 0:
        # Do not leave a truncated WAV behind in the served directory.
        out_path.unlink(missing_ok=True)
        raise RuntimeError(f"piper failed: {err.decode('utf-8', 'ignore')}")
    return str(out_path)
157
-
158
async def chunked_ws_speak(ws: WebSocket, text: str, voice: str, rate_wpm: int):
    """Synthesize ``text`` sentence by sentence and stream the audio over ``ws``.

    Protocol, as sent by this function:
      * ``{"event": "ready", "sr": ..., "channels": ...}`` once, taken from the
        header of the first synthesized WAV;
      * the bytes of each sentence's WAV file in blocks of up to 32 KiB;
      * ``{"event": "done"}`` at the end, or ``{"event": "error", "detail": ...}``
        if synthesis of a sentence fails (streaming then stops).

    Args:
        ws: Accepted WebSocket connection.
        text: Full text to speak.
        voice: Provisioned voice name.
        rate_wpm: Speaking rate passed through to ``run_piper_to_wav``.
    """
    import wave

    # Sentence-ish split: after ., ! or ? followed by whitespace, or on newlines.
    parts = [p.strip() for p in re.split(r'(?<=[\.\!\?])\s+|\n+', text) if p.strip()]
    if not parts:
        parts = [text]

    for idx, sent in enumerate(parts, 1):
        sent = normalize_text(sent)
        try:
            wav_path = await run_piper_to_wav(sent, voice, rate_wpm)
        except Exception as e:
            await ws.send_text(json.dumps({"event": "error", "detail": str(e)}))
            return

        try:
            if idx == 1:
                # The format is only known once the first WAV exists, so the
                # "ready" announcement is sent late.
                with wave.open(wav_path, "rb") as w:
                    sr = w.getframerate()
                    ch = w.getnchannels()
                await ws.send_text(json.dumps({"event": "ready", "sr": sr, "channels": ch}))

            # Stream bytes in ~32 KiB blocks.
            async with aiofiles.open(wav_path, "rb") as f:
                while chunk := await f.read(32768):
                    await ws.send_bytes(chunk)
                    await asyncio.sleep(0)  # yield to the event loop
        finally:
            # Always drop the temp WAV, including when the client disconnects
            # mid-stream; previously a failed send leaked the file.
            try:
                os.remove(wav_path)
            except OSError:
                pass

    await ws.send_text(json.dumps({"event": "done"}))
204
-
205
- # --------------------------
206
- # FASTAPI APP
207
- # --------------------------
208
app = FastAPI(title="ActualTTS (Piper CLI, CPU)", version="1.0.0")
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    # Wildcard origins must not be combined with credentials: it would let any
    # website make credentialed requests. Nothing visible in this API relies
    # on cookies or auth headers, so credentials are disabled.
    allow_credentials=False,
    # "*" (not the empty string, which matches nothing) allows every method
    # and request header.
    allow_methods=["*"],
    allow_headers=["*"],
)
214
 
215
@app.get("/", response_class=PlainTextResponse)
def root():
    """Return a one-line plain-text summary of the available endpoints."""
    usage = "ActualTTS: use /health, POST /speak, WS /ws/tts, POST /provision"
    return usage
 
 
 
 
 
 
218
 
219
  @app.get("/health")
220
  def health():
@@ -223,133 +64,52 @@ def health():
223
  "engine": "piper-tts (CLI, CPU)",
224
  "default_voice": None,
225
  "voice_dir": str(VOICES_DIR),
226
- "available_voices": list_available_voices(),
227
  "files_dir": str(FILES_DIR),
228
  }
229
 
230
@app.post("/provision")
def provision(voice: str = Body(..., embed=True)):
    """Download and extract a Piper voice into ``VOICES_DIR``.

    Body: ``{ "voice": "en_US-amy-medium" }``

    Returns:
        ``{"ok": True, "voice": ..., "available_voices": [...]}`` on success;
        a 400 JSON error for a voice not in ``KNOWN_VOICES``; a 500 JSON error
        when the download or extraction fails or the expected model files are
        missing afterwards.
    """
    url = KNOWN_VOICES.get(voice)
    if not url:
        return JSONResponse({"ok": False, "error": f"Unknown voice '{voice}'"}, status_code=400)

    tar_path = None
    try:
        tar_path = download_voice_archive(url, VOICES_DIR)
        untar_to_dir(tar_path, VOICES_DIR / voice)
        if not voice_files(voice):
            return JSONResponse(
                {"ok": False, "error": "Provision finished but model files not found"},
                status_code=500,
            )
        return {"ok": True, "voice": voice, "available_voices": list_available_voices()}
    except Exception as e:
        return JSONResponse({"ok": False, "error": str(e)}, status_code=500)
    finally:
        # Remove the downloaded archive on every path; previously it was
        # leaked whenever extraction raised.
        if tar_path:
            try:
                os.unlink(tar_path)
            except OSError:
                pass
249
-
250
  @app.post("/speak")
251
- async def speak(
252
- text: str = Body(..., embed=True),
253
- voice: Optional[str] = Body(None, embed=True),
254
- rate_wpm: int = Body(DEFAULT_RATE_WPM, embed=True),
255
- ):
256
- text = normalize_text(text)
257
- v = voice or find_first_usable_voice()
258
- if not v:
259
- return JSONResponse(
260
- {"ok": False, "error": "No voices available. POST /provision {voice: ...} first."},
261
- status_code=400,
262
- )
263
- try:
264
- wav_path = await run_piper_to_wav(text, v, rate_wpm)
265
- except Exception as e:
266
- return JSONResponse({"ok": False, "error": str(e)}, status_code=500)
267
 
268
- rel = "/file/" + os.path.basename(wav_path)
269
- return {"ok": True, "audio_url": rel, "voice": v}
 
270
 
271
@app.get("/file/{name}")
async def get_file(name: str):
    """Serve a generated WAV file from ``FILES_DIR``.

    Returns a 404 JSON error unless ``name`` refers to a regular file located
    directly inside ``FILES_DIR``.
    """
    path = FILES_DIR / name
    # Containment + regular-file check: a name such as ".." exists on disk but
    # must not be handed to FileResponse.
    if path.resolve().parent != FILES_DIR.resolve() or not path.is_file():
        return JSONResponse({"ok": False, "error": "not found"}, status_code=404)
    return FileResponse(path, media_type="audio/wav")
277
 
278
async def _ws_fail(ws: WebSocket, detail: str) -> None:
    """Send an ``error`` event carrying ``detail`` and close the socket."""
    await ws.send_text(json.dumps({"event": "error", "detail": detail}))
    await ws.close()


@app.websocket("/ws/tts")
async def ws_tts(ws: WebSocket):
    """WebSocket TTS endpoint.

    Expected client messages, in order:
      1. ``{"event": "init", "voice": optional, "rate_wpm": optional}``
      2. ``{"event": "speak", "text": "..."}``

    Any protocol violation is answered with ``{"event": "error", ...}`` and
    the socket is closed. On success the audio is streamed by
    ``chunked_ws_speak``.
    """
    await ws.accept()
    try:
        # --- init ---------------------------------------------------------
        init = await ws.receive_text()
        try:
            msg = json.loads(init)
        except ValueError:
            return await _ws_fail(ws, "bad init json")
        if not (isinstance(msg, dict) and msg.get("event") == "init"):
            return await _ws_fail(ws, "first message must be {'event':'init'}")

        # Resolve voice: an explicit one must be provisioned, else pick one.
        voice = msg.get("voice")
        if voice:
            if not voice_files(voice):
                return await _ws_fail(ws, f"Voice '{voice}' not provisioned")
        else:
            voice = find_first_usable_voice()
            if not voice:
                return await _ws_fail(ws, "No voices. POST /provision first.")

        try:
            rate = int(msg.get("rate_wpm", DEFAULT_RATE_WPM))
        except (TypeError, ValueError):
            rate = DEFAULT_RATE_WPM

        # --- speak --------------------------------------------------------
        nxt = await ws.receive_text()
        try:
            m2 = json.loads(nxt)
        except ValueError:
            return await _ws_fail(ws, "bad speak json")
        # isinstance guard: valid JSON that is not an object (e.g. a list) has
        # no .get and used to surface as a raw AttributeError message.
        if not (isinstance(m2, dict) and m2.get("event") == "speak"):
            return await _ws_fail(ws, "expected {'event':'speak','text':...}")

        raw_text = m2.get("text", "")
        text = normalize_text(raw_text if isinstance(raw_text, str) else "")
        if not text:
            return await _ws_fail(ws, "empty text")

        # Stream
        await chunked_ws_speak(ws, text, voice=voice, rate_wpm=rate)

    except WebSocketDisconnect:
        return
    except Exception as e:
        try:
            await ws.send_text(json.dumps({"event": "error", "detail": str(e)}))
        finally:
            try:
                await ws.close()
            except Exception:
                # Best-effort close. `except Exception` rather than a bare
                # `except:` so task cancellation is not swallowed.
                pass
348
 
349
- # For HF Spaces (uvicorn entry)
350
def start():
    """Serve ``app`` with uvicorn on 0.0.0.0:7860 (entry point for HF Spaces)."""
    from uvicorn import run

    run(app, host="0.0.0.0", port=7860)


if __name__ == "__main__":
    start()
 
 
 
 
 
1
+ import os, pathlib, tempfile, shutil, uuid
2
+ from fastapi import FastAPI, HTTPException, Request
3
+ from fastapi.responses import JSONResponse
 
 
 
 
 
 
 
 
 
 
 
 
 
4
  from fastapi.middleware.cors import CORSMiddleware
5
+ from pydantic import BaseModel
6
+ import subprocess
7
 
8
+ # ----------------------------------
9
  # CONFIG & PATHS (robust writable base)
10
+ # ----------------------------------
 
 
11
  def _first_writable(candidates):
12
  for p in candidates:
13
  try:
14
  path = pathlib.Path(p).resolve()
15
  path.mkdir(parents=True, exist_ok=True)
16
+ test_file = path / ".write_test"
17
+ test_file.write_text("ok", encoding="utf-8")
18
+ test_file.unlink(missing_ok=True)
19
  return path
20
  except Exception:
21
  continue
22
+ fallback = pathlib.Path(tempfile.gettempdir()) / "tts_app"
23
+ fallback.mkdir(parents=True, exist_ok=True)
24
+ return fallback.resolve()
25
 
 
26
  _env_base = os.environ.get("TTS_BASE_DIR", "").strip()
27
  _candidates = []
28
  if _env_base:
 
30
  _candidates += ["/data/tts_app", "/tmp/tts_app", "/home/user/tts_app"]
31
 
32
  BASE_DIR = _first_writable(_candidates)
 
 
33
  FILES_DIR = BASE_DIR / "files"
34
  VOICES_DIR = BASE_DIR / "voices"
35
  FILES_DIR.mkdir(parents=True, exist_ok=True)
36
  VOICES_DIR.mkdir(parents=True, exist_ok=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
 
38
+ # ----------------------------------
39
+ # FASTAPI SETUP
40
+ # ----------------------------------
41
app = FastAPI()
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    # Wildcard origins must not be combined with credentials: it would let any
    # website make credentialed requests. Nothing visible in this API relies
    # on cookies or auth headers, so credentials are disabled.
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)
49
 
50
+ # ----------------------------------
51
+ # UTILITIES
52
+ # ----------------------------------
53
class TTSRequest(BaseModel):
    """JSON body accepted by ``POST /speak``."""

    # Text to synthesize; the handler rejects blank text.
    text: str
    # Name of the sub-directory of VOICES_DIR that holds the voice model.
    voice: str = "en_US-libritts-high"
    # The three values below are forwarded unchanged to the piper CLI flags
    # --length_scale, --noise_scale and --noise_w.
    length_scale: float = 1.0
    noise_scale: float = 0.33
    noise_w: float = 0.8
59
 
60
  @app.get("/health")
61
  def health():
 
64
  "engine": "piper-tts (CLI, CPU)",
65
  "default_voice": None,
66
  "voice_dir": str(VOICES_DIR),
67
+ "available_voices": sorted([v.name for v in VOICES_DIR.glob("/*.onnx")]),
68
  "files_dir": str(FILES_DIR),
69
  }
70
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
  @app.post("/speak")
72
+ def speak(body: TTSRequest):
73
+ if not body.text.strip():
74
+ raise HTTPException(status_code=400, detail="Empty text")
 
 
 
 
 
 
 
 
 
 
 
 
 
75
 
76
+ text_id = str(uuid.uuid4())[:8]
77
+ txt_path = FILES_DIR / f"{text_id}.txt"
78
+ wav_path = FILES_DIR / f"{text_id}.wav"
79
 
80
+ txt_path.write_text(body.text.strip(), encoding="utf-8")
 
 
 
 
 
81
 
82
+ # --------------------------
83
+ # Run piper
84
+ # --------------------------
85
+ voice = body.voice
86
+ voice_path = VOICES_DIR / voice / "model.onnx"
87
+ if not voice_path.exists():
88
+ return JSONResponse(content={"ok": False, "error": f"Voice not found: {voice}"})
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
 
90
+ cmd = [
91
+ "piper",
92
+ "--model", str(voice_path),
93
+ "--output_file", str(wav_path),
94
+ "--length_scale", str(body.length_scale),
95
+ "--noise_scale", str(body.noise_scale),
96
+ "--noise_w", str(body.noise_w),
97
+ ]
 
 
 
 
 
 
 
 
 
 
 
 
 
98
 
99
+ try:
100
+ with open(txt_path, "r", encoding="utf-8") as f:
101
+ subprocess.run(cmd, stdin=f, check=True)
102
+ except subprocess.CalledProcessError as e:
103
+ return JSONResponse(content={"ok": False, "error": str(e)})
 
 
 
 
 
104
 
105
+ return {
106
+ "ok": True,
107
+ "audio_url": f"/files/{wav_path.name}",
108
+ }
109
 
110
@app.get("/files/{filename}")
def get_file(filename: str):
    """Serve a generated WAV file from ``FILES_DIR``.

    This is the URL that ``/speak`` returns as ``audio_url``, so it must
    deliver the audio itself; the previous version only answered with JSON
    echoing the URL.

    Raises:
        HTTPException: 404 unless ``filename`` is a ``.wav`` regular file
            located directly inside ``FILES_DIR``.
    """
    # Local import: the module's visible import list only brings in
    # JSONResponse.
    from fastapi.responses import FileResponse

    f = FILES_DIR / filename
    # Containment, suffix and regular-file checks: only generated audio may be
    # downloaded, never other files or directories reachable via the name.
    if (
        f.suffix != ".wav"
        or f.resolve().parent != FILES_DIR.resolve()
        or not f.is_file()
    ):
        raise HTTPException(status_code=404, detail="File not found")
    return FileResponse(f, media_type="audio/wav")