Percy3822 committed on
Commit
dffc848
·
verified ·
1 Parent(s): 3d86a9e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +98 -72
app.py CHANGED
@@ -1,91 +1,117 @@
1
- from fastapi import FastAPI, HTTPException, Request
2
- from fastapi.responses import FileResponse
3
  from pathlib import Path
4
- import uuid
 
 
 
5
  import subprocess
6
- import os
7
- import shutil
8
 
9
  app = FastAPI()
10
 
11
- # ========== Dynamic VOICES_DIR fix ==========
12
- # Try /tmp/tts_app/voices first
13
- base_dirs = [Path("/tmp/tts_app/voices"), Path("/data/voices"), Path("/home/user/voices"), Path.cwd() / "voices"]
14
-
15
- for path in base_dirs:
16
- try:
17
- path.mkdir(parents=True, exist_ok=True)
18
- test_file = path / ".write_test"
19
- test_file.write_text("ok")
20
- test_file.unlink()
21
- VOICES_DIR = path
22
- break
23
- except Exception:
24
- continue
25
- else:
26
- raise RuntimeError("❌ Could not find any writable directory for VOICES_DIR.")
27
-
28
- FILES_DIR = VOICES_DIR.parent / "files"
29
  FILES_DIR.mkdir(parents=True, exist_ok=True)
 
 
30
 
31
- # ========== Piper engine config ==========
32
- VOICE_TAGS = [
33
- "en_US-libritts-high", # Humanlike, CPU-friendly (VITS-based)
34
- "en_US-amy-medium", # Classic Piper Amy
35
- "en_US-lessac-high", # LJSpeech-derived VITS
36
- ]
37
-
38
  @app.get("/health")
39
- def health():
40
  return {
41
  "ok": True,
42
  "engine": "piper-tts (CLI, CPU)",
 
43
  "voice_dir": str(VOICES_DIR),
 
44
  "files_dir": str(FILES_DIR),
45
- "default_voice": None,
46
- "available_voices": VOICE_TAGS,
47
  }
48
 
 
 
 
 
 
 
 
 
 
49
  @app.post("/speak")
50
- async def speak(request: Request):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
  try:
52
- payload = await request.json()
53
- text = payload.get("text", "").strip()
54
- voice = payload.get("voice", VOICE_TAGS[0])
55
- length_scale = float(payload.get("length_scale", 1.08))
56
- noise_scale = float(payload.get("noise_scale", 0.33))
57
- noise_w = float(payload.get("noise_w", 0.8))
58
-
59
- if not text:
60
- raise HTTPException(status_code=400, detail="No text provided")
61
-
62
- output_file = FILES_DIR / f"{uuid.uuid4().hex}.wav"
63
- command = [
64
- "piper",
65
- "--model", f"voices/{voice}.onnx",
66
- "--output_file", str(output_file),
67
- "--text", text,
68
- "--length_scale", str(length_scale),
69
- "--noise_scale", str(noise_scale),
70
- "--noise_w", str(noise_w),
71
- ]
72
-
73
- result = subprocess.run(command, capture_output=True, text=True)
74
- if result.returncode != 0 or not output_file.exists():
75
- raise HTTPException(status_code=500, detail=f"TTS failed: {result.stderr.strip()}")
76
-
77
- return {
78
- "ok": True,
79
- "voice": voice,
80
- "audio_url": f"/file/{output_file.name}"
81
- }
82
 
83
  except Exception as e:
84
- raise HTTPException(status_code=500, detail=str(e))
85
-
86
- @app.get("/file/{filename}")
87
- async def get_file(filename: str):
88
- path = FILES_DIR / filename
89
- if path.exists():
90
- return FileResponse(path)
91
- raise HTTPException(status_code=404, detail="File not found")
 
1
+ # app.py
2
+ import os, io, time, uuid, shutil, tempfile
3
  from pathlib import Path
4
+ from fastapi import FastAPI, Request
5
+ from fastapi.responses import FileResponse, StreamingResponse, JSONResponse
6
+ from fastapi.websockets import WebSocket
7
+ from pydantic import BaseModel
8
  import subprocess
9
+ import wave
 
10
 
11
  app = FastAPI()
12
 
13
# ========== CONFIG ==========
# Directory that contains this module; voice models and generated audio are
# kept in sub-directories next to it.
ROOT_DIR = Path(__file__).parent.resolve()
VOICES_DIR = ROOT_DIR / "voices"  # searched for "<voice>.onnx" model files
FILES_DIR = ROOT_DIR / "files"    # synthesized WAV output is written here

# Create both directories at import time so the handlers can rely on them.
VOICES_DIR.mkdir(parents=True, exist_ok=True)
FILES_DIR.mkdir(parents=True, exist_ok=True)

DEFAULT_VOICE = "en_US-libritts-high"  # Replace with actual voice file
DEFAULT_SR = 22050
 
22
# ========== HEALTH ==========
@app.get("/health")
async def health():
    """Return a status document describing the TTS service.

    The response lists the configured default voice, the voice and output
    directories, and the stem of every ``*.onnx`` file currently present in
    the voice directory.
    """
    # Voices are discovered on every call, so newly dropped models show up
    # without a restart.
    voice_names = [model.stem for model in VOICES_DIR.glob("*.onnx")]

    status = {
        "ok": True,
        "engine": "piper-tts (CLI, CPU)",
        "default_voice": DEFAULT_VOICE,
        "voice_dir": str(VOICES_DIR),
        "available_voices": voice_names,
        "files_dir": str(FILES_DIR),
    }
    return status
33
 
34
# ========== SPEAK (HTTP) ==========
class SpeakRequest(BaseModel):
    """JSON body accepted by ``POST /speak``."""

    text: str
    voice: str = DEFAULT_VOICE
    # Accepted in the body, but the /speak handler does not pass it to piper.
    rate_wpm: int = 170
    length_scale: float = 1.0
    noise_scale: float = 0.33
    noise_w: float = 0.5


def _synthesize_to_file(cmd):
    """Run the piper command synchronously and return its CompletedProcess."""
    return subprocess.run(cmd, capture_output=True, text=True)


@app.post("/speak")
async def speak(req: SpeakRequest):
    """Synthesize ``req.text`` with piper and return the resulting WAV file.

    Args:
        req: Validated request body (text, voice name and piper scales).

    Returns:
        A ``FileResponse`` with media type ``audio/wav`` on success, or a
        ``JSONResponse`` of the form ``{"error": ...}`` with status 400
        (blank text), 404 (unknown voice) or 500 (piper could not be run or
        did not produce a file).
    """
    import asyncio

    if not req.text.strip():
        return JSONResponse({"error": "No text provided."}, status_code=400)

    # Only a bare file stem is accepted, so a value such as "../../x" cannot
    # make the model path point outside VOICES_DIR.
    voice_path = VOICES_DIR / f"{req.voice}.onnx"
    if Path(req.voice).name != req.voice or not voice_path.is_file():
        return JSONResponse({"error": "Voice not found."}, status_code=404)

    # NOTE(review): each request leaves its WAV in FILES_DIR; nothing in this
    # handler removes it afterwards.
    out_path = FILES_DIR / f"{uuid.uuid4().hex}.wav"

    # NOTE(review): piper's documented CLI reads the text from stdin; confirm
    # that the installed build accepts a --text argument.
    cmd = [
        "piper",
        "--model", str(voice_path),
        "--output_file", str(out_path),
        "--text", req.text,
        "--length_scale", str(req.length_scale),
        "--noise_scale", str(req.noise_scale),
        "--noise_w", str(req.noise_w),
    ]

    try:
        # Synthesis is CPU-bound and blocking; run it on a worker thread so
        # the event loop keeps serving other requests meanwhile.
        loop = asyncio.get_running_loop()
        result = await loop.run_in_executor(None, _synthesize_to_file, cmd)
    except OSError as exc:
        # Raised e.g. when the piper executable is not on PATH.
        return JSONResponse(
            {"error": f"Could not run piper: {exc}"}, status_code=500
        )

    if result.returncode != 0 or not out_path.exists():
        # Do not leave a partial file behind after a failed run.
        if out_path.exists():
            out_path.unlink()
        return JSONResponse(
            {"error": f"TTS failed: {result.stderr.strip()}"}, status_code=500
        )

    return FileResponse(out_path, media_type="audio/wav")
63
+
64
# ========== STREAM (WebSocket) ==========
def _ws_synthesize(cmd, wav_path):
    """Run piper synchronously and return the bytes of the WAV it wrote."""
    subprocess.run(cmd, check=True)
    with open(wav_path, "rb") as wav_file:
        return wav_file.read()


@app.websocket("/ws/tts")
async def tts_stream(websocket: WebSocket):
    """Serve text-to-speech requests over a WebSocket.

    Each text frame that looks like a JSON object containing ``"text"`` is
    parsed; the optional keys ``voice``, ``length_scale``, ``noise_scale``
    and ``noise_w`` override the defaults for that message only. The
    synthesized WAV is sent back as one binary frame. Other frames are
    ignored. Any error ends the connection: it is printed and the socket is
    closed with code 1011; a client disconnect ends the handler quietly.
    """
    import asyncio
    import json

    from fastapi import WebSocketDisconnect

    await websocket.accept()
    loop = asyncio.get_running_loop()
    close_code = 1000
    peer_connected = True

    try:
        while True:
            data = await websocket.receive_text()

            # Cheap pre-filter: only frames that look like a JSON object
            # mentioning "text" are treated as synthesis requests.
            if not (data.startswith("{") and "text" in data):
                continue

            payload = json.loads(data)
            text = payload.get("text", "")
            voice = str(payload.get("voice", DEFAULT_VOICE))
            length_scale = float(payload.get("length_scale", 1.0))
            noise_scale = float(payload.get("noise_scale", 0.33))
            noise_w = float(payload.get("noise_w", 0.5))

            # Accept only a bare file stem so the model path stays inside
            # VOICES_DIR.
            if Path(voice).name != voice:
                raise ValueError(f"Invalid voice name: {voice!r}")

            # Reserve a unique path, then release our handle right away:
            # piper opens the path itself to write the audio.
            tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
            tmp.close()
            try:
                # NOTE(review): piper's documented CLI reads the text from
                # stdin; confirm the installed build accepts --text.
                cmd = [
                    "piper",
                    "--model", str(VOICES_DIR / f"{voice}.onnx"),
                    "--output_file", tmp.name,
                    "--text", text,
                    "--length_scale", str(length_scale),
                    "--noise_scale", str(noise_scale),
                    "--noise_w", str(noise_w),
                ]
                # Blocking synthesis runs on a worker thread so the event
                # loop stays responsive for other connections.
                audio = await loop.run_in_executor(
                    None, _ws_synthesize, cmd, tmp.name
                )
            finally:
                # Remove the temp file whether or not synthesis succeeded.
                os.unlink(tmp.name)

            await websocket.send_bytes(audio)

    except WebSocketDisconnect:
        # The client went away; there is nothing left to close.
        peer_connected = False
    except Exception as e:
        print(f"[TTS WS Error] {e}")
        close_code = 1011  # internal error
    finally:
        if peer_connected:
            try:
                await websocket.close(code=close_code)
            except RuntimeError:
                # The connection was already closed underneath us.
                pass