Percy3822 committed on
Commit
b447642
·
verified ·
1 Parent(s): d62389c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +133 -77
app.py CHANGED
@@ -1,96 +1,152 @@
1
- import os, json, time, asyncio, subprocess, wave, io
2
- from pathlib import Path
3
- from fastapi import FastAPI, WebSocket, Body
 
 
4
  from fastapi.middleware.cors import CORSMiddleware
5
- from fastapi.responses import JSONResponse
 
 
 
 
 
6
 
7
# Filesystem layout and voice selection; each value can be overridden through
# an environment variable of the same name.
VOICE_DIR = Path(os.environ.get("VOICE_DIR", "/home/user/voices"))
FILES_DIR = Path(os.environ.get("FILES_DIR", "/home/user/files"))
DEFAULT_VOICE = os.environ.get("DEFAULT_VOICE", "en_US-amy-medium")
VOICE_ONNX = VOICE_DIR / f"{DEFAULT_VOICE}.onnx"
VOICE_CFG = VOICE_DIR / f"{DEFAULT_VOICE}.onnx.json"

app = FastAPI(title="TTS (Piper CLI)")

# CORS: the wildcard has to be the literal "*". An empty string matches no
# origin and no method, which silently rejects every cross-origin request.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
    allow_credentials=True,
)

# The voice's JSON config carries the sample rate of the PCM that piper emits;
# 22050 Hz is used when the config does not specify one.
with open(VOICE_CFG, "r", encoding="utf-8") as f:
    cfg = json.load(f)
SAMPLE_RATE = int(cfg.get("audio", {}).get("sample_rate", 22050))
22
 
23
@app.get("/health")
def health():
    """Describe the running service: engine, voice, sample rate and output dir."""
    # The default voice is only advertised when its model file is on disk.
    voices = []
    if VOICE_ONNX.exists():
        voices.append(DEFAULT_VOICE)

    report = {"ok": True, "engine": "piper-cli", "default_voice": DEFAULT_VOICE}
    report["available_voices"] = voices
    report["sample_rate"] = SAMPLE_RATE
    report["files_dir"] = str(FILES_DIR)
    return report
31
 
32
def _spawn_piper(text: str):
    """Start a piper process that synthesizes *text* and return the Popen handle.

    The stripped text plus a trailing newline is written to piper's stdin,
    which is then closed. The caller reads the audio from ``p.stdout`` and is
    responsible for calling ``p.wait()``.

    stderr is discarded rather than piped: nothing reads it, and an unread
    pipe would stall piper as soon as its log output filled the pipe buffer.
    """
    cmd = [
        "piper",
        "--model", str(VOICE_ONNX),
        "--config", str(VOICE_CFG),
        # NOTE(review): intended to stream 16-bit little-endian PCM to stdout;
        # confirm that the installed piper accepts the trailing "-".
        "--output_raw", "-",
    ]
    p = subprocess.Popen(
        cmd,
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.DEVNULL,
    )
    p.stdin.write((text.strip() + "\n").encode("utf-8"))
    p.stdin.close()
    return p
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
@app.websocket("/ws/tts")
async def ws_tts(ws: WebSocket):
    """Stream one utterance over a WebSocket as raw PCM chunks.

    Protocol, in order:
      1. the client sends an init message (its content is currently ignored);
      2. the server replies ``{"event": "ready", "sr": SAMPLE_RATE}``;
      3. the client sends a JSON object with a ``text`` field;
      4. the server sends binary frames of piper output, then
         ``{"event": "done", "rc": <exit code>}`` and closes the socket.
    """
    await ws.accept()

    # The init payload carries no options that are used here, so it is read
    # and discarded without requiring it to be valid JSON.
    await ws.receive_text()
    await ws.send_text(json.dumps({"event": "ready", "sr": SAMPLE_RATE}))

    try:
        msg = json.loads(await ws.receive_text())
    except ValueError:
        msg = None
    text = msg.get("text", "") if isinstance(msg, dict) else ""
    if not isinstance(text, str) or not text.strip():
        # Reject the request instead of crashing on malformed input or
        # spawning piper with nothing to say.
        await ws.send_text(json.dumps(
            {"event": "error", "detail": "expected a JSON object with non-empty 'text'"}
        ))
        await ws.close()
        return

    p = _spawn_piper(text)
    loop = asyncio.get_event_loop()
    try:
        # Pipe reads block, so they run in the default executor to keep the
        # event loop responsive.
        while True:
            chunk = await loop.run_in_executor(None, p.stdout.read, 4096)
            if not chunk:
                break
            await ws.send_bytes(chunk)
        rc = await loop.run_in_executor(None, p.wait, 60)
    finally:
        # Runs on client disconnect and on timeout as well, so the child
        # process is never left behind.
        if p.poll() is None:
            p.kill()
            p.wait()

    await ws.send_text(json.dumps({"event": "done", "rc": rc}))
    await ws.close()
74
 
75
@app.post("/speak")
async def speak(body: dict = Body(...)):
    """Synthesize ``body["text"]`` into a WAV file and return its URL.

    Returns ``{"ok": True, "audio_url": "/file/<name>"}`` on success, a 400
    JSON error when the text is missing, empty or not a string, and a 500
    JSON error when piper fails or produces no audio.
    """
    text = body.get("text", "")
    if not isinstance(text, str) or not text.strip():
        return JSONResponse({"ok": False, "error": "empty text"}, 400)
    text = text.strip()

    def _synthesize():
        """Run piper to completion; return (raw PCM bytes, exit code)."""
        p = _spawn_piper(text)
        pcm = p.stdout.read()
        return pcm, p.wait(timeout=60)

    # Reading the pipe blocks, so do it off the event loop.
    raw, rc = await asyncio.get_event_loop().run_in_executor(None, _synthesize)
    if rc != 0 or not raw:
        return JSONResponse({"ok": False, "error": f"piper failed rc={rc}"}, 500)

    # Wrap the raw PCM in a WAV container (mono, 16-bit, SAMPLE_RATE).
    buf = io.BytesIO()
    with wave.open(buf, "wb") as wf:
        wf.setnchannels(1)
        wf.setsampwidth(2)
        wf.setframerate(SAMPLE_RATE)
        wf.writeframes(raw)

    fname = f"tts-{int(time.time()*1000)}.wav"
    FILES_DIR.mkdir(parents=True, exist_ok=True)
    with open(FILES_DIR / fname, "wb") as f:
        f.write(buf.getvalue())
    return {"ok": True, "audio_url": f"/file/{fname}"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os, io, time, json, asyncio, tempfile, wave, uuid, shlex
2
+ from typing import Optional
3
+
4
+ from fastapi import FastAPI, WebSocket, WebSocketDisconnect, Body
5
+ from fastapi.responses import JSONResponse, FileResponse, PlainTextResponse
6
  from fastapi.middleware.cors import CORSMiddleware
7
+ import anyio
8
+
9
APP_NAME = "ActualTTS (espeak-ng)"

# Working directory for generated audio. Defaults to /tmp/actual_tts and can
# be moved with the ACTUAL_TTS_BASE_DIR environment variable.
BASE_DIR = os.environ.get("ACTUAL_TTS_BASE_DIR", "/tmp/actual_tts")
FILES_DIR = os.path.join(BASE_DIR, "files")
os.makedirs(FILES_DIR, exist_ok=True)

DEFAULT_VOICE = "en-us"   # espeak voice id
DEFAULT_RATE_WPM = 170    # speaking speed, words per minute

app = FastAPI(title=APP_NAME)

# CORS: every origin, method and header is allowed (not only local clients).
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)
26
 
27
@app.get("/", response_class=PlainTextResponse)
def root():
    """Answer ``GET /`` with the plain-text body ``OK``."""
    body = "OK"
    return body
30
 
31
@app.get("/health")
def health():
    """Return a static status document describing the service."""
    status = dict(
        ok=True,
        engine="espeak-ng",
        default_voice=DEFAULT_VOICE,
        files_dir=FILES_DIR,
        tip="WebSocket /ws/tts (init, then speak), or POST /speak",
    )
    return status
40
 
41
def espeak_cmd(text: str, voice: str = DEFAULT_VOICE, rate_wpm: int = DEFAULT_RATE_WPM):
    """Build the argv list that makes espeak-ng speak *text* as WAV on stdout.

    Args:
        text: the utterance; passed as a single argv element, never via a shell.
        voice: espeak voice id, passed to ``-v``.
        rate_wpm: speaking speed in words per minute, passed to ``-s``.

    Returns:
        A list of strings suitable for ``subprocess`` / ``create_subprocess_exec``.
    """
    # --stdout makes espeak-ng write a WAV stream to stdout.
    # "--" ends option parsing, so client text that starts with "-" (for
    # example "-f/etc/passwd") is spoken rather than read as an option.
    return ["espeak-ng", "--stdout", "-v", voice, "-s", str(rate_wpm), "--", text]
46
+
47
async def synth_to_file(text: str, voice: str, rate_wpm: int) -> str:
    """Run espeak-ng once, save the WAV it emits under FILES_DIR, return the path.

    Raises:
        RuntimeError: if espeak-ng exits non-zero or produces no output; the
            message includes the exit code and the decoded stderr.
    """
    proc = await asyncio.create_subprocess_exec(
        *espeak_cmd(text, voice, rate_wpm),
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,
    )
    wav_bytes, err = await proc.communicate()
    if proc.returncode != 0 or not wav_bytes:
        raise RuntimeError(f"espeak-ng failed rc={proc.returncode}, err={err.decode('utf-8','ignore')}")

    # A millisecond timestamp alone is not unique: two requests finishing in
    # the same millisecond would overwrite each other's file. The random
    # suffix keeps names distinct.
    out_name = f"tts-{int(time.time()*1000)}-{uuid.uuid4().hex[:8]}.wav"
    out_path = os.path.join(FILES_DIR, out_name)
    with open(out_path, "wb") as f:
        f.write(wav_bytes)
    return out_path
61
+
62
@app.post("/speak")
async def speak_post(payload: dict = Body(...)):
    """Synthesize ``payload["text"]`` to a WAV file and return its download URL.

    Optional fields: ``voice`` (espeak voice id) and ``rate_wpm`` (integer
    words per minute); both fall back to the module defaults.

    Responses:
        200 ``{"ok": True, "audio_url": "/file/<name>"}`` on success.
        400 when ``text`` is missing, empty or not a string, when ``voice``
            is not a string, or when ``rate_wpm`` is not an integer.
        500 when synthesis fails.
    """
    text = payload.get("text", "")
    if not isinstance(text, str) or not text.strip():
        return JSONResponse({"ok": False, "error": "no text"}, status_code=400)

    voice = payload.get("voice") or DEFAULT_VOICE
    if not isinstance(voice, str):
        return JSONResponse({"ok": False, "error": "voice must be a string"}, status_code=400)

    # Bad client input is a 400, not an unhandled exception (500).
    raw_rate = payload.get("rate_wpm")
    try:
        rate_wpm = DEFAULT_RATE_WPM if raw_rate is None else int(raw_rate)
    except (TypeError, ValueError):
        return JSONResponse({"ok": False, "error": "rate_wpm must be an integer"}, status_code=400)

    try:
        path = await synth_to_file(text, voice, rate_wpm)
    except Exception as e:
        return JSONResponse({"ok": False, "error": "Synthesis failed", "detail": str(e)}, status_code=500)

    rel = f"/file/{os.path.basename(path)}"
    return {"ok": True, "audio_url": rel}
75
 
76
@app.get("/file/{fname}")
async def get_file(fname: str):
    """Serve a previously generated WAV from FILES_DIR, or a JSON 404."""
    # Defense in depth: only a bare file name is accepted, so the joined path
    # can never point outside FILES_DIR.
    is_plain_name = (
        fname == os.path.basename(fname)
        and "\\" not in fname
        and fname not in ("", ".", "..")
    )
    path = os.path.join(FILES_DIR, fname)
    if not is_plain_name or not os.path.isfile(path):
        return JSONResponse({"ok": False, "error": "not found"}, status_code=404)
    return FileResponse(path, media_type="audio/wav")
82
+
83
+ # --------- WebSocket Streaming TTS ----------
84
+ # Protocol:
85
+ # Client sends: {"event":"init","voice":"en-us","rate_wpm":170}
86
+ # Server replies: {"event":"ready","sr":22050}
87
+ # Client sends: {"event":"speak","text":"..."}
88
+ # Server streams: binary frames with WAV bytes as they are produced
89
+ # then {"event":"done"}
90
@app.websocket("/ws/tts")
async def ws_tts(ws: WebSocket):
    """Stream one espeak-ng utterance over a WebSocket.

    Protocol, in order:
      1. client: ``{"event": "init", "voice": ..., "rate_wpm": ...}``
         (voice and rate_wpm are optional);
      2. server: ``{"event": "ready", "sr": 22050}``;
      3. client: ``{"event": "speak", "text": "..."}``;
      4. server: binary frames of espeak-ng's WAV output (header first), then
         ``{"event": "done"}``. A non-zero exit code is reported with an
         ``error`` event just before ``done``.

    Protocol violations get an ``error`` event and the socket is closed.
    """
    await ws.accept()
    voice = DEFAULT_VOICE
    rate_wpm = DEFAULT_RATE_WPM
    try:
        # Expect init first.
        msg = json.loads(await ws.receive_text())
        if not isinstance(msg, dict) or msg.get("event") != "init":
            await ws.send_text(json.dumps({"event":"error","detail":"first message must be {'event':'init',...}"}))
            await ws.close(code=1002)
            return
        if msg.get("voice"):
            voice = msg["voice"]
        if "rate_wpm" in msg:
            try:
                rate_wpm = int(msg["rate_wpm"])
            except (TypeError, ValueError):
                pass  # unusable value: keep the default rate

        # Tell the client the sample rate; this endpoint always reports 22050 Hz.
        await ws.send_text(json.dumps({"event":"ready","sr":22050}))

        # Wait for speak.
        data = json.loads(await ws.receive_text())
        if (
            not isinstance(data, dict)
            or data.get("event") != "speak"
            or not isinstance(data.get("text"), str)
            or not data["text"]
        ):
            await ws.send_text(json.dumps({"event":"error","detail":"need {'event':'speak','text':...}"}))
            await ws.close()
            return
        text = data["text"]

        # Spawn espeak-ng and stream its stdout as binary chunks.
        proc = await asyncio.create_subprocess_exec(
            *espeak_cmd(text, voice, rate_wpm),
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )
        # Drain stderr while stdout is streamed, so a chatty child cannot
        # block on a full stderr pipe.
        stderr_task = asyncio.ensure_future(proc.stderr.read())
        try:
            while True:
                chunk = await proc.stdout.read(4096)
                if not chunk:
                    break
                await ws.send_bytes(chunk)
            rc = await proc.wait()
            if rc != 0:
                err = (await stderr_task).decode("utf-8", "ignore")
                await ws.send_text(json.dumps({"event":"error","detail":f"espeak-ng failed rc={rc}: {err[:200]}" }))
            await ws.send_text(json.dumps({"event":"done"}))
        finally:
            # Kill only a child that is still running. Calling kill() after a
            # normal exit raises ProcessLookupError, which used to surface as
            # a spurious error event after "done".
            if proc.returncode is None:
                try:
                    proc.kill()
                except ProcessLookupError:
                    pass
                await proc.wait()
            stderr_task.cancel()  # no-op when the read already finished
    except WebSocketDisconnect:
        # The client is gone; there is nothing left to send or close.
        return
    except Exception as e:
        # Best effort: the socket may already be unusable.
        try:
            with anyio.move_on_after(0.1):
                await ws.send_text(json.dumps({"event":"error","detail":str(e)}))
        except (RuntimeError, OSError, WebSocketDisconnect):
            pass

    try:
        with anyio.move_on_after(0.1):
            await ws.close()
    except (RuntimeError, OSError, WebSocketDisconnect):
        pass