shreyas-joshi Cursor commited on
Commit
f8a7e1d
·
1 Parent(s): bbe8cec

Deploy existing backend in Docker Space on port 7860

Browse files

Co-authored-by: Cursor <cursoragent@cursor.com>

Dockerfile ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # syntax=docker/dockerfile:1
2
+ FROM python:3.12-slim
3
+
4
+ WORKDIR /app/backend
5
+
6
+ # System deps for lxml/bs4 + general networking
7
+ RUN apt-get update \
8
+ && apt-get install -y --no-install-recommends \
9
+ curl \
10
+ ca-certificates \
11
+ gcc \
12
+ g++ \
13
+ libc6-dev \
14
+ && rm -rf /var/lib/apt/lists/*
15
+
16
+ # Install uv
17
+ RUN curl -LsSf https://astral.sh/uv/install.sh | sh \
18
+ && ln -s /root/.local/bin/uv /usr/local/bin/uv
19
+
20
+ # Copy dependency metadata first for better layer caching
21
+ COPY backend/pyproject.toml backend/uv.lock* /app/backend/
22
+
23
+ # Create venv + install deps
24
+ RUN uv venv --python 3.12 \
25
+ && uv sync
26
+
27
+ # Copy backend app code
28
+ COPY backend /app/backend
29
+
30
+ EXPOSE 7860
31
+
32
+ # Keep backend logic unchanged, but bind Space app to 7860.
33
+ CMD ["/bin/sh", "-lc", "uv run python download_models.py && uv run python -c \"import uvicorn, server; uvicorn.run(server.app, host='0.0.0.0', port=7860)\""]
backend/Dockerfile ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # syntax=docker/dockerfile:1
2
+ FROM python:3.12-slim
3
+
4
+ WORKDIR /app
5
+
6
+ # System deps for lxml/bs4 + general networking
7
+ RUN apt-get update \
8
+ && apt-get install -y --no-install-recommends \
9
+ curl \
10
+ ca-certificates \
11
+ gcc \
12
+ g++ \
13
+ libc6-dev \
14
+ && rm -rf /var/lib/apt/lists/*
15
+
16
+ # Install uv
17
+ RUN curl -LsSf https://astral.sh/uv/install.sh | sh \
18
+ && ln -s /root/.local/bin/uv /usr/local/bin/uv
19
+
20
+ # Copy dependency metadata first for better layer caching
21
+ COPY pyproject.toml uv.lock* /app/
22
+
23
+ # Create venv + install deps
24
+ RUN uv venv --python 3.12 \
25
+ && uv sync
26
+
27
+ # Copy app code
28
+ COPY . /app/
29
+
30
+ EXPOSE 8000
31
+
32
+ # Ensure models exist, then start server. NOTE(review): the intent was to avoid
33
+ # `uv run` (its auto-sync could re-install CPU onnxruntime), but CMD still uses it.
34
+ CMD ["/bin/sh", "-lc", "uv run python download_models.py && uv run python server.py"]
backend/download_models.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import requests
3
+
4
+ # Kokoro v1.0 (recommended): larger voice pack.
5
+ MODEL_URL = "https://github.com/thewh1teagle/kokoro-onnx/releases/download/model-files-v1.0/kokoro-v1.0.onnx"
6
+ VOICES_URL = "https://github.com/thewh1teagle/kokoro-onnx/releases/download/model-files-v1.0/voices-v1.0.bin"
7
+
8
def download_file(url, path):
    """Download ``url`` to ``path``, streaming the body in 8 KiB chunks.

    The body is written to ``<path>.part`` first and only renamed onto
    ``path`` once it is complete, so an interrupted download never leaves a
    truncated file behind that a later ``os.path.exists(path)`` check would
    mistake for a finished one.

    A non-200 response is reported on stdout and leaves ``path`` untouched.
    Network errors and timeouts raised by ``requests`` propagate to the caller.
    """
    print(f"Downloading {url} to {path}...")
    tmp_path = f"{path}.part"
    # (connect, read) timeouts: without them a stalled server hangs forever.
    with requests.get(url, stream=True, timeout=(10, 120)) as response:
        if response.status_code != 200:
            print(f"Failed to download {url}")
            return
        try:
            with open(tmp_path, 'wb') as f:
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:
                        f.write(chunk)
        except BaseException:
            # Do not leave a partial file around after a failed transfer.
            if os.path.exists(tmp_path):
                os.remove(tmp_path)
            raise
    # Atomic on the same filesystem: readers see the old state or the full file.
    os.replace(tmp_path, path)
    print(f"Downloaded {path}")
18
+
19
if __name__ == "__main__":
    # Fetch whichever model artifacts are not already present under models/.
    os.makedirs("models", exist_ok=True)
    for source_url, target in (
        (MODEL_URL, "models/kokoro-v1.0.onnx"),
        (VOICES_URL, "models/voices-v1.0.bin"),
    ):
        if os.path.exists(target):
            continue
        download_file(source_url, target)
backend/pyproject.toml ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [project]
2
+ name = "ln-tts-backend"
3
+ version = "0.1.0"
4
+ description = "FastAPI backend for LN-TTS (NovelCool scraping + local TTS streaming)"
5
+ requires-python = ">=3.10,<3.13"
6
+ dependencies = [
7
+ "fastapi>=0.128.0",
8
+ "uvicorn[standard]>=0.30.0",
9
+ "aiohttp>=3.9.5",
10
+ "beautifulsoup4>=4.12.3",
11
+ "lxml>=5.2.2",
12
+ "numpy>=1.26.0",
13
+ "onnxruntime>=1.20.0",
14
+ "kokoro-onnx>=0.2.6",
15
+ "requests>=2.32.0",
16
+ ]
17
+
18
+ [tool.uv]
19
+ package = false
backend/requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ fastapi>=0.128.0
2
+ uvicorn[standard]>=0.30.0
3
+ aiohttp>=3.9.5
4
+ beautifulsoup4>=4.12.3
5
+ lxml>=5.2.2
6
+ numpy>=1.26.0
7
+ onnxruntime>=1.20.0
8
+ kokoro-onnx>=0.2.6
9
+ requests>=2.32.0
backend/scraper.py ADDED
@@ -0,0 +1,153 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import aiohttp
2
+ from bs4 import BeautifulSoup
3
+ import re
4
+ from urllib.parse import urljoin
5
+
6
+
7
class NovelCoolScraper:
    """Scrape chapter text and chapter listings from NovelCool pages."""

    # Best-effort chapter number parser for link titles such as "Chapter 15".
    _CHAPTER_NUMBER_RE = re.compile(r"(?:Chapter|C)\s*(\d+)", flags=re.IGNORECASE)

    def __init__(self):
        # Browser-like User-Agent sent with every request.
        self.headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
        }

    async def _fetch_html(self, url: str) -> str:
        """GET ``url`` with the scraper's headers and return the body text.

        Raises:
            Exception: if the response status is not 200.
        """
        async with aiohttp.ClientSession() as session:
            async with session.get(url, headers=self.headers) as response:
                if response.status != 200:
                    raise Exception(f"Failed to fetch page: {response.status}")
                return await response.text()

    @staticmethod
    def _extract_title(soup) -> str:
        """Return the chapter title from the first <h1>, else from <title>."""
        title_tag = soup.find('h1')
        if title_tag:
            return title_tag.get_text(strip=True)
        page_title = soup.find('title')
        if page_title:
            t = page_title.get_text(strip=True)
            # e.g. "Shadow Slave Chapter 15 - Novel Cool - Best online light novel reading website"
            return t.split(' - Novel Cool', 1)[0].strip() or t
        return "Unknown Chapter"

    @staticmethod
    def _find_content_div(soup):
        """Locate the element holding the chapter text.

        Raises:
            Exception: if no candidate container exists in the page.
        """
        # In the HTML variant commonly returned to scripted clients, the actual
        # chapter content lives under: div.site-content > div.overflow-hidden
        content_div = soup.select_one('div.site-content div.overflow-hidden')

        if not content_div:
            # Fallback: pick the div with the most <p> tags (first one wins ties).
            best = None
            best_count = 0
            for div in soup.find_all('div'):
                count = len(div.find_all('p'))
                if count > best_count:
                    best_count = count
                    best = div
            content_div = best

        if not content_div:
            raise Exception("Could not find chapter content container")
        return content_div

    @staticmethod
    def _extract_paragraphs(content_div):
        """Return non-empty paragraph texts, stopping at the chapter-end marker."""
        paragraphs = []
        for p in content_div.find_all('p'):
            classes = p.get('class') or []
            txt = p.get_text(' ', strip=True)
            if not txt:
                continue
            if 'chapter-end-mark' in classes or txt.lower().strip() == 'chapter end':
                break
            paragraphs.append(txt)

        if not paragraphs:
            # No usable <p> tags: fall back to the container's text, one entry per line.
            raw_text = content_div.get_text(separator='\n', strip=True)
            paragraphs = [line for line in raw_text.split('\n') if line.strip()]
        return paragraphs

    @staticmethod
    def _find_nav_links(soup, url: str):
        """Return absolute ``(next_url, prev_url)``; either may be ``None``.

        Only anchors whose href contains ``/chapter/`` are considered; the first
        one whose text contains "Next" / "Prev" wins.
        """
        next_link = None
        prev_link = None

        for a in soup.find_all('a', href=True):
            t = a.get_text(" ", strip=True)
            href = a.get('href')
            if not href:
                continue
            if '/chapter/' not in href:
                continue
            if not next_link and 'Next' in t:
                next_link = href
            if not prev_link and 'Prev' in t:
                prev_link = href
            if next_link and prev_link:
                break

        if next_link:
            next_link = urljoin(url, next_link)
        if prev_link:
            prev_link = urljoin(url, prev_link)
        return next_link, prev_link

    async def scrape_chapter(self, url: str):
        """Fetch a chapter page and return its title, paragraphs and nav links.

        Returns:
            dict with keys ``title`` (str), ``content`` (list of paragraph
            strings), ``next_url`` and ``prev_url`` (absolute URL or ``None``).

        Raises:
            Exception: on a non-200 response or when no content container
            can be located.
        """
        html = await self._fetch_html(url)

        # NovelCool pages can be large; lxml parser is more reliable here.
        soup = BeautifulSoup(html, 'lxml')

        title = self._extract_title(soup)
        paragraphs = self._extract_paragraphs(self._find_content_div(soup))
        next_link, prev_link = self._find_nav_links(soup, url)

        return {
            "title": title,
            "content": paragraphs,  # Return list of paragraphs for easier chunking
            "next_url": next_link,
            "prev_url": prev_link
        }

    async def scrape_novel_index(self, novel_url: str):
        """Scrape a NovelCool novel page and return a list of chapter links.

        Each item is ``{"n": int | None, "title": str, "url": str}``. Items are
        sorted by parsed chapter number; entries without a number keep their
        page order after the numbered ones.

        Raises:
            Exception: on a non-200 response.
        """
        html = await self._fetch_html(novel_url)

        soup = BeautifulSoup(html, 'lxml')
        links = []
        seen = set()

        for a in soup.find_all('a', href=True):
            href = a.get('href')
            if not href:
                continue
            if '/chapter/' not in href:
                continue
            abs_url = urljoin(novel_url, href)
            if abs_url in seen:
                continue
            title = a.get_text(' ', strip=True)
            if not title:
                # Some chapter links have empty text (icons). Skip them WITHOUT
                # marking the URL as seen, so a later titled link to the same
                # chapter is still picked up.
                continue
            seen.add(abs_url)
            # Best-effort chapter number parsing.
            m = self._CHAPTER_NUMBER_RE.search(title)
            n = int(m.group(1)) if m else None
            links.append({"n": n, "title": title, "url": abs_url})

        # Sort by chapter number when possible.
        def chapter_key(item):
            n = item.get('n')
            if isinstance(n, int):
                return n
            # fallback: keep stable ordering
            return 10**9

        links.sort(key=chapter_key)
        return links
141
+
142
if __name__ == "__main__":
    import asyncio

    # Manual smoke test against a user-provided chapter URL.
    scraper = NovelCoolScraper()
    url = "https://www.novelcool.com/chapter/Shadow-Slave-Chapter-15/7332162/"
    try:
        result = asyncio.run(scraper.scrape_chapter(url))
        summary = (
            f"Title: {result['title']}",
            f"Paragraphs: {len(result['content'])}",
            f"Next: {result['next_url']}",
        )
        for line in summary:
            print(line)
    except Exception as e:
        print(f"Error: {e}")
backend/server.py ADDED
@@ -0,0 +1,366 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import uvicorn
2
+ from fastapi import FastAPI, WebSocket, WebSocketDisconnect, HTTPException
3
+ from fastapi.middleware.cors import CORSMiddleware
4
+ import json
5
+ import asyncio
6
+ import logging
7
+ from scraper import NovelCoolScraper
8
+ from tts import TTSEngine
9
+ import traceback
10
+ from contextlib import asynccontextmanager
11
+ import time
12
+
13
+ # Configure root logging at INFO level
14
+ logging.basicConfig(level=logging.INFO)
15
+ logger = logging.getLogger(__name__)
16
+
17
+
18
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Populate ``app.state`` on startup and clear it again on shutdown.

    A TTS engine that fails to initialize is logged and stored as ``None`` so
    the application still starts.
    """
    engine = None
    try:
        logger.info("Initializing TTS Engine...")
        try:
            import onnxruntime as ort

            logger.info(f"ONNX Runtime providers: {ort.get_available_providers()}")
        except Exception:
            # Listing providers is informational only; never block startup on it.
            pass
        engine = TTSEngine()
        logger.info("TTS Engine initialized.")
    except Exception as e:
        logger.error(f"Failed to initialize TTS Engine: {e}")
        engine = None
    app.state.tts = engine

    app.state.scraper = NovelCoolScraper()
    app.state.novel_index_cache = {}
    yield
    # Shutdown: drop every reference held on the application state.
    for attr in ("tts", "scraper", "novel_index_cache"):
        setattr(app.state, attr, None)
42
+
43
+
44
# Application instance; startup and shutdown are driven by `lifespan`.
app = FastAPI(lifespan=lifespan)

# CORS: any origin, method and header is accepted, with credentials disabled.
app.add_middleware(
    CORSMiddleware,
    allow_credentials=False,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)
53
+
54
@app.get("/health")
async def health():
    """Report liveness and whether a TTS engine is loaded."""
    tts_ready = app.state.tts is not None
    return {"ok": True, "tts_ready": tts_ready}
57
+
58
+
59
@app.get("/voices")
async def voices():
    """List the voice ids of the loaded TTS engine, or an error if none is loaded."""
    if app.state.tts:
        return {"voices": app.state.tts.list_voices()}
    return {"voices": [], "error": "TTS Engine not initialized"}
64
+
65
+
66
@app.get("/novel_index")
async def novel_index(url: str):
    """Scrape the novel page at ``url`` and return its chapter list (uncached)."""
    if url:
        scraped = await app.state.scraper.scrape_novel_index(url)
        return {"chapters": scraped}
    return {"chapters": [], "error": "url is required"}
72
+
73
+
74
async def _get_cached_novel_index(novel_url: str):
    """Return cached chapter list for a novel URL, scraping once per TTL.

    Entries live in ``app.state.novel_index_cache`` for 30 minutes. Expired
    entries are evicted on every cache miss so the dict, which is keyed by a
    caller-supplied URL, cannot grow without bound.

    Raises:
        HTTPException: 400 when ``novel_url`` is empty.
    """
    if not novel_url:
        raise HTTPException(status_code=400, detail="url is required")

    cache = app.state.novel_index_cache
    if cache is None:
        # State was cleared (e.g. by shutdown); start a fresh cache.
        cache = {}
        app.state.novel_index_cache = cache

    ttl_s = 30 * 60  # 30 minutes
    now = time.monotonic()
    entry = cache.get(novel_url)
    if entry is not None:
        age = now - float(entry.get("ts", 0.0))
        if age < ttl_s:
            return entry.get("chapters") or []

    # Evict everything that has expired (including this URL's stale entry).
    expired = [
        key for key, value in cache.items()
        if now - float(value.get("ts", 0.0)) >= ttl_s
    ]
    for key in expired:
        del cache[key]

    chapters = await app.state.scraper.scrape_novel_index(novel_url)
    cache[novel_url] = {"ts": now, "chapters": chapters}
    return chapters
95
+
96
+
97
@app.get("/novel_meta")
async def novel_meta(url: str):
    """Return the chapter count for a novel.

    The count is the highest parsed chapter number when at least one positive
    number was parsed, otherwise the number of scraped links.
    """
    chapters = await _get_cached_novel_index(url)
    numbers = (c.get("n") for c in chapters if isinstance(c, dict))
    highest = max((n for n in numbers if isinstance(n, int) and n > 0), default=0)
    count = highest if highest > 0 else len(chapters)
    return {"count": count}
109
+
110
+
111
@app.get("/novel_chapter")
async def novel_chapter(url: str, n: int):
    """Resolve chapter number ``n`` of the novel at ``url`` to its title and URL.

    Raises:
        HTTPException: 400 when ``n`` is outside ``1..limit`` (``limit`` is the
            highest parsed chapter number seen, else the link count); 404 when
            ``n`` is in range but no chapter entry can be resolved for it.
    """
    chapters = await _get_cached_novel_index(url)
    # Prefer resolving by parsed chapter number, not list position.
    resolved: dict | None = None
    max_n = 0
    for c in chapters:
        if not isinstance(c, dict):
            continue
        cn = c.get("n")
        if isinstance(cn, int) and cn > max_n:
            max_n = cn
        if isinstance(cn, int) and cn == n:
            resolved = c
            break

    limit = max_n if max_n > 0 else len(chapters)
    if n < 1 or n > limit:
        raise HTTPException(status_code=400, detail=f"chapter n must be between 1 and {limit}")

    if resolved is None:
        # Fallback: old positional behavior.
        if (n - 1) >= len(chapters):
            # Previously this returned a 200 with null title/url, which no
            # client can use; report the missing chapter explicitly instead.
            raise HTTPException(status_code=404, detail=f"chapter {n} not found")
        item = chapters[n - 1]
    else:
        item = resolved
    return {"n": n, "title": item.get("title"), "url": item.get("url")}
137
+
138
@app.websocket("/ws")
async def websocket_endpoint(websocket: WebSocket):
    """Serve the JSON command protocol on one WebSocket connection.

    Each text message is a JSON object with a ``command`` key:

    * ``scrape`` - scrape ``url`` and reply with a ``scrape_result`` message.
    * ``tts`` - synthesize ``text`` and stream PCM frames as binary messages,
      followed by ``tts_complete``.
    * ``play`` - scrape ``url``, send ``chapter_info``, then stream the chapter
      sentence by sentence (a ``sentence`` message precedes each new
      sentence's frames) and finish with ``chapter_complete``. While playing,
      ``pause`` / ``resume`` / ``stop`` control messages are honoured.

    Errors are reported to the client as JSON; the loop ends on disconnect.
    """
    await websocket.accept()
    # Shared stop flag handed to the TTS generators.
    cancel_event = asyncio.Event()

    try:
        while True:
            data = await websocket.receive_text()
            try:
                message = json.loads(data)
                command = message.get("command")

                if command == "scrape":
                    url = message.get("url")
                    if not url:
                        await websocket.send_json({"error": "URL is required"})
                        continue

                    logger.info(f"Scraping URL: {url}")
                    try:
                        result = await app.state.scraper.scrape_chapter(url)
                        await websocket.send_json({"type": "scrape_result", "data": result})
                    except Exception as e:
                        logger.error(f"Scrape error: {e}")
                        await websocket.send_json({"type": "error", "message": str(e)})

                elif command == "tts":
                    text = message.get("text")
                    voice = message.get("voice", "af_bella")
                    speed = message.get("speed", 1.0)

                    if not text:
                        await websocket.send_json({"error": "Text is required"})
                        continue

                    logger.info(f"Streaming TTS for text length: {len(text)}")
                    if not app.state.tts:
                        await websocket.send_json({"error": "TTS Engine not initialized"})
                        continue

                    # A "stop" during an earlier "play" leaves the shared event
                    # set; without clearing it this request would stream nothing.
                    cancel_event.clear()

                    # Ensure voice is valid for the loaded voice pack.
                    try:
                        available = app.state.tts.list_voices()
                        if available and voice not in available:
                            voice = available[0]
                    except Exception:
                        pass

                    # Stream audio
                    try:
                        async for _, audio_chunk in app.state.tts.generate_audio_stream(
                            text,
                            voice=voice,
                            speed=float(speed),
                            prefetch_sentences=3,
                            frame_ms=200,
                            cancel_event=cancel_event,
                        ):
                            await websocket.send_bytes(audio_chunk)

                        await websocket.send_json({"type": "tts_complete"})
                    except Exception as e:
                        logger.error(f"TTS error: {e}")
                        await websocket.send_json({"type": "error", "message": str(e)})

                elif command == "play":
                    # Single-shot: scrape the chapter, then stream it sentence-by-sentence.
                    url = message.get("url")
                    voice = message.get("voice", "af_bella")
                    speed = float(message.get("speed", 1.0))
                    prefetch = int(message.get("prefetch", 3))
                    frame_ms = int(message.get("frame_ms", 200))
                    start_paragraph = int(message.get("start_paragraph", 0) or 0)

                    if not url:
                        await websocket.send_json({"type": "error", "message": "URL is required"})
                        continue
                    if not app.state.tts:
                        await websocket.send_json({"type": "error", "message": "TTS Engine not initialized"})
                        continue

                    cancel_event.clear()
                    paused = False

                    logger.info(f"Play request: url={url} voice={voice} speed={speed}")

                    # Ensure voice is valid for the loaded voice pack.
                    try:
                        available = app.state.tts.list_voices()
                        if available and voice not in available:
                            voice = available[0]
                    except Exception:
                        pass
                    try:
                        chapter = await app.state.scraper.scrape_chapter(url)
                    except Exception as e:
                        await websocket.send_json({"type": "error", "message": str(e)})
                        continue

                    title = chapter.get("title")
                    paragraphs = chapter.get("content") or []

                    # Clamp the resume position into the valid index range
                    # (an index equal to len() is already out of range).
                    if start_paragraph < 0:
                        start_paragraph = 0
                    if start_paragraph >= len(paragraphs):
                        start_paragraph = max(0, len(paragraphs) - 1)

                    paragraphs_slice = paragraphs[start_paragraph:] if start_paragraph else paragraphs
                    await websocket.send_json(
                        {
                            "type": "chapter_info",
                            "title": title,
                            "url": url,
                            "next_url": chapter.get("next_url"),
                            "prev_url": chapter.get("prev_url"),
                            "paragraphs": paragraphs,
                            "start_paragraph": start_paragraph,
                            "audio": {
                                "encoding": "pcm_s16le",
                                "sample_rate": app.state.tts.sample_rate,
                                "channels": 1,
                                "frame_ms": frame_ms,
                            },
                        }
                    )

                    last_key = None
                    # Background receive used to pick up control messages
                    # between frames; at most one receive is ever in flight.
                    control_task: asyncio.Task[str] | None = None
                    try:
                        control_task = asyncio.create_task(websocket.receive_text())

                        async def handle_control_payload(payload: str) -> None:
                            nonlocal paused
                            try:
                                msg = json.loads(payload)
                            except json.JSONDecodeError:
                                return
                            cmd = msg.get("command")
                            if cmd == "pause":
                                paused = True
                            elif cmd == "resume":
                                paused = False
                            elif cmd == "stop":
                                cancel_event.set()

                        async for p_idx, s_idx, sentence, audio_frame in app.state.tts.generate_audio_stream_paragraphs(
                            paragraphs_slice,
                            voice=voice,
                            speed=speed,
                            prefetch_sentences=prefetch,
                            frame_ms=frame_ms,
                            cancel_event=cancel_event,
                        ):
                            # Consume any pending control messages without concurrent receives.
                            if control_task is not None and control_task.done():
                                finished, control_task = control_task, None
                                try:
                                    await handle_control_payload(finished.result())
                                except WebSocketDisconnect:
                                    cancel_event.set()
                                # The receive is re-armed below, and only while
                                # the stream is still running (never after a
                                # stop or a disconnect).

                            if paused and control_task is not None:
                                control_task.cancel()
                                control_task = None

                            while paused and not cancel_event.is_set():
                                # Block until we get a control message.
                                try:
                                    payload = await websocket.receive_text()
                                except WebSocketDisconnect:
                                    cancel_event.set()
                                    break
                                await handle_control_payload(payload)

                            if not paused and not cancel_event.is_set() and control_task is None:
                                control_task = asyncio.create_task(websocket.receive_text())

                            if cancel_event.is_set():
                                break
                            key = (p_idx + start_paragraph, s_idx, sentence)
                            if key != last_key:
                                # First frame of a new sentence: announce it.
                                last_key = key
                                await websocket.send_json(
                                    {
                                        "type": "sentence",
                                        "text": sentence,
                                        "paragraph_index": int(p_idx + start_paragraph),
                                        "sentence_index": int(s_idx),
                                    }
                                )
                            await websocket.send_bytes(audio_frame)

                            # Pace frames close to real-time so UI updates (sentence highlighting)
                            # match what is audible, even when synthesis runs faster than realtime.
                            # 2 bytes per sample, mono -> frame duration in seconds.
                            try:
                                await asyncio.sleep(len(audio_frame) / (2 * app.state.tts.sample_rate))
                            except Exception:
                                pass

                        if control_task is not None:
                            control_task.cancel()
                            control_task = None

                        await websocket.send_json(
                            {
                                "type": "chapter_complete",
                                "next_url": chapter.get("next_url"),
                                "prev_url": chapter.get("prev_url"),
                            }
                        )
                    except Exception as e:
                        logger.error(f"Play stream error: {e}")
                        await websocket.send_json({"type": "error", "message": str(e)})
                    finally:
                        # Never leave a receive in flight: the outer loop is
                        # about to call receive_text() itself.
                        if control_task is not None:
                            control_task.cancel()

                else:
                    await websocket.send_json({"error": "Unknown command"})

            except json.JSONDecodeError:
                await websocket.send_json({"error": "Invalid JSON"})
            except Exception as e:
                logger.error(f"Error processing message: {e}")
                traceback.print_exc()
                await websocket.send_json({"error": "Internal server error"})

    except WebSocketDisconnect:
        logger.info("Client disconnected")
    except Exception as e:
        logger.error(f"WebSocket error: {e}")
364
+
365
if __name__ == "__main__":
    import os

    # Bind on all interfaces. The port defaults to 8000 and can be overridden
    # with the PORT environment variable (e.g. 7860 for a Docker Space).
    uvicorn.run(app, host="0.0.0.0", port=int(os.environ.get("PORT", "8000")))
backend/tts.py ADDED
@@ -0,0 +1,268 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import re
3
+ import numpy as np
4
+ import onnxruntime as ort
5
+ from kokoro_onnx import Kokoro
6
+ import asyncio
7
+ import json
8
+ import inspect
9
+ from typing import AsyncIterator, Iterable, List, Optional
10
+ import contextlib
11
+ from pathlib import Path
12
+ import zipfile
13
+
14
class TTSEngine:
    """Kokoro ONNX text-to-speech wrapper that streams 16-bit mono PCM frames."""

    def __init__(
        self,
        model_path: str = "models/kokoro-v1.0.onnx",
        voices_path: str = "models/voices-v1.0.bin",
    ):
        """Load the Kokoro model and locate a voices file.

        Raises:
            FileNotFoundError: if the model file or every candidate voices
                file is missing.
        """
        # Ensure models exist
        if not os.path.exists(model_path):
            raise FileNotFoundError(f"Model not found at {model_path}. Run download_models.py first.")

        self.model_path = model_path
        self.voices_path = voices_path

        # Newer kokoro-onnx versions support the v1.0 voices bundle (voices-v1.0.bin).
        # We also keep backward-compatible support for voices.json/voices.npz.
        self._ensure_voices_file()

        self.sample_rate = 24000  # Kokoro default
        self._voices_cache: Optional[List[str]] = None

        # CPU-only mode for maximum compatibility.
        self.providers = ["CPUExecutionProvider"]

        # kokoro_onnx API varies by version; try passing providers if supported.
        kokoro_sig = inspect.signature(Kokoro)
        if "providers" in kokoro_sig.parameters:
            self.kokoro = Kokoro(self.model_path, self.voices_path, providers=self.providers)
        else:
            self.kokoro = Kokoro(self.model_path, self.voices_path)

    def list_voices(self) -> List[str]:
        """Return the sorted voice ids found in the voices file (cached after first call).

        Raises:
            ValueError: if a ``.bin`` voices bundle is not a valid zip archive.
        """
        if self._voices_cache is not None:
            return self._voices_cache

        p = Path(self.voices_path)
        voices: List[str] = []
        if p.suffix == ".bin":
            # voices-v1.0.bin is a zip containing <voice_id>.npy entries.
            try:
                with zipfile.ZipFile(str(p), "r") as z:
                    for name in z.namelist():
                        if not name.endswith(".npy"):
                            continue
                        voice_id = name[: -len(".npy")]
                        if voice_id:
                            voices.append(voice_id)
            except zipfile.BadZipFile as e:
                raise ValueError(f"Invalid voices bundle (expected zip): {p}") from e
            voices = sorted(set(voices))
        elif p.suffix == ".npz":
            # np.load returns an NpzFile mapping of arrays.
            with np.load(str(p)) as z:
                voices = sorted(list(z.files))
        elif p.suffix == ".json":
            with p.open("r", encoding="utf-8") as f:
                data = json.load(f)
            if isinstance(data, dict):
                voices = sorted([str(k) for k in data.keys()])
            elif isinstance(data, list):
                voices = sorted([str(v) for v in data])

        self._voices_cache = voices
        return voices

    def _ensure_voices_file(self) -> None:
        """Keep ``self.voices_path`` if usable, else switch to the first existing fallback.

        Raises:
            FileNotFoundError: if neither the configured path nor any fallback exists.
        """
        p = Path(self.voices_path)
        if p.exists() and p.suffix in {".bin", ".npz", ".npy", ".json"}:
            return

        # Try common fallbacks in models/.
        candidates = [
            Path("models/voices-v1.0.bin"),
            Path("models/voices.npz"),
            Path("models/voices.json"),
        ]
        for c in candidates:
            if c.exists():
                self.voices_path = str(c)
                return

        raise FileNotFoundError(
            f"Voices file not found. Expected {self.voices_path} or one of: {', '.join(str(c) for c in candidates)}"
        )

    def split_sentences(self, text: str) -> List[str]:
        """Split ``text`` on whitespace that follows ``.``, ``?`` or ``!``.

        The lookbehinds skip abbreviation-like endings such as "Mr." and
        "e.g.". Empty pieces are dropped and each sentence is stripped.
        """
        # Heuristic sentence splitting suited for light novels.
        sentences = re.split(r"(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=\.|\?|\!)\s+", text)
        return [s.strip() for s in sentences if s and s.strip()]

    def split_paragraphs(self, paragraphs: List[str]) -> List[tuple[int, int, str, bool]]:
        """Flatten paragraphs into (paragraph_index, sentence_index, sentence_text, is_last_in_paragraph)."""
        out: List[tuple[int, int, str, bool]] = []
        for p_idx, p in enumerate(paragraphs):
            p = (p or "").strip()
            if not p:
                # Blank paragraphs are skipped but still consume their index.
                continue
            sentences = self.split_sentences(p)
            if not sentences:
                sentences = [p]
            for s_idx, s in enumerate(sentences):
                out.append((p_idx, s_idx, s, s_idx == (len(sentences) - 1)))
        return out

    def _iter_pcm_frames(self, pcm16: bytes, frame_bytes: int) -> Iterable[bytes]:
        """Yield ``pcm16`` in ``frame_bytes``-sized slices (last may be shorter).

        A non-positive ``frame_bytes`` yields the whole buffer as one frame.
        """
        if frame_bytes <= 0:
            yield pcm16
            return
        for i in range(0, len(pcm16), frame_bytes):
            yield pcm16[i : i + frame_bytes]

    async def synthesize_sentence_pcm16(self, sentence: str, voice: str, speed: float) -> bytes:
        """Synthesize one sentence in the default executor and return int16 PCM bytes."""
        loop = asyncio.get_running_loop()
        # kokoro.create is blocking; keep it off the event loop thread.
        audio, _ = await loop.run_in_executor(None, self.kokoro.create, sentence, voice, speed)
        # Clip before scaling so out-of-range samples cannot wrap around in int16.
        audio_int16 = (np.clip(audio, -1.0, 1.0) * 32767).astype(np.int16)
        return audio_int16.tobytes()

    async def generate_audio_stream(
        self,
        text: str,
        voice: str = "af_bella",
        speed: float = 1.0,
        prefetch_sentences: int = 3,
        frame_ms: int = 200,
        cancel_event: Optional[asyncio.Event] = None,
    ) -> AsyncIterator[tuple[str, bytes]]:
        """Yield (sentence_text, pcm16_frame_bytes) in a continuous stream.

        This pre-synthesizes up to `prefetch_sentences` sentences ahead to reduce
        boundary pauses, and yields audio in fixed-duration frames.

        Setting `cancel_event` ends the stream early. An error raised while
        synthesizing a sentence is re-raised to the consumer after the audio
        that was already produced has been yielded.
        """
        sentences = self.split_sentences(text)
        queue: asyncio.Queue[Optional[tuple[str, bytes]]] = asyncio.Queue(maxsize=max(1, prefetch_sentences))

        frame_samples = int(self.sample_rate * (frame_ms / 1000.0))
        frame_bytes = frame_samples * 2  # int16 mono

        async def producer() -> None:
            cancelled = False
            try:
                for s in sentences:
                    if cancel_event is not None and cancel_event.is_set():
                        break
                    if not s:
                        continue
                    pcm16 = await self.synthesize_sentence_pcm16(s, voice=voice, speed=speed)
                    await queue.put((s, pcm16))
            except asyncio.CancelledError:
                cancelled = True
                raise
            finally:
                # When cancelled, the consumer has stopped reading: putting the
                # end marker on a full queue would block forever.
                if not cancelled:
                    await queue.put(None)

        producer_task = asyncio.create_task(producer())
        try:
            while True:
                item = await queue.get()
                if item is None:
                    break
                sentence, pcm16 = item
                for frame in self._iter_pcm_frames(pcm16, frame_bytes=frame_bytes):
                    if cancel_event is not None and cancel_event.is_set():
                        return
                    yield (sentence, frame)
            # End marker seen: surface any synthesis error instead of hiding it.
            await producer_task
        finally:
            producer_task.cancel()
            # Collect the task's outcome (result, error or cancellation)
            # without raising from this cleanup path.
            await asyncio.gather(producer_task, return_exceptions=True)

    async def generate_audio_stream_paragraphs(
        self,
        paragraphs: List[str],
        voice: str = "af_bella",
        speed: float = 1.0,
        prefetch_sentences: int = 3,
        frame_ms: int = 200,
        cancel_event: Optional[asyncio.Event] = None,
        *,
        pause_sentence_ms: int = 120,
        pause_period_ms: int = 180,
        pause_exclaim_ms: int = 200,
        pause_question_ms: int = 260,
        pause_paragraph_extra_ms: int = 240,
    ) -> AsyncIterator[tuple[int, int, str, bytes]]:
        """Yield (paragraph_index, sentence_index, sentence_text, pcm16_frame_bytes).

        Adds a small silence pause after each sentence, and a larger one at paragraph boundaries.
        Silence frames carry the indices and text of the sentence they follow.

        Setting `cancel_event` ends the stream early. An error raised while
        synthesizing a sentence is re-raised to the consumer after the audio
        that was already produced has been yielded.
        """
        segments = self.split_paragraphs(paragraphs)
        queue: asyncio.Queue[Optional[tuple[int, int, str, bytes, int]]] = asyncio.Queue(
            maxsize=max(1, prefetch_sentences)
        )

        frame_samples = int(self.sample_rate * (frame_ms / 1000.0))
        frame_bytes = frame_samples * 2  # int16 mono

        def pause_ms_for(sentence: str, is_last_in_paragraph: bool) -> int:
            # Pause length depends on the closing punctuation, plus an extra
            # gap after the last sentence of a paragraph.
            s = sentence.rstrip()
            base = pause_sentence_ms
            if s.endswith('?'):
                base = pause_question_ms
            elif s.endswith('!'):
                base = pause_exclaim_ms
            elif s.endswith('.'):
                base = pause_period_ms
            if is_last_in_paragraph:
                base += pause_paragraph_extra_ms
            return max(0, int(base))

        async def producer() -> None:
            cancelled = False
            try:
                for p_idx, s_idx, s, is_last in segments:
                    if cancel_event is not None and cancel_event.is_set():
                        break
                    if not s:
                        continue
                    pcm16 = await self.synthesize_sentence_pcm16(s, voice=voice, speed=speed)
                    pause_ms = pause_ms_for(s, is_last)
                    await queue.put((p_idx, s_idx, s, pcm16, pause_ms))
            except asyncio.CancelledError:
                cancelled = True
                raise
            finally:
                # When cancelled, the consumer has stopped reading: putting the
                # end marker on a full queue would block forever.
                if not cancelled:
                    await queue.put(None)

        producer_task = asyncio.create_task(producer())
        try:
            while True:
                item = await queue.get()
                if item is None:
                    break
                p_idx, s_idx, sentence, pcm16, pause_ms = item
                for frame in self._iter_pcm_frames(pcm16, frame_bytes=frame_bytes):
                    if cancel_event is not None and cancel_event.is_set():
                        return
                    yield (p_idx, s_idx, sentence, frame)

                if pause_ms > 0:
                    silence_samples = int(self.sample_rate * (pause_ms / 1000.0))
                    silence_bytes = silence_samples * 2
                    # Chunk silence into normal frames.
                    silence = b"\x00" * silence_bytes
                    for frame in self._iter_pcm_frames(silence, frame_bytes=frame_bytes):
                        if cancel_event is not None and cancel_event.is_set():
                            return
                        yield (p_idx, s_idx, sentence, frame)
            # End marker seen: surface any synthesis error instead of hiding it.
            await producer_task
        finally:
            producer_task.cancel()
            # Collect the task's outcome (result, error or cancellation)
            # without raising from this cleanup path.
            await asyncio.gather(producer_task, return_exceptions=True)
256
+
257
if __name__ == "__main__":
    # Manual smoke test: synthesize a short text and report the frame sizes.
    async def test():
        tts = TTSEngine()
        text = "Hello world! This is a test of the automatic text to speech system. It should be fast."
        count = 0
        # The stream yields (sentence, frame) tuples; measure the audio bytes,
        # not the tuple (whose length is always 2).
        async for _sentence, frame in tts.generate_audio_stream(text):
            count += len(frame)
            print(f"Generated chunk of size {len(frame)}")
        print(f"Total bytes: {count}")

    asyncio.run(test())
backend/uv.lock ADDED
The diff for this file is too large to render. See raw diff