Spaces:
Sleeping
Sleeping
Commit ·
f8a7e1d
1
Parent(s): bbe8cec
Deploy existing backend in Docker Space on port 7860
Browse files
Co-authored-by: Cursor <cursoragent@cursor.com>
- Dockerfile +33 -0
- backend/Dockerfile +34 -0
- backend/download_models.py +24 -0
- backend/pyproject.toml +19 -0
- backend/requirements.txt +9 -0
- backend/scraper.py +153 -0
- backend/server.py +366 -0
- backend/tts.py +268 -0
- backend/uv.lock +0 -0
Dockerfile
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# syntax=docker/dockerfile:1
# Image for the Docker Space, built from the repository root: installs the
# backend's dependencies with uv and serves the app on port 7860.
FROM python:3.12-slim

WORKDIR /app/backend

# System deps for lxml/bs4 + general networking
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        curl \
        ca-certificates \
        gcc \
        g++ \
        libc6-dev \
    && rm -rf /var/lib/apt/lists/*

# Install uv
RUN curl -LsSf https://astral.sh/uv/install.sh | sh \
    && ln -s /root/.local/bin/uv /usr/local/bin/uv

# Copy dependency metadata first for better layer caching
COPY backend/pyproject.toml backend/uv.lock* /app/backend/

# Create venv + install deps
RUN uv venv --python 3.12 \
    && uv sync

# Copy backend app code
COPY backend /app/backend

EXPOSE 7860

# Keep backend logic unchanged, but bind Space app to 7860.
# The uvicorn CLI imports `server:app` from the working directory, which
# replaces the escaped `python -c "import uvicorn, server; ..."` one-liner.
# `exec` lets the server process replace the shell so it receives stop signals.
CMD ["/bin/sh", "-lc", "uv run python download_models.py && exec uv run uvicorn server:app --host 0.0.0.0 --port 7860"]
|
backend/Dockerfile
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# syntax=docker/dockerfile:1
# Image built with backend/ as the build context: installs dependencies with
# uv and runs server.py, which listens on port 8000.
FROM python:3.12-slim

WORKDIR /app

# System deps for lxml/bs4 + general networking
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        curl \
        ca-certificates \
        gcc \
        g++ \
        libc6-dev \
    && rm -rf /var/lib/apt/lists/*

# Install uv
RUN curl -LsSf https://astral.sh/uv/install.sh | sh \
    && ln -s /root/.local/bin/uv /usr/local/bin/uv

# Copy dependency metadata first for better layer caching
COPY pyproject.toml uv.lock* /app/

# Create venv + install deps
RUN uv venv --python 3.12 \
    && uv sync

# Copy app code
COPY . /app/

EXPOSE 8000

# Ensure models exist, then start server. `--no-sync` stops `uv run` from
# re-syncing the environment at start-up, so the packages installed at build
# time (including the onnxruntime build) are used exactly as they are.
CMD ["/bin/sh", "-lc", "uv run --no-sync python download_models.py && uv run --no-sync python server.py"]
|
backend/download_models.py
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import requests
|
| 3 |
+
|
| 4 |
+
# Kokoro v1.0 (recommended): larger voice pack.
# Both assets are published under the same GitHub release tag.
_RELEASE_BASE = "https://github.com/thewh1teagle/kokoro-onnx/releases/download/model-files-v1.0"
MODEL_URL = f"{_RELEASE_BASE}/kokoro-v1.0.onnx"
VOICES_URL = f"{_RELEASE_BASE}/voices-v1.0.bin"
|
| 7 |
+
|
| 8 |
+
def download_file(url, path, timeout=60):
    """Stream *url* to *path* and report whether the file was written.

    The body is written to ``<path>.part`` and renamed onto *path* only after
    the whole response has been read. An interrupted download therefore never
    leaves a truncated file at *path*, which callers would otherwise take for
    a finished download when they check that the path exists.

    Args:
        url: Address of the file to fetch.
        path: Destination file path.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``.

    Returns:
        True if the file was downloaded, False if the server answered with a
        status other than 200. Network and file-system errors propagate.
    """
    print(f"Downloading {url} to {path}...")
    partial_path = f"{path}.part"
    try:
        # The context manager releases the connection even on early return.
        with requests.get(url, stream=True, timeout=timeout) as response:
            if response.status_code != 200:
                print(f"Failed to download {url}")
                return False
            with open(partial_path, 'wb') as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)
        os.replace(partial_path, path)
    finally:
        # Only present when the download did not complete.
        if os.path.exists(partial_path):
            os.remove(partial_path)
    print(f"Downloaded {path}")
    return True
|
| 18 |
+
|
| 19 |
+
if __name__ == "__main__":
    # Fetch each model artefact that is not already present under models/.
    os.makedirs("models", exist_ok=True)
    for source_url, target_path in (
        (MODEL_URL, "models/kokoro-v1.0.onnx"),
        (VOICES_URL, "models/voices-v1.0.bin"),
    ):
        if os.path.exists(target_path):
            continue
        download_file(source_url, target_path)
|
backend/pyproject.toml
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[project]
|
| 2 |
+
name = "ln-tts-backend"
|
| 3 |
+
version = "0.1.0"
|
| 4 |
+
description = "FastAPI backend for LN-TTS (NovelCool scraping + local TTS streaming)"
|
| 5 |
+
requires-python = ">=3.10,<3.13"
|
| 6 |
+
dependencies = [
|
| 7 |
+
"fastapi>=0.128.0",
|
| 8 |
+
"uvicorn[standard]>=0.30.0",
|
| 9 |
+
"aiohttp>=3.9.5",
|
| 10 |
+
"beautifulsoup4>=4.12.3",
|
| 11 |
+
"lxml>=5.2.2",
|
| 12 |
+
"numpy>=1.26.0",
|
| 13 |
+
"onnxruntime>=1.20.0",
|
| 14 |
+
"kokoro-onnx>=0.2.6",
|
| 15 |
+
"requests>=2.32.0",
|
| 16 |
+
]
|
| 17 |
+
|
| 18 |
+
[tool.uv]
|
| 19 |
+
package = false
|
backend/requirements.txt
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
fastapi>=0.128.0
|
| 2 |
+
uvicorn[standard]>=0.30.0
|
| 3 |
+
aiohttp>=3.9.5
|
| 4 |
+
beautifulsoup4>=4.12.3
|
| 5 |
+
lxml>=5.2.2
|
| 6 |
+
numpy>=1.26.0
|
| 7 |
+
onnxruntime>=1.20.0
|
| 8 |
+
kokoro-onnx>=0.2.6
|
| 9 |
+
requests>=2.32.0
|
backend/scraper.py
ADDED
|
@@ -0,0 +1,153 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import aiohttp
|
| 2 |
+
from bs4 import BeautifulSoup
|
| 3 |
+
import re
|
| 4 |
+
from urllib.parse import urljoin
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
class NovelCoolScraper:
    """Scrape NovelCool chapter pages and novel index pages.

    Each public coroutine downloads one page with a short-lived aiohttp
    session and parses it with BeautifulSoup using the lxml parser.
    """

    def __init__(self):
        # Browser-like User-Agent sent with every request.
        self.headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
        }

    async def _fetch_html(self, url: str) -> str:
        """Download *url* and return the response body as text.

        Raises:
            Exception: if the response status is not 200.
        """
        # NOTE(review): the URL is fetched as given, with no host allow-list.
        # If callers pass client-supplied URLs, consider restricting hosts.
        async with aiohttp.ClientSession() as session:
            async with session.get(url, headers=self.headers) as response:
                if response.status != 200:
                    raise Exception(f"Failed to fetch page: {response.status}")
                return await response.text()

    async def scrape_chapter(self, url: str):
        """Scrape one chapter page.

        Returns:
            A dict with ``title`` (str), ``content`` (list of paragraph
            strings), and ``next_url`` / ``prev_url`` (absolute URL or None).

        Raises:
            Exception: if the page cannot be fetched or no content container
                is found.
        """
        html = await self._fetch_html(url)

        # NovelCool pages can be large; lxml parser is more reliable here.
        soup = BeautifulSoup(html, 'lxml')

        # Extract Title
        title = "Unknown Chapter"
        title_tag = soup.find('h1')
        if title_tag:
            title = title_tag.get_text(strip=True)
        else:
            page_title = soup.find('title')
            if page_title:
                t = page_title.get_text(strip=True)
                # e.g. "Shadow Slave Chapter 15 - Novel Cool - Best online light novel reading website"
                title = t.split(' - Novel Cool', 1)[0].strip() or t

        # Extract Content
        # In the HTML variant commonly returned to scripted clients, the actual
        # chapter content lives under: div.site-content > div.overflow-hidden
        content_div = soup.select_one('div.site-content div.overflow-hidden')

        if not content_div:
            # Fallback: pick the div with the most <p> tags.
            best = None
            best_count = 0
            for div in soup.find_all('div'):
                ps = div.find_all('p')
                if len(ps) > best_count:
                    best_count = len(ps)
                    best = div
            content_div = best

        if not content_div:
            raise Exception("Could not find chapter content container")

        paragraphs = []
        for p in content_div.find_all('p'):
            classes = p.get('class') or []
            txt = p.get_text(' ', strip=True)
            if not txt:
                continue
            # Anything after the end marker is not chapter text.
            if 'chapter-end-mark' in classes or txt.lower().strip() == 'chapter end':
                break
            paragraphs.append(txt)

        if not paragraphs:
            # No usable <p> tags: fall back to the container's text lines.
            raw_text = content_div.get_text(separator='\n', strip=True)
            paragraphs = [line for line in raw_text.split('\n') if line.strip()]

        # Extract Next/Prev Links
        next_link = None
        prev_link = None

        for a in soup.find_all('a', href=True):
            t = a.get_text(" ", strip=True)
            href = a.get('href')
            if not href:
                continue
            if '/chapter/' not in href:
                continue
            if not next_link and 'Next' in t:
                next_link = href
            if not prev_link and 'Prev' in t:
                prev_link = href
            if next_link and prev_link:
                break

        if next_link:
            next_link = urljoin(url, next_link)
        if prev_link:
            prev_link = urljoin(url, prev_link)

        return {
            "title": title,
            "content": paragraphs,  # Return list of paragraphs for easier chunking
            "next_url": next_link,
            "prev_url": prev_link
        }

    async def scrape_novel_index(self, novel_url: str):
        """Scrape a NovelCool novel page and return a list of chapter links.

        Returns:
            A list of ``{"n": int | None, "title": str, "url": str}`` dicts,
            one per distinct chapter URL, sorted by parsed chapter number.
            Entries without a parsed number sort last.

        Raises:
            Exception: if the page cannot be fetched.
        """
        html = await self._fetch_html(novel_url)

        soup = BeautifulSoup(html, 'lxml')
        links = []
        seen = set()

        for a in soup.find_all('a', href=True):
            href = a.get('href')
            if not href:
                continue
            if '/chapter/' not in href:
                continue
            abs_url = urljoin(novel_url, href)
            if abs_url in seen:
                continue
            title = a.get_text(' ', strip=True)
            if not title:
                # Some chapter links have empty text (icons). Skip them without
                # marking the URL as seen, so a later link to the same chapter
                # that does carry a title is still recorded.
                continue
            seen.add(abs_url)
            # Best-effort chapter number parsing.
            m = re.search(r"(?:Chapter|C)\s*(\d+)", title, flags=re.IGNORECASE)
            n = int(m.group(1)) if m else None
            links.append({"n": n, "title": title, "url": abs_url})

        # Sort by chapter number when possible.
        def chapter_key(item):
            n = item.get('n')
            if isinstance(n, int):
                return n
            # fallback: the sort is stable, so these keep page order at the end
            return 10**9

        links.sort(key=chapter_key)
        return links
|
| 141 |
+
|
| 142 |
+
if __name__ == "__main__":
    import asyncio

    # Manual smoke test: scrape one known chapter and print a short summary.
    scraper = NovelCoolScraper()
    # Test with user provided URL
    url = "https://www.novelcool.com/chapter/Shadow-Slave-Chapter-15/7332162/"
    try:
        result = asyncio.run(scraper.scrape_chapter(url))
        summary = (
            f"Title: {result['title']}",
            f"Paragraphs: {len(result['content'])}",
            f"Next: {result['next_url']}",
        )
        for line in summary:
            print(line)
    except Exception as e:
        print(f"Error: {e}")
|
backend/server.py
ADDED
|
@@ -0,0 +1,366 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import uvicorn
|
| 2 |
+
from fastapi import FastAPI, WebSocket, WebSocketDisconnect, HTTPException
|
| 3 |
+
from fastapi.middleware.cors import CORSMiddleware
|
| 4 |
+
import json
|
| 5 |
+
import asyncio
|
| 6 |
+
import logging
|
| 7 |
+
from scraper import NovelCoolScraper
|
| 8 |
+
from tts import TTSEngine
|
| 9 |
+
import traceback
|
| 10 |
+
from contextlib import asynccontextmanager
|
| 11 |
+
import time
|
| 12 |
+
|
| 13 |
+
# Configure root logging at INFO level and create this module's logger.
|
| 14 |
+
logging.basicConfig(level=logging.INFO)
|
| 15 |
+
logger = logging.getLogger(__name__)
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Create the shared services on startup and drop them on shutdown.

    Sets ``app.state.tts`` (a ``TTSEngine``, or None when it fails to
    initialize), ``app.state.scraper`` and ``app.state.novel_index_cache``.
    A TTS failure is logged and does not stop the application from starting.
    """
    # Startup
    logger.info("Initializing TTS Engine...")
    try:
        import onnxruntime as ort

        logger.info(f"ONNX Runtime providers: {ort.get_available_providers()}")
    except Exception as e:
        # Diagnostic only: failing to list providers must not block startup.
        logger.debug(f"Could not query ONNX Runtime providers: {e}")

    try:
        app.state.tts = TTSEngine()
        logger.info("TTS Engine initialized.")
    except Exception as e:
        # logger.exception keeps the traceback, which the bare message loses.
        logger.exception(f"Failed to initialize TTS Engine: {e}")
        app.state.tts = None

    app.state.scraper = NovelCoolScraper()
    app.state.novel_index_cache = {}
    try:
        yield
    finally:
        # Shutdown: runs even when an exception is thrown in at the yield.
        app.state.tts = None
        app.state.scraper = None
        app.state.novel_index_cache = None
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
# Application instance; `lifespan` builds and tears down the shared services.
app = FastAPI(lifespan=lifespan)

# CORS: any origin, method and header is accepted, without credentials.
app.add_middleware(
    CORSMiddleware,
    allow_credentials=False,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)
|
| 53 |
+
|
| 54 |
+
@app.get("/health")
async def health():
    """Report that the service is up and whether a TTS engine is loaded."""
    tts_ready = app.state.tts is not None
    return {"ok": True, "tts_ready": tts_ready}
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
@app.get("/voices")
async def voices():
    """Return the voice ids of the loaded engine, or an error entry without one."""
    engine = app.state.tts
    if engine:
        return {"voices": engine.list_voices()}
    return {"voices": [], "error": "TTS Engine not initialized"}
|
| 64 |
+
|
| 65 |
+
|
| 66 |
+
@app.get("/novel_index")
async def novel_index(url: str):
    """Scrape the novel page at *url* and return its chapter list (uncached)."""
    if url:
        return {"chapters": await app.state.scraper.scrape_novel_index(url)}
    return {"chapters": [], "error": "url is required"}
|
| 72 |
+
|
| 73 |
+
|
| 74 |
+
async def _get_cached_novel_index(novel_url: str):
    """Return cached chapter list for a novel URL, scraping once per TTL.

    Entries older than the TTL are removed whenever a lookup misses, so the
    cache does not keep growing as clients request different URLs.

    Args:
        novel_url: URL of the novel page; also the cache key.

    Returns:
        The chapter list produced by the scraper (a list of dicts).

    Raises:
        HTTPException: status 400 when *novel_url* is empty.
    """
    if not novel_url:
        raise HTTPException(status_code=400, detail="url is required")

    cache = app.state.novel_index_cache
    if cache is None:
        # Recreate the cache if it was reset to None.
        cache = {}
        app.state.novel_index_cache = cache

    ttl_s = 30 * 60  # 30 minutes
    now = time.monotonic()
    entry = cache.get(novel_url)
    if entry is not None:
        age = now - float(entry.get("ts", 0.0))
        if age < ttl_s:
            return entry.get("chapters") or []

    # Collect the stale keys first so the dict is not changed while iterating.
    stale_keys = [
        key
        for key, cached in cache.items()
        if now - float(cached.get("ts", 0.0)) >= ttl_s
    ]
    for key in stale_keys:
        cache.pop(key, None)

    chapters = await app.state.scraper.scrape_novel_index(novel_url)
    cache[novel_url] = {"ts": now, "chapters": chapters}
    return chapters
|
| 95 |
+
|
| 96 |
+
|
| 97 |
+
@app.get("/novel_meta")
async def novel_meta(url: str):
    """Return the chapter count for the novel at *url*.

    The count is the highest parsed chapter number, or the number of listed
    chapters when no positive chapter number was parsed.
    """
    chapters = await _get_cached_novel_index(url)
    numbers = (c.get("n") for c in chapters if isinstance(c, dict))
    highest = max((n for n in numbers if isinstance(n, int)), default=0)
    if highest > 0:
        return {"count": highest}
    return {"count": len(chapters)}
|
| 109 |
+
|
| 110 |
+
|
| 111 |
+
@app.get("/novel_chapter")
async def novel_chapter(url: str, n: int):
    """Resolve chapter number *n* of the novel at *url* to a title and URL.

    Raises:
        HTTPException: status 400 when *n* is outside ``1..limit``, where
            limit is the highest chapter number seen while searching, or the
            list length when no positive number was seen.
    """
    chapters = await _get_cached_novel_index(url)

    # Prefer resolving by parsed chapter number, not list position.
    match: dict | None = None
    highest = 0
    for entry in chapters:
        if not isinstance(entry, dict):
            continue
        number = entry.get("n")
        if not isinstance(number, int):
            continue
        highest = max(highest, number)
        if number == n:
            match = entry
            break

    limit = highest if highest > 0 else len(chapters)
    if not 1 <= n <= limit:
        raise HTTPException(status_code=400, detail=f"chapter n must be between 1 and {limit}")

    if match is not None:
        item = match
    elif n - 1 < len(chapters):
        # Fallback: old positional behavior.
        item = chapters[n - 1]
    else:
        item = {}
    return {"n": n, "title": item.get("title"), "url": item.get("url")}
|
| 137 |
+
|
| 138 |
+
@app.websocket("/ws")
async def websocket_endpoint(websocket: WebSocket):
    """Serve the JSON command protocol on a single WebSocket connection.

    Each text message is a JSON object with a ``command`` key:

    * ``scrape``: scrape the chapter at ``url`` and reply with ``scrape_result``.
    * ``tts``: stream audio for ``text`` as binary frames, then ``tts_complete``.
    * ``play``: scrape the chapter at ``url``, send ``chapter_info``, then send
      ``sentence`` messages and binary audio frames, ending with
      ``chapter_complete``. While a chapter is playing, the messages
      ``pause``, ``resume`` and ``stop`` are read from the same socket.

    Errors are reported to the client as JSON and the loop continues. The
    handler returns when the client disconnects.
    """
    await websocket.accept()
    cancel_event = asyncio.Event()

    try:
        while True:
            data = await websocket.receive_text()
            try:
                message = json.loads(data)
                command = message.get("command")

                if command == "scrape":
                    url = message.get("url")
                    if not url:
                        await websocket.send_json({"error": "URL is required"})
                        continue

                    logger.info(f"Scraping URL: {url}")
                    try:
                        result = await app.state.scraper.scrape_chapter(url)
                        await websocket.send_json({"type": "scrape_result", "data": result})
                    except Exception as e:
                        logger.error(f"Scrape error: {e}")
                        await websocket.send_json({"type": "error", "message": str(e)})

                elif command == "tts":
                    text = message.get("text")
                    voice = message.get("voice", "af_bella")
                    speed = message.get("speed", 1.0)

                    if not text:
                        await websocket.send_json({"error": "Text is required"})
                        continue

                    logger.info(f"Streaming TTS for text length: {len(text)}")
                    if not app.state.tts:
                        await websocket.send_json({"error": "TTS Engine not initialized"})
                        continue

                    # A "stop" during an earlier "play" leaves the event set.
                    # Clear it here, as the "play" branch does, so this stream
                    # does not start out cancelled.
                    cancel_event.clear()

                    # Ensure voice is valid for the loaded voice pack.
                    try:
                        available = app.state.tts.list_voices()
                        if available and voice not in available:
                            voice = available[0]
                    except Exception:
                        # Best effort: keep the requested voice if listing fails.
                        pass

                    # Stream audio
                    try:
                        async for _, audio_chunk in app.state.tts.generate_audio_stream(
                            text,
                            voice=voice,
                            speed=float(speed),
                            prefetch_sentences=3,
                            frame_ms=200,
                            cancel_event=cancel_event,
                        ):
                            await websocket.send_bytes(audio_chunk)

                        await websocket.send_json({"type": "tts_complete"})
                    except Exception as e:
                        logger.error(f"TTS error: {e}")
                        await websocket.send_json({"type": "error", "message": str(e)})

                elif command == "play":
                    # Single-shot: scrape the chapter, then stream it sentence-by-sentence.
                    url = message.get("url")
                    voice = message.get("voice", "af_bella")
                    speed = float(message.get("speed", 1.0))
                    prefetch = int(message.get("prefetch", 3))
                    frame_ms = int(message.get("frame_ms", 200))
                    start_paragraph = int(message.get("start_paragraph", 0) or 0)

                    if not url:
                        await websocket.send_json({"type": "error", "message": "URL is required"})
                        continue
                    if not app.state.tts:
                        await websocket.send_json({"type": "error", "message": "TTS Engine not initialized"})
                        continue

                    cancel_event.clear()
                    paused = False

                    logger.info(f"Play request: url={url} voice={voice} speed={speed}")

                    # Ensure voice is valid for the loaded voice pack.
                    try:
                        available = app.state.tts.list_voices()
                        if available and voice not in available:
                            voice = available[0]
                    except Exception:
                        # Best effort: keep the requested voice if listing fails.
                        pass
                    try:
                        chapter = await app.state.scraper.scrape_chapter(url)
                    except Exception as e:
                        await websocket.send_json({"type": "error", "message": str(e)})
                        continue

                    title = chapter.get("title")
                    paragraphs = chapter.get("content") or []

                    if start_paragraph < 0:
                        start_paragraph = 0
                    # NOTE(review): start_paragraph == len(paragraphs) is not
                    # clamped, so it yields an empty slice and the chapter
                    # completes with no audio. Confirm this is intended.
                    if start_paragraph > len(paragraphs):
                        start_paragraph = max(0, len(paragraphs) - 1)

                    paragraphs_slice = paragraphs[start_paragraph:] if start_paragraph else paragraphs
                    await websocket.send_json(
                        {
                            "type": "chapter_info",
                            "title": title,
                            "url": url,
                            "next_url": chapter.get("next_url"),
                            "prev_url": chapter.get("prev_url"),
                            "paragraphs": paragraphs,
                            "start_paragraph": start_paragraph,
                            "audio": {
                                "encoding": "pcm_s16le",
                                "sample_rate": app.state.tts.sample_rate,
                                "channels": 1,
                                "frame_ms": frame_ms,
                            },
                        }
                    )

                    last_key = None
                    # Background receive used to pick up control messages
                    # between audio frames. At most one receive is pending.
                    control_task: asyncio.Task[str] | None = None
                    try:
                        control_task = asyncio.create_task(websocket.receive_text())

                        async def handle_control_payload(payload: str) -> None:
                            """Apply one pause/resume/stop control message."""
                            nonlocal paused
                            try:
                                msg = json.loads(payload)
                            except json.JSONDecodeError:
                                return
                            if not isinstance(msg, dict):
                                # Valid JSON that is not an object has no command.
                                return
                            cmd = msg.get("command")
                            if cmd == "pause":
                                paused = True
                            elif cmd == "resume":
                                paused = False
                            elif cmd == "stop":
                                cancel_event.set()

                        async for p_idx, s_idx, sentence, audio_frame in app.state.tts.generate_audio_stream_paragraphs(
                            paragraphs_slice,
                            voice=voice,
                            speed=speed,
                            prefetch_sentences=prefetch,
                            frame_ms=frame_ms,
                            cancel_event=cancel_event,
                        ):
                            # Consume any pending control messages without concurrent receives.
                            if control_task is not None and control_task.done():
                                try:
                                    await handle_control_payload(control_task.result())
                                except WebSocketDisconnect:
                                    cancel_event.set()
                                # Do not start another receive once the stream
                                # is cancelled (stop or disconnect).
                                if cancel_event.is_set():
                                    control_task = None
                                else:
                                    control_task = asyncio.create_task(websocket.receive_text())

                            if paused and control_task is not None:
                                control_task.cancel()
                                control_task = None

                            while paused and not cancel_event.is_set():
                                # Block until we get a control message.
                                try:
                                    payload = await websocket.receive_text()
                                except WebSocketDisconnect:
                                    cancel_event.set()
                                    break
                                await handle_control_payload(payload)

                            if not paused and not cancel_event.is_set() and control_task is None:
                                control_task = asyncio.create_task(websocket.receive_text())

                            if cancel_event.is_set():
                                break
                            key = (p_idx + start_paragraph, s_idx, sentence)
                            if key != last_key:
                                # First frame of a new sentence: announce it
                                # before sending its audio.
                                last_key = key
                                await websocket.send_json(
                                    {
                                        "type": "sentence",
                                        "text": sentence,
                                        "paragraph_index": int(p_idx + start_paragraph),
                                        "sentence_index": int(s_idx),
                                    }
                                )
                            await websocket.send_bytes(audio_frame)

                            # Pace frames close to real-time so UI updates (sentence highlighting)
                            # match what is audible, even when synthesis runs faster than realtime.
                            try:
                                # 2 bytes per sample (pcm_s16le, one channel).
                                await asyncio.sleep(len(audio_frame) / (2 * app.state.tts.sample_rate))
                            except Exception:
                                pass

                        if control_task is not None:
                            control_task.cancel()
                            control_task = None

                        await websocket.send_json(
                            {
                                "type": "chapter_complete",
                                "next_url": chapter.get("next_url"),
                                "prev_url": chapter.get("prev_url"),
                            }
                        )
                    except Exception as e:
                        logger.error(f"Play stream error: {e}")
                        await websocket.send_json({"type": "error", "message": str(e)})
                    finally:
                        # On the error path the background receive may still be
                        # pending. Cancel it so it cannot overlap with the
                        # receive_text() call at the top of the main loop.
                        if control_task is not None and not control_task.done():
                            control_task.cancel()

                else:
                    await websocket.send_json({"error": "Unknown command"})

            except json.JSONDecodeError:
                await websocket.send_json({"error": "Invalid JSON"})
            except Exception as e:
                logger.error(f"Error processing message: {e}")
                traceback.print_exc()
                await websocket.send_json({"error": "Internal server error"})

    except WebSocketDisconnect:
        logger.info("Client disconnected")
    except Exception as e:
        logger.error(f"WebSocket error: {e}")
|
| 364 |
+
|
| 365 |
+
# Direct execution (`python server.py`): serve the app on all interfaces, port 8000.
if __name__ == "__main__":
|
| 366 |
+
uvicorn.run(app, host="0.0.0.0", port=8000)
|
backend/tts.py
ADDED
|
@@ -0,0 +1,268 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import re
|
| 3 |
+
import numpy as np
|
| 4 |
+
import onnxruntime as ort
|
| 5 |
+
from kokoro_onnx import Kokoro
|
| 6 |
+
import asyncio
|
| 7 |
+
import json
|
| 8 |
+
import inspect
|
| 9 |
+
from typing import AsyncIterator, Iterable, List, Optional
|
| 10 |
+
import contextlib
|
| 11 |
+
from pathlib import Path
|
| 12 |
+
import zipfile
|
| 13 |
+
|
| 14 |
+
class TTSEngine:
    """Kokoro ONNX text-to-speech engine that streams 16-bit mono PCM.

    Text is split into sentences, each sentence is synthesized in an executor
    thread, and the audio is yielded in fixed-duration frames while later
    sentences are synthesized ahead of playback.
    """

    # Heuristic sentence boundary: whitespace following '.', '?' or '!',
    # except after abbreviation-like patterns such as "e.g." or "Mr.".
    _SENTENCE_SPLIT_RE = re.compile(r"(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=\.|\?|\!)\s+")

    def __init__(
        self,
        model_path: str = "models/kokoro-v1.0.onnx",
        voices_path: str = "models/voices-v1.0.bin",
    ):
        """Load the Kokoro model and locate a voices file.

        Args:
            model_path: Path to the Kokoro ONNX model.
            voices_path: Preferred voices file; if it is missing, known
                fallbacks under ``models/`` are tried.

        Raises:
            FileNotFoundError: If the model or every candidate voices file
                is missing.
        """
        # Ensure models exist
        if not os.path.exists(model_path):
            raise FileNotFoundError(f"Model not found at {model_path}. Run download_models.py first.")

        self.model_path = model_path
        self.voices_path = voices_path

        # Newer kokoro-onnx versions support the v1.0 voices bundle (voices-v1.0.bin).
        # We also keep backward-compatible support for voices.json/voices.npz.
        self._ensure_voices_file()

        self.sample_rate = 24000  # Kokoro default
        self._voices_cache: Optional[List[str]] = None

        # CPU-only mode for maximum compatibility.
        self.providers = ["CPUExecutionProvider"]

        # kokoro_onnx API varies by version; try passing providers if supported.
        kokoro_sig = inspect.signature(Kokoro)
        if "providers" in kokoro_sig.parameters:
            self.kokoro = Kokoro(self.model_path, self.voices_path, providers=self.providers)
        else:
            self.kokoro = Kokoro(self.model_path, self.voices_path)

    def list_voices(self) -> List[str]:
        """Return the sorted voice ids found in the voices file.

        The file is read once and the result cached. A fresh list is returned
        on every call so callers cannot mutate the cache.

        Raises:
            ValueError: If a ``.bin`` voices bundle is not a valid zip archive.
        """
        if self._voices_cache is None:
            self._voices_cache = self._read_voice_ids(Path(self.voices_path))
        return list(self._voices_cache)

    @staticmethod
    def _read_voice_ids(path: Path) -> List[str]:
        """Read voice ids from a ``.bin``, ``.npz`` or ``.json`` voices file.

        Any other suffix (including ``.npy``) yields an empty list.
        """
        if path.suffix == ".bin":
            # voices-v1.0.bin is a zip containing <voice_id>.npy entries.
            try:
                with zipfile.ZipFile(str(path), "r") as bundle:
                    names = bundle.namelist()
            except zipfile.BadZipFile as e:
                raise ValueError(f"Invalid voices bundle (expected zip): {path}") from e
            voice_ids = {name[: -len(".npy")] for name in names if name.endswith(".npy")}
            voice_ids.discard("")  # an entry named exactly ".npy" has no id
            return sorted(voice_ids)

        if path.suffix == ".npz":
            # np.load returns an NpzFile mapping of arrays.
            with np.load(str(path)) as archive:
                return sorted(archive.files)

        if path.suffix == ".json":
            with path.open("r", encoding="utf-8") as f:
                data = json.load(f)
            if isinstance(data, dict):
                return sorted(str(k) for k in data)
            if isinstance(data, list):
                return sorted(str(v) for v in data)

        return []

    def _ensure_voices_file(self) -> None:
        """Point ``self.voices_path`` at an existing voices file.

        The configured path is kept when it exists and has a recognised
        suffix; otherwise the first existing fallback under ``models/`` wins.

        Raises:
            FileNotFoundError: If neither the configured path nor any
                fallback exists.
        """
        p = Path(self.voices_path)
        if p.exists() and p.suffix in {".bin", ".npz", ".npy", ".json"}:
            return

        # Try common fallbacks in models/.
        candidates = [
            Path("models/voices-v1.0.bin"),
            Path("models/voices.npz"),
            Path("models/voices.json"),
        ]
        for c in candidates:
            if c.exists():
                self.voices_path = str(c)
                return

        raise FileNotFoundError(
            f"Voices file not found. Expected {self.voices_path} or one of: {', '.join(str(c) for c in candidates)}"
        )

    def split_sentences(self, text: str) -> List[str]:
        """Split ``text`` into stripped, non-empty sentences.

        Heuristic sentence splitting suited for light novels.
        """
        pieces = self._SENTENCE_SPLIT_RE.split(text)
        return [s.strip() for s in pieces if s and s.strip()]

    def split_paragraphs(self, paragraphs: List[str]) -> List[tuple[int, int, str, bool]]:
        """Flatten paragraphs into (paragraph_index, sentence_index, sentence_text, is_last_in_paragraph).

        Blank or ``None`` paragraphs are skipped, but they still consume their
        index, so ``paragraph_index`` always refers to the input list.
        """
        out: List[tuple[int, int, str, bool]] = []
        for p_idx, p in enumerate(paragraphs):
            p = (p or "").strip()
            if not p:
                continue
            # Fall back to the whole paragraph if the splitter returns nothing.
            sentences = self.split_sentences(p) or [p]
            last = len(sentences) - 1
            for s_idx, s in enumerate(sentences):
                out.append((p_idx, s_idx, s, s_idx == last))
        return out

    def _iter_pcm_frames(self, pcm16: bytes, frame_bytes: int) -> Iterable[bytes]:
        """Yield ``pcm16`` in consecutive chunks of at most ``frame_bytes`` bytes.

        A non-positive ``frame_bytes`` disables chunking: the whole buffer is
        yielded once.
        """
        if frame_bytes <= 0:
            yield pcm16
            return
        for i in range(0, len(pcm16), frame_bytes):
            yield pcm16[i : i + frame_bytes]

    def _frame_byte_count(self, frame_ms: int) -> int:
        """Return the size in bytes of ``frame_ms`` milliseconds of int16 mono audio."""
        frame_samples = int(self.sample_rate * (frame_ms / 1000.0))
        return frame_samples * 2  # int16 mono

    async def synthesize_sentence_pcm16(self, sentence: str, voice: str, speed: float) -> bytes:
        """Synthesize one sentence and return it as int16 PCM bytes.

        ``kokoro.create`` runs in the default executor so the event loop is
        not blocked. Samples are clipped to [-1.0, 1.0] before scaling.
        """
        loop = asyncio.get_running_loop()
        audio, _ = await loop.run_in_executor(None, self.kokoro.create, sentence, voice, speed)
        audio_int16 = (np.clip(audio, -1.0, 1.0) * 32767).astype(np.int16)
        return audio_int16.tobytes()

    async def _produce_pcm(
        self,
        queue: "asyncio.Queue",
        jobs: Iterable[tuple],
        voice: str,
        speed: float,
        cancel_event: Optional[asyncio.Event],
    ) -> None:
        """Synthesize ``(tag, sentence)`` jobs into ``queue``; finish with ``None``.

        Each non-empty sentence is enqueued as ``(tag, sentence, pcm16)``.
        The ``None`` sentinel is enqueued when the jobs run out, when
        ``cancel_event`` is set, or when synthesis raises, so the consumer
        never waits on a dead producer.
        """
        consumer_gone = False
        try:
            for tag, sentence in jobs:
                if cancel_event is not None and cancel_event.is_set():
                    break
                if not sentence:
                    continue
                pcm16 = await self.synthesize_sentence_pcm16(sentence, voice=voice, speed=speed)
                await queue.put((tag, sentence, pcm16))
        except asyncio.CancelledError:
            # This task is only cancelled by the consumer's cleanup, i.e. when
            # nobody reads the queue any more. Enqueuing a sentinel here would
            # block forever on a full queue and deadlock that cleanup.
            consumer_gone = True
            raise
        finally:
            if not consumer_gone:
                await queue.put(None)

    @staticmethod
    async def _stop_producer(task: "asyncio.Task") -> None:
        """Cancel the producer task and wait until it has finished.

        CancelledError is a BaseException, so it must be suppressed
        explicitly; otherwise stopping a stream early would raise it in the
        caller. Producer failures are swallowed as well: the stream simply
        ends early.
        """
        task.cancel()
        with contextlib.suppress(asyncio.CancelledError, Exception):
            await task

    async def generate_audio_stream(
        self,
        text: str,
        voice: str = "af_bella",
        speed: float = 1.0,
        prefetch_sentences: int = 3,
        frame_ms: int = 200,
        cancel_event: Optional[asyncio.Event] = None,
    ) -> AsyncIterator[tuple[str, bytes]]:
        """Yield (sentence_text, pcm16_frame_bytes) in a continuous stream.

        This pre-synthesizes up to `prefetch_sentences` sentences ahead to reduce
        boundary pauses, and yields audio in fixed-duration frames.

        Setting ``cancel_event`` ends the stream before the next frame.
        """
        sentences = self.split_sentences(text)
        queue: asyncio.Queue[Optional[tuple[None, str, bytes]]] = asyncio.Queue(
            maxsize=max(1, prefetch_sentences)
        )
        frame_bytes = self._frame_byte_count(frame_ms)

        producer_task = asyncio.create_task(
            self._produce_pcm(queue, [(None, s) for s in sentences], voice, speed, cancel_event)
        )
        try:
            while True:
                item = await queue.get()
                if item is None:
                    break
                _, sentence, pcm16 = item
                for frame in self._iter_pcm_frames(pcm16, frame_bytes=frame_bytes):
                    if cancel_event is not None and cancel_event.is_set():
                        return
                    yield (sentence, frame)
        finally:
            await self._stop_producer(producer_task)

    async def generate_audio_stream_paragraphs(
        self,
        paragraphs: List[str],
        voice: str = "af_bella",
        speed: float = 1.0,
        prefetch_sentences: int = 3,
        frame_ms: int = 200,
        cancel_event: Optional[asyncio.Event] = None,
        *,
        pause_sentence_ms: int = 120,
        pause_period_ms: int = 180,
        pause_exclaim_ms: int = 200,
        pause_question_ms: int = 260,
        pause_paragraph_extra_ms: int = 240,
    ) -> AsyncIterator[tuple[int, int, str, bytes]]:
        """Yield (paragraph_index, sentence_index, sentence_text, pcm16_frame_bytes).

        Adds a small silence pause after each sentence, and a larger one at paragraph boundaries.
        The pause length depends on the sentence's final punctuation; silence
        frames carry the indices and text of the sentence they follow.

        Setting ``cancel_event`` ends the stream before the next frame.
        """
        segments = self.split_paragraphs(paragraphs)
        queue: asyncio.Queue[Optional[tuple[tuple[int, int, int], str, bytes]]] = asyncio.Queue(
            maxsize=max(1, prefetch_sentences)
        )
        frame_bytes = self._frame_byte_count(frame_ms)

        def pause_ms_for(sentence: str, is_last_in_paragraph: bool) -> int:
            s = sentence.rstrip()
            base = pause_sentence_ms
            if s.endswith('?'):
                base = pause_question_ms
            elif s.endswith('!'):
                base = pause_exclaim_ms
            elif s.endswith('.'):
                base = pause_period_ms
            if is_last_in_paragraph:
                base += pause_paragraph_extra_ms
            return max(0, int(base))

        # The pause is a pure function of the sentence, so it is computed up
        # front and travels with the segment as part of its tag.
        jobs = [
            ((p_idx, s_idx, pause_ms_for(s, is_last)), s)
            for p_idx, s_idx, s, is_last in segments
        ]

        producer_task = asyncio.create_task(
            self._produce_pcm(queue, jobs, voice, speed, cancel_event)
        )
        try:
            while True:
                item = await queue.get()
                if item is None:
                    break
                (p_idx, s_idx, pause_ms), sentence, pcm16 = item
                for frame in self._iter_pcm_frames(pcm16, frame_bytes=frame_bytes):
                    if cancel_event is not None and cancel_event.is_set():
                        return
                    yield (p_idx, s_idx, sentence, frame)

                if pause_ms > 0:
                    silence_samples = int(self.sample_rate * (pause_ms / 1000.0))
                    silence_bytes = silence_samples * 2
                    # Chunk silence into normal frames.
                    silence = b"\x00" * silence_bytes
                    for frame in self._iter_pcm_frames(silence, frame_bytes=frame_bytes):
                        if cancel_event is not None and cancel_event.is_set():
                            return
                        yield (p_idx, s_idx, sentence, frame)
        finally:
            await self._stop_producer(producer_task)
|
| 256 |
+
|
| 257 |
+
if __name__ == "__main__":
    # Manual smoke test: stream a short text and report the PCM bytes produced.
    # Requires the model and voices files expected by TTSEngine().
    async def test() -> None:
        tts = TTSEngine()
        text = "Hello world! This is a test of the automatic text to speech system. It should be fast."
        count = 0
        # Each item is a (sentence_text, pcm16_frame) pair; measure the audio
        # frame rather than the tuple, whose length is always 2.
        async for _sentence, frame in tts.generate_audio_stream(text):
            count += len(frame)
            print(f"Generated chunk of size {len(frame)}")
        print(f"Total bytes: {count}")

    asyncio.run(test())
|
backend/uv.lock
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|