Spaces:

shreyas-joshi
/

CoreReader

Sleeping

App Files Files Community

shreyas-joshi Cursor committed on Feb 17

Commit

f8a7e1d

1 Parent(s): bbe8cec

Deploy existing backend in Docker Space on port 7860

Browse files

Co-authored-by: Cursor <cursoragent@cursor.com>

Files changed (9) hide show

Dockerfile +33 -0
backend/Dockerfile +34 -0
backend/download_models.py +24 -0
backend/pyproject.toml +19 -0
backend/requirements.txt +9 -0
backend/scraper.py +153 -0
backend/server.py +366 -0
backend/tts.py +268 -0
backend/uv.lock +0 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,33 @@

+# syntax=docker/dockerfile:1
+# Space image: builds the backend from ./backend and serves it on port 7860.
+FROM python:3.12-slim
+WORKDIR /app/backend
+# System deps for lxml/bs4 + general networking
+RUN apt-get update \
+  && apt-get install -y --no-install-recommends \
+    curl \
+    ca-certificates \
+    gcc \
+    g++ \
+    libc6-dev \
+  && rm -rf /var/lib/apt/lists/*
+# Install uv and link it into /usr/local/bin so it is on PATH
+RUN curl -LsSf https://astral.sh/uv/install.sh | sh \
+  && ln -s /root/.local/bin/uv /usr/local/bin/uv
+# Copy dependency metadata first for better layer caching
+# (the trailing * on uv.lock presumably keeps the COPY working when no lock file exists)
+COPY backend/pyproject.toml backend/uv.lock* /app/backend/
+# Create venv + install deps
+RUN uv venv --python 3.12 \
+  && uv sync
+# Copy backend app code
+COPY backend /app/backend
+EXPOSE 7860
+# Keep backend logic unchanged, but bind Space app to 7860: download the models
+# first, then start uvicorn on server.app directly instead of running server.py.
+CMD ["/bin/sh", "-lc", "uv run python download_models.py && uv run python -c \"import uvicorn, server; uvicorn.run(server.app, host='0.0.0.0', port=7860)\""]

backend/Dockerfile ADDED Viewed

	@@ -0,0 +1,34 @@

+# syntax=docker/dockerfile:1
+# Standalone backend image: build context is the backend directory itself.
+FROM python:3.12-slim
+WORKDIR /app
+# System deps for lxml/bs4 + general networking
+RUN apt-get update \
+  && apt-get install -y --no-install-recommends \
+    curl \
+    ca-certificates \
+    gcc \
+    g++ \
+    libc6-dev \
+  && rm -rf /var/lib/apt/lists/*
+# Install uv and link it into /usr/local/bin so it is on PATH
+RUN curl -LsSf https://astral.sh/uv/install.sh | sh \
+  && ln -s /root/.local/bin/uv /usr/local/bin/uv
+# Copy dependency metadata first for better layer caching
+COPY pyproject.toml uv.lock* /app/
+# Create venv + install deps
+RUN uv venv --python 3.12 \
+  && uv sync
+# Copy app code
+COPY . /app/
+EXPOSE 8000
+# Ensure models exist, then start the server.
+# NOTE(review): both steps run through `uv run`, which may re-sync the environment
+# at start-up. An earlier version of this comment said to avoid `uv run` here so
+# that a CPU onnxruntime is not re-installed -- confirm which behaviour is intended.
+CMD ["/bin/sh", "-lc", "uv run python download_models.py && uv run python server.py"]

backend/download_models.py ADDED Viewed

	@@ -0,0 +1,24 @@

+import os
+import requests
+# Kokoro v1.0 (recommended): larger voice pack.
+MODEL_URL = "https://github.com/thewh1teagle/kokoro-onnx/releases/download/model-files-v1.0/kokoro-v1.0.onnx"
+VOICES_URL = "https://github.com/thewh1teagle/kokoro-onnx/releases/download/model-files-v1.0/voices-v1.0.bin"
+def download_file(url, path):
+    print(f"Downloading {url} to {path}...")
+    response = requests.get(url, stream=True)
+    if response.status_code == 200:
+        with open(path, 'wb') as f:
+            for chunk in response.iter_content(chunk_size=8192):
+                f.write(chunk)
+        print(f"Downloaded {path}")
+    else:
+        print(f"Failed to download {url}")
+if __name__ == "__main__":
+    os.makedirs("models", exist_ok=True)
+    if not os.path.exists("models/kokoro-v1.0.onnx"):
+        download_file(MODEL_URL, "models/kokoro-v1.0.onnx")
+    if not os.path.exists("models/voices-v1.0.bin"):
+        download_file(VOICES_URL, "models/voices-v1.0.bin")

backend/pyproject.toml ADDED Viewed

	@@ -0,0 +1,19 @@

+[project]
+name = "ln-tts-backend"
+version = "0.1.0"
+description = "FastAPI backend for LN-TTS (NovelCool scraping + local TTS streaming)"
+requires-python = ">=3.10,<3.13"
+dependencies = [
+  "fastapi>=0.128.0",
+  "uvicorn[standard]>=0.30.0",
+  "aiohttp>=3.9.5",
+  "beautifulsoup4>=4.12.3",
+  "lxml>=5.2.2",
+  "numpy>=1.26.0",
+  "onnxruntime>=1.20.0",
+  "kokoro-onnx>=0.2.6",
+  "requests>=2.32.0",
+]
+[tool.uv]
+package = false

backend/requirements.txt ADDED Viewed

	@@ -0,0 +1,9 @@

+fastapi>=0.128.0
+uvicorn[standard]>=0.30.0
+aiohttp>=3.9.5
+beautifulsoup4>=4.12.3
+lxml>=5.2.2
+numpy>=1.26.0
+onnxruntime>=1.20.0
+kokoro-onnx>=0.2.6
+requests>=2.32.0

backend/scraper.py ADDED Viewed

	@@ -0,0 +1,153 @@

+import aiohttp
+from bs4 import BeautifulSoup
+import re
+from urllib.parse import urljoin
+class NovelCoolScraper:
+    def __init__(self):
+        self.headers = {
+            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
+        }
+    async def scrape_chapter(self, url: str):
+        async with aiohttp.ClientSession() as session:
+            async with session.get(url, headers=self.headers) as response:
+                if response.status != 200:
+                    raise Exception(f"Failed to fetch page: {response.status}")
+                html = await response.text()
+        # NovelCool pages can be large; lxml parser is more reliable here.
+        soup = BeautifulSoup(html, 'lxml')
+        # Extract Title
+        title = "Unknown Chapter"
+        title_tag = soup.find('h1')
+        if title_tag:
+            title = title_tag.get_text(strip=True)
+        else:
+            page_title = soup.find('title')
+            if page_title:
+                t = page_title.get_text(strip=True)
+                # e.g. "Shadow Slave Chapter 15 - Novel Cool - Best online light novel reading website"
+                title = t.split(' - Novel Cool', 1)[0].strip() or t
+        # Extract Content
+        # In the HTML variant commonly returned to scripted clients, the actual
+        # chapter content lives under: div.site-content > div.overflow-hidden
+        content_div = soup.select_one('div.site-content div.overflow-hidden')
+        if not content_div:
+            # Fallback: pick the div with the most <p> tags.
+            best = None
+            best_count = 0
+            for div in soup.find_all('div'):
+                ps = div.find_all('p')
+                if len(ps) > best_count:
+                    best_count = len(ps)
+                    best = div
+            content_div = best
+        if not content_div:
+            raise Exception("Could not find chapter content container")
+        paragraphs = []
+        for p in content_div.find_all('p'):
+            classes = p.get('class') or []
+            txt = p.get_text(' ', strip=True)
+            if not txt:
+                continue
+            if 'chapter-end-mark' in classes or txt.lower().strip() == 'chapter end':
+                break
+            paragraphs.append(txt)
+        if not paragraphs:
+            raw_text = content_div.get_text(separator='\n', strip=True)
+            paragraphs = [line for line in raw_text.split('\n') if line.strip()]
+        content = "\n".join(paragraphs)
+        # Extract Next/Prev Links
+        next_link = None
+        prev_link = None
+        for a in soup.find_all('a', href=True):
+            t = a.get_text(" ", strip=True)
+            href = a.get('href')
+            if not href:
+                continue
+            if '/chapter/' not in href:
+                continue
+            if not next_link and 'Next' in t:
+                next_link = href
+            if not prev_link and 'Prev' in t:
+                prev_link = href
+            if next_link and prev_link:
+                break
+        if next_link:
+            next_link = urljoin(url, next_link)
+        if prev_link:
+            prev_link = urljoin(url, prev_link)
+        return {
+            "title": title,
+            "content": paragraphs, # Return list of paragraphs for easier chunking
+            "next_url": next_link,
+            "prev_url": prev_link
+        }
+    async def scrape_novel_index(self, novel_url: str):
+        """Scrape a NovelCool novel page and return a list of chapter links."""
+        async with aiohttp.ClientSession() as session:
+            async with session.get(novel_url, headers=self.headers) as response:
+                if response.status != 200:
+                    raise Exception(f"Failed to fetch page: {response.status}")
+                html = await response.text()
+        soup = BeautifulSoup(html, 'lxml')
+        links = []
+        seen = set()
+        for a in soup.find_all('a', href=True):
+            href = a.get('href')
+            if not href:
+                continue
+            if '/chapter/' not in href:
+                continue
+            abs_url = urljoin(novel_url, href)
+            if abs_url in seen:
+                continue
+            seen.add(abs_url)
+            title = a.get_text(' ', strip=True)
+            if not title:
+                # Some chapter links have empty text (icons). Skip.
+                continue
+            # Best-effort chapter number parsing.
+            m = re.search(r"(?:Chapter|C)\s*(\d+)", title, flags=re.IGNORECASE)
+            n = int(m.group(1)) if m else None
+            links.append({"n": n, "title": title, "url": abs_url})
+        # Sort by chapter number when possible.
+        def chapter_key(item):
+            n = item.get('n')
+            if isinstance(n, int):
+                return n
+            # fallback: keep stable ordering
+            return 10**9
+        links.sort(key=chapter_key)
+        return links
+if __name__ == "__main__":
+    import asyncio
+    scraper = NovelCoolScraper()
+    # Test with user provided URL
+    url = "https://www.novelcool.com/chapter/Shadow-Slave-Chapter-15/7332162/"
+    try:
+        result = asyncio.run(scraper.scrape_chapter(url))
+        print(f"Title: {result['title']}")
+        print(f"Paragraphs: {len(result['content'])}")
+        print(f"Next: {result['next_url']}")
+    except Exception as e:
+        print(f"Error: {e}")

backend/server.py ADDED Viewed

	@@ -0,0 +1,366 @@

+import uvicorn
+from fastapi import FastAPI, WebSocket, WebSocketDisconnect, HTTPException
+from fastapi.middleware.cors import CORSMiddleware
+import json
+import asyncio
+import logging
+from scraper import NovelCoolScraper
+from tts import TTSEngine
+import traceback
+from contextlib import asynccontextmanager
+import time
+# Serialize logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+@asynccontextmanager
+async def lifespan(app: FastAPI):
+    # Startup
+    try:
+        logger.info("Initializing TTS Engine...")
+        try:
+            import onnxruntime as ort
+            logger.info(f"ONNX Runtime providers: {ort.get_available_providers()}")
+        except Exception:
+            pass
+        app.state.tts = TTSEngine()
+        logger.info("TTS Engine initialized.")
+    except Exception as e:
+        logger.error(f"Failed to initialize TTS Engine: {e}")
+        app.state.tts = None
+    app.state.scraper = NovelCoolScraper()
+    app.state.novel_index_cache = {}
+    yield
+    # Shutdown
+    app.state.tts = None
+    app.state.scraper = None
+    app.state.novel_index_cache = None
+# Application instance; `lifespan` sets up and clears the shared app.state objects.
+app = FastAPI(lifespan=lifespan)
+# CORS: accept any origin, method and header, with credentials disabled.
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=False,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+@app.get("/health")
+async def health():
+    return {"ok": True, "tts_ready": app.state.tts is not None}
+@app.get("/voices")
+async def voices():
+    if not app.state.tts:
+        return {"voices": [], "error": "TTS Engine not initialized"}
+    return {"voices": app.state.tts.list_voices()}
+@app.get("/novel_index")
+async def novel_index(url: str):
+    if not url:
+        return {"chapters": [], "error": "url is required"}
+    chapters = await app.state.scraper.scrape_novel_index(url)
+    return {"chapters": chapters}
+async def _get_cached_novel_index(novel_url: str):
+    """Return cached chapter list for a novel URL, scraping once per TTL."""
+    if not novel_url:
+        raise HTTPException(status_code=400, detail="url is required")
+    cache = app.state.novel_index_cache
+    if cache is None:
+        cache = {}
+        app.state.novel_index_cache = cache
+    ttl_s = 30 * 60  # 30 minutes
+    now = time.monotonic()
+    entry = cache.get(novel_url)
+    if entry is not None:
+        age = now - float(entry.get("ts", 0.0))
+        if age < ttl_s:
+            return entry.get("chapters") or []
+    chapters = await app.state.scraper.scrape_novel_index(novel_url)
+    cache[novel_url] = {"ts": now, "chapters": chapters}
+    return chapters
+@app.get("/novel_meta")
+async def novel_meta(url: str):
+    chapters = await _get_cached_novel_index(url)
+    max_n = 0
+    for c in chapters:
+        try:
+            n = c.get("n") if isinstance(c, dict) else None
+            if isinstance(n, int) and n > max_n:
+                max_n = n
+        except Exception:
+            pass
+    return {"count": max_n if max_n > 0 else len(chapters)}
+@app.get("/novel_chapter")
+async def novel_chapter(url: str, n: int):
+    chapters = await _get_cached_novel_index(url)
+    # Prefer resolving by parsed chapter number, not list position.
+    resolved: dict | None = None
+    max_n = 0
+    for c in chapters:
+        if not isinstance(c, dict):
+            continue
+        cn = c.get("n")
+        if isinstance(cn, int) and cn > max_n:
+            max_n = cn
+        if isinstance(cn, int) and cn == n:
+            resolved = c
+            break
+    limit = max_n if max_n > 0 else len(chapters)
+    if n < 1 or n > limit:
+        raise HTTPException(status_code=400, detail=f"chapter n must be between 1 and {limit}")
+    if resolved is None:
+        # Fallback: old positional behavior.
+        item = chapters[n - 1] if (n - 1) < len(chapters) else {}
+    else:
+        item = resolved
+    return {"n": n, "title": item.get("title"), "url": item.get("url")}
+@app.websocket("/ws")
+async def websocket_endpoint(websocket: WebSocket):
+    await websocket.accept()
+    cancel_event = asyncio.Event()
+    try:
+        while True:
+            data = await websocket.receive_text()
+            try:
+                message = json.loads(data)
+                command = message.get("command")
+                if command == "scrape":
+                    url = message.get("url")
+                    if not url:
+                        await websocket.send_json({"error": "URL is required"})
+                        continue
+                    logger.info(f"Scraping URL: {url}")
+                    try:
+                        result = await app.state.scraper.scrape_chapter(url)
+                        await websocket.send_json({"type": "scrape_result", "data": result})
+                    except Exception as e:
+                         logger.error(f"Scrape error: {e}")
+                         await websocket.send_json({"type": "error", "message": str(e)})
+                elif command == "tts":
+                    text = message.get("text")
+                    voice = message.get("voice", "af_bella")
+                    speed = message.get("speed", 1.0)
+                    if not text:
+                        await websocket.send_json({"error": "Text is required"})
+                        continue
+                    logger.info(f"Streaming TTS for text length: {len(text)}")
+                    if not app.state.tts:
+                         await websocket.send_json({"error": "TTS Engine not initialized"})
+                         continue
+                    # Ensure voice is valid for the loaded voice pack.
+                    try:
+                        available = app.state.tts.list_voices()
+                        if available and voice not in available:
+                            voice = available[0]
+                    except Exception:
+                        pass
+                    # Stream audio
+                    try:
+                        async for _, audio_chunk in app.state.tts.generate_audio_stream(
+                            text,
+                            voice=voice,
+                            speed=float(speed),
+                            prefetch_sentences=3,
+                            frame_ms=200,
+                            cancel_event=cancel_event,
+                        ):
+                            await websocket.send_bytes(audio_chunk)
+                        await websocket.send_json({"type": "tts_complete"})
+                    except Exception as e:
+                        logger.error(f"TTS error: {e}")
+                        await websocket.send_json({"type": "error", "message": str(e)})
+                elif command == "play":
+                    # Single-shot: scrape the chapter, then stream it sentence-by-sentence.
+                    url = message.get("url")
+                    voice = message.get("voice", "af_bella")
+                    speed = float(message.get("speed", 1.0))
+                    prefetch = int(message.get("prefetch", 3))
+                    frame_ms = int(message.get("frame_ms", 200))
+                    start_paragraph = int(message.get("start_paragraph", 0) or 0)
+                    if not url:
+                        await websocket.send_json({"type": "error", "message": "URL is required"})
+                        continue
+                    if not app.state.tts:
+                        await websocket.send_json({"type": "error", "message": "TTS Engine not initialized"})
+                        continue
+                    cancel_event.clear()
+                    paused = False
+                    logger.info(f"Play request: url={url} voice={voice} speed={speed}")
+                    # Ensure voice is valid for the loaded voice pack.
+                    try:
+                        available = app.state.tts.list_voices()
+                        if available and voice not in available:
+                            voice = available[0]
+                    except Exception:
+                        pass
+                    try:
+                        chapter = await app.state.scraper.scrape_chapter(url)
+                    except Exception as e:
+                        await websocket.send_json({"type": "error", "message": str(e)})
+                        continue
+                    title = chapter.get("title")
+                    paragraphs = chapter.get("content") or []
+                    if start_paragraph < 0:
+                        start_paragraph = 0
+                    if start_paragraph > len(paragraphs):
+                        start_paragraph = max(0, len(paragraphs) - 1)
+                    paragraphs_slice = paragraphs[start_paragraph:] if start_paragraph else paragraphs
+                    await websocket.send_json(
+                        {
+                            "type": "chapter_info",
+                            "title": title,
+                            "url": url,
+                            "next_url": chapter.get("next_url"),
+                            "prev_url": chapter.get("prev_url"),
+                            "paragraphs": paragraphs,
+                            "start_paragraph": start_paragraph,
+                            "audio": {
+                                "encoding": "pcm_s16le",
+                                "sample_rate": app.state.tts.sample_rate,
+                                "channels": 1,
+                                "frame_ms": frame_ms,
+                            },
+                        }
+                    )
+                    last_key = None
+                    try:
+                        control_task: asyncio.Task[str] | None = asyncio.create_task(websocket.receive_text())
+                        async def handle_control_payload(payload: str) -> None:
+                            nonlocal paused
+                            try:
+                                msg = json.loads(payload)
+                            except json.JSONDecodeError:
+                                return
+                            cmd = msg.get("command")
+                            if cmd == "pause":
+                                paused = True
+                            elif cmd == "resume":
+                                paused = False
+                            elif cmd == "stop":
+                                cancel_event.set()
+                        async for p_idx, s_idx, sentence, audio_frame in app.state.tts.generate_audio_stream_paragraphs(
+                            paragraphs_slice,
+                            voice=voice,
+                            speed=speed,
+                            prefetch_sentences=prefetch,
+                            frame_ms=frame_ms,
+                            cancel_event=cancel_event,
+                        ):
+                            # Consume any pending control messages without concurrent receives.
+                            if control_task is not None and control_task.done():
+                                try:
+                                    await handle_control_payload(control_task.result())
+                                except WebSocketDisconnect:
+                                    cancel_event.set()
+                                control_task = asyncio.create_task(websocket.receive_text())
+                            if paused and control_task is not None:
+                                control_task.cancel()
+                                control_task = None
+                            while paused and not cancel_event.is_set():
+                                # Block until we get a control message.
+                                try:
+                                    payload = await websocket.receive_text()
+                                except WebSocketDisconnect:
+                                    cancel_event.set()
+                                    break
+                                await handle_control_payload(payload)
+                            if not paused and not cancel_event.is_set() and control_task is None:
+                                control_task = asyncio.create_task(websocket.receive_text())
+                            if cancel_event.is_set():
+                                break
+                            key = (p_idx + start_paragraph, s_idx, sentence)
+                            if key != last_key:
+                                last_key = key
+                                await websocket.send_json(
+                                    {
+                                        "type": "sentence",
+                                        "text": sentence,
+                                        "paragraph_index": int(p_idx + start_paragraph),
+                                        "sentence_index": int(s_idx),
+                                    }
+                                )
+                            await websocket.send_bytes(audio_frame)
+                            # Pace frames close to real-time so UI updates (sentence highlighting)
+                            # match what is audible, even when synthesis runs faster than realtime.
+                            try:
+                                await asyncio.sleep(len(audio_frame) / (2 * app.state.tts.sample_rate))
+                            except Exception:
+                                pass
+                        if control_task is not None:
+                            control_task.cancel()
+                        await websocket.send_json(
+                            {
+                                "type": "chapter_complete",
+                                "next_url": chapter.get("next_url"),
+                                "prev_url": chapter.get("prev_url"),
+                            }
+                        )
+                    except Exception as e:
+                        logger.error(f"Play stream error: {e}")
+                        await websocket.send_json({"type": "error", "message": str(e)})
+                else:
+                    await websocket.send_json({"error": "Unknown command"})
+            except json.JSONDecodeError:
+                await websocket.send_json({"error": "Invalid JSON"})
+            except Exception as e:
+                logger.error(f"Error processing message: {e}")
+                traceback.print_exc()
+                await websocket.send_json({"error": "Internal server error"})
+    except WebSocketDisconnect:
+        logger.info("Client disconnected")
+    except Exception as e:
+        logger.error(f"WebSocket error: {e}")
+# Script entry point: serve the app with uvicorn on all interfaces, port 8000.
+if __name__ == "__main__":
+    uvicorn.run(app, host="0.0.0.0", port=8000)

backend/tts.py ADDED Viewed

	@@ -0,0 +1,268 @@

+import os
+import re
+import numpy as np
+import onnxruntime as ort
+from kokoro_onnx import Kokoro
+import asyncio
+import json
+import inspect
+from typing import AsyncIterator, Iterable, List, Optional
+import contextlib
+from pathlib import Path
+import zipfile
+class TTSEngine:
+    def __init__(
+        self,
+        model_path: str = "models/kokoro-v1.0.onnx",
+        voices_path: str = "models/voices-v1.0.bin",
+    ):
+        # Ensure models exist
+        if not os.path.exists(model_path):
+            raise FileNotFoundError(f"Model not found at {model_path}. Run download_models.py first.")
+        self.model_path = model_path
+        self.voices_path = voices_path
+        # Newer kokoro-onnx versions support the v1.0 voices bundle (voices-v1.0.bin).
+        # We also keep backward-compatible support for voices.json/voices.npz.
+        self._ensure_voices_file()
+        self.sample_rate = 24000  # Kokoro default
+        self._voices_cache: Optional[List[str]] = None
+        # CPU-only mode for maximum compatibility.
+        self.providers = ["CPUExecutionProvider"]
+        # kokoro_onnx API varies by version; try passing providers if supported.
+        kokoro_sig = inspect.signature(Kokoro)
+        if "providers" in kokoro_sig.parameters:
+            self.kokoro = Kokoro(self.model_path, self.voices_path, providers=self.providers)
+        else:
+            self.kokoro = Kokoro(self.model_path, self.voices_path)
+    def list_voices(self) -> List[str]:
+        if self._voices_cache is not None:
+            return self._voices_cache
+        p = Path(self.voices_path)
+        voices: List[str] = []
+        if p.suffix == ".bin":
+            # voices-v1.0.bin is a zip containing <voice_id>.npy entries.
+            try:
+                with zipfile.ZipFile(str(p), "r") as z:
+                    for name in z.namelist():
+                        if not name.endswith(".npy"):
+                            continue
+                        voice_id = name[: -len(".npy")]
+                        if voice_id:
+                            voices.append(voice_id)
+            except zipfile.BadZipFile as e:
+                raise ValueError(f"Invalid voices bundle (expected zip): {p}") from e
+            voices = sorted(set(voices))
+        elif p.suffix == ".npz":
+            # np.load returns an NpzFile mapping of arrays.
+            with np.load(str(p)) as z:
+                voices = sorted(list(z.files))
+        elif p.suffix == ".json":
+            with p.open("r", encoding="utf-8") as f:
+                data = json.load(f)
+            if isinstance(data, dict):
+                voices = sorted([str(k) for k in data.keys()])
+            elif isinstance(data, list):
+                voices = sorted([str(v) for v in data])
+        self._voices_cache = voices
+        return voices
+    def _ensure_voices_file(self) -> None:
+        p = Path(self.voices_path)
+        if p.exists() and p.suffix in {".bin", ".npz", ".npy", ".json"}:
+            return
+        # Try common fallbacks in models/.
+        candidates = [
+            Path("models/voices-v1.0.bin"),
+            Path("models/voices.npz"),
+            Path("models/voices.json"),
+        ]
+        for c in candidates:
+            if c.exists():
+                self.voices_path = str(c)
+                return
+        raise FileNotFoundError(
+            f"Voices file not found. Expected {self.voices_path} or one of: {', '.join(str(c) for c in candidates)}"
+        )
+    def split_sentences(self, text: str) -> List[str]:
+        # Heuristic sentence splitting suited for light novels.
+        sentences = re.split(r"(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=\.|\?|\!)\s+", text)
+        return [s.strip() for s in sentences if s and s.strip()]
+    def split_paragraphs(self, paragraphs: List[str]) -> List[tuple[int, int, str, bool]]:
+        """Flatten paragraphs into (paragraph_index, sentence_index, sentence_text, is_last_in_paragraph)."""
+        out: List[tuple[int, int, str, bool]] = []
+        for p_idx, p in enumerate(paragraphs):
+            p = (p or "").strip()
+            if not p:
+                continue
+            sentences = self.split_sentences(p)
+            if not sentences:
+                sentences = [p]
+            for s_idx, s in enumerate(sentences):
+                out.append((p_idx, s_idx, s, s_idx == (len(sentences) - 1)))
+        return out
+    def _iter_pcm_frames(self, pcm16: bytes, frame_bytes: int) -> Iterable[bytes]:
+        if frame_bytes <= 0:
+            yield pcm16
+            return
+        for i in range(0, len(pcm16), frame_bytes):
+            yield pcm16[i : i + frame_bytes]
+    async def synthesize_sentence_pcm16(self, sentence: str, voice: str, speed: float) -> bytes:
+        loop = asyncio.get_running_loop()
+        audio, _ = await loop.run_in_executor(None, self.kokoro.create, sentence, voice, speed)
+        audio_int16 = (np.clip(audio, -1.0, 1.0) * 32767).astype(np.int16)
+        return audio_int16.tobytes()
+    async def generate_audio_stream(
+        self,
+        text: str,
+        voice: str = "af_bella",
+        speed: float = 1.0,
+        prefetch_sentences: int = 3,
+        frame_ms: int = 200,
+        cancel_event: Optional[asyncio.Event] = None,
+    ) -> AsyncIterator[tuple[str, bytes]]:
+        """Yield (sentence_text, pcm16_frame_bytes) in a continuous stream.
+        This pre-synthesizes up to `prefetch_sentences` sentences ahead to reduce
+        boundary pauses, and yields audio in fixed-duration frames.
+        """
+        sentences = self.split_sentences(text)
+        queue: asyncio.Queue[Optional[tuple[str, bytes]]] = asyncio.Queue(maxsize=max(1, prefetch_sentences))
+        frame_samples = int(self.sample_rate * (frame_ms / 1000.0))
+        frame_bytes = frame_samples * 2  # int16 mono
+        async def producer() -> None:
+            try:
+                for s in sentences:
+                    if cancel_event is not None and cancel_event.is_set():
+                        break
+                    if not s:
+                        continue
+                    pcm16 = await self.synthesize_sentence_pcm16(s, voice=voice, speed=speed)
+                    await queue.put((s, pcm16))
+            finally:
+                await queue.put(None)
+        producer_task = asyncio.create_task(producer())
+        try:
+            while True:
+                item = await queue.get()
+                if item is None:
+                    break
+                sentence, pcm16 = item
+                for frame in self._iter_pcm_frames(pcm16, frame_bytes=frame_bytes):
+                    if cancel_event is not None and cancel_event.is_set():
+                        return
+                    yield (sentence, frame)
+        finally:
+            producer_task.cancel()
+            with contextlib.suppress(Exception):
+                await producer_task
+    async def generate_audio_stream_paragraphs(
+        self,
+        paragraphs: List[str],
+        voice: str = "af_bella",
+        speed: float = 1.0,
+        prefetch_sentences: int = 3,
+        frame_ms: int = 200,
+        cancel_event: Optional[asyncio.Event] = None,
+        *,
+        pause_sentence_ms: int = 120,
+        pause_period_ms: int = 180,
+        pause_exclaim_ms: int = 200,
+        pause_question_ms: int = 260,
+        pause_paragraph_extra_ms: int = 240,
+    ) -> AsyncIterator[tuple[int, int, str, bytes]]:
+        """Yield (paragraph_index, sentence_index, sentence_text, pcm16_frame_bytes).
+        Adds a small silence pause after each sentence, and a larger one at paragraph boundaries.
+        """
+        segments = self.split_paragraphs(paragraphs)
+        queue: asyncio.Queue[Optional[tuple[int, int, str, bytes, int]]] = asyncio.Queue(
+            maxsize=max(1, prefetch_sentences)
+        )
+        frame_samples = int(self.sample_rate * (frame_ms / 1000.0))
+        frame_bytes = frame_samples * 2  # int16 mono
+        def pause_ms_for(sentence: str, is_last_in_paragraph: bool) -> int:
+            s = sentence.rstrip()
+            base = pause_sentence_ms
+            if s.endswith('?'):
+                base = pause_question_ms
+            elif s.endswith('!'):
+                base = pause_exclaim_ms
+            elif s.endswith('.'):
+                base = pause_period_ms
+            if is_last_in_paragraph:
+                base += pause_paragraph_extra_ms
+            return max(0, int(base))
+        async def producer() -> None:
+            try:
+                for p_idx, s_idx, s, is_last in segments:
+                    if cancel_event is not None and cancel_event.is_set():
+                        break
+                    if not s:
+                        continue
+                    pcm16 = await self.synthesize_sentence_pcm16(s, voice=voice, speed=speed)
+                    pause_ms = pause_ms_for(s, is_last)
+                    await queue.put((p_idx, s_idx, s, pcm16, pause_ms))
+            finally:
+                await queue.put(None)
+        producer_task = asyncio.create_task(producer())
+        try:
+            while True:
+                item = await queue.get()
+                if item is None:
+                    break
+                p_idx, s_idx, sentence, pcm16, pause_ms = item
+                for frame in self._iter_pcm_frames(pcm16, frame_bytes=frame_bytes):
+                    if cancel_event is not None and cancel_event.is_set():
+                        return
+                    yield (p_idx, s_idx, sentence, frame)
+                if pause_ms > 0:
+                    silence_samples = int(self.sample_rate * (pause_ms / 1000.0))
+                    silence_bytes = silence_samples * 2
+                    # Chunk silence into normal frames.
+                    silence = b"\x00" * silence_bytes
+                    for frame in self._iter_pcm_frames(silence, frame_bytes=frame_bytes):
+                        if cancel_event is not None and cancel_event.is_set():
+                            return
+                        yield (p_idx, s_idx, sentence, frame)
+        finally:
+            producer_task.cancel()
+            with contextlib.suppress(Exception):
+                await producer_task
+if __name__ == "__main__":
+    # Test
+    async def test():
+        tts = TTSEngine()
+        text = "Hello world! This is a test of the automatic text to speech system. It should be fast."
+        count = 0
+        async for chunk in tts.generate_audio_stream(text):
+            count += len(chunk)
+            print(f"Generated chunk of size {len(chunk)}")
+        print(f"Total bytes: {count}")
+    conn = asyncio.run(test())

backend/uv.lock ADDED Viewed

The diff for this file is too large to render. See raw diff