Spaces:
Sleeping
Sleeping
Commit ·
c37e1f4
1
Parent(s): 6497be7
Sync backend features: novel_details, realtime flag, portable PORT, Azure deploy docs
Browse files
- scraper.py: add scrape_novel_details() returning title + cover_url
- server.py: add GET /novel_details endpoint; surface in homepage + /info
- server.py: WS play accepts realtime=false to skip frame-pacing sleep (fast downloads)
- Dockerfile: bind to $PORT (default 7860) for Azure portability
- README.md: full Azure Container Apps deploy guide + updated endpoint list
Co-authored-by: Cursor <cursoragent@cursor.com>
- Dockerfile +3 -2
- README.md +61 -1
- backend/scraper.py +36 -0
- backend/server.py +16 -4
Dockerfile
CHANGED
|
@@ -29,5 +29,6 @@ COPY backend /app/backend
|
|
| 29 |
|
| 30 |
EXPOSE 7860
|
| 31 |
|
| 32 |
-
#
|
| 33 |
-
|
|
|
|
|
|
| 29 |
|
| 30 |
EXPOSE 7860
|
| 31 |
|
| 32 |
+
# Hugging Face Spaces uses port 7860. Azure can use any target port; we bind to
|
| 33 |
+
# $PORT when set, defaulting to 7860.
|
| 34 |
+
# Exec-form CMD is parsed as a JSON array, so "$" must not be backslash-escaped
# ("\$" is not a valid JSON escape). PORT is exported so the default also
# reaches the Python process, which reads it via os.environ.
CMD ["/bin/sh", "-lc", "export PORT=${PORT:-7860} && uv run python download_models.py && uv run python -c \"import os, uvicorn, server; uvicorn.run(server.app, host='0.0.0.0', port=int(os.environ.get('PORT','7860')))\""]
|
README.md
CHANGED
|
@@ -7,4 +7,64 @@ sdk: docker
|
|
| 7 |
pinned: false
|
| 8 |
---
|
| 9 |
|
| 10 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
pinned: false
|
| 8 |
---
|
| 9 |
|
| 10 |
+
# CoreReader Backend (Docker)
|
| 11 |
+
|
| 12 |
+
This repository is a backend-only deployment target for CoreReader / LN-TTS.
|
| 13 |
+
|
| 14 |
+
It runs a FastAPI server that:
|
| 15 |
+
- Scrapes NovelCool chapters + chapter index
|
| 16 |
+
- Runs Kokoro ONNX TTS (CPU)
|
| 17 |
+
- Streams PCM16 mono audio over WebSocket
|
| 18 |
+
|
| 19 |
+
## Endpoints
|
| 20 |
+
|
| 21 |
+
- GET /health
|
| 22 |
+
- GET /voices
|
| 23 |
+
- GET /novel_index?url=...
|
| 24 |
+
- GET /novel_details?url=... (title + best-effort cover URL)
|
| 25 |
+
- GET /novel_meta?url=...
|
| 26 |
+
- GET /novel_chapter?url=...&n=...
|
| 27 |
+
- WS /ws
|
| 28 |
+
|
| 29 |
+
## Use from the Flutter app
|
| 30 |
+
|
| 31 |
+
In Settings → WebSocket base URL:
|
| 32 |
+
|
| 33 |
+
- Hugging Face Spaces URL: wss://<space-subdomain>.hf.space
|
| 34 |
+
|
| 35 |
+
The app connects to: wss://<space-subdomain>.hf.space/ws
|
| 36 |
+
|
| 37 |
+
## Notes
|
| 38 |
+
|
| 39 |
+
- The container downloads models on startup via download_models.py.
|
| 40 |
+
- Offline downloads in the app use WS play with realtime=false, which skips the server's real-time frame pacing so audio is streamed as fast as it can be synthesized.
|
| 41 |
+
|
| 42 |
+
## Deploy to Azure (Container Apps)
|
| 43 |
+
|
| 44 |
+
This Docker image can be deployed to Azure Container Apps.
|
| 45 |
+
|
| 46 |
+
1) Create a resource group + registry:
|
| 47 |
+
|
| 48 |
+
- az group create -n corereader-rg -l westeurope
|
| 49 |
+
- az acr create -n <acrName> -g corereader-rg --sku Basic
|
| 50 |
+
|
| 51 |
+
2) Build + push to ACR:
|
| 52 |
+
|
| 53 |
+
- az acr build -r <acrName> -t corereader-backend:v1 .
|
| 54 |
+
|
| 55 |
+
3) Deploy a public Container App (the server binds to $PORT, default 7860, so --target-port must match):
|
| 56 |
+
|
| 57 |
+
- az extension add --name containerapp --upgrade
|
| 58 |
+
- az containerapp env create -g corereader-rg -n corereader-env -l westeurope
|
| 59 |
+
- loginServer=$(az acr show -n <acrName> -g corereader-rg --query loginServer -o tsv)
|
| 60 |
+
- az containerapp create -g corereader-rg -n corereader-backend --environment corereader-env \
|
| 61 |
+
--image "$loginServer/corereader-backend:v1" \
|
| 62 |
+
--ingress external --target-port 7860 --registry-server "$loginServer"
|
| 63 |
+
|
| 64 |
+
4) Get the URL:
|
| 65 |
+
|
| 66 |
+
- fqdn=$(az containerapp show -g corereader-rg -n corereader-backend --query properties.configuration.ingress.fqdn -o tsv)
|
| 67 |
+
|
| 68 |
+
Paste into the Flutter app Settings:
|
| 69 |
+
|
| 70 |
+
- wss://$fqdn
|
backend/scraper.py
CHANGED
|
@@ -139,6 +139,42 @@ class NovelCoolScraper:
|
|
| 139 |
links.sort(key=chapter_key)
|
| 140 |
return links
|
| 141 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 142 |
if __name__ == "__main__":
|
| 143 |
import asyncio
|
| 144 |
scraper = NovelCoolScraper()
|
|
|
|
| 139 |
links.sort(key=chapter_key)
|
| 140 |
return links
|
| 141 |
|
| 142 |
+
async def scrape_novel_details(self, novel_url: str):
    """Fetch a NovelCool novel page and pull out lightweight metadata.

    Returns a dict with two keys:
    - title: text of the page <title> with the " - Novel Cool" suffix
      removed, or None when no usable title is found
    - cover_url: cover image URL resolved against novel_url, or None when
      no cover image is detected

    Raises Exception when the page does not answer with HTTP 200.
    """
    async with aiohttp.ClientSession() as session:
        async with session.get(novel_url, headers=self.headers) as response:
            if response.status != 200:
                raise Exception(f"Failed to fetch page: {response.status}")
            html = await response.text()

    soup = BeautifulSoup(html, 'lxml')

    # Title: strip the site suffix, but keep the raw text if stripping
    # would leave nothing.
    title_tag = soup.find('title')
    page_title = title_tag.get_text(strip=True) if title_tag else ''
    title = None
    if page_title:
        title = page_title.split(' - Novel Cool', 1)[0].strip() or page_title

    # Cover: prefer the dedicated book-info image, then fall back to the
    # schema.org image markup.
    cover_tag = (
        soup.select_one('img.bookinfo-pic-img')
        or soup.select_one('img[itemprop="image"]')
    )
    cover_src = cover_tag.get('src') if cover_tag else None
    cover_url = urljoin(novel_url, cover_src) if cover_src else None

    return {"title": title, "cover_url": cover_url}
|
| 177 |
+
|
| 178 |
if __name__ == "__main__":
|
| 179 |
import asyncio
|
| 180 |
scraper = NovelCoolScraper()
|
backend/server.py
CHANGED
|
@@ -77,6 +77,7 @@ def _space_runtime_info(request: Request) -> dict:
|
|
| 77 |
"health": "/health",
|
| 78 |
"voices": "/voices",
|
| 79 |
"novel_index": "/novel_index?url=<novel_url>",
|
|
|
|
| 80 |
"novel_meta": "/novel_meta?url=<novel_url>",
|
| 81 |
"novel_chapter": "/novel_chapter?url=<novel_url>&n=<chapter_number>",
|
| 82 |
"websocket": "/ws",
|
|
@@ -124,6 +125,7 @@ async def root(request: Request):
|
|
| 124 |
<li><code>/health</code></li>
|
| 125 |
<li><code>/voices</code></li>
|
| 126 |
<li><code>/novel_index?url=<novel_url></code></li>
|
|
|
|
| 127 |
<li><code>/novel_meta?url=<novel_url></code></li>
|
| 128 |
<li><code>/novel_chapter?url=<novel_url>&n=<chapter_number></code></li>
|
| 129 |
<li><code>/ws</code> (WebSocket)</li>
|
|
@@ -162,6 +164,14 @@ async def novel_index(url: str):
|
|
| 162 |
return {"chapters": chapters}
|
| 163 |
|
| 164 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 165 |
async def _get_cached_novel_index(novel_url: str):
|
| 166 |
"""Return cached chapter list for a novel URL, scraping once per TTL."""
|
| 167 |
if not novel_url:
|
|
@@ -299,6 +309,7 @@ async def websocket_endpoint(websocket: WebSocket):
|
|
| 299 |
prefetch = int(message.get("prefetch", 3))
|
| 300 |
frame_ms = int(message.get("frame_ms", 200))
|
| 301 |
start_paragraph = int(message.get("start_paragraph", 0) or 0)
|
|
|
|
| 302 |
|
| 303 |
if not url:
|
| 304 |
await websocket.send_json({"type": "error", "message": "URL is required"})
|
|
@@ -419,10 +430,11 @@ async def websocket_endpoint(websocket: WebSocket):
|
|
| 419 |
|
| 420 |
# Pace frames close to real-time so UI updates (sentence highlighting)
|
| 421 |
# match what is audible, even when synthesis runs faster than realtime.
|
| 422 |
-
|
| 423 |
-
|
| 424 |
-
|
| 425 |
-
|
|
|
|
| 426 |
|
| 427 |
if control_task is not None:
|
| 428 |
control_task.cancel()
|
|
|
|
| 77 |
"health": "/health",
|
| 78 |
"voices": "/voices",
|
| 79 |
"novel_index": "/novel_index?url=<novel_url>",
|
| 80 |
+
"novel_details": "/novel_details?url=<novel_url>",
|
| 81 |
"novel_meta": "/novel_meta?url=<novel_url>",
|
| 82 |
"novel_chapter": "/novel_chapter?url=<novel_url>&n=<chapter_number>",
|
| 83 |
"websocket": "/ws",
|
|
|
|
| 125 |
<li><code>/health</code></li>
|
| 126 |
<li><code>/voices</code></li>
|
| 127 |
<li><code>/novel_index?url=<novel_url></code></li>
|
| 128 |
+
<li><code>/novel_details?url=<novel_url></code></li>
|
| 129 |
<li><code>/novel_meta?url=<novel_url></code></li>
|
| 130 |
<li><code>/novel_chapter?url=<novel_url>&n=<chapter_number></code></li>
|
| 131 |
<li><code>/ws</code> (WebSocket)</li>
|
|
|
|
| 164 |
return {"chapters": chapters}
|
| 165 |
|
| 166 |
|
| 167 |
+
@app.get("/novel_details")
async def novel_details(url: str):
    """Return lightweight metadata (title and cover URL) for a novel page.

    Query parameters:
        url: novel page URL passed to the scraper.

    Returns:
        A dict with "title" and "cover_url" (either may be None). When the
        URL is empty or the scrape fails, the same shape is returned with
        both fields None and an "error" message, so clients always receive
        one consistent JSON structure.
    """
    if not url:
        return {"title": None, "cover_url": None, "error": "url is required"}
    # NOTE(review): url is caller-supplied and fetched server-side; consider
    # restricting it to the expected novel hosts.
    try:
        return await app.state.scraper.scrape_novel_details(url)
    except Exception as exc:
        # Best-effort endpoint: report upstream/parse failures in the same
        # error shape as the empty-URL case instead of an unhandled 500.
        return {"title": None, "cover_url": None, "error": str(exc)}
|
| 173 |
+
|
| 174 |
+
|
| 175 |
async def _get_cached_novel_index(novel_url: str):
|
| 176 |
"""Return cached chapter list for a novel URL, scraping once per TTL."""
|
| 177 |
if not novel_url:
|
|
|
|
| 309 |
prefetch = int(message.get("prefetch", 3))
|
| 310 |
frame_ms = int(message.get("frame_ms", 200))
|
| 311 |
start_paragraph = int(message.get("start_paragraph", 0) or 0)
|
| 312 |
+
realtime = bool(message.get("realtime", True))
|
| 313 |
|
| 314 |
if not url:
|
| 315 |
await websocket.send_json({"type": "error", "message": "URL is required"})
|
|
|
|
| 430 |
|
| 431 |
# Pace frames close to real-time so UI updates (sentence highlighting)
|
| 432 |
# match what is audible, even when synthesis runs faster than realtime.
# Clients that send realtime=false (e.g. offline downloads) skip this pacing.
|
| 433 |
+
if realtime:
|
| 434 |
+
try:
|
| 435 |
+
await asyncio.sleep(len(audio_frame) / (2 * app.state.tts.sample_rate))
|
| 436 |
+
except Exception:
|
| 437 |
+
pass
|
| 438 |
|
| 439 |
if control_task is not None:
|
| 440 |
control_task.cancel()
|