shreyas-joshi Cursor committed on
Commit
c37e1f4
·
1 Parent(s): 6497be7

Sync backend features: novel_details, realtime flag, portable PORT, Azure deploy docs

Browse files

- scraper.py: add scrape_novel_details() returning title + cover_url
- server.py: add GET /novel_details endpoint; surface in homepage + /info
- server.py: WS play accepts realtime=false to skip frame-pacing sleep (fast downloads)
- Dockerfile: bind to $PORT (default 7860) for Azure portability
- README.md: full Azure Container Apps deploy guide + updated endpoint list

Co-authored-by: Cursor <cursoragent@cursor.com>

Files changed (4) hide show
  1. Dockerfile +3 -2
  2. README.md +61 -1
  3. backend/scraper.py +36 -0
  4. backend/server.py +16 -4
Dockerfile CHANGED
@@ -29,5 +29,6 @@ COPY backend /app/backend
29
 
30
  EXPOSE 7860
31
 
32
- # Keep backend logic unchanged, but bind Space app to 7860.
33
- CMD ["/bin/sh", "-lc", "uv run python download_models.py && uv run python -c \"import uvicorn, server; uvicorn.run(server.app, host='0.0.0.0', port=7860)\""]
 
 
29
 
30
  EXPOSE 7860
31
 
32
+ # Hugging Face Spaces uses port 7860. Azure can use any target port; we bind to
33
+ # $PORT when set, defaulting to 7860.
34
# Exec-form CMD must be a valid JSON array: "$" is written unescaped because
# "\$" is not a JSON escape and would make Docker fall back to shell form.
# PORT is exported so the Python child process sees the 7860 default too.
CMD ["/bin/sh", "-lc", "export PORT=${PORT:-7860} && uv run python download_models.py && uv run python -c \"import os, uvicorn, server; uvicorn.run(server.app, host='0.0.0.0', port=int(os.environ.get('PORT','7860')))\""]
README.md CHANGED
@@ -7,4 +7,64 @@ sdk: docker
7
  pinned: false
8
  ---
9
 
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  pinned: false
8
  ---
9
 
10
+ # CoreReader Backend (Docker)
11
+
12
+ This repository is a backend-only deployment target for CoreReader / LN-TTS.
13
+
14
+ It runs a FastAPI server that:
15
+ - Scrapes NovelCool chapters + chapter index
16
+ - Runs Kokoro ONNX TTS (CPU)
17
+ - Streams PCM16 mono audio over WebSocket
18
+
19
+ ## Endpoints
20
+
21
+ - GET /health
22
+ - GET /voices
23
+ - GET /novel_index?url=...
24
+ - GET /novel_details?url=... (best-effort cover URL)
25
+ - GET /novel_meta?url=...
26
+ - GET /novel_chapter?url=...&n=...
27
+ - WS /ws
28
+
29
+ ## Use from the Flutter app
30
+
31
+ In Settings → WebSocket base URL:
32
+
33
+ - Hugging Face Spaces URL: wss://<space-subdomain>.hf.space
34
+
35
+ The app connects to: wss://<space-subdomain>.hf.space/ws
36
+
37
+ ## Notes
38
+
39
+ - The container downloads models on startup via download_models.py.
40
+ - Offline downloads in the app use WS play with realtime=false so synthesis runs faster than real-time.
41
+
42
+ ## Deploy to Azure (Container Apps)
43
+
44
+ This Docker image can be deployed to Azure Container Apps.
45
+
46
+ 1) Create a resource group + registry:
47
+
48
+ - az group create -n corereader-rg -l westeurope
49
+ - az acr create -n <acrName> -g corereader-rg --sku Basic
50
+
51
+ 2) Build + push to ACR:
52
+
53
+ - az acr build -r <acrName> -t corereader-backend:v1 .
54
+
55
+ 3) Deploy a public Container App (binds to PORT, default 7860):
56
+
57
+ - az extension add --name containerapp --upgrade
58
+ - az containerapp env create -g corereader-rg -n corereader-env -l westeurope
59
+ - loginServer=$(az acr show -n <acrName> -g corereader-rg --query loginServer -o tsv)
60
+ - az containerapp create -g corereader-rg -n corereader-backend --environment corereader-env \
61
+ --image "$loginServer/corereader-backend:v1" \
62
+ --ingress external --target-port 7860 --registry-server "$loginServer"
63
+
64
+ 4) Get the URL:
65
+
66
+ - fqdn=$(az containerapp show -g corereader-rg -n corereader-backend --query properties.configuration.ingress.fqdn -o tsv)
67
+
68
+ Paste into the Flutter app Settings:
69
+
70
+ - wss://$fqdn
backend/scraper.py CHANGED
@@ -139,6 +139,42 @@ class NovelCoolScraper:
139
  links.sort(key=chapter_key)
140
  return links
141
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
142
  if __name__ == "__main__":
143
  import asyncio
144
  scraper = NovelCoolScraper()
 
139
  links.sort(key=chapter_key)
140
  return links
141
 
142
async def scrape_novel_details(self, novel_url: str):
    """Fetch a NovelCool novel page and extract lightweight metadata.

    Args:
        novel_url: URL of the novel's page; also used as the base when
            resolving a relative cover image path.

    Returns:
        A dict with two keys:
        - "title": text of the page <title> with everything from
          " - Novel Cool" onward removed, or None when no title text exists.
        - "cover_url": absolute URL of the cover image, or None when no
          matching <img> with a non-empty src is found.

    Raises:
        Exception: when the page responds with a status other than 200.
    """
    async with aiohttp.ClientSession() as http:
        async with http.get(novel_url, headers=self.headers) as resp:
            if resp.status != 200:
                raise Exception(f"Failed to fetch page: {resp.status}")
            page = await resp.text()

    doc = BeautifulSoup(page, 'lxml')

    # Title: strip the site suffix, but keep the raw text if stripping
    # would leave nothing.
    title_tag = doc.find('title')
    title_text = title_tag.get_text(strip=True) if title_tag else ""
    if title_text:
        title = title_text.split(' - Novel Cool', 1)[0].strip() or title_text
    else:
        title = None

    # Cover: prefer the dedicated cover class, then fall back to the
    # itemprop="image" marker.
    cover_tag = (
        doc.select_one('img.bookinfo-pic-img')
        or doc.select_one('img[itemprop="image"]')
    )
    cover_src = cover_tag.get('src') if cover_tag else None
    cover_url = urljoin(novel_url, cover_src) if cover_src else None

    return {"title": title, "cover_url": cover_url}
177
+
178
  if __name__ == "__main__":
179
  import asyncio
180
  scraper = NovelCoolScraper()
backend/server.py CHANGED
@@ -77,6 +77,7 @@ def _space_runtime_info(request: Request) -> dict:
77
  "health": "/health",
78
  "voices": "/voices",
79
  "novel_index": "/novel_index?url=<novel_url>",
 
80
  "novel_meta": "/novel_meta?url=<novel_url>",
81
  "novel_chapter": "/novel_chapter?url=<novel_url>&n=<chapter_number>",
82
  "websocket": "/ws",
@@ -124,6 +125,7 @@ async def root(request: Request):
124
  <li><code>/health</code></li>
125
  <li><code>/voices</code></li>
126
  <li><code>/novel_index?url=&lt;novel_url&gt;</code></li>
 
127
  <li><code>/novel_meta?url=&lt;novel_url&gt;</code></li>
128
  <li><code>/novel_chapter?url=&lt;novel_url&gt;&amp;n=&lt;chapter_number&gt;</code></li>
129
  <li><code>/ws</code> (WebSocket)</li>
@@ -162,6 +164,14 @@ async def novel_index(url: str):
162
  return {"chapters": chapters}
163
 
164
 
 
 
 
 
 
 
 
 
165
  async def _get_cached_novel_index(novel_url: str):
166
  """Return cached chapter list for a novel URL, scraping once per TTL."""
167
  if not novel_url:
@@ -299,6 +309,7 @@ async def websocket_endpoint(websocket: WebSocket):
299
  prefetch = int(message.get("prefetch", 3))
300
  frame_ms = int(message.get("frame_ms", 200))
301
  start_paragraph = int(message.get("start_paragraph", 0) or 0)
 
302
 
303
  if not url:
304
  await websocket.send_json({"type": "error", "message": "URL is required"})
@@ -419,10 +430,11 @@ async def websocket_endpoint(websocket: WebSocket):
419
 
420
  # Pace frames close to real-time so UI updates (sentence highlighting)
421
  # match what is audible, even when synthesis runs faster than realtime.
422
- try:
423
- await asyncio.sleep(len(audio_frame) / (2 * app.state.tts.sample_rate))
424
- except Exception:
425
- pass
 
426
 
427
  if control_task is not None:
428
  control_task.cancel()
 
77
  "health": "/health",
78
  "voices": "/voices",
79
  "novel_index": "/novel_index?url=<novel_url>",
80
+ "novel_details": "/novel_details?url=<novel_url>",
81
  "novel_meta": "/novel_meta?url=<novel_url>",
82
  "novel_chapter": "/novel_chapter?url=<novel_url>&n=<chapter_number>",
83
  "websocket": "/ws",
 
125
  <li><code>/health</code></li>
126
  <li><code>/voices</code></li>
127
  <li><code>/novel_index?url=&lt;novel_url&gt;</code></li>
128
+ <li><code>/novel_details?url=&lt;novel_url&gt;</code></li>
129
  <li><code>/novel_meta?url=&lt;novel_url&gt;</code></li>
130
  <li><code>/novel_chapter?url=&lt;novel_url&gt;&amp;n=&lt;chapter_number&gt;</code></li>
131
  <li><code>/ws</code> (WebSocket)</li>
 
164
  return {"chapters": chapters}
165
 
166
 
167
+ @app.get("/novel_details")
168
+ async def novel_details(url: str):
169
+ if not url:
170
+ return {"title": None, "cover_url": None, "error": "url is required"}
171
+ details = await app.state.scraper.scrape_novel_details(url)
172
+ return details
173
+
174
+
175
  async def _get_cached_novel_index(novel_url: str):
176
  """Return cached chapter list for a novel URL, scraping once per TTL."""
177
  if not novel_url:
 
309
  prefetch = int(message.get("prefetch", 3))
310
  frame_ms = int(message.get("frame_ms", 200))
311
  start_paragraph = int(message.get("start_paragraph", 0) or 0)
312
+ realtime = bool(message.get("realtime", True))
313
 
314
  if not url:
315
  await websocket.send_json({"type": "error", "message": "URL is required"})
 
430
 
431
  # Pace frames close to real-time so UI updates (sentence highlighting)
432
  # match what is audible, even when synthesis runs faster than realtime.
433
+ if realtime:
434
+ try:
435
+ await asyncio.sleep(len(audio_frame) / (2 * app.state.tts.sample_rate))
436
+ except Exception:
437
+ pass
438
 
439
  if control_task is not None:
440
  control_task.cancel()