Spaces:

shreyas-joshi
/

CoreReader

Sleeping

shreyas-joshi Cursor committed on Feb 18

Commit

c37e1f4

1 Parent(s): 6497be7

Sync backend features: novel_details, realtime flag, portable PORT, Azure deploy docs

- scraper.py: add scrape_novel_details() returning title + cover_url
- server.py: add GET /novel_details endpoint; surface in homepage + /info
- server.py: WS play accepts realtime=false to skip frame-pacing sleep (fast downloads)
- Dockerfile: bind to $PORT (default 7860) for Azure portability
- README.md: full Azure Container Apps deploy guide + updated endpoint list

Co-authored-by: Cursor <cursoragent@cursor.com>

Files changed (4) hide show

Dockerfile +3 -2
README.md +61 -1
backend/scraper.py +36 -0
backend/server.py +16 -4

Dockerfile CHANGED Viewed

@@ -29,5 +29,6 @@ COPY backend /app/backend
 EXPOSE 7860
-# Keep backend logic unchanged, but bind Space app to 7860.
-CMD ["/bin/sh", "-lc", "uv run python download_models.py && uv run python -c \"import uvicorn, server; uvicorn.run(server.app, host='0.0.0.0', port=7860)\""]

 EXPOSE 7860
+# Hugging Face Spaces uses port 7860. Azure can use any target port; we bind to
+# $PORT when set, defaulting to 7860.
+# NOTE: exec-form CMD is parsed as a JSON array, so the only backslash escapes
+# used inside it are \" for the nested quotes; "$" must not be escaped.
+CMD ["/bin/sh", "-lc", "PORT=${PORT:-7860} && uv run python download_models.py && uv run python -c \"import os, uvicorn, server; uvicorn.run(server.app, host='0.0.0.0', port=int(os.environ.get('PORT','7860')))\""]

README.md CHANGED Viewed

@@ -7,4 +7,64 @@ sdk: docker
 pinned: false
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 pinned: false
 ---
+# CoreReader Backend (Docker)
+This repository is a backend-only deployment target for CoreReader / LN-TTS.
+It runs a FastAPI server that:
+- Scrapes NovelCool chapters + chapter index
+- Runs Kokoro ONNX TTS (CPU)
+- Streams PCM16 mono audio over WebSocket
+## Endpoints
+- GET /health
+- GET /voices
+- GET /novel_index?url=...
+- GET /novel_details?url=... (title + best-effort cover URL)
+- GET /novel_meta?url=...
+- GET /novel_chapter?url=...&n=...
+- WS /ws
+## Use from the Flutter app
+In Settings → WebSocket base URL:
+- Hugging Face Spaces URL: wss://<space-subdomain>.hf.space
+The app connects to: wss://<space-subdomain>.hf.space/ws
+## Notes
+- The container downloads models on startup via download_models.py.
+- Offline downloads in the app use WS play with realtime=false so synthesis runs faster than real-time.
+## Deploy to Azure (Container Apps)
+This Docker image can be deployed to Azure Container Apps.
+1) Create a resource group + registry:
+- az group create -n corereader-rg -l westeurope
+- az acr create -n <acrName> -g corereader-rg --sku Basic
+2) Build + push to ACR:
+- az acr build -r <acrName> -t corereader-backend:v1 .
+3) Deploy a public Container App (binds to PORT, default 7860):
+- az extension add --name containerapp --upgrade
+- az containerapp env create -g corereader-rg -n corereader-env -l westeurope
+- loginServer=$(az acr show -n <acrName> -g corereader-rg --query loginServer -o tsv)
+- az containerapp create -g corereader-rg -n corereader-backend --environment corereader-env \
+	--image "$loginServer/corereader-backend:v1" \
+	--ingress external --target-port 7860 --registry-server "$loginServer"
+4) Get the URL:
+- fqdn=$(az containerapp show -g corereader-rg -n corereader-backend --query properties.configuration.ingress.fqdn -o tsv)
+Paste into the Flutter app Settings:
+- wss://$fqdn

backend/scraper.py CHANGED Viewed

@@ -139,6 +139,42 @@ class NovelCoolScraper:
         links.sort(key=chapter_key)
         return links
 if __name__ == "__main__":
     import asyncio
     scraper = NovelCoolScraper()

         links.sort(key=chapter_key)
         return links
+    async def scrape_novel_details(self, novel_url: str):
+        """Scrape a NovelCool novel page and return lightweight metadata.
+        Args:
+        - novel_url: URL of the novel page; also used as the base when
+          resolving the cover image's src attribute.
+        Returns:
+        A dict with two keys, either of which may be None:
+        - title: best-effort title
+        - cover_url: absolute URL to the cover image, when detectable
+        Raises:
+        - Exception: when the page responds with a status other than 200.
+        """
+        # Fetch the raw HTML with the scraper's shared request headers; the
+        # session is opened and closed per call.
+        async with aiohttp.ClientSession() as session:
+            async with session.get(novel_url, headers=self.headers) as response:
+                if response.status != 200:
+                    raise Exception(f"Failed to fetch page: {response.status}")
+                html = await response.text()
+        soup = BeautifulSoup(html, 'lxml')
+        # Title: take the <title> text and cut everything from the first
+        # ' - Novel Cool' marker onward; if that leaves nothing, keep the raw text.
+        title = None
+        t = soup.find('title')
+        if t:
+            raw = t.get_text(strip=True)
+            if raw:
+                title = raw.split(' - Novel Cool', 1)[0].strip() or raw
+        # Cover: try the bookinfo image selector first, then fall back to an
+        # <img> carrying itemprop="image".
+        cover_url = None
+        img = soup.select_one('img.bookinfo-pic-img')
+        if not img:
+            img = soup.select_one('img[itemprop="image"]')
+        if img:
+            src = img.get('src')
+            if src:
+                # Resolve src against the page URL so relative paths become usable.
+                cover_url = urljoin(novel_url, src)
+        return {
+            "title": title,
+            "cover_url": cover_url,
+        }
 if __name__ == "__main__":
     import asyncio
     scraper = NovelCoolScraper()

backend/server.py CHANGED Viewed

@@ -77,6 +77,7 @@ def _space_runtime_info(request: Request) -> dict:
             "health": "/health",
             "voices": "/voices",
             "novel_index": "/novel_index?url=<novel_url>",
             "novel_meta": "/novel_meta?url=<novel_url>",
             "novel_chapter": "/novel_chapter?url=<novel_url>&n=<chapter_number>",
             "websocket": "/ws",
@@ -124,6 +125,7 @@ async def root(request: Request):
             <li><code>/health</code></li>
             <li><code>/voices</code></li>
             <li><code>/novel_index?url=&lt;novel_url&gt;</code></li>
             <li><code>/novel_meta?url=&lt;novel_url&gt;</code></li>
             <li><code>/novel_chapter?url=&lt;novel_url&gt;&amp;n=&lt;chapter_number&gt;</code></li>
             <li><code>/ws</code> (WebSocket)</li>
@@ -162,6 +164,14 @@ async def novel_index(url: str):
     return {"chapters": chapters}
 async def _get_cached_novel_index(novel_url: str):
     """Return cached chapter list for a novel URL, scraping once per TTL."""
     if not novel_url:
@@ -299,6 +309,7 @@ async def websocket_endpoint(websocket: WebSocket):
                     prefetch = int(message.get("prefetch", 3))
                     frame_ms = int(message.get("frame_ms", 200))
                     start_paragraph = int(message.get("start_paragraph", 0) or 0)
                     if not url:
                         await websocket.send_json({"type": "error", "message": "URL is required"})
@@ -419,10 +430,11 @@ async def websocket_endpoint(websocket: WebSocket):
                             # Pace frames close to real-time so UI updates (sentence highlighting)
                             # match what is audible, even when synthesis runs faster than realtime.
-                            try:
-                                await asyncio.sleep(len(audio_frame) / (2 * app.state.tts.sample_rate))
-                            except Exception:
-                                pass
                         if control_task is not None:
                             control_task.cancel()

             "health": "/health",
             "voices": "/voices",
             "novel_index": "/novel_index?url=<novel_url>",
+            "novel_details": "/novel_details?url=<novel_url>",
             "novel_meta": "/novel_meta?url=<novel_url>",
             "novel_chapter": "/novel_chapter?url=<novel_url>&n=<chapter_number>",
             "websocket": "/ws",
             <li><code>/health</code></li>
             <li><code>/voices</code></li>
             <li><code>/novel_index?url=&lt;novel_url&gt;</code></li>
+                        <li><code>/novel_details?url=&lt;novel_url&gt;</code></li>
             <li><code>/novel_meta?url=&lt;novel_url&gt;</code></li>
             <li><code>/novel_chapter?url=&lt;novel_url&gt;&amp;n=&lt;chapter_number&gt;</code></li>
             <li><code>/ws</code> (WebSocket)</li>
     return {"chapters": chapters}
+@app.get("/novel_details")
+async def novel_details(url: str):
+    # Return the scraper's lightweight metadata for the novel page at `url`.
+    # An empty url short-circuits with an error payload instead of scraping.
+    if not url:
+        return {"title": None, "cover_url": None, "error": "url is required"}
+    # Delegates to the shared scraper stored on app.state. Nothing is caught
+    # here, so any exception raised by the scraper propagates to the framework.
+    # NOTE(review): the README describes this endpoint as best-effort — confirm
+    # whether scrape failures should be returned as an "error" payload instead.
+    details = await app.state.scraper.scrape_novel_details(url)
+    return details
 async def _get_cached_novel_index(novel_url: str):
     """Return cached chapter list for a novel URL, scraping once per TTL."""
     if not novel_url:
                     prefetch = int(message.get("prefetch", 3))
                     frame_ms = int(message.get("frame_ms", 200))
                     start_paragraph = int(message.get("start_paragraph", 0) or 0)
+                    realtime = bool(message.get("realtime", True))
                     if not url:
                         await websocket.send_json({"type": "error", "message": "URL is required"})
                             # Pace frames close to real-time so UI updates (sentence highlighting)
                             # match what is audible, even when synthesis runs faster than realtime.
+                            if realtime:
+                                try:
+                                    await asyncio.sleep(len(audio_frame) / (2 * app.state.tts.sample_rate))
+                                except Exception:
+                                    pass
                         if control_task is not None:
                             control_task.cancel()