| """ |
| ============================================ |
| Scraper API Routes |
| - Add novels |
| - Start/Stop scraping |
| - Get live status |
| - Delete novels |
| ============================================ |
| """ |
|
|
| import logging |
| from typing import Optional, List |
| from fastapi import APIRouter, Depends, HTTPException, status |
| from pydantic import BaseModel, Field, HttpUrl |
| from sqlalchemy.ext.asyncio import AsyncSession |
|
|
| from app.database.connection import get_db_session |
| from app.database.crud import ( |
| create_novel, |
| get_all_novels, |
| get_novel_by_id, |
| delete_novel, |
| update_novel_status, |
| ) |
| from app.database.models import NovelStatus |
| from app.scraper.scraper_engine import ( |
| start_scraping_novel, |
| stop_scraping_novel, |
| stop_all_scraping, |
| scraper_status, |
| get_active_task_ids, |
| ) |
| from app.scraper.browser_manager import browser_manager |
|
|
# Module-level logger named after this module, and the router that groups all
# scraper endpoints under the /api prefix (tagged "Scraper" in the OpenAPI docs).
logger = logging.getLogger(__name__)
router = APIRouter(prefix="/api", tags=["Scraper"])
|
|
|
|
| |
| |
| |
class NovelAddRequest(BaseModel):
    """Request body for adding a new novel."""
    # Display title of the novel (required, 1-500 characters).
    title: str = Field(..., min_length=1, max_length=500, examples=["My Novel"])
    # Starting chapter URL. NOTE(review): only length-validated, not URL-validated —
    # `HttpUrl` is imported at the top of the file but unused; confirm whether
    # stricter validation was intended here.
    url: str = Field(..., min_length=10, examples=["https://example.com/novel/chapter-1"])
    # Optional credentials for sites that require a login before reading.
    login_email: Optional[str] = Field(None, examples=["user@email.com"])
    login_password: Optional[str] = Field(None, examples=["password123"])
    # Optional CSS selector override for the "next chapter" link/button.
    next_button_selector: Optional[str] = Field(
        None,
        examples=["a.next_page, a[rel='next'], .next-chap"]
    )
    # Optional CSS selector override for the chapter body container.
    content_selector: Optional[str] = Field(
        None,
        examples=[".chapter-content, .reading-content, #chapter-content"]
    )
|
|
|
|
class NovelResponse(BaseModel):
    """Response model describing a novel and its scraping progress."""
    id: int
    title: str
    url: str
    # Serialized value of the NovelStatus enum (e.g. "queued").
    status: str
    chapters_scraped: int
    last_error: Optional[str] = None
    needs_intervention: bool = False
    screenshot_path: Optional[str] = None
    current_url: Optional[str] = None

    # Allow constructing this model directly from ORM objects.
    # Pydantic v2 style: `model_config` replaces the deprecated inner
    # `class Config` (the rest of this file already uses v2 features such as
    # Field(examples=...) and from_attributes).
    model_config = {"from_attributes": True}
|
|
|
|
class BatchAddRequest(BaseModel):
    """Request body for adding multiple novels at once."""
    # Between 1 and 15 novels per batch.
    novels: List[NovelAddRequest] = Field(..., min_length=1, max_length=15)
    # Fallback credentials applied to any novel in the batch that does not
    # carry its own login_email/login_password.
    shared_email: Optional[str] = None
    shared_password: Optional[str] = None
|
|
|
|
class StartScrapeRequest(BaseModel):
    """Request body for starting scraping."""
    # When None, the /scrape/start handler falls back to all queued novels.
    novel_ids: Optional[List[int]] = Field(
        None,
        description="Specific novel IDs to scrape. If None, scrape all queued."
    )
|
|
|
|
class StatusResponse(BaseModel):
    """Live status of all scraping activities."""
    # Current vs. configured browser pool size.
    active_browsers: int
    max_browsers: int
    browser_initialized: bool
    # Per-novel live status dict maintained by the scraper engine
    # (`scraper_status`); shape is defined there, not in this module.
    novels: dict
    active_task_ids: list
|
|
|
|
| |
| |
| |
@router.post("/novels", response_model=NovelResponse, status_code=status.HTTP_201_CREATED)
async def add_novel(
    request: NovelAddRequest,
    db: AsyncSession = Depends(get_db_session),
):
    """
    Add a single novel to the database.
    It will be in 'queued' status until scraping starts.

    Raises:
        HTTPException: 500 if the novel could not be persisted.
    """
    # Keep the try body limited to the operations that can actually fail on
    # the database side; building the response afterwards cannot turn a
    # successful commit into a spurious 500.
    try:
        novel = await create_novel(
            db,
            title=request.title,
            url=request.url,
            login_email=request.login_email,
            login_password=request.login_password,
            next_button_selector=request.next_button_selector,
            content_selector=request.content_selector,
        )
        await db.commit()
    except Exception as e:
        # Roll back so the session is left in a usable state after a failure.
        await db.rollback()
        logger.exception("Error adding novel")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Failed to add novel: {str(e)}",
        ) from e

    logger.info(f"Novel added: {novel.title} (ID: {novel.id})")

    return NovelResponse(
        id=novel.id,
        title=novel.title,
        url=novel.url,
        status=novel.status.value,
        chapters_scraped=novel.chapters_scraped,
        last_error=novel.last_error,
        needs_intervention=novel.needs_intervention,
        screenshot_path=novel.screenshot_path,
        current_url=novel.current_url,
    )
|
|
|
|
@router.post("/novels/batch", status_code=status.HTTP_201_CREATED)
async def add_novels_batch(
    request: BatchAddRequest,
    db: AsyncSession = Depends(get_db_session),
):
    """
    Add multiple novels in one request.

    Per-novel credentials take precedence over the batch-wide shared
    credentials. Failures are collected per novel instead of aborting the
    whole batch; a single commit persists everything that succeeded.
    """
    added_novels = []
    errors = []

    for item in request.novels:
        # Fall back to the shared batch credentials when the entry has none.
        email = item.login_email or request.shared_email
        password = item.login_password or request.shared_password

        try:
            novel = await create_novel(
                db,
                title=item.title,
                url=item.url,
                login_email=email,
                login_password=password,
                next_button_selector=item.next_button_selector,
                content_selector=item.content_selector,
            )
        except Exception as e:
            errors.append({
                "title": item.title,
                "error": str(e),
            })
        else:
            added_novels.append({
                "id": novel.id,
                "title": novel.title,
                "status": "queued",
            })

    await db.commit()

    return {
        "added": added_novels,
        "errors": errors,
        "total_added": len(added_novels),
        "total_errors": len(errors),
    }
|
|
|
|
@router.get("/novels", response_model=List[NovelResponse])
async def list_novels(db: AsyncSession = Depends(get_db_session)):
    """Get all novels with their current status."""
    responses = []
    for novel in await get_all_novels(db):
        responses.append(
            NovelResponse(
                id=novel.id,
                title=novel.title,
                url=novel.url,
                status=novel.status.value,
                chapters_scraped=novel.chapters_scraped,
                last_error=novel.last_error,
                needs_intervention=novel.needs_intervention,
                screenshot_path=novel.screenshot_path,
                current_url=novel.current_url,
            )
        )
    return responses
|
|
|
|
@router.get("/novels/{novel_id}", response_model=NovelResponse)
async def get_novel(
    novel_id: int,
    db: AsyncSession = Depends(get_db_session),
):
    """Get a specific novel by ID; responds 404 when it does not exist."""
    record = await get_novel_by_id(db, novel_id)
    if not record:
        raise HTTPException(status_code=404, detail="Novel not found")

    return NovelResponse(
        id=record.id,
        title=record.title,
        url=record.url,
        status=record.status.value,
        chapters_scraped=record.chapters_scraped,
        last_error=record.last_error,
        needs_intervention=record.needs_intervention,
        screenshot_path=record.screenshot_path,
        current_url=record.current_url,
    )
|
|
|
|
@router.delete("/novels/{novel_id}")
async def remove_novel(
    novel_id: int,
    db: AsyncSession = Depends(get_db_session),
):
    """Delete a novel and all its chapters."""
    # Make sure no scraping task is still running for this novel before
    # removing its rows.
    await stop_scraping_novel(novel_id)

    if not await delete_novel(db, novel_id):
        raise HTTPException(status_code=404, detail="Novel not found")

    await db.commit()
    return {"message": f"Novel {novel_id} deleted successfully"}
|
|
|
|
@router.post("/scrape/start")
async def start_scraping(
    request: StartScrapeRequest = StartScrapeRequest(),
    db: AsyncSession = Depends(get_db_session),
):
    """
    Start scraping novels.
    If novel_ids provided, scrape only those.
    If not, scrape all novels in 'queued' status.

    Raises:
        HTTPException: 503 if the browser pool is not ready yet,
            404 if no matching novels are found.
    """
    if not browser_manager.is_initialized:
        raise HTTPException(
            status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
            detail="Browser not initialized yet. Please wait and try again.",
        )

    novels = await get_all_novels(db)

    if request.novel_ids:
        # Set membership keeps the filter O(n) instead of O(n*m).
        wanted_ids = set(request.novel_ids)
        novels_to_scrape = [n for n in novels if n.id in wanted_ids]
    else:
        novels_to_scrape = [
            n for n in novels
            if n.status in (NovelStatus.QUEUED, NovelStatus.PAUSED_ERROR)
        ]

    if not novels_to_scrape:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="No novels found to scrape.",
        )

    started = []
    for novel in novels_to_scrape:
        # Minimal payload the scraper engine needs to drive this novel.
        novel_data = {
            "url": novel.url,
            "title": novel.title,
            "login_email": novel.login_email,
            "login_password": novel.login_password,
            "next_button_selector": novel.next_button_selector,
            "content_selector": novel.content_selector,
        }

        if await start_scraping_novel(novel.id, novel_data):
            started.append({"id": novel.id, "title": novel.title})

    return {
        "message": f"Started scraping {len(started)} novel(s)",
        "started": started,
        # Report the configured limit. The previous code read the private
        # asyncio.Semaphore attribute `_value`, which is the number of
        # *remaining* permits (it shrinks as browsers start), not the maximum.
        "max_concurrent": settings.MAX_CONCURRENT_BROWSERS,
    }
|
|
|
|
@router.post("/scrape/stop/{novel_id}")
async def stop_one_scraping(novel_id: int):
    """Stop scraping a specific novel; 404 if it is not actively scraping."""
    if await stop_scraping_novel(novel_id):
        return {"message": f"Novel {novel_id} scraping stopped"}

    raise HTTPException(
        status_code=404,
        detail=f"Novel {novel_id} is not actively scraping",
    )
|
|
|
|
@router.post("/scrape/stop-all")
async def stop_all():
    """Stop all active scraping tasks."""
    # Delegates entirely to the scraper engine; always reports success.
    await stop_all_scraping()
    return {"message": "All scraping tasks stopped"}
|
|
|
|
@router.get("/status", response_model=StatusResponse)
async def get_live_status():
    """
    Get real-time status of all scraping activities.
    This endpoint is polled by the frontend every few seconds.
    """
    # NOTE(review): `settings` is imported at the very bottom of this file.
    # It is bound at module import time (before any request runs), so this
    # works, but the import belongs in the top import group — verify there is
    # no circular-import reason before moving it.
    return StatusResponse(
        active_browsers=browser_manager.active_count,
        max_browsers=settings.MAX_CONCURRENT_BROWSERS,
        browser_initialized=browser_manager.is_initialized,
        novels=scraper_status,
        active_task_ids=get_active_task_ids(),
    )
|
|
|
|
| |
# NOTE(review): out-of-place import — presumably placed here to dodge a
# circular import with app.config; it still executes at module import time,
# so `settings` is available to the handlers above. Confirm the cycle and
# move this into the top import group if possible.
from app.config import settings