from __future__ import annotations import time from typing import Optional from fastapi import APIRouter, Depends from app.api.deps import require_auth from app.core.logger import get_logger from app.models.schemas import ( ScrapeHealthResponse, ScrapeRequest, ScrapeResponse, ) from app.services.scraper_service import ScraperService router = APIRouter() _logger = get_logger(__name__) def get_scraper_service() -> ScraperService: return ScraperService() @router.get( "/scrape/health", response_model=ScrapeHealthResponse, summary="Check Scrapling framework health and available fetchers", ) async def scrape_health( token: str = Depends(require_auth), svc: ScraperService = Depends(get_scraper_service), ) -> ScrapeHealthResponse: _logger.info("Scrape health check") result = await svc.health() return ScrapeHealthResponse( success=True, framework="Scrapling", version=result.get("version", "unknown"), fetchers_available=result.get("available", []), ) @router.post( "/scrape/extract", response_model=ScrapeResponse, summary="Extract structured data from any webpage using CSS/XPath selectors", ) async def scrape_extract( body: ScrapeRequest, token: str = Depends(require_auth), svc: ScraperService = Depends(get_scraper_service), ) -> ScrapeResponse: _logger.info( "Scrape extract: url=%s fetcher=%s rules=%d", body.url, body.fetcher_type, len(body.rules), ) start = time.perf_counter() rules_dict = [r.model_dump() for r in body.rules] data = await svc.extract( url=body.url, fetcher_type=body.fetcher_type.value, rules=rules_dict, proxy=body.proxy, network_idle=body.network_idle, ) elapsed_ms = round((time.perf_counter() - start) * 1000, 2) error = data.pop("error", None) return ScrapeResponse( success=error is None, time_ms=elapsed_ms, url=body.url, data=data, fetcher=body.fetcher_type.value, error=error, )