Spaces:
Running
Running
| from __future__ import annotations | |
| import time | |
| from typing import Optional | |
| from fastapi import APIRouter, Depends | |
| from app.api.deps import require_auth | |
| from app.core.logger import get_logger | |
| from app.models.schemas import ( | |
| ScrapeHealthResponse, | |
| ScrapeRequest, | |
| ScrapeResponse, | |
| ) | |
| from app.services.scraper_service import ScraperService | |
# Router object for this module's scrape endpoints. No route registrations
# are visible in this part of the file.
router = APIRouter()

# Module-level logger obtained from the project's get_logger helper, keyed by
# this module's import name.
_logger = get_logger(__name__)
def get_scraper_service() -> ScraperService:
    """Build and return a new ``ScraperService``.

    The endpoint functions in this module reference this factory through
    ``Depends(get_scraper_service)``. The service is constructed with no
    arguments on every call.
    """
    service = ScraperService()
    return service
async def scrape_health(
    token: str = Depends(require_auth),
    svc: ScraperService = Depends(get_scraper_service),
) -> ScrapeHealthResponse:
    """Report the scraper backend's version and available fetchers.

    Args:
        token: Value produced by the ``require_auth`` dependency. It is not
            read in the body; declaring it makes the dependency run.
        svc: Scraper service whose ``health()`` coroutine is awaited.

    Returns:
        A ``ScrapeHealthResponse`` with ``success=True`` and the framework
        name ``"Scrapling"``. ``version`` falls back to ``"unknown"`` and
        ``fetchers_available`` to an empty list when the health result has
        no ``"version"`` / ``"available"`` entry.
    """
    _logger.info("Scrape health check")

    health_info = await svc.health()
    reported_version = health_info.get("version", "unknown")
    fetchers = health_info.get("available", [])

    return ScrapeHealthResponse(
        success=True,
        framework="Scrapling",
        version=reported_version,
        fetchers_available=fetchers,
    )
async def scrape_extract(
    body: ScrapeRequest,
    token: str = Depends(require_auth),
    svc: ScraperService = Depends(get_scraper_service),
) -> ScrapeResponse:
    """Run an extraction through the scraper service and wrap the result.

    Args:
        body: Request payload. This function reads ``url``, ``fetcher_type``,
            ``rules``, ``proxy`` and ``network_idle`` from it.
        token: Value produced by the ``require_auth`` dependency. It is not
            read in the body; declaring it makes the dependency run.
        svc: Scraper service whose ``extract()`` coroutine is awaited.

    Returns:
        A ``ScrapeResponse``. ``success`` is true only when the service
        result carries no ``"error"`` entry (or that entry is ``None``).
        ``time_ms`` is the elapsed time in milliseconds, rounded to two
        decimals, covering rule serialization and the ``extract()`` call.
    """
    _logger.info(
        "Scrape extract: url=%s fetcher=%s rules=%d",
        body.url,
        body.fetcher_type,
        len(body.rules),
    )

    started_at = time.perf_counter()

    # The service receives plain dictionaries instead of rule model objects.
    serialized_rules = [rule.model_dump() for rule in body.rules]
    fetcher_name = body.fetcher_type.value

    payload = await svc.extract(
        url=body.url,
        fetcher_type=fetcher_name,
        rules=serialized_rules,
        proxy=body.proxy,
        network_idle=body.network_idle,
    )

    duration_ms = round((time.perf_counter() - started_at) * 1000, 2)

    # Remove "error" from the service's result dict so it is reported in its
    # own response field and not duplicated inside ``data``.
    failure = payload.pop("error", None)
    succeeded = failure is None

    return ScrapeResponse(
        success=succeeded,
        time_ms=duration_ms,
        url=body.url,
        data=payload,
        fetcher=fetcher_name,
        error=failure,
    )