"""
Base Scraper Module

Defines the abstract base class for all scrapers in the CyberScraper-2077 project.
All scraper implementations should inherit from BaseScraper and implement its
abstract methods.
"""
from abc import ABC, abstractmethod
class BaseScraper(ABC):
"""
Abstract base class for all scraper implementations.
This class defines the common interface that all scrapers must implement.
Supports async context manager protocol for proper resource cleanup.
"""
@abstractmethod
async def fetch_content(self, url: str, proxy: str | None = None) -> str:
"""
Fetch content from a given URL.
Args:
url: The URL to fetch content from
proxy: Proxy server to use for the request
Returns:
The raw content fetched from the URL
"""
pass
@abstractmethod
async def extract(self, content: str) -> dict:
"""
Extract structured data from raw content.
Args:
content: Raw content to extract data from
Returns:
Structured data extracted from the content
"""
pass
async def close(self) -> None:
"""Clean up resources. Override in subclasses that need cleanup."""
pass
async def __aenter__(self):
"""Async context manager entry."""
return self
async def __aexit__(self, exc_type, exc_val, exc_tb):
"""Async context manager exit with cleanup."""
await self.close()
return False
|