File size: 1,540 Bytes
eb37804
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
"""
Base Scraper Module

Defines the abstract base class for all scrapers in the CyberScraper-2077 project.
All scraper implementations should inherit from BaseScraper and implement its abstract methods.
"""

from abc import ABC, abstractmethod


class BaseScraper(ABC):
    """
    Abstract base class for all scraper implementations.

    This class defines the common interface that all scrapers must implement.
    Supports async context manager protocol for proper resource cleanup.
    """

    @abstractmethod
    async def fetch_content(self, url: str, proxy: str | None = None) -> str:
        """
        Fetch content from a given URL.

        Args:
            url: The URL to fetch content from
            proxy: Proxy server to use for the request

        Returns:
            The raw content fetched from the URL
        """
        pass

    @abstractmethod
    async def extract(self, content: str) -> dict:
        """
        Extract structured data from raw content.

        Args:
            content: Raw content to extract data from

        Returns:
            Structured data extracted from the content
        """
        pass

    async def close(self) -> None:
        """Clean up resources. Override in subclasses that need cleanup."""
        pass

    async def __aenter__(self):
        """Async context manager entry."""
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Async context manager exit with cleanup."""
        await self.close()
        return False