| from abc import ABC |
| from random import choice |
| from time import sleep as time_sleep |
| from asyncio import sleep as asyncio_sleep |
|
|
| from curl_cffi.curl import CurlError |
| from curl_cffi import CurlHttpVersion |
| from curl_cffi.requests import ( |
| BrowserTypeLiteral, |
| Session as CurlSession, |
| AsyncSession as AsyncCurlSession, |
| ) |
|
|
| from scrapling.core.utils import log |
| from scrapling.core._types import ( |
| Any, |
| Dict, |
| Tuple, |
| Unpack, |
| Optional, |
| Awaitable, |
| SUPPORTED_HTTP_METHODS, |
| ) |
|
|
| from .toolbelt.custom import Response |
| from .toolbelt.convertor import ResponseFactory |
| from .toolbelt.proxy_rotation import ProxyRotator, is_proxy_error |
| from ._browsers._types import RequestsSession, GetRequestParams, DataRequestParams, ImpersonateType |
| from .toolbelt.fingerprints import generate_headers, __default_useragent__ |
|
|
# Sentinel stored in the session attribute of the one-off clients (`FetcherClient` /
# `AsyncFetcherClient`). `_make_request` compares against it by identity to tell
# "open a throwaway curl session for this request" apart from None, which means
# "the context manager has not been entered".
_NO_SESSION: Any = object()
|
|
|
|
| def _select_random_browser(impersonate: ImpersonateType) -> Optional[BrowserTypeLiteral]: |
| """ |
| Handle browser selection logic for the ` impersonate ` parameter. |
| |
| If impersonate is a list, randomly select one browser from it. |
| If it's a string or None, return as is. |
| """ |
| if isinstance(impersonate, list): |
| if not impersonate: |
| return None |
| return choice(impersonate) |
| return impersonate |
|
|
|
|
| class _ConfigurationLogic(ABC): |
| |
| __slots__ = ( |
| "_default_impersonate", |
| "_stealth", |
| "_default_proxies", |
| "_default_proxy", |
| "_default_proxy_auth", |
| "_default_timeout", |
| "_default_headers", |
| "_default_retries", |
| "_default_retry_delay", |
| "_default_follow_redirects", |
| "_default_max_redirects", |
| "_default_verify", |
| "_default_cert", |
| "_default_http3", |
| "selector_config", |
| "_is_alive", |
| "_proxy_rotator", |
| ) |
|
|
| def __init__(self, **kwargs: Unpack[RequestsSession]): |
| self._default_impersonate = kwargs.get("impersonate", "chrome") |
| self._stealth = kwargs.get("stealthy_headers", True) |
| self._default_proxies = kwargs.get("proxies") or {} |
| self._default_proxy = kwargs.get("proxy") or None |
| self._default_proxy_auth = kwargs.get("proxy_auth") or None |
| self._default_timeout = kwargs.get("timeout", 30) |
| self._default_headers = kwargs.get("headers") or {} |
| self._default_retries = kwargs.get("retries", 3) |
| self._default_retry_delay = kwargs.get("retry_delay", 1) |
| self._default_follow_redirects = kwargs.get("follow_redirects", True) |
| self._default_max_redirects = kwargs.get("max_redirects", 30) |
| self._default_verify = kwargs.get("verify", True) |
| self._default_cert = kwargs.get("cert") or None |
| self._default_http3 = kwargs.get("http3", False) |
| self.selector_config = kwargs.get("selector_config") or {} |
| self._is_alive = False |
| self._proxy_rotator: Optional[ProxyRotator] = kwargs.get("proxy_rotator") |
|
|
| if self._proxy_rotator and (self._default_proxy or self._default_proxies): |
| raise ValueError( |
| "Cannot use 'proxy_rotator' together with 'proxy' or 'proxies'. " |
| "Use either a static proxy or proxy rotation, not both." |
| ) |
|
|
| @staticmethod |
| def _get_param(kwargs: Dict, key: str, default: Any) -> Any: |
| """Get parameter from kwargs if present, otherwise return default.""" |
| return kwargs[key] if key in kwargs else default |
|
|
| def _merge_request_args(self, **method_kwargs) -> Dict[str, Any]: |
| """Merge request-specific arguments with default session arguments.""" |
| url = method_kwargs.pop("url") |
|
|
| |
| impersonate = self._get_param(method_kwargs, "impersonate", self._default_impersonate) |
| impersonate = _select_random_browser(impersonate) |
| http3_enabled = self._get_param(method_kwargs, "http3", self._default_http3) |
| stealth = self._get_param(method_kwargs, "stealth", self._stealth) |
|
|
| final_args = { |
| "url": url, |
| |
| "headers": self._headers_job( |
| url, |
| self._get_param(method_kwargs, "headers", self._default_headers), |
| stealth, |
| bool(impersonate), |
| ), |
| "proxies": self._get_param(method_kwargs, "proxies", self._default_proxies), |
| "proxy": self._get_param(method_kwargs, "proxy", self._default_proxy), |
| "proxy_auth": self._get_param(method_kwargs, "proxy_auth", self._default_proxy_auth), |
| "timeout": self._get_param(method_kwargs, "timeout", self._default_timeout), |
| "allow_redirects": self._get_param(method_kwargs, "follow_redirects", self._default_follow_redirects), |
| "max_redirects": self._get_param(method_kwargs, "max_redirects", self._default_max_redirects), |
| "verify": self._get_param(method_kwargs, "verify", self._default_verify), |
| "cert": self._get_param(method_kwargs, "cert", self._default_cert), |
| "impersonate": impersonate, |
| } |
|
|
| |
| |
| skip_keys = { |
| "impersonate", |
| "http3", |
| "stealth", |
| "headers", |
| "proxies", |
| "proxy", |
| "proxy_auth", |
| "timeout", |
| "follow_redirects", |
| "max_redirects", |
| "verify", |
| "cert", |
| "retries", |
| "retry_delay", |
| "selector_config", |
| |
| "extra_headers", |
| "google_search", |
| } |
| for k, v in method_kwargs.items(): |
| if k not in skip_keys and v is not None: |
| final_args[k] = v |
|
|
| if http3_enabled: |
| final_args["http_version"] = CurlHttpVersion.V3ONLY |
| if impersonate: |
| log.warning( |
| "The argument `http3` might cause errors if used with `impersonate` argument, try switching it off if you encounter any curl errors." |
| ) |
|
|
| return final_args |
|
|
| def _headers_job(self, url, headers: Dict, stealth: bool, impersonate_enabled: bool) -> Dict: |
| """ |
| 1. Adds a useragent to the headers if it doesn't have one |
| 2. Generates real headers and append them to current headers |
| 3. Sets a Google referer header. |
| """ |
| |
| final_headers = {**self._default_headers, **(headers if headers else {})} |
| headers_keys = {k.lower() for k in final_headers} |
| if stealth: |
| if "referer" not in headers_keys: |
| final_headers["referer"] = "https://www.google.com/" |
|
|
| if not impersonate_enabled: |
| extra_headers = generate_headers(browser_mode=False) |
| final_headers.update( |
| {k: v for k, v in extra_headers.items() if k.lower() not in headers_keys} |
| ) |
|
|
| elif "user-agent" not in headers_keys and not impersonate_enabled: |
| final_headers["User-Agent"] = __default_useragent__ |
| log.debug(f"Can't find useragent in headers so '{final_headers['User-Agent']}' was used.") |
|
|
| return final_headers |
|
|
|
|
class _SyncSessionLogic(_ConfigurationLogic):
    """
    Synchronous request logic on top of a curl_cffi `Session`.

    Used as a context manager, it opens one curl session on enter and closes it on
    exit; the HTTP verb methods send requests through that session, retrying when
    curl raises an error.
    """

    __slots__ = ("_curl_session",)

    def __init__(self, **kwargs: Unpack[RequestsSession]):
        """Store the session defaults; the curl session itself is only created on `__enter__`."""
        super().__init__(**kwargs)
        self._curl_session: Optional[CurlSession] = None

    def __enter__(self):
        """
        Create the curl session and mark this instance as active.

        :raises RuntimeError: If the instance is already active.
        """
        if self._is_alive:
            raise RuntimeError("This FetcherSession instance already has an active synchronous session.")

        self._curl_session = CurlSession()
        self._is_alive = True
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Close the curl session managed by this instance, if any, and mark it inactive."""
        if self._curl_session:
            self._curl_session.close()
            self._curl_session = None

        self._is_alive = False

    def _make_request(self, method: SUPPORTED_HTTP_METHODS, stealth: Optional[bool] = None, **kwargs) -> Response:
        """
        Perform an HTTP request using the configured session, retrying on curl errors.

        :param method: HTTP method passed to the curl session.
        :param stealth: Per-request override for stealthy headers; None uses the session default.
        :param kwargs: Request arguments; `url` is required. `proxy` is consumed here, and
            `retries`, `retry_delay` and `selector_config` are read here; the remaining
            arguments are merged with the session defaults.
        :return: The `Response` built from the curl response.
        :raises RuntimeError: If there is no open session to send the request with.
        :raises CurlError: Re-raised when the last attempt fails.
        """
        stealth = self._stealth if stealth is None else stealth

        selector_config = self._get_param(kwargs, "selector_config", self.selector_config) or self.selector_config
        # A retry count of 0 or None still means "try once"; otherwise the loop below would
        # never run and the caller would get an unrelated "no session" error.
        max_retries = max(self._get_param(kwargs, "retries", self._default_retries) or 1, 1)
        retry_delay = self._get_param(kwargs, "retry_delay", self._default_retry_delay)
        static_proxy = kwargs.pop("proxy", None)

        # `proxy` is always forwarded explicitly to `_merge_request_args`, where a key that is
        # present (even with None) overrides the session default. The session-level proxy
        # therefore has to be resolved here, or it would never be applied. It is skipped when a
        # `proxies` mapping is in effect, so such requests behave as before.
        effective_proxies = self._get_param(kwargs, "proxies", self._default_proxies)
        session_proxy = None if effective_proxies else self._default_proxy

        session = self._curl_session
        one_off_request = False
        if session is _NO_SESSION and self.__enter__ is None:
            # One-off client: no long-lived session exists, so open a throwaway one
            # that is closed again in the `finally` block below.
            session = CurlSession()
            one_off_request = True

        if not session:
            raise RuntimeError("No active session available.")

        try:
            for attempt in range(max_retries):
                if static_proxy is not None:
                    proxy = static_proxy
                elif self._proxy_rotator:
                    # Ask the rotator again on every attempt.
                    proxy = self._proxy_rotator.get_proxy()
                else:
                    proxy = session_proxy

                request_args = self._merge_request_args(stealth=stealth, proxy=proxy, **kwargs)
                try:
                    response = session.request(method, **request_args)
                    return ResponseFactory.from_http_request(response, selector_config, meta={"proxy": proxy})
                except CurlError as e:
                    if attempt < max_retries - 1:
                        if is_proxy_error(e):
                            log.warning(
                                f"Proxy '{proxy}' failed (attempt {attempt + 1}) | Retrying in {retry_delay} seconds..."
                            )
                        else:
                            log.warning(f"Attempt {attempt + 1} failed: {e}. Retrying in {retry_delay} seconds...")
                        time_sleep(retry_delay)
                    else:
                        log.error(f"Failed after {max_retries} attempts: {e}")
                        raise
        finally:
            if session and one_off_request:
                session.close()

        # Not expected to be reached: every attempt either returns or re-raises.
        raise RuntimeError("No active session available.")

    def get(self, url: str, **kwargs: Unpack[GetRequestParams]) -> Response:
        """
        Perform a GET request.

        Any additional keyword arguments are passed to the `curl_cffi.requests.Session().request()` method.

        :param url: Target URL for the request.
        :param kwargs: Additional keyword arguments including:
            - params: Query string parameters for the request.
            - headers: Headers to include in the request.
            - cookies: Cookies to use in the request.
            - timeout: Number of seconds to wait before timing out.
            - follow_redirects: Whether to follow redirects. Defaults to True.
            - max_redirects: Maximum number of redirects. Default 30, use -1 for unlimited.
            - retries: Number of retry attempts. Defaults to 3.
            - retry_delay: Number of seconds to wait between retry attempts. Defaults to 1 second.
            - proxies: Dict of proxies to use.
            - proxy: Proxy URL to use. Format: "http://username:password@localhost:8030".
            - proxy_auth: HTTP basic auth for proxy, tuple of (username, password).
            - auth: HTTP basic auth tuple of (username, password). Only basic auth is supported.
            - verify: Whether to verify HTTPS certificates.
            - cert: Tuple of (cert, key) filenames for the client certificate.
            - impersonate: Browser version to impersonate. Automatically defaults to the latest available Chrome version.
            - http3: Whether to use HTTP3. Defaults to False. It might be problematic if used with `impersonate`.
            - stealthy_headers: If enabled (default), it creates and adds real browser headers.
        :return: A `Response` object.
        """
        stealthy_headers = kwargs.pop("stealthy_headers", None)
        return self._make_request("GET", stealth=stealthy_headers, url=url, **kwargs)

    def post(self, url: str, **kwargs: Unpack[DataRequestParams]) -> Response:
        """
        Perform a POST request.

        Any additional keyword arguments are passed to the `curl_cffi.requests.Session().request()` method.

        :param url: Target URL for the request.
        :param kwargs: Additional keyword arguments including:
            - data: Form data to include in the request body.
            - json: A JSON serializable object to include in the body of the request.
            - params: Query string parameters for the request.
            - headers: Headers to include in the request.
            - cookies: Cookies to use in the request.
            - timeout: Number of seconds to wait before timing out.
            - follow_redirects: Whether to follow redirects. Defaults to True.
            - max_redirects: Maximum number of redirects. Default 30, use -1 for unlimited.
            - retries: Number of retry attempts. Defaults to 3.
            - retry_delay: Number of seconds to wait between retry attempts. Defaults to 1 second.
            - proxies: Dict of proxies to use.
            - proxy: Proxy URL to use. Format: "http://username:password@localhost:8030".
            - proxy_auth: HTTP basic auth for proxy, tuple of (username, password).
            - auth: HTTP basic auth tuple of (username, password). Only basic auth is supported.
            - verify: Whether to verify HTTPS certificates.
            - cert: Tuple of (cert, key) filenames for the client certificate.
            - impersonate: Browser version to impersonate. Automatically defaults to the latest available Chrome version.
            - http3: Whether to use HTTP3. Defaults to False. It might be problematic if used with `impersonate`.
            - stealthy_headers: If enabled (default), it creates and adds real browser headers.
        :return: A `Response` object.
        """
        stealthy_headers = kwargs.pop("stealthy_headers", None)
        return self._make_request("POST", stealth=stealthy_headers, url=url, **kwargs)

    def put(self, url: str, **kwargs: Unpack[DataRequestParams]) -> Response:
        """
        Perform a PUT request.

        Any additional keyword arguments are passed to the `curl_cffi.requests.Session().request()` method.

        :param url: Target URL for the request.
        :param kwargs: Additional keyword arguments including:
            - data: Form data to include in the request body.
            - json: A JSON serializable object to include in the body of the request.
            - params: Query string parameters for the request.
            - headers: Headers to include in the request.
            - cookies: Cookies to use in the request.
            - timeout: Number of seconds to wait before timing out.
            - follow_redirects: Whether to follow redirects. Defaults to True.
            - max_redirects: Maximum number of redirects. Default 30, use -1 for unlimited.
            - retries: Number of retry attempts. Defaults to 3.
            - retry_delay: Number of seconds to wait between retry attempts. Defaults to 1 second.
            - proxies: Dict of proxies to use.
            - proxy: Proxy URL to use. Format: "http://username:password@localhost:8030".
            - proxy_auth: HTTP basic auth for proxy, tuple of (username, password).
            - auth: HTTP basic auth tuple of (username, password). Only basic auth is supported.
            - verify: Whether to verify HTTPS certificates.
            - cert: Tuple of (cert, key) filenames for the client certificate.
            - impersonate: Browser version to impersonate. Automatically defaults to the latest available Chrome version.
            - http3: Whether to use HTTP3. Defaults to False. It might be problematic if used with `impersonate`.
            - stealthy_headers: If enabled (default), it creates and adds real browser headers.
        :return: A `Response` object.
        """
        stealthy_headers = kwargs.pop("stealthy_headers", None)
        return self._make_request("PUT", stealth=stealthy_headers, url=url, **kwargs)

    def delete(self, url: str, **kwargs: Unpack[DataRequestParams]) -> Response:
        """
        Perform a DELETE request.

        Any additional keyword arguments are passed to the `curl_cffi.requests.Session().request()` method.

        :param url: Target URL for the request.
        :param kwargs: Additional keyword arguments including:
            - data: Form data to include in the request body.
            - json: A JSON serializable object to include in the body of the request.
            - params: Query string parameters for the request.
            - headers: Headers to include in the request.
            - cookies: Cookies to use in the request.
            - timeout: Number of seconds to wait before timing out.
            - follow_redirects: Whether to follow redirects. Defaults to True.
            - max_redirects: Maximum number of redirects. Default 30, use -1 for unlimited.
            - retries: Number of retry attempts. Defaults to 3.
            - retry_delay: Number of seconds to wait between retry attempts. Defaults to 1 second.
            - proxies: Dict of proxies to use.
            - proxy: Proxy URL to use. Format: "http://username:password@localhost:8030".
            - proxy_auth: HTTP basic auth for proxy, tuple of (username, password).
            - auth: HTTP basic auth tuple of (username, password). Only basic auth is supported.
            - verify: Whether to verify HTTPS certificates.
            - cert: Tuple of (cert, key) filenames for the client certificate.
            - impersonate: Browser version to impersonate. Automatically defaults to the latest available Chrome version.
            - http3: Whether to use HTTP3. Defaults to False. It might be problematic if used with `impersonate`.
            - stealthy_headers: If enabled (default), it creates and adds real browser headers.
        :return: A `Response` object.
        """
        stealthy_headers = kwargs.pop("stealthy_headers", None)
        return self._make_request("DELETE", stealth=stealthy_headers, url=url, **kwargs)
|
|
|
|
class _ASyncSessionLogic(_ConfigurationLogic):
    """
    Asynchronous request logic on top of a curl_cffi `AsyncSession`.

    Used as an async context manager, it opens one curl session on enter and closes
    it on exit; the HTTP verb methods return awaitables that send requests through
    that session, retrying when curl raises an error.
    """

    __slots__ = ("_async_curl_session",)

    def __init__(self, **kwargs: Unpack[RequestsSession]):
        """Store the session defaults; the curl session itself is only created on `__aenter__`."""
        super().__init__(**kwargs)
        self._async_curl_session: Optional[AsyncCurlSession] = None

    async def __aenter__(self):
        """
        Create the asynchronous curl session and mark this instance as active.

        :raises RuntimeError: If the instance is already active.
        """
        if self._is_alive:
            raise RuntimeError("This FetcherSession instance already has an active asynchronous session.")

        self._async_curl_session = AsyncCurlSession()
        self._is_alive = True
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Close the asynchronous curl session managed by this instance, if any, and mark it inactive."""
        if self._async_curl_session:
            await self._async_curl_session.close()
            self._async_curl_session = None

        self._is_alive = False

    async def _make_request(self, method: SUPPORTED_HTTP_METHODS, stealth: Optional[bool] = None, **kwargs) -> Response:
        """
        Perform an HTTP request using the configured session, retrying on curl errors.

        :param method: HTTP method passed to the curl session.
        :param stealth: Per-request override for stealthy headers; None uses the session default.
        :param kwargs: Request arguments; `url` is required. `proxy` is consumed here, and
            `retries`, `retry_delay` and `selector_config` are read here; the remaining
            arguments are merged with the session defaults.
        :return: The `Response` built from the curl response.
        :raises RuntimeError: If there is no open session to send the request with.
        :raises CurlError: Re-raised when the last attempt fails.
        """
        stealth = self._stealth if stealth is None else stealth

        selector_config = self._get_param(kwargs, "selector_config", self.selector_config) or self.selector_config
        # A retry count of 0 or None still means "try once"; otherwise the loop below would
        # never run and the caller would get an unrelated "no session" error.
        max_retries = max(self._get_param(kwargs, "retries", self._default_retries) or 1, 1)
        retry_delay = self._get_param(kwargs, "retry_delay", self._default_retry_delay)
        static_proxy = kwargs.pop("proxy", None)

        # `proxy` is always forwarded explicitly to `_merge_request_args`, where a key that is
        # present (even with None) overrides the session default. The session-level proxy
        # therefore has to be resolved here, or it would never be applied. It is skipped when a
        # `proxies` mapping is in effect, so such requests behave as before.
        effective_proxies = self._get_param(kwargs, "proxies", self._default_proxies)
        session_proxy = None if effective_proxies else self._default_proxy

        session = self._async_curl_session
        one_off_request = False
        if session is _NO_SESSION and self.__aenter__ is None:
            # One-off client: no long-lived session exists, so open a throwaway one
            # that is closed again in the `finally` block below.
            session = AsyncCurlSession()
            one_off_request = True

        if not session:
            raise RuntimeError("No active session available.")

        try:
            for attempt in range(max_retries):
                if static_proxy is not None:
                    proxy = static_proxy
                elif self._proxy_rotator:
                    # Ask the rotator again on every attempt.
                    proxy = self._proxy_rotator.get_proxy()
                else:
                    proxy = session_proxy

                request_args = self._merge_request_args(stealth=stealth, proxy=proxy, **kwargs)
                try:
                    response = await session.request(method, **request_args)
                    return ResponseFactory.from_http_request(response, selector_config, meta={"proxy": proxy})
                except CurlError as e:
                    if attempt < max_retries - 1:
                        if is_proxy_error(e):
                            log.warning(
                                f"Proxy '{proxy}' failed (attempt {attempt + 1}) | Retrying in {retry_delay} seconds..."
                            )
                        else:
                            log.warning(f"Attempt {attempt + 1} failed: {e}. Retrying in {retry_delay} seconds...")

                        await asyncio_sleep(retry_delay)
                    else:
                        log.error(f"Failed after {max_retries} attempts: {e}")
                        raise
        finally:
            if session and one_off_request:
                await session.close()

        # Not expected to be reached: every attempt either returns or re-raises.
        raise RuntimeError("No active session available.")

    def get(self, url: str, **kwargs: Unpack[GetRequestParams]) -> Awaitable[Response]:
        """
        Perform a GET request.

        Any additional keyword arguments are passed to the `curl_cffi.requests.AsyncSession().request()` method.

        :param url: Target URL for the request.
        :param kwargs: Additional keyword arguments including:
            - params: Query string parameters for the request.
            - headers: Headers to include in the request.
            - cookies: Cookies to use in the request.
            - timeout: Number of seconds to wait before timing out.
            - follow_redirects: Whether to follow redirects. Defaults to True.
            - max_redirects: Maximum number of redirects. Default 30, use -1 for unlimited.
            - retries: Number of retry attempts. Defaults to 3.
            - retry_delay: Number of seconds to wait between retry attempts. Defaults to 1 second.
            - proxies: Dict of proxies to use.
            - proxy: Proxy URL to use. Format: "http://username:password@localhost:8030".
            - proxy_auth: HTTP basic auth for proxy, tuple of (username, password).
            - auth: HTTP basic auth tuple of (username, password). Only basic auth is supported.
            - verify: Whether to verify HTTPS certificates.
            - cert: Tuple of (cert, key) filenames for the client certificate.
            - impersonate: Browser version to impersonate. Automatically defaults to the latest available Chrome version.
            - http3: Whether to use HTTP3. Defaults to False. It might be problematic if used with `impersonate`.
            - stealthy_headers: If enabled (default), it creates and adds real browser headers.
        :return: An awaitable that resolves to a `Response` object.
        """
        stealthy_headers = kwargs.pop("stealthy_headers", None)
        return self._make_request("GET", stealth=stealthy_headers, url=url, **kwargs)

    def post(self, url: str, **kwargs: Unpack[DataRequestParams]) -> Awaitable[Response]:
        """
        Perform a POST request.

        Any additional keyword arguments are passed to the `curl_cffi.requests.AsyncSession().request()` method.

        :param url: Target URL for the request.
        :param kwargs: Additional keyword arguments including:
            - data: Form data to include in the request body.
            - json: A JSON serializable object to include in the body of the request.
            - params: Query string parameters for the request.
            - headers: Headers to include in the request.
            - cookies: Cookies to use in the request.
            - timeout: Number of seconds to wait before timing out.
            - follow_redirects: Whether to follow redirects. Defaults to True.
            - max_redirects: Maximum number of redirects. Default 30, use -1 for unlimited.
            - retries: Number of retry attempts. Defaults to 3.
            - retry_delay: Number of seconds to wait between retry attempts. Defaults to 1 second.
            - proxies: Dict of proxies to use.
            - proxy: Proxy URL to use. Format: "http://username:password@localhost:8030".
            - proxy_auth: HTTP basic auth for proxy, tuple of (username, password).
            - auth: HTTP basic auth tuple of (username, password). Only basic auth is supported.
            - verify: Whether to verify HTTPS certificates.
            - cert: Tuple of (cert, key) filenames for the client certificate.
            - impersonate: Browser version to impersonate. Automatically defaults to the latest available Chrome version.
            - http3: Whether to use HTTP3. Defaults to False. It might be problematic if used with `impersonate`.
            - stealthy_headers: If enabled (default), it creates and adds real browser headers.
        :return: An awaitable that resolves to a `Response` object.
        """
        stealthy_headers = kwargs.pop("stealthy_headers", None)
        return self._make_request("POST", stealth=stealthy_headers, url=url, **kwargs)

    def put(self, url: str, **kwargs: Unpack[DataRequestParams]) -> Awaitable[Response]:
        """
        Perform a PUT request.

        Any additional keyword arguments are passed to the `curl_cffi.requests.AsyncSession().request()` method.

        :param url: Target URL for the request.
        :param kwargs: Additional keyword arguments including:
            - data: Form data to include in the request body.
            - json: A JSON serializable object to include in the body of the request.
            - params: Query string parameters for the request.
            - headers: Headers to include in the request.
            - cookies: Cookies to use in the request.
            - timeout: Number of seconds to wait before timing out.
            - follow_redirects: Whether to follow redirects. Defaults to True.
            - max_redirects: Maximum number of redirects. Default 30, use -1 for unlimited.
            - retries: Number of retry attempts. Defaults to 3.
            - retry_delay: Number of seconds to wait between retry attempts. Defaults to 1 second.
            - proxies: Dict of proxies to use.
            - proxy: Proxy URL to use. Format: "http://username:password@localhost:8030".
            - proxy_auth: HTTP basic auth for proxy, tuple of (username, password).
            - auth: HTTP basic auth tuple of (username, password). Only basic auth is supported.
            - verify: Whether to verify HTTPS certificates.
            - cert: Tuple of (cert, key) filenames for the client certificate.
            - impersonate: Browser version to impersonate. Automatically defaults to the latest available Chrome version.
            - http3: Whether to use HTTP3. Defaults to False. It might be problematic if used with `impersonate`.
            - stealthy_headers: If enabled (default), it creates and adds real browser headers.
        :return: An awaitable that resolves to a `Response` object.
        """
        stealthy_headers = kwargs.pop("stealthy_headers", None)
        return self._make_request("PUT", stealth=stealthy_headers, url=url, **kwargs)

    def delete(self, url: str, **kwargs: Unpack[DataRequestParams]) -> Awaitable[Response]:
        """
        Perform a DELETE request.

        Any additional keyword arguments are passed to the `curl_cffi.requests.AsyncSession().request()` method.

        :param url: Target URL for the request.
        :param kwargs: Additional keyword arguments including:
            - data: Form data to include in the request body.
            - json: A JSON serializable object to include in the body of the request.
            - params: Query string parameters for the request.
            - headers: Headers to include in the request.
            - cookies: Cookies to use in the request.
            - timeout: Number of seconds to wait before timing out.
            - follow_redirects: Whether to follow redirects. Defaults to True.
            - max_redirects: Maximum number of redirects. Default 30, use -1 for unlimited.
            - retries: Number of retry attempts. Defaults to 3.
            - retry_delay: Number of seconds to wait between retry attempts. Defaults to 1 second.
            - proxies: Dict of proxies to use.
            - proxy: Proxy URL to use. Format: "http://username:password@localhost:8030".
            - proxy_auth: HTTP basic auth for proxy, tuple of (username, password).
            - auth: HTTP basic auth tuple of (username, password). Only basic auth is supported.
            - verify: Whether to verify HTTPS certificates.
            - cert: Tuple of (cert, key) filenames for the client certificate.
            - impersonate: Browser version to impersonate. Automatically defaults to the latest available Chrome version.
            - http3: Whether to use HTTP3. Defaults to False. It might be problematic if used with `impersonate`.
            - stealthy_headers: If enabled (default), it creates and adds real browser headers.
        :return: An awaitable that resolves to a `Response` object.
        """
        stealthy_headers = kwargs.pop("stealthy_headers", None)
        return self._make_request("DELETE", stealth=stealthy_headers, url=url, **kwargs)
|
|
|
|
class FetcherSession:
    """
    A factory context manager that provides configured Fetcher sessions.

    When this manager is used in a 'with' or 'async with' block,
    it yields a new session configured with the manager's defaults.
    A single instance of this manager should ideally be used for one active
    session at a time (or sequentially). Re-entering a context with the
    same manager instance while a session is already active is disallowed.
    """

    __slots__ = (
        "_default_impersonate",
        "_stealth",
        "_default_proxies",
        "_default_proxy",
        "_default_proxy_auth",
        "_default_timeout",
        "_default_headers",
        "_default_retries",
        "_default_retry_delay",
        "_default_follow_redirects",
        "_default_max_redirects",
        "_default_verify",
        "_default_cert",
        "_default_http3",
        "selector_config",
        "_client",
        "_is_alive",
        "_proxy_rotator",
    )

    def __init__(
        self,
        impersonate: ImpersonateType = "chrome",
        http3: Optional[bool] = False,
        stealthy_headers: Optional[bool] = True,
        proxies: Optional[Dict[str, str]] = None,
        proxy: Optional[str] = None,
        proxy_auth: Optional[Tuple[str, str]] = None,
        timeout: Optional[int | float] = 30,
        headers: Optional[Dict[str, str]] = None,
        retries: Optional[int] = 3,
        retry_delay: Optional[int] = 1,
        follow_redirects: bool = True,
        max_redirects: int = 30,
        verify: bool = True,
        cert: Optional[str | Tuple[str, str]] = None,
        selector_config: Optional[Dict] = None,
        proxy_rotator: Optional[ProxyRotator] = None,
    ):
        """
        :param impersonate: Browser version to impersonate. Can be a single browser string or a list of browser strings for random selection. (Default: latest available Chrome version)
        :param http3: Whether to use HTTP3. Defaults to False. It might be problematic if used with `impersonate`.
        :param stealthy_headers: If enabled (default), it creates and adds real browser headers. It also sets a Google referer header.
        :param proxies: Dict of proxies to use. Format: {"http": proxy_url, "https": proxy_url}.
        :param proxy: Proxy URL to use. Format: "http://username:password@localhost:8030".
                     Cannot be used together with the `proxies` parameter.
        :param proxy_auth: HTTP basic auth for proxy, tuple of (username, password).
        :param timeout: Number of seconds to wait before timing out.
        :param headers: Headers to include in the session with every request.
        :param retries: Number of retry attempts. Defaults to 3.
        :param retry_delay: Number of seconds to wait between retry attempts. Defaults to 1 second.
        :param follow_redirects: Whether to follow redirects. Defaults to True.
        :param max_redirects: Maximum number of redirects. Default 30, use -1 for unlimited.
        :param verify: Whether to verify HTTPS certificates. Defaults to True.
        :param cert: Tuple of (cert, key) filenames for the client certificate.
        :param selector_config: Arguments passed when creating the final Selector class.
        :param proxy_rotator: A ProxyRotator instance for automatic proxy rotation.
        """
        self._default_impersonate: ImpersonateType = impersonate
        self._stealth = stealthy_headers
        self._default_proxies = proxies or {}
        self._default_proxy = proxy or None
        self._default_proxy_auth = proxy_auth or None
        self._default_timeout = timeout
        self._default_headers = headers or {}
        self._default_retries = retries
        self._default_retry_delay = retry_delay
        self._default_follow_redirects = follow_redirects
        self._default_max_redirects = max_redirects
        self._default_verify = verify
        self._default_cert = cert
        self._default_http3 = http3
        self.selector_config = selector_config or {}
        self._is_alive = False
        self._client: _SyncSessionLogic | _ASyncSessionLogic | None = None
        self._proxy_rotator = proxy_rotator

    def _session_config(self) -> Dict:
        """Build the keyword arguments used to construct the underlying sync/async client."""
        # Every `_default_<name>` slot maps to the `<name>` keyword of the client.
        config = {
            slot.removeprefix("_default_"): getattr(self, slot)
            for slot in self.__slots__
            if slot.startswith("_default")
        }
        config["stealthy_headers"] = self._stealth
        config["selector_config"] = self.selector_config
        config["proxy_rotator"] = self._proxy_rotator
        return config

    def __enter__(self) -> _SyncSessionLogic:
        """
        Creates and returns a new synchronous Fetcher Session.

        :raises RuntimeError: If this manager already holds an active session.
        """
        if self._client is not None:
            raise RuntimeError("This FetcherSession instance already has an active synchronous session.")

        client = _SyncSessionLogic(**self._session_config())
        entered = client.__enter__()
        # The client is only recorded once it is fully open, so a failure above
        # leaves the manager reusable instead of stuck in an "active" state.
        self._client = client
        self._is_alive = True
        return entered

    def __exit__(self, exc_type, exc_val, exc_tb):
        """
        Closes the active synchronous session.

        :raises RuntimeError: If no synchronous session is active on this manager.
        """
        client = self._client
        if client is None or not isinstance(client, _SyncSessionLogic):
            raise RuntimeError("Cannot exit invalid session")

        try:
            client.__exit__(exc_type, exc_val, exc_tb)
        finally:
            # Reset even when closing fails; otherwise the manager could never be entered again.
            self._client = None
            self._is_alive = False

    async def __aenter__(self) -> _ASyncSessionLogic:
        """
        Creates and returns a new asynchronous Session.

        :raises RuntimeError: If this manager already holds an active session.
        """
        if self._client is not None:
            raise RuntimeError("This FetcherSession instance already has an active asynchronous session.")

        client = _ASyncSessionLogic(**self._session_config())
        entered = await client.__aenter__()
        # The client is only recorded once it is fully open, so a failure above
        # leaves the manager reusable instead of stuck in an "active" state.
        self._client = client
        self._is_alive = True
        return entered

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """
        Closes the active asynchronous session.

        :raises RuntimeError: If no asynchronous session is active on this manager.
        """
        client = self._client
        if client is None or not isinstance(client, _ASyncSessionLogic):
            raise RuntimeError("Cannot exit invalid session")

        try:
            await client.__aexit__(exc_type, exc_val, exc_tb)
        finally:
            # Reset even when closing fails; otherwise the manager could never be entered again.
            self._client = None
            self._is_alive = False
|
|
|
|
class FetcherClient(_SyncSessionLogic):
    """
    Synchronous client for one-off requests, used without a `with` block.

    The context-manager hooks are set to None on the instance and the session
    attribute holds the `_NO_SESSION` sentinel. `_make_request` recognizes that
    combination and opens a throwaway curl session for each request.
    """

    __slots__ = ("__enter__", "__exit__")

    def __init__(self, **kwargs: Any) -> None:
        """Store the request defaults and switch the instance into one-off mode."""
        super().__init__(**kwargs)
        # Replace the None left by the parent initializer with the one-off sentinel.
        self._curl_session: Any = _NO_SESSION
        # Blank out both context-manager hooks on this instance.
        for hook in FetcherClient.__slots__:
            setattr(self, hook, None)
|
|
|
|
class AsyncFetcherClient(_ASyncSessionLogic):
    """
    Asynchronous client for one-off requests, used without an `async with` block.

    The async context-manager hooks are set to None on the instance and the session
    attribute holds the `_NO_SESSION` sentinel. `_make_request` recognizes that
    combination and opens a throwaway async curl session for each request.
    """

    __slots__ = ("__aenter__", "__aexit__")

    def __init__(self, **kwargs: Any) -> None:
        """Store the request defaults and switch the instance into one-off mode."""
        super().__init__(**kwargs)
        # Replace the None left by the parent initializer with the one-off sentinel.
        self._async_curl_session: Any = _NO_SESSION
        # Blank out both async context-manager hooks on this instance.
        for hook in AsyncFetcherClient.__slots__:
            setattr(self, hook, None)
|
|