Spaces:

lenson78
/

Scrapling

Paused

App Files Community

Karim shoair committed on Jul 29, 2025

Commit

916182a

1 Parent(s): a2a8556

style: A lot of type hint corrections

Browse files

Since Python 3.10 is now the minimum supported version, `Union[...]` is replaced with the `X | Y` union syntax where possible

Files changed (17) hide show

scrapling/core/_html_utils.py +2 -2
scrapling/core/_types.py +0 -1
scrapling/core/ai.py +22 -23
scrapling/core/custom_types.py +31 -40
scrapling/core/shell.py +2 -3
scrapling/core/storage.py +7 -7
scrapling/engines/_browsers/_camoufox.py +11 -12
scrapling/engines/_browsers/_controllers.py +7 -8
scrapling/engines/_browsers/_page.py +3 -3
scrapling/engines/_browsers/_validators.py +7 -8
scrapling/engines/static.py +25 -26
scrapling/engines/toolbelt/__init__.py +0 -1
scrapling/engines/toolbelt/custom.py +1 -51
scrapling/engines/toolbelt/fingerprints.py +2 -2
scrapling/engines/toolbelt/navigation.py +3 -3
scrapling/fetchers.py +16 -17
scrapling/parser.py +39 -45

scrapling/core/_html_utils.py CHANGED Viewed

@@ -6,7 +6,7 @@ Repo source code: https://github.com/scrapy/w3lib/blob/master/w3lib/html.py
 from re import compile as _re_compile, IGNORECASE
-from scrapling.core._types import Iterable, Union, Match, StrOrBytes
 _ent_re = _re_compile(
     r"&((?P<named>[a-z\d]+)|#(?P<dec>\d+)|#x(?P<hex>[a-f\d]+))(?P<semicolon>;?)",
@@ -270,7 +270,7 @@ name2codepoint = {
 def to_unicode(
-    text: StrOrBytes, encoding: Union[str, None] = None, errors: str = "strict"
 ) -> str:
     """Return the Unicode representation of a bytes object `text`. If `text`
     is already a Unicode object, return it as-is."""

 from re import compile as _re_compile, IGNORECASE
+from scrapling.core._types import Iterable, Optional, Match, StrOrBytes
 _ent_re = _re_compile(
     r"&((?P<named>[a-z\d]+)|#(?P<dec>\d+)|#x(?P<hex>[a-f\d]+))(?P<semicolon>;?)",
 def to_unicode(
+    text: StrOrBytes, encoding: Optional[str] = None, errors: str = "strict"
 ) -> str:
     """Return the Unicode representation of a bytes object `text`. If `text`
     is already a Unicode object, return it as-is."""

scrapling/core/_types.py CHANGED Viewed

@@ -16,7 +16,6 @@ from typing import (
     Optional,
     Pattern,
     Tuple,
-    Type,
     TypeVar,
     Union,
     Match,

     Optional,
     Pattern,
     Tuple,
     TypeVar,
     Union,
     Match,

scrapling/core/ai.py CHANGED Viewed

@@ -17,7 +17,6 @@ from scrapling.core._types import (
     Optional,
     Tuple,
     extraction_types,
-    Union,
     Mapping,
     Dict,
     List,
@@ -61,10 +60,10 @@ class ScraplingMCPServer:
         extraction_type: extraction_types = "markdown",
         css_selector: Optional[str] = None,
         main_content_only: bool = True,
-        params: Optional[Union[Dict, List, Tuple]] = None,
         headers: Optional[Mapping[str, Optional[str]]] = None,
-        cookies: Optional[Union[Dict[str, str], list[tuple[str, str]]]] = None,
-        timeout: Optional[Union[int, float]] = 30,
         follow_redirects: bool = True,
         max_redirects: int = 30,
         retries: Optional[int] = 3,
@@ -140,10 +139,10 @@ class ScraplingMCPServer:
         extraction_type: extraction_types = "markdown",
         css_selector: Optional[str] = None,
         main_content_only: bool = True,
-        params: Optional[Union[Dict, List, Tuple]] = None,
         headers: Optional[Mapping[str, Optional[str]]] = None,
-        cookies: Optional[Union[Dict[str, str], list[tuple[str, str]]]] = None,
-        timeout: Optional[Union[int, float]] = 30,
         follow_redirects: bool = True,
         max_redirects: int = 30,
         retries: Optional[int] = 3,
@@ -232,13 +231,13 @@ class ScraplingMCPServer:
         disable_webgl: bool = False,
         real_chrome: bool = False,
         stealth: bool = False,
-        wait: Union[int, float] = 0,
-        proxy: Optional[Union[str, Dict[str, str]]] = None,
         locale: str = "en-US",
         extra_headers: Optional[Dict[str, str]] = None,
         useragent: Optional[str] = None,
         cdp_url: Optional[str] = None,
-        timeout: Union[int, float] = 30000,
         disable_resources: bool = False,
         wait_selector: Optional[str] = None,
         cookies: Optional[List[Dict]] = None,
@@ -321,13 +320,13 @@ class ScraplingMCPServer:
         disable_webgl: bool = False,
         real_chrome: bool = False,
         stealth: bool = False,
-        wait: Union[int, float] = 0,
-        proxy: Optional[Union[str, Dict[str, str]]] = None,
         locale: str = "en-US",
         extra_headers: Optional[Dict[str, str]] = None,
         useragent: Optional[str] = None,
         cdp_url: Optional[str] = None,
-        timeout: Union[int, float] = 30000,
         disable_resources: bool = False,
         wait_selector: Optional[str] = None,
         cookies: Optional[List[Dict]] = None,
@@ -409,23 +408,23 @@ class ScraplingMCPServer:
         extraction_type: extraction_types = "markdown",
         css_selector: Optional[str] = None,
         main_content_only: bool = True,
-        headless: Union[bool] = True,  # noqa: F821
         block_images: bool = False,
         disable_resources: bool = False,
         block_webrtc: bool = False,
         allow_webgl: bool = True,
         network_idle: bool = False,
-        humanize: Union[bool, float] = True,
         solve_cloudflare: bool = False,
-        wait: Union[int, float] = 0,
-        timeout: Union[int, float] = 30000,
         wait_selector: Optional[str] = None,
         addons: Optional[List[str]] = None,
         wait_selector_state: SelectorWaitStates = "attached",
         cookies: Optional[List[Dict]] = None,
         google_search: bool = True,
         extra_headers: Optional[Dict[str, str]] = None,
-        proxy: Optional[Union[str, Dict[str, str]]] = None,
         os_randomize: bool = False,
         disable_ads: bool = False,
         geoip: bool = False,
@@ -509,23 +508,23 @@ class ScraplingMCPServer:
         extraction_type: extraction_types = "markdown",
         css_selector: Optional[str] = None,
         main_content_only: bool = True,
-        headless: Union[bool] = True,  # noqa: F821
         block_images: bool = False,
         disable_resources: bool = False,
         block_webrtc: bool = False,
         allow_webgl: bool = True,
         network_idle: bool = False,
-        humanize: Union[bool, float] = True,
         solve_cloudflare: bool = False,
-        wait: Union[int, float] = 0,
-        timeout: Union[int, float] = 30000,
         wait_selector: Optional[str] = None,
         addons: Optional[List[str]] = None,
         wait_selector_state: SelectorWaitStates = "attached",
         cookies: Optional[List[Dict]] = None,
         google_search: bool = True,
         extra_headers: Optional[Dict[str, str]] = None,
-        proxy: Optional[Union[str, Dict[str, str]]] = None,
         os_randomize: bool = False,
         disable_ads: bool = False,
         geoip: bool = False,

     Optional,
     Tuple,
     extraction_types,
     Mapping,
     Dict,
     List,
         extraction_type: extraction_types = "markdown",
         css_selector: Optional[str] = None,
         main_content_only: bool = True,
+        params: Optional[Dict | List | Tuple] = None,
         headers: Optional[Mapping[str, Optional[str]]] = None,
+        cookies: Optional[Dict[str, str] | list[tuple[str, str]]] = None,
+        timeout: Optional[int | float] = 30,
         follow_redirects: bool = True,
         max_redirects: int = 30,
         retries: Optional[int] = 3,
         extraction_type: extraction_types = "markdown",
         css_selector: Optional[str] = None,
         main_content_only: bool = True,
+        params: Optional[Dict | List | Tuple] = None,
         headers: Optional[Mapping[str, Optional[str]]] = None,
+        cookies: Optional[Dict[str, str] | list[tuple[str, str]]] = None,
+        timeout: Optional[int | float] = 30,
         follow_redirects: bool = True,
         max_redirects: int = 30,
         retries: Optional[int] = 3,
         disable_webgl: bool = False,
         real_chrome: bool = False,
         stealth: bool = False,
+        wait: int | float = 0,
+        proxy: Optional[str | Dict[str, str]] = None,
         locale: str = "en-US",
         extra_headers: Optional[Dict[str, str]] = None,
         useragent: Optional[str] = None,
         cdp_url: Optional[str] = None,
+        timeout: int | float = 30000,
         disable_resources: bool = False,
         wait_selector: Optional[str] = None,
         cookies: Optional[List[Dict]] = None,
         disable_webgl: bool = False,
         real_chrome: bool = False,
         stealth: bool = False,
+        wait: int | float = 0,
+        proxy: Optional[str | Dict[str, str]] = None,
         locale: str = "en-US",
         extra_headers: Optional[Dict[str, str]] = None,
         useragent: Optional[str] = None,
         cdp_url: Optional[str] = None,
+        timeout: int | float = 30000,
         disable_resources: bool = False,
         wait_selector: Optional[str] = None,
         cookies: Optional[List[Dict]] = None,
         extraction_type: extraction_types = "markdown",
         css_selector: Optional[str] = None,
         main_content_only: bool = True,
+        headless: bool = True,  # noqa: F821
         block_images: bool = False,
         disable_resources: bool = False,
         block_webrtc: bool = False,
         allow_webgl: bool = True,
         network_idle: bool = False,
+        humanize: bool | float = True,
         solve_cloudflare: bool = False,
+        wait: int | float = 0,
+        timeout: int | float = 30000,
         wait_selector: Optional[str] = None,
         addons: Optional[List[str]] = None,
         wait_selector_state: SelectorWaitStates = "attached",
         cookies: Optional[List[Dict]] = None,
         google_search: bool = True,
         extra_headers: Optional[Dict[str, str]] = None,
+        proxy: Optional[str | Dict[str, str]] = None,
         os_randomize: bool = False,
         disable_ads: bool = False,
         geoip: bool = False,
         extraction_type: extraction_types = "markdown",
         css_selector: Optional[str] = None,
         main_content_only: bool = True,
+        headless: bool = True,  # noqa: F821
         block_images: bool = False,
         disable_resources: bool = False,
         block_webrtc: bool = False,
         allow_webgl: bool = True,
         network_idle: bool = False,
+        humanize: bool | float = True,
         solve_cloudflare: bool = False,
+        wait: int | float = 0,
+        timeout: int | float = 30000,
         wait_selector: Optional[str] = None,
         addons: Optional[List[str]] = None,
         wait_selector_state: SelectorWaitStates = "attached",
         cookies: Optional[List[Dict]] = None,
         google_search: bool = True,
         extra_headers: Optional[Dict[str, str]] = None,
+        proxy: Optional[str | Dict[str, str]] = None,
         os_randomize: bool = False,
         disable_ads: bool = False,
         geoip: bool = False,

scrapling/core/custom_types.py CHANGED Viewed

@@ -8,7 +8,6 @@ from scrapling.core._types import (
     cast,
     Dict,
     List,
-    Union,
     overload,
     TypeVar,
     Literal,
@@ -34,7 +33,7 @@ class TextHandler(str):
     def __new__(cls, string):
         return super().__new__(cls, str(string))
-    def __getitem__(self, key: Union[SupportsIndex, slice]) -> "TextHandler":
         lst = super().__getitem__(key)
         return cast(_TextHandlerType, TextHandler(lst))
@@ -46,78 +45,72 @@ class TextHandler(str):
             )
         )
-    def strip(self, chars: str = None) -> Union[str, "TextHandler"]:
         return TextHandler(super().strip(chars))
-    def lstrip(self, chars: str = None) -> Union[str, "TextHandler"]:
         return TextHandler(super().lstrip(chars))
-    def rstrip(self, chars: str = None) -> Union[str, "TextHandler"]:
         return TextHandler(super().rstrip(chars))
-    def capitalize(self) -> Union[str, "TextHandler"]:
         return TextHandler(super().capitalize())
-    def casefold(self) -> Union[str, "TextHandler"]:
         return TextHandler(super().casefold())
-    def center(
-        self, width: SupportsIndex, fillchar: str = " "
-    ) -> Union[str, "TextHandler"]:
         return TextHandler(super().center(width, fillchar))
-    def expandtabs(self, tabsize: SupportsIndex = 8) -> Union[str, "TextHandler"]:
         return TextHandler(super().expandtabs(tabsize))
-    def format(self, *args: str, **kwargs: str) -> Union[str, "TextHandler"]:
         return TextHandler(super().format(*args, **kwargs))
-    def format_map(self, mapping) -> Union[str, "TextHandler"]:
         return TextHandler(super().format_map(mapping))
-    def join(self, iterable: Iterable[str]) -> Union[str, "TextHandler"]:
         return TextHandler(super().join(iterable))
-    def ljust(
-        self, width: SupportsIndex, fillchar: str = " "
-    ) -> Union[str, "TextHandler"]:
         return TextHandler(super().ljust(width, fillchar))
-    def rjust(
-        self, width: SupportsIndex, fillchar: str = " "
-    ) -> Union[str, "TextHandler"]:
         return TextHandler(super().rjust(width, fillchar))
-    def swapcase(self) -> Union[str, "TextHandler"]:
         return TextHandler(super().swapcase())
-    def title(self) -> Union[str, "TextHandler"]:
         return TextHandler(super().title())
-    def translate(self, table) -> Union[str, "TextHandler"]:
         return TextHandler(super().translate(table))
-    def zfill(self, width: SupportsIndex) -> Union[str, "TextHandler"]:
         return TextHandler(super().zfill(width))
     def replace(
         self, old: str, new: str, count: SupportsIndex = -1
-    ) -> Union[str, "TextHandler"]:
         return TextHandler(super().replace(old, new, count))
-    def upper(self) -> Union[str, "TextHandler"]:
         return TextHandler(super().upper())
-    def lower(self) -> Union[str, "TextHandler"]:
         return TextHandler(super().lower())
     ##############
-    def sort(self, reverse: bool = False) -> Union[str, "TextHandler"]:
         """Return a sorted version of the string"""
         return self.__class__("".join(sorted(self, reverse=reverse)))
-    def clean(self) -> Union[str, "TextHandler"]:
         """Return a new version of the string after removing all white spaces and consecutive spaces"""
         data = self.translate(__CLEANING_TABLE__)
         return self.__class__(__CONSECUTIVE_SPACES_REGEX__.sub(" ", data).strip())
@@ -141,7 +134,7 @@ class TextHandler(str):
     @overload
     def re(
         self,
-        regex: Union[str, Pattern[str]],
         check_match: Literal[True],
         replace_entities: bool = True,
         clean_match: bool = False,
@@ -151,7 +144,7 @@ class TextHandler(str):
     @overload
     def re(
         self,
-        regex: Union[str, Pattern[str]],
         replace_entities: bool = True,
         clean_match: bool = False,
         case_sensitive: bool = True,
@@ -160,12 +153,12 @@ class TextHandler(str):
     def re(
         self,
-        regex: Union[str, Pattern[str]],
         replace_entities: bool = True,
         clean_match: bool = False,
         case_sensitive: bool = True,
         check_match: bool = False,
-    ) -> Union["TextHandlers[TextHandler]", bool]:
         """Apply the given regex to the current text and return a list of strings with the matches.
         :param regex: Can be either a compiled regular expression or a string.
@@ -205,7 +198,7 @@ class TextHandler(str):
     def re_first(
         self,
-        regex: Union[str, Pattern[str]],
         default=None,
         replace_entities: bool = True,
         clean_match: bool = False,
@@ -244,9 +237,7 @@ class TextHandlers(List[TextHandler]):
     def __getitem__(self, pos: slice) -> "TextHandlers":
         pass
-    def __getitem__(
-        self, pos: Union[SupportsIndex, slice]
-    ) -> Union[TextHandler, "TextHandlers"]:
         lst = super().__getitem__(pos)
         if isinstance(pos, slice):
             lst = [TextHandler(s) for s in lst]
@@ -255,7 +246,7 @@ class TextHandlers(List[TextHandler]):
     def re(
         self,
-        regex: Union[str, Pattern[str]],
         replace_entities: bool = True,
         clean_match: bool = False,
         case_sensitive: bool = True,
@@ -275,7 +266,7 @@ class TextHandlers(List[TextHandler]):
     def re_first(
         self,
-        regex: Union[str, Pattern[str]],
         default=None,
         replace_entities: bool = True,
         clean_match: bool = False,
@@ -339,7 +330,7 @@ class AttributesHandler(Mapping[str, _TextHandlerType]):
     def get(
         self, key: str, default: Optional[str] = None
-    ) -> Union[_TextHandlerType, None]:
         """Acts like the standard dictionary `.get()` method"""
         return self._data.get(key, default)

     cast,
     Dict,
     List,
     overload,
     TypeVar,
     Literal,
     def __new__(cls, string):
         return super().__new__(cls, str(string))
+    def __getitem__(self, key: SupportsIndex | slice) -> "TextHandler":
         lst = super().__getitem__(key)
         return cast(_TextHandlerType, TextHandler(lst))
             )
         )
+    def strip(self, chars: str = None) -> str | "TextHandler":
         return TextHandler(super().strip(chars))
+    def lstrip(self, chars: str = None) -> str | "TextHandler":
         return TextHandler(super().lstrip(chars))
+    def rstrip(self, chars: str = None) -> str | "TextHandler":
         return TextHandler(super().rstrip(chars))
+    def capitalize(self) -> str | "TextHandler":
         return TextHandler(super().capitalize())
+    def casefold(self) -> str | "TextHandler":
         return TextHandler(super().casefold())
+    def center(self, width: SupportsIndex, fillchar: str = " ") -> str | "TextHandler":
         return TextHandler(super().center(width, fillchar))
+    def expandtabs(self, tabsize: SupportsIndex = 8) -> str | "TextHandler":
         return TextHandler(super().expandtabs(tabsize))
+    def format(self, *args: str, **kwargs: str) -> str | "TextHandler":
         return TextHandler(super().format(*args, **kwargs))
+    def format_map(self, mapping) -> str | "TextHandler":
         return TextHandler(super().format_map(mapping))
+    def join(self, iterable: Iterable[str]) -> str | "TextHandler":
         return TextHandler(super().join(iterable))
+    def ljust(self, width: SupportsIndex, fillchar: str = " ") -> str | "TextHandler":
         return TextHandler(super().ljust(width, fillchar))
+    def rjust(self, width: SupportsIndex, fillchar: str = " ") -> str | "TextHandler":
         return TextHandler(super().rjust(width, fillchar))
+    def swapcase(self) -> str | "TextHandler":
         return TextHandler(super().swapcase())
+    def title(self) -> str | "TextHandler":
         return TextHandler(super().title())
+    def translate(self, table) -> str | "TextHandler":
         return TextHandler(super().translate(table))
+    def zfill(self, width: SupportsIndex) -> str | "TextHandler":
         return TextHandler(super().zfill(width))
     def replace(
         self, old: str, new: str, count: SupportsIndex = -1
+    ) -> str | "TextHandler":
         return TextHandler(super().replace(old, new, count))
+    def upper(self) -> str | "TextHandler":
         return TextHandler(super().upper())
+    def lower(self) -> str | "TextHandler":
         return TextHandler(super().lower())
     ##############
+    def sort(self, reverse: bool = False) -> str | "TextHandler":
         """Return a sorted version of the string"""
         return self.__class__("".join(sorted(self, reverse=reverse)))
+    def clean(self) -> str | "TextHandler":
         """Return a new version of the string after removing all white spaces and consecutive spaces"""
         data = self.translate(__CLEANING_TABLE__)
         return self.__class__(__CONSECUTIVE_SPACES_REGEX__.sub(" ", data).strip())
     @overload
     def re(
         self,
+        regex: str | Pattern,
         check_match: Literal[True],
         replace_entities: bool = True,
         clean_match: bool = False,
     @overload
     def re(
         self,
+        regex: str | Pattern,
         replace_entities: bool = True,
         clean_match: bool = False,
         case_sensitive: bool = True,
     def re(
         self,
+        regex: str | Pattern,
         replace_entities: bool = True,
         clean_match: bool = False,
         case_sensitive: bool = True,
         check_match: bool = False,
+    ) -> "TextHandlers" | bool:
         """Apply the given regex to the current text and return a list of strings with the matches.
         :param regex: Can be either a compiled regular expression or a string.
     def re_first(
         self,
+        regex: str | Pattern,
         default=None,
         replace_entities: bool = True,
         clean_match: bool = False,
     def __getitem__(self, pos: slice) -> "TextHandlers":
         pass
+    def __getitem__(self, pos: SupportsIndex | slice) -> TextHandler | "TextHandlers":
         lst = super().__getitem__(pos)
         if isinstance(pos, slice):
             lst = [TextHandler(s) for s in lst]
     def re(
         self,
+        regex: str | Pattern,
         replace_entities: bool = True,
         clean_match: bool = False,
         case_sensitive: bool = True,
     def re_first(
         self,
+        regex: str | Pattern,
         default=None,
         replace_entities: bool = True,
         clean_match: bool = False,
     def get(
         self, key: str, default: Optional[str] = None
+    ) -> Optional[_TextHandlerType]:
         """Acts like the standard dictionary `.get()` method"""
         return self._data.get(key, default)

scrapling/core/shell.py CHANGED Viewed

@@ -33,7 +33,6 @@ from scrapling.core._types import (
     Dict,
     Tuple,
     Any,
-    Union,
     extraction_types,
     Generator,
 )
@@ -254,7 +253,7 @@ class CurlParser:
         # --- Process Data Payload ---
         params = dict()
-        data_payload: Union[str, bytes, Dict, None] = None
         json_payload: Optional[Any] = None
         # DevTools often uses --data-raw for JSON bodies
@@ -358,7 +357,7 @@ class CurlParser:
             follow_redirects=True,  # Scrapling default is True
         )
-    def convert2fetcher(self, curl_command: Union[Request, str]) -> Optional[Response]:
         if isinstance(curl_command, (Request, str)):
             request = (
                 self.parse(curl_command)

     Dict,
     Tuple,
     Any,
     extraction_types,
     Generator,
 )
         # --- Process Data Payload ---
         params = dict()
+        data_payload: Optional[str | bytes | Dict] = None
         json_payload: Optional[Any] = None
         # DevTools often uses --data-raw for JSON bodies
             follow_redirects=True,  # Scrapling default is True
         )
+    def convert2fetcher(self, curl_command: Request | str) -> Optional[Response]:
         if isinstance(curl_command, (Request, str)):
             request = (
                 self.parse(curl_command)

scrapling/core/storage.py CHANGED Viewed

@@ -1,20 +1,20 @@
-from sqlite3 import connect as db_connect
-from threading import RLock
-from abc import ABC, abstractmethod
 from hashlib import sha256
 from functools import lru_cache
-from lxml.html import HtmlElement
 from orjson import dumps, loads
 from tldextract import extract as tld
 from scrapling.core.utils import _StorageTools, log
-from scrapling.core._types import Dict, Optional, Union, Any
 class StorageSystemMixin(ABC):
     # If you want to make your own storage system, you have to inherit from this
-    def __init__(self, url: Union[str, None] = None):
         """
         :param url: URL of the website we are working on to separate it from other websites data
         """
@@ -74,7 +74,7 @@ class SQLiteStorageSystem(StorageSystemMixin):
     Mainly built, so the library can run in threaded frameworks like scrapy or threaded tools
     > It's optimized for threaded applications, but running it without threads shouldn't make it slow."""
-    def __init__(self, storage_file: str, url: Union[str, None] = None):
         """
         :param storage_file: File to be used to store elements' data.
         :param url: URL of the website we are working on to separate it from other websites data

 from hashlib import sha256
+from threading import RLock
 from functools import lru_cache
+from abc import ABC, abstractmethod
+from sqlite3 import connect as db_connect
 from orjson import dumps, loads
+from lxml.html import HtmlElement
 from tldextract import extract as tld
 from scrapling.core.utils import _StorageTools, log
+from scrapling.core._types import Dict, Optional, Any
 class StorageSystemMixin(ABC):
     # If you want to make your own storage system, you have to inherit from this
+    def __init__(self, url: Optional[str] = None):
         """
         :param url: URL of the website we are working on to separate it from other websites data
         """
     Mainly built, so the library can run in threaded frameworks like scrapy or threaded tools
     > It's optimized for threaded applications, but running it without threads shouldn't make it slow."""
+    def __init__(self, storage_file: str, url: Optional[str] = None):
         """
         :param storage_file: File to be used to store elements' data.
         :param url: URL of the website we are working on to separate it from other websites data

scrapling/engines/_browsers/_camoufox.py CHANGED Viewed

@@ -26,10 +26,9 @@ from ._page import PageInfo, PagePool
 from ._validators import validate, CamoufoxConfig
 from scrapling.core._types import (
     Dict,
     Optional,
-    Union,
     Callable,
-    List,
     SelectorWaitStates,
 )
 from scrapling.engines.toolbelt import (
@@ -84,16 +83,16 @@ class StealthySession:
     def __init__(
         self,
         max_pages: int = 1,
-        headless: Union[bool] = True,  # noqa: F821
         block_images: bool = False,
         disable_resources: bool = False,
         block_webrtc: bool = False,
         allow_webgl: bool = True,
         network_idle: bool = False,
-        humanize: Union[bool, float] = True,
         solve_cloudflare: bool = False,
-        wait: Union[int, float] = 0,
-        timeout: Union[int, float] = 30000,
         page_action: Optional[Callable] = None,
         wait_selector: Optional[str] = None,
         addons: Optional[List[str]] = None,
@@ -101,7 +100,7 @@ class StealthySession:
         cookies: Optional[List[Dict]] = None,
         google_search: bool = True,
         extra_headers: Optional[Dict[str, str]] = None,
-        proxy: Optional[Union[str, Dict[str, str]]] = None,
         os_randomize: bool = False,
         disable_ads: bool = False,
         geoip: bool = False,
@@ -461,16 +460,16 @@ class AsyncStealthySession(StealthySession):
     def __init__(
         self,
         max_pages: int = 1,
-        headless: Union[bool] = True,  # noqa: F821
         block_images: bool = False,
         disable_resources: bool = False,
         block_webrtc: bool = False,
         allow_webgl: bool = True,
         network_idle: bool = False,
-        humanize: Union[bool, float] = True,
         solve_cloudflare: bool = False,
-        wait: Union[int, float] = 0,
-        timeout: Union[int, float] = 30000,
         page_action: Optional[Callable] = None,
         wait_selector: Optional[str] = None,
         addons: Optional[List[str]] = None,
@@ -478,7 +477,7 @@ class AsyncStealthySession(StealthySession):
         cookies: Optional[List[Dict]] = None,
         google_search: bool = True,
         extra_headers: Optional[Dict[str, str]] = None,
-        proxy: Optional[Union[str, Dict[str, str]]] = None,
         os_randomize: bool = False,
         disable_ads: bool = False,
         geoip: bool = False,

 from ._validators import validate, CamoufoxConfig
 from scrapling.core._types import (
     Dict,
+    List,
     Optional,
     Callable,
     SelectorWaitStates,
 )
 from scrapling.engines.toolbelt import (
     def __init__(
         self,
         max_pages: int = 1,
+        headless: bool = True,  # noqa: F821
         block_images: bool = False,
         disable_resources: bool = False,
         block_webrtc: bool = False,
         allow_webgl: bool = True,
         network_idle: bool = False,
+        humanize: bool | float = True,
         solve_cloudflare: bool = False,
+        wait: int | float = 0,
+        timeout: int | float = 30000,
         page_action: Optional[Callable] = None,
         wait_selector: Optional[str] = None,
         addons: Optional[List[str]] = None,
         cookies: Optional[List[Dict]] = None,
         google_search: bool = True,
         extra_headers: Optional[Dict[str, str]] = None,
+        proxy: Optional[str | Dict[str, str]] = None,
         os_randomize: bool = False,
         disable_ads: bool = False,
         geoip: bool = False,
     def __init__(
         self,
         max_pages: int = 1,
+        headless: bool = True,  # noqa: F821
         block_images: bool = False,
         disable_resources: bool = False,
         block_webrtc: bool = False,
         allow_webgl: bool = True,
         network_idle: bool = False,
+        humanize: bool | float = True,
         solve_cloudflare: bool = False,
+        wait: int | float = 0,
+        timeout: int | float = 30000,
         page_action: Optional[Callable] = None,
         wait_selector: Optional[str] = None,
         addons: Optional[List[str]] = None,
         cookies: Optional[List[Dict]] = None,
         google_search: bool = True,
         extra_headers: Optional[Dict[str, str]] = None,
+        proxy: Optional[str | Dict[str, str]] = None,
         os_randomize: bool = False,
         disable_ads: bool = False,
         geoip: bool = False,

scrapling/engines/_browsers/_controllers.py CHANGED Viewed

@@ -28,9 +28,8 @@ from ._validators import validate, PlaywrightConfig
 from ._config_tools import _compiled_stealth_scripts, _launch_kwargs, _context_kwargs
 from scrapling.core._types import (
     Dict,
-    Optional,
-    Union,
     List,
     Callable,
     SelectorWaitStates,
 )
@@ -87,14 +86,14 @@ class DynamicSession:
         disable_webgl: bool = False,
         real_chrome: bool = False,
         stealth: bool = False,
-        wait: Union[int, float] = 0,
         page_action: Optional[Callable] = None,
-        proxy: Optional[Union[str, Dict[str, str]]] = None,
         locale: str = "en-US",
         extra_headers: Optional[Dict[str, str]] = None,
         useragent: Optional[str] = None,
         cdp_url: Optional[str] = None,
-        timeout: Union[int, float] = 30000,
         disable_resources: bool = False,
         wait_selector: Optional[str] = None,
         cookies: Optional[List[Dict]] = None,
@@ -404,14 +403,14 @@ class AsyncDynamicSession(DynamicSession):
         disable_webgl: bool = False,
         real_chrome: bool = False,
         stealth: bool = False,
-        wait: Union[int, float] = 0,
         page_action: Optional[Callable] = None,
-        proxy: Optional[Union[str, Dict[str, str]]] = None,
         locale: str = "en-US",
         extra_headers: Optional[Dict[str, str]] = None,
         useragent: Optional[str] = None,
         cdp_url: Optional[str] = None,
-        timeout: Union[int, float] = 30000,
         disable_resources: bool = False,
         wait_selector: Optional[str] = None,
         cookies: Optional[List[Dict]] = None,

 from ._config_tools import _compiled_stealth_scripts, _launch_kwargs, _context_kwargs
 from scrapling.core._types import (
     Dict,
     List,
+    Optional,
     Callable,
     SelectorWaitStates,
 )
         disable_webgl: bool = False,
         real_chrome: bool = False,
         stealth: bool = False,
+        wait: int | float = 0,
         page_action: Optional[Callable] = None,
+        proxy: Optional[str | Dict[str, str]] = None,
         locale: str = "en-US",
         extra_headers: Optional[Dict[str, str]] = None,
         useragent: Optional[str] = None,
         cdp_url: Optional[str] = None,
+        timeout: int | float = 30000,
         disable_resources: bool = False,
         wait_selector: Optional[str] = None,
         cookies: Optional[List[Dict]] = None,
         disable_webgl: bool = False,
         real_chrome: bool = False,
         stealth: bool = False,
+        wait: int | float = 0,
         page_action: Optional[Callable] = None,
+        proxy: Optional[str | Dict[str, str]] = None,
         locale: str = "en-US",
         extra_headers: Optional[Dict[str, str]] = None,
         useragent: Optional[str] = None,
         cdp_url: Optional[str] = None,
+        timeout: int | float = 30000,
         disable_resources: bool = False,
         wait_selector: Optional[str] = None,
         cookies: Optional[List[Dict]] = None,

scrapling/engines/_browsers/_page.py CHANGED Viewed

@@ -4,7 +4,7 @@ from dataclasses import dataclass
 from playwright.sync_api import Page as SyncPage
 from playwright.async_api import Page as AsyncPage
-from scrapling.core._types import Optional, Union, List, Literal
 PageState = Literal["ready", "busy", "error"]  # States that a page can be in
@@ -14,7 +14,7 @@ class PageInfo:
     """Information about the page and its current state"""
     __slots__ = ("page", "state", "url")
-    page: Union[SyncPage, AsyncPage]
     state: PageState
     url: Optional[str]
@@ -52,7 +52,7 @@ class PagePool:
         self.pages: List[PageInfo] = []
         self._lock = RLock()
-    def add_page(self, page: Union[SyncPage, AsyncPage]) -> PageInfo:
         """Add a new page to the pool"""
         with self._lock:
             if len(self.pages) >= self.max_pages:

 from playwright.sync_api import Page as SyncPage
 from playwright.async_api import Page as AsyncPage
+from scrapling.core._types import Optional, List, Literal
 PageState = Literal["ready", "busy", "error"]  # States that a page can be in
     """Information about the page and its current state"""
     __slots__ = ("page", "state", "url")
+    page: SyncPage | AsyncPage
     state: PageState
     url: Optional[str]
         self.pages: List[PageInfo] = []
         self._lock = RLock()
+    def add_page(self, page: SyncPage | AsyncPage) -> PageInfo:
         """Add a new page to the pool"""
         with self._lock:
             if len(self.pages) >= self.max_pages:

scrapling/engines/_browsers/_validators.py CHANGED Viewed

@@ -4,7 +4,6 @@ from pathlib import Path
 from scrapling.core._types import (
     Optional,
-    Union,
     Dict,
     Callable,
     List,
@@ -24,15 +23,15 @@ class PlaywrightConfig(Struct, kw_only=True, frozen=False):
     disable_webgl: bool = False
     real_chrome: bool = False
     stealth: bool = False
-    wait: Union[int, float] = 0
     page_action: Optional[Callable] = None
-    proxy: Optional[Union[str, Dict[str, str]]] = (
         None  # The default value for proxy in Playwright's source is `None`
     )
     locale: str = "en-US"
     extra_headers: Optional[Dict[str, str]] = None
     useragent: Optional[str] = None
-    timeout: Union[int, float] = 30000
     disable_resources: bool = False
     wait_selector: Optional[str] = None
     cookies: Optional[List[Dict]] = None
@@ -87,10 +86,10 @@ class CamoufoxConfig(Struct, kw_only=True, frozen=False):
     block_webrtc: bool = False
     allow_webgl: bool = True
     network_idle: bool = False
-    humanize: Union[bool, float] = True
     solve_cloudflare: bool = False
-    wait: Union[int, float] = 0
-    timeout: Union[int, float] = 30000
     page_action: Optional[Callable] = None
     wait_selector: Optional[str] = None
     addons: Optional[List[str]] = None
@@ -98,7 +97,7 @@ class CamoufoxConfig(Struct, kw_only=True, frozen=False):
     cookies: Optional[List[Dict]] = None
     google_search: bool = True
     extra_headers: Optional[Dict[str, str]] = None
-    proxy: Optional[Union[str, Dict[str, str]]] = (
         None  # The default value for proxy in Playwright's source is `None`
     )
     os_randomize: bool = False

 from scrapling.core._types import (
     Optional,
     Dict,
     Callable,
     List,
     disable_webgl: bool = False
     real_chrome: bool = False
     stealth: bool = False
+    wait: int | float = 0
     page_action: Optional[Callable] = None
+    proxy: Optional[str | Dict[str, str]] = (
         None  # The default value for proxy in Playwright's source is `None`
     )
     locale: str = "en-US"
     extra_headers: Optional[Dict[str, str]] = None
     useragent: Optional[str] = None
+    timeout: int | float = 30000
     disable_resources: bool = False
     wait_selector: Optional[str] = None
     cookies: Optional[List[Dict]] = None
     block_webrtc: bool = False
     allow_webgl: bool = True
     network_idle: bool = False
+    humanize: bool | float = True
     solve_cloudflare: bool = False
+    wait: int | float = 0
+    timeout: int | float = 30000
     page_action: Optional[Callable] = None
     wait_selector: Optional[str] = None
     addons: Optional[List[str]] = None
     cookies: Optional[List[Dict]] = None
     google_search: bool = True
     extra_headers: Optional[Dict[str, str]] = None
+    proxy: Optional[str | Dict[str, str]] = (
         None  # The default value for proxy in Playwright's source is `None`
     )
     os_randomize: bool = False

scrapling/engines/static.py CHANGED Viewed

@@ -17,7 +17,6 @@ from scrapling.core._types import (
     Dict,
     Optional,
     Tuple,
-    Union,
     Mapping,
     SUPPORTED_HTTP_METHODS,
     Awaitable,
@@ -55,14 +54,14 @@ class FetcherSession:
         proxies: Optional[Dict[str, str]] = None,
         proxy: Optional[str] = None,
         proxy_auth: Optional[Tuple[str, str]] = None,
-        timeout: Optional[Union[int, float]] = 30,
         headers: Optional[Dict[str, str]] = None,
         retries: Optional[int] = 3,
         retry_delay: Optional[int] = 1,
         follow_redirects: bool = True,
         max_redirects: int = 30,
         verify: bool = True,
-        cert: Optional[Union[str, Tuple[str, str]]] = None,
         selector_config: Optional[Dict] = None,
     ):
         """
@@ -357,7 +356,7 @@ class FetcherSession:
         method: SUPPORTED_HTTP_METHODS,
         stealth: Optional[bool] = None,
         **kwargs,
-    ) -> Union[Response, Awaitable[Response]]:
         """
         Internal dispatcher. Prepares arguments and calls sync or async request helper.
@@ -390,10 +389,10 @@ class FetcherSession:
     def get(
         self,
         url: str,
-        params: Optional[Union[Dict, List, Tuple]] = None,
         headers: Optional[Mapping[str, Optional[str]]] = _UNSET,
         cookies: Optional[CookieTypes] = None,
-        timeout: Optional[Union[int, float]] = _UNSET,
         follow_redirects: Optional[bool] = _UNSET,
         max_redirects: Optional[int] = _UNSET,
         retries: Optional[int] = _UNSET,
@@ -403,12 +402,12 @@ class FetcherSession:
         proxy_auth: Optional[Tuple[str, str]] = _UNSET,
         auth: Optional[Tuple[str, str]] = None,
         verify: Optional[bool] = _UNSET,
-        cert: Optional[Union[str, Tuple[str, str]]] = _UNSET,
         impersonate: Optional[BrowserTypeLiteral] = _UNSET,
         http3: Optional[bool] = _UNSET,
         stealthy_headers: Optional[bool] = _UNSET,
         **kwargs,
-    ) -> Union[Response, Awaitable[Response]]:
         """
         Perform a GET request.
@@ -461,12 +460,12 @@ class FetcherSession:
     def post(
         self,
         url: str,
-        data: Optional[Union[Dict, str]] = None,
-        json: Optional[Union[Dict, List]] = None,
         headers: Optional[Mapping[str, Optional[str]]] = _UNSET,
-        params: Optional[Union[Dict, List, Tuple]] = None,
         cookies: Optional[CookieTypes] = None,
-        timeout: Optional[Union[int, float]] = _UNSET,
         follow_redirects: Optional[bool] = _UNSET,
         max_redirects: Optional[int] = _UNSET,
         retries: Optional[int] = _UNSET,
@@ -476,12 +475,12 @@ class FetcherSession:
         proxy_auth: Optional[Tuple[str, str]] = _UNSET,
         auth: Optional[Tuple[str, str]] = None,
         verify: Optional[bool] = _UNSET,
-        cert: Optional[Union[str, Tuple[str, str]]] = _UNSET,
         impersonate: Optional[BrowserTypeLiteral] = _UNSET,
         http3: Optional[bool] = _UNSET,
         stealthy_headers: Optional[bool] = _UNSET,
         **kwargs,
-    ) -> Union[Response, Awaitable[Response]]:
         """
         Perform a POST request.
@@ -538,12 +537,12 @@ class FetcherSession:
     def put(
         self,
         url: str,
-        data: Optional[Union[Dict, str]] = None,
-        json: Optional[Union[Dict, List]] = None,
         headers: Optional[Mapping[str, Optional[str]]] = _UNSET,
-        params: Optional[Union[Dict, List, Tuple]] = None,
         cookies: Optional[CookieTypes] = None,
-        timeout: Optional[Union[int, float]] = _UNSET,
         follow_redirects: Optional[bool] = _UNSET,
         max_redirects: Optional[int] = _UNSET,
         retries: Optional[int] = _UNSET,
@@ -553,12 +552,12 @@ class FetcherSession:
         proxy_auth: Optional[Tuple[str, str]] = _UNSET,
         auth: Optional[Tuple[str, str]] = None,
         verify: Optional[bool] = _UNSET,
-        cert: Optional[Union[str, Tuple[str, str]]] = _UNSET,
         impersonate: Optional[BrowserTypeLiteral] = _UNSET,
         http3: Optional[bool] = _UNSET,
         stealthy_headers: Optional[bool] = _UNSET,
         **kwargs,
-    ) -> Union[Response, Awaitable[Response]]:
         """
         Perform a PUT request.
@@ -615,12 +614,12 @@ class FetcherSession:
     def delete(
         self,
         url: str,
-        data: Optional[Union[Dict, str]] = None,
-        json: Optional[Union[Dict, List]] = None,
         headers: Optional[Mapping[str, Optional[str]]] = _UNSET,
-        params: Optional[Union[Dict, List, Tuple]] = None,
         cookies: Optional[CookieTypes] = None,
-        timeout: Optional[Union[int, float]] = _UNSET,
         follow_redirects: Optional[bool] = _UNSET,
         max_redirects: Optional[int] = _UNSET,
         retries: Optional[int] = _UNSET,
@@ -630,12 +629,12 @@ class FetcherSession:
         proxy_auth: Optional[Tuple[str, str]] = _UNSET,
         auth: Optional[Tuple[str, str]] = None,
         verify: Optional[bool] = _UNSET,
-        cert: Optional[Union[str, Tuple[str, str]]] = _UNSET,
         impersonate: Optional[BrowserTypeLiteral] = _UNSET,
         http3: Optional[bool] = _UNSET,
         stealthy_headers: Optional[bool] = _UNSET,
         **kwargs,
-    ) -> Union[Response, Awaitable[Response]]:
         """
         Perform a DELETE request.

     Dict,
     Optional,
     Tuple,
     Mapping,
     SUPPORTED_HTTP_METHODS,
     Awaitable,
         proxies: Optional[Dict[str, str]] = None,
         proxy: Optional[str] = None,
         proxy_auth: Optional[Tuple[str, str]] = None,
+        timeout: Optional[int | float] = 30,
         headers: Optional[Dict[str, str]] = None,
         retries: Optional[int] = 3,
         retry_delay: Optional[int] = 1,
         follow_redirects: bool = True,
         max_redirects: int = 30,
         verify: bool = True,
+        cert: Optional[str | Tuple[str, str]] = None,
         selector_config: Optional[Dict] = None,
     ):
         """
         method: SUPPORTED_HTTP_METHODS,
         stealth: Optional[bool] = None,
         **kwargs,
+    ) -> Response | Awaitable[Response]:
         """
         Internal dispatcher. Prepares arguments and calls sync or async request helper.
     def get(
         self,
         url: str,
+        params: Optional[Dict | List | Tuple] = None,
         headers: Optional[Mapping[str, Optional[str]]] = _UNSET,
         cookies: Optional[CookieTypes] = None,
+        timeout: Optional[int | float] = _UNSET,
         follow_redirects: Optional[bool] = _UNSET,
         max_redirects: Optional[int] = _UNSET,
         retries: Optional[int] = _UNSET,
         proxy_auth: Optional[Tuple[str, str]] = _UNSET,
         auth: Optional[Tuple[str, str]] = None,
         verify: Optional[bool] = _UNSET,
+        cert: Optional[str | Tuple[str, str]] = _UNSET,
         impersonate: Optional[BrowserTypeLiteral] = _UNSET,
         http3: Optional[bool] = _UNSET,
         stealthy_headers: Optional[bool] = _UNSET,
         **kwargs,
+    ) -> Response | Awaitable[Response]:
         """
         Perform a GET request.
     def post(
         self,
         url: str,
+        data: Optional[Dict | str] = None,
+        json: Optional[Dict | List] = None,
         headers: Optional[Mapping[str, Optional[str]]] = _UNSET,
+        params: Optional[Dict | List | Tuple] = None,
         cookies: Optional[CookieTypes] = None,
+        timeout: Optional[int | float] = _UNSET,
         follow_redirects: Optional[bool] = _UNSET,
         max_redirects: Optional[int] = _UNSET,
         retries: Optional[int] = _UNSET,
         proxy_auth: Optional[Tuple[str, str]] = _UNSET,
         auth: Optional[Tuple[str, str]] = None,
         verify: Optional[bool] = _UNSET,
+        cert: Optional[str | Tuple[str, str]] = _UNSET,
         impersonate: Optional[BrowserTypeLiteral] = _UNSET,
         http3: Optional[bool] = _UNSET,
         stealthy_headers: Optional[bool] = _UNSET,
         **kwargs,
+    ) -> Response | Awaitable[Response]:
         """
         Perform a POST request.
     def put(
         self,
         url: str,
+        data: Optional[Dict | str] = None,
+        json: Optional[Dict | List] = None,
         headers: Optional[Mapping[str, Optional[str]]] = _UNSET,
+        params: Optional[Dict | List | Tuple] = None,
         cookies: Optional[CookieTypes] = None,
+        timeout: Optional[int | float] = _UNSET,
         follow_redirects: Optional[bool] = _UNSET,
         max_redirects: Optional[int] = _UNSET,
         retries: Optional[int] = _UNSET,
         proxy_auth: Optional[Tuple[str, str]] = _UNSET,
         auth: Optional[Tuple[str, str]] = None,
         verify: Optional[bool] = _UNSET,
+        cert: Optional[str | Tuple[str, str]] = _UNSET,
         impersonate: Optional[BrowserTypeLiteral] = _UNSET,
         http3: Optional[bool] = _UNSET,
         stealthy_headers: Optional[bool] = _UNSET,
         **kwargs,
+    ) -> Response | Awaitable[Response]:
         """
         Perform a PUT request.
     def delete(
         self,
         url: str,
+        data: Optional[Dict | str] = None,
+        json: Optional[Dict | List] = None,
         headers: Optional[Mapping[str, Optional[str]]] = _UNSET,
+        params: Optional[Dict | List | Tuple] = None,
         cookies: Optional[CookieTypes] = None,
+        timeout: Optional[int | float] = _UNSET,
         follow_redirects: Optional[bool] = _UNSET,
         max_redirects: Optional[int] = _UNSET,
         retries: Optional[int] = _UNSET,
         proxy_auth: Optional[Tuple[str, str]] = _UNSET,
         auth: Optional[Tuple[str, str]] = None,
         verify: Optional[bool] = _UNSET,
+        cert: Optional[str | Tuple[str, str]] = _UNSET,
         impersonate: Optional[BrowserTypeLiteral] = _UNSET,
         http3: Optional[bool] = _UNSET,
         stealthy_headers: Optional[bool] = _UNSET,
         **kwargs,
+    ) -> Response | Awaitable[Response]:
         """
         Perform a DELETE request.

scrapling/engines/toolbelt/__init__.py CHANGED Viewed

@@ -2,7 +2,6 @@ from .custom import (
     BaseFetcher,
     Response,
     StatusText,
-    check_type_validity,
     get_variable_name,
 )
 from .fingerprints import (

     BaseFetcher,
     Response,
     StatusText,
     get_variable_name,
 )
 from .fingerprints import (

scrapling/engines/toolbelt/custom.py CHANGED Viewed

@@ -10,8 +10,6 @@ from scrapling.core._types import (
     List,
     Optional,
     Tuple,
-    Type,
-    Union,
 )
 from scrapling.core.custom_types import MappingProxyType
 from scrapling.core.utils import log, lru_cache
@@ -106,7 +104,7 @@ class Response(Selector):
         content: str | bytes,
         status: int,
         reason: str,
-        cookies: Union[Tuple[Dict[str, str], ...], Dict[str, str]],
         headers: Dict,
         request_headers: Dict,
         encoding: str = "utf-8",
@@ -318,51 +316,3 @@ def get_variable_name(var: Any) -> Optional[str]:
             if value is var:
                 return name
     return None
-def check_type_validity(
-    variable: Any,
-    valid_types: Union[List[Type], None],
-    default_value: Any = None,
-    critical: bool = False,
-    param_name: Optional[str] = None,
-) -> Any:
-    """Check if a variable matches the specified type constraints.
-    :param variable: The variable to check
-    :param valid_types: List of valid types for the variable
-    :param default_value: Value to return if type check fails
-    :param critical: If True, raises TypeError instead of logging error
-    :param param_name: Optional parameter name for error messages
-    :return: The original variable if valid, default_value if invalid
-    :raise TypeError: If critical=True and type check fails
-    """
-    # Use provided param_name or try to get it automatically
-    var_name = param_name or get_variable_name(variable) or "Unknown"
-    # Convert valid_types to a list if None
-    valid_types = valid_types or []
-    # Handle None value
-    if variable is None:
-        if type(None) in valid_types:
-            return variable
-        error_msg = f'Argument "{var_name}" cannot be None'
-        if critical:
-            raise TypeError(error_msg)
-        log.error(f"[Ignored] {error_msg}")
-        return default_value
-    # If no valid_types specified and variable has a value, return it
-    if not valid_types:
-        return variable
-    # Check if variable type matches any of the valid types
-    if not any(isinstance(variable, t) for t in valid_types):
-        type_names = [t.__name__ for t in valid_types]
-        error_msg = f'Argument "{var_name}" must be of type {" or ".join(type_names)}'
-        if critical:
-            raise TypeError(error_msg)
-        log.error(f"[Ignored] {error_msg}")
-        return default_value
-    return variable

     List,
     Optional,
     Tuple,
 )
 from scrapling.core.custom_types import MappingProxyType
 from scrapling.core.utils import log, lru_cache
         content: str | bytes,
         status: int,
         reason: str,
+        cookies: Tuple[Dict[str, str], ...] | Dict[str, str],
         headers: Dict,
         request_headers: Dict,
         encoding: str = "utf-8",
             if value is var:
                 return name
     return None

scrapling/engines/toolbelt/fingerprints.py CHANGED Viewed

@@ -7,7 +7,7 @@ from platform import system as platform_system
 from tldextract import extract
 from browserforge.headers import Browser, HeaderGenerator
-from scrapling.core._types import Dict, Union
 from scrapling.core.utils import lru_cache
 __OS_NAME__ = platform_system()
@@ -28,7 +28,7 @@ def generate_convincing_referer(url: str) -> str:
 @lru_cache(1, typed=True)
-def get_os_name() -> Union[str, None]:
     """Get the current OS name in the same format needed for browserforge
     :return: Current OS name or `None` otherwise

 from tldextract import extract
 from browserforge.headers import Browser, HeaderGenerator
+from scrapling.core._types import Dict, Optional
 from scrapling.core.utils import lru_cache
 __OS_NAME__ = platform_system()
 @lru_cache(1, typed=True)
+def get_os_name() -> Optional[str]:
     """Get the current OS name in the same format needed for browserforge
     :return: Current OS name or `None` otherwise

scrapling/engines/toolbelt/navigation.py CHANGED Viewed

@@ -11,7 +11,7 @@ from msgspec import Struct, structs, convert, ValidationError
 from playwright.sync_api import Route
 from scrapling.core.utils import log
-from scrapling.core._types import Dict, Optional, Union, Tuple
 from scrapling.engines.constants import DEFAULT_DISABLED_RESOURCES
 __BYPASSES_DIR__ = Path(__file__).parent / "bypasses"
@@ -54,8 +54,8 @@ async def async_intercept_route(route: async_Route):
 def construct_proxy_dict(
-    proxy_string: Union[str, Dict[str, str]], as_tuple=False
-) -> Union[Dict, Tuple, None]:
     """Validate a proxy and return it in the acceptable format for Playwright
     Reference: https://playwright.dev/python/docs/network#http-proxy

 from playwright.sync_api import Route
 from scrapling.core.utils import log
+from scrapling.core._types import Dict, Optional, Tuple
 from scrapling.engines.constants import DEFAULT_DISABLED_RESOURCES
 __BYPASSES_DIR__ = Path(__file__).parent / "bypasses"
 def construct_proxy_dict(
+    proxy_string: str | Dict[str, str], as_tuple=False
+) -> Optional[Dict | Tuple]:
     """Validate a proxy and return it in the acceptable format for Playwright
     Reference: https://playwright.dev/python/docs/network#http-proxy

scrapling/fetchers.py CHANGED Viewed

@@ -4,7 +4,6 @@ from scrapling.core._types import (
     List,
     Optional,
     SelectorWaitStates,
-    Union,
     Iterable,
 )
 from scrapling.engines import (
@@ -51,16 +50,16 @@ class StealthyFetcher(BaseFetcher):
     def fetch(
         cls,
         url: str,
-        headless: Union[bool] = True,  # noqa: F821
         block_images: bool = False,
         disable_resources: bool = False,
         block_webrtc: bool = False,
         allow_webgl: bool = True,
         network_idle: bool = False,
-        humanize: Union[bool, float] = True,
         solve_cloudflare: bool = False,
-        wait: Union[int, float] = 0,
-        timeout: Union[int, float] = 30000,
         page_action: Optional[Callable] = None,
         wait_selector: Optional[str] = None,
         addons: Optional[List[str]] = None,
@@ -68,7 +67,7 @@ class StealthyFetcher(BaseFetcher):
         cookies: Optional[List[Dict]] = None,
         google_search: bool = True,
         extra_headers: Optional[Dict[str, str]] = None,
-        proxy: Optional[Union[str, Dict[str, str]]] = None,
         os_randomize: bool = False,
         disable_ads: bool = False,
         geoip: bool = False,
@@ -147,16 +146,16 @@ class StealthyFetcher(BaseFetcher):
     async def async_fetch(
         cls,
         url: str,
-        headless: Union[bool] = True,  # noqa: F821
         block_images: bool = False,
         disable_resources: bool = False,
         block_webrtc: bool = False,
         allow_webgl: bool = True,
         network_idle: bool = False,
-        humanize: Union[bool, float] = True,
         solve_cloudflare: bool = False,
-        wait: Union[int, float] = 0,
-        timeout: Union[int, float] = 30000,
         page_action: Optional[Callable] = None,
         wait_selector: Optional[str] = None,
         addons: Optional[List[str]] = None,
@@ -164,7 +163,7 @@ class StealthyFetcher(BaseFetcher):
         cookies: Optional[List[Dict]] = None,
         google_search: bool = True,
         extra_headers: Optional[Dict[str, str]] = None,
-        proxy: Optional[Union[str, Dict[str, str]]] = None,
         os_randomize: bool = False,
         disable_ads: bool = False,
         geoip: bool = False,
@@ -267,14 +266,14 @@ class DynamicFetcher(BaseFetcher):
         disable_webgl: bool = False,
         real_chrome: bool = False,
         stealth: bool = False,
-        wait: Union[int, float] = 0,
         page_action: Optional[Callable] = None,
-        proxy: Optional[Union[str, Dict[str, str]]] = None,
         locale: str = "en-US",
         extra_headers: Optional[Dict[str, str]] = None,
         useragent: Optional[str] = None,
         cdp_url: Optional[str] = None,
-        timeout: Union[int, float] = 30000,
         disable_resources: bool = False,
         wait_selector: Optional[str] = None,
         cookies: Optional[Iterable[Dict]] = None,
@@ -350,14 +349,14 @@ class DynamicFetcher(BaseFetcher):
         disable_webgl: bool = False,
         real_chrome: bool = False,
         stealth: bool = False,
-        wait: Union[int, float] = 0,
         page_action: Optional[Callable] = None,
-        proxy: Optional[Union[str, Dict[str, str]]] = None,
         locale: str = "en-US",
         extra_headers: Optional[Dict[str, str]] = None,
         useragent: Optional[str] = None,
         cdp_url: Optional[str] = None,
-        timeout: Union[int, float] = 30000,
         disable_resources: bool = False,
         wait_selector: Optional[str] = None,
         cookies: Optional[Iterable[Dict]] = None,

     List,
     Optional,
     SelectorWaitStates,
     Iterable,
 )
 from scrapling.engines import (
     def fetch(
         cls,
         url: str,
+        headless: bool = True,  # noqa: F821
         block_images: bool = False,
         disable_resources: bool = False,
         block_webrtc: bool = False,
         allow_webgl: bool = True,
         network_idle: bool = False,
+        humanize: bool | float = True,
         solve_cloudflare: bool = False,
+        wait: int | float = 0,
+        timeout: int | float = 30000,
         page_action: Optional[Callable] = None,
         wait_selector: Optional[str] = None,
         addons: Optional[List[str]] = None,
         cookies: Optional[List[Dict]] = None,
         google_search: bool = True,
         extra_headers: Optional[Dict[str, str]] = None,
+        proxy: Optional[str | Dict[str, str]] = None,
         os_randomize: bool = False,
         disable_ads: bool = False,
         geoip: bool = False,
     async def async_fetch(
         cls,
         url: str,
+        headless: bool = True,  # noqa: F821
         block_images: bool = False,
         disable_resources: bool = False,
         block_webrtc: bool = False,
         allow_webgl: bool = True,
         network_idle: bool = False,
+        humanize: bool | float = True,
         solve_cloudflare: bool = False,
+        wait: int | float = 0,
+        timeout: int | float = 30000,
         page_action: Optional[Callable] = None,
         wait_selector: Optional[str] = None,
         addons: Optional[List[str]] = None,
         cookies: Optional[List[Dict]] = None,
         google_search: bool = True,
         extra_headers: Optional[Dict[str, str]] = None,
+        proxy: Optional[str | Dict[str, str]] = None,
         os_randomize: bool = False,
         disable_ads: bool = False,
         geoip: bool = False,
         disable_webgl: bool = False,
         real_chrome: bool = False,
         stealth: bool = False,
+        wait: int | float = 0,
         page_action: Optional[Callable] = None,
+        proxy: Optional[str | Dict[str, str]] = None,
         locale: str = "en-US",
         extra_headers: Optional[Dict[str, str]] = None,
         useragent: Optional[str] = None,
         cdp_url: Optional[str] = None,
+        timeout: int | float = 30000,
         disable_resources: bool = False,
         wait_selector: Optional[str] = None,
         cookies: Optional[Iterable[Dict]] = None,
         disable_webgl: bool = False,
         real_chrome: bool = False,
         stealth: bool = False,
+        wait: int | float = 0,
         page_action: Optional[Callable] = None,
+        proxy: Optional[str | Dict[str, str]] = None,
         locale: str = "en-US",
         extra_headers: Optional[Dict[str, str]] = None,
         useragent: Optional[str] = None,
         cdp_url: Optional[str] = None,
+        timeout: int | float = 30000,
         disable_resources: bool = False,
         wait_selector: Optional[str] = None,
         cookies: Optional[Iterable[Dict]] = None,

scrapling/parser.py CHANGED Viewed

@@ -59,7 +59,7 @@ class Selector(SelectorsGeneration):
     def __init__(
         self,
-        content: Optional[Union[str, bytes]] = None,
         url: Optional[str] = None,
         encoding: str = "utf8",
         huge_tree: bool = True,
@@ -197,7 +197,7 @@ class Selector(SelectorsGeneration):
     # Node functionalities, I wanted to move to a separate Mixin class, but it had a slight impact on performance
     @staticmethod
     def _is_text_node(
-        element: Union[HtmlElement, _ElementUnicodeResult],
     ) -> bool:
         """Return True if the given element is a result of a string expression
         Examples:
@@ -209,7 +209,7 @@ class Selector(SelectorsGeneration):
     @staticmethod
     def __content_convertor(
-        element: Union[HtmlElement, _ElementUnicodeResult],
     ) -> TextHandler:
         """Used internally to convert a single element's text content to TextHandler directly without checks
@@ -235,8 +235,8 @@ class Selector(SelectorsGeneration):
         )
     def __handle_element(
-        self, element: Union[HtmlElement, _ElementUnicodeResult]
-    ) -> Union[TextHandler, "Selector", None]:
         """Used internally in all functions to convert a single element to type (Selector|TextHandler) when possible"""
         if element is None:
             return None
@@ -247,7 +247,7 @@ class Selector(SelectorsGeneration):
             return self.__element_convertor(element)
     def __handle_elements(
-        self, result: List[Union[HtmlElement, _ElementUnicodeResult]]
     ) -> Union["Selectors", "TextHandlers"]:
         """Used internally in all functions to convert results to type (Selectors|TextHandlers) in bulk when possible"""
         if not len(
@@ -364,18 +364,18 @@ class Selector(SelectorsGeneration):
         return class_name in self._root.classes
     @property
-    def parent(self) -> Union["Selector", None]:
         """Return the direct parent of the element or ``None`` otherwise"""
         return self.__handle_element(self._root.getparent())
     @property
-    def below_elements(self) -> "Selectors[Selector]":
         """Return all elements under the current element in the DOM tree"""
         below = self._root.xpath(".//*")
         return self.__handle_elements(below)
     @property
-    def children(self) -> "Selectors[Selector]":
         """Return the children elements of the current element or empty list otherwise"""
         return Selectors(
             self.__element_convertor(child)
@@ -384,7 +384,7 @@ class Selector(SelectorsGeneration):
         )
     @property
-    def siblings(self) -> "Selectors[Selector]":
         """Return other children of the current element's parent or empty list otherwise"""
         if self.parent:
             return Selectors(
@@ -397,9 +397,7 @@ class Selector(SelectorsGeneration):
         for ancestor in self._root.iterancestors():
             yield self.__element_convertor(ancestor)
-    def find_ancestor(
-        self, func: Callable[["Selector"], bool]
-    ) -> Union["Selector", None]:
         """Loop over all ancestors of the element till one match the passed function
         :param func: A function that takes each ancestor as an argument and returns True/False
         :return: The first ancestor that match the function or ``None`` otherwise.
@@ -410,13 +408,13 @@ class Selector(SelectorsGeneration):
         return None
     @property
-    def path(self) -> "Selectors[Selector]":
         """Returns a list of type `Selectors` that contains the path leading to the current element from the root."""
         lst = list(self.iterancestors())
         return Selectors(lst)
     @property
-    def next(self) -> Union["Selector", None]:
         """Returns the next element of the current element in the children of the parent or ``None`` otherwise."""
         next_element = self._root.getnext()
         if next_element is not None:
@@ -427,7 +425,7 @@ class Selector(SelectorsGeneration):
         return self.__handle_element(next_element)
     @property
-    def previous(self) -> Union["Selector", None]:
         """Returns the previous element of the current element in the children of the parent or ``None`` otherwise."""
         prev_element = self._root.getprevious()
         if prev_element is not None:
@@ -470,10 +468,10 @@ class Selector(SelectorsGeneration):
     # From here we start with the selecting functions
     def relocate(
         self,
-        element: Union[Dict, HtmlElement, "Selector"],
         percentage: int = 0,
         selector_type: bool = False,
-    ) -> Union[List[Union[HtmlElement, None]], "Selectors"]:
         """This function will search again for the element in the page tree, used automatically on page structure change
         :param element: The element we want to relocate in the tree
@@ -581,7 +579,7 @@ class Selector(SelectorsGeneration):
         adaptive: bool = False,
         auto_save: bool = False,
         percentage: int = 0,
-    ) -> Union["Selectors[Selector]", List, "TextHandlers[TextHandler]"]:
         """Search the current tree with CSS3 selectors
         **Important:
@@ -644,7 +642,7 @@ class Selector(SelectorsGeneration):
         auto_save: bool = False,
         percentage: int = 0,
         **kwargs: Any,
-    ) -> Union["Selectors[Selector]", List, "TextHandlers[TextHandler]"]:
         """Search the current tree with XPath selectors
         **Important:
@@ -708,7 +706,7 @@ class Selector(SelectorsGeneration):
     def find_all(
         self,
-        *args: Union[str, Iterable[str], Pattern, Callable, Dict[str, str]],
         **kwargs: str,
     ) -> "Selectors":
         """Find elements by filters of your creations for ease.
@@ -815,9 +813,9 @@ class Selector(SelectorsGeneration):
     def find(
         self,
-        *args: Union[str, Iterable[str], Pattern, Callable, Dict[str, str]],
         **kwargs: str,
-    ) -> Union["Selector", None]:
         """Find elements by filters of your creations for ease, then return the first result. Otherwise return `None`.
         :param args: Tag name(s), iterable of tag names, regex patterns, function, or a dictionary of elements' attributes. Leave empty for selecting all.
@@ -924,7 +922,7 @@ class Selector(SelectorsGeneration):
         )
         return score
-    def save(self, element: Union["Selector", HtmlElement], identifier: str) -> None:
         """Saves the element's unique properties to the storage for retrieval and relocation later
         :param element: The element itself that we want to save to storage, it can be a ` Selector ` or pure ` HtmlElement `
@@ -969,7 +967,7 @@ class Selector(SelectorsGeneration):
     def re(
         self,
-        regex: Union[str, Pattern[str]],
         replace_entities: bool = True,
         clean_match: bool = False,
         case_sensitive: bool = True,
@@ -985,7 +983,7 @@ class Selector(SelectorsGeneration):
     def re_first(
         self,
-        regex: Union[str, Pattern[str]],
         default=None,
         replace_entities: bool = True,
         clean_match: bool = False,
@@ -1004,9 +1002,7 @@ class Selector(SelectorsGeneration):
         )
     @staticmethod
-    def __get_attributes(
-        element: HtmlElement, ignore_attributes: Union[List, Tuple]
-    ) -> Dict:
         """Return attributes dictionary without the ignored list"""
         return {k: v for k, v in element.attrib.items() if k not in ignore_attributes}
@@ -1015,7 +1011,7 @@ class Selector(SelectorsGeneration):
         original: HtmlElement,
         original_attributes: Dict,
         candidate: HtmlElement,
-        ignore_attributes: Union[List, Tuple],
         similarity_threshold: float,
         match_text: bool = False,
     ) -> bool:
@@ -1055,12 +1051,12 @@ class Selector(SelectorsGeneration):
     def find_similar(
         self,
         similarity_threshold: float = 0.2,
-        ignore_attributes: Union[List, Tuple] = (
             "href",
             "src",
         ),
         match_text: bool = False,
-    ) -> Union["Selectors[Selector]", List]:
         """Find elements that are in the same tree depth in the page with the same tag name and same parent tag etc...
         then return the ones that match the current element attributes with a percentage higher than the input threshold.
@@ -1123,7 +1119,7 @@ class Selector(SelectorsGeneration):
         partial: bool = False,
         case_sensitive: bool = False,
         clean_match: bool = True,
-    ) -> Union["Selectors[Selector]", "Selector"]:
         """Find elements that its text content fully/partially matches input.
         :param text: Text query to match
         :param first_match: Returns the first element that matches conditions, enabled by default
@@ -1165,11 +1161,11 @@ class Selector(SelectorsGeneration):
     def find_by_regex(
         self,
-        query: Union[str, Pattern[str]],
         first_match: bool = True,
         case_sensitive: bool = False,
         clean_match: bool = True,
-    ) -> Union["Selectors[Selector]", "Selector"]:
         """Find elements that its text content matches the input regex pattern.
         :param query: Regex query/pattern to match
         :param first_match: Return the first element that matches conditions; enabled by default.
@@ -1216,9 +1212,7 @@ class Selectors(List[Selector]):
     def __getitem__(self, pos: slice) -> "Selectors":
         pass
-    def __getitem__(
-        self, pos: Union[SupportsIndex, slice]
-    ) -> Union[Selector, "Selectors"]:
         lst = super().__getitem__(pos)
         if isinstance(pos, slice):
             return self.__class__(lst)
@@ -1232,7 +1226,7 @@ class Selectors(List[Selector]):
         auto_save: bool = False,
         percentage: int = 0,
         **kwargs: Any,
-    ) -> "Selectors[Selector]":
         """
         Call the ``.xpath()`` method for each element in this list and return
         their results as another `Selectors` class.
@@ -1267,7 +1261,7 @@ class Selectors(List[Selector]):
         identifier: str = "",
         auto_save: bool = False,
         percentage: int = 0,
-    ) -> "Selectors[Selector]":
         """
         Call the ``.css()`` method for each element in this list and return
         their results flattened as another `Selectors` class.
@@ -1294,11 +1288,11 @@ class Selectors(List[Selector]):
     def re(
         self,
-        regex: Union[str, Pattern[str]],
         replace_entities: bool = True,
         clean_match: bool = False,
         case_sensitive: bool = True,
-    ) -> TextHandlers[TextHandler]:
         """Call the ``.re()`` method for each element in this list and return
         their results flattened as List of TextHandler.
@@ -1315,7 +1309,7 @@ class Selectors(List[Selector]):
     def re_first(
         self,
-        regex: Union[str, Pattern[str]],
         default=None,
         replace_entities: bool = True,
         clean_match: bool = False,
@@ -1335,7 +1329,7 @@ class Selectors(List[Selector]):
                 return result
         return default
-    def search(self, func: Callable[["Selector"], bool]) -> Union["Selector", None]:
         """Loop over all current elements and return the first element that matches the passed function
         :param func: A function that takes each element as an argument and returns True/False
         :return: The first element that match the function or ``None`` otherwise.
@@ -1345,7 +1339,7 @@ class Selectors(List[Selector]):
                 return element
         return None
-    def filter(self, func: Callable[["Selector"], bool]) -> "Selectors[Selector]":
         """Filter current elements based on the passed function
         :param func: A function that takes each element as an argument and returns True/False
         :return: The new `Selectors` object or empty list otherwise.

     def __init__(
         self,
+        content: Optional[str | bytes] = None,
         url: Optional[str] = None,
         encoding: str = "utf8",
         huge_tree: bool = True,
     # Node functionalities, I wanted to move to a separate Mixin class, but it had a slight impact on performance
     @staticmethod
     def _is_text_node(
+        element: HtmlElement | _ElementUnicodeResult,
     ) -> bool:
         """Return True if the given element is a result of a string expression
         Examples:
     @staticmethod
     def __content_convertor(
+        element: HtmlElement | _ElementUnicodeResult,
     ) -> TextHandler:
         """Used internally to convert a single element's text content to TextHandler directly without checks
         )
     def __handle_element(
+        self, element: HtmlElement | _ElementUnicodeResult
+    ) -> Optional[TextHandler | "Selector"]:
         """Used internally in all functions to convert a single element to type (Selector|TextHandler) when possible"""
         if element is None:
             return None
             return self.__element_convertor(element)
     def __handle_elements(
+        self, result: List[HtmlElement | _ElementUnicodeResult]
     ) -> Union["Selectors", "TextHandlers"]:
         """Used internally in all functions to convert results to type (Selectors|TextHandlers) in bulk when possible"""
         if not len(
         return class_name in self._root.classes
     @property
+    def parent(self) -> Optional["Selector"]:
         """Return the direct parent of the element or ``None`` otherwise"""
         return self.__handle_element(self._root.getparent())
     @property
+    def below_elements(self) -> "Selectors":
         """Return all elements under the current element in the DOM tree"""
         below = self._root.xpath(".//*")
         return self.__handle_elements(below)
     @property
+    def children(self) -> "Selectors":
         """Return the children elements of the current element or empty list otherwise"""
         return Selectors(
             self.__element_convertor(child)
         )
     @property
+    def siblings(self) -> "Selectors":
         """Return other children of the current element's parent or empty list otherwise"""
         if self.parent:
             return Selectors(
         for ancestor in self._root.iterancestors():
             yield self.__element_convertor(ancestor)
+    def find_ancestor(self, func: Callable[["Selector"], bool]) -> Optional["Selector"]:
         """Loop over all ancestors of the element till one match the passed function
         :param func: A function that takes each ancestor as an argument and returns True/False
         :return: The first ancestor that match the function or ``None`` otherwise.
         return None
     @property
+    def path(self) -> "Selectors":
         """Returns a list of type `Selectors` that contains the path leading to the current element from the root."""
         lst = list(self.iterancestors())
         return Selectors(lst)
     @property
+    def next(self) -> Optional["Selector"]:
         """Returns the next element of the current element in the children of the parent or ``None`` otherwise."""
         next_element = self._root.getnext()
         if next_element is not None:
         return self.__handle_element(next_element)
     @property
+    def previous(self) -> Optional["Selector"]:
         """Returns the previous element of the current element in the children of the parent or ``None`` otherwise."""
         prev_element = self._root.getprevious()
         if prev_element is not None:
     # From here we start with the selecting functions
     def relocate(
         self,
+        element: Dict | HtmlElement | "Selector",
         percentage: int = 0,
         selector_type: bool = False,
+    ) -> List[HtmlElement] | "Selectors":
         """This function will search again for the element in the page tree, used automatically on page structure change
         :param element: The element we want to relocate in the tree
         adaptive: bool = False,
         auto_save: bool = False,
         percentage: int = 0,
+    ) -> "Selectors" | List | "TextHandlers":
         """Search the current tree with CSS3 selectors
         **Important:
         auto_save: bool = False,
         percentage: int = 0,
         **kwargs: Any,
+    ) -> "Selectors" | List | "TextHandlers":
         """Search the current tree with XPath selectors
         **Important:
     def find_all(
         self,
+        *args: str | Iterable[str] | Pattern | Callable | Dict[str, str],
         **kwargs: str,
     ) -> "Selectors":
         """Find elements by filters of your creations for ease.
     def find(
         self,
+        *args: str | Iterable[str] | Pattern | Callable | Dict[str, str],
         **kwargs: str,
+    ) -> Optional["Selector"]:
         """Find elements by filters of your creations for ease, then return the first result. Otherwise return `None`.
         :param args: Tag name(s), iterable of tag names, regex patterns, function, or a dictionary of elements' attributes. Leave empty for selecting all.
         )
         return score
+    def save(self, element: "Selector" | HtmlElement, identifier: str) -> None:
         """Saves the element's unique properties to the storage for retrieval and relocation later
         :param element: The element itself that we want to save to storage, it can be a ` Selector ` or pure ` HtmlElement `
     def re(
         self,
+        regex: str | Pattern[str],
         replace_entities: bool = True,
         clean_match: bool = False,
         case_sensitive: bool = True,
     def re_first(
         self,
+        regex: str | Pattern[str],
         default=None,
         replace_entities: bool = True,
         clean_match: bool = False,
         )
     @staticmethod
+    def __get_attributes(element: HtmlElement, ignore_attributes: List | Tuple) -> Dict:
         """Return attributes dictionary without the ignored list"""
         return {k: v for k, v in element.attrib.items() if k not in ignore_attributes}
         original: HtmlElement,
         original_attributes: Dict,
         candidate: HtmlElement,
+        ignore_attributes: List | Tuple,
         similarity_threshold: float,
         match_text: bool = False,
     ) -> bool:
     def find_similar(
         self,
         similarity_threshold: float = 0.2,
+        ignore_attributes: List | Tuple = (
             "href",
             "src",
         ),
         match_text: bool = False,
+    ) -> "Selectors" | List:
         """Find elements that are in the same tree depth in the page with the same tag name and same parent tag etc...
         then return the ones that match the current element attributes with a percentage higher than the input threshold.
         partial: bool = False,
         case_sensitive: bool = False,
         clean_match: bool = True,
+    ) -> Union["Selectors", "Selector"]:
         """Find elements that its text content fully/partially matches input.
         :param text: Text query to match
         :param first_match: Returns the first element that matches conditions, enabled by default
     def find_by_regex(
         self,
+        query: str | Pattern[str],
         first_match: bool = True,
         case_sensitive: bool = False,
         clean_match: bool = True,
+    ) -> Union["Selectors", "Selector"]:
         """Find elements that its text content matches the input regex pattern.
         :param query: Regex query/pattern to match
         :param first_match: Return the first element that matches conditions; enabled by default.
     def __getitem__(self, pos: slice) -> "Selectors":
         pass
+    def __getitem__(self, pos: SupportsIndex | slice) -> Selector | "Selectors":
         lst = super().__getitem__(pos)
         if isinstance(pos, slice):
             return self.__class__(lst)
         auto_save: bool = False,
         percentage: int = 0,
         **kwargs: Any,
+    ) -> "Selectors":
         """
         Call the ``.xpath()`` method for each element in this list and return
         their results as another `Selectors` class.
         identifier: str = "",
         auto_save: bool = False,
         percentage: int = 0,
+    ) -> "Selectors":
         """
         Call the ``.css()`` method for each element in this list and return
         their results flattened as another `Selectors` class.
     def re(
         self,
+        regex: str | Pattern,
         replace_entities: bool = True,
         clean_match: bool = False,
         case_sensitive: bool = True,
+    ) -> TextHandlers:
         """Call the ``.re()`` method for each element in this list and return
         their results flattened as List of TextHandler.
     def re_first(
         self,
+        regex: str | Pattern,
         default=None,
         replace_entities: bool = True,
         clean_match: bool = False,
                 return result
         return default
+    def search(self, func: Callable[["Selector"], bool]) -> Optional["Selector"]:
         """Loop over all current elements and return the first element that matches the passed function
         :param func: A function that takes each element as an argument and returns True/False
         :return: The first element that match the function or ``None`` otherwise.
                 return element
         return None
+    def filter(self, func: Callable[["Selector"], bool]) -> "Selectors":
         """Filter current elements based on the passed function
         :param func: A function that takes each element as an argument and returns True/False
         :return: The new `Selectors` object or empty list otherwise.