Karim shoair committed on
Commit ·
1d98b51
1
Parent(s): 9ac26c8
style: General type hints fixes and imports optimizing
Browse files- ruff.toml +1 -1
- scrapling/core/_types.py +1 -0
- scrapling/core/custom_types.py +8 -5
- scrapling/core/storage.py +3 -3
- scrapling/core/utils.py +6 -6
- scrapling/engines/_browsers/_validators.py +1 -1
- scrapling/parser.py +13 -13
ruff.toml
CHANGED
|
@@ -15,7 +15,7 @@ target-version = "py39"
|
|
| 15 |
|
| 16 |
[lint]
|
| 17 |
select = ["E", "F", "W"]
|
| 18 |
-
ignore = ["E501", "F401"]
|
| 19 |
|
| 20 |
[format]
|
| 21 |
# Like Black, use double quotes for strings.
|
|
|
|
| 15 |
|
| 16 |
[lint]
|
| 17 |
select = ["E", "F", "W"]
|
| 18 |
+
ignore = ["E501", "F401", "F811"]
|
| 19 |
|
| 20 |
[format]
|
| 21 |
# Like Black, use double quotes for strings.
|
scrapling/core/_types.py
CHANGED
|
@@ -4,6 +4,7 @@ Type definitions for type checking purposes.
|
|
| 4 |
|
| 5 |
from typing import (
|
| 6 |
TYPE_CHECKING,
|
|
|
|
| 7 |
Any,
|
| 8 |
Callable,
|
| 9 |
Dict,
|
|
|
|
| 4 |
|
| 5 |
from typing import (
|
| 6 |
TYPE_CHECKING,
|
| 7 |
+
overload,
|
| 8 |
Any,
|
| 9 |
Callable,
|
| 10 |
Dict,
|
scrapling/core/custom_types.py
CHANGED
|
@@ -7,14 +7,15 @@ from orjson import dumps, loads
|
|
| 7 |
|
| 8 |
from scrapling.core._types import (
|
| 9 |
Dict,
|
| 10 |
-
Iterable,
|
| 11 |
List,
|
|
|
|
|
|
|
| 12 |
Literal,
|
| 13 |
-
Optional,
|
| 14 |
Pattern,
|
|
|
|
|
|
|
|
|
|
| 15 |
SupportsIndex,
|
| 16 |
-
TypeVar,
|
| 17 |
-
Union,
|
| 18 |
)
|
| 19 |
from scrapling.core.utils import _is_iterable, flatten
|
| 20 |
from scrapling.core._html_utils import _replace_entities
|
|
@@ -341,7 +342,9 @@ class AttributesHandler(Mapping[str, _TextHandlerType]):
|
|
| 341 |
"""Acts like the standard dictionary `.get()` method"""
|
| 342 |
return self._data.get(key, default)
|
| 343 |
|
| 344 |
-
def search_values(
|
|
|
|
|
|
|
| 345 |
"""Search current attributes by values and return a dictionary of each matching item
|
| 346 |
:param keyword: The keyword to search for in the attribute values
|
| 347 |
:param partial: If True, the function will search if keyword in each value instead of perfect match
|
|
|
|
| 7 |
|
| 8 |
from scrapling.core._types import (
|
| 9 |
Dict,
|
|
|
|
| 10 |
List,
|
| 11 |
+
Union,
|
| 12 |
+
TypeVar,
|
| 13 |
Literal,
|
|
|
|
| 14 |
Pattern,
|
| 15 |
+
Iterable,
|
| 16 |
+
Optional,
|
| 17 |
+
Generator,
|
| 18 |
SupportsIndex,
|
|
|
|
|
|
|
| 19 |
)
|
| 20 |
from scrapling.core.utils import _is_iterable, flatten
|
| 21 |
from scrapling.core._html_utils import _replace_entities
|
|
|
|
| 342 |
"""Acts like the standard dictionary `.get()` method"""
|
| 343 |
return self._data.get(key, default)
|
| 344 |
|
| 345 |
+
def search_values(
|
| 346 |
+
self, keyword: str, partial: bool = False
|
| 347 |
+
) -> Generator["AttributesHandler", None, None]:
|
| 348 |
"""Search current attributes by values and return a dictionary of each matching item
|
| 349 |
:param keyword: The keyword to search for in the attribute values
|
| 350 |
:param partial: If True, the function will search if keyword in each value instead of perfect match
|
scrapling/core/storage.py
CHANGED
|
@@ -9,7 +9,7 @@ from orjson import dumps, loads
|
|
| 9 |
from tldextract import extract as tld
|
| 10 |
|
| 11 |
from scrapling.core.utils import _StorageTools, log
|
| 12 |
-
from scrapling.core._types import Dict, Optional, Union
|
| 13 |
|
| 14 |
|
| 15 |
class StorageSystemMixin(ABC):
|
|
@@ -106,7 +106,7 @@ class SQLiteStorageSystem(StorageSystemMixin):
|
|
| 106 |
""")
|
| 107 |
self.connection.commit()
|
| 108 |
|
| 109 |
-
def save(self, element: HtmlElement, identifier: str):
|
| 110 |
"""Saves the elements unique properties to the storage for retrieval and relocation later
|
| 111 |
|
| 112 |
:param element: The element itself which we want to save to storage.
|
|
@@ -126,7 +126,7 @@ class SQLiteStorageSystem(StorageSystemMixin):
|
|
| 126 |
self.cursor.fetchall()
|
| 127 |
self.connection.commit()
|
| 128 |
|
| 129 |
-
def retrieve(self, identifier: str) -> Optional[Dict]:
|
| 130 |
"""Using the identifier, we search the storage and return the unique properties of the element
|
| 131 |
|
| 132 |
:param identifier: This is the identifier that will be used to retrieve the element from the storage. See
|
|
|
|
| 9 |
from tldextract import extract as tld
|
| 10 |
|
| 11 |
from scrapling.core.utils import _StorageTools, log
|
| 12 |
+
from scrapling.core._types import Dict, Optional, Union, Any
|
| 13 |
|
| 14 |
|
| 15 |
class StorageSystemMixin(ABC):
|
|
|
|
| 106 |
""")
|
| 107 |
self.connection.commit()
|
| 108 |
|
| 109 |
+
def save(self, element: HtmlElement, identifier: str) -> None:
|
| 110 |
"""Saves the elements unique properties to the storage for retrieval and relocation later
|
| 111 |
|
| 112 |
:param element: The element itself which we want to save to storage.
|
|
|
|
| 126 |
self.cursor.fetchall()
|
| 127 |
self.connection.commit()
|
| 128 |
|
| 129 |
+
def retrieve(self, identifier: str) -> Optional[Dict[str, Any]]:
|
| 130 |
"""Using the identifier, we search the storage and return the unique properties of the element
|
| 131 |
|
| 132 |
:param identifier: This is the identifier that will be used to retrieve the element from the storage. See
|
scrapling/core/utils.py
CHANGED
|
@@ -5,7 +5,7 @@ from itertools import chain
|
|
| 5 |
import orjson
|
| 6 |
from lxml import html
|
| 7 |
|
| 8 |
-
from scrapling.core._types import Any, Dict, Iterable, Union
|
| 9 |
|
| 10 |
# Using cache on top of a class is a brilliant way to achieve a Singleton design pattern without much code
|
| 11 |
from functools import lru_cache # isort:skip
|
|
@@ -41,8 +41,8 @@ def setup_logger():
|
|
| 41 |
log = setup_logger()
|
| 42 |
|
| 43 |
|
| 44 |
-
def is_jsonable(content: Union[bytes, str]) -> bool:
|
| 45 |
-
if
|
| 46 |
content = content.decode()
|
| 47 |
|
| 48 |
try:
|
|
@@ -52,14 +52,14 @@ def is_jsonable(content: Union[bytes, str]) -> bool:
|
|
| 52 |
return False
|
| 53 |
|
| 54 |
|
| 55 |
-
def flatten(lst: Iterable):
|
| 56 |
return list(chain.from_iterable(lst))
|
| 57 |
|
| 58 |
|
| 59 |
-
def _is_iterable(
|
| 60 |
# This will be used only in regex functions to make sure it's iterable but not string/bytes
|
| 61 |
return isinstance(
|
| 62 |
-
|
| 63 |
(
|
| 64 |
list,
|
| 65 |
tuple,
|
|
|
|
| 5 |
import orjson
|
| 6 |
from lxml import html
|
| 7 |
|
| 8 |
+
from scrapling.core._types import Any, Dict, Iterable, Union, List
|
| 9 |
|
| 10 |
# Using cache on top of a class is a brilliant way to achieve a Singleton design pattern without much code
|
| 11 |
from functools import lru_cache # isort:skip
|
|
|
|
| 41 |
log = setup_logger()
|
| 42 |
|
| 43 |
|
| 44 |
+
def is_jsonable(content: bytes | str) -> bool:
|
| 45 |
+
if isinstance(content, bytes):
|
| 46 |
content = content.decode()
|
| 47 |
|
| 48 |
try:
|
|
|
|
| 52 |
return False
|
| 53 |
|
| 54 |
|
| 55 |
+
def flatten(lst: Iterable[Any]) -> List[Any]:
|
| 56 |
return list(chain.from_iterable(lst))
|
| 57 |
|
| 58 |
|
| 59 |
+
def _is_iterable(obj: Any) -> bool:
|
| 60 |
# This will be used only in regex functions to make sure it's iterable but not string/bytes
|
| 61 |
return isinstance(
|
| 62 |
+
obj,
|
| 63 |
(
|
| 64 |
list,
|
| 65 |
tuple,
|
scrapling/engines/_browsers/_validators.py
CHANGED
|
@@ -82,7 +82,7 @@ class CamoufoxConfig(Struct, kw_only=True, frozen=False):
|
|
| 82 |
"""Configuration struct for validation"""
|
| 83 |
|
| 84 |
max_pages: int = 1
|
| 85 |
-
headless:
|
| 86 |
block_images: bool = False
|
| 87 |
disable_resources: bool = False
|
| 88 |
block_webrtc: bool = False
|
|
|
|
| 82 |
"""Configuration struct for validation"""
|
| 83 |
|
| 84 |
max_pages: int = 1
|
| 85 |
+
headless: bool = True # noqa: F821
|
| 86 |
block_images: bool = False
|
| 87 |
disable_resources: bool = False
|
| 88 |
block_webrtc: bool = False
|
scrapling/parser.py
CHANGED
|
@@ -1,7 +1,6 @@
|
|
| 1 |
-
import inspect
|
| 2 |
import os
|
| 3 |
import re
|
| 4 |
-
import
|
| 5 |
from difflib import SequenceMatcher
|
| 6 |
from urllib.parse import urljoin
|
| 7 |
|
|
@@ -18,16 +17,17 @@ from lxml.etree import (
|
|
| 18 |
|
| 19 |
from scrapling.core._types import (
|
| 20 |
Any,
|
| 21 |
-
Callable,
|
| 22 |
Dict,
|
| 23 |
-
Generator,
|
| 24 |
-
Iterable,
|
| 25 |
List,
|
| 26 |
-
Optional,
|
| 27 |
-
Pattern,
|
| 28 |
-
SupportsIndex,
|
| 29 |
Tuple,
|
| 30 |
Union,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
)
|
| 32 |
from scrapling.core.custom_types import AttributesHandler, TextHandler, TextHandlers
|
| 33 |
from scrapling.core.mixins import SelectorsGeneration
|
|
@@ -248,7 +248,7 @@ class Selector(SelectorsGeneration):
|
|
| 248 |
|
| 249 |
def __handle_elements(
|
| 250 |
self, result: List[Union[HtmlElement, _ElementUnicodeResult]]
|
| 251 |
-
) -> Union["Selectors", "TextHandlers"
|
| 252 |
"""Used internally in all functions to convert results to type (Selectors|TextHandlers) in bulk when possible"""
|
| 253 |
if not len(
|
| 254 |
result
|
|
@@ -761,7 +761,7 @@ class Selector(SelectorsGeneration):
|
|
| 761 |
patterns.add(arg)
|
| 762 |
|
| 763 |
elif callable(arg):
|
| 764 |
-
if len(
|
| 765 |
functions.append(arg)
|
| 766 |
else:
|
| 767 |
raise TypeError(
|
|
@@ -914,7 +914,7 @@ class Selector(SelectorsGeneration):
|
|
| 914 |
return round((score / checks) * 100, 2)
|
| 915 |
|
| 916 |
@staticmethod
|
| 917 |
-
def __calculate_dict_diff(dict1:
|
| 918 |
"""Used internally to calculate similarity between two dictionaries as SequenceMatcher doesn't accept dictionaries"""
|
| 919 |
score = (
|
| 920 |
SequenceMatcher(None, tuple(dict1.keys()), tuple(dict2.keys())).ratio()
|
|
@@ -1210,11 +1210,11 @@ class Selectors(List[Selector]):
|
|
| 1210 |
|
| 1211 |
__slots__ = ()
|
| 1212 |
|
| 1213 |
-
@
|
| 1214 |
def __getitem__(self, pos: SupportsIndex) -> Selector:
|
| 1215 |
pass
|
| 1216 |
|
| 1217 |
-
@
|
| 1218 |
def __getitem__(self, pos: slice) -> "Selectors":
|
| 1219 |
pass
|
| 1220 |
|
|
|
|
|
|
|
| 1 |
import os
|
| 2 |
import re
|
| 3 |
+
from inspect import signature
|
| 4 |
from difflib import SequenceMatcher
|
| 5 |
from urllib.parse import urljoin
|
| 6 |
|
|
|
|
| 17 |
|
| 18 |
from scrapling.core._types import (
|
| 19 |
Any,
|
|
|
|
| 20 |
Dict,
|
|
|
|
|
|
|
| 21 |
List,
|
|
|
|
|
|
|
|
|
|
| 22 |
Tuple,
|
| 23 |
Union,
|
| 24 |
+
Pattern,
|
| 25 |
+
Callable,
|
| 26 |
+
Optional,
|
| 27 |
+
Iterable,
|
| 28 |
+
overload,
|
| 29 |
+
Generator,
|
| 30 |
+
SupportsIndex,
|
| 31 |
)
|
| 32 |
from scrapling.core.custom_types import AttributesHandler, TextHandler, TextHandlers
|
| 33 |
from scrapling.core.mixins import SelectorsGeneration
|
|
|
|
| 248 |
|
| 249 |
def __handle_elements(
|
| 250 |
self, result: List[Union[HtmlElement, _ElementUnicodeResult]]
|
| 251 |
+
) -> Union["Selectors", "TextHandlers"]:
|
| 252 |
"""Used internally in all functions to convert results to type (Selectors|TextHandlers) in bulk when possible"""
|
| 253 |
if not len(
|
| 254 |
result
|
|
|
|
| 761 |
patterns.add(arg)
|
| 762 |
|
| 763 |
elif callable(arg):
|
| 764 |
+
if len(signature(arg).parameters) > 0:
|
| 765 |
functions.append(arg)
|
| 766 |
else:
|
| 767 |
raise TypeError(
|
|
|
|
| 914 |
return round((score / checks) * 100, 2)
|
| 915 |
|
| 916 |
@staticmethod
|
| 917 |
+
def __calculate_dict_diff(dict1: Dict, dict2: Dict) -> float:
|
| 918 |
"""Used internally to calculate similarity between two dictionaries as SequenceMatcher doesn't accept dictionaries"""
|
| 919 |
score = (
|
| 920 |
SequenceMatcher(None, tuple(dict1.keys()), tuple(dict2.keys())).ratio()
|
|
|
|
| 1210 |
|
| 1211 |
__slots__ = ()
|
| 1212 |
|
| 1213 |
+
@overload
|
| 1214 |
def __getitem__(self, pos: SupportsIndex) -> Selector:
|
| 1215 |
pass
|
| 1216 |
|
| 1217 |
+
@overload
|
| 1218 |
def __getitem__(self, pos: slice) -> "Selectors":
|
| 1219 |
pass
|
| 1220 |
|