Karim shoair committed on
Commit
1d98b51
·
1 Parent(s): 9ac26c8

style: General type hint fixes and import optimization

Browse files
ruff.toml CHANGED
@@ -15,7 +15,7 @@ target-version = "py39"
15
 
16
  [lint]
17
  select = ["E", "F", "W"]
18
- ignore = ["E501", "F401"]
19
 
20
  [format]
21
  # Like Black, use double quotes for strings.
 
15
 
16
  [lint]
17
  select = ["E", "F", "W"]
18
+ ignore = ["E501", "F401", "F811"]
19
 
20
  [format]
21
  # Like Black, use double quotes for strings.
scrapling/core/_types.py CHANGED
@@ -4,6 +4,7 @@ Type definitions for type checking purposes.
4
 
5
  from typing import (
6
  TYPE_CHECKING,
 
7
  Any,
8
  Callable,
9
  Dict,
 
4
 
5
  from typing import (
6
  TYPE_CHECKING,
7
+ overload,
8
  Any,
9
  Callable,
10
  Dict,
scrapling/core/custom_types.py CHANGED
@@ -7,14 +7,15 @@ from orjson import dumps, loads
7
 
8
  from scrapling.core._types import (
9
  Dict,
10
- Iterable,
11
  List,
 
 
12
  Literal,
13
- Optional,
14
  Pattern,
 
 
 
15
  SupportsIndex,
16
- TypeVar,
17
- Union,
18
  )
19
  from scrapling.core.utils import _is_iterable, flatten
20
  from scrapling.core._html_utils import _replace_entities
@@ -341,7 +342,9 @@ class AttributesHandler(Mapping[str, _TextHandlerType]):
341
  """Acts like the standard dictionary `.get()` method"""
342
  return self._data.get(key, default)
343
 
344
- def search_values(self, keyword, partial=False):
 
 
345
  """Search current attributes by values and return a dictionary of each matching item
346
  :param keyword: The keyword to search for in the attribute values
347
  :param partial: If True, the function will search if keyword in each value instead of perfect match
 
7
 
8
  from scrapling.core._types import (
9
  Dict,
 
10
  List,
11
+ Union,
12
+ TypeVar,
13
  Literal,
 
14
  Pattern,
15
+ Iterable,
16
+ Optional,
17
+ Generator,
18
  SupportsIndex,
 
 
19
  )
20
  from scrapling.core.utils import _is_iterable, flatten
21
  from scrapling.core._html_utils import _replace_entities
 
342
  """Acts like the standard dictionary `.get()` method"""
343
  return self._data.get(key, default)
344
 
345
+ def search_values(
346
+ self, keyword: str, partial: bool = False
347
+ ) -> Generator["AttributesHandler", None, None]:
348
  """Search current attributes by values and return a dictionary of each matching item
349
  :param keyword: The keyword to search for in the attribute values
350
  :param partial: If True, the function will search if keyword in each value instead of perfect match
scrapling/core/storage.py CHANGED
@@ -9,7 +9,7 @@ from orjson import dumps, loads
9
  from tldextract import extract as tld
10
 
11
  from scrapling.core.utils import _StorageTools, log
12
- from scrapling.core._types import Dict, Optional, Union
13
 
14
 
15
  class StorageSystemMixin(ABC):
@@ -106,7 +106,7 @@ class SQLiteStorageSystem(StorageSystemMixin):
106
  """)
107
  self.connection.commit()
108
 
109
- def save(self, element: HtmlElement, identifier: str):
110
  """Saves the elements unique properties to the storage for retrieval and relocation later
111
 
112
  :param element: The element itself which we want to save to storage.
@@ -126,7 +126,7 @@ class SQLiteStorageSystem(StorageSystemMixin):
126
  self.cursor.fetchall()
127
  self.connection.commit()
128
 
129
- def retrieve(self, identifier: str) -> Optional[Dict]:
130
  """Using the identifier, we search the storage and return the unique properties of the element
131
 
132
  :param identifier: This is the identifier that will be used to retrieve the element from the storage. See
 
9
  from tldextract import extract as tld
10
 
11
  from scrapling.core.utils import _StorageTools, log
12
+ from scrapling.core._types import Dict, Optional, Union, Any
13
 
14
 
15
  class StorageSystemMixin(ABC):
 
106
  """)
107
  self.connection.commit()
108
 
109
+ def save(self, element: HtmlElement, identifier: str) -> None:
110
  """Saves the elements unique properties to the storage for retrieval and relocation later
111
 
112
  :param element: The element itself which we want to save to storage.
 
126
  self.cursor.fetchall()
127
  self.connection.commit()
128
 
129
+ def retrieve(self, identifier: str) -> Optional[Dict[str, Any]]:
130
  """Using the identifier, we search the storage and return the unique properties of the element
131
 
132
  :param identifier: This is the identifier that will be used to retrieve the element from the storage. See
scrapling/core/utils.py CHANGED
@@ -5,7 +5,7 @@ from itertools import chain
5
  import orjson
6
  from lxml import html
7
 
8
- from scrapling.core._types import Any, Dict, Iterable, Union
9
 
10
  # Using cache on top of a class is a brilliant way to achieve a Singleton design pattern without much code
11
  from functools import lru_cache # isort:skip
@@ -41,8 +41,8 @@ def setup_logger():
41
  log = setup_logger()
42
 
43
 
44
- def is_jsonable(content: Union[bytes, str]) -> bool:
45
- if type(content) is bytes:
46
  content = content.decode()
47
 
48
  try:
@@ -52,14 +52,14 @@ def is_jsonable(content: Union[bytes, str]) -> bool:
52
  return False
53
 
54
 
55
- def flatten(lst: Iterable):
56
  return list(chain.from_iterable(lst))
57
 
58
 
59
- def _is_iterable(s: Any):
60
  # This will be used only in regex functions to make sure it's iterable but not string/bytes
61
  return isinstance(
62
- s,
63
  (
64
  list,
65
  tuple,
 
5
  import orjson
6
  from lxml import html
7
 
8
+ from scrapling.core._types import Any, Dict, Iterable, Union, List
9
 
10
  # Using cache on top of a class is a brilliant way to achieve a Singleton design pattern without much code
11
  from functools import lru_cache # isort:skip
 
41
  log = setup_logger()
42
 
43
 
44
+ def is_jsonable(content: bytes | str) -> bool:
45
+ if isinstance(content, bytes):
46
  content = content.decode()
47
 
48
  try:
 
52
  return False
53
 
54
 
55
+ def flatten(lst: Iterable[Any]) -> List[Any]:
56
  return list(chain.from_iterable(lst))
57
 
58
 
59
+ def _is_iterable(obj: Any) -> bool:
60
  # This will be used only in regex functions to make sure it's iterable but not string/bytes
61
  return isinstance(
62
+ obj,
63
  (
64
  list,
65
  tuple,
scrapling/engines/_browsers/_validators.py CHANGED
@@ -82,7 +82,7 @@ class CamoufoxConfig(Struct, kw_only=True, frozen=False):
82
  """Configuration struct for validation"""
83
 
84
  max_pages: int = 1
85
- headless: Union[bool] = True # noqa: F821
86
  block_images: bool = False
87
  disable_resources: bool = False
88
  block_webrtc: bool = False
 
82
  """Configuration struct for validation"""
83
 
84
  max_pages: int = 1
85
+ headless: bool = True # noqa: F821
86
  block_images: bool = False
87
  disable_resources: bool = False
88
  block_webrtc: bool = False
scrapling/parser.py CHANGED
@@ -1,7 +1,6 @@
1
- import inspect
2
  import os
3
  import re
4
- import typing
5
  from difflib import SequenceMatcher
6
  from urllib.parse import urljoin
7
 
@@ -18,16 +17,17 @@ from lxml.etree import (
18
 
19
  from scrapling.core._types import (
20
  Any,
21
- Callable,
22
  Dict,
23
- Generator,
24
- Iterable,
25
  List,
26
- Optional,
27
- Pattern,
28
- SupportsIndex,
29
  Tuple,
30
  Union,
 
 
 
 
 
 
 
31
  )
32
  from scrapling.core.custom_types import AttributesHandler, TextHandler, TextHandlers
33
  from scrapling.core.mixins import SelectorsGeneration
@@ -248,7 +248,7 @@ class Selector(SelectorsGeneration):
248
 
249
  def __handle_elements(
250
  self, result: List[Union[HtmlElement, _ElementUnicodeResult]]
251
- ) -> Union["Selectors", "TextHandlers", List]:
252
  """Used internally in all functions to convert results to type (Selectors|TextHandlers) in bulk when possible"""
253
  if not len(
254
  result
@@ -761,7 +761,7 @@ class Selector(SelectorsGeneration):
761
  patterns.add(arg)
762
 
763
  elif callable(arg):
764
- if len(inspect.signature(arg).parameters) > 0:
765
  functions.append(arg)
766
  else:
767
  raise TypeError(
@@ -914,7 +914,7 @@ class Selector(SelectorsGeneration):
914
  return round((score / checks) * 100, 2)
915
 
916
  @staticmethod
917
- def __calculate_dict_diff(dict1: dict, dict2: dict) -> float:
918
  """Used internally to calculate similarity between two dictionaries as SequenceMatcher doesn't accept dictionaries"""
919
  score = (
920
  SequenceMatcher(None, tuple(dict1.keys()), tuple(dict2.keys())).ratio()
@@ -1210,11 +1210,11 @@ class Selectors(List[Selector]):
1210
 
1211
  __slots__ = ()
1212
 
1213
- @typing.overload
1214
  def __getitem__(self, pos: SupportsIndex) -> Selector:
1215
  pass
1216
 
1217
- @typing.overload
1218
  def __getitem__(self, pos: slice) -> "Selectors":
1219
  pass
1220
 
 
 
1
  import os
2
  import re
3
+ from inspect import signature
4
  from difflib import SequenceMatcher
5
  from urllib.parse import urljoin
6
 
 
17
 
18
  from scrapling.core._types import (
19
  Any,
 
20
  Dict,
 
 
21
  List,
 
 
 
22
  Tuple,
23
  Union,
24
+ Pattern,
25
+ Callable,
26
+ Optional,
27
+ Iterable,
28
+ overload,
29
+ Generator,
30
+ SupportsIndex,
31
  )
32
  from scrapling.core.custom_types import AttributesHandler, TextHandler, TextHandlers
33
  from scrapling.core.mixins import SelectorsGeneration
 
248
 
249
  def __handle_elements(
250
  self, result: List[Union[HtmlElement, _ElementUnicodeResult]]
251
+ ) -> Union["Selectors", "TextHandlers"]:
252
  """Used internally in all functions to convert results to type (Selectors|TextHandlers) in bulk when possible"""
253
  if not len(
254
  result
 
761
  patterns.add(arg)
762
 
763
  elif callable(arg):
764
+ if len(signature(arg).parameters) > 0:
765
  functions.append(arg)
766
  else:
767
  raise TypeError(
 
914
  return round((score / checks) * 100, 2)
915
 
916
  @staticmethod
917
+ def __calculate_dict_diff(dict1: Dict, dict2: Dict) -> float:
918
  """Used internally to calculate similarity between two dictionaries as SequenceMatcher doesn't accept dictionaries"""
919
  score = (
920
  SequenceMatcher(None, tuple(dict1.keys()), tuple(dict2.keys())).ratio()
 
1210
 
1211
  __slots__ = ()
1212
 
1213
+ @overload
1214
  def __getitem__(self, pos: SupportsIndex) -> Selector:
1215
  pass
1216
 
1217
+ @overload
1218
  def __getitem__(self, pos: slice) -> "Selectors":
1219
  pass
1220