Karim Shoair committed on
Commit
916182a
·
1 Parent(s): a2a8556

style: A lot of type hint corrections

Browse files

Since we are using Py3.10 as the minimum version now, we remove Union where possible

scrapling/core/_html_utils.py CHANGED
@@ -6,7 +6,7 @@ Repo source code: https://github.com/scrapy/w3lib/blob/master/w3lib/html.py
6
 
7
  from re import compile as _re_compile, IGNORECASE
8
 
9
- from scrapling.core._types import Iterable, Union, Match, StrOrBytes
10
 
11
  _ent_re = _re_compile(
12
  r"&((?P<named>[a-z\d]+)|#(?P<dec>\d+)|#x(?P<hex>[a-f\d]+))(?P<semicolon>;?)",
@@ -270,7 +270,7 @@ name2codepoint = {
270
 
271
 
272
  def to_unicode(
273
- text: StrOrBytes, encoding: Union[str, None] = None, errors: str = "strict"
274
  ) -> str:
275
  """Return the Unicode representation of a bytes object `text`. If `text`
276
  is already a Unicode object, return it as-is."""
 
6
 
7
  from re import compile as _re_compile, IGNORECASE
8
 
9
+ from scrapling.core._types import Iterable, Optional, Match, StrOrBytes
10
 
11
  _ent_re = _re_compile(
12
  r"&((?P<named>[a-z\d]+)|#(?P<dec>\d+)|#x(?P<hex>[a-f\d]+))(?P<semicolon>;?)",
 
270
 
271
 
272
  def to_unicode(
273
+ text: StrOrBytes, encoding: Optional[str] = None, errors: str = "strict"
274
  ) -> str:
275
  """Return the Unicode representation of a bytes object `text`. If `text`
276
  is already a Unicode object, return it as-is."""
scrapling/core/_types.py CHANGED
@@ -16,7 +16,6 @@ from typing import (
16
  Optional,
17
  Pattern,
18
  Tuple,
19
- Type,
20
  TypeVar,
21
  Union,
22
  Match,
 
16
  Optional,
17
  Pattern,
18
  Tuple,
 
19
  TypeVar,
20
  Union,
21
  Match,
scrapling/core/ai.py CHANGED
@@ -17,7 +17,6 @@ from scrapling.core._types import (
17
  Optional,
18
  Tuple,
19
  extraction_types,
20
- Union,
21
  Mapping,
22
  Dict,
23
  List,
@@ -61,10 +60,10 @@ class ScraplingMCPServer:
61
  extraction_type: extraction_types = "markdown",
62
  css_selector: Optional[str] = None,
63
  main_content_only: bool = True,
64
- params: Optional[Union[Dict, List, Tuple]] = None,
65
  headers: Optional[Mapping[str, Optional[str]]] = None,
66
- cookies: Optional[Union[Dict[str, str], list[tuple[str, str]]]] = None,
67
- timeout: Optional[Union[int, float]] = 30,
68
  follow_redirects: bool = True,
69
  max_redirects: int = 30,
70
  retries: Optional[int] = 3,
@@ -140,10 +139,10 @@ class ScraplingMCPServer:
140
  extraction_type: extraction_types = "markdown",
141
  css_selector: Optional[str] = None,
142
  main_content_only: bool = True,
143
- params: Optional[Union[Dict, List, Tuple]] = None,
144
  headers: Optional[Mapping[str, Optional[str]]] = None,
145
- cookies: Optional[Union[Dict[str, str], list[tuple[str, str]]]] = None,
146
- timeout: Optional[Union[int, float]] = 30,
147
  follow_redirects: bool = True,
148
  max_redirects: int = 30,
149
  retries: Optional[int] = 3,
@@ -232,13 +231,13 @@ class ScraplingMCPServer:
232
  disable_webgl: bool = False,
233
  real_chrome: bool = False,
234
  stealth: bool = False,
235
- wait: Union[int, float] = 0,
236
- proxy: Optional[Union[str, Dict[str, str]]] = None,
237
  locale: str = "en-US",
238
  extra_headers: Optional[Dict[str, str]] = None,
239
  useragent: Optional[str] = None,
240
  cdp_url: Optional[str] = None,
241
- timeout: Union[int, float] = 30000,
242
  disable_resources: bool = False,
243
  wait_selector: Optional[str] = None,
244
  cookies: Optional[List[Dict]] = None,
@@ -321,13 +320,13 @@ class ScraplingMCPServer:
321
  disable_webgl: bool = False,
322
  real_chrome: bool = False,
323
  stealth: bool = False,
324
- wait: Union[int, float] = 0,
325
- proxy: Optional[Union[str, Dict[str, str]]] = None,
326
  locale: str = "en-US",
327
  extra_headers: Optional[Dict[str, str]] = None,
328
  useragent: Optional[str] = None,
329
  cdp_url: Optional[str] = None,
330
- timeout: Union[int, float] = 30000,
331
  disable_resources: bool = False,
332
  wait_selector: Optional[str] = None,
333
  cookies: Optional[List[Dict]] = None,
@@ -409,23 +408,23 @@ class ScraplingMCPServer:
409
  extraction_type: extraction_types = "markdown",
410
  css_selector: Optional[str] = None,
411
  main_content_only: bool = True,
412
- headless: Union[bool] = True, # noqa: F821
413
  block_images: bool = False,
414
  disable_resources: bool = False,
415
  block_webrtc: bool = False,
416
  allow_webgl: bool = True,
417
  network_idle: bool = False,
418
- humanize: Union[bool, float] = True,
419
  solve_cloudflare: bool = False,
420
- wait: Union[int, float] = 0,
421
- timeout: Union[int, float] = 30000,
422
  wait_selector: Optional[str] = None,
423
  addons: Optional[List[str]] = None,
424
  wait_selector_state: SelectorWaitStates = "attached",
425
  cookies: Optional[List[Dict]] = None,
426
  google_search: bool = True,
427
  extra_headers: Optional[Dict[str, str]] = None,
428
- proxy: Optional[Union[str, Dict[str, str]]] = None,
429
  os_randomize: bool = False,
430
  disable_ads: bool = False,
431
  geoip: bool = False,
@@ -509,23 +508,23 @@ class ScraplingMCPServer:
509
  extraction_type: extraction_types = "markdown",
510
  css_selector: Optional[str] = None,
511
  main_content_only: bool = True,
512
- headless: Union[bool] = True, # noqa: F821
513
  block_images: bool = False,
514
  disable_resources: bool = False,
515
  block_webrtc: bool = False,
516
  allow_webgl: bool = True,
517
  network_idle: bool = False,
518
- humanize: Union[bool, float] = True,
519
  solve_cloudflare: bool = False,
520
- wait: Union[int, float] = 0,
521
- timeout: Union[int, float] = 30000,
522
  wait_selector: Optional[str] = None,
523
  addons: Optional[List[str]] = None,
524
  wait_selector_state: SelectorWaitStates = "attached",
525
  cookies: Optional[List[Dict]] = None,
526
  google_search: bool = True,
527
  extra_headers: Optional[Dict[str, str]] = None,
528
- proxy: Optional[Union[str, Dict[str, str]]] = None,
529
  os_randomize: bool = False,
530
  disable_ads: bool = False,
531
  geoip: bool = False,
 
17
  Optional,
18
  Tuple,
19
  extraction_types,
 
20
  Mapping,
21
  Dict,
22
  List,
 
60
  extraction_type: extraction_types = "markdown",
61
  css_selector: Optional[str] = None,
62
  main_content_only: bool = True,
63
+ params: Optional[Dict | List | Tuple] = None,
64
  headers: Optional[Mapping[str, Optional[str]]] = None,
65
+ cookies: Optional[Dict[str, str] | list[tuple[str, str]]] = None,
66
+ timeout: Optional[int | float] = 30,
67
  follow_redirects: bool = True,
68
  max_redirects: int = 30,
69
  retries: Optional[int] = 3,
 
139
  extraction_type: extraction_types = "markdown",
140
  css_selector: Optional[str] = None,
141
  main_content_only: bool = True,
142
+ params: Optional[Dict | List | Tuple] = None,
143
  headers: Optional[Mapping[str, Optional[str]]] = None,
144
+ cookies: Optional[Dict[str, str] | list[tuple[str, str]]] = None,
145
+ timeout: Optional[int | float] = 30,
146
  follow_redirects: bool = True,
147
  max_redirects: int = 30,
148
  retries: Optional[int] = 3,
 
231
  disable_webgl: bool = False,
232
  real_chrome: bool = False,
233
  stealth: bool = False,
234
+ wait: int | float = 0,
235
+ proxy: Optional[str | Dict[str, str]] = None,
236
  locale: str = "en-US",
237
  extra_headers: Optional[Dict[str, str]] = None,
238
  useragent: Optional[str] = None,
239
  cdp_url: Optional[str] = None,
240
+ timeout: int | float = 30000,
241
  disable_resources: bool = False,
242
  wait_selector: Optional[str] = None,
243
  cookies: Optional[List[Dict]] = None,
 
320
  disable_webgl: bool = False,
321
  real_chrome: bool = False,
322
  stealth: bool = False,
323
+ wait: int | float = 0,
324
+ proxy: Optional[str | Dict[str, str]] = None,
325
  locale: str = "en-US",
326
  extra_headers: Optional[Dict[str, str]] = None,
327
  useragent: Optional[str] = None,
328
  cdp_url: Optional[str] = None,
329
+ timeout: int | float = 30000,
330
  disable_resources: bool = False,
331
  wait_selector: Optional[str] = None,
332
  cookies: Optional[List[Dict]] = None,
 
408
  extraction_type: extraction_types = "markdown",
409
  css_selector: Optional[str] = None,
410
  main_content_only: bool = True,
411
+ headless: bool = True, # noqa: F821
412
  block_images: bool = False,
413
  disable_resources: bool = False,
414
  block_webrtc: bool = False,
415
  allow_webgl: bool = True,
416
  network_idle: bool = False,
417
+ humanize: bool | float = True,
418
  solve_cloudflare: bool = False,
419
+ wait: int | float = 0,
420
+ timeout: int | float = 30000,
421
  wait_selector: Optional[str] = None,
422
  addons: Optional[List[str]] = None,
423
  wait_selector_state: SelectorWaitStates = "attached",
424
  cookies: Optional[List[Dict]] = None,
425
  google_search: bool = True,
426
  extra_headers: Optional[Dict[str, str]] = None,
427
+ proxy: Optional[str | Dict[str, str]] = None,
428
  os_randomize: bool = False,
429
  disable_ads: bool = False,
430
  geoip: bool = False,
 
508
  extraction_type: extraction_types = "markdown",
509
  css_selector: Optional[str] = None,
510
  main_content_only: bool = True,
511
+ headless: bool = True, # noqa: F821
512
  block_images: bool = False,
513
  disable_resources: bool = False,
514
  block_webrtc: bool = False,
515
  allow_webgl: bool = True,
516
  network_idle: bool = False,
517
+ humanize: bool | float = True,
518
  solve_cloudflare: bool = False,
519
+ wait: int | float = 0,
520
+ timeout: int | float = 30000,
521
  wait_selector: Optional[str] = None,
522
  addons: Optional[List[str]] = None,
523
  wait_selector_state: SelectorWaitStates = "attached",
524
  cookies: Optional[List[Dict]] = None,
525
  google_search: bool = True,
526
  extra_headers: Optional[Dict[str, str]] = None,
527
+ proxy: Optional[str | Dict[str, str]] = None,
528
  os_randomize: bool = False,
529
  disable_ads: bool = False,
530
  geoip: bool = False,
scrapling/core/custom_types.py CHANGED
@@ -8,7 +8,6 @@ from scrapling.core._types import (
8
  cast,
9
  Dict,
10
  List,
11
- Union,
12
  overload,
13
  TypeVar,
14
  Literal,
@@ -34,7 +33,7 @@ class TextHandler(str):
34
  def __new__(cls, string):
35
  return super().__new__(cls, str(string))
36
 
37
- def __getitem__(self, key: Union[SupportsIndex, slice]) -> "TextHandler":
38
  lst = super().__getitem__(key)
39
  return cast(_TextHandlerType, TextHandler(lst))
40
 
@@ -46,78 +45,72 @@ class TextHandler(str):
46
  )
47
  )
48
 
49
- def strip(self, chars: str = None) -> Union[str, "TextHandler"]:
50
  return TextHandler(super().strip(chars))
51
 
52
- def lstrip(self, chars: str = None) -> Union[str, "TextHandler"]:
53
  return TextHandler(super().lstrip(chars))
54
 
55
- def rstrip(self, chars: str = None) -> Union[str, "TextHandler"]:
56
  return TextHandler(super().rstrip(chars))
57
 
58
- def capitalize(self) -> Union[str, "TextHandler"]:
59
  return TextHandler(super().capitalize())
60
 
61
- def casefold(self) -> Union[str, "TextHandler"]:
62
  return TextHandler(super().casefold())
63
 
64
- def center(
65
- self, width: SupportsIndex, fillchar: str = " "
66
- ) -> Union[str, "TextHandler"]:
67
  return TextHandler(super().center(width, fillchar))
68
 
69
- def expandtabs(self, tabsize: SupportsIndex = 8) -> Union[str, "TextHandler"]:
70
  return TextHandler(super().expandtabs(tabsize))
71
 
72
- def format(self, *args: str, **kwargs: str) -> Union[str, "TextHandler"]:
73
  return TextHandler(super().format(*args, **kwargs))
74
 
75
- def format_map(self, mapping) -> Union[str, "TextHandler"]:
76
  return TextHandler(super().format_map(mapping))
77
 
78
- def join(self, iterable: Iterable[str]) -> Union[str, "TextHandler"]:
79
  return TextHandler(super().join(iterable))
80
 
81
- def ljust(
82
- self, width: SupportsIndex, fillchar: str = " "
83
- ) -> Union[str, "TextHandler"]:
84
  return TextHandler(super().ljust(width, fillchar))
85
 
86
- def rjust(
87
- self, width: SupportsIndex, fillchar: str = " "
88
- ) -> Union[str, "TextHandler"]:
89
  return TextHandler(super().rjust(width, fillchar))
90
 
91
- def swapcase(self) -> Union[str, "TextHandler"]:
92
  return TextHandler(super().swapcase())
93
 
94
- def title(self) -> Union[str, "TextHandler"]:
95
  return TextHandler(super().title())
96
 
97
- def translate(self, table) -> Union[str, "TextHandler"]:
98
  return TextHandler(super().translate(table))
99
 
100
- def zfill(self, width: SupportsIndex) -> Union[str, "TextHandler"]:
101
  return TextHandler(super().zfill(width))
102
 
103
  def replace(
104
  self, old: str, new: str, count: SupportsIndex = -1
105
- ) -> Union[str, "TextHandler"]:
106
  return TextHandler(super().replace(old, new, count))
107
 
108
- def upper(self) -> Union[str, "TextHandler"]:
109
  return TextHandler(super().upper())
110
 
111
- def lower(self) -> Union[str, "TextHandler"]:
112
  return TextHandler(super().lower())
113
 
114
  ##############
115
 
116
- def sort(self, reverse: bool = False) -> Union[str, "TextHandler"]:
117
  """Return a sorted version of the string"""
118
  return self.__class__("".join(sorted(self, reverse=reverse)))
119
 
120
- def clean(self) -> Union[str, "TextHandler"]:
121
  """Return a new version of the string after removing all white spaces and consecutive spaces"""
122
  data = self.translate(__CLEANING_TABLE__)
123
  return self.__class__(__CONSECUTIVE_SPACES_REGEX__.sub(" ", data).strip())
@@ -141,7 +134,7 @@ class TextHandler(str):
141
  @overload
142
  def re(
143
  self,
144
- regex: Union[str, Pattern[str]],
145
  check_match: Literal[True],
146
  replace_entities: bool = True,
147
  clean_match: bool = False,
@@ -151,7 +144,7 @@ class TextHandler(str):
151
  @overload
152
  def re(
153
  self,
154
- regex: Union[str, Pattern[str]],
155
  replace_entities: bool = True,
156
  clean_match: bool = False,
157
  case_sensitive: bool = True,
@@ -160,12 +153,12 @@ class TextHandler(str):
160
 
161
  def re(
162
  self,
163
- regex: Union[str, Pattern[str]],
164
  replace_entities: bool = True,
165
  clean_match: bool = False,
166
  case_sensitive: bool = True,
167
  check_match: bool = False,
168
- ) -> Union["TextHandlers[TextHandler]", bool]:
169
  """Apply the given regex to the current text and return a list of strings with the matches.
170
 
171
  :param regex: Can be either a compiled regular expression or a string.
@@ -205,7 +198,7 @@ class TextHandler(str):
205
 
206
  def re_first(
207
  self,
208
- regex: Union[str, Pattern[str]],
209
  default=None,
210
  replace_entities: bool = True,
211
  clean_match: bool = False,
@@ -244,9 +237,7 @@ class TextHandlers(List[TextHandler]):
244
  def __getitem__(self, pos: slice) -> "TextHandlers":
245
  pass
246
 
247
- def __getitem__(
248
- self, pos: Union[SupportsIndex, slice]
249
- ) -> Union[TextHandler, "TextHandlers"]:
250
  lst = super().__getitem__(pos)
251
  if isinstance(pos, slice):
252
  lst = [TextHandler(s) for s in lst]
@@ -255,7 +246,7 @@ class TextHandlers(List[TextHandler]):
255
 
256
  def re(
257
  self,
258
- regex: Union[str, Pattern[str]],
259
  replace_entities: bool = True,
260
  clean_match: bool = False,
261
  case_sensitive: bool = True,
@@ -275,7 +266,7 @@ class TextHandlers(List[TextHandler]):
275
 
276
  def re_first(
277
  self,
278
- regex: Union[str, Pattern[str]],
279
  default=None,
280
  replace_entities: bool = True,
281
  clean_match: bool = False,
@@ -339,7 +330,7 @@ class AttributesHandler(Mapping[str, _TextHandlerType]):
339
 
340
  def get(
341
  self, key: str, default: Optional[str] = None
342
- ) -> Union[_TextHandlerType, None]:
343
  """Acts like the standard dictionary `.get()` method"""
344
  return self._data.get(key, default)
345
 
 
8
  cast,
9
  Dict,
10
  List,
 
11
  overload,
12
  TypeVar,
13
  Literal,
 
33
  def __new__(cls, string):
34
  return super().__new__(cls, str(string))
35
 
36
+ def __getitem__(self, key: SupportsIndex | slice) -> "TextHandler":
37
  lst = super().__getitem__(key)
38
  return cast(_TextHandlerType, TextHandler(lst))
39
 
 
45
  )
46
  )
47
 
48
+ def strip(self, chars: str = None) -> str | "TextHandler":
49
  return TextHandler(super().strip(chars))
50
 
51
+ def lstrip(self, chars: str = None) -> str | "TextHandler":
52
  return TextHandler(super().lstrip(chars))
53
 
54
+ def rstrip(self, chars: str = None) -> str | "TextHandler":
55
  return TextHandler(super().rstrip(chars))
56
 
57
+ def capitalize(self) -> str | "TextHandler":
58
  return TextHandler(super().capitalize())
59
 
60
+ def casefold(self) -> str | "TextHandler":
61
  return TextHandler(super().casefold())
62
 
63
+ def center(self, width: SupportsIndex, fillchar: str = " ") -> str | "TextHandler":
 
 
64
  return TextHandler(super().center(width, fillchar))
65
 
66
+ def expandtabs(self, tabsize: SupportsIndex = 8) -> str | "TextHandler":
67
  return TextHandler(super().expandtabs(tabsize))
68
 
69
+ def format(self, *args: str, **kwargs: str) -> str | "TextHandler":
70
  return TextHandler(super().format(*args, **kwargs))
71
 
72
+ def format_map(self, mapping) -> str | "TextHandler":
73
  return TextHandler(super().format_map(mapping))
74
 
75
+ def join(self, iterable: Iterable[str]) -> str | "TextHandler":
76
  return TextHandler(super().join(iterable))
77
 
78
+ def ljust(self, width: SupportsIndex, fillchar: str = " ") -> str | "TextHandler":
 
 
79
  return TextHandler(super().ljust(width, fillchar))
80
 
81
+ def rjust(self, width: SupportsIndex, fillchar: str = " ") -> str | "TextHandler":
 
 
82
  return TextHandler(super().rjust(width, fillchar))
83
 
84
+ def swapcase(self) -> str | "TextHandler":
85
  return TextHandler(super().swapcase())
86
 
87
+ def title(self) -> str | "TextHandler":
88
  return TextHandler(super().title())
89
 
90
+ def translate(self, table) -> str | "TextHandler":
91
  return TextHandler(super().translate(table))
92
 
93
+ def zfill(self, width: SupportsIndex) -> str | "TextHandler":
94
  return TextHandler(super().zfill(width))
95
 
96
  def replace(
97
  self, old: str, new: str, count: SupportsIndex = -1
98
+ ) -> str | "TextHandler":
99
  return TextHandler(super().replace(old, new, count))
100
 
101
+ def upper(self) -> str | "TextHandler":
102
  return TextHandler(super().upper())
103
 
104
+ def lower(self) -> str | "TextHandler":
105
  return TextHandler(super().lower())
106
 
107
  ##############
108
 
109
+ def sort(self, reverse: bool = False) -> str | "TextHandler":
110
  """Return a sorted version of the string"""
111
  return self.__class__("".join(sorted(self, reverse=reverse)))
112
 
113
+ def clean(self) -> str | "TextHandler":
114
  """Return a new version of the string after removing all white spaces and consecutive spaces"""
115
  data = self.translate(__CLEANING_TABLE__)
116
  return self.__class__(__CONSECUTIVE_SPACES_REGEX__.sub(" ", data).strip())
 
134
  @overload
135
  def re(
136
  self,
137
+ regex: str | Pattern,
138
  check_match: Literal[True],
139
  replace_entities: bool = True,
140
  clean_match: bool = False,
 
144
  @overload
145
  def re(
146
  self,
147
+ regex: str | Pattern,
148
  replace_entities: bool = True,
149
  clean_match: bool = False,
150
  case_sensitive: bool = True,
 
153
 
154
  def re(
155
  self,
156
+ regex: str | Pattern,
157
  replace_entities: bool = True,
158
  clean_match: bool = False,
159
  case_sensitive: bool = True,
160
  check_match: bool = False,
161
+ ) -> "TextHandlers" | bool:
162
  """Apply the given regex to the current text and return a list of strings with the matches.
163
 
164
  :param regex: Can be either a compiled regular expression or a string.
 
198
 
199
  def re_first(
200
  self,
201
+ regex: str | Pattern,
202
  default=None,
203
  replace_entities: bool = True,
204
  clean_match: bool = False,
 
237
  def __getitem__(self, pos: slice) -> "TextHandlers":
238
  pass
239
 
240
+ def __getitem__(self, pos: SupportsIndex | slice) -> TextHandler | "TextHandlers":
 
 
241
  lst = super().__getitem__(pos)
242
  if isinstance(pos, slice):
243
  lst = [TextHandler(s) for s in lst]
 
246
 
247
  def re(
248
  self,
249
+ regex: str | Pattern,
250
  replace_entities: bool = True,
251
  clean_match: bool = False,
252
  case_sensitive: bool = True,
 
266
 
267
  def re_first(
268
  self,
269
+ regex: str | Pattern,
270
  default=None,
271
  replace_entities: bool = True,
272
  clean_match: bool = False,
 
330
 
331
  def get(
332
  self, key: str, default: Optional[str] = None
333
+ ) -> Optional[_TextHandlerType]:
334
  """Acts like the standard dictionary `.get()` method"""
335
  return self._data.get(key, default)
336
 
scrapling/core/shell.py CHANGED
@@ -33,7 +33,6 @@ from scrapling.core._types import (
33
  Dict,
34
  Tuple,
35
  Any,
36
- Union,
37
  extraction_types,
38
  Generator,
39
  )
@@ -254,7 +253,7 @@ class CurlParser:
254
 
255
  # --- Process Data Payload ---
256
  params = dict()
257
- data_payload: Union[str, bytes, Dict, None] = None
258
  json_payload: Optional[Any] = None
259
 
260
  # DevTools often uses --data-raw for JSON bodies
@@ -358,7 +357,7 @@ class CurlParser:
358
  follow_redirects=True, # Scrapling default is True
359
  )
360
 
361
- def convert2fetcher(self, curl_command: Union[Request, str]) -> Optional[Response]:
362
  if isinstance(curl_command, (Request, str)):
363
  request = (
364
  self.parse(curl_command)
 
33
  Dict,
34
  Tuple,
35
  Any,
 
36
  extraction_types,
37
  Generator,
38
  )
 
253
 
254
  # --- Process Data Payload ---
255
  params = dict()
256
+ data_payload: Optional[str | bytes | Dict] = None
257
  json_payload: Optional[Any] = None
258
 
259
  # DevTools often uses --data-raw for JSON bodies
 
357
  follow_redirects=True, # Scrapling default is True
358
  )
359
 
360
+ def convert2fetcher(self, curl_command: Request | str) -> Optional[Response]:
361
  if isinstance(curl_command, (Request, str)):
362
  request = (
363
  self.parse(curl_command)
scrapling/core/storage.py CHANGED
@@ -1,20 +1,20 @@
1
- from sqlite3 import connect as db_connect
2
- from threading import RLock
3
- from abc import ABC, abstractmethod
4
  from hashlib import sha256
 
5
  from functools import lru_cache
 
 
6
 
7
- from lxml.html import HtmlElement
8
  from orjson import dumps, loads
 
9
  from tldextract import extract as tld
10
 
11
  from scrapling.core.utils import _StorageTools, log
12
- from scrapling.core._types import Dict, Optional, Union, Any
13
 
14
 
15
  class StorageSystemMixin(ABC):
16
  # If you want to make your own storage system, you have to inherit from this
17
- def __init__(self, url: Union[str, None] = None):
18
  """
19
  :param url: URL of the website we are working on to separate it from other websites data
20
  """
@@ -74,7 +74,7 @@ class SQLiteStorageSystem(StorageSystemMixin):
74
  Mainly built, so the library can run in threaded frameworks like scrapy or threaded tools
75
  > It's optimized for threaded applications, but running it without threads shouldn't make it slow."""
76
 
77
- def __init__(self, storage_file: str, url: Union[str, None] = None):
78
  """
79
  :param storage_file: File to be used to store elements' data.
80
  :param url: URL of the website we are working on to separate it from other websites data
 
 
 
 
1
  from hashlib import sha256
2
+ from threading import RLock
3
  from functools import lru_cache
4
+ from abc import ABC, abstractmethod
5
+ from sqlite3 import connect as db_connect
6
 
 
7
  from orjson import dumps, loads
8
+ from lxml.html import HtmlElement
9
  from tldextract import extract as tld
10
 
11
  from scrapling.core.utils import _StorageTools, log
12
+ from scrapling.core._types import Dict, Optional, Any
13
 
14
 
15
  class StorageSystemMixin(ABC):
16
  # If you want to make your own storage system, you have to inherit from this
17
+ def __init__(self, url: Optional[str] = None):
18
  """
19
  :param url: URL of the website we are working on to separate it from other websites data
20
  """
 
74
  Mainly built, so the library can run in threaded frameworks like scrapy or threaded tools
75
  > It's optimized for threaded applications, but running it without threads shouldn't make it slow."""
76
 
77
+ def __init__(self, storage_file: str, url: Optional[str] = None):
78
  """
79
  :param storage_file: File to be used to store elements' data.
80
  :param url: URL of the website we are working on to separate it from other websites data
scrapling/engines/_browsers/_camoufox.py CHANGED
@@ -26,10 +26,9 @@ from ._page import PageInfo, PagePool
26
  from ._validators import validate, CamoufoxConfig
27
  from scrapling.core._types import (
28
  Dict,
 
29
  Optional,
30
- Union,
31
  Callable,
32
- List,
33
  SelectorWaitStates,
34
  )
35
  from scrapling.engines.toolbelt import (
@@ -84,16 +83,16 @@ class StealthySession:
84
  def __init__(
85
  self,
86
  max_pages: int = 1,
87
- headless: Union[bool] = True, # noqa: F821
88
  block_images: bool = False,
89
  disable_resources: bool = False,
90
  block_webrtc: bool = False,
91
  allow_webgl: bool = True,
92
  network_idle: bool = False,
93
- humanize: Union[bool, float] = True,
94
  solve_cloudflare: bool = False,
95
- wait: Union[int, float] = 0,
96
- timeout: Union[int, float] = 30000,
97
  page_action: Optional[Callable] = None,
98
  wait_selector: Optional[str] = None,
99
  addons: Optional[List[str]] = None,
@@ -101,7 +100,7 @@ class StealthySession:
101
  cookies: Optional[List[Dict]] = None,
102
  google_search: bool = True,
103
  extra_headers: Optional[Dict[str, str]] = None,
104
- proxy: Optional[Union[str, Dict[str, str]]] = None,
105
  os_randomize: bool = False,
106
  disable_ads: bool = False,
107
  geoip: bool = False,
@@ -461,16 +460,16 @@ class AsyncStealthySession(StealthySession):
461
  def __init__(
462
  self,
463
  max_pages: int = 1,
464
- headless: Union[bool] = True, # noqa: F821
465
  block_images: bool = False,
466
  disable_resources: bool = False,
467
  block_webrtc: bool = False,
468
  allow_webgl: bool = True,
469
  network_idle: bool = False,
470
- humanize: Union[bool, float] = True,
471
  solve_cloudflare: bool = False,
472
- wait: Union[int, float] = 0,
473
- timeout: Union[int, float] = 30000,
474
  page_action: Optional[Callable] = None,
475
  wait_selector: Optional[str] = None,
476
  addons: Optional[List[str]] = None,
@@ -478,7 +477,7 @@ class AsyncStealthySession(StealthySession):
478
  cookies: Optional[List[Dict]] = None,
479
  google_search: bool = True,
480
  extra_headers: Optional[Dict[str, str]] = None,
481
- proxy: Optional[Union[str, Dict[str, str]]] = None,
482
  os_randomize: bool = False,
483
  disable_ads: bool = False,
484
  geoip: bool = False,
 
26
  from ._validators import validate, CamoufoxConfig
27
  from scrapling.core._types import (
28
  Dict,
29
+ List,
30
  Optional,
 
31
  Callable,
 
32
  SelectorWaitStates,
33
  )
34
  from scrapling.engines.toolbelt import (
 
83
  def __init__(
84
  self,
85
  max_pages: int = 1,
86
+ headless: bool = True, # noqa: F821
87
  block_images: bool = False,
88
  disable_resources: bool = False,
89
  block_webrtc: bool = False,
90
  allow_webgl: bool = True,
91
  network_idle: bool = False,
92
+ humanize: bool | float = True,
93
  solve_cloudflare: bool = False,
94
+ wait: int | float = 0,
95
+ timeout: int | float = 30000,
96
  page_action: Optional[Callable] = None,
97
  wait_selector: Optional[str] = None,
98
  addons: Optional[List[str]] = None,
 
100
  cookies: Optional[List[Dict]] = None,
101
  google_search: bool = True,
102
  extra_headers: Optional[Dict[str, str]] = None,
103
+ proxy: Optional[str | Dict[str, str]] = None,
104
  os_randomize: bool = False,
105
  disable_ads: bool = False,
106
  geoip: bool = False,
 
460
  def __init__(
461
  self,
462
  max_pages: int = 1,
463
+ headless: bool = True, # noqa: F821
464
  block_images: bool = False,
465
  disable_resources: bool = False,
466
  block_webrtc: bool = False,
467
  allow_webgl: bool = True,
468
  network_idle: bool = False,
469
+ humanize: bool | float = True,
470
  solve_cloudflare: bool = False,
471
+ wait: int | float = 0,
472
+ timeout: int | float = 30000,
473
  page_action: Optional[Callable] = None,
474
  wait_selector: Optional[str] = None,
475
  addons: Optional[List[str]] = None,
 
477
  cookies: Optional[List[Dict]] = None,
478
  google_search: bool = True,
479
  extra_headers: Optional[Dict[str, str]] = None,
480
+ proxy: Optional[str | Dict[str, str]] = None,
481
  os_randomize: bool = False,
482
  disable_ads: bool = False,
483
  geoip: bool = False,
scrapling/engines/_browsers/_controllers.py CHANGED
@@ -28,9 +28,8 @@ from ._validators import validate, PlaywrightConfig
28
  from ._config_tools import _compiled_stealth_scripts, _launch_kwargs, _context_kwargs
29
  from scrapling.core._types import (
30
  Dict,
31
- Optional,
32
- Union,
33
  List,
 
34
  Callable,
35
  SelectorWaitStates,
36
  )
@@ -87,14 +86,14 @@ class DynamicSession:
87
  disable_webgl: bool = False,
88
  real_chrome: bool = False,
89
  stealth: bool = False,
90
- wait: Union[int, float] = 0,
91
  page_action: Optional[Callable] = None,
92
- proxy: Optional[Union[str, Dict[str, str]]] = None,
93
  locale: str = "en-US",
94
  extra_headers: Optional[Dict[str, str]] = None,
95
  useragent: Optional[str] = None,
96
  cdp_url: Optional[str] = None,
97
- timeout: Union[int, float] = 30000,
98
  disable_resources: bool = False,
99
  wait_selector: Optional[str] = None,
100
  cookies: Optional[List[Dict]] = None,
@@ -404,14 +403,14 @@ class AsyncDynamicSession(DynamicSession):
404
  disable_webgl: bool = False,
405
  real_chrome: bool = False,
406
  stealth: bool = False,
407
- wait: Union[int, float] = 0,
408
  page_action: Optional[Callable] = None,
409
- proxy: Optional[Union[str, Dict[str, str]]] = None,
410
  locale: str = "en-US",
411
  extra_headers: Optional[Dict[str, str]] = None,
412
  useragent: Optional[str] = None,
413
  cdp_url: Optional[str] = None,
414
- timeout: Union[int, float] = 30000,
415
  disable_resources: bool = False,
416
  wait_selector: Optional[str] = None,
417
  cookies: Optional[List[Dict]] = None,
 
28
  from ._config_tools import _compiled_stealth_scripts, _launch_kwargs, _context_kwargs
29
  from scrapling.core._types import (
30
  Dict,
 
 
31
  List,
32
+ Optional,
33
  Callable,
34
  SelectorWaitStates,
35
  )
 
86
  disable_webgl: bool = False,
87
  real_chrome: bool = False,
88
  stealth: bool = False,
89
+ wait: int | float = 0,
90
  page_action: Optional[Callable] = None,
91
+ proxy: Optional[str | Dict[str, str]] = None,
92
  locale: str = "en-US",
93
  extra_headers: Optional[Dict[str, str]] = None,
94
  useragent: Optional[str] = None,
95
  cdp_url: Optional[str] = None,
96
+ timeout: int | float = 30000,
97
  disable_resources: bool = False,
98
  wait_selector: Optional[str] = None,
99
  cookies: Optional[List[Dict]] = None,
 
403
  disable_webgl: bool = False,
404
  real_chrome: bool = False,
405
  stealth: bool = False,
406
+ wait: int | float = 0,
407
  page_action: Optional[Callable] = None,
408
+ proxy: Optional[str | Dict[str, str]] = None,
409
  locale: str = "en-US",
410
  extra_headers: Optional[Dict[str, str]] = None,
411
  useragent: Optional[str] = None,
412
  cdp_url: Optional[str] = None,
413
+ timeout: int | float = 30000,
414
  disable_resources: bool = False,
415
  wait_selector: Optional[str] = None,
416
  cookies: Optional[List[Dict]] = None,
scrapling/engines/_browsers/_page.py CHANGED
@@ -4,7 +4,7 @@ from dataclasses import dataclass
4
  from playwright.sync_api import Page as SyncPage
5
  from playwright.async_api import Page as AsyncPage
6
 
7
- from scrapling.core._types import Optional, Union, List, Literal
8
 
9
  PageState = Literal["ready", "busy", "error"] # States that a page can be in
10
 
@@ -14,7 +14,7 @@ class PageInfo:
14
  """Information about the page and its current state"""
15
 
16
  __slots__ = ("page", "state", "url")
17
- page: Union[SyncPage, AsyncPage]
18
  state: PageState
19
  url: Optional[str]
20
 
@@ -52,7 +52,7 @@ class PagePool:
52
  self.pages: List[PageInfo] = []
53
  self._lock = RLock()
54
 
55
- def add_page(self, page: Union[SyncPage, AsyncPage]) -> PageInfo:
56
  """Add a new page to the pool"""
57
  with self._lock:
58
  if len(self.pages) >= self.max_pages:
 
4
  from playwright.sync_api import Page as SyncPage
5
  from playwright.async_api import Page as AsyncPage
6
 
7
+ from scrapling.core._types import Optional, List, Literal
8
 
9
  PageState = Literal["ready", "busy", "error"] # States that a page can be in
10
 
 
14
  """Information about the page and its current state"""
15
 
16
  __slots__ = ("page", "state", "url")
17
+ page: SyncPage | AsyncPage
18
  state: PageState
19
  url: Optional[str]
20
 
 
52
  self.pages: List[PageInfo] = []
53
  self._lock = RLock()
54
 
55
+ def add_page(self, page: SyncPage | AsyncPage) -> PageInfo:
56
  """Add a new page to the pool"""
57
  with self._lock:
58
  if len(self.pages) >= self.max_pages:
scrapling/engines/_browsers/_validators.py CHANGED
@@ -4,7 +4,6 @@ from pathlib import Path
4
 
5
  from scrapling.core._types import (
6
  Optional,
7
- Union,
8
  Dict,
9
  Callable,
10
  List,
@@ -24,15 +23,15 @@ class PlaywrightConfig(Struct, kw_only=True, frozen=False):
24
  disable_webgl: bool = False
25
  real_chrome: bool = False
26
  stealth: bool = False
27
- wait: Union[int, float] = 0
28
  page_action: Optional[Callable] = None
29
- proxy: Optional[Union[str, Dict[str, str]]] = (
30
  None # The default value for proxy in Playwright's source is `None`
31
  )
32
  locale: str = "en-US"
33
  extra_headers: Optional[Dict[str, str]] = None
34
  useragent: Optional[str] = None
35
- timeout: Union[int, float] = 30000
36
  disable_resources: bool = False
37
  wait_selector: Optional[str] = None
38
  cookies: Optional[List[Dict]] = None
@@ -87,10 +86,10 @@ class CamoufoxConfig(Struct, kw_only=True, frozen=False):
87
  block_webrtc: bool = False
88
  allow_webgl: bool = True
89
  network_idle: bool = False
90
- humanize: Union[bool, float] = True
91
  solve_cloudflare: bool = False
92
- wait: Union[int, float] = 0
93
- timeout: Union[int, float] = 30000
94
  page_action: Optional[Callable] = None
95
  wait_selector: Optional[str] = None
96
  addons: Optional[List[str]] = None
@@ -98,7 +97,7 @@ class CamoufoxConfig(Struct, kw_only=True, frozen=False):
98
  cookies: Optional[List[Dict]] = None
99
  google_search: bool = True
100
  extra_headers: Optional[Dict[str, str]] = None
101
- proxy: Optional[Union[str, Dict[str, str]]] = (
102
  None # The default value for proxy in Playwright's source is `None`
103
  )
104
  os_randomize: bool = False
 
4
 
5
  from scrapling.core._types import (
6
  Optional,
 
7
  Dict,
8
  Callable,
9
  List,
 
23
  disable_webgl: bool = False
24
  real_chrome: bool = False
25
  stealth: bool = False
26
+ wait: int | float = 0
27
  page_action: Optional[Callable] = None
28
+ proxy: Optional[str | Dict[str, str]] = (
29
  None # The default value for proxy in Playwright's source is `None`
30
  )
31
  locale: str = "en-US"
32
  extra_headers: Optional[Dict[str, str]] = None
33
  useragent: Optional[str] = None
34
+ timeout: int | float = 30000
35
  disable_resources: bool = False
36
  wait_selector: Optional[str] = None
37
  cookies: Optional[List[Dict]] = None
 
86
  block_webrtc: bool = False
87
  allow_webgl: bool = True
88
  network_idle: bool = False
89
+ humanize: bool | float = True
90
  solve_cloudflare: bool = False
91
+ wait: int | float = 0
92
+ timeout: int | float = 30000
93
  page_action: Optional[Callable] = None
94
  wait_selector: Optional[str] = None
95
  addons: Optional[List[str]] = None
 
97
  cookies: Optional[List[Dict]] = None
98
  google_search: bool = True
99
  extra_headers: Optional[Dict[str, str]] = None
100
+ proxy: Optional[str | Dict[str, str]] = (
101
  None # The default value for proxy in Playwright's source is `None`
102
  )
103
  os_randomize: bool = False
scrapling/engines/static.py CHANGED
@@ -17,7 +17,6 @@ from scrapling.core._types import (
17
  Dict,
18
  Optional,
19
  Tuple,
20
- Union,
21
  Mapping,
22
  SUPPORTED_HTTP_METHODS,
23
  Awaitable,
@@ -55,14 +54,14 @@ class FetcherSession:
55
  proxies: Optional[Dict[str, str]] = None,
56
  proxy: Optional[str] = None,
57
  proxy_auth: Optional[Tuple[str, str]] = None,
58
- timeout: Optional[Union[int, float]] = 30,
59
  headers: Optional[Dict[str, str]] = None,
60
  retries: Optional[int] = 3,
61
  retry_delay: Optional[int] = 1,
62
  follow_redirects: bool = True,
63
  max_redirects: int = 30,
64
  verify: bool = True,
65
- cert: Optional[Union[str, Tuple[str, str]]] = None,
66
  selector_config: Optional[Dict] = None,
67
  ):
68
  """
@@ -357,7 +356,7 @@ class FetcherSession:
357
  method: SUPPORTED_HTTP_METHODS,
358
  stealth: Optional[bool] = None,
359
  **kwargs,
360
- ) -> Union[Response, Awaitable[Response]]:
361
  """
362
  Internal dispatcher. Prepares arguments and calls sync or async request helper.
363
 
@@ -390,10 +389,10 @@ class FetcherSession:
390
  def get(
391
  self,
392
  url: str,
393
- params: Optional[Union[Dict, List, Tuple]] = None,
394
  headers: Optional[Mapping[str, Optional[str]]] = _UNSET,
395
  cookies: Optional[CookieTypes] = None,
396
- timeout: Optional[Union[int, float]] = _UNSET,
397
  follow_redirects: Optional[bool] = _UNSET,
398
  max_redirects: Optional[int] = _UNSET,
399
  retries: Optional[int] = _UNSET,
@@ -403,12 +402,12 @@ class FetcherSession:
403
  proxy_auth: Optional[Tuple[str, str]] = _UNSET,
404
  auth: Optional[Tuple[str, str]] = None,
405
  verify: Optional[bool] = _UNSET,
406
- cert: Optional[Union[str, Tuple[str, str]]] = _UNSET,
407
  impersonate: Optional[BrowserTypeLiteral] = _UNSET,
408
  http3: Optional[bool] = _UNSET,
409
  stealthy_headers: Optional[bool] = _UNSET,
410
  **kwargs,
411
- ) -> Union[Response, Awaitable[Response]]:
412
  """
413
  Perform a GET request.
414
 
@@ -461,12 +460,12 @@ class FetcherSession:
461
  def post(
462
  self,
463
  url: str,
464
- data: Optional[Union[Dict, str]] = None,
465
- json: Optional[Union[Dict, List]] = None,
466
  headers: Optional[Mapping[str, Optional[str]]] = _UNSET,
467
- params: Optional[Union[Dict, List, Tuple]] = None,
468
  cookies: Optional[CookieTypes] = None,
469
- timeout: Optional[Union[int, float]] = _UNSET,
470
  follow_redirects: Optional[bool] = _UNSET,
471
  max_redirects: Optional[int] = _UNSET,
472
  retries: Optional[int] = _UNSET,
@@ -476,12 +475,12 @@ class FetcherSession:
476
  proxy_auth: Optional[Tuple[str, str]] = _UNSET,
477
  auth: Optional[Tuple[str, str]] = None,
478
  verify: Optional[bool] = _UNSET,
479
- cert: Optional[Union[str, Tuple[str, str]]] = _UNSET,
480
  impersonate: Optional[BrowserTypeLiteral] = _UNSET,
481
  http3: Optional[bool] = _UNSET,
482
  stealthy_headers: Optional[bool] = _UNSET,
483
  **kwargs,
484
- ) -> Union[Response, Awaitable[Response]]:
485
  """
486
  Perform a POST request.
487
 
@@ -538,12 +537,12 @@ class FetcherSession:
538
  def put(
539
  self,
540
  url: str,
541
- data: Optional[Union[Dict, str]] = None,
542
- json: Optional[Union[Dict, List]] = None,
543
  headers: Optional[Mapping[str, Optional[str]]] = _UNSET,
544
- params: Optional[Union[Dict, List, Tuple]] = None,
545
  cookies: Optional[CookieTypes] = None,
546
- timeout: Optional[Union[int, float]] = _UNSET,
547
  follow_redirects: Optional[bool] = _UNSET,
548
  max_redirects: Optional[int] = _UNSET,
549
  retries: Optional[int] = _UNSET,
@@ -553,12 +552,12 @@ class FetcherSession:
553
  proxy_auth: Optional[Tuple[str, str]] = _UNSET,
554
  auth: Optional[Tuple[str, str]] = None,
555
  verify: Optional[bool] = _UNSET,
556
- cert: Optional[Union[str, Tuple[str, str]]] = _UNSET,
557
  impersonate: Optional[BrowserTypeLiteral] = _UNSET,
558
  http3: Optional[bool] = _UNSET,
559
  stealthy_headers: Optional[bool] = _UNSET,
560
  **kwargs,
561
- ) -> Union[Response, Awaitable[Response]]:
562
  """
563
  Perform a PUT request.
564
 
@@ -615,12 +614,12 @@ class FetcherSession:
615
  def delete(
616
  self,
617
  url: str,
618
- data: Optional[Union[Dict, str]] = None,
619
- json: Optional[Union[Dict, List]] = None,
620
  headers: Optional[Mapping[str, Optional[str]]] = _UNSET,
621
- params: Optional[Union[Dict, List, Tuple]] = None,
622
  cookies: Optional[CookieTypes] = None,
623
- timeout: Optional[Union[int, float]] = _UNSET,
624
  follow_redirects: Optional[bool] = _UNSET,
625
  max_redirects: Optional[int] = _UNSET,
626
  retries: Optional[int] = _UNSET,
@@ -630,12 +629,12 @@ class FetcherSession:
630
  proxy_auth: Optional[Tuple[str, str]] = _UNSET,
631
  auth: Optional[Tuple[str, str]] = None,
632
  verify: Optional[bool] = _UNSET,
633
- cert: Optional[Union[str, Tuple[str, str]]] = _UNSET,
634
  impersonate: Optional[BrowserTypeLiteral] = _UNSET,
635
  http3: Optional[bool] = _UNSET,
636
  stealthy_headers: Optional[bool] = _UNSET,
637
  **kwargs,
638
- ) -> Union[Response, Awaitable[Response]]:
639
  """
640
  Perform a DELETE request.
641
 
 
17
  Dict,
18
  Optional,
19
  Tuple,
 
20
  Mapping,
21
  SUPPORTED_HTTP_METHODS,
22
  Awaitable,
 
54
  proxies: Optional[Dict[str, str]] = None,
55
  proxy: Optional[str] = None,
56
  proxy_auth: Optional[Tuple[str, str]] = None,
57
+ timeout: Optional[int | float] = 30,
58
  headers: Optional[Dict[str, str]] = None,
59
  retries: Optional[int] = 3,
60
  retry_delay: Optional[int] = 1,
61
  follow_redirects: bool = True,
62
  max_redirects: int = 30,
63
  verify: bool = True,
64
+ cert: Optional[str | Tuple[str, str]] = None,
65
  selector_config: Optional[Dict] = None,
66
  ):
67
  """
 
356
  method: SUPPORTED_HTTP_METHODS,
357
  stealth: Optional[bool] = None,
358
  **kwargs,
359
+ ) -> Response | Awaitable[Response]:
360
  """
361
  Internal dispatcher. Prepares arguments and calls sync or async request helper.
362
 
 
389
  def get(
390
  self,
391
  url: str,
392
+ params: Optional[Dict | List | Tuple] = None,
393
  headers: Optional[Mapping[str, Optional[str]]] = _UNSET,
394
  cookies: Optional[CookieTypes] = None,
395
+ timeout: Optional[int | float] = _UNSET,
396
  follow_redirects: Optional[bool] = _UNSET,
397
  max_redirects: Optional[int] = _UNSET,
398
  retries: Optional[int] = _UNSET,
 
402
  proxy_auth: Optional[Tuple[str, str]] = _UNSET,
403
  auth: Optional[Tuple[str, str]] = None,
404
  verify: Optional[bool] = _UNSET,
405
+ cert: Optional[str | Tuple[str, str]] = _UNSET,
406
  impersonate: Optional[BrowserTypeLiteral] = _UNSET,
407
  http3: Optional[bool] = _UNSET,
408
  stealthy_headers: Optional[bool] = _UNSET,
409
  **kwargs,
410
+ ) -> Response | Awaitable[Response]:
411
  """
412
  Perform a GET request.
413
 
 
460
  def post(
461
  self,
462
  url: str,
463
+ data: Optional[Dict | str] = None,
464
+ json: Optional[Dict | List] = None,
465
  headers: Optional[Mapping[str, Optional[str]]] = _UNSET,
466
+ params: Optional[Dict | List | Tuple] = None,
467
  cookies: Optional[CookieTypes] = None,
468
+ timeout: Optional[int | float] = _UNSET,
469
  follow_redirects: Optional[bool] = _UNSET,
470
  max_redirects: Optional[int] = _UNSET,
471
  retries: Optional[int] = _UNSET,
 
475
  proxy_auth: Optional[Tuple[str, str]] = _UNSET,
476
  auth: Optional[Tuple[str, str]] = None,
477
  verify: Optional[bool] = _UNSET,
478
+ cert: Optional[str | Tuple[str, str]] = _UNSET,
479
  impersonate: Optional[BrowserTypeLiteral] = _UNSET,
480
  http3: Optional[bool] = _UNSET,
481
  stealthy_headers: Optional[bool] = _UNSET,
482
  **kwargs,
483
+ ) -> Response | Awaitable[Response]:
484
  """
485
  Perform a POST request.
486
 
 
537
  def put(
538
  self,
539
  url: str,
540
+ data: Optional[Dict | str] = None,
541
+ json: Optional[Dict | List] = None,
542
  headers: Optional[Mapping[str, Optional[str]]] = _UNSET,
543
+ params: Optional[Dict | List | Tuple] = None,
544
  cookies: Optional[CookieTypes] = None,
545
+ timeout: Optional[int | float] = _UNSET,
546
  follow_redirects: Optional[bool] = _UNSET,
547
  max_redirects: Optional[int] = _UNSET,
548
  retries: Optional[int] = _UNSET,
 
552
  proxy_auth: Optional[Tuple[str, str]] = _UNSET,
553
  auth: Optional[Tuple[str, str]] = None,
554
  verify: Optional[bool] = _UNSET,
555
+ cert: Optional[str | Tuple[str, str]] = _UNSET,
556
  impersonate: Optional[BrowserTypeLiteral] = _UNSET,
557
  http3: Optional[bool] = _UNSET,
558
  stealthy_headers: Optional[bool] = _UNSET,
559
  **kwargs,
560
+ ) -> Response | Awaitable[Response]:
561
  """
562
  Perform a PUT request.
563
 
 
614
  def delete(
615
  self,
616
  url: str,
617
+ data: Optional[Dict | str] = None,
618
+ json: Optional[Dict | List] = None,
619
  headers: Optional[Mapping[str, Optional[str]]] = _UNSET,
620
+ params: Optional[Dict | List | Tuple] = None,
621
  cookies: Optional[CookieTypes] = None,
622
+ timeout: Optional[int | float] = _UNSET,
623
  follow_redirects: Optional[bool] = _UNSET,
624
  max_redirects: Optional[int] = _UNSET,
625
  retries: Optional[int] = _UNSET,
 
629
  proxy_auth: Optional[Tuple[str, str]] = _UNSET,
630
  auth: Optional[Tuple[str, str]] = None,
631
  verify: Optional[bool] = _UNSET,
632
+ cert: Optional[str | Tuple[str, str]] = _UNSET,
633
  impersonate: Optional[BrowserTypeLiteral] = _UNSET,
634
  http3: Optional[bool] = _UNSET,
635
  stealthy_headers: Optional[bool] = _UNSET,
636
  **kwargs,
637
+ ) -> Response | Awaitable[Response]:
638
  """
639
  Perform a DELETE request.
640
 
scrapling/engines/toolbelt/__init__.py CHANGED
@@ -2,7 +2,6 @@ from .custom import (
2
  BaseFetcher,
3
  Response,
4
  StatusText,
5
- check_type_validity,
6
  get_variable_name,
7
  )
8
  from .fingerprints import (
 
2
  BaseFetcher,
3
  Response,
4
  StatusText,
 
5
  get_variable_name,
6
  )
7
  from .fingerprints import (
scrapling/engines/toolbelt/custom.py CHANGED
@@ -10,8 +10,6 @@ from scrapling.core._types import (
10
  List,
11
  Optional,
12
  Tuple,
13
- Type,
14
- Union,
15
  )
16
  from scrapling.core.custom_types import MappingProxyType
17
  from scrapling.core.utils import log, lru_cache
@@ -106,7 +104,7 @@ class Response(Selector):
106
  content: str | bytes,
107
  status: int,
108
  reason: str,
109
- cookies: Union[Tuple[Dict[str, str], ...], Dict[str, str]],
110
  headers: Dict,
111
  request_headers: Dict,
112
  encoding: str = "utf-8",
@@ -318,51 +316,3 @@ def get_variable_name(var: Any) -> Optional[str]:
318
  if value is var:
319
  return name
320
  return None
321
-
322
-
323
- def check_type_validity(
324
- variable: Any,
325
- valid_types: Union[List[Type], None],
326
- default_value: Any = None,
327
- critical: bool = False,
328
- param_name: Optional[str] = None,
329
- ) -> Any:
330
- """Check if a variable matches the specified type constraints.
331
- :param variable: The variable to check
332
- :param valid_types: List of valid types for the variable
333
- :param default_value: Value to return if type check fails
334
- :param critical: If True, raises TypeError instead of logging error
335
- :param param_name: Optional parameter name for error messages
336
- :return: The original variable if valid, default_value if invalid
337
- :raise TypeError: If critical=True and type check fails
338
- """
339
- # Use provided param_name or try to get it automatically
340
- var_name = param_name or get_variable_name(variable) or "Unknown"
341
-
342
- # Convert valid_types to a list if None
343
- valid_types = valid_types or []
344
-
345
- # Handle None value
346
- if variable is None:
347
- if type(None) in valid_types:
348
- return variable
349
- error_msg = f'Argument "{var_name}" cannot be None'
350
- if critical:
351
- raise TypeError(error_msg)
352
- log.error(f"[Ignored] {error_msg}")
353
- return default_value
354
-
355
- # If no valid_types specified and variable has a value, return it
356
- if not valid_types:
357
- return variable
358
-
359
- # Check if variable type matches any of the valid types
360
- if not any(isinstance(variable, t) for t in valid_types):
361
- type_names = [t.__name__ for t in valid_types]
362
- error_msg = f'Argument "{var_name}" must be of type {" or ".join(type_names)}'
363
- if critical:
364
- raise TypeError(error_msg)
365
- log.error(f"[Ignored] {error_msg}")
366
- return default_value
367
-
368
- return variable
 
10
  List,
11
  Optional,
12
  Tuple,
 
 
13
  )
14
  from scrapling.core.custom_types import MappingProxyType
15
  from scrapling.core.utils import log, lru_cache
 
104
  content: str | bytes,
105
  status: int,
106
  reason: str,
107
+ cookies: Tuple[Dict[str, str], ...] | Dict[str, str],
108
  headers: Dict,
109
  request_headers: Dict,
110
  encoding: str = "utf-8",
 
316
  if value is var:
317
  return name
318
  return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
scrapling/engines/toolbelt/fingerprints.py CHANGED
@@ -7,7 +7,7 @@ from platform import system as platform_system
7
  from tldextract import extract
8
  from browserforge.headers import Browser, HeaderGenerator
9
 
10
- from scrapling.core._types import Dict, Union
11
  from scrapling.core.utils import lru_cache
12
 
13
  __OS_NAME__ = platform_system()
@@ -28,7 +28,7 @@ def generate_convincing_referer(url: str) -> str:
28
 
29
 
30
  @lru_cache(1, typed=True)
31
- def get_os_name() -> Union[str, None]:
32
  """Get the current OS name in the same format needed for browserforge
33
 
34
  :return: Current OS name or `None` otherwise
 
7
  from tldextract import extract
8
  from browserforge.headers import Browser, HeaderGenerator
9
 
10
+ from scrapling.core._types import Dict, Optional
11
  from scrapling.core.utils import lru_cache
12
 
13
  __OS_NAME__ = platform_system()
 
28
 
29
 
30
  @lru_cache(1, typed=True)
31
+ def get_os_name() -> Optional[str]:
32
  """Get the current OS name in the same format needed for browserforge
33
 
34
  :return: Current OS name or `None` otherwise
scrapling/engines/toolbelt/navigation.py CHANGED
@@ -11,7 +11,7 @@ from msgspec import Struct, structs, convert, ValidationError
11
  from playwright.sync_api import Route
12
 
13
  from scrapling.core.utils import log
14
- from scrapling.core._types import Dict, Optional, Union, Tuple
15
  from scrapling.engines.constants import DEFAULT_DISABLED_RESOURCES
16
 
17
  __BYPASSES_DIR__ = Path(__file__).parent / "bypasses"
@@ -54,8 +54,8 @@ async def async_intercept_route(route: async_Route):
54
 
55
 
56
  def construct_proxy_dict(
57
- proxy_string: Union[str, Dict[str, str]], as_tuple=False
58
- ) -> Union[Dict, Tuple, None]:
59
  """Validate a proxy and return it in the acceptable format for Playwright
60
  Reference: https://playwright.dev/python/docs/network#http-proxy
61
 
 
11
  from playwright.sync_api import Route
12
 
13
  from scrapling.core.utils import log
14
+ from scrapling.core._types import Dict, Optional, Tuple
15
  from scrapling.engines.constants import DEFAULT_DISABLED_RESOURCES
16
 
17
  __BYPASSES_DIR__ = Path(__file__).parent / "bypasses"
 
54
 
55
 
56
  def construct_proxy_dict(
57
+ proxy_string: str | Dict[str, str], as_tuple=False
58
+ ) -> Optional[Dict | Tuple]:
59
  """Validate a proxy and return it in the acceptable format for Playwright
60
  Reference: https://playwright.dev/python/docs/network#http-proxy
61
 
scrapling/fetchers.py CHANGED
@@ -4,7 +4,6 @@ from scrapling.core._types import (
4
  List,
5
  Optional,
6
  SelectorWaitStates,
7
- Union,
8
  Iterable,
9
  )
10
  from scrapling.engines import (
@@ -51,16 +50,16 @@ class StealthyFetcher(BaseFetcher):
51
  def fetch(
52
  cls,
53
  url: str,
54
- headless: Union[bool] = True, # noqa: F821
55
  block_images: bool = False,
56
  disable_resources: bool = False,
57
  block_webrtc: bool = False,
58
  allow_webgl: bool = True,
59
  network_idle: bool = False,
60
- humanize: Union[bool, float] = True,
61
  solve_cloudflare: bool = False,
62
- wait: Union[int, float] = 0,
63
- timeout: Union[int, float] = 30000,
64
  page_action: Optional[Callable] = None,
65
  wait_selector: Optional[str] = None,
66
  addons: Optional[List[str]] = None,
@@ -68,7 +67,7 @@ class StealthyFetcher(BaseFetcher):
68
  cookies: Optional[List[Dict]] = None,
69
  google_search: bool = True,
70
  extra_headers: Optional[Dict[str, str]] = None,
71
- proxy: Optional[Union[str, Dict[str, str]]] = None,
72
  os_randomize: bool = False,
73
  disable_ads: bool = False,
74
  geoip: bool = False,
@@ -147,16 +146,16 @@ class StealthyFetcher(BaseFetcher):
147
  async def async_fetch(
148
  cls,
149
  url: str,
150
- headless: Union[bool] = True, # noqa: F821
151
  block_images: bool = False,
152
  disable_resources: bool = False,
153
  block_webrtc: bool = False,
154
  allow_webgl: bool = True,
155
  network_idle: bool = False,
156
- humanize: Union[bool, float] = True,
157
  solve_cloudflare: bool = False,
158
- wait: Union[int, float] = 0,
159
- timeout: Union[int, float] = 30000,
160
  page_action: Optional[Callable] = None,
161
  wait_selector: Optional[str] = None,
162
  addons: Optional[List[str]] = None,
@@ -164,7 +163,7 @@ class StealthyFetcher(BaseFetcher):
164
  cookies: Optional[List[Dict]] = None,
165
  google_search: bool = True,
166
  extra_headers: Optional[Dict[str, str]] = None,
167
- proxy: Optional[Union[str, Dict[str, str]]] = None,
168
  os_randomize: bool = False,
169
  disable_ads: bool = False,
170
  geoip: bool = False,
@@ -267,14 +266,14 @@ class DynamicFetcher(BaseFetcher):
267
  disable_webgl: bool = False,
268
  real_chrome: bool = False,
269
  stealth: bool = False,
270
- wait: Union[int, float] = 0,
271
  page_action: Optional[Callable] = None,
272
- proxy: Optional[Union[str, Dict[str, str]]] = None,
273
  locale: str = "en-US",
274
  extra_headers: Optional[Dict[str, str]] = None,
275
  useragent: Optional[str] = None,
276
  cdp_url: Optional[str] = None,
277
- timeout: Union[int, float] = 30000,
278
  disable_resources: bool = False,
279
  wait_selector: Optional[str] = None,
280
  cookies: Optional[Iterable[Dict]] = None,
@@ -350,14 +349,14 @@ class DynamicFetcher(BaseFetcher):
350
  disable_webgl: bool = False,
351
  real_chrome: bool = False,
352
  stealth: bool = False,
353
- wait: Union[int, float] = 0,
354
  page_action: Optional[Callable] = None,
355
- proxy: Optional[Union[str, Dict[str, str]]] = None,
356
  locale: str = "en-US",
357
  extra_headers: Optional[Dict[str, str]] = None,
358
  useragent: Optional[str] = None,
359
  cdp_url: Optional[str] = None,
360
- timeout: Union[int, float] = 30000,
361
  disable_resources: bool = False,
362
  wait_selector: Optional[str] = None,
363
  cookies: Optional[Iterable[Dict]] = None,
 
4
  List,
5
  Optional,
6
  SelectorWaitStates,
 
7
  Iterable,
8
  )
9
  from scrapling.engines import (
 
50
  def fetch(
51
  cls,
52
  url: str,
53
+ headless: bool = True, # noqa: F821
54
  block_images: bool = False,
55
  disable_resources: bool = False,
56
  block_webrtc: bool = False,
57
  allow_webgl: bool = True,
58
  network_idle: bool = False,
59
+ humanize: bool | float = True,
60
  solve_cloudflare: bool = False,
61
+ wait: int | float = 0,
62
+ timeout: int | float = 30000,
63
  page_action: Optional[Callable] = None,
64
  wait_selector: Optional[str] = None,
65
  addons: Optional[List[str]] = None,
 
67
  cookies: Optional[List[Dict]] = None,
68
  google_search: bool = True,
69
  extra_headers: Optional[Dict[str, str]] = None,
70
+ proxy: Optional[str | Dict[str, str]] = None,
71
  os_randomize: bool = False,
72
  disable_ads: bool = False,
73
  geoip: bool = False,
 
146
  async def async_fetch(
147
  cls,
148
  url: str,
149
+ headless: bool = True, # noqa: F821
150
  block_images: bool = False,
151
  disable_resources: bool = False,
152
  block_webrtc: bool = False,
153
  allow_webgl: bool = True,
154
  network_idle: bool = False,
155
+ humanize: bool | float = True,
156
  solve_cloudflare: bool = False,
157
+ wait: int | float = 0,
158
+ timeout: int | float = 30000,
159
  page_action: Optional[Callable] = None,
160
  wait_selector: Optional[str] = None,
161
  addons: Optional[List[str]] = None,
 
163
  cookies: Optional[List[Dict]] = None,
164
  google_search: bool = True,
165
  extra_headers: Optional[Dict[str, str]] = None,
166
+ proxy: Optional[str | Dict[str, str]] = None,
167
  os_randomize: bool = False,
168
  disable_ads: bool = False,
169
  geoip: bool = False,
 
266
  disable_webgl: bool = False,
267
  real_chrome: bool = False,
268
  stealth: bool = False,
269
+ wait: int | float = 0,
270
  page_action: Optional[Callable] = None,
271
+ proxy: Optional[str | Dict[str, str]] = None,
272
  locale: str = "en-US",
273
  extra_headers: Optional[Dict[str, str]] = None,
274
  useragent: Optional[str] = None,
275
  cdp_url: Optional[str] = None,
276
+ timeout: int | float = 30000,
277
  disable_resources: bool = False,
278
  wait_selector: Optional[str] = None,
279
  cookies: Optional[Iterable[Dict]] = None,
 
349
  disable_webgl: bool = False,
350
  real_chrome: bool = False,
351
  stealth: bool = False,
352
+ wait: int | float = 0,
353
  page_action: Optional[Callable] = None,
354
+ proxy: Optional[str | Dict[str, str]] = None,
355
  locale: str = "en-US",
356
  extra_headers: Optional[Dict[str, str]] = None,
357
  useragent: Optional[str] = None,
358
  cdp_url: Optional[str] = None,
359
+ timeout: int | float = 30000,
360
  disable_resources: bool = False,
361
  wait_selector: Optional[str] = None,
362
  cookies: Optional[Iterable[Dict]] = None,
scrapling/parser.py CHANGED
@@ -59,7 +59,7 @@ class Selector(SelectorsGeneration):
59
 
60
  def __init__(
61
  self,
62
- content: Optional[Union[str, bytes]] = None,
63
  url: Optional[str] = None,
64
  encoding: str = "utf8",
65
  huge_tree: bool = True,
@@ -197,7 +197,7 @@ class Selector(SelectorsGeneration):
197
  # Node functionalities, I wanted to move to a separate Mixin class, but it had a slight impact on performance
198
  @staticmethod
199
  def _is_text_node(
200
- element: Union[HtmlElement, _ElementUnicodeResult],
201
  ) -> bool:
202
  """Return True if the given element is a result of a string expression
203
  Examples:
@@ -209,7 +209,7 @@ class Selector(SelectorsGeneration):
209
 
210
  @staticmethod
211
  def __content_convertor(
212
- element: Union[HtmlElement, _ElementUnicodeResult],
213
  ) -> TextHandler:
214
  """Used internally to convert a single element's text content to TextHandler directly without checks
215
 
@@ -235,8 +235,8 @@ class Selector(SelectorsGeneration):
235
  )
236
 
237
  def __handle_element(
238
- self, element: Union[HtmlElement, _ElementUnicodeResult]
239
- ) -> Union[TextHandler, "Selector", None]:
240
  """Used internally in all functions to convert a single element to type (Selector|TextHandler) when possible"""
241
  if element is None:
242
  return None
@@ -247,7 +247,7 @@ class Selector(SelectorsGeneration):
247
  return self.__element_convertor(element)
248
 
249
  def __handle_elements(
250
- self, result: List[Union[HtmlElement, _ElementUnicodeResult]]
251
  ) -> Union["Selectors", "TextHandlers"]:
252
  """Used internally in all functions to convert results to type (Selectors|TextHandlers) in bulk when possible"""
253
  if not len(
@@ -364,18 +364,18 @@ class Selector(SelectorsGeneration):
364
  return class_name in self._root.classes
365
 
366
  @property
367
- def parent(self) -> Union["Selector", None]:
368
  """Return the direct parent of the element or ``None`` otherwise"""
369
  return self.__handle_element(self._root.getparent())
370
 
371
  @property
372
- def below_elements(self) -> "Selectors[Selector]":
373
  """Return all elements under the current element in the DOM tree"""
374
  below = self._root.xpath(".//*")
375
  return self.__handle_elements(below)
376
 
377
  @property
378
- def children(self) -> "Selectors[Selector]":
379
  """Return the children elements of the current element or empty list otherwise"""
380
  return Selectors(
381
  self.__element_convertor(child)
@@ -384,7 +384,7 @@ class Selector(SelectorsGeneration):
384
  )
385
 
386
  @property
387
- def siblings(self) -> "Selectors[Selector]":
388
  """Return other children of the current element's parent or empty list otherwise"""
389
  if self.parent:
390
  return Selectors(
@@ -397,9 +397,7 @@ class Selector(SelectorsGeneration):
397
  for ancestor in self._root.iterancestors():
398
  yield self.__element_convertor(ancestor)
399
 
400
- def find_ancestor(
401
- self, func: Callable[["Selector"], bool]
402
- ) -> Union["Selector", None]:
403
  """Loop over all ancestors of the element till one match the passed function
404
  :param func: A function that takes each ancestor as an argument and returns True/False
405
  :return: The first ancestor that match the function or ``None`` otherwise.
@@ -410,13 +408,13 @@ class Selector(SelectorsGeneration):
410
  return None
411
 
412
  @property
413
- def path(self) -> "Selectors[Selector]":
414
  """Returns a list of type `Selectors` that contains the path leading to the current element from the root."""
415
  lst = list(self.iterancestors())
416
  return Selectors(lst)
417
 
418
  @property
419
- def next(self) -> Union["Selector", None]:
420
  """Returns the next element of the current element in the children of the parent or ``None`` otherwise."""
421
  next_element = self._root.getnext()
422
  if next_element is not None:
@@ -427,7 +425,7 @@ class Selector(SelectorsGeneration):
427
  return self.__handle_element(next_element)
428
 
429
  @property
430
- def previous(self) -> Union["Selector", None]:
431
  """Returns the previous element of the current element in the children of the parent or ``None`` otherwise."""
432
  prev_element = self._root.getprevious()
433
  if prev_element is not None:
@@ -470,10 +468,10 @@ class Selector(SelectorsGeneration):
470
  # From here we start with the selecting functions
471
  def relocate(
472
  self,
473
- element: Union[Dict, HtmlElement, "Selector"],
474
  percentage: int = 0,
475
  selector_type: bool = False,
476
- ) -> Union[List[Union[HtmlElement, None]], "Selectors"]:
477
  """This function will search again for the element in the page tree, used automatically on page structure change
478
 
479
  :param element: The element we want to relocate in the tree
@@ -581,7 +579,7 @@ class Selector(SelectorsGeneration):
581
  adaptive: bool = False,
582
  auto_save: bool = False,
583
  percentage: int = 0,
584
- ) -> Union["Selectors[Selector]", List, "TextHandlers[TextHandler]"]:
585
  """Search the current tree with CSS3 selectors
586
 
587
  **Important:
@@ -644,7 +642,7 @@ class Selector(SelectorsGeneration):
644
  auto_save: bool = False,
645
  percentage: int = 0,
646
  **kwargs: Any,
647
- ) -> Union["Selectors[Selector]", List, "TextHandlers[TextHandler]"]:
648
  """Search the current tree with XPath selectors
649
 
650
  **Important:
@@ -708,7 +706,7 @@ class Selector(SelectorsGeneration):
708
 
709
  def find_all(
710
  self,
711
- *args: Union[str, Iterable[str], Pattern, Callable, Dict[str, str]],
712
  **kwargs: str,
713
  ) -> "Selectors":
714
  """Find elements by filters of your creations for ease.
@@ -815,9 +813,9 @@ class Selector(SelectorsGeneration):
815
 
816
  def find(
817
  self,
818
- *args: Union[str, Iterable[str], Pattern, Callable, Dict[str, str]],
819
  **kwargs: str,
820
- ) -> Union["Selector", None]:
821
  """Find elements by filters of your creations for ease, then return the first result. Otherwise return `None`.
822
 
823
  :param args: Tag name(s), iterable of tag names, regex patterns, function, or a dictionary of elements' attributes. Leave empty for selecting all.
@@ -924,7 +922,7 @@ class Selector(SelectorsGeneration):
924
  )
925
  return score
926
 
927
- def save(self, element: Union["Selector", HtmlElement], identifier: str) -> None:
928
  """Saves the element's unique properties to the storage for retrieval and relocation later
929
 
930
  :param element: The element itself that we want to save to storage, it can be a ` Selector ` or pure ` HtmlElement `
@@ -969,7 +967,7 @@ class Selector(SelectorsGeneration):
969
 
970
  def re(
971
  self,
972
- regex: Union[str, Pattern[str]],
973
  replace_entities: bool = True,
974
  clean_match: bool = False,
975
  case_sensitive: bool = True,
@@ -985,7 +983,7 @@ class Selector(SelectorsGeneration):
985
 
986
  def re_first(
987
  self,
988
- regex: Union[str, Pattern[str]],
989
  default=None,
990
  replace_entities: bool = True,
991
  clean_match: bool = False,
@@ -1004,9 +1002,7 @@ class Selector(SelectorsGeneration):
1004
  )
1005
 
1006
  @staticmethod
1007
- def __get_attributes(
1008
- element: HtmlElement, ignore_attributes: Union[List, Tuple]
1009
- ) -> Dict:
1010
  """Return attributes dictionary without the ignored list"""
1011
  return {k: v for k, v in element.attrib.items() if k not in ignore_attributes}
1012
 
@@ -1015,7 +1011,7 @@ class Selector(SelectorsGeneration):
1015
  original: HtmlElement,
1016
  original_attributes: Dict,
1017
  candidate: HtmlElement,
1018
- ignore_attributes: Union[List, Tuple],
1019
  similarity_threshold: float,
1020
  match_text: bool = False,
1021
  ) -> bool:
@@ -1055,12 +1051,12 @@ class Selector(SelectorsGeneration):
1055
  def find_similar(
1056
  self,
1057
  similarity_threshold: float = 0.2,
1058
- ignore_attributes: Union[List, Tuple] = (
1059
  "href",
1060
  "src",
1061
  ),
1062
  match_text: bool = False,
1063
- ) -> Union["Selectors[Selector]", List]:
1064
  """Find elements that are in the same tree depth in the page with the same tag name and same parent tag etc...
1065
  then return the ones that match the current element attributes with a percentage higher than the input threshold.
1066
 
@@ -1123,7 +1119,7 @@ class Selector(SelectorsGeneration):
1123
  partial: bool = False,
1124
  case_sensitive: bool = False,
1125
  clean_match: bool = True,
1126
- ) -> Union["Selectors[Selector]", "Selector"]:
1127
  """Find elements that its text content fully/partially matches input.
1128
  :param text: Text query to match
1129
  :param first_match: Returns the first element that matches conditions, enabled by default
@@ -1165,11 +1161,11 @@ class Selector(SelectorsGeneration):
1165
 
1166
  def find_by_regex(
1167
  self,
1168
- query: Union[str, Pattern[str]],
1169
  first_match: bool = True,
1170
  case_sensitive: bool = False,
1171
  clean_match: bool = True,
1172
- ) -> Union["Selectors[Selector]", "Selector"]:
1173
  """Find elements that its text content matches the input regex pattern.
1174
  :param query: Regex query/pattern to match
1175
  :param first_match: Return the first element that matches conditions; enabled by default.
@@ -1216,9 +1212,7 @@ class Selectors(List[Selector]):
1216
  def __getitem__(self, pos: slice) -> "Selectors":
1217
  pass
1218
 
1219
- def __getitem__(
1220
- self, pos: Union[SupportsIndex, slice]
1221
- ) -> Union[Selector, "Selectors"]:
1222
  lst = super().__getitem__(pos)
1223
  if isinstance(pos, slice):
1224
  return self.__class__(lst)
@@ -1232,7 +1226,7 @@ class Selectors(List[Selector]):
1232
  auto_save: bool = False,
1233
  percentage: int = 0,
1234
  **kwargs: Any,
1235
- ) -> "Selectors[Selector]":
1236
  """
1237
  Call the ``.xpath()`` method for each element in this list and return
1238
  their results as another `Selectors` class.
@@ -1267,7 +1261,7 @@ class Selectors(List[Selector]):
1267
  identifier: str = "",
1268
  auto_save: bool = False,
1269
  percentage: int = 0,
1270
- ) -> "Selectors[Selector]":
1271
  """
1272
  Call the ``.css()`` method for each element in this list and return
1273
  their results flattened as another `Selectors` class.
@@ -1294,11 +1288,11 @@ class Selectors(List[Selector]):
1294
 
1295
  def re(
1296
  self,
1297
- regex: Union[str, Pattern[str]],
1298
  replace_entities: bool = True,
1299
  clean_match: bool = False,
1300
  case_sensitive: bool = True,
1301
- ) -> TextHandlers[TextHandler]:
1302
  """Call the ``.re()`` method for each element in this list and return
1303
  their results flattened as List of TextHandler.
1304
 
@@ -1315,7 +1309,7 @@ class Selectors(List[Selector]):
1315
 
1316
  def re_first(
1317
  self,
1318
- regex: Union[str, Pattern[str]],
1319
  default=None,
1320
  replace_entities: bool = True,
1321
  clean_match: bool = False,
@@ -1335,7 +1329,7 @@ class Selectors(List[Selector]):
1335
  return result
1336
  return default
1337
 
1338
- def search(self, func: Callable[["Selector"], bool]) -> Union["Selector", None]:
1339
  """Loop over all current elements and return the first element that matches the passed function
1340
  :param func: A function that takes each element as an argument and returns True/False
1341
  :return: The first element that match the function or ``None`` otherwise.
@@ -1345,7 +1339,7 @@ class Selectors(List[Selector]):
1345
  return element
1346
  return None
1347
 
1348
- def filter(self, func: Callable[["Selector"], bool]) -> "Selectors[Selector]":
1349
  """Filter current elements based on the passed function
1350
  :param func: A function that takes each element as an argument and returns True/False
1351
  :return: The new `Selectors` object or empty list otherwise.
 
59
 
60
  def __init__(
61
  self,
62
+ content: Optional[str | bytes] = None,
63
  url: Optional[str] = None,
64
  encoding: str = "utf8",
65
  huge_tree: bool = True,
 
197
  # Node functionalities, I wanted to move to a separate Mixin class, but it had a slight impact on performance
198
  @staticmethod
199
  def _is_text_node(
200
+ element: HtmlElement | _ElementUnicodeResult,
201
  ) -> bool:
202
  """Return True if the given element is a result of a string expression
203
  Examples:
 
209
 
210
  @staticmethod
211
  def __content_convertor(
212
+ element: HtmlElement | _ElementUnicodeResult,
213
  ) -> TextHandler:
214
  """Used internally to convert a single element's text content to TextHandler directly without checks
215
 
 
235
  )
236
 
237
  def __handle_element(
238
+ self, element: HtmlElement | _ElementUnicodeResult
239
+ ) -> Optional[TextHandler | "Selector"]:
240
  """Used internally in all functions to convert a single element to type (Selector|TextHandler) when possible"""
241
  if element is None:
242
  return None
 
247
  return self.__element_convertor(element)
248
 
249
  def __handle_elements(
250
+ self, result: List[HtmlElement | _ElementUnicodeResult]
251
  ) -> Union["Selectors", "TextHandlers"]:
252
  """Used internally in all functions to convert results to type (Selectors|TextHandlers) in bulk when possible"""
253
  if not len(
 
364
  return class_name in self._root.classes
365
 
366
  @property
367
+ def parent(self) -> Optional["Selector"]:
368
  """Return the direct parent of the element or ``None`` otherwise"""
369
  return self.__handle_element(self._root.getparent())
370
 
371
  @property
372
+ def below_elements(self) -> "Selectors":
373
  """Return all elements under the current element in the DOM tree"""
374
  below = self._root.xpath(".//*")
375
  return self.__handle_elements(below)
376
 
377
  @property
378
+ def children(self) -> "Selectors":
379
  """Return the children elements of the current element or empty list otherwise"""
380
  return Selectors(
381
  self.__element_convertor(child)
 
384
  )
385
 
386
  @property
387
+ def siblings(self) -> "Selectors":
388
  """Return other children of the current element's parent or empty list otherwise"""
389
  if self.parent:
390
  return Selectors(
 
397
  for ancestor in self._root.iterancestors():
398
  yield self.__element_convertor(ancestor)
399
 
400
+ def find_ancestor(self, func: Callable[["Selector"], bool]) -> Optional["Selector"]:
 
 
401
  """Loop over all ancestors of the element till one match the passed function
402
  :param func: A function that takes each ancestor as an argument and returns True/False
403
  :return: The first ancestor that match the function or ``None`` otherwise.
 
408
  return None
409
 
410
  @property
411
+ def path(self) -> "Selectors":
412
  """Returns a list of type `Selectors` that contains the path leading to the current element from the root."""
413
  lst = list(self.iterancestors())
414
  return Selectors(lst)
415
 
416
  @property
417
+ def next(self) -> Optional["Selector"]:
418
  """Returns the next element of the current element in the children of the parent or ``None`` otherwise."""
419
  next_element = self._root.getnext()
420
  if next_element is not None:
 
425
  return self.__handle_element(next_element)
426
 
427
  @property
428
+ def previous(self) -> Optional["Selector"]:
429
  """Returns the previous element of the current element in the children of the parent or ``None`` otherwise."""
430
  prev_element = self._root.getprevious()
431
  if prev_element is not None:
 
468
  # From here we start with the selecting functions
469
  def relocate(
470
  self,
471
+ element: Dict | HtmlElement | "Selector",
472
  percentage: int = 0,
473
  selector_type: bool = False,
474
+ ) -> List[HtmlElement] | "Selectors":
475
  """This function will search again for the element in the page tree, used automatically on page structure change
476
 
477
  :param element: The element we want to relocate in the tree
 
579
  adaptive: bool = False,
580
  auto_save: bool = False,
581
  percentage: int = 0,
582
+ ) -> "Selectors" | List | "TextHandlers":
583
  """Search the current tree with CSS3 selectors
584
 
585
  **Important:
 
642
  auto_save: bool = False,
643
  percentage: int = 0,
644
  **kwargs: Any,
645
+ ) -> "Selectors" | List | "TextHandlers":
646
  """Search the current tree with XPath selectors
647
 
648
  **Important:
 
706
 
707
  def find_all(
708
  self,
709
+ *args: str | Iterable[str] | Pattern | Callable | Dict[str, str],
710
  **kwargs: str,
711
  ) -> "Selectors":
712
  """Find elements by filters of your creations for ease.
 
813
 
814
  def find(
815
  self,
816
+ *args: str | Iterable[str] | Pattern | Callable | Dict[str, str],
817
  **kwargs: str,
818
+ ) -> Optional["Selector"]:
819
  """Find elements by filters of your creations for ease, then return the first result. Otherwise return `None`.
820
 
821
  :param args: Tag name(s), iterable of tag names, regex patterns, function, or a dictionary of elements' attributes. Leave empty for selecting all.
 
922
  )
923
  return score
924
 
925
+ def save(self, element: "Selector" | HtmlElement, identifier: str) -> None:
926
  """Saves the element's unique properties to the storage for retrieval and relocation later
927
 
928
  :param element: The element itself that we want to save to storage, it can be a ` Selector ` or pure ` HtmlElement `
 
967
 
968
  def re(
969
  self,
970
+ regex: str | Pattern[str],
971
  replace_entities: bool = True,
972
  clean_match: bool = False,
973
  case_sensitive: bool = True,
 
983
 
984
  def re_first(
985
  self,
986
+ regex: str | Pattern[str],
987
  default=None,
988
  replace_entities: bool = True,
989
  clean_match: bool = False,
 
1002
  )
1003
 
1004
  @staticmethod
1005
+ def __get_attributes(element: HtmlElement, ignore_attributes: List | Tuple) -> Dict:
 
 
1006
  """Return attributes dictionary without the ignored list"""
1007
  return {k: v for k, v in element.attrib.items() if k not in ignore_attributes}
1008
 
 
1011
  original: HtmlElement,
1012
  original_attributes: Dict,
1013
  candidate: HtmlElement,
1014
+ ignore_attributes: List | Tuple,
1015
  similarity_threshold: float,
1016
  match_text: bool = False,
1017
  ) -> bool:
 
1051
  def find_similar(
1052
  self,
1053
  similarity_threshold: float = 0.2,
1054
+ ignore_attributes: List | Tuple = (
1055
  "href",
1056
  "src",
1057
  ),
1058
  match_text: bool = False,
1059
+ ) -> "Selectors" | List:
1060
  """Find elements that are in the same tree depth in the page with the same tag name and same parent tag etc...
1061
  then return the ones that match the current element attributes with a percentage higher than the input threshold.
1062
 
 
1119
  partial: bool = False,
1120
  case_sensitive: bool = False,
1121
  clean_match: bool = True,
1122
+ ) -> Union["Selectors", "Selector"]:
1123
  """Find elements that its text content fully/partially matches input.
1124
  :param text: Text query to match
1125
  :param first_match: Returns the first element that matches conditions, enabled by default
 
1161
 
1162
  def find_by_regex(
1163
  self,
1164
+ query: str | Pattern[str],
1165
  first_match: bool = True,
1166
  case_sensitive: bool = False,
1167
  clean_match: bool = True,
1168
+ ) -> Union["Selectors", "Selector"]:
1169
  """Find elements that its text content matches the input regex pattern.
1170
  :param query: Regex query/pattern to match
1171
  :param first_match: Return the first element that matches conditions; enabled by default.
 
1212
  def __getitem__(self, pos: slice) -> "Selectors":
1213
  pass
1214
 
1215
+ def __getitem__(self, pos: SupportsIndex | slice) -> Selector | "Selectors":
 
 
1216
  lst = super().__getitem__(pos)
1217
  if isinstance(pos, slice):
1218
  return self.__class__(lst)
 
1226
  auto_save: bool = False,
1227
  percentage: int = 0,
1228
  **kwargs: Any,
1229
+ ) -> "Selectors":
1230
  """
1231
  Call the ``.xpath()`` method for each element in this list and return
1232
  their results as another `Selectors` class.
 
1261
  identifier: str = "",
1262
  auto_save: bool = False,
1263
  percentage: int = 0,
1264
+ ) -> "Selectors":
1265
  """
1266
  Call the ``.css()`` method for each element in this list and return
1267
  their results flattened as another `Selectors` class.
 
1288
 
1289
  def re(
1290
  self,
1291
+ regex: str | Pattern,
1292
  replace_entities: bool = True,
1293
  clean_match: bool = False,
1294
  case_sensitive: bool = True,
1295
+ ) -> TextHandlers:
1296
  """Call the ``.re()`` method for each element in this list and return
1297
  their results flattened as List of TextHandler.
1298
 
 
1309
 
1310
  def re_first(
1311
  self,
1312
+ regex: str | Pattern,
1313
  default=None,
1314
  replace_entities: bool = True,
1315
  clean_match: bool = False,
 
1329
  return result
1330
  return default
1331
 
1332
+ def search(self, func: Callable[["Selector"], bool]) -> Optional["Selector"]:
1333
  """Loop over all current elements and return the first element that matches the passed function
1334
  :param func: A function that takes each element as an argument and returns True/False
1335
  :return: The first element that match the function or ``None`` otherwise.
 
1339
  return element
1340
  return None
1341
 
1342
+ def filter(self, func: Callable[["Selector"], bool]) -> "Selectors":
1343
  """Filter current elements based on the passed function
1344
  :param func: A function that takes each element as an argument and returns True/False
1345
  :return: The new `Selectors` object or empty list otherwise.