Karim shoair committed on
Commit ·
631fd95
1
Parent(s): 8b0d492
fix: moving types to use Union again
Browse files- scrapling/core/custom_types.py +5 -3
- scrapling/parser.py +8 -8
scrapling/core/custom_types.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
from collections.abc import Mapping
|
| 2 |
from types import MappingProxyType
|
| 3 |
-
from re import compile as re_compile,
|
| 4 |
|
| 5 |
from orjson import dumps, loads
|
| 6 |
|
|
@@ -165,7 +165,7 @@ class TextHandler(str):
|
|
| 165 |
clean_match: bool = False,
|
| 166 |
case_sensitive: bool = True,
|
| 167 |
check_match: bool = False,
|
| 168 |
-
) -> "TextHandlers"
|
| 169 |
"""Apply the given regex to the current text and return a list of strings with the matches.
|
| 170 |
|
| 171 |
:param regex: Can be either a compiled regular expression or a string.
|
|
@@ -244,7 +244,9 @@ class TextHandlers(List[TextHandler]):
|
|
| 244 |
def __getitem__(self, pos: slice) -> "TextHandlers":
|
| 245 |
pass
|
| 246 |
|
| 247 |
-
def __getitem__(
|
|
|
|
|
|
|
| 248 |
lst = super().__getitem__(pos)
|
| 249 |
if isinstance(pos, slice):
|
| 250 |
lst = [TextHandler(s) for s in lst]
|
|
|
|
| 1 |
from collections.abc import Mapping
|
| 2 |
from types import MappingProxyType
|
| 3 |
+
from re import compile as re_compile, UNICODE, IGNORECASE
|
| 4 |
|
| 5 |
from orjson import dumps, loads
|
| 6 |
|
|
|
|
| 165 |
clean_match: bool = False,
|
| 166 |
case_sensitive: bool = True,
|
| 167 |
check_match: bool = False,
|
| 168 |
+
) -> Union["TextHandlers", bool]:
|
| 169 |
"""Apply the given regex to the current text and return a list of strings with the matches.
|
| 170 |
|
| 171 |
:param regex: Can be either a compiled regular expression or a string.
|
|
|
|
| 244 |
def __getitem__(self, pos: slice) -> "TextHandlers":
|
| 245 |
pass
|
| 246 |
|
| 247 |
+
def __getitem__(
|
| 248 |
+
self, pos: SupportsIndex | slice
|
| 249 |
+
) -> Union[TextHandler, "TextHandlers"]:
|
| 250 |
lst = super().__getitem__(pos)
|
| 251 |
if isinstance(pos, slice):
|
| 252 |
lst = [TextHandler(s) for s in lst]
|
scrapling/parser.py
CHANGED
|
@@ -236,7 +236,7 @@ class Selector(SelectorsGeneration):
|
|
| 236 |
|
| 237 |
def __handle_element(
|
| 238 |
self, element: HtmlElement | _ElementUnicodeResult
|
| 239 |
-
) -> Optional[TextHandler
|
| 240 |
"""Used internally in all functions to convert a single element to type (Selector|TextHandler) when possible"""
|
| 241 |
if element is None:
|
| 242 |
return None
|
|
@@ -468,10 +468,10 @@ class Selector(SelectorsGeneration):
|
|
| 468 |
# From here we start with the selecting functions
|
| 469 |
def relocate(
|
| 470 |
self,
|
| 471 |
-
element: Dict
|
| 472 |
percentage: int = 0,
|
| 473 |
selector_type: bool = False,
|
| 474 |
-
) -> List[HtmlElement]
|
| 475 |
"""This function will search again for the element in the page tree, used automatically on page structure change
|
| 476 |
|
| 477 |
:param element: The element we want to relocate in the tree
|
|
@@ -579,7 +579,7 @@ class Selector(SelectorsGeneration):
|
|
| 579 |
adaptive: bool = False,
|
| 580 |
auto_save: bool = False,
|
| 581 |
percentage: int = 0,
|
| 582 |
-
) -> "Selectors"
|
| 583 |
"""Search the current tree with CSS3 selectors
|
| 584 |
|
| 585 |
**Important:
|
|
@@ -642,7 +642,7 @@ class Selector(SelectorsGeneration):
|
|
| 642 |
auto_save: bool = False,
|
| 643 |
percentage: int = 0,
|
| 644 |
**kwargs: Any,
|
| 645 |
-
) -> "Selectors"
|
| 646 |
"""Search the current tree with XPath selectors
|
| 647 |
|
| 648 |
**Important:
|
|
@@ -927,7 +927,7 @@ class Selector(SelectorsGeneration):
|
|
| 927 |
)
|
| 928 |
return score
|
| 929 |
|
| 930 |
-
def save(self, element: "Selector"
|
| 931 |
"""Saves the element's unique properties to the storage for retrieval and relocation later
|
| 932 |
|
| 933 |
:param element: The element itself that we want to save to storage, it can be a ` Selector ` or pure ` HtmlElement `
|
|
@@ -1061,7 +1061,7 @@ class Selector(SelectorsGeneration):
|
|
| 1061 |
"src",
|
| 1062 |
),
|
| 1063 |
match_text: bool = False,
|
| 1064 |
-
) -> "Selectors"
|
| 1065 |
"""Find elements that are in the same tree depth in the page with the same tag name and same parent tag etc...
|
| 1066 |
then return the ones that match the current element attributes with a percentage higher than the input threshold.
|
| 1067 |
|
|
@@ -1217,7 +1217,7 @@ class Selectors(List[Selector]):
|
|
| 1217 |
def __getitem__(self, pos: slice) -> "Selectors":
|
| 1218 |
pass
|
| 1219 |
|
| 1220 |
-
def __getitem__(self, pos: SupportsIndex | slice) -> Selector
|
| 1221 |
lst = super().__getitem__(pos)
|
| 1222 |
if isinstance(pos, slice):
|
| 1223 |
return self.__class__(lst)
|
|
|
|
| 236 |
|
| 237 |
def __handle_element(
|
| 238 |
self, element: HtmlElement | _ElementUnicodeResult
|
| 239 |
+
) -> Optional[Union[TextHandler, "Selector"]]:
|
| 240 |
"""Used internally in all functions to convert a single element to type (Selector|TextHandler) when possible"""
|
| 241 |
if element is None:
|
| 242 |
return None
|
|
|
|
| 468 |
# From here we start with the selecting functions
|
| 469 |
def relocate(
|
| 470 |
self,
|
| 471 |
+
element: Union[Dict, HtmlElement, "Selector"],
|
| 472 |
percentage: int = 0,
|
| 473 |
selector_type: bool = False,
|
| 474 |
+
) -> Union[List[HtmlElement], "Selectors"]:
|
| 475 |
"""This function will search again for the element in the page tree, used automatically on page structure change
|
| 476 |
|
| 477 |
:param element: The element we want to relocate in the tree
|
|
|
|
| 579 |
adaptive: bool = False,
|
| 580 |
auto_save: bool = False,
|
| 581 |
percentage: int = 0,
|
| 582 |
+
) -> Union["Selectors", List, "TextHandlers"]:
|
| 583 |
"""Search the current tree with CSS3 selectors
|
| 584 |
|
| 585 |
**Important:
|
|
|
|
| 642 |
auto_save: bool = False,
|
| 643 |
percentage: int = 0,
|
| 644 |
**kwargs: Any,
|
| 645 |
+
) -> Union["Selectors", List, "TextHandlers"]:
|
| 646 |
"""Search the current tree with XPath selectors
|
| 647 |
|
| 648 |
**Important:
|
|
|
|
| 927 |
)
|
| 928 |
return score
|
| 929 |
|
| 930 |
+
def save(self, element: Union["Selector", HtmlElement], identifier: str) -> None:
|
| 931 |
"""Saves the element's unique properties to the storage for retrieval and relocation later
|
| 932 |
|
| 933 |
:param element: The element itself that we want to save to storage, it can be a ` Selector ` or pure ` HtmlElement `
|
|
|
|
| 1061 |
"src",
|
| 1062 |
),
|
| 1063 |
match_text: bool = False,
|
| 1064 |
+
) -> Union["Selectors", List]:
|
| 1065 |
"""Find elements that are in the same tree depth in the page with the same tag name and same parent tag etc...
|
| 1066 |
then return the ones that match the current element attributes with a percentage higher than the input threshold.
|
| 1067 |
|
|
|
|
| 1217 |
def __getitem__(self, pos: slice) -> "Selectors":
|
| 1218 |
pass
|
| 1219 |
|
| 1220 |
+
def __getitem__(self, pos: SupportsIndex | slice) -> Union[Selector, "Selectors"]:
|
| 1221 |
lst = super().__getitem__(pos)
|
| 1222 |
if isinstance(pos, slice):
|
| 1223 |
return self.__class__(lst)
|