Karim shoair committed on
Commit ·
2c25d77
1
Parent(s): f767a48
Adding new `filter` and `search` functions to parser
Browse files- scrapling/parser.py +20 -23
scrapling/parser.py
CHANGED
|
@@ -120,7 +120,7 @@ class Adaptor(SelectorsGeneration):
|
|
| 120 |
def _is_text_node(element: Union[html.HtmlElement, etree._ElementUnicodeResult]) -> bool:
|
| 121 |
"""Return True if given element is a result of a string expression
|
| 122 |
Examples:
|
| 123 |
-
|
| 124 |
CSS3 -> '::text', '::attr(attrib)'...
|
| 125 |
"""
|
| 126 |
# Faster than checking `element.is_attribute or element.is_text or element.is_tail`
|
|
@@ -1007,28 +1007,25 @@ class Adaptors(List[Adaptor]):
|
|
| 1007 |
return result
|
| 1008 |
return default
|
| 1009 |
|
| 1010 |
-
|
| 1011 |
-
|
| 1012 |
-
|
| 1013 |
-
|
| 1014 |
-
|
| 1015 |
-
|
| 1016 |
-
|
| 1017 |
-
|
| 1018 |
-
|
| 1019 |
-
|
| 1020 |
-
|
| 1021 |
-
|
| 1022 |
-
|
| 1023 |
-
|
| 1024 |
-
|
| 1025 |
-
|
| 1026 |
-
|
| 1027 |
-
|
| 1028 |
-
|
| 1029 |
-
# if len(self) == 1 and isinstance(results[0], list):
|
| 1030 |
-
# return self.__class__(results[0])
|
| 1031 |
-
# return self.__class__(results)
|
| 1032 |
|
| 1033 |
def get(self, default=None):
|
| 1034 |
"""Returns the first item of the current list
|
|
|
|
| 120 |
def _is_text_node(element: Union[html.HtmlElement, etree._ElementUnicodeResult]) -> bool:
|
| 121 |
"""Return True if given element is a result of a string expression
|
| 122 |
Examples:
|
| 123 |
+
XPath -> '/text()', '/@attribute' etc...
|
| 124 |
CSS3 -> '::text', '::attr(attrib)'...
|
| 125 |
"""
|
| 126 |
# Faster than checking `element.is_attribute or element.is_text or element.is_tail`
|
|
|
|
| 1007 |
return result
|
| 1008 |
return default
|
| 1009 |
|
| 1010 |
+
def search(self, func: Callable[['Adaptor'], bool]) -> Union['Adaptor', None]:
    """Scan the current elements in order and return the first one accepted by ``func``.

    :param func: A callable invoked with each element; a truthy result selects that element.
    :return: The first element for which ``func`` returned a truthy value, or ``None`` if none did.
    """
    found = None
    for candidate in self:
        if not func(candidate):
            continue
        # Stop at the first hit so the remaining elements are never tested.
        found = candidate
        break
    return found
|
| 1019 |
+
|
| 1020 |
+
def filter(self, func: Callable[['Adaptor'], bool]) -> Union['Adaptors', List]:
    """Filter current elements based on the passed function

    :param func: A function that takes each element as an argument and returns True/False
    :return: A new object of the same class as this one holding the elements for which ``func``
        returned a truthy value, in their original order. It is empty when nothing matches.
    """
    # Always wrap the result in `self.__class__`, even when it is empty, so the return type is
    # consistent and chained calls such as `.filter(...).get()` keep working on "no match".
    return self.__class__([element for element in self if func(element)])
|
|
|
|
|
|
|
|
|
|
| 1029 |
|
| 1030 |
def get(self, default=None):
|
| 1031 |
"""Returns the first item of the current list
|