Karim shoair committed on
Commit
2c25d77
·
1 Parent(s): f767a48

Adding new `filter` and `search` functions to parser

Browse files
Files changed (1) hide show
  1. scrapling/parser.py +20 -23
scrapling/parser.py CHANGED
@@ -120,7 +120,7 @@ class Adaptor(SelectorsGeneration):
120
  def _is_text_node(element: Union[html.HtmlElement, etree._ElementUnicodeResult]) -> bool:
121
  """Return True if given element is a result of a string expression
122
  Examples:
123
- Xpath -> '/text()', '/@attribute' etc...
124
  CSS3 -> '::text', '::attr(attrib)'...
125
  """
126
  # Faster than checking `element.is_attribute or element.is_text or element.is_tail`
@@ -1007,28 +1007,25 @@ class Adaptors(List[Adaptor]):
1007
  return result
1008
  return default
1009
 
1010
- # def __getattr__(self, name):
1011
- # if name in dir(self.__class__):
1012
- # return super().__getattribute__(name)
1013
- #
1014
- # # Execute the method itself on each Adaptor
1015
- # results = []
1016
- # for item in self:
1017
- # results.append(getattr(item, name))
1018
- #
1019
- # if all(callable(r) for r in results):
1020
- # def call_all(*args, **kwargs):
1021
- # final_results = [r(*args, **kwargs) for r in results]
1022
- # if all([isinstance(r, (Adaptor, Adaptors,)) for r in results]):
1023
- # return self.__class__(final_results)
1024
- # return final_results
1025
- #
1026
- # return call_all
1027
- # else:
1028
- # # Flatten the result if it's a single-item list containing a list
1029
- # if len(self) == 1 and isinstance(results[0], list):
1030
- # return self.__class__(results[0])
1031
- # return self.__class__(results)
1032
 
1033
  def get(self, default=None):
1034
  """Returns the first item of the current list
 
120
  def _is_text_node(element: Union[html.HtmlElement, etree._ElementUnicodeResult]) -> bool:
121
  """Return True if given element is a result of a string expression
122
  Examples:
123
+ XPath -> '/text()', '/@attribute' etc...
124
  CSS3 -> '::text', '::attr(attrib)'...
125
  """
126
  # Faster than checking `element.is_attribute or element.is_text or element.is_tail`
 
1007
  return result
1008
  return default
1009
 
1010
def search(self, func: Callable[['Adaptor'], bool]) -> Union['Adaptor', None]:
    """Return the first element of this collection accepted by ``func``.

    Elements are visited in order and iteration stops at the first hit.

    :param func: A predicate called with each element; a truthy return value marks a match.
    :return: The first element that matches the predicate, or ``None`` if none does.
    """
    for candidate in self:
        if not func(candidate):
            # Not a match, keep scanning
            continue
        return candidate
    return None
1019
+
1020
def filter(self, func: Callable[['Adaptor'], bool]) -> 'Adaptors':
    """Filter current elements based on the passed function

    The result is always an instance of the same class as ``self``, even when no
    element matches, so calls can be chained safely (e.g. ``.filter(f).filter(g)``
    or ``.filter(f).search(g)``). An empty result is still a falsy list that
    compares equal to ``[]``.

    :param func: A function that takes each element as an argument and returns True/False
    :return: A new collection of the same type holding the matching elements, in their
        original order; it is empty when nothing matches.
    """
    # Always wrap in the current class: returning a bare list for the empty case
    # would strip the collection's methods from the result.
    return self.__class__([
        element for element in self if func(element)
    ])
 
 
 
1029
 
1030
  def get(self, default=None):
1031
  """Returns the first item of the current list