Karim shoair committed on
Commit
6957d50
·
1 Parent(s): 5b23915

refactor(parser/find_by_text): Better implementation for ~20% speed boost

Browse files
Files changed (1) hide show
  1. scrapling/parser.py +12 -19
scrapling/parser.py CHANGED
@@ -907,32 +907,25 @@ class Adaptor(SelectorsGeneration):
907
  if not case_sensitive:
908
  text = text.lower()
909
 
910
- def _traverse(node: Adaptor) -> None:
 
911
  """Check if element matches given text otherwise, traverse the children tree and iterate"""
912
  node_text = node.text
913
- # if there's already no text in this node, dodge it to save CPU cycles and time
914
- if node_text:
915
- if clean_match:
916
- node_text = node_text.clean()
917
 
918
- if not case_sensitive:
919
- node_text = node_text.lower()
920
 
921
- if partial:
922
- if text in node_text:
923
- results.append(node)
924
- elif text == node_text:
925
  results.append(node)
 
 
926
 
927
- if results and first_match:
928
  # we got an element so we should stop
929
- return
930
-
931
- for branch in node.children:
932
- _traverse(branch)
933
-
934
- # This will block until we traverse all children/branches
935
- _traverse(self)
936
 
937
  if first_match:
938
  if results:
 
907
  if not case_sensitive:
908
  text = text.lower()
909
 
910
+ # This selector gets all elements with text content
911
+ for node in self.__handle_elements(self._root.xpath('//*[normalize-space(text())]')):
912
  """Check if element matches given text otherwise, traverse the children tree and iterate"""
913
  node_text = node.text
914
+ if clean_match:
915
+ node_text = node_text.clean()
 
 
916
 
917
+ if not case_sensitive:
918
+ node_text = node_text.lower()
919
 
920
+ if partial:
921
+ if text in node_text:
 
 
922
  results.append(node)
923
+ elif text == node_text:
924
+ results.append(node)
925
 
926
+ if first_match and results:
927
  # we got an element so we should stop
928
+ break
 
 
 
 
 
 
929
 
930
  if first_match:
931
  if results: