Karim shoair committed on
Commit ·
6957d50
1
Parent(s): 5b23915
refactor(parser/find_by_text): Better implementation for ~20% speed boost
Browse files- scrapling/parser.py +12 -19
scrapling/parser.py
CHANGED
|
@@ -907,32 +907,25 @@ class Adaptor(SelectorsGeneration):
|
|
| 907 |
if not case_sensitive:
|
| 908 |
text = text.lower()
|
| 909 |
|
| 910 |
-
|
|
|
|
| 911 |
"""Check if element matches given text otherwise, traverse the children tree and iterate"""
|
| 912 |
node_text = node.text
|
| 913 |
-
|
| 914 |
-
|
| 915 |
-
if clean_match:
|
| 916 |
-
node_text = node_text.clean()
|
| 917 |
|
| 918 |
-
|
| 919 |
-
|
| 920 |
|
| 921 |
-
|
| 922 |
-
|
| 923 |
-
results.append(node)
|
| 924 |
-
elif text == node_text:
|
| 925 |
results.append(node)
|
|
|
|
|
|
|
| 926 |
|
| 927 |
-
if
|
| 928 |
# we got an element so we should stop
|
| 929 |
-
|
| 930 |
-
|
| 931 |
-
for branch in node.children:
|
| 932 |
-
_traverse(branch)
|
| 933 |
-
|
| 934 |
-
# This will block until we traverse all children/branches
|
| 935 |
-
_traverse(self)
|
| 936 |
|
| 937 |
if first_match:
|
| 938 |
if results:
|
|
|
|
| 907 |
if not case_sensitive:
|
| 908 |
text = text.lower()
|
| 909 |
|
| 910 |
+
# This selector gets all elements with text content
|
| 911 |
+
for node in self.__handle_elements(self._root.xpath('//*[normalize-space(text())]')):
|
| 912 |
"""Check if element matches given text otherwise, traverse the children tree and iterate"""
|
| 913 |
node_text = node.text
|
| 914 |
+
if clean_match:
|
| 915 |
+
node_text = node_text.clean()
|
|
|
|
|
|
|
| 916 |
|
| 917 |
+
if not case_sensitive:
|
| 918 |
+
node_text = node_text.lower()
|
| 919 |
|
| 920 |
+
if partial:
|
| 921 |
+
if text in node_text:
|
|
|
|
|
|
|
| 922 |
results.append(node)
|
| 923 |
+
elif text == node_text:
|
| 924 |
+
results.append(node)
|
| 925 |
|
| 926 |
+
if first_match and results:
|
| 927 |
# we got an element so we should stop
|
| 928 |
+
break
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 929 |
|
| 930 |
if first_match:
|
| 931 |
if results:
|