Karim shoair committed on
Commit
20efe8c
·
1 Parent(s): 8400659

fix(parser): Solve the ignored elements' children issue while keeping speed

Browse files
Files changed (1) hide show
  1. scrapling/parser.py +12 -6
scrapling/parser.py CHANGED
@@ -291,15 +291,21 @@ class Adaptor(SelectorsGeneration):
291
 
292
  :return: A TextHandler
293
  """
 
 
 
 
 
 
 
294
  _all_strings = []
295
  for node in self._root.xpath(".//*"):
296
- if node.tag not in ignore_tags:
297
  text = node.text
298
- if text and type(text) is str:
299
- if valid_values and text.strip():
300
- _all_strings.append(text if not strip else text.strip())
301
- else:
302
- _all_strings.append(text if not strip else text.strip())
303
 
304
  return TextHandler(separator.join(_all_strings))
305
 
 
291
 
292
  :return: A TextHandler
293
  """
294
+ ignored_elements = set()
295
+ if ignore_tags:
296
+ for tag in ignore_tags:
297
+ for element in self._root.xpath(f".//{tag}"):
298
+ ignored_elements.add(element)
299
+ ignored_elements.update(element.xpath(".//*"))
300
+
301
  _all_strings = []
302
  for node in self._root.xpath(".//*"):
303
+ if node not in ignored_elements:
304
  text = node.text
305
+ if text and isinstance(text, str):
306
+ processed_text = text.strip() if strip else text
307
+ if not valid_values or processed_text.strip():
308
+ _all_strings.append(processed_text)
 
309
 
310
  return TextHandler(separator.join(_all_strings))
311