Karim shoair committed on
Commit
fb54505
·
1 Parent(s): 2be5705

Updating the keywords whitelisting logic in `find`/`find_all` functions

Browse files
Files changed (1) hide show
  1. scrapling/parser.py +7 -3
scrapling/parser.py CHANGED
@@ -551,9 +551,10 @@ class Adaptor(SelectorsGeneration):
551
  """
552
  # Attributes that are Python reserved words and can't be used directly
553
  # Ex: find_all('a', class="blah") -> find_all('a', class_="blah")
 
554
  whitelisted = {
555
- 'id_': 'id',
556
  'class_': 'class',
 
557
  }
558
 
559
  if not args and not kwargs:
@@ -582,14 +583,17 @@ class Adaptor(SelectorsGeneration):
582
 
583
  if not all([(type(k) is str and type(v) is str) for k, v in kwargs.items()]):
584
  raise TypeError('Only string values are accepted for arguments')
585
- attributes.update(kwargs)
 
 
 
 
586
 
587
  # It's easier and faster to build a selector than traversing the tree
588
  tags = tags or ['']
589
  for tag in tags:
590
  selector = tag
591
  for key, value in attributes.items():
592
- key = whitelisted.get(key, key)
593
  value = value.replace('"', r'\"') # Escape double quotes in user input
594
  # Not escaping anything with the key so the user can pass patterns like {'href*': '/p/'} or get errors :)
595
  selector += '[{}="{}"]'.format(key, value)
 
551
  """
552
  # Attributes that are Python reserved words and can't be used directly
553
  # Ex: find_all('a', class="blah") -> find_all('a', class_="blah")
554
+ # https://www.w3schools.com/python/python_ref_keywords.asp
555
  whitelisted = {
 
556
  'class_': 'class',
557
+ 'for_': 'for',
558
  }
559
 
560
  if not args and not kwargs:
 
583
 
584
  if not all([(type(k) is str and type(v) is str) for k, v in kwargs.items()]):
585
  raise TypeError('Only string values are accepted for arguments')
586
+
587
+ for attribute_name, value in kwargs.items():
588
+ # Only replace names for kwargs, replacing them in dictionaries doesn't make sense
589
+ attribute_name = whitelisted.get(attribute_name, attribute_name)
590
+ attributes[attribute_name] = value
591
 
592
  # It's easier and faster to build a selector than traversing the tree
593
  tags = tags or ['']
594
  for tag in tags:
595
  selector = tag
596
  for key, value in attributes.items():
 
597
  value = value.replace('"', r'\"') # Escape double quotes in user input
598
  # Not escaping anything with the key so the user can pass patterns like {'href*': '/p/'} or get errors :)
599
  selector += '[{}="{}"]'.format(key, value)