Karim shoair committed on
Commit
4e52121
·
1 Parent(s): 1ba0746

Adding `css_first` and `xpath_first` for easier usage

Browse files
Files changed (2) hide show
  1. README.md +3 -3
  2. scrapling/parser.py +52 -0
README.md CHANGED
@@ -59,7 +59,7 @@ quotes = page.css('.quote').css('.text::text') # Chained selectors
59
  quotes = [element.text for element in page.css('.quote').css('.text')] # Slower than bulk query above
60
 
61
  # Get the first quote element
62
- quote = page.css('.quote').first # or [0] or .get()
63
 
64
  # Working with elements
65
  quote.html_content # Inner HTML
@@ -244,8 +244,8 @@ To increase the complexity a little bit, let's say we want to get all books' dat
244
  ```python
245
  >>> for product in page.find_by_text('Tipping the Velvet').parent.parent.find_similar():
246
  print({
247
- "name": product.css('h3 a::text')[0],
248
- "price": product.css('.price_color')[0].re_first(r'[\d\.]+'),
249
  "stock": product.css('.availability::text')[-1].clean()
250
  })
251
  {'name': 'A Light in the ...', 'price': '51.77', 'stock': 'In stock'}
 
59
  quotes = [element.text for element in page.css('.quote').css('.text')] # Slower than bulk query above
60
 
61
  # Get the first quote element
62
+ quote = page.css_first('.quote') # or page.css('.quote').first or [0] or .get()
63
 
64
  # Working with elements
65
  quote.html_content # Inner HTML
 
244
  ```python
245
  >>> for product in page.find_by_text('Tipping the Velvet').parent.parent.find_similar():
246
  print({
247
+ "name": product.css_first('h3 a::text'),
248
+ "price": product.css_first('.price_color').re_first(r'[\d\.]+'),
249
  "stock": product.css('.availability::text')[-1].clean()
250
  })
251
  {'name': 'A Light in the ...', 'price': '51.77', 'stock': 'In stock'}
scrapling/parser.py CHANGED
@@ -394,6 +394,58 @@ class Adaptor(SelectorsGeneration):
394
  return self.__convert_results(score_table[highest_probability])
395
  return []
396
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
397
  def css(self, selector: str, identifier: str = '',
398
  auto_match: bool = False, auto_save: bool = False, percentage: int = 0
399
  ) -> Union['Adaptors[Adaptor]', List]:
 
394
  return self.__convert_results(score_table[highest_probability])
395
  return []
396
 
397
+ def css_first(self, selector: str, identifier: str = '',
398
+ auto_match: bool = False, auto_save: bool = False, percentage: int = 0
399
+ ) -> Union['Adaptors[Adaptor]', List, None]:
400
+ """Search current tree with CSS3 selectors and return the first result if possible, otherwise return `None`
401
+
402
+ **Important:
403
+ It's recommended to use the identifier argument if you plan to use a different selector later
404
+ and want to relocate the same element(s)**
405
+
406
+ :param selector: The CSS3 selector to be used.
407
+ :param auto_match: When enabled, the function will try to relocate the element if it was 'saved' before
408
+ :param identifier: A string that will be used to save/retrieve element's data in auto-matching
409
+ otherwise the selector will be used.
410
+ :param auto_save: Automatically save new elements for `auto_match` later
411
+ :param percentage: The minimum percentage to accept while auto-matching and not going lower than that.
412
+ Be aware that the percentage calculation depends solely on the page structure so don't play with this
413
+ number unless you know what you are doing!
414
+
415
+ :return: The first matched result, or `None` if nothing matched
416
+ """
417
+ try:
418
+ return self.css(selector, identifier, auto_match, auto_save, percentage)[0]
419
+ except (IndexError, TypeError,):
420
+ return None
421
+
422
+ def xpath_first(self, selector: str, identifier: str = '',
423
+ auto_match: bool = False, auto_save: bool = False, percentage: int = 0, **kwargs: Any
424
+ ) -> Union['Adaptors[Adaptor]', List, None]:
425
+ """Search current tree with XPath selectors and return the first result if possible, otherwise return `None`
426
+
427
+ **Important:
428
+ It's recommended to use the identifier argument if you plan to use a different selector later
429
+ and want to relocate the same element(s)**
430
+
431
+ Note: **Additional keyword arguments will be passed as XPath variables in the XPath expression!**
432
+
433
+ :param selector: The XPath selector to be used.
434
+ :param auto_match: When enabled, the function will try to relocate the element if it was 'saved' before
435
+ :param identifier: A string that will be used to save/retrieve element's data in auto-matching
436
+ otherwise the selector will be used.
437
+ :param auto_save: Automatically save new elements for `auto_match` later
438
+ :param percentage: The minimum percentage to accept while auto-matching and not going lower than that.
439
+ Be aware that the percentage calculation depends solely on the page structure so don't play with this
440
+ number unless you know what you are doing!
441
+
442
+ :return: The first matched result, or `None` if nothing matched
443
+ """
444
+ try:
445
+ return self.xpath(selector, identifier, auto_match, auto_save, percentage, **kwargs)[0]
446
+ except (IndexError, TypeError,):
447
+ return None
448
+
449
  def css(self, selector: str, identifier: str = '',
450
  auto_match: bool = False, auto_save: bool = False, percentage: int = 0
451
  ) -> Union['Adaptors[Adaptor]', List]: