Karim shoair committed on
Commit ·
4893321
1
Parent(s): 7113c4e
docs: update doc strings with correct naming
Browse files
- docs/contributing.md +1 -1
- scrapling/parser.py +17 -17
- tests/parser/test_adaptive.py +2 -2
docs/contributing.md
CHANGED
|
@@ -68,7 +68,7 @@ We use:
|
|
| 68 |
|
| 69 |
Example:
|
| 70 |
```
|
| 71 |
-
feat: add
|
| 72 |
|
| 73 |
- Added find_similar() method
|
| 74 |
- Implemented pattern matching
|
|
|
|
| 68 |
|
| 69 |
Example:
|
| 70 |
```
|
| 71 |
+
feat: add `adaptive` for similar elements
|
| 72 |
|
| 73 |
- Added find_similar() method
|
| 74 |
- Implemented pattern matching
|
scrapling/parser.py
CHANGED
|
@@ -103,9 +103,9 @@ class Selector(SelectorsGeneration):
|
|
| 103 |
Don't use it unless you know what you are doing!
|
| 104 |
:param keep_comments: While parsing the HTML body, drop comments or not. Disabled by default for obvious reasons
|
| 105 |
:param keep_cdata: While parsing the HTML body, drop cdata or not. Disabled by default for cleaner HTML.
|
| 106 |
-
:param adaptive: Globally turn off the
|
| 107 |
-
priority over all
|
| 108 |
-
:param storage: The storage class to be passed for
|
| 109 |
:param storage_args: A dictionary of ``argument->value`` pairs to be passed for the storage class.
|
| 110 |
If empty, default values will be used.
|
| 111 |
"""
|
|
@@ -544,10 +544,10 @@ class Selector(SelectorsGeneration):
|
|
| 544 |
|
| 545 |
:param selector: The CSS3 selector to be used.
|
| 546 |
:param adaptive: Enabled will make the function try to relocate the element if it was 'saved' before
|
| 547 |
-
:param identifier: A string that will be used to save/retrieve element's data in
|
| 548 |
otherwise the selector will be used.
|
| 549 |
:param auto_save: Automatically save new elements for `adaptive` later
|
| 550 |
-
:param percentage: The minimum percentage to accept while
|
| 551 |
Be aware that the percentage calculation depends solely on the page structure, so don't play with this
|
| 552 |
number unless you must know what you are doing!
|
| 553 |
"""
|
|
@@ -581,10 +581,10 @@ class Selector(SelectorsGeneration):
|
|
| 581 |
|
| 582 |
:param selector: The XPath selector to be used.
|
| 583 |
:param adaptive: Enabled will make the function try to relocate the element if it was 'saved' before
|
| 584 |
-
:param identifier: A string that will be used to save/retrieve element's data in
|
| 585 |
otherwise the selector will be used.
|
| 586 |
:param auto_save: Automatically save new elements for `adaptive` later
|
| 587 |
-
:param percentage: The minimum percentage to accept while
|
| 588 |
Be aware that the percentage calculation depends solely on the page structure, so don't play with this
|
| 589 |
number unless you must know what you are doing!
|
| 590 |
"""
|
|
@@ -617,10 +617,10 @@ class Selector(SelectorsGeneration):
|
|
| 617 |
|
| 618 |
:param selector: The CSS3 selector to be used.
|
| 619 |
:param adaptive: Enabled will make the function try to relocate the element if it was 'saved' before
|
| 620 |
-
:param identifier: A string that will be used to save/retrieve element's data in
|
| 621 |
otherwise the selector will be used.
|
| 622 |
:param auto_save: Automatically save new elements for `adaptive` later
|
| 623 |
-
:param percentage: The minimum percentage to accept while
|
| 624 |
Be aware that the percentage calculation depends solely on the page structure, so don't play with this
|
| 625 |
number unless you must know what you are doing!
|
| 626 |
|
|
@@ -681,10 +681,10 @@ class Selector(SelectorsGeneration):
|
|
| 681 |
|
| 682 |
:param selector: The XPath selector to be used.
|
| 683 |
:param adaptive: Enabled will make the function try to relocate the element if it was 'saved' before
|
| 684 |
-
:param identifier: A string that will be used to save/retrieve element's data in
|
| 685 |
otherwise the selector will be used.
|
| 686 |
:param auto_save: Automatically save new elements for `adaptive` later
|
| 687 |
-
:param percentage: The minimum percentage to accept while
|
| 688 |
Be aware that the percentage calculation depends solely on the page structure, so don't play with this
|
| 689 |
number unless you must know what you are doing!
|
| 690 |
|
|
@@ -971,7 +971,7 @@ class Selector(SelectorsGeneration):
|
|
| 971 |
self._storage.save(element, identifier)
|
| 972 |
else:
|
| 973 |
log.critical(
|
| 974 |
-
"Can't use
|
| 975 |
)
|
| 976 |
|
| 977 |
def retrieve(self, identifier: str) -> Optional[Dict[str, Any]]:
|
|
@@ -985,7 +985,7 @@ class Selector(SelectorsGeneration):
|
|
| 985 |
return self._storage.retrieve(identifier)
|
| 986 |
|
| 987 |
log.critical(
|
| 988 |
-
"Can't use
|
| 989 |
)
|
| 990 |
return None
|
| 991 |
|
|
@@ -1266,10 +1266,10 @@ class Selectors(List[Selector]):
|
|
| 1266 |
Note: **Additional keyword arguments will be passed as XPath variables in the XPath expression!**
|
| 1267 |
|
| 1268 |
:param selector: The XPath selector to be used.
|
| 1269 |
-
:param identifier: A string that will be used to retrieve element's data in
|
| 1270 |
otherwise the selector will be used.
|
| 1271 |
:param auto_save: Automatically save new elements for `adaptive` later
|
| 1272 |
-
:param percentage: The minimum percentage to accept while
|
| 1273 |
Be aware that the percentage calculation depends solely on the page structure, so don't play with this
|
| 1274 |
number unless you must know what you are doing!
|
| 1275 |
|
|
@@ -1299,10 +1299,10 @@ class Selectors(List[Selector]):
|
|
| 1299 |
and want to relocate the same element(s)**
|
| 1300 |
|
| 1301 |
:param selector: The CSS3 selector to be used.
|
| 1302 |
-
:param identifier: A string that will be used to retrieve element's data in
|
| 1303 |
otherwise the selector will be used.
|
| 1304 |
:param auto_save: Automatically save new elements for `adaptive` later
|
| 1305 |
-
:param percentage: The minimum percentage to accept while
|
| 1306 |
Be aware that the percentage calculation depends solely on the page structure, so don't play with this
|
| 1307 |
number unless you must know what you are doing!
|
| 1308 |
|
|
|
|
| 103 |
Don't use it unless you know what you are doing!
|
| 104 |
:param keep_comments: While parsing the HTML body, drop comments or not. Disabled by default for obvious reasons
|
| 105 |
:param keep_cdata: While parsing the HTML body, drop cdata or not. Disabled by default for cleaner HTML.
|
| 106 |
+
:param adaptive: Globally turn off the adaptive feature in all functions, this argument takes higher
|
| 107 |
+
priority over all adaptive related arguments/functions in the class.
|
| 108 |
+
:param storage: The storage class to be passed for adaptive functionalities, see ``Docs`` for more info.
|
| 109 |
:param storage_args: A dictionary of ``argument->value`` pairs to be passed for the storage class.
|
| 110 |
If empty, default values will be used.
|
| 111 |
"""
|
|
|
|
| 544 |
|
| 545 |
:param selector: The CSS3 selector to be used.
|
| 546 |
:param adaptive: Enabled will make the function try to relocate the element if it was 'saved' before
|
| 547 |
+
:param identifier: A string that will be used to save/retrieve element's data in adaptive,
|
| 548 |
otherwise the selector will be used.
|
| 549 |
:param auto_save: Automatically save new elements for `adaptive` later
|
| 550 |
+
:param percentage: The minimum percentage to accept while `adaptive` is working and not going lower than that.
|
| 551 |
Be aware that the percentage calculation depends solely on the page structure, so don't play with this
|
| 552 |
number unless you must know what you are doing!
|
| 553 |
"""
|
|
|
|
| 581 |
|
| 582 |
:param selector: The XPath selector to be used.
|
| 583 |
:param adaptive: Enabled will make the function try to relocate the element if it was 'saved' before
|
| 584 |
+
:param identifier: A string that will be used to save/retrieve element's data in adaptive,
|
| 585 |
otherwise the selector will be used.
|
| 586 |
:param auto_save: Automatically save new elements for `adaptive` later
|
| 587 |
+
:param percentage: The minimum percentage to accept while `adaptive` is working and not going lower than that.
|
| 588 |
Be aware that the percentage calculation depends solely on the page structure, so don't play with this
|
| 589 |
number unless you must know what you are doing!
|
| 590 |
"""
|
|
|
|
| 617 |
|
| 618 |
:param selector: The CSS3 selector to be used.
|
| 619 |
:param adaptive: Enabled will make the function try to relocate the element if it was 'saved' before
|
| 620 |
+
:param identifier: A string that will be used to save/retrieve element's data in adaptive,
|
| 621 |
otherwise the selector will be used.
|
| 622 |
:param auto_save: Automatically save new elements for `adaptive` later
|
| 623 |
+
:param percentage: The minimum percentage to accept while `adaptive` is working and not going lower than that.
|
| 624 |
Be aware that the percentage calculation depends solely on the page structure, so don't play with this
|
| 625 |
number unless you must know what you are doing!
|
| 626 |
|
|
|
|
| 681 |
|
| 682 |
:param selector: The XPath selector to be used.
|
| 683 |
:param adaptive: Enabled will make the function try to relocate the element if it was 'saved' before
|
| 684 |
+
:param identifier: A string that will be used to save/retrieve element's data in adaptive,
|
| 685 |
otherwise the selector will be used.
|
| 686 |
:param auto_save: Automatically save new elements for `adaptive` later
|
| 687 |
+
:param percentage: The minimum percentage to accept while `adaptive` is working and not going lower than that.
|
| 688 |
Be aware that the percentage calculation depends solely on the page structure, so don't play with this
|
| 689 |
number unless you must know what you are doing!
|
| 690 |
|
|
|
|
| 971 |
self._storage.save(element, identifier)
|
| 972 |
else:
|
| 973 |
log.critical(
|
| 974 |
+
"Can't use `adaptive` features while it's disabled globally, you have to start a new class instance."
|
| 975 |
)
|
| 976 |
|
| 977 |
def retrieve(self, identifier: str) -> Optional[Dict[str, Any]]:
|
|
|
|
| 985 |
return self._storage.retrieve(identifier)
|
| 986 |
|
| 987 |
log.critical(
|
| 988 |
+
"Can't use `adaptive` features while it's disabled globally, you have to start a new class instance."
|
| 989 |
)
|
| 990 |
return None
|
| 991 |
|
|
|
|
| 1266 |
Note: **Additional keyword arguments will be passed as XPath variables in the XPath expression!**
|
| 1267 |
|
| 1268 |
:param selector: The XPath selector to be used.
|
| 1269 |
+
:param identifier: A string that will be used to retrieve element's data in adaptive,
|
| 1270 |
otherwise the selector will be used.
|
| 1271 |
:param auto_save: Automatically save new elements for `adaptive` later
|
| 1272 |
+
:param percentage: The minimum percentage to accept while `adaptive` is working and not going lower than that.
|
| 1273 |
Be aware that the percentage calculation depends solely on the page structure, so don't play with this
|
| 1274 |
number unless you must know what you are doing!
|
| 1275 |
|
|
|
|
| 1299 |
and want to relocate the same element(s)**
|
| 1300 |
|
| 1301 |
:param selector: The CSS3 selector to be used.
|
| 1302 |
+
:param identifier: A string that will be used to retrieve element's data in adaptive,
|
| 1303 |
otherwise the selector will be used.
|
| 1304 |
:param auto_save: Automatically save new elements for `adaptive` later
|
| 1305 |
+
:param percentage: The minimum percentage to accept while `adaptive` is working and not going lower than that.
|
| 1306 |
Be aware that the percentage calculation depends solely on the page structure, so don't play with this
|
| 1307 |
number unless you must know what you are doing!
|
| 1308 |
|
tests/parser/test_adaptive.py
CHANGED
|
@@ -47,7 +47,7 @@ class TestParserAdaptive:
|
|
| 47 |
new_page = Selector(changed_html, url="example.com", adaptive=True)
|
| 48 |
|
| 49 |
# 'p1' was used as ID and now it's not and all the path elements have changes
|
| 50 |
-
# Also at the same time testing
|
| 51 |
_ = old_page.css("#p1, #p2", auto_save=True)[0]
|
| 52 |
relocated = new_page.css("#p1", adaptive=True)
|
| 53 |
|
|
@@ -101,7 +101,7 @@ class TestParserAdaptive:
|
|
| 101 |
new_page = Selector(changed_html, url="example.com", adaptive=True)
|
| 102 |
|
| 103 |
# 'p1' was used as ID and now it's not and all the path elements have changes
|
| 104 |
-
# Also at the same time testing
|
| 105 |
_ = old_page.css("#p1, #p2", auto_save=True)[0]
|
| 106 |
relocated = new_page.css("#p1", adaptive=True)
|
| 107 |
|
|
|
|
| 47 |
new_page = Selector(changed_html, url="example.com", adaptive=True)
|
| 48 |
|
| 49 |
# 'p1' was used as ID and now it's not and all the path elements have changes
|
| 50 |
+
# Also at the same time testing `adaptive` vs combined selectors
|
| 51 |
_ = old_page.css("#p1, #p2", auto_save=True)[0]
|
| 52 |
relocated = new_page.css("#p1", adaptive=True)
|
| 53 |
|
|
|
|
| 101 |
new_page = Selector(changed_html, url="example.com", adaptive=True)
|
| 102 |
|
| 103 |
# 'p1' was used as ID and now it's not and all the path elements have changes
|
| 104 |
+
# Also at the same time testing `adaptive` vs combined selectors
|
| 105 |
_ = old_page.css("#p1, #p2", auto_save=True)[0]
|
| 106 |
relocated = new_page.css("#p1", adaptive=True)
|
| 107 |
|