Karim shoair committed on
Commit ·
488acaf
1
Parent(s): 5d8eb71
test: Rewrite automatch tests to a cleaner version and add an async test
Browse files
tests/parser/test_automatch.py
CHANGED
|
@@ -1,10 +1,11 @@
|
|
| 1 |
-
import
|
| 2 |
|
| 3 |
-
|
| 4 |
|
|
|
|
| 5 |
|
| 6 |
-
class TestParserAutoMatch(unittest.TestCase):
|
| 7 |
|
|
|
|
| 8 |
def test_element_relocation(self):
|
| 9 |
"""Test relocating element after structure change"""
|
| 10 |
original_html = '''
|
|
@@ -50,7 +51,61 @@ class TestParserAutoMatch(unittest.TestCase):
|
|
| 50 |
_ = old_page.css('#p1, #p2', auto_save=True)[0]
|
| 51 |
relocated = new_page.css('#p1', auto_match=True)
|
| 52 |
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import asyncio
|
| 2 |
|
| 3 |
+
import pytest
|
| 4 |
|
| 5 |
+
from scrapling import Adaptor
|
| 6 |
|
|
|
|
| 7 |
|
| 8 |
+
class TestParserAutoMatch:
|
| 9 |
def test_element_relocation(self):
|
| 10 |
"""Test relocating element after structure change"""
|
| 11 |
original_html = '''
|
|
|
|
| 51 |
_ = old_page.css('#p1, #p2', auto_save=True)[0]
|
| 52 |
relocated = new_page.css('#p1', auto_match=True)
|
| 53 |
|
| 54 |
+
assert relocated is not None
|
| 55 |
+
assert relocated[0].attrib['data-id'] == 'p1'
|
| 56 |
+
assert relocated[0].has_class('new-class')
|
| 57 |
+
assert relocated[0].css('.new-description')[0].text == 'Description 1'
|
| 58 |
+
|
| 59 |
+
@pytest.mark.asyncio
|
| 60 |
+
async def test_element_relocation_async(self):
|
| 61 |
+
"""Test relocating element after structure change in async mode"""
|
| 62 |
+
original_html = '''
|
| 63 |
+
<div class="container">
|
| 64 |
+
<section class="products">
|
| 65 |
+
<article class="product" id="p1">
|
| 66 |
+
<h3>Product 1</h3>
|
| 67 |
+
<p class="description">Description 1</p>
|
| 68 |
+
</article>
|
| 69 |
+
<article class="product" id="p2">
|
| 70 |
+
<h3>Product 2</h3>
|
| 71 |
+
<p class="description">Description 2</p>
|
| 72 |
+
</article>
|
| 73 |
+
</section>
|
| 74 |
+
</div>
|
| 75 |
+
'''
|
| 76 |
+
changed_html = '''
|
| 77 |
+
<div class="new-container">
|
| 78 |
+
<div class="product-wrapper">
|
| 79 |
+
<section class="products">
|
| 80 |
+
<article class="product new-class" data-id="p1">
|
| 81 |
+
<div class="product-info">
|
| 82 |
+
<h3>Product 1</h3>
|
| 83 |
+
<p class="new-description">Description 1</p>
|
| 84 |
+
</div>
|
| 85 |
+
</article>
|
| 86 |
+
<article class="product new-class" data-id="p2">
|
| 87 |
+
<div class="product-info">
|
| 88 |
+
<h3>Product 2</h3>
|
| 89 |
+
<p class="new-description">Description 2</p>
|
| 90 |
+
</div>
|
| 91 |
+
</article>
|
| 92 |
+
</section>
|
| 93 |
+
</div>
|
| 94 |
+
</div>
|
| 95 |
+
'''
|
| 96 |
+
|
| 97 |
+
# Simulate async operation
|
| 98 |
+
await asyncio.sleep(0.1) # Minimal async operation
|
| 99 |
+
|
| 100 |
+
old_page = Adaptor(original_html, url='example.com', auto_match=True)
|
| 101 |
+
new_page = Adaptor(changed_html, url='example.com', auto_match=True)
|
| 102 |
+
|
| 103 |
+
# 'p1' was used as an ID and now it's not, and all the path elements have changed
|
| 104 |
+
# Also at the same time testing auto-match vs combined selectors
|
| 105 |
+
_ = old_page.css('#p1, #p2', auto_save=True)[0]
|
| 106 |
+
relocated = new_page.css('#p1', auto_match=True)
|
| 107 |
+
|
| 108 |
+
assert relocated is not None
|
| 109 |
+
assert relocated[0].attrib['data-id'] == 'p1'
|
| 110 |
+
assert relocated[0].has_class('new-class')
|
| 111 |
+
assert relocated[0].css('.new-description')[0].text == 'Description 1'
|