import asyncio
import pytest
from scrapling import Selector
class TestParserAdaptive:
    """Tests that adaptive selectors can relocate an element after the page markup changes.

    Both tests share the same pair of HTML fixtures and the same checks; the
    async variant only differs in running inside an event loop.
    """

    # Page as first seen: each product <article> is identified by its ``id``.
    ORIGINAL_HTML = """
        <div class="container">
            <section class="products">
                <article class="product" id="p1">
                    <h3>Product 1</h3>
                    <p class="description">Description 1</p>
                </article>
                <article class="product" id="p2">
                    <h3>Product 2</h3>
                    <p class="description">Description 2</p>
                </article>
            </section>
        </div>
    """

    # Same products after a restructure: extra wrapper elements, ``id`` replaced
    # by ``data-id``, an added ``new-class`` class and a renamed description class.
    CHANGED_HTML = """
        <div class="new-container">
            <div class="product-wrapper">
                <section class="products">
                    <article class="product new-class" data-id="p1">
                        <div class="product-info">
                            <h3>Product 1</h3>
                            <p class="new-description">Description 1</p>
                        </div>
                    </article>
                    <article class="product new-class" data-id="p2">
                        <div class="product-info">
                            <h3>Product 2</h3>
                            <p class="new-description">Description 2</p>
                        </div>
                    </article>
                </section>
            </div>
        </div>
    """

    @staticmethod
    def _check_relocation():
        """Save the products from the original page, then relocate ``#p1`` in the changed page.

        Raises:
            AssertionError: if nothing is relocated, or the relocated element is
                not the first product of the changed page.
            IndexError: if the combined selector matches nothing in the original page.
        """
        old_page = Selector(TestParserAdaptive.ORIGINAL_HTML, url="example.com", adaptive=True)
        new_page = Selector(TestParserAdaptive.CHANGED_HTML, url="example.com", adaptive=True)

        # 'p1' was used as ID and now it's not and all the path elements have changes
        # Also at the same time testing `adaptive` vs combined selectors
        # Indexing with [0] makes the test fail early if the original page did not match.
        _ = old_page.css("#p1, #p2", auto_save=True)[0]

        # No element in the changed page has id="p1", so a plain CSS match cannot
        # succeed here; any result has to come from the adaptive lookup.
        relocated = new_page.css("#p1", adaptive=True)

        # Check for an empty result explicitly so a failed relocation is reported
        # as a clear assertion failure instead of an IndexError on relocated[0].
        assert relocated is not None and len(relocated) > 0, (
            "adaptive lookup for '#p1' returned no elements"
        )

        first_match = relocated[0]
        assert first_match.attrib["data-id"] == "p1"
        assert first_match.has_class("new-class")
        assert first_match.css(".new-description")[0].text == "Description 1"

    def test_element_relocation(self):
        """Test relocating element after structure change"""
        self._check_relocation()

    @pytest.mark.asyncio
    async def test_element_relocation_async(self):
        """Test relocating element after structure change in async mode"""
        # Simulate async operation: a zero delay still yields control to the
        # event loop once, without making the test wait.
        await asyncio.sleep(0)
        self._check_relocation()
|