Karim shoair committed on
Commit
488acaf
·
1 Parent(s): 5d8eb71

test: Rewrite automatch tests in a cleaner style and add an async test

Browse files
Files changed (1) hide show
  1. tests/parser/test_automatch.py +62 -7
tests/parser/test_automatch.py CHANGED
@@ -1,10 +1,11 @@
1
- import unittest
2
 
3
- from scrapling import Adaptor
4
 
 
5
 
6
- class TestParserAutoMatch(unittest.TestCase):
7
 
 
8
  def test_element_relocation(self):
9
  """Test relocating element after structure change"""
10
  original_html = '''
@@ -50,7 +51,61 @@ class TestParserAutoMatch(unittest.TestCase):
50
  _ = old_page.css('#p1, #p2', auto_save=True)[0]
51
  relocated = new_page.css('#p1', auto_match=True)
52
 
53
- self.assertIsNotNone(relocated)
54
- self.assertEqual(relocated[0].attrib['data-id'], 'p1')
55
- self.assertTrue(relocated[0].has_class('new-class'))
56
- self.assertEqual(relocated[0].css('.new-description')[0].text, 'Description 1')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
 
3
+ import pytest
4
 
5
+ from scrapling import Adaptor
6
 
 
7
 
8
+ class TestParserAutoMatch:
9
  def test_element_relocation(self):
10
  """Test relocating element after structure change"""
11
  original_html = '''
 
51
  _ = old_page.css('#p1, #p2', auto_save=True)[0]
52
  relocated = new_page.css('#p1', auto_match=True)
53
 
54
+ assert relocated is not None
55
+ assert relocated[0].attrib['data-id'] == 'p1'
56
+ assert relocated[0].has_class('new-class')
57
+ assert relocated[0].css('.new-description')[0].text == 'Description 1'
58
+
59
+ @pytest.mark.asyncio
60
+ async def test_element_relocation_async(self):
61
+ """Test relocating element after structure change in async mode"""
62
+ original_html = '''
63
+ <div class="container">
64
+ <section class="products">
65
+ <article class="product" id="p1">
66
+ <h3>Product 1</h3>
67
+ <p class="description">Description 1</p>
68
+ </article>
69
+ <article class="product" id="p2">
70
+ <h3>Product 2</h3>
71
+ <p class="description">Description 2</p>
72
+ </article>
73
+ </section>
74
+ </div>
75
+ '''
76
+ changed_html = '''
77
+ <div class="new-container">
78
+ <div class="product-wrapper">
79
+ <section class="products">
80
+ <article class="product new-class" data-id="p1">
81
+ <div class="product-info">
82
+ <h3>Product 1</h3>
83
+ <p class="new-description">Description 1</p>
84
+ </div>
85
+ </article>
86
+ <article class="product new-class" data-id="p2">
87
+ <div class="product-info">
88
+ <h3>Product 2</h3>
89
+ <p class="new-description">Description 2</p>
90
+ </div>
91
+ </article>
92
+ </section>
93
+ </div>
94
+ </div>
95
+ '''
96
+
97
+ # Simulate async operation
98
+ await asyncio.sleep(0.1) # Minimal async operation
99
+
100
+ old_page = Adaptor(original_html, url='example.com', auto_match=True)
101
+ new_page = Adaptor(changed_html, url='example.com', auto_match=True)
102
+
103
+ # 'p1' was used as ID and now it's not and all the path elements have changes
104
+ # Also at the same time testing auto-match vs combined selectors
105
+ _ = old_page.css('#p1, #p2', auto_save=True)[0]
106
+ relocated = new_page.css('#p1', auto_match=True)
107
+
108
+ assert relocated is not None
109
+ assert relocated[0].attrib['data-id'] == 'p1'
110
+ assert relocated[0].has_class('new-class')
111
+ assert relocated[0].css('.new-description')[0].text == 'Description 1'