Karim shoair committed on
Commit
4e9888e
·
1 Parent(s): 9b40891

test: adding new tests and updating existing ones

Browse files
tests/ai/test_ai_mcp.py CHANGED
@@ -1,12 +1,18 @@
1
  import pytest
 
2
  from unittest.mock import Mock, patch
3
 
4
  from scrapling.core.ai import ScraplingMCPServer, ResponseModel
5
 
6
 
 
7
  class TestMCPServer:
8
  """Test MCP server functionality"""
9
 
 
 
 
 
10
  @pytest.fixture
11
  def server(self):
12
  return ScraplingMCPServer()
@@ -16,71 +22,46 @@ class TestMCPServer:
16
  assert server._server is not None
17
  assert server._server.name == "Scrapling"
18
 
19
- def test_get_tool(self):
20
  """Test the get tool method"""
21
- with patch('scrapling.fetchers.Fetcher.get') as mock_get:
22
- mock_response = Mock()
23
- mock_response.status = 200
24
- mock_response.url = "https://example.com"
25
- mock_get.return_value = mock_response
26
-
27
- with patch('scrapling.core.ai.Convertor._extract_content') as mock_extract:
28
- mock_extract.return_value = iter(["Content"])
29
-
30
- result = ScraplingMCPServer.get(
31
- url="https://example.com",
32
- extraction_type="markdown"
33
- )
34
-
35
- assert isinstance(result, ResponseModel)
36
- assert result.status == 200
37
- assert result.url == "https://example.com"
38
 
39
  @pytest.mark.asyncio
40
- async def test_bulk_get_tool(self):
41
  """Test the bulk_get tool method"""
42
- with patch('scrapling.engines.FetcherSession') as mock_session:
43
- mock_instance = Mock()
44
- mock_session.return_value.__aenter__.return_value = mock_instance
45
-
46
- # Mock async get method
47
- async def mock_async_get(*args, **kwargs):
48
- mock_resp = Mock()
49
- mock_resp.status = 200
50
- mock_resp.url = args[0]
51
- return mock_resp
52
-
53
- mock_instance.get = mock_async_get
54
 
55
- with patch('scrapling.core.ai.Convertor._extract_content') as mock_extract:
56
- mock_extract.return_value = iter(["Content"])
57
-
58
- results = await ScraplingMCPServer.bulk_get(
59
- urls=("https://example1.com", "https://example2.com"),
60
- extraction_type="html"
61
- )
62
-
63
- assert len(results) == 2
64
- assert all(isinstance(r, ResponseModel) for r in results)
65
 
66
  @pytest.mark.asyncio
67
- async def test_fetch_tool(self):
68
  """Test the fetch tool method"""
69
- with patch('scrapling.fetchers.DynamicFetcher.async_fetch') as mock_fetch:
70
- mock_response = Mock()
71
- mock_response.status = 200
72
- mock_response.url = "https://example.com"
73
- mock_fetch.return_value = mock_response
74
 
75
- with patch('scrapling.core.ai.Convertor._extract_content') as mock_extract:
76
- mock_extract.return_value = iter(["Content"])
 
 
 
77
 
78
- result = await ScraplingMCPServer.fetch(
79
- url="https://example.com",
80
- headless=True
81
- )
 
 
82
 
83
- assert isinstance(result, ResponseModel)
 
 
 
 
84
 
85
  def test_serve_method(self, server):
86
  """Test the serve method"""
 
1
  import pytest
2
+ import pytest_httpbin
3
  from unittest.mock import Mock, patch
4
 
5
  from scrapling.core.ai import ScraplingMCPServer, ResponseModel
6
 
7
 
8
+ @pytest_httpbin.use_class_based_httpbin
9
  class TestMCPServer:
10
  """Test MCP server functionality"""
11
 
12
+ @pytest.fixture(scope="class")
13
+ def test_url(self, httpbin):
14
+ return f"{httpbin.url}/html"
15
+
16
  @pytest.fixture
17
  def server(self):
18
  return ScraplingMCPServer()
 
22
  assert server._server is not None
23
  assert server._server.name == "Scrapling"
24
 
25
+ def test_get_tool(self, server, test_url):
26
  """Test the get tool method"""
27
+ result = server.get(url=test_url, extraction_type="markdown")
28
+ assert isinstance(result, ResponseModel)
29
+ assert result.status == 200
30
+ assert result.url == test_url
 
 
 
 
 
 
 
 
 
 
 
 
 
31
 
32
  @pytest.mark.asyncio
33
+ async def test_bulk_get_tool(self, server, test_url):
34
  """Test the bulk_get tool method"""
35
+ results = await server.bulk_get(urls=(test_url, test_url), extraction_type="html")
 
 
 
 
 
 
 
 
 
 
 
36
 
37
+ assert len(results) == 2
38
+ assert all(isinstance(r, ResponseModel) for r in results)
 
 
 
 
 
 
 
 
39
 
40
  @pytest.mark.asyncio
41
+ async def test_fetch_tool(self, server, test_url):
42
  """Test the fetch tool method"""
43
+ result = await server.fetch(url=test_url, headless=True)
44
+ assert isinstance(result, ResponseModel)
45
+ assert result.status == 200
 
 
46
 
47
+ @pytest.mark.asyncio
48
+ async def test_bulk_fetch_tool(self, server, test_url):
49
+ """Test the bulk_fetch tool method"""
50
+ result = await server.bulk_fetch(urls=(test_url, test_url), headless=True)
51
+ assert all(isinstance(r, ResponseModel) for r in result)
52
 
53
+ @pytest.mark.asyncio
54
+ async def test_stealthy_fetch_tool(self, server, test_url):
55
+ """Test the stealthy_fetch tool method"""
56
+ result = await server.stealthy_fetch(url=test_url, headless=True)
57
+ assert isinstance(result, ResponseModel)
58
+ assert result.status == 200
59
 
60
+ @pytest.mark.asyncio
61
+ async def test_bulk_stealthy_fetch_tool(self, server, test_url):
62
+ """Test the bulk_stealthy_fetch tool method"""
63
+ result = await server.bulk_stealthy_fetch(urls=(test_url, test_url), headless=True)
64
+ assert all(isinstance(r, ResponseModel) for r in result)
65
 
66
  def test_serve_method(self, server):
67
  """Test the serve method"""
tests/core/__init__.py ADDED
File without changes
tests/core/test_shell_core.py ADDED
@@ -0,0 +1,243 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pytest
2
+
3
+ from scrapling.core.shell import (
4
+ _CookieParser,
5
+ _ParseHeaders,
6
+ Request,
7
+ _known_logging_levels,
8
+ )
9
+
10
+
11
+ class TestCookieParser:
12
+ """Test cookie parsing functionality"""
13
+
14
+ def test_simple_cookie_parsing(self):
15
+ """Test parsing a simple cookie"""
16
+ cookie_string = "session_id=abc123"
17
+ cookies = list(_CookieParser(cookie_string))
18
+ assert len(cookies) == 1
19
+ assert cookies[0] == ("session_id", "abc123")
20
+
21
+ def test_multiple_cookies_parsing(self):
22
+ """Test parsing multiple cookies"""
23
+ cookie_string = "session_id=abc123; theme=dark; lang=en"
24
+ cookies = list(_CookieParser(cookie_string))
25
+ assert len(cookies) == 3
26
+ cookie_dict = dict(cookies)
27
+ assert cookie_dict["session_id"] == "abc123"
28
+ assert cookie_dict["theme"] == "dark"
29
+ assert cookie_dict["lang"] == "en"
30
+
31
+ def test_cookie_with_attributes(self):
32
+ """Test parsing cookies with attributes"""
33
+ cookie_string = "session_id=abc123; Path=/; HttpOnly; Secure"
34
+ cookies = list(_CookieParser(cookie_string))
35
+ assert len(cookies) == 1
36
+ assert cookies[0] == ("session_id", "abc123")
37
+
38
+ def test_empty_cookie_string(self):
39
+ """Test parsing empty cookie string"""
40
+ cookies = list(_CookieParser(""))
41
+ assert len(cookies) == 0
42
+
43
+ def test_malformed_cookie_handling(self):
44
+ """Test handling of malformed cookies"""
45
+ # Should not raise exception but may return an empty list
46
+ cookies = list(_CookieParser("invalid_cookie_format"))
47
+ assert isinstance(cookies, list)
48
+
49
+
50
+ class TestParseHeaders:
51
+ """Test header parsing functionality"""
52
+
53
+ def test_simple_headers(self):
54
+ """Test parsing simple headers"""
55
+ header_lines = [
56
+ "Content-Type: text/html",
57
+ "Content-Length: 1234",
58
+ "User-Agent: TestAgent/1.0"
59
+ ]
60
+ headers, cookies = _ParseHeaders(header_lines)
61
+
62
+ assert headers["Content-Type"] == "text/html"
63
+ assert headers["Content-Length"] == "1234"
64
+ assert headers["User-Agent"] == "TestAgent/1.0"
65
+ assert len(cookies) == 0
66
+
67
+ def test_headers_with_cookies(self):
68
+ """Test parsing headers with cookie headers"""
69
+ header_lines = [
70
+ "Content-Type: text/html",
71
+ "Set-Cookie: session_id=abc123",
72
+ "Set-Cookie: theme=dark; Path=/",
73
+ ]
74
+ headers, cookies = _ParseHeaders(header_lines)
75
+
76
+ assert headers["Content-Type"] == "text/html"
77
+ assert "Set-Cookie" in headers # Should contain the first Set-Cookie
78
+ # Cookie parsing behavior depends on implementation
79
+
80
+ def test_headers_without_colons(self):
81
+ """Test headers without colons"""
82
+ header_lines = [
83
+ "Content-Type: text/html",
84
+ "InvalidHeader;", # Header ending with semicolon
85
+ ]
86
+ headers, cookies = _ParseHeaders(header_lines)
87
+
88
+ assert headers["Content-Type"] == "text/html"
89
+ assert "InvalidHeader" in headers
90
+ assert headers["InvalidHeader"] == ""
91
+
92
+ def test_invalid_header_format(self):
93
+ """Test invalid header format raises error"""
94
+ header_lines = [
95
+ "Content-Type: text/html",
96
+ "InvalidHeaderWithoutColon", # No colon, no semicolon
97
+ ]
98
+
99
+ with pytest.raises(ValueError, match="Could not parse header without colon"):
100
+ _ParseHeaders(header_lines)
101
+
102
+ def test_headers_with_multiple_colons(self):
103
+ """Test headers with multiple colons"""
104
+ header_lines = [
105
+ "Authorization: Bearer: token123",
106
+ "X-Custom: value:with:colons",
107
+ ]
108
+ headers, cookies = _ParseHeaders(header_lines)
109
+
110
+ assert headers["Authorization"] == "Bearer: token123"
111
+ assert headers["X-Custom"] == "value:with:colons"
112
+
113
+ def test_headers_with_whitespace(self):
114
+ """Test headers with extra whitespace"""
115
+ header_lines = [
116
+ " Content-Type : text/html ",
117
+ "\tUser-Agent\t:\tTestAgent/1.0\t",
118
+ ]
119
+ headers, cookies = _ParseHeaders(header_lines)
120
+
121
+ # Should handle whitespace correctly
122
+ assert "Content-Type" in headers or " Content-Type " in headers
123
+ assert "text/html" in str(headers.values()) or " text/html " in str(headers.values())
124
+
125
+ def test_parse_cookies_disabled(self):
126
+ """Test parsing with cookies disabled"""
127
+ header_lines = [
128
+ "Content-Type: text/html",
129
+ "Set-Cookie: session_id=abc123",
130
+ ]
131
+ headers, cookies = _ParseHeaders(header_lines, parse_cookies=False)
132
+
133
+ assert headers["Content-Type"] == "text/html"
134
+ # Cookie parsing behavior when disabled
135
+ assert len(cookies) == 0 or "Set-Cookie" in headers
136
+
137
+ def test_empty_header_lines(self):
138
+ """Test parsing empty header lines"""
139
+ headers, cookies = _ParseHeaders([])
140
+ assert len(headers) == 0
141
+ assert len(cookies) == 0
142
+
143
+
144
+ class TestRequestNamedTuple:
145
+ """Test Request namedtuple functionality"""
146
+
147
+ def test_request_creation(self):
148
+ """Test creating Request namedtuple"""
149
+ request = Request(
150
+ method="GET",
151
+ url="https://example.com",
152
+ params={"q": "test"},
153
+ data=None,
154
+ json_data=None,
155
+ headers={"User-Agent": "Test"},
156
+ cookies={"session": "abc123"},
157
+ proxy=None,
158
+ follow_redirects=True
159
+ )
160
+
161
+ assert request.method == "GET"
162
+ assert request.url == "https://example.com"
163
+ assert request.params == {"q": "test"}
164
+ assert request.headers == {"User-Agent": "Test"}
165
+ assert request.follow_redirects is True
166
+
167
+ def test_request_defaults(self):
168
+ """Test Request with default/None values"""
169
+ request = Request(
170
+ method="POST",
171
+ url="https://api.example.com",
172
+ params=None,
173
+ data='{"key": "value"}',
174
+ json_data={"key": "value"},
175
+ headers={},
176
+ cookies={},
177
+ proxy="http://proxy:8080",
178
+ follow_redirects=False
179
+ )
180
+
181
+ assert request.method == "POST"
182
+ assert request.data == '{"key": "value"}'
183
+ assert request.json_data == {"key": "value"}
184
+ assert request.proxy == "http://proxy:8080"
185
+ assert request.follow_redirects is False
186
+
187
+ def test_request_field_access(self):
188
+ """Test accessing Request fields"""
189
+ request = Request(
190
+ "GET", "https://example.com", {}, None, None, {}, {}, None, True
191
+ )
192
+
193
+ # Test field access by name
194
+ assert hasattr(request, 'method')
195
+ assert hasattr(request, 'url')
196
+ assert hasattr(request, 'params')
197
+ assert hasattr(request, 'data')
198
+ assert hasattr(request, 'json_data')
199
+ assert hasattr(request, 'headers')
200
+ assert hasattr(request, 'cookies')
201
+ assert hasattr(request, 'proxy')
202
+ assert hasattr(request, 'follow_redirects')
203
+
204
+ # Test field access by index
205
+ assert request[0] == "GET"
206
+ assert request[1] == "https://example.com"
207
+
208
+
209
+ class TestLoggingLevels:
210
+ """Test logging level constants"""
211
+
212
+ def test_known_logging_levels(self):
213
+ """Test that all known logging levels are defined"""
214
+ expected_levels = ["debug", "info", "warning", "error", "critical", "fatal"]
215
+
216
+ for level in expected_levels:
217
+ assert level in _known_logging_levels
218
+ assert isinstance(_known_logging_levels[level], int)
219
+
220
+ def test_logging_level_values(self):
221
+ """Test logging level values are correct"""
222
+ from logging import DEBUG, INFO, WARNING, ERROR, CRITICAL, FATAL
223
+
224
+ assert _known_logging_levels["debug"] == DEBUG
225
+ assert _known_logging_levels["info"] == INFO
226
+ assert _known_logging_levels["warning"] == WARNING
227
+ assert _known_logging_levels["error"] == ERROR
228
+ assert _known_logging_levels["critical"] == CRITICAL
229
+ assert _known_logging_levels["fatal"] == FATAL
230
+
231
+ def test_level_hierarchy(self):
232
+ """Test that logging levels have correct hierarchy"""
233
+ levels = [
234
+ _known_logging_levels["debug"],
235
+ _known_logging_levels["info"],
236
+ _known_logging_levels["warning"],
237
+ _known_logging_levels["error"],
238
+ _known_logging_levels["critical"],
239
+ ]
240
+
241
+ # Levels should be in ascending order
242
+ for i in range(len(levels) - 1):
243
+ assert levels[i] < levels[i + 1]
tests/core/test_storage_core.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import tempfile
2
+ import os
3
+
4
+ from scrapling.core.storage import SQLiteStorageSystem
5
+
6
+
7
+ class TestSQLiteStorageSystem:
8
+ """Test SQLiteStorageSystem functionality"""
9
+
10
+ def test_sqlite_storage_creation(self):
11
+ """Test SQLite storage system creation"""
12
+ # Use an in-memory database for testing
13
+ storage = SQLiteStorageSystem(storage_file=":memory:")
14
+ assert storage is not None
15
+
16
+ def test_sqlite_storage_with_file(self):
17
+ """Test SQLite storage with an actual file"""
18
+ with tempfile.NamedTemporaryFile(suffix='.db', delete=False) as tmp_file:
19
+ db_path = tmp_file.name
20
+
21
+ try:
22
+ storage = SQLiteStorageSystem(storage_file=db_path)
23
+ assert storage is not None
24
+ assert os.path.exists(db_path)
25
+ finally:
26
+ if os.path.exists(db_path):
27
+ os.unlink(db_path)
28
+
29
+ def test_sqlite_storage_initialization_args(self):
30
+ """Test SQLite storage with various initialization arguments"""
31
+ # Test with URL parameter
32
+ storage = SQLiteStorageSystem(
33
+ storage_file=":memory:",
34
+ url="https://example.com"
35
+ )
36
+ assert storage is not None
37
+ assert storage.url == "https://example.com"
tests/fetchers/async/test_camoufox.py CHANGED
@@ -24,8 +24,13 @@ class TestStealthyFetcher:
24
  "html_url": f"{url}/html",
25
  "delayed_url": f"{url}/delay/10", # 10 Seconds delay response
26
  "cookies_url": f"{url}/cookies/set/test/value",
 
27
  }
28
 
 
 
 
 
29
  async def test_basic_fetch(self, fetcher, urls):
30
  """Test doing a basic fetch request with multiple statuses"""
31
  assert (await fetcher.async_fetch(urls["status_200"])).status == 200
@@ -63,7 +68,7 @@ class TestStealthyFetcher:
63
  {
64
  "network_idle": True,
65
  "wait": 10,
66
- "cookies": [],
67
  "google_search": True,
68
  "extra_headers": {"ayo": ""},
69
  "os_randomize": True,
 
24
  "html_url": f"{url}/html",
25
  "delayed_url": f"{url}/delay/10", # 10 Seconds delay response
26
  "cookies_url": f"{url}/cookies/set/test/value",
27
+ "cloudflare_url": "https://nopecha.com/demo/cloudflare", # Interactive turnstile page
28
  }
29
 
30
+ async def test_cloudflare_fetch(self, fetcher, urls):
31
+ """Test if Cloudflare bypass is working"""
32
+ assert (await fetcher.async_fetch(urls["cloudflare_url"], solve_cloudflare=True)).status == 200
33
+
34
  async def test_basic_fetch(self, fetcher, urls):
35
  """Test doing a basic fetch request with multiple statuses"""
36
  assert (await fetcher.async_fetch(urls["status_200"])).status == 200
 
68
  {
69
  "network_idle": True,
70
  "wait": 10,
71
+ "cookies": [{"name": "test", "value": "123", "domain": "example.com", "path": "/"}],
72
  "google_search": True,
73
  "extra_headers": {"ayo": ""},
74
  "os_randomize": True,
tests/fetchers/async/test_dynamic.py CHANGED
@@ -65,7 +65,7 @@ class TestDynamicFetcherAsync:
65
  "locale": "en-US",
66
  "extra_headers": {"ayo": ""},
67
  "useragent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:131.0) Gecko/20100101 Firefox/131.0",
68
- "cookies": [],
69
  "network_idle": True,
70
  "custom_config": {"keep_comments": False, "keep_cdata": False},
71
  },
 
65
  "locale": "en-US",
66
  "extra_headers": {"ayo": ""},
67
  "useragent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:131.0) Gecko/20100101 Firefox/131.0",
68
+ "cookies": [{"name": "test", "value": "123", "domain": "example.com", "path": "/"}],
69
  "network_idle": True,
70
  "custom_config": {"keep_comments": False, "keep_cdata": False},
71
  },
tests/fetchers/sync/test_camoufox.py CHANGED
@@ -2,7 +2,6 @@ import pytest
2
  import pytest_httpbin
3
 
4
  from scrapling import StealthyFetcher
5
-
6
  StealthyFetcher.adaptive = True
7
 
8
 
@@ -23,6 +22,11 @@ class TestStealthyFetcher:
23
  self.html_url = f"{httpbin.url}/html"
24
  self.delayed_url = f"{httpbin.url}/delay/10" # 10 Seconds delay response
25
  self.cookies_url = f"{httpbin.url}/cookies/set/test/value"
 
 
 
 
 
26
 
27
  def test_basic_fetch(self, fetcher):
28
  """Test doing a basic fetch request with multiple statuses"""
@@ -60,7 +64,7 @@ class TestStealthyFetcher:
60
  "network_idle": True,
61
  "wait": 10,
62
  "timeout": 30_000,
63
- "cookies": [],
64
  "google_search": True,
65
  "extra_headers": {"ayo": ""},
66
  "os_randomize": True,
 
2
  import pytest_httpbin
3
 
4
  from scrapling import StealthyFetcher
 
5
  StealthyFetcher.adaptive = True
6
 
7
 
 
22
  self.html_url = f"{httpbin.url}/html"
23
  self.delayed_url = f"{httpbin.url}/delay/10" # 10 Seconds delay response
24
  self.cookies_url = f"{httpbin.url}/cookies/set/test/value"
25
+ self.cloudflare_url = "https://nopecha.com/demo/cloudflare" # Interactive turnstile page
26
+
27
+ def test_cloudflare_fetch(self, fetcher):
28
+ """Test if Cloudflare bypass is working"""
29
+ assert fetcher.fetch(self.cloudflare_url, solve_cloudflare=True).status == 200
30
 
31
  def test_basic_fetch(self, fetcher):
32
  """Test doing a basic fetch request with multiple statuses"""
 
64
  "network_idle": True,
65
  "wait": 10,
66
  "timeout": 30_000,
67
+ "cookies": [{"name": "test", "value": "123", "domain": "example.com", "path": "/"}],
68
  "google_search": True,
69
  "extra_headers": {"ayo": ""},
70
  "os_randomize": True,
tests/fetchers/sync/test_camoufox_session.py ADDED
@@ -0,0 +1,97 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ import pytest
3
+ import pytest_httpbin
4
+
5
+ from scrapling.engines._browsers._camoufox import StealthySession, __CF_PATTERN__
6
+
7
+
8
+ class TestCamoufoxConstants:
9
+ """Test Camoufox constants and patterns"""
10
+
11
+ def test_cf_pattern_regex(self):
12
+ """Test __CF_PATTERN__ regex compilation"""
13
+
14
+ assert isinstance(__CF_PATTERN__, re.Pattern)
15
+
16
+ # Test matching URLs
17
+ test_urls = [
18
+ "https://challenges.cloudflare.com/cdn-cgi/challenge-platform/h/123456",
19
+ "https://challenges.cloudflare.com/cdn-cgi/challenge-platform/orchestrate/jsch/v1",
20
+ "http://challenges.cloudflare.com/cdn-cgi/challenge-platform/scripts/abc"
21
+ ]
22
+
23
+ for url in test_urls:
24
+ assert __CF_PATTERN__.search(url) is not None
25
+
26
+ # Test non-matching URLs
27
+ non_matching_urls = [
28
+ "https://example.com/challenge",
29
+ "https://cloudflare.com/something",
30
+ "https://challenges.cloudflare.com/other-path"
31
+ ]
32
+
33
+ for url in non_matching_urls:
34
+ assert __CF_PATTERN__.search(url) is None
35
+
36
+
37
+ @pytest_httpbin.use_class_based_httpbin
38
+ class TestStealthySession:
39
+
40
+ """All the code is tested in the async version tests, so no need to repeat it here. The async class inherits from this one."""
41
+ @pytest.fixture(autouse=True)
42
+ def setup_urls(self, httpbin):
43
+ """Fixture to set up URLs for testing"""
44
+ self.status_200 = f"{httpbin.url}/status/200"
45
+ self.status_404 = f"{httpbin.url}/status/404"
46
+ self.status_501 = f"{httpbin.url}/status/501"
47
+ self.basic_url = f"{httpbin.url}/get"
48
+ self.html_url = f"{httpbin.url}/html"
49
+ self.delayed_url = f"{httpbin.url}/delay/10" # 10 Seconds delay response
50
+ self.cookies_url = f"{httpbin.url}/cookies/set/test/value"
51
+
52
+ def test_session_creation(self):
53
+ """Test if the session is created correctly"""
54
+
55
+ with StealthySession(
56
+ max_pages=3,
57
+ headless=True,
58
+ block_images=True,
59
+ disable_resources=True,
60
+ solve_cloudflare=True,
61
+ wait=1000,
62
+ timeout=60000,
63
+ cookies=[{"name": "test", "value": "123", "domain": "example.com", "path": "/"}],
64
+ ) as session:
65
+
66
+ assert session.max_pages == 3
67
+ assert session.headless is True
68
+ assert session.block_images is True
69
+ assert session.disable_resources is True
70
+ assert session.solve_cloudflare is True
71
+ assert session.wait == 1000
72
+ assert session.timeout == 60000
73
+ assert session.context is not None
74
+
75
+ # Test Cloudflare detection
76
+ for cloudflare_type in ('managed', 'interactive', 'non-interactive'):
77
+ page_content = f"""
78
+ <html>
79
+ <script>
80
+ cType: '{cloudflare_type}'
81
+ </script>
82
+ </html>
83
+ """
84
+ result = session._detect_cloudflare(page_content)
85
+ assert result == cloudflare_type
86
+
87
+ page_content = """
88
+ <html>
89
+ <body>
90
+ <p>Regular page content</p>
91
+ </body>
92
+ </html>
93
+ """
94
+
95
+ result = StealthySession._detect_cloudflare(page_content)
96
+ assert result is None
97
+ assert session.fetch(self.status_200).status == 200
tests/fetchers/sync/test_dynamic.py CHANGED
@@ -63,7 +63,7 @@ class TestDynamicFetcher:
63
  "locale": "en-US",
64
  "extra_headers": {"ayo": ""},
65
  "useragent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:131.0) Gecko/20100101 Firefox/131.0",
66
- "cookies": [],
67
  "network_idle": True,
68
  "custom_config": {"keep_comments": False, "keep_cdata": False},
69
  },
 
63
  "locale": "en-US",
64
  "extra_headers": {"ayo": ""},
65
  "useragent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:131.0) Gecko/20100101 Firefox/131.0",
66
+ "cookies": [{"name": "test", "value": "123", "domain": "example.com", "path": "/"}],
67
  "network_idle": True,
68
  "custom_config": {"keep_comments": False, "keep_cdata": False},
69
  },
tests/parser/test_general.py CHANGED
@@ -221,7 +221,7 @@ class TestElementNavigation:
221
  """Test parent and sibling navigation"""
222
  table = page.css(".product-list")[0]
223
  parent = table.parent
224
- assert parent.attrib["id"] == "products"
225
 
226
  parent_siblings = parent.siblings
227
  assert len(parent_siblings) == 1
@@ -267,7 +267,7 @@ class TestJSONAndAttributes:
267
  products = page.css(".product")
268
  product_ids = [product.attrib["data-id"] for product in products]
269
  assert product_ids == ["1", "2", "3"]
270
- assert "data-id" in products[0].attrib
271
 
272
  # Review rating calculations
273
  reviews = page.css(".review")
@@ -316,7 +316,9 @@ def test_selectors_generation(page):
316
 
317
  def _traverse(element: Selector):
318
  assert isinstance(element.generate_css_selector, str)
 
319
  assert isinstance(element.generate_xpath_selector, str)
 
320
  for branch in element.children:
321
  _traverse(branch)
322
 
 
221
  """Test parent and sibling navigation"""
222
  table = page.css(".product-list")[0]
223
  parent = table.parent
224
+ assert parent["id"] == "products"
225
 
226
  parent_siblings = parent.siblings
227
  assert len(parent_siblings) == 1
 
267
  products = page.css(".product")
268
  product_ids = [product.attrib["data-id"] for product in products]
269
  assert product_ids == ["1", "2", "3"]
270
+ assert "data-id" in products[0]
271
 
272
  # Review rating calculations
273
  reviews = page.css(".review")
 
316
 
317
  def _traverse(element: Selector):
318
  assert isinstance(element.generate_css_selector, str)
319
+ assert isinstance(element.generate_full_css_selector, str)
320
  assert isinstance(element.generate_xpath_selector, str)
321
+ assert isinstance(element.generate_full_xpath_selector, str)
322
  for branch in element.children:
323
  _traverse(branch)
324
 
tests/parser/test_parser_advanced.py CHANGED
@@ -1,8 +1,58 @@
1
  import re
2
  import pytest
 
3
 
4
  from scrapling import Selector, Selectors
5
  from scrapling.core.custom_types import TextHandler, TextHandlers
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
 
7
 
8
  class TestAdvancedSelectors:
 
1
  import re
2
  import pytest
3
+ from unittest.mock import Mock
4
 
5
  from scrapling import Selector, Selectors
6
  from scrapling.core.custom_types import TextHandler, TextHandlers
7
+ from scrapling.core.storage import SQLiteStorageSystem
8
+
9
+
10
+ class TestSelectorAdvancedFeatures:
11
+ """Test advanced Selector features like adaptive matching"""
12
+
13
+ def test_adaptive_initialization_with_storage(self):
14
+ """Test adaptive initialization with custom storage"""
15
+ html = "<html><body><p>Test</p></body></html>"
16
+
17
+ # Use the actual SQLiteStorageSystem for this test
18
+ selector = Selector(
19
+ content=html,
20
+ adaptive=True,
21
+ storage=SQLiteStorageSystem,
22
+ storage_args={"storage_file": ":memory:", "url": "https://example.com"}
23
+ )
24
+
25
+ assert selector._Selector__adaptive_enabled is True
26
+ assert selector._storage is not None
27
+
28
+ def test_adaptive_initialization_with_default_storage_args(self):
29
+ """Test adaptive initialization with default storage args"""
30
+ html = "<html><body><p>Test</p></body></html>"
31
+ url = "https://example.com"
32
+
33
+ # Test that adaptive mode uses default storage when no explicit args provided
34
+ selector = Selector(
35
+ content=html,
36
+ url=url,
37
+ adaptive=True
38
+ )
39
+
40
+ # Should create storage with default args
41
+ assert selector._storage is not None
42
+
43
+ def test_adaptive_with_existing_storage(self):
44
+ """Test adaptive initialization with existing storage object"""
45
+ html = "<html><body><p>Test</p></body></html>"
46
+
47
+ mock_storage = Mock()
48
+
49
+ selector = Selector(
50
+ content=html,
51
+ adaptive=True,
52
+ _storage=mock_storage
53
+ )
54
+
55
+ assert selector._storage is mock_storage
56
 
57
 
58
  class TestAdvancedSelectors: