Karim shoair committed on
Commit ·
88842b1
1
Parent(s): 76ae95e
tests: Multiple changes to tests
Browse files
- Remove all tests for the old encoding logic
- Stop using the `nopecha` test page to test Cloudflare solver
- Remove useless tests like testing for infinite timeout
- Fixes to make the code compatible with new changes
tests/cli/test_cli.py
CHANGED
|
@@ -14,6 +14,7 @@ def configure_selector_mock():
|
|
| 14 |
"""Helper function to create a properly configured Selector mock"""
|
| 15 |
mock_response = MagicMock(spec=Selector)
|
| 16 |
mock_response.body = "<html><body>Test content</body></html>"
|
|
|
|
| 17 |
mock_response.get_all_text.return_value = "Test content"
|
| 18 |
mock_response.css_first.return_value = mock_response
|
| 19 |
mock_response.css.return_value = [mock_response]
|
|
|
|
| 14 |
"""Helper function to create a properly configured Selector mock"""
|
| 15 |
mock_response = MagicMock(spec=Selector)
|
| 16 |
mock_response.body = "<html><body>Test content</body></html>"
|
| 17 |
+
mock_response.encoding = "utf-8"
|
| 18 |
mock_response.get_all_text.return_value = "Test content"
|
| 19 |
mock_response.css_first.return_value = mock_response
|
| 20 |
mock_response.css.return_value = [mock_response]
|
tests/fetchers/async/test_camoufox.py
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
|
|
| 1 |
import pytest
|
| 2 |
import pytest_httpbin
|
| 3 |
|
|
@@ -23,14 +24,9 @@ class TestStealthyFetcher:
|
|
| 23 |
"basic_url": f"{url}/get",
|
| 24 |
"html_url": f"{url}/html",
|
| 25 |
"delayed_url": f"{url}/delay/10", # 10 Seconds delay response
|
| 26 |
-
"cookies_url": f"{url}/cookies/set/test/value"
|
| 27 |
-
"cloudflare_url": "https://nopecha.com/demo/cloudflare", # Interactive turnstile page
|
| 28 |
}
|
| 29 |
|
| 30 |
-
async def test_cloudflare_fetch(self, fetcher, urls):
|
| 31 |
-
"""Test if Cloudflare bypass is working"""
|
| 32 |
-
assert (await fetcher.async_fetch(urls["cloudflare_url"], solve_cloudflare=True)).status == 200
|
| 33 |
-
|
| 34 |
async def test_basic_fetch(self, fetcher, urls):
|
| 35 |
"""Test doing a basic fetch request with multiple statuses"""
|
| 36 |
assert (await fetcher.async_fetch(urls["status_200"])).status == 200
|
|
@@ -86,9 +82,3 @@ class TestStealthyFetcher:
|
|
| 86 |
**kwargs
|
| 87 |
)
|
| 88 |
assert response.status == 200
|
| 89 |
-
|
| 90 |
-
async def test_infinite_timeout(self, fetcher, urls):
|
| 91 |
-
"""Test if infinite timeout breaks the code or not"""
|
| 92 |
-
assert (
|
| 93 |
-
await fetcher.async_fetch(urls["delayed_url"], timeout=0)
|
| 94 |
-
).status == 200
|
|
|
|
| 1 |
+
from playwright._impl._errors import TimeoutError
|
| 2 |
import pytest
|
| 3 |
import pytest_httpbin
|
| 4 |
|
|
|
|
| 24 |
"basic_url": f"{url}/get",
|
| 25 |
"html_url": f"{url}/html",
|
| 26 |
"delayed_url": f"{url}/delay/10", # 10 Seconds delay response
|
| 27 |
+
"cookies_url": f"{url}/cookies/set/test/value"
|
|
|
|
| 28 |
}
|
| 29 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 30 |
async def test_basic_fetch(self, fetcher, urls):
|
| 31 |
"""Test doing a basic fetch request with multiple statuses"""
|
| 32 |
assert (await fetcher.async_fetch(urls["status_200"])).status == 200
|
|
|
|
| 82 |
**kwargs
|
| 83 |
)
|
| 84 |
assert response.status == 200
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
tests/fetchers/async/test_dynamic.py
CHANGED
|
@@ -90,9 +90,3 @@ class TestDynamicFetcherAsync:
|
|
| 90 |
|
| 91 |
with pytest.raises(Exception):
|
| 92 |
await fetcher.async_fetch(urls["html_url"], cdp_url="ws://blahblah")
|
| 93 |
-
|
| 94 |
-
@pytest.mark.asyncio
|
| 95 |
-
async def test_infinite_timeout(self, fetcher, urls):
|
| 96 |
-
"""Test if infinite timeout breaks the code or not"""
|
| 97 |
-
response = await fetcher.async_fetch(urls["delayed_url"], timeout=0)
|
| 98 |
-
assert response.status == 200
|
|
|
|
| 90 |
|
| 91 |
with pytest.raises(Exception):
|
| 92 |
await fetcher.async_fetch(urls["html_url"], cdp_url="ws://blahblah")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
tests/fetchers/sync/test_camoufox.py
CHANGED
|
@@ -77,7 +77,3 @@ class TestStealthyFetcher:
|
|
| 77 |
**kwargs
|
| 78 |
)
|
| 79 |
assert response.status == 200
|
| 80 |
-
|
| 81 |
-
def test_infinite_timeout(self, fetcher):
|
| 82 |
-
"""Test if infinite timeout breaks the code or not"""
|
| 83 |
-
assert fetcher.fetch(self.delayed_url, timeout=0).status == 200
|
|
|
|
| 77 |
**kwargs
|
| 78 |
)
|
| 79 |
assert response.status == 200
|
|
|
|
|
|
|
|
|
|
|
|
tests/fetchers/test_response_handling.py
CHANGED
|
@@ -1,7 +1,6 @@
|
|
| 1 |
from unittest.mock import Mock
|
| 2 |
|
| 3 |
from scrapling.parser import Selector
|
| 4 |
-
from scrapling.engines.toolbelt.custom import ResponseEncoding
|
| 5 |
from scrapling.engines.toolbelt.convertor import ResponseFactory, Response
|
| 6 |
|
| 7 |
|
|
@@ -32,20 +31,6 @@ class TestResponseFactory:
|
|
| 32 |
assert response.url == "https://example.com"
|
| 33 |
assert isinstance(response, Response)
|
| 34 |
|
| 35 |
-
def test_response_encoding_edge_cases(self):
|
| 36 |
-
"""Test response encoding handling"""
|
| 37 |
-
# Test various content types
|
| 38 |
-
test_cases = [
|
| 39 |
-
(None, "utf-8"),
|
| 40 |
-
("", "utf-8"),
|
| 41 |
-
("text/html; charset=invalid", "utf-8"),
|
| 42 |
-
("application/octet-stream", "utf-8"),
|
| 43 |
-
]
|
| 44 |
-
|
| 45 |
-
for content_type, expected in test_cases:
|
| 46 |
-
encoding = ResponseEncoding.get_value(content_type)
|
| 47 |
-
assert encoding == expected
|
| 48 |
-
|
| 49 |
def test_response_history_processing(self):
|
| 50 |
"""Test processing response history"""
|
| 51 |
# Mock responses with redirects
|
|
|
|
| 1 |
from unittest.mock import Mock
|
| 2 |
|
| 3 |
from scrapling.parser import Selector
|
|
|
|
| 4 |
from scrapling.engines.toolbelt.convertor import ResponseFactory, Response
|
| 5 |
|
| 6 |
|
|
|
|
| 31 |
assert response.url == "https://example.com"
|
| 32 |
assert isinstance(response, Response)
|
| 33 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 34 |
def test_response_history_processing(self):
|
| 35 |
"""Test processing response history"""
|
| 36 |
# Mock responses with redirects
|
tests/fetchers/test_utils.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
import pytest
|
| 2 |
from pathlib import Path
|
| 3 |
|
| 4 |
-
from scrapling.engines.toolbelt.custom import ResponseEncoding, StatusText, Response
|
| 5 |
from scrapling.engines.toolbelt.navigation import (
|
| 6 |
construct_proxy_dict,
|
| 7 |
js_bypass_path
|
|
@@ -131,12 +131,6 @@ def status_map():
|
|
| 131 |
}
|
| 132 |
|
| 133 |
|
| 134 |
-
def test_parsing_content_type(content_type_map):
|
| 135 |
-
"""Test if parsing different types of 'content-type' returns the expected result"""
|
| 136 |
-
for header_value, expected_encoding in content_type_map.items():
|
| 137 |
-
assert ResponseEncoding.get_value(header_value) == expected_encoding
|
| 138 |
-
|
| 139 |
-
|
| 140 |
def test_parsing_response_status(status_map):
|
| 141 |
"""Test if using different http responses' status codes returns the expected result"""
|
| 142 |
for status_code, expected_status_text in status_map.items():
|
|
|
|
| 1 |
import pytest
|
| 2 |
from pathlib import Path
|
| 3 |
|
| 4 |
+
from scrapling.engines.toolbelt.custom import StatusText, Response
|
| 5 |
from scrapling.engines.toolbelt.navigation import (
|
| 6 |
construct_proxy_dict,
|
| 7 |
js_bypass_path
|
|
|
|
| 131 |
}
|
| 132 |
|
| 133 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 134 |
def test_parsing_response_status(status_map):
|
| 135 |
"""Test if using different http responses' status codes returns the expected result"""
|
| 136 |
for status_code, expected_status_text in status_map.items():
|
tests/parser/test_parser_advanced.py
CHANGED
|
@@ -99,7 +99,7 @@ class TestAdvancedSelectors:
|
|
| 99 |
keep_comments=False,
|
| 100 |
keep_cdata=False
|
| 101 |
)
|
| 102 |
-
content = page.
|
| 103 |
assert "Comment" not in content
|
| 104 |
|
| 105 |
def test_advanced_xpath_variables(self, complex_html):
|
|
|
|
| 99 |
keep_comments=False,
|
| 100 |
keep_cdata=False
|
| 101 |
)
|
| 102 |
+
content = page.html_content
|
| 103 |
assert "Comment" not in content
|
| 104 |
|
| 105 |
def test_advanced_xpath_variables(self, complex_html):
|