| import pytest |
|
|
| from scrapling.core.shell import ( |
| _CookieParser, |
| _ParseHeaders, |
| Request, |
| _known_logging_levels, |
| ) |
|
|
|
|
class TestCookieParser:
    """Exercise ``_CookieParser`` on well-formed, empty and malformed input.

    Each test drains the parser with ``list`` and inspects the resulting
    ``(name, value)`` pairs.
    """

    def test_simple_cookie_parsing(self):
        """A lone ``name=value`` pair produces exactly one tuple."""
        parsed = list(_CookieParser("session_id=abc123"))
        assert parsed == [("session_id", "abc123")]

    def test_multiple_cookies_parsing(self):
        """Three ``;``-separated pairs produce three cookies."""
        raw = "session_id=abc123; theme=dark; lang=en"
        parsed = list(_CookieParser(raw))
        assert len(parsed) == 3

        # Look the values up by name so the check does not depend on order.
        by_name = dict(parsed)
        wanted = (("session_id", "abc123"), ("theme", "dark"), ("lang", "en"))
        for name, value in wanted:
            assert by_name[name] == value

    def test_cookie_with_attributes(self):
        """Only the real cookie is yielded when attributes follow it."""
        raw = "session_id=abc123; Path=/; HttpOnly; Secure"
        parsed = list(_CookieParser(raw))
        # Path / HttpOnly / Secure must not show up as cookies of their own.
        assert parsed == [("session_id", "abc123")]

    def test_empty_cookie_string(self):
        """An empty string produces no cookies."""
        assert list(_CookieParser("")) == []

    def test_malformed_cookie_handling(self):
        """Consuming the parser on a string without ``=`` does not raise."""
        # Smoke test only: ``list(...)`` always returns a list, so the
        # assertion below cannot fail once iteration has completed.
        result = list(_CookieParser("invalid_cookie_format"))
        assert isinstance(result, list)
|
|
|
|
class TestParseHeaders:
    """Exercise ``_ParseHeaders``, which returns a ``(headers, cookies)`` pair.

    The tests feed it lists of raw ``Name: value`` lines and check the
    returned header mapping and, where relevant, the cookie container.
    """

    def test_simple_headers(self):
        """Plain ``Name: value`` lines land in the header mapping."""
        lines = [
            "Content-Type: text/html",
            "Content-Length: 1234",
            "User-Agent: TestAgent/1.0",
        ]
        headers, cookies = _ParseHeaders(lines)

        expected = {
            "Content-Type": "text/html",
            "Content-Length": "1234",
            "User-Agent": "TestAgent/1.0",
        }
        for name, value in expected.items():
            assert headers[name] == value
        assert len(cookies) == 0

    def test_headers_with_cookies(self):
        """``Set-Cookie`` lines are still present among the headers."""
        lines = [
            "Content-Type: text/html",
            "Set-Cookie: session_id=abc123",
            "Set-Cookie: theme=dark; Path=/",
        ]
        # The cookie half of the result is not inspected by this test.
        headers, _ = _ParseHeaders(lines)

        assert headers["Content-Type"] == "text/html"
        assert "Set-Cookie" in headers

    def test_headers_without_colons(self):
        """A colon-less line ending in ``;`` becomes an empty-valued header."""
        lines = ["Content-Type: text/html", "InvalidHeader;"]
        headers, _ = _ParseHeaders(lines)

        assert headers["Content-Type"] == "text/html"
        assert "InvalidHeader" in headers
        assert headers["InvalidHeader"] == ""

    def test_invalid_header_format(self):
        """A line with neither a colon nor a trailing ``;`` raises ``ValueError``."""
        bad_lines = ["Content-Type: text/html", "InvalidHeaderWithoutColon"]

        with pytest.raises(ValueError, match="Could not parse header without colon"):
            _ParseHeaders(bad_lines)

    def test_headers_with_multiple_colons(self):
        """Only the first colon separates the name from the value."""
        lines = [
            "Authorization: Bearer: token123",
            "X-Custom: value:with:colons",
        ]
        headers, _ = _ParseHeaders(lines)

        assert headers["Authorization"] == "Bearer: token123"
        assert headers["X-Custom"] == "value:with:colons"

    def test_headers_with_whitespace(self):
        """Padded header lines are accepted, whether or not padding is stripped."""
        lines = [
            " Content-Type : text/html ",
            "\tUser-Agent\t:\tTestAgent/1.0\t",
        ]
        headers, _ = _ParseHeaders(lines)

        # Deliberately permissive: either the trimmed or the padded spelling
        # of the key / value is accepted.
        assert any(key in headers for key in ("Content-Type", " Content-Type "))
        rendered_values = str(headers.values())
        assert any(text in rendered_values for text in ("text/html", " text/html "))

    def test_parse_cookies_disabled(self):
        """With ``parse_cookies=False`` no cookies come back or the header is kept."""
        lines = ["Content-Type: text/html", "Set-Cookie: session_id=abc123"]
        headers, cookies = _ParseHeaders(lines, parse_cookies=False)

        assert headers["Content-Type"] == "text/html"

        # Passes if either condition holds; the test does not require both.
        assert len(cookies) == 0 or "Set-Cookie" in headers

    def test_empty_header_lines(self):
        """An empty input list yields two empty containers."""
        headers, cookies = _ParseHeaders([])
        assert len(headers) == 0
        assert len(cookies) == 0
|
|
|
|
class TestRequestNamedTuple:
    """Check construction of ``Request`` and access to its fields.

    ``Request`` is built both from keyword arguments and positionally, then
    read back by attribute name and by index.
    """

    def test_request_creation(self):
        """Keyword construction stores each value under its field name."""
        fields = {
            "method": "GET",
            "url": "https://example.com",
            "params": {"q": "test"},
            "data": None,
            "json_data": None,
            "headers": {"User-Agent": "Test"},
            "cookies": {"session": "abc123"},
            "proxy": None,
            "follow_redirects": True,
        }
        req = Request(**fields)

        assert req.method == "GET"
        assert req.url == "https://example.com"
        assert req.params == {"q": "test"}
        assert req.headers == {"User-Agent": "Test"}
        assert req.follow_redirects is True

    def test_request_defaults(self):
        """A POST-style request keeps its body, JSON payload and proxy."""
        body = '{"key": "value"}'
        payload = {"key": "value"}
        proxy_url = "http://proxy:8080"

        req = Request(
            method="POST",
            url="https://api.example.com",
            params=None,
            data=body,
            json_data=payload,
            headers={},
            cookies={},
            proxy=proxy_url,
            follow_redirects=False,
        )

        assert req.method == "POST"
        assert req.data == '{"key": "value"}'
        assert req.json_data == {"key": "value"}
        assert req.proxy == "http://proxy:8080"
        assert req.follow_redirects is False

    def test_request_field_access(self):
        """Positional construction exposes every field by name and by index."""
        positional = ("GET", "https://example.com", {}, None, None, {}, {}, None, True)
        req = Request(*positional)

        # Access by attribute name.
        field_names = (
            "method",
            "url",
            "params",
            "data",
            "json_data",
            "headers",
            "cookies",
            "proxy",
            "follow_redirects",
        )
        for field in field_names:
            assert hasattr(req, field)

        # Access by position: the first two fields are the method and URL.
        assert req[0] == "GET"
        assert req[1] == "https://example.com"
|
|
|
|
class TestLoggingLevels:
    """Check the ``_known_logging_levels`` name-to-level mapping.

    The mapping is compared against the constants of the standard
    ``logging`` module.
    """

    def test_known_logging_levels(self):
        """Every expected level name is a key mapped to an ``int``."""
        for name in ("debug", "info", "warning", "error", "critical", "fatal"):
            assert name in _known_logging_levels
            assert isinstance(_known_logging_levels[name], int)

    def test_logging_level_values(self):
        """Each name maps to the ``logging`` constant with the same upper-case name."""
        import logging

        for name in ("debug", "info", "warning", "error", "critical", "fatal"):
            # e.g. "debug" -> logging.DEBUG
            assert _known_logging_levels[name] == getattr(logging, name.upper())

    def test_level_hierarchy(self):
        """Levels increase strictly from debug up to critical."""
        ordered_names = ("debug", "info", "warning", "error", "critical")
        values = [_known_logging_levels[name] for name in ordered_names]

        # Compare each level with the one that follows it.
        for lower, higher in zip(values, values[1:]):
            assert lower < higher
|
|