File size: 8,749 Bytes
import pytest
from scrapling.core.shell import (
_CookieParser,
_ParseHeaders,
Request,
_known_logging_levels,
)
class TestCookieParser:
    """Checks on the (name, value) pairs produced by ``_CookieParser``."""

    def test_simple_cookie_parsing(self):
        """A lone ``name=value`` string is expected to yield one pair."""
        parsed = list(_CookieParser("session_id=abc123"))

        assert parsed == [("session_id", "abc123")]

    def test_multiple_cookies_parsing(self):
        """Three ``;``-separated pairs are expected to yield three entries."""
        parsed = list(_CookieParser("session_id=abc123; theme=dark; lang=en"))
        assert len(parsed) == 3

        by_name = dict(parsed)
        wanted = {"session_id": "abc123", "theme": "dark", "lang": "en"}
        for name, value in wanted.items():
            assert by_name[name] == value

    def test_cookie_with_attributes(self):
        """Attributes such as Path/HttpOnly/Secure must not show up as cookies."""
        parsed = list(
            _CookieParser("session_id=abc123; Path=/; HttpOnly; Secure")
        )

        assert parsed == [("session_id", "abc123")]

    def test_empty_cookie_string(self):
        """An empty string is expected to produce no cookies at all."""
        parsed = list(_CookieParser(""))

        assert parsed == []

    def test_malformed_cookie_handling(self):
        """Malformed input must be consumable without raising."""
        # The result may well be empty; the only real requirement checked
        # here is that iterating the parser does not raise.
        parsed = [*_CookieParser("invalid_cookie_format")]

        assert isinstance(parsed, list)
class TestParseHeaders:
    """Checks on the ``(headers, cookies)`` pair returned by ``_ParseHeaders``."""

    def test_simple_headers(self):
        """Plain ``Name: value`` lines land in the headers mapping."""
        raw_lines = [
            "Content-Type: text/html",
            "Content-Length: 1234",
            "User-Agent: TestAgent/1.0",
        ]

        parsed_headers, parsed_cookies = _ParseHeaders(raw_lines)

        wanted = {
            "Content-Type": "text/html",
            "Content-Length": "1234",
            "User-Agent": "TestAgent/1.0",
        }
        for name, value in wanted.items():
            assert parsed_headers[name] == value
        assert len(parsed_cookies) == 0

    def test_headers_with_cookies(self):
        """``Set-Cookie`` lines are expected to remain visible in the headers."""
        raw_lines = [
            "Content-Type: text/html",
            "Set-Cookie: session_id=abc123",
            "Set-Cookie: theme=dark; Path=/",
        ]

        parsed_headers, _ = _ParseHeaders(raw_lines)

        assert parsed_headers["Content-Type"] == "text/html"
        # Only presence of the key is checked; what ends up in the cookies
        # mapping depends on the implementation and is not asserted here.
        assert "Set-Cookie" in parsed_headers

    def test_headers_without_colons(self):
        """A colon-less line ending in ``;`` becomes a header with an empty value."""
        raw_lines = [
            "Content-Type: text/html",
            "InvalidHeader;",  # no colon, but terminated by a semicolon
        ]

        parsed_headers, _ = _ParseHeaders(raw_lines)

        assert parsed_headers["Content-Type"] == "text/html"
        assert "InvalidHeader" in parsed_headers
        assert parsed_headers["InvalidHeader"] == ""

    def test_invalid_header_format(self):
        """A line with neither colon nor trailing semicolon must raise ValueError."""
        raw_lines = [
            "Content-Type: text/html",
            "InvalidHeaderWithoutColon",  # neither a colon nor a semicolon
        ]

        expected_message = "Could not parse header without colon"
        with pytest.raises(ValueError, match=expected_message):
            _ParseHeaders(raw_lines)

    def test_headers_with_multiple_colons(self):
        """Only the first colon separates name from value."""
        raw_lines = [
            "Authorization: Bearer: token123",
            "X-Custom: value:with:colons",
        ]

        parsed_headers, _ = _ParseHeaders(raw_lines)

        assert parsed_headers["Authorization"] == "Bearer: token123"
        assert parsed_headers["X-Custom"] == "value:with:colons"

    def test_headers_with_whitespace(self):
        """Padded names and values are accepted, stripped or not."""
        raw_lines = [
            " Content-Type : text/html ",
            "\tUser-Agent\t:\tTestAgent/1.0\t",
        ]

        parsed_headers, _ = _ParseHeaders(raw_lines)

        # Deliberately permissive: either the stripped or the padded spelling
        # of the key/value satisfies the check.
        assert any(
            key in parsed_headers
            for key in ("Content-Type", " Content-Type ")
        )
        rendered_values = str(parsed_headers.values())
        assert "text/html" in rendered_values or " text/html " in rendered_values

    def test_parse_cookies_disabled(self):
        """With ``parse_cookies=False`` the cookie header is not lost."""
        raw_lines = [
            "Content-Type: text/html",
            "Set-Cookie: session_id=abc123",
        ]

        parsed_headers, parsed_cookies = _ParseHeaders(
            raw_lines, parse_cookies=False
        )

        assert parsed_headers["Content-Type"] == "text/html"
        # Accept either outcome: no cookies extracted, or the raw header kept.
        assert len(parsed_cookies) == 0 or "Set-Cookie" in parsed_headers

    def test_empty_header_lines(self):
        """No input lines means both returned collections are empty."""
        parsed_headers, parsed_cookies = _ParseHeaders([])

        assert len(parsed_headers) == 0
        assert len(parsed_cookies) == 0
class TestRequestNamedTuple:
    """Checks that ``Request`` stores its nine fields and exposes them by name and index."""

    def test_request_creation(self):
        """Keyword construction keeps every supplied value readable by name."""
        fields = dict(
            method="GET",
            url="https://example.com",
            params={"q": "test"},
            data=None,
            json_data=None,
            headers={"User-Agent": "Test"},
            cookies={"session": "abc123"},
            proxy=None,
            follow_redirects=True,
        )

        req = Request(**fields)

        assert req.method == "GET"
        assert req.url == "https://example.com"
        assert req.params == {"q": "test"}
        assert req.headers == {"User-Agent": "Test"}
        assert req.follow_redirects is True

    def test_request_defaults(self):
        """A POST-style request with empty/None fields round-trips its values."""
        fields = dict(
            method="POST",
            url="https://api.example.com",
            params=None,
            data='{"key": "value"}',
            json_data={"key": "value"},
            headers={},
            cookies={},
            proxy="http://proxy:8080",
            follow_redirects=False,
        )

        req = Request(**fields)

        assert req.method == "POST"
        assert req.data == '{"key": "value"}'
        assert req.json_data == {"key": "value"}
        assert req.proxy == "http://proxy:8080"
        assert req.follow_redirects is False

    def test_request_field_access(self):
        """Positional construction exposes all fields by name and the first two by index."""
        req = Request(
            "GET", "https://example.com", {}, None, None, {}, {}, None, True
        )

        # Access by attribute name.
        field_names = (
            'method',
            'url',
            'params',
            'data',
            'json_data',
            'headers',
            'cookies',
            'proxy',
            'follow_redirects',
        )
        for field_name in field_names:
            assert hasattr(req, field_name)

        # Access by position.
        assert req[0] == "GET"
        assert req[1] == "https://example.com"
class TestLoggingLevels:
    """Checks on the ``_known_logging_levels`` name-to-level mapping."""

    def test_known_logging_levels(self):
        """Every expected level name is present and maps to an int."""
        level_names = ("debug", "info", "warning", "error", "critical", "fatal")

        for name in level_names:
            assert name in _known_logging_levels
            assert isinstance(_known_logging_levels[name], int)

    def test_logging_level_values(self):
        """Each name maps to the matching constant from the ``logging`` module."""
        import logging

        wanted = {
            "debug": logging.DEBUG,
            "info": logging.INFO,
            "warning": logging.WARNING,
            "error": logging.ERROR,
            "critical": logging.CRITICAL,
            "fatal": logging.FATAL,
        }

        for name, constant in wanted.items():
            assert _known_logging_levels[name] == constant

    def test_level_hierarchy(self):
        """Levels from debug up to critical are strictly increasing."""
        by_severity = [
            _known_logging_levels[name]
            for name in ("debug", "info", "warning", "error", "critical")
        ]

        # Compare each level with its immediate successor.
        for lower, higher in zip(by_severity, by_severity[1:]):
            assert lower < higher
|