"""Tests for the Request class.""" import pickle import pytest from scrapling.spiders.request import Request from scrapling.core._types import Any, Dict, AsyncGenerator class TestRequestCreation: """Test Request initialization and basic attributes.""" def test_basic_request_creation(self): """Test creating a request with just a URL.""" request = Request("https://example.com") assert request.url == "https://example.com" assert request.sid == "" assert request.callback is None assert request.priority == 0 assert request.dont_filter is False assert request.meta == {} assert request._retry_count == 0 assert request._session_kwargs == {} def test_request_with_all_parameters(self): """Test creating a request with all parameters.""" async def my_callback(response) -> AsyncGenerator[Dict[str, Any] | Request | None, None]: yield {"test": "data"} request = Request( url="https://example.com/page", sid="my_session", callback=my_callback, priority=10, dont_filter=True, meta={"key": "value"}, _retry_count=2, proxy="http://proxy:8080", timeout=30, ) assert request.url == "https://example.com/page" assert request.sid == "my_session" assert request.callback == my_callback assert request.priority == 10 assert request.dont_filter is True assert request.meta == {"key": "value"} assert request._retry_count == 2 assert request._session_kwargs == {"proxy": "http://proxy:8080", "timeout": 30} def test_request_meta_default_is_empty_dict(self): """Test that meta defaults to empty dict, not shared reference.""" r1 = Request("https://example.com") r2 = Request("https://example.com") r1.meta["key"] = "value" assert r1.meta == {"key": "value"} assert r2.meta == {} class TestRequestProperties: """Test Request computed properties.""" def test_domain_extraction(self): """Test domain property extracts netloc correctly.""" request = Request("https://www.example.com/path/page.html?query=1") assert request.domain == "www.example.com" def test_domain_with_port(self): """Test domain extraction with port number.""" request = Request("http://localhost:8080/api") assert request.domain == "localhost:8080" def test_domain_with_subdomain(self): """Test domain extraction with subdomains.""" request = Request("https://api.v2.example.com/endpoint") assert request.domain == "api.v2.example.com" def test_fingerprint_returns_bytes(self): """Test fingerprint generation returns bytes.""" request = Request("https://example.com") fp = request.update_fingerprint() assert isinstance(fp, bytes) assert len(fp) == 20 # SHA1 produces 20 bytes def test_fingerprint_is_deterministic(self): """Test same request produces same fingerprint.""" r1 = Request("https://example.com", data={"key": "value"}) r2 = Request("https://example.com", data={"key": "value"}) assert r1.update_fingerprint() == r2.update_fingerprint() def test_fingerprint_different_urls(self): """Test different URLs produce different fingerprints.""" r1 = Request("https://example.com/page1") r2 = Request("https://example.com/page2") assert r1.update_fingerprint() != r2.update_fingerprint() class TestRequestCopy: """Test Request copy functionality.""" def test_copy_creates_independent_request(self): """Test that copy creates a new independent request.""" async def callback(response) -> AsyncGenerator[Dict[str, Any] | Request | None, None]: yield None original = Request( url="https://example.com", sid="session", callback=callback, priority=5, dont_filter=True, meta={"original": True}, _retry_count=1, proxy="http://proxy:8080", ) copied = original.copy() # Check all values are copied assert copied.url == original.url assert copied.sid == original.sid assert copied.callback == original.callback assert copied.priority == original.priority assert copied.dont_filter == original.dont_filter assert copied.meta == original.meta assert copied._retry_count == original._retry_count assert copied._session_kwargs == original._session_kwargs # Check they are different objects assert copied is not original assert copied.meta is not original.meta # Meta should be a copy def test_copy_meta_is_independent(self): """Test that modifying copied meta doesn't affect original.""" original = Request("https://example.com", meta={"key": "original"}) copied = original.copy() copied.meta["key"] = "modified" copied.meta["new_key"] = "new_value" assert original.meta == {"key": "original"} assert copied.meta == {"key": "modified", "new_key": "new_value"} class TestRequestComparison: """Test Request comparison operators.""" def test_priority_less_than(self): """Test less than comparison by priority.""" low_priority = Request("https://example.com/1", priority=1) high_priority = Request("https://example.com/2", priority=10) assert low_priority < high_priority assert not high_priority < low_priority def test_priority_greater_than(self): """Test greater than comparison by priority.""" low_priority = Request("https://example.com/1", priority=1) high_priority = Request("https://example.com/2", priority=10) assert high_priority > low_priority assert not low_priority > high_priority def test_equality_by_fingerprint(self): """Test equality comparison by fingerprint.""" r1 = Request("https://example.com") r2 = Request("https://example.com") r3 = Request("https://example.com/other") # Generate fingerprints first (required for equality) r1.update_fingerprint() r2.update_fingerprint() r3.update_fingerprint() assert r1 == r2 assert r1 != r3 def test_equality_different_priorities_same_fingerprint(self): """Test requests with same fingerprint are equal despite different priorities.""" r1 = Request("https://example.com", priority=1) r2 = Request("https://example.com", priority=100) # Generate fingerprints first r1.update_fingerprint() r2.update_fingerprint() assert r1 == r2 # Same fingerprint means equal def test_comparison_with_non_request(self): """Test comparison with non-Request types returns NotImplemented.""" request = Request("https://example.com") assert request.__lt__("not a request") == NotImplemented assert request.__gt__("not a request") == NotImplemented assert request.__eq__("not a request") == NotImplemented class TestRequestStringRepresentation: """Test Request string representations.""" def test_str_returns_url(self): """Test __str__ returns the URL.""" request = Request("https://example.com/page") assert str(request) == "https://example.com/page" def test_repr_without_callback(self): """Test __repr__ without callback.""" request = Request("https://example.com", priority=5) repr_str = repr(request) assert "Request" in repr_str assert "https://example.com" in repr_str assert "priority=5" in repr_str assert "callback=None" in repr_str def test_repr_with_callback(self): """Test __repr__ with named callback.""" async def my_custom_callback(response) -> AsyncGenerator[Dict[str, Any] | Request | None, None]: yield None request = Request("https://example.com", callback=my_custom_callback) repr_str = repr(request) assert "callback=my_custom_callback" in repr_str class TestRequestPickling: """Test Request serialization for checkpointing.""" def test_pickle_without_callback(self): """Test pickling request without callback.""" original = Request( url="https://example.com", sid="session", priority=5, meta={"key": "value"}, ) pickled = pickle.dumps(original) restored = pickle.loads(pickled) assert restored.url == original.url assert restored.sid == original.sid assert restored.priority == original.priority assert restored.meta == original.meta assert restored.callback is None def test_pickle_with_callback_stores_name(self): """Test that callback name is stored when pickling.""" async def parse_page(response) -> AsyncGenerator[Dict[str, Any] | Request | None, None]: yield {"data": "test"} original = Request("https://example.com", callback=parse_page) # Check getstate stores callback name state = original.__getstate__() assert state["_callback_name"] == "parse_page" assert state["callback"] is None def test_pickle_with_none_callback(self): """Test pickling with None callback.""" original = Request("https://example.com", callback=None) state = original.__getstate__() assert state["_callback_name"] is None assert state["callback"] is None def test_setstate_stores_callback_name(self): """Test that setstate correctly handles callback name.""" request = Request("https://example.com") state = { "url": "https://example.com", "sid": "", "callback": None, "priority": 0, "dont_filter": False, "meta": {}, "_retry_count": 0, "_session_kwargs": {}, "_callback_name": "custom_parse", } request.__setstate__(state) assert hasattr(request, "_callback_name") assert request._callback_name == "custom_parse" def test_pickle_roundtrip_preserves_session_kwargs(self): """Test that session kwargs are preserved through pickle.""" original = Request( "https://example.com", proxy="http://proxy:8080", timeout=30, headers={"User-Agent": "test"}, ) pickled = pickle.dumps(original) restored = pickle.loads(pickled) assert restored._session_kwargs == { "proxy": "http://proxy:8080", "timeout": 30, "headers": {"User-Agent": "test"}, } class TestRequestRestoreCallback: """Test callback restoration from spider.""" def test_restore_callback_from_spider(self): """Test restoring callback from spider instance.""" class MockSpider: async def parse(self, response) -> AsyncGenerator[Dict[str, Any] | Request | None, None]: yield None async def parse_detail(self, response) -> AsyncGenerator[Dict[str, Any] | Request | None, None]: yield {"detail": True} spider = MockSpider() request = Request("https://example.com") request._callback_name = "parse_detail" request._restore_callback(spider) # type: ignore[arg-type] assert request.callback == spider.parse_detail assert not hasattr(request, "_callback_name") def test_restore_callback_falls_back_to_parse(self): """Test that missing callback falls back to spider.parse.""" class MockSpider: async def parse(self, response) -> AsyncGenerator[Dict[str, Any] | Request | None, None]: yield None spider = MockSpider() request = Request("https://example.com") request._callback_name = "nonexistent_method" request._restore_callback(spider) # type: ignore[arg-type] assert request.callback == spider.parse assert not hasattr(request, "_callback_name") def test_restore_callback_with_none_name(self): """Test restore callback when _callback_name is None.""" class MockSpider: async def parse(self, response) -> AsyncGenerator[Dict[str, Any] | Request | None, None]: yield None spider = MockSpider() request = Request("https://example.com") request._callback_name = None request._restore_callback(spider) # type: ignore[arg-type] # Should clean up _callback_name attribute assert not hasattr(request, "_callback_name") def test_restore_callback_without_callback_name_attr(self): """Test restore callback when _callback_name attribute doesn't exist.""" class MockSpider: async def parse(self, response) -> AsyncGenerator[Dict[str, Any] | Request | None, None]: yield None spider = MockSpider() request = Request("https://example.com") # Don't set _callback_name # Should not raise an error request._restore_callback(spider) # type: ignore[arg-type]