Spaces:
Running
Running
| """Tests for image fetcher and download manager.""" | |
| import pytest | |
| import time | |
| from unittest.mock import Mock, patch, MagicMock | |
| import requests | |
| from datetime import datetime | |
| from pathlib import Path | |
| from src.downloader.image_fetcher import ImageFetcher, DownloadManager | |
| from src.models import DownloadTask, TaskStatus | |
class TestImageFetcher:
    """Test cases for ImageFetcher class.

    Tests that declare a ``mock_get`` or ``mock_head`` parameter receive it
    from the fixtures defined on this class; without them pytest cannot
    resolve those parameters and the tests error out at setup.
    """

    def setup_method(self):
        """Create a fetcher with a 0.1 s rate-limit delay and 3 retries."""
        self.fetcher = ImageFetcher(rate_limit_delay=0.1, max_retries=3)

    @pytest.fixture
    def mock_get(self):
        """Yield a mock that replaces ``requests`` GET calls for one test."""
        # NOTE(review): ImageFetcher's HTTP call site is not visible from this
        # file, so the same mock is installed on both the Session method and
        # the module-level function. Narrow to the real target once confirmed.
        with patch("requests.Session.get") as mocked, patch("requests.get", mocked):
            yield mocked

    @pytest.fixture
    def mock_head(self):
        """Yield a mock that replaces ``requests`` HEAD calls for one test."""
        # NOTE(review): same assumption as mock_get -- confirm which entry
        # point ImageFetcher actually uses.
        with patch("requests.Session.head") as mocked, patch("requests.head", mocked):
            yield mocked

    def test_rate_limiting(self):
        """Test that two back-to-back calls are spaced by the rate limit."""
        start_time = time.time()

        # Make two requests
        self.fetcher._enforce_rate_limit()
        self.fetcher._enforce_rate_limit()

        elapsed = time.time() - start_time

        # Should take at least the rate limit delay configured in setup_method
        assert elapsed >= 0.1

    def test_exponential_backoff(self):
        """Test exponential backoff calculation."""
        # Delay doubles with each attempt number
        assert self.fetcher._exponential_backoff(0) == 1
        assert self.fetcher._exponential_backoff(1) == 2
        assert self.fetcher._exponential_backoff(2) == 4
        assert self.fetcher._exponential_backoff(3) == 8

        # Should cap at 60 seconds
        assert self.fetcher._exponential_backoff(10) == 60

    def test_download_image_success(self, mock_get):
        """Test successful image download."""
        # Mock successful response
        mock_response = Mock()
        mock_response.status_code = 200
        mock_response.content = b"fake image data"
        mock_get.return_value = mock_response

        success, data, error = self.fetcher.download_image("https://example.com/image.jpg")

        assert success is True
        assert data == b"fake image data"
        assert error is None

    def test_download_image_404(self, mock_get):
        """Test download when image doesn't exist."""
        # Mock 404 response
        mock_response = Mock()
        mock_response.status_code = 404
        mock_get.return_value = mock_response

        success, data, error = self.fetcher.download_image("https://example.com/missing.jpg")

        assert success is False
        assert data is None
        assert "404" in error

    def test_download_image_http_error_with_retry(self, mock_get):
        """Test download with HTTP error that gets retried."""
        # Mock server error that eventually succeeds
        mock_response_error = Mock()
        mock_response_error.status_code = 500

        mock_response_success = Mock()
        mock_response_success.status_code = 200
        mock_response_success.content = b"success data"

        # First call returns the 500, second call returns the 200
        mock_get.side_effect = [mock_response_error, mock_response_success]

        success, data, error = self.fetcher.download_image("https://example.com/image.jpg")

        assert success is True
        assert data == b"success data"
        assert error is None
        assert mock_get.call_count == 2

    def test_download_image_timeout_retry(self, mock_get):
        """Test download with timeout that gets retried."""
        # Mock timeout then success: the exception instance is raised on the
        # first call, the Mock response is returned on the second
        mock_get.side_effect = [
            requests.exceptions.Timeout("Timeout"),
            Mock(status_code=200, content=b"success after timeout"),
        ]

        success, data, error = self.fetcher.download_image("https://example.com/image.jpg")

        assert success is True
        assert data == b"success after timeout"
        assert error is None
        # One failed attempt plus one successful attempt, mirroring the
        # call-count check in the HTTP-error retry test
        assert mock_get.call_count == 2

    def test_download_image_max_retries_exceeded(self, mock_get):
        """Test download when max retries are exceeded."""
        # Mock persistent server error
        mock_response = Mock()
        mock_response.status_code = 500
        mock_get.return_value = mock_response

        success, data, error = self.fetcher.download_image("https://example.com/image.jpg")

        assert success is False
        assert data is None
        assert "HTTP 500" in error
        assert mock_get.call_count == 3  # max_retries set in setup_method

    def test_check_image_exists_true(self, mock_head):
        """Test checking if image exists when it does."""
        mock_response = Mock()
        mock_response.status_code = 200
        mock_head.return_value = mock_response

        exists = self.fetcher.check_image_exists("https://example.com/image.jpg")

        assert exists is True

    def test_check_image_exists_false(self, mock_head):
        """Test checking if image exists when it doesn't."""
        mock_response = Mock()
        mock_response.status_code = 404
        mock_head.return_value = mock_response

        exists = self.fetcher.check_image_exists("https://example.com/image.jpg")

        assert exists is False

    def test_get_image_size(self, mock_head):
        """Test getting image size from headers."""
        mock_response = Mock()
        mock_response.status_code = 200
        # Header value is a string; the expected size is the parsed integer
        mock_response.headers = {'content-length': '1024000'}
        mock_head.return_value = mock_response

        size = self.fetcher.get_image_size("https://example.com/image.jpg")

        assert size == 1024000
class TestDownloadManager:
    """Exercise DownloadManager with a mocked fetcher and a mocked storage."""

    # Sample image shared by every task built in this class.
    _TASK_URL = "https://sdo.gsfc.nasa.gov/assets/img/browse/2025/12/19/20251219_120000_4096_0211.jpg"
    _TASK_PATH = Path("data/2025/12/19/20251219_120000_4096_0211.jpg")

    def setup_method(self):
        """Build a manager wired to fresh mocks before each test."""
        # spec=ImageFetcher restricts the mock to ImageFetcher's attributes
        self.mock_storage = Mock()
        self.mock_fetcher = Mock(spec=ImageFetcher)
        self.manager = DownloadManager(self.mock_fetcher, self.mock_storage)

    def _new_task(self):
        """Return a new DownloadTask pointing at the sample image."""
        return DownloadTask(url=self._TASK_URL, target_path=self._TASK_PATH)

    def test_download_and_save_success(self):
        """A downloaded, saved and validated image completes the task."""
        job = self._new_task()

        # Fetcher hands back image bytes with no error
        self.mock_fetcher.download_image.return_value = (True, b"image data", None)

        # Storage: file not present yet, save succeeds, integrity passes
        storage = self.mock_storage
        storage.file_exists.return_value = False
        storage.save_image.return_value = job.target_path
        storage.validate_file_integrity.return_value = True

        outcome = self.manager.download_and_save(job)

        assert outcome is True
        assert job.status == TaskStatus.COMPLETED
        assert self.manager.get_download_count() == 1

    def test_download_and_save_duplicate_skip(self):
        """An already-stored file completes the task without downloading."""
        job = self._new_task()

        # Storage reports the file is already there
        self.mock_storage.file_exists.return_value = True

        outcome = self.manager.download_and_save(job)

        assert outcome is True
        assert job.status == TaskStatus.COMPLETED

        # The fetcher must not have been asked to download anything
        self.mock_fetcher.download_image.assert_not_called()

    def test_download_and_save_download_failure(self):
        """A fetcher failure marks the task failed and records the error."""
        job = self._new_task()

        # File is absent and the fetcher reports a failure with a message
        self.mock_fetcher.download_image.return_value = (False, None, "Network error")
        self.mock_storage.file_exists.return_value = False

        outcome = self.manager.download_and_save(job)

        assert outcome is False
        assert job.status == TaskStatus.FAILED
        assert job.error_message == "Network error"
        assert len(self.manager.get_failed_tasks()) == 1

    def test_download_and_save_integrity_failure(self):
        """A failed integrity check after saving marks the task failed."""
        job = self._new_task()

        # Download succeeds ...
        self.mock_fetcher.download_image.return_value = (True, b"image data", None)

        # ... and the save succeeds, but validation rejects the file
        storage = self.mock_storage
        storage.file_exists.return_value = False
        storage.save_image.return_value = job.target_path
        storage.validate_file_integrity.return_value = False

        outcome = self.manager.download_and_save(job)

        assert outcome is False
        assert job.status == TaskStatus.FAILED
        assert "integrity check failed" in job.error_message.lower()

    def test_reset_counters(self):
        """reset_counters clears the download count and failed-task list."""
        # Seed the manager with non-empty state
        self.manager.failed_tasks = [Mock(), Mock()]
        self.manager.download_count = 5

        self.manager.reset_counters()

        assert self.manager.get_download_count() == 0
        assert len(self.manager.get_failed_tasks()) == 0