Spaces:
Sleeping
Sleeping
| """Tests for NASA URL generation and validation.""" | |
| import pytest | |
| from datetime import datetime, timedelta | |
| from src.downloader.url_generator import URLGenerator | |
class TestURLGenerator:
    """Test cases for the URLGenerator class.

    Each test runs against a fresh ``URLGenerator`` built in
    ``setup_method``. Fixed dates are used throughout so the expected
    URLs and counts are deterministic.
    """

    # Number of URLs the tests expect for one day
    # (24 hours * 5 minute values * 3 second values = 360).
    URLS_PER_DAY = 360

    def setup_method(self):
        """Set up test fixtures."""
        self.generator = URLGenerator()

    def _assert_all_valid(self, urls):
        """Assert that every URL in *urls* passes ``validate_url``.

        The assertion message names the offending URL so a failure in a
        bulk check can be diagnosed without re-running the test.
        """
        for url in urls:
            assert self.generator.validate_url(url), f"URL should be valid: {url}"

    def _extract_date(self, url):
        """Return the calendar date extracted from *url*.

        ``extract_metadata_from_url`` yields ``None`` for URLs it cannot
        parse; asserting here turns that case into a readable test
        failure instead of an ``AttributeError`` on ``None.date()``.
        """
        extracted, _ = self.generator.extract_metadata_from_url(url)
        assert extracted is not None, f"No date could be extracted from: {url}"
        return extracted.date()

    def test_construct_url(self):
        """Test URL construction for specific date and time."""
        date = datetime(2025, 12, 19)
        time_sequence = "120000"

        url = self.generator.construct_url(date, time_sequence)

        expected = "https://sdo.gsfc.nasa.gov/assets/img/browse/2025/12/19/20251219_120000_4096_0211.jpg"
        assert url == expected

    def test_validate_url_valid(self):
        """Test validation of valid NASA SDO URLs."""
        valid_urls = [
            "https://sdo.gsfc.nasa.gov/assets/img/browse/2025/12/19/20251219_120000_4096_0211.jpg",
            "https://sdo.gsfc.nasa.gov/assets/img/browse/2024/01/01/20240101_000000_4096_0211.jpg",
            "https://sdo.gsfc.nasa.gov/assets/img/browse/2023/06/15/20230615_235959_4096_0211.jpg",
        ]

        self._assert_all_valid(valid_urls)

    def test_validate_url_invalid(self):
        """Test validation of invalid URLs."""
        invalid_urls = [
            "",  # Empty string
            "https://example.com/image.jpg",  # Wrong domain
            "https://sdo.gsfc.nasa.gov/assets/img/browse/2025/12/19/20251219_120000_2048_0211.jpg",  # Wrong resolution
            "https://sdo.gsfc.nasa.gov/assets/img/browse/2025/12/19/20251219_120000_4096_0193.jpg",  # Wrong instrument
            "https://sdo.gsfc.nasa.gov/assets/img/browse/2025/12/19/20251220_120000_4096_0211.jpg",  # Date mismatch
            "https://sdo.gsfc.nasa.gov/assets/img/browse/2025/13/19/20251319_120000_4096_0211.jpg",  # Invalid month
            "https://sdo.gsfc.nasa.gov/assets/img/browse/2025/12/19/20251219_250000_4096_0211.jpg",  # Invalid hour
        ]

        for url in invalid_urls:
            assert not self.generator.validate_url(url), f"URL should be invalid: {url}"

    def test_extract_metadata_from_url(self):
        """Test metadata extraction from valid URLs."""
        url = "https://sdo.gsfc.nasa.gov/assets/img/browse/2025/12/19/20251219_143000_4096_0211.jpg"

        date, time_sequence = self.generator.extract_metadata_from_url(url)

        assert date == datetime(2025, 12, 19)
        assert time_sequence == "143000"

    def test_extract_metadata_invalid_url(self):
        """Test metadata extraction from invalid URLs."""
        invalid_url = "https://example.com/invalid.jpg"

        date, time_sequence = self.generator.extract_metadata_from_url(invalid_url)

        assert date is None
        assert time_sequence is None

    def test_generate_daily_urls(self):
        """Test generation of URLs for a single day."""
        date = datetime(2025, 12, 19)

        urls = self.generator.generate_daily_urls(date)

        assert len(urls) == self.URLS_PER_DAY

        # Check first and last URLs
        assert "20251219_000000_4096_0211.jpg" in urls[0]
        assert "20251219_234859_4096_0211.jpg" in urls[-1]

        # All URLs should be valid
        self._assert_all_valid(urls)

    def test_generate_last_month_urls(self):
        """Test generation of URLs for the last 30 days."""
        end_date = datetime(2025, 12, 19)

        urls = self.generator.generate_last_month_urls(end_date)

        # 30 days * 360 URLs per day = 10800 URLs
        assert len(urls) == 30 * self.URLS_PER_DAY

        # Validate every 500th URL only, to keep the test fast
        self._assert_all_valid(urls[::500])

    def test_last_month_date_range(self):
        """Test that last month URLs cover the correct date range."""
        end_date = datetime(2025, 12, 19)

        urls = self.generator.generate_last_month_urls(end_date)
        assert urls, "generate_last_month_urls returned no URLs"

        # Extract dates from first and last URLs
        first_day = self._extract_date(urls[0])
        last_day = self._extract_date(urls[-1])

        # 30 days means 29 days back + end date
        expected_start = end_date - timedelta(days=29)

        assert first_day == expected_start.date()
        assert last_day == end_date.date()

    def test_generate_default_urls(self):
        """Test generation of URLs for the default range (1 day)."""
        end_date = datetime(2025, 12, 19)

        urls = self.generator.generate_default_urls(end_date)

        # 1 day * 360 URLs per day = 360 URLs
        assert len(urls) == self.URLS_PER_DAY

        # All URLs should be for the same date (every 50th URL, for speed)
        for url in urls[::50]:
            assert self._extract_date(url) == end_date.date()

    def test_generate_date_range_urls_custom(self):
        """Test generation of URLs for custom date ranges."""
        end_date = datetime(2025, 12, 19)

        # Test 3 days
        urls_3_days = self.generator.generate_date_range_urls(3, end_date)
        assert len(urls_3_days) == 3 * self.URLS_PER_DAY

        # Test 7 days
        urls_7_days = self.generator.generate_date_range_urls(7, end_date)
        assert len(urls_7_days) == 7 * self.URLS_PER_DAY

    def test_generate_date_range_urls(self):
        """Test generation of URLs for custom date range."""
        end_date = datetime(2025, 12, 19)
        days = 7  # One week

        urls = self.generator.generate_date_range_urls(days, end_date)

        # 7 days * 360 URLs per day = 2520 URLs
        assert len(urls) == 7 * self.URLS_PER_DAY

        # Extract dates from first and last URLs
        first_day = self._extract_date(urls[0])
        last_day = self._extract_date(urls[-1])

        # 7 days means 6 days back + end date
        expected_start = end_date - timedelta(days=6)

        assert first_day == expected_start.date()
        assert last_day == end_date.date()

    def test_generate_date_range_urls_single_day(self):
        """Test generation of URLs for single day range."""
        end_date = datetime(2025, 12, 19)

        urls = self.generator.generate_date_range_urls(1, end_date)

        # 1 day * 360 URLs per day = 360 URLs
        assert len(urls) == 1 * self.URLS_PER_DAY

        # Validate every 100th URL only, to keep the test fast
        self._assert_all_valid(urls[::100])