"""Tests for NASA URL generation and validation.""" import pytest from datetime import datetime, timedelta from src.downloader.url_generator import URLGenerator class TestURLGenerator: """Test cases for URLGenerator class.""" def setup_method(self): """Set up test fixtures.""" self.generator = URLGenerator() def test_construct_url(self): """Test URL construction for specific date and time.""" date = datetime(2025, 12, 19) time_sequence = "120000" url = self.generator.construct_url(date, time_sequence) expected = "https://sdo.gsfc.nasa.gov/assets/img/browse/2025/12/19/20251219_120000_4096_0211.jpg" assert url == expected def test_validate_url_valid(self): """Test validation of valid NASA SDO URLs.""" valid_urls = [ "https://sdo.gsfc.nasa.gov/assets/img/browse/2025/12/19/20251219_120000_4096_0211.jpg", "https://sdo.gsfc.nasa.gov/assets/img/browse/2024/01/01/20240101_000000_4096_0211.jpg", "https://sdo.gsfc.nasa.gov/assets/img/browse/2023/06/15/20230615_235959_4096_0211.jpg" ] for url in valid_urls: assert self.generator.validate_url(url), f"URL should be valid: {url}" def test_validate_url_invalid(self): """Test validation of invalid URLs.""" invalid_urls = [ "", # Empty string "https://example.com/image.jpg", # Wrong domain "https://sdo.gsfc.nasa.gov/assets/img/browse/2025/12/19/20251219_120000_2048_0211.jpg", # Wrong resolution "https://sdo.gsfc.nasa.gov/assets/img/browse/2025/12/19/20251219_120000_4096_0193.jpg", # Wrong instrument "https://sdo.gsfc.nasa.gov/assets/img/browse/2025/12/19/20251220_120000_4096_0211.jpg", # Date mismatch "https://sdo.gsfc.nasa.gov/assets/img/browse/2025/13/19/20251319_120000_4096_0211.jpg", # Invalid month "https://sdo.gsfc.nasa.gov/assets/img/browse/2025/12/19/20251219_250000_4096_0211.jpg", # Invalid hour ] for url in invalid_urls: assert not self.generator.validate_url(url), f"URL should be invalid: {url}" def test_extract_metadata_from_url(self): """Test metadata extraction from valid URLs.""" url = "https://sdo.gsfc.nasa.gov/assets/img/browse/2025/12/19/20251219_143000_4096_0211.jpg" date, time_sequence = self.generator.extract_metadata_from_url(url) assert date == datetime(2025, 12, 19) assert time_sequence == "143000" def test_extract_metadata_invalid_url(self): """Test metadata extraction from invalid URLs.""" invalid_url = "https://example.com/invalid.jpg" date, time_sequence = self.generator.extract_metadata_from_url(invalid_url) assert date is None assert time_sequence is None def test_generate_daily_urls(self): """Test generation of URLs for a single day.""" date = datetime(2025, 12, 19) urls = self.generator.generate_daily_urls(date) # Should generate 360 URLs (24 hours * 5 minutes * 3 seconds = 360) assert len(urls) == 360 # Check first and last URLs assert "20251219_000000_4096_0211.jpg" in urls[0] assert "20251219_234859_4096_0211.jpg" in urls[-1] # All URLs should be valid for url in urls: assert self.generator.validate_url(url) def test_generate_last_month_urls(self): """Test generation of URLs for the last 30 days.""" end_date = datetime(2025, 12, 19) urls = self.generator.generate_last_month_urls(end_date) # Should generate 30 days * 360 URLs per day = 10800 URLs assert len(urls) == 30 * 360 # All URLs should be valid sample_urls = urls[::500] # Test every 500th URL for performance for url in sample_urls: assert self.generator.validate_url(url) def test_last_month_date_range(self): """Test that last month URLs cover the correct date range.""" end_date = datetime(2025, 12, 19) urls = self.generator.generate_last_month_urls(end_date) # Extract dates from first and last URLs first_date, _ = self.generator.extract_metadata_from_url(urls[0]) last_date, _ = self.generator.extract_metadata_from_url(urls[-1]) expected_start = end_date - timedelta(days=29) # 30 days means 29 days back + end date assert first_date.date() == expected_start.date() assert last_date.date() == end_date.date() def test_generate_default_urls(self): """Test generation of URLs for the default range (1 day).""" end_date = datetime(2025, 12, 19) urls = self.generator.generate_default_urls(end_date) # Should generate 1 day * 360 URLs per day = 360 URLs assert len(urls) == 360 # All URLs should be for the same date for url in urls[::50]: # Test every 50th URL for performance date, _ = self.generator.extract_metadata_from_url(url) assert date.date() == end_date.date() def test_generate_date_range_urls_custom(self): """Test generation of URLs for custom date ranges.""" end_date = datetime(2025, 12, 19) # Test 3 days urls_3_days = self.generator.generate_date_range_urls(3, end_date) assert len(urls_3_days) == 3 * 360 # Test 7 days urls_7_days = self.generator.generate_date_range_urls(7, end_date) assert len(urls_7_days) == 7 * 360 def test_generate_date_range_urls(self): """Test generation of URLs for custom date range.""" end_date = datetime(2025, 12, 19) days = 7 # One week urls = self.generator.generate_date_range_urls(days, end_date) # Should generate 7 days * 360 URLs per day = 2520 URLs assert len(urls) == 7 * 360 # Extract dates from first and last URLs first_date, _ = self.generator.extract_metadata_from_url(urls[0]) last_date, _ = self.generator.extract_metadata_from_url(urls[-1]) expected_start = end_date - timedelta(days=6) # 7 days means 6 days back + end date assert first_date.date() == expected_start.date() assert last_date.date() == end_date.date() def test_generate_date_range_urls_single_day(self): """Test generation of URLs for single day range.""" end_date = datetime(2025, 12, 19) urls = self.generator.generate_date_range_urls(1, end_date) # Should generate 1 day * 360 URLs per day = 360 URLs assert len(urls) == 1 * 360 # All URLs should be valid sample_urls = urls[::100] # Test every 100th URL for performance for url in sample_urls: assert self.generator.validate_url(url)