# SolarImageDownloader/tests/test_url_generator.py
# Repository page metadata: AK51 -- "Upload 13308 files" -- commit b610d23 (verified)
"""Tests for NASA URL generation and validation."""
import pytest
from datetime import datetime, timedelta
from src.downloader.url_generator import URLGenerator
class TestURLGenerator:
    """Test cases for the URLGenerator class.

    Each test gets a fresh ``URLGenerator`` from ``setup_method`` and passes
    explicit dates to the generator methods it exercises.
    """

    # Number of URLs the tests expect the generator to produce for one day.
    # The original expectation was documented as 24 hours x 5 minute slots x
    # 3 second values; the actual slot values are defined by URLGenerator and
    # are not visible from this file -- confirm there if this count changes.
    EXPECTED_URLS_PER_DAY = 24 * 5 * 3  # 360

    def setup_method(self):
        """Create a fresh URLGenerator before every test."""
        self.generator = URLGenerator()

    def _assert_all_valid(self, urls):
        """Assert that every URL in *urls* is accepted by ``validate_url``."""
        for url in urls:
            assert self.generator.validate_url(url), f"URL should be valid: {url}"

    def _extract_date(self, url):
        """Return the calendar date encoded in *url*.

        ``extract_metadata_from_url`` returns ``(None, None)`` for URLs it
        cannot parse, so fail with a readable assertion instead of letting a
        later ``.date()`` call raise ``AttributeError``.
        """
        parsed, _ = self.generator.extract_metadata_from_url(url)
        assert parsed is not None, f"Could not extract a date from URL: {url}"
        return parsed.date()

    def test_construct_url(self):
        """Test URL construction for specific date and time."""
        date = datetime(2025, 12, 19)
        time_sequence = "120000"
        url = self.generator.construct_url(date, time_sequence)
        expected = "https://sdo.gsfc.nasa.gov/assets/img/browse/2025/12/19/20251219_120000_4096_0211.jpg"
        assert url == expected

    def test_validate_url_valid(self):
        """Test validation of valid NASA SDO URLs."""
        valid_urls = [
            "https://sdo.gsfc.nasa.gov/assets/img/browse/2025/12/19/20251219_120000_4096_0211.jpg",
            "https://sdo.gsfc.nasa.gov/assets/img/browse/2024/01/01/20240101_000000_4096_0211.jpg",
            "https://sdo.gsfc.nasa.gov/assets/img/browse/2023/06/15/20230615_235959_4096_0211.jpg",
        ]
        self._assert_all_valid(valid_urls)

    def test_validate_url_invalid(self):
        """Test validation of invalid URLs."""
        invalid_urls = [
            "",  # Empty string
            "https://example.com/image.jpg",  # Wrong domain
            "https://sdo.gsfc.nasa.gov/assets/img/browse/2025/12/19/20251219_120000_2048_0211.jpg",  # Wrong resolution
            "https://sdo.gsfc.nasa.gov/assets/img/browse/2025/12/19/20251219_120000_4096_0193.jpg",  # Wrong instrument
            "https://sdo.gsfc.nasa.gov/assets/img/browse/2025/12/19/20251220_120000_4096_0211.jpg",  # Date mismatch
            "https://sdo.gsfc.nasa.gov/assets/img/browse/2025/13/19/20251319_120000_4096_0211.jpg",  # Invalid month
            "https://sdo.gsfc.nasa.gov/assets/img/browse/2025/12/19/20251219_250000_4096_0211.jpg",  # Invalid hour
        ]
        for url in invalid_urls:
            assert not self.generator.validate_url(url), f"URL should be invalid: {url}"

    def test_extract_metadata_from_url(self):
        """Test metadata extraction from valid URLs."""
        url = "https://sdo.gsfc.nasa.gov/assets/img/browse/2025/12/19/20251219_143000_4096_0211.jpg"
        date, time_sequence = self.generator.extract_metadata_from_url(url)
        assert date == datetime(2025, 12, 19)
        assert time_sequence == "143000"

    def test_extract_metadata_invalid_url(self):
        """Test metadata extraction from invalid URLs."""
        invalid_url = "https://example.com/invalid.jpg"
        date, time_sequence = self.generator.extract_metadata_from_url(invalid_url)
        assert date is None
        assert time_sequence is None

    def test_generate_daily_urls(self):
        """Test generation of URLs for a single day."""
        date = datetime(2025, 12, 19)
        urls = self.generator.generate_daily_urls(date)
        assert len(urls) == self.EXPECTED_URLS_PER_DAY
        # The first and last entries pin the expected ordering of the day.
        assert "20251219_000000_4096_0211.jpg" in urls[0]
        assert "20251219_234859_4096_0211.jpg" in urls[-1]
        # A single day is small enough to validate every URL.
        self._assert_all_valid(urls)

    def test_generate_last_month_urls(self):
        """Test generation of URLs for the last 30 days."""
        end_date = datetime(2025, 12, 19)
        urls = self.generator.generate_last_month_urls(end_date)
        assert len(urls) == 30 * self.EXPECTED_URLS_PER_DAY
        # Validate only every 500th URL to keep the test fast.
        self._assert_all_valid(urls[::500])

    def test_last_month_date_range(self):
        """Test that last month URLs cover the correct date range."""
        end_date = datetime(2025, 12, 19)
        urls = self.generator.generate_last_month_urls(end_date)
        # Guard the indexing below so an empty result fails readably.
        assert urls, "generate_last_month_urls returned no URLs"
        # 30 days means 29 days back plus the end date itself.
        expected_start = end_date - timedelta(days=29)
        assert self._extract_date(urls[0]) == expected_start.date()
        assert self._extract_date(urls[-1]) == end_date.date()

    def test_generate_default_urls(self):
        """Test generation of URLs for the default range (1 day)."""
        end_date = datetime(2025, 12, 19)
        urls = self.generator.generate_default_urls(end_date)
        assert len(urls) == self.EXPECTED_URLS_PER_DAY
        # Check every 50th URL: each sampled URL must fall on the end date.
        for url in urls[::50]:
            assert self._extract_date(url) == end_date.date()

    def test_generate_date_range_urls_custom(self):
        """Test generation of URLs for custom date ranges."""
        end_date = datetime(2025, 12, 19)
        # Test 3 days
        urls_3_days = self.generator.generate_date_range_urls(3, end_date)
        assert len(urls_3_days) == 3 * self.EXPECTED_URLS_PER_DAY
        # Test 7 days
        urls_7_days = self.generator.generate_date_range_urls(7, end_date)
        assert len(urls_7_days) == 7 * self.EXPECTED_URLS_PER_DAY

    def test_generate_date_range_urls(self):
        """Test generation of URLs for custom date range."""
        end_date = datetime(2025, 12, 19)
        days = 7  # One week
        urls = self.generator.generate_date_range_urls(days, end_date)
        # The length check also guarantees the list is non-empty for indexing.
        assert len(urls) == 7 * self.EXPECTED_URLS_PER_DAY
        # 7 days means 6 days back plus the end date itself.
        expected_start = end_date - timedelta(days=6)
        assert self._extract_date(urls[0]) == expected_start.date()
        assert self._extract_date(urls[-1]) == end_date.date()

    def test_generate_date_range_urls_single_day(self):
        """Test generation of URLs for single day range."""
        end_date = datetime(2025, 12, 19)
        urls = self.generator.generate_date_range_urls(1, end_date)
        assert len(urls) == 1 * self.EXPECTED_URLS_PER_DAY
        # Validate only every 100th URL to keep the test fast.
        self._assert_all_valid(urls[::100])