Spaces:
Sleeping
Sleeping
File size: 7,251 Bytes
"""Tests for NASA URL generation and validation."""
import pytest
from datetime import datetime, timedelta
from src.downloader.url_generator import URLGenerator
class TestURLGenerator:
    """Test cases for the URLGenerator class.

    Each test runs against a fresh ``URLGenerator`` created in
    ``setup_method``. The tests cover URL construction, URL validation,
    metadata extraction, and bulk URL generation for one day, the last
    30 days, the default range, and custom day counts.
    """

    # Expected number of URLs generated per day: 24 * 5 * 3 = 360.
    # NOTE(review): the original comment describes this as
    # "24 hours * 5 minutes * 3 seconds" -- presumably 5 minute values and
    # 3 second values per hour; confirm against URLGenerator.
    URLS_PER_DAY = 360

    def setup_method(self):
        """Set up test fixtures: a fresh generator for every test method."""
        self.generator = URLGenerator()

    def _extracted_date(self, url):
        """Return the calendar date parsed from *url*.

        ``extract_metadata_from_url`` returns ``(None, None)`` for URLs it
        cannot parse, so assert on that explicitly; otherwise a parsing
        regression would surface as an ``AttributeError`` on ``None``
        instead of a readable assertion failure.
        """
        parsed, _ = self.generator.extract_metadata_from_url(url)
        assert parsed is not None, f"Could not extract metadata from URL: {url}"
        return parsed.date()

    def test_construct_url(self):
        """Test URL construction for specific date and time."""
        date = datetime(2025, 12, 19)
        time_sequence = "120000"

        url = self.generator.construct_url(date, time_sequence)

        expected = "https://sdo.gsfc.nasa.gov/assets/img/browse/2025/12/19/20251219_120000_4096_0211.jpg"
        assert url == expected

    def test_validate_url_valid(self):
        """Test validation of valid NASA SDO URLs."""
        valid_urls = [
            "https://sdo.gsfc.nasa.gov/assets/img/browse/2025/12/19/20251219_120000_4096_0211.jpg",
            "https://sdo.gsfc.nasa.gov/assets/img/browse/2024/01/01/20240101_000000_4096_0211.jpg",
            "https://sdo.gsfc.nasa.gov/assets/img/browse/2023/06/15/20230615_235959_4096_0211.jpg",
        ]

        for url in valid_urls:
            assert self.generator.validate_url(url), f"URL should be valid: {url}"

    def test_validate_url_invalid(self):
        """Test validation of invalid URLs."""
        invalid_urls = [
            "",  # Empty string
            "https://example.com/image.jpg",  # Wrong domain
            "https://sdo.gsfc.nasa.gov/assets/img/browse/2025/12/19/20251219_120000_2048_0211.jpg",  # Wrong resolution
            "https://sdo.gsfc.nasa.gov/assets/img/browse/2025/12/19/20251219_120000_4096_0193.jpg",  # Wrong instrument
            "https://sdo.gsfc.nasa.gov/assets/img/browse/2025/12/19/20251220_120000_4096_0211.jpg",  # Date mismatch
            "https://sdo.gsfc.nasa.gov/assets/img/browse/2025/13/19/20251319_120000_4096_0211.jpg",  # Invalid month
            "https://sdo.gsfc.nasa.gov/assets/img/browse/2025/12/19/20251219_250000_4096_0211.jpg",  # Invalid hour
        ]

        for url in invalid_urls:
            assert not self.generator.validate_url(url), f"URL should be invalid: {url}"

    def test_extract_metadata_from_url(self):
        """Test metadata extraction from valid URLs."""
        url = "https://sdo.gsfc.nasa.gov/assets/img/browse/2025/12/19/20251219_143000_4096_0211.jpg"

        date, time_sequence = self.generator.extract_metadata_from_url(url)

        assert date == datetime(2025, 12, 19)
        assert time_sequence == "143000"

    def test_extract_metadata_invalid_url(self):
        """Test metadata extraction from invalid URLs."""
        invalid_url = "https://example.com/invalid.jpg"

        date, time_sequence = self.generator.extract_metadata_from_url(invalid_url)

        assert date is None
        assert time_sequence is None

    def test_generate_daily_urls(self):
        """Test generation of URLs for a single day."""
        date = datetime(2025, 12, 19)

        urls = self.generator.generate_daily_urls(date)

        assert len(urls) == self.URLS_PER_DAY

        # Check first and last URLs
        assert "20251219_000000_4096_0211.jpg" in urls[0]
        assert "20251219_234859_4096_0211.jpg" in urls[-1]

        # All URLs should be valid
        for url in urls:
            assert self.generator.validate_url(url), f"URL should be valid: {url}"

    def test_generate_last_month_urls(self):
        """Test generation of URLs for the last 30 days."""
        end_date = datetime(2025, 12, 19)

        urls = self.generator.generate_last_month_urls(end_date)

        # 30 days of URLs at the expected per-day count
        assert len(urls) == 30 * self.URLS_PER_DAY

        # Validate every 500th URL only, to keep the test fast
        for url in urls[::500]:
            assert self.generator.validate_url(url), f"URL should be valid: {url}"

    def test_last_month_date_range(self):
        """Test that last month URLs cover the correct date range."""
        end_date = datetime(2025, 12, 19)

        urls = self.generator.generate_last_month_urls(end_date)

        # Guard the urls[0] / urls[-1] lookups below against an empty result
        assert urls, "generate_last_month_urls returned no URLs"

        # 30 days means 29 days back + end date
        expected_start = end_date - timedelta(days=29)

        assert self._extracted_date(urls[0]) == expected_start.date()
        assert self._extracted_date(urls[-1]) == end_date.date()

    def test_generate_default_urls(self):
        """Test generation of URLs for the default range (1 day)."""
        end_date = datetime(2025, 12, 19)

        urls = self.generator.generate_default_urls(end_date)

        # Default range is a single day
        assert len(urls) == self.URLS_PER_DAY

        # All URLs should be for the same date; check every 50th for speed
        for url in urls[::50]:
            assert self._extracted_date(url) == end_date.date()

    def test_generate_date_range_urls_custom(self):
        """Test generation of URLs for custom date ranges."""
        end_date = datetime(2025, 12, 19)

        # Test 3 days
        urls_3_days = self.generator.generate_date_range_urls(3, end_date)
        assert len(urls_3_days) == 3 * self.URLS_PER_DAY

        # Test 7 days
        urls_7_days = self.generator.generate_date_range_urls(7, end_date)
        assert len(urls_7_days) == 7 * self.URLS_PER_DAY

    def test_generate_date_range_urls(self):
        """Test generation of URLs for custom date range."""
        end_date = datetime(2025, 12, 19)
        days = 7  # One week

        urls = self.generator.generate_date_range_urls(days, end_date)

        assert len(urls) == days * self.URLS_PER_DAY

        # 7 days means 6 days back + end date
        expected_start = end_date - timedelta(days=days - 1)

        assert self._extracted_date(urls[0]) == expected_start.date()
        assert self._extracted_date(urls[-1]) == end_date.date()

    def test_generate_date_range_urls_single_day(self):
        """Test generation of URLs for single day range."""
        end_date = datetime(2025, 12, 19)

        urls = self.generator.generate_date_range_urls(1, end_date)

        assert len(urls) == 1 * self.URLS_PER_DAY

        # Validate every 100th URL only, to keep the test fast
        for url in urls[::100]:
            assert self.generator.validate_url(url), f"URL should be valid: {url}"