""" End-to-End Tests for Eurus =========================== These tests use REAL API calls to verify the complete workflow. Requires valid API keys in .env file. Run with: pytest tests/test_e2e.py -v -s Use -s flag to see output from data retrieval. """ import os import pytest import tempfile import shutil from pathlib import Path from datetime import datetime, timedelta from dotenv import load_dotenv # Load .env file load_dotenv() # ============================================================================ # FIXTURES # ============================================================================ @pytest.fixture(scope="module") def temp_data_dir(): """Create temporary data directory for tests.""" temp_dir = tempfile.mkdtemp(prefix="eurus_e2e_") yield temp_dir # Cleanup after all tests shutil.rmtree(temp_dir, ignore_errors=True) @pytest.fixture(scope="module") def has_arraylake_key(): """Check if Arraylake API key is available.""" key = os.environ.get("ARRAYLAKE_API_KEY") if not key: pytest.skip("ARRAYLAKE_API_KEY not found in environment") return True # ============================================================================ # E2E: ERA5 DATA RETRIEVAL # ============================================================================ class TestERA5Retrieval: """End-to-end tests for ERA5 data retrieval.""" @pytest.mark.slow def test_retrieve_sst_temporal_small_region(self, has_arraylake_key, temp_data_dir): """ E2E Test: Retrieve SST data for a small region and short time period. This tests the complete retrieval pipeline. """ from eurus.retrieval import retrieve_era5_data from eurus.memory import reset_memory # Reset memory for clean state reset_memory() # Use a small request to minimize download time result = retrieve_era5_data( query_type="temporal", variable_id="sst", start_date="2023-01-01", end_date="2023-01-07", # Just 1 week min_latitude=25.0, max_latitude=30.0, min_longitude=260.0, # Gulf of Mexico max_longitude=265.0, ) print(f"\n=== ERA5 Retrieval Result ===\n{result}\n") # Verify success assert "SUCCESS" in result or "CACHE HIT" in result assert "sst" in result.lower() assert ".zarr" in result @pytest.mark.slow def test_retrieve_t2m_spatial(self, has_arraylake_key, temp_data_dir): """ E2E Test: Retrieve 2m temperature as spatial data. Tests spatial query type. """ from eurus.retrieval import retrieve_era5_data from eurus.memory import reset_memory reset_memory() result = retrieve_era5_data( query_type="spatial", variable_id="t2", # 2m temperature start_date="2023-06-01", end_date="2023-06-03", # Just 3 days min_latitude=40.0, max_latitude=50.0, min_longitude=0.0, max_longitude=10.0, # Western Europe ) print(f"\n=== T2M Spatial Result ===\n{result}\n") assert "SUCCESS" in result or "CACHE HIT" in result @pytest.mark.slow def test_retrieve_and_load_dataset(self, has_arraylake_key, temp_data_dir): """ E2E Test: Retrieve data and verify it can be loaded with xarray. Tests the full data integrity pipeline. """ import xarray as xr from eurus.retrieval import retrieve_era5_data from eurus.memory import reset_memory, get_memory reset_memory() result = retrieve_era5_data( query_type="temporal", variable_id="sst", start_date="2023-02-01", end_date="2023-02-05", min_latitude=20.0, max_latitude=25.0, min_longitude=270.0, max_longitude=275.0, ) assert "SUCCESS" in result or "CACHE HIT" in result # Extract path from result # Look for the path in the result string lines = result.split('\n') path = None for line in lines: if "Path:" in line: path = line.split("Path:")[-1].strip() break if ".zarr" in line and "Load with" not in line: # Try to find zarr path parts = line.split() for part in parts: if ".zarr" in part: path = part.strip() break if path and os.path.exists(path): # Load and verify dataset ds = xr.open_dataset(path, engine='zarr') print(f"\n=== Loaded Dataset ===") print(f"Variables: {list(ds.data_vars)}") print(f"Dimensions: {dict(ds.dims)}") print(f"Time range: {ds.time.values[0]} to {ds.time.values[-1]}") assert 'sst' in ds.data_vars assert 'time' in ds.dims assert ds.dims['time'] > 0 ds.close() # ============================================================================ # E2E: PYTHON REPL ANALYSIS # ============================================================================ class TestREPLAnalysis: """End-to-end tests for REPL-based data analysis.""" def test_repl_numpy_computation(self): """ E2E Test: Use REPL to perform numpy computation. """ from eurus.tools.repl import PythonREPLTool repl = PythonREPLTool() code = """ import numpy as np data = np.random.randn(100) mean = np.mean(data) std = np.std(data) print(f"Mean: {mean:.4f}, Std: {std:.4f}") """ result = repl._run(code) print(f"\n=== REPL Result ===\n{result}\n") assert "Mean:" in result assert "Std:" in result assert "Error" not in result def test_repl_pandas_dataframe(self): """ E2E Test: Use REPL to create and manipulate pandas DataFrame. """ from eurus.tools.repl import PythonREPLTool repl = PythonREPLTool() code = """ import pandas as pd import numpy as np df = pd.DataFrame({ 'date': pd.date_range('2023-01-01', periods=10), 'temperature': np.random.randn(10) * 5 + 20, 'humidity': np.random.randn(10) * 10 + 60 }) print("DataFrame created:") print(df.head()) print(f"\\nStats: Mean temp = {df['temperature'].mean():.2f}") """ result = repl._run(code) print(f"\n=== Pandas Result ===\n{result}\n") assert "DataFrame created" in result assert "temperature" in result assert "Error" not in result @pytest.mark.slow def test_repl_load_and_analyze_data(self, has_arraylake_key): """ E2E Test: Retrieve ERA5 data, then analyze it in REPL. Full workflow test. """ from eurus.retrieval import retrieve_era5_data from eurus.tools.repl import PythonREPLTool from eurus.memory import reset_memory import xarray as xr reset_memory() # Step 1: Retrieve data result = retrieve_era5_data( query_type="temporal", variable_id="sst", start_date="2023-03-01", end_date="2023-03-05", min_latitude=25.0, max_latitude=28.0, min_longitude=265.0, max_longitude=268.0, ) assert "SUCCESS" in result or "CACHE HIT" in result # Extract path path = None for line in result.split('\n'): if "Path:" in line: path = line.split("Path:")[-1].strip() break if not path or not os.path.exists(path): pytest.skip("Could not extract data path") # Step 2: Analyze in REPL repl = PythonREPLTool() analysis_code = f""" import xarray as xr import numpy as np # Load the dataset ds = xr.open_dataset('{path}', engine='zarr') data = ds['sst'] # Calculate statistics spatial_mean = data.mean(dim=['latitude', 'longitude']) time_mean = data.mean(dim='time') print("=== SST Analysis ===") print(f"Time points: {{len(data.time)}}") print(f"Spatial shape: {{data.shape}}") print(f"Overall mean: {{float(data.mean()):.2f}} K") print(f"Overall std: {{float(data.std()):.2f}} K") print(f"Min: {{float(data.min()):.2f}} K, Max: {{float(data.max()):.2f}} K") """ analysis_result = repl._run(analysis_code) print(f"\n=== Analysis Result ===\n{analysis_result}\n") assert "SST Analysis" in analysis_result assert "Error" not in analysis_result or "Security" not in analysis_result # ============================================================================ # E2E: MEMORY PERSISTENCE # ============================================================================ class TestMemoryPersistence: """End-to-end tests for memory and dataset tracking.""" @pytest.mark.slow def test_memory_tracks_downloaded_data(self, has_arraylake_key): """ E2E Test: Verify memory tracks downloaded datasets. """ from eurus.retrieval import retrieve_era5_data from eurus.memory import reset_memory, get_memory reset_memory() memory = get_memory() # Initial state - no datasets initial_datasets = memory.list_datasets() # Download data result = retrieve_era5_data( query_type="temporal", variable_id="sst", start_date="2023-04-01", end_date="2023-04-03", min_latitude=30.0, max_latitude=32.0, min_longitude=275.0, max_longitude=278.0, ) # Check memory registered the dataset datasets = memory.list_datasets() print(f"\n=== Registered Datasets ===\n{datasets}\n") # Should have at least one dataset now if "SUCCESS" in result: assert len(datasets) > len(initial_datasets) # ============================================================================ # E2E: ROUTING (if scgraph installed) # ============================================================================ class TestRouting: """End-to-end tests for maritime routing.""" def test_routing_without_deps(self): """ E2E Test: Verify routing handles missing dependencies gracefully. """ from eurus.tools.routing import HAS_ROUTING_DEPS, calculate_maritime_route if not HAS_ROUTING_DEPS: # Should return helpful error message result = calculate_maritime_route( origin_lat=53.5, origin_lon=8.5, dest_lat=52.4, dest_lon=4.9, month=6 ) print(f"\n=== Routing (no deps) ===\n{result}\n") assert "scgraph" in result.lower() or "install" in result.lower() else: pytest.skip("scgraph is installed, skipping no-deps test") # ============================================================================ # RUN WITH: pytest tests/test_e2e.py -v -s --tb=short # Add -m "not slow" to skip slow tests # ============================================================================ if __name__ == "__main__": pytest.main([__file__, "-v", "-s", "--tb=short"])