Spaces:

CliDyn
/

Eurus

Sleeping

File size: 11,749 Bytes

ab07cb1

"""
End-to-End Tests for Eurus
===========================
These tests use REAL API calls to verify the complete workflow.
Requires valid API keys in .env file.

Run with: pytest tests/test_e2e.py -v -s
Use -s flag to see output from data retrieval.
"""

import os
import pytest
import tempfile
import shutil
from pathlib import Path
from datetime import datetime, timedelta
from dotenv import load_dotenv

# Load variables from a local .env file into the process environment so the
# API-key checks below (e.g. ARRAYLAKE_API_KEY) can find them via os.environ.
load_dotenv()


# ============================================================================
# FIXTURES
# ============================================================================

@pytest.fixture(scope="module")
def temp_data_dir():
    """Yield a scratch directory shared by all tests in this module.

    The directory is created with the ``eurus_e2e_`` prefix and is removed
    once the module's tests have finished.
    """
    scratch_dir = tempfile.mkdtemp(prefix="eurus_e2e_")
    yield scratch_dir
    # Teardown: best-effort removal, errors while deleting are ignored.
    shutil.rmtree(scratch_dir, ignore_errors=True)


@pytest.fixture(scope="module")
def has_arraylake_key():
    """Skip the requesting test unless ``ARRAYLAKE_API_KEY`` is set.

    Returns:
        True when the environment variable is present and non-empty.
    """
    if os.environ.get("ARRAYLAKE_API_KEY"):
        return True
    # pytest.skip raises, so nothing is returned on this path.
    pytest.skip("ARRAYLAKE_API_KEY not found in environment")


# ============================================================================
# E2E: ERA5 DATA RETRIEVAL
# ============================================================================

class TestERA5Retrieval:
    """End-to-end tests for ERA5 data retrieval.

    Each test calls ``retrieve_era5_data`` for real and is skipped by the
    ``has_arraylake_key`` fixture when no API key is configured. The
    ``temp_data_dir`` fixture is requested (which creates and later removes
    the directory) but is not referenced in the test bodies.
    """

    @staticmethod
    def _extract_zarr_path(result):
        """Return the dataset path mentioned in a retrieval result, or None.

        A line containing ``Path:`` takes precedence. Otherwise the first
        whitespace-separated token containing ``.zarr`` is used, ignoring
        lines that contain ``Load with``.
        """
        lines = result.split('\n')
        for line in lines:
            if "Path:" in line:
                return line.split("Path:")[-1].strip()
        for line in lines:
            if ".zarr" in line and "Load with" not in line:
                for part in line.split():
                    if ".zarr" in part:
                        return part.strip()
        return None

    @pytest.mark.slow
    def test_retrieve_sst_temporal_small_region(self, has_arraylake_key, temp_data_dir):
        """
        E2E Test: Retrieve SST data for a small region and short time period.
        This tests the complete retrieval pipeline.
        """
        from eurus.retrieval import retrieve_era5_data
        from eurus.memory import reset_memory

        # Reset memory for clean state
        reset_memory()

        # Use a small request to minimize download time
        result = retrieve_era5_data(
            query_type="temporal",
            variable_id="sst",
            start_date="2023-01-01",
            end_date="2023-01-07",  # Just 1 week
            min_latitude=25.0,
            max_latitude=30.0,
            min_longitude=260.0,  # Gulf of Mexico
            max_longitude=265.0,
        )

        print(f"\n=== ERA5 Retrieval Result ===\n{result}\n")

        # Verify success
        assert "SUCCESS" in result or "CACHE HIT" in result
        assert "sst" in result.lower()
        assert ".zarr" in result

    @pytest.mark.slow
    def test_retrieve_t2m_spatial(self, has_arraylake_key, temp_data_dir):
        """
        E2E Test: Retrieve 2m temperature as spatial data.
        Tests spatial query type.
        """
        from eurus.retrieval import retrieve_era5_data
        from eurus.memory import reset_memory

        reset_memory()

        result = retrieve_era5_data(
            query_type="spatial",
            variable_id="t2",  # 2m temperature
            start_date="2023-06-01",
            end_date="2023-06-03",  # Just 3 days
            min_latitude=40.0,
            max_latitude=50.0,
            min_longitude=0.0,
            max_longitude=10.0,  # Western Europe
        )

        print(f"\n=== T2M Spatial Result ===\n{result}\n")

        assert "SUCCESS" in result or "CACHE HIT" in result

    @pytest.mark.slow
    def test_retrieve_and_load_dataset(self, has_arraylake_key, temp_data_dir):
        """
        E2E Test: Retrieve data and verify it can be loaded with xarray.
        Tests the full data integrity pipeline.

        The test is skipped (rather than silently passing) when no existing
        dataset path can be extracted from the retrieval result.
        """
        import xarray as xr
        from eurus.retrieval import retrieve_era5_data
        from eurus.memory import reset_memory

        reset_memory()

        result = retrieve_era5_data(
            query_type="temporal",
            variable_id="sst",
            start_date="2023-02-01",
            end_date="2023-02-05",
            min_latitude=20.0,
            max_latitude=25.0,
            min_longitude=270.0,
            max_longitude=275.0,
        )

        assert "SUCCESS" in result or "CACHE HIT" in result

        path = self._extract_zarr_path(result)
        if not path or not os.path.exists(path):
            # Without a dataset on disk nothing below can be verified; report
            # that explicitly instead of letting the test pass vacuously.
            pytest.skip("Could not extract data path")

        # The context manager closes the dataset even if an assertion fails.
        with xr.open_dataset(path, engine='zarr') as ds:
            print("\n=== Loaded Dataset ===")
            print(f"Variables: {list(ds.data_vars)}")
            # Dataset.sizes is the name -> length mapping; mapping-style
            # access through Dataset.dims is deprecated in xarray.
            print(f"Dimensions: {dict(ds.sizes)}")
            print(f"Time range: {ds.time.values[0]} to {ds.time.values[-1]}")

            assert 'sst' in ds.data_vars
            assert 'time' in ds.dims
            assert ds.sizes['time'] > 0


# ============================================================================
# E2E: PYTHON REPL ANALYSIS
# ============================================================================

class TestREPLAnalysis:
    """End-to-end tests for REPL-based data analysis.

    Each test feeds a snippet of Python source to ``PythonREPLTool._run`` and
    checks the text it returns.
    """

    def test_repl_numpy_computation(self):
        """
        E2E Test: Use REPL to perform numpy computation.
        """
        from eurus.tools.repl import PythonREPLTool

        repl = PythonREPLTool()

        code = """
import numpy as np
data = np.random.randn(100)
mean = np.mean(data)
std = np.std(data)
print(f"Mean: {mean:.4f}, Std: {std:.4f}")
"""
        result = repl._run(code)
        print(f"\n=== REPL Result ===\n{result}\n")

        # Values are random, so only the labels are checked.
        assert "Mean:" in result
        assert "Std:" in result
        assert "Error" not in result

    def test_repl_pandas_dataframe(self):
        """
        E2E Test: Use REPL to create and manipulate pandas DataFrame.
        """
        from eurus.tools.repl import PythonREPLTool

        repl = PythonREPLTool()

        code = """
import pandas as pd
import numpy as np

df = pd.DataFrame({
    'date': pd.date_range('2023-01-01', periods=10),
    'temperature': np.random.randn(10) * 5 + 20,
    'humidity': np.random.randn(10) * 10 + 60
})

print("DataFrame created:")
print(df.head())
print(f"\\nStats: Mean temp = {df['temperature'].mean():.2f}")
"""
        result = repl._run(code)
        print(f"\n=== Pandas Result ===\n{result}\n")

        assert "DataFrame created" in result
        assert "temperature" in result
        assert "Error" not in result

    @pytest.mark.slow
    def test_repl_load_and_analyze_data(self, has_arraylake_key):
        """
        E2E Test: Retrieve ERA5 data, then analyze it in REPL.
        Full workflow test.

        Skipped when the retrieval result contains no ``Path:`` line that
        points at an existing location.
        """
        from eurus.retrieval import retrieve_era5_data
        from eurus.tools.repl import PythonREPLTool
        from eurus.memory import reset_memory

        reset_memory()

        # Step 1: Retrieve data
        result = retrieve_era5_data(
            query_type="temporal",
            variable_id="sst",
            start_date="2023-03-01",
            end_date="2023-03-05",
            min_latitude=25.0,
            max_latitude=28.0,
            min_longitude=265.0,
            max_longitude=268.0,
        )

        assert "SUCCESS" in result or "CACHE HIT" in result

        # Extract path
        path = None
        for line in result.split('\n'):
            if "Path:" in line:
                path = line.split("Path:")[-1].strip()
                break

        if not path or not os.path.exists(path):
            pytest.skip("Could not extract data path")

        # Step 2: Analyze in REPL
        repl = PythonREPLTool()

        # {path!r} embeds the path as a valid Python string literal, so quotes
        # or backslashes in the path cannot break the generated source.
        analysis_code = f"""
import xarray as xr
import numpy as np

# Load the dataset
ds = xr.open_dataset({path!r}, engine='zarr')
data = ds['sst']

# Calculate statistics
spatial_mean = data.mean(dim=['latitude', 'longitude'])
time_mean = data.mean(dim='time')

print("=== SST Analysis ===")
print(f"Time points: {{len(data.time)}}")
print(f"Spatial shape: {{data.shape}}")
print(f"Overall mean: {{float(data.mean()):.2f}} K")
print(f"Overall std: {{float(data.std()):.2f}} K")
print(f"Min: {{float(data.min()):.2f}} K, Max: {{float(data.max()):.2f}} K")
"""
        analysis_result = repl._run(analysis_code)
        print(f"\n=== Analysis Result ===\n{analysis_result}\n")

        assert "SST Analysis" in analysis_result
        # Checked separately: combined with `or`, the assertion would only
        # fail when BOTH words appear in the output.
        assert "Error" not in analysis_result
        assert "Security" not in analysis_result




# ============================================================================
# E2E: MEMORY PERSISTENCE
# ============================================================================

class TestMemoryPersistence:
    """End-to-end tests for memory and dataset tracking."""

    @pytest.mark.slow
    def test_memory_tracks_downloaded_data(self, has_arraylake_key):
        """
        E2E Test: Verify memory tracks downloaded datasets.

        Compares ``memory.list_datasets()`` before and after a real
        retrieval; the retrieval itself must succeed for the test to pass.
        """
        from eurus.retrieval import retrieve_era5_data
        from eurus.memory import reset_memory, get_memory

        reset_memory()
        memory = get_memory()

        # Snapshot taken right after the reset, used as the baseline below.
        initial_datasets = memory.list_datasets()

        # Download data
        result = retrieve_era5_data(
            query_type="temporal",
            variable_id="sst",
            start_date="2023-04-01",
            end_date="2023-04-03",
            min_latitude=30.0,
            max_latitude=32.0,
            min_longitude=275.0,
            max_longitude=278.0,
        )

        # A failed retrieval must fail the test instead of skipping every
        # check below; same success condition as the other retrieval tests.
        assert "SUCCESS" in result or "CACHE HIT" in result

        # Check memory registered the dataset
        datasets = memory.list_datasets()
        print(f"\n=== Registered Datasets ===\n{datasets}\n")

        # Only a fresh download is required to add an entry.
        # NOTE(review): whether a cache hit also registers a dataset is not
        # visible from this file - confirm and tighten if it does.
        if "SUCCESS" in result:
            assert len(datasets) > len(initial_datasets)


# ============================================================================
# E2E: ROUTING (if scgraph installed)
# ============================================================================

class TestRouting:
    """End-to-end tests for maritime routing."""

    def test_routing_without_deps(self):
        """
        E2E Test: With the routing dependencies absent, the route calculation
        must answer with a message mentioning scgraph or installation.
        """
        from eurus.tools.routing import HAS_ROUTING_DEPS, calculate_maritime_route

        # This check is only meaningful when the optional deps are missing.
        if HAS_ROUTING_DEPS:
            pytest.skip("scgraph is installed, skipping no-deps test")

        route_request = {
            "origin_lat": 53.5,
            "origin_lon": 8.5,
            "dest_lat": 52.4,
            "dest_lon": 4.9,
            "month": 6,
        }
        result = calculate_maritime_route(**route_request)
        print(f"\n=== Routing (no deps) ===\n{result}\n")

        # Expect a hint about the missing package rather than an exception.
        assert "scgraph" in result.lower() or "install" in result.lower()


# ============================================================================
# RUN WITH: pytest tests/test_e2e.py -v -s --tb=short
# Add -m "not slow" to skip slow tests
# ============================================================================

if __name__ == "__main__":
    # pytest.main returns the run's exit code; pass it on so that running
    # this file directly reports failures to the shell/CI instead of exiting 0.
    raise SystemExit(pytest.main([__file__, "-v", "-s", "--tb=short"]))