"""Tests for ``create_station_stats`` imported from ``main``."""

import numpy as np
import pandas as pd
import pytest

from main import create_station_stats


@pytest.fixture
def sample_pivoted_data():
    """Return a small pivoted frame used as input for the tests below.

    Rows are indexed by ``(Station_Number, Org_Analyte_Name)`` and columns
    by ``(aggregation, sample position)``.  Every ``max``/``mean``/``min``
    cell holds 10.0 and every ``count`` cell holds 100, so the expected
    output of each test can be stated exactly.
    """
    index = pd.MultiIndex.from_product(
        [
            ["1.00", "3.20"],  # Station_Number
            [
                "Depth, Secchi Disk Depth",
                "Temperature, Water",
                "Dissolved Oxygen",
                "Turbidity",
                "Salinity",
                "pH",
            ],  # Org_Analyte_Name
        ],
        names=["Station_Number", "Org_Analyte_Name"],
    )

    columns = pd.MultiIndex.from_product(
        [
            ["count", "max", "mean", "min"],  # Aggregation functions
            ["Bottom", "Surface"],  # Sample_Position
        ]
    )

    # Start from a uniform value so every statistic is trivially predictable.
    data = np.full((len(index), len(columns)), 10.0)
    df = pd.DataFrame(data, index=index, columns=columns)

    # Give the counts a value that is distinguishable from the measurements.
    df.loc[:, ("count", "Bottom")] = 100
    df.loc[:, ("count", "Surface")] = 100

    return df


def test_create_station_stats_basic(sample_pivoted_data):
    """The result is a four-row DataFrame with the identifying columns."""
    station = "3.20"
    result = create_station_stats(sample_pivoted_data, station)

    assert isinstance(result, pd.DataFrame)
    assert len(result) == 4  # Average, Maximum, Minimum, n=
    assert "Station" in result.columns
    assert "Statistic" in result.columns


def test_create_station_stats_values(sample_pivoted_data):
    """Values from the pivoted data land in the matching output column."""
    station = "3.20"
    result = create_station_stats(sample_pivoted_data, station)

    surface_do = result["Dissolved Oxygen (mg/L) Surface"].tolist()
    assert surface_do == [10.0, 10.0, 10.0, 100]  # mean, max, min, count


def test_create_station_stats_columns(sample_pivoted_data):
    """The result exposes exactly the expected set of columns."""
    station = "3.20"
    result = create_station_stats(sample_pivoted_data, station)

    expected_columns = {
        "Station",
        "Statistic",
        "Secchi Depth (feet)",
        "Temperature (°C) Surface",
        "Temperature (°C) Bottom",
        "Dissolved Oxygen (mg/L) Surface",
        "Dissolved Oxygen (mg/L) Bottom",
        "Turbidity (NTU) Surface",
        "Turbidity (NTU) Bottom",
        "Salinity (ppt) Surface",
        "Salinity (ppt) Bottom",
        "pH Surface",
        "pH Bottom",
    }

    assert set(result.columns) == expected_columns


def test_create_station_stats_missing_data(sample_pivoted_data):
    """A NaN in the pivoted input is preserved in the output."""
    station = "3.20"
    # The fixture is function-scoped, so mutating it here does not leak
    # into other tests.
    sample_pivoted_data.loc[(station, "pH"), ("mean", "Surface")] = np.nan

    result = create_station_stats(sample_pivoted_data, station)

    # Use positional access: the mean is expected in the first row, and
    # ``.iloc`` does not depend on how the result happens to be indexed.
    assert pd.isna(result["pH Surface"].iloc[0])


def test_create_station_stats_statistics(sample_pivoted_data):
    """The ``Statistic`` column lists the statistics in a fixed order."""
    station = "3.20"
    result = create_station_stats(sample_pivoted_data, station)

    expected_statistics = ["Average", "Maximum", "Minimum", "n="]
    assert result["Statistic"].tolist() == expected_statistics


def test_create_station_stats_invalid_station(sample_pivoted_data):
    """An unknown station identifier raises ``KeyError``."""
    with pytest.raises(KeyError):
        create_station_stats(sample_pivoted_data, "invalid_station")