# waterdb/tests/test_main.py
# Last commit: github-actions[bot] - "Fresh start without shapefiles" (5d4e96b)
import numpy as np
import pandas as pd
import pytest
from main import create_station_stats
@pytest.fixture
def sample_pivoted_data():
    """Return a small pivoted statistics frame for use as test input.

    Rows are a ``(Station_Number, Org_Analyte_Name)`` MultiIndex covering two
    stations and six analytes. Columns are an (aggregation, sample position)
    MultiIndex covering ``count``/``max``/``mean``/``min`` for ``Bottom`` and
    ``Surface``. Every cell holds 10.0 except the ``count`` columns, which
    are set to 100.
    """
    stations = ["1.00", "3.20"]
    analytes = [
        "Depth, Secchi Disk Depth",
        "Temperature, Water",
        "Dissolved Oxygen",
        "Turbidity",
        "Salinity",
        "pH",
    ]
    aggregations = ["count", "max", "mean", "min"]
    positions = ["Bottom", "Surface"]

    row_index = pd.MultiIndex.from_product(
        [stations, analytes],
        names=["Station_Number", "Org_Analyte_Name"],
    )
    column_index = pd.MultiIndex.from_product([aggregations, positions])

    # Uniform fill keeps the expected max/mean/min values trivial to assert.
    frame = pd.DataFrame(
        np.full((len(row_index), len(column_index)), 10.0),
        index=row_index,
        columns=column_index,
    )

    # Counts get a distinct value so they can be told apart from the
    # other statistics in the assertions.
    for position in positions:
        frame.loc[:, ("count", position)] = 100

    return frame
def test_create_station_stats_basic(sample_pivoted_data):
    """Smoke-test the type, row count and key columns of the result."""
    stats = create_station_stats(sample_pivoted_data, "3.20")

    assert isinstance(stats, pd.DataFrame)
    # One row per statistic: Average, Maximum, Minimum, n=
    assert len(stats) == 4
    for required in ("Station", "Statistic"):
        assert required in stats.columns
def test_create_station_stats_values(sample_pivoted_data):
    """Verify the surface Dissolved Oxygen column carries the input values.

    The fixture fills mean/max/min with 10.0 and count with 100, so the
    output column is expected to list them in that order.
    """
    stats = create_station_stats(sample_pivoted_data, "3.20")

    observed = stats["Dissolved Oxygen (mg/L) Surface"].tolist()
    # Row order: mean, max, min, count
    assert observed == [10.0, 10.0, 10.0, 100]
def test_create_station_stats_columns(sample_pivoted_data):
    """Verify the result exposes exactly the expected set of columns.

    Secchi depth appears as a single column; every other parameter appears
    once per sample position (Surface and Bottom).
    """
    stats = create_station_stats(sample_pivoted_data, "3.20")

    per_position_labels = (
        "Temperature (°C)",
        "Dissolved Oxygen (mg/L)",
        "Turbidity (NTU)",
        "Salinity (ppt)",
        "pH",
    )
    expected_columns = {"Station", "Statistic", "Secchi Depth (feet)"} | {
        f"{label} {position}"
        for label in per_position_labels
        for position in ("Surface", "Bottom")
    }

    assert set(stats.columns) == expected_columns
def test_create_station_stats_missing_data(sample_pivoted_data):
    """Check that a NaN in the pivoted input is preserved in the output.

    The surface pH mean for the station is replaced with NaN before calling
    ``create_station_stats``; the first row of the ``"pH Surface"`` column
    is then expected to be NaN.
    """
    station = "3.20"
    # Introduce a NaN value into the input.
    sample_pivoted_data.loc[(station, "pH"), ("mean", "Surface")] = np.nan

    result = create_station_stats(sample_pivoted_data, station)

    # Positional access on purpose: ``series[0]`` is a label lookup and only
    # means "first row" when the index happens to contain the label 0.
    assert pd.isna(result["pH Surface"].iloc[0])
def test_create_station_stats_statistics(sample_pivoted_data):
    """Verify the ``Statistic`` column lists the labels in the expected order."""
    stats = create_station_stats(sample_pivoted_data, "3.20")

    labels = stats["Statistic"].tolist()
    assert labels == ["Average", "Maximum", "Minimum", "n="]
def test_create_station_stats_invalid_station(sample_pivoted_data):
    """Verify that a station absent from the data raises ``KeyError``."""
    unknown_station = "invalid_station"
    with pytest.raises(KeyError):
        create_station_stats(sample_pivoted_data, unknown_station)