waterdb

Sleeping

waterdb/tests/test_main.py

github-actions[bot]

Fresh start without shapefiles

5d4e96b over 1 year ago

3.67 kB

	import numpy as np
	import pandas as pd
	import pytest

	from main import create_station_stats


	@pytest.fixture
	def sample_pivoted_data() -> pd.DataFrame:
	    """Create a sample pivoted dataset that matches the expected structure.

	    Returns:
	        A DataFrame with 12 rows (2 stations x 6 analytes) indexed by
	        ``("Station_Number", "Org_Analyte_Name")`` and 8 two-level columns
	        (4 aggregations x 2 sample positions). Every ``max``/``mean``/``min``
	        cell is 10.0 and every ``count`` cell is 100.
	    """
	    # Row index: every station paired with every analyte.
	    index = pd.MultiIndex.from_product(
	        [
	            ["1.00", "3.20"],  # Station_Number
	            [
	                "Depth, Secchi Disk Depth",
	                "Temperature, Water",
	                "Dissolved Oxygen",
	                "Turbidity",
	                "Salinity",
	                "pH",
	            ],  # Org_Analyte_Name
	        ],
	        names=["Station_Number", "Org_Analyte_Name"],
	    )

	    # Column index: every aggregation paired with every sample position.
	    columns = pd.MultiIndex.from_product(
	        [
	            ["count", "max", "mean", "min"],  # Aggregation functions
	            ["Bottom", "Surface"],  # Sample_Position
	        ]
	    )

	    # Start from one constant value so expected results are trivial to state.
	    data = np.full((len(index), len(columns)), 10.0)
	    df = pd.DataFrame(data, index=index, columns=columns)

	    # Overwrite every column under the "count" aggregation in one step, so
	    # the fill does not depend on the list of sample positions above.
	    df.loc[:, "count"] = 100

	    return df


	def test_create_station_stats_basic(sample_pivoted_data):
	    """Smoke-test the type, row count and key columns of the station summary."""
	    stats = create_station_stats(sample_pivoted_data, "3.20")

	    assert isinstance(stats, pd.DataFrame)
	    # One row per statistic: Average, Maximum, Minimum, n=
	    assert len(stats) == 4
	    for required in ("Station", "Statistic"):
	        assert required in stats.columns


	def test_create_station_stats_values(sample_pivoted_data):
	    """Verify that pivoted values end up in the matching summary column."""
	    stats = create_station_stats(sample_pivoted_data, "3.20")

	    # Surface dissolved oxygen, listed as mean, max, min, count
	    expected = [10.0, 10.0, 10.0, 100]
	    observed = stats["Dissolved Oxygen (mg/L) Surface"].tolist()
	    assert observed == expected


	def test_create_station_stats_columns(sample_pivoted_data):
	    """Verify the summary exposes exactly the expected set of columns."""
	    stats = create_station_stats(sample_pivoted_data, "3.20")

	    # Columns that identify the row rather than hold a measurement.
	    identifying = {"Station", "Statistic"}
	    # Secchi depth has a single column with no Surface/Bottom suffix.
	    unpositioned = {"Secchi Depth (feet)"}
	    # Every other analyte appears once per sample position.
	    by_position = {
	        "Temperature (°C) Surface",
	        "Temperature (°C) Bottom",
	        "Dissolved Oxygen (mg/L) Surface",
	        "Dissolved Oxygen (mg/L) Bottom",
	        "Turbidity (NTU) Surface",
	        "Turbidity (NTU) Bottom",
	        "Salinity (ppt) Surface",
	        "Salinity (ppt) Bottom",
	        "pH Surface",
	        "pH Bottom",
	    }

	    assert set(stats.columns) == identifying | unpositioned | by_position


	def test_create_station_stats_missing_data(sample_pivoted_data):
	    """Test that a NaN in the pivoted input is carried into the summary.

	    The surface pH mean for the station is replaced with NaN in the fixture
	    data; the first row of the ``"pH Surface"`` summary column is then
	    expected to be NaN as well.
	    """
	    station = "3.20"
	    # Introduce a NaN value
	    sample_pivoted_data.loc[(station, "pH"), ("mean", "Surface")] = np.nan

	    result = create_station_stats(sample_pivoted_data, station)

	    # Positional access: ``series[0]`` is a label lookup and only reaches the
	    # first row when the result happens to have a default integer index.
	    assert pd.isna(result["pH Surface"].iloc[0])  # Check if NaN is preserved


	def test_create_station_stats_statistics(sample_pivoted_data):
	    """Verify the order of the labels in the ``Statistic`` column."""
	    stats = create_station_stats(sample_pivoted_data, "3.20")

	    observed_order = stats["Statistic"].tolist()
	    assert observed_order == ["Average", "Maximum", "Minimum", "n="]


	def test_create_station_stats_invalid_station(sample_pivoted_data):
	    """Verify that an unknown station number raises ``KeyError``."""
	    unknown_station = "invalid_station"
	    with pytest.raises(KeyError):
	        create_station_stats(sample_pivoted_data, unknown_station)