# File: mosaic/tests/test_settings_upload.py
# Last commit (raylim, 343d8bf): fix: resolve column mismatch error in get_settings function
"""Comprehensive tests for settings CSV upload functionality.
This module tests edge cases and error scenarios for the settings upload feature,
including:
- read_settings event handler behavior
- CSV format edge cases (empty, malformed, encoding issues)
- File object edge cases
- Error recovery scenarios
"""
import tempfile
from pathlib import Path
from unittest.mock import Mock, patch, MagicMock
import pandas as pd
import pytest
class TestReadSettingsHandler:
"""Test the read_settings event handler directly."""
def test_read_settings_with_none_returns_none(self):
"""Test read_settings handler logic returns None when file is None."""
# Test the handler logic directly
# Simulate the read_settings function from app.py
def read_settings(file):
if file is None:
return None
from mosaic.ui.utils import load_settings
df = load_settings(file.name if hasattr(file, "name") else file)
return df # In actual app, returns gr.Dataframe(df, visible=True)
result = read_settings(None)
assert result is None
def test_read_settings_with_file_object_with_name(self):
"""Test read_settings handles file object with .name attribute."""
from mosaic.ui.utils import load_settings
# Create temporary CSV
with tempfile.NamedTemporaryFile(mode="w", suffix=".csv", delete=False) as f:
f.write("Slide,Site Type,Sex,Cancer Subtype\n")
f.write("slide1.svs,Primary,Male,Unknown\n")
f.flush()
temp_path = f.name
try:
# Create mock file object with .name attribute
mock_file = Mock()
mock_file.name = temp_path
# Simulate read_settings handler
def read_settings(file):
if file is None:
return None
df = load_settings(file.name if hasattr(file, "name") else file)
return df
result = read_settings(mock_file)
# Verify DataFrame was loaded
assert isinstance(result, pd.DataFrame)
assert len(result) == 1
assert result["Slide"].iloc[0] == "slide1.svs"
finally:
Path(temp_path).unlink(missing_ok=True)
def test_read_settings_with_file_path_string(self):
"""Test read_settings handles direct file path string."""
from mosaic.ui.utils import load_settings
# Create temporary CSV
with tempfile.NamedTemporaryFile(mode="w", suffix=".csv", delete=False) as f:
f.write("Slide,Site Type,Sex,Cancer Subtype\n")
f.write("slide1.svs,Primary,Male,Unknown\n")
f.flush()
temp_path = f.name
try:
# Simulate read_settings handler with direct path
def read_settings(file):
if file is None:
return None
df = load_settings(file.name if hasattr(file, "name") else file)
return df
result = read_settings(temp_path)
# Verify DataFrame was loaded
assert isinstance(result, pd.DataFrame)
assert len(result) == 1
assert result["Slide"].iloc[0] == "slide1.svs"
finally:
Path(temp_path).unlink(missing_ok=True)
def test_read_settings_with_file_object_without_name_attribute(self):
"""Test read_settings handles file-like object without .name attribute."""
from mosaic.ui.utils import load_settings
# Create temporary CSV
with tempfile.NamedTemporaryFile(mode="w", suffix=".csv", delete=False) as f:
f.write("Slide,Site Type,Sex,Cancer Subtype\n")
f.write("slide1.svs,Primary,Male,Unknown\n")
f.flush()
temp_path = f.name
try:
# Simulate read_settings handler (file without .name falls back to using file directly)
def read_settings(file):
if file is None:
return None
df = load_settings(file.name if hasattr(file, "name") else file)
return df
# When file doesn't have .name, the function uses the file object directly
# In practice, Gradio always provides .name, but test the fallback
result = read_settings(temp_path)
assert isinstance(result, pd.DataFrame)
assert len(result) == 1
finally:
Path(temp_path).unlink(missing_ok=True)
class TestCsvFormatEdgeCases:
    """Test CSV format edge cases and error handling."""

    @staticmethod
    def _write_csv(*lines):
        """Write ``lines`` verbatim to a new temporary .csv file; return its path.

        Each element must carry its own line terminator. The file is created
        with ``delete=False``; the caller must remove it.
        """
        with tempfile.NamedTemporaryFile(mode="w", suffix=".csv", delete=False) as f:
            f.writelines(lines)
        return f.name

    def test_load_settings_empty_csv_file(self):
        """Test loading completely empty CSV file."""
        from mosaic.ui.utils import load_settings

        temp_path = self._write_csv()  # no lines: completely empty file
        try:
            # pd.errors.EmptyDataError is a ValueError subclass; both are
            # listed so the expectation is explicit, while unrelated failures
            # (TypeError, ImportError, ...) no longer satisfy the test.
            with pytest.raises((pd.errors.EmptyDataError, ValueError)):
                load_settings(temp_path)
        finally:
            Path(temp_path).unlink(missing_ok=True)

    def test_load_settings_headers_only_csv(self):
        """Test CSV with only headers but no data rows."""
        from mosaic.ui.utils import load_settings

        temp_path = self._write_csv("Slide,Site Type,Sex,Cancer Subtype\n")
        try:
            df = load_settings(temp_path)

            # Should return empty DataFrame with correct columns
            assert isinstance(df, pd.DataFrame)
            assert len(df) == 0
            assert "Slide" in df.columns
            assert "Site Type" in df.columns
        finally:
            Path(temp_path).unlink(missing_ok=True)

    def test_load_settings_csv_with_extra_columns(self):
        """Test CSV with extra unknown columns (should be filtered)."""
        from mosaic.ui.utils import load_settings, SETTINGS_COLUMNS

        temp_path = self._write_csv(
            "Slide,Site Type,Sex,Cancer Subtype,ExtraColumn1,ExtraColumn2\n",
            "slide1.svs,Primary,Male,Unknown,extra_value1,extra_value2\n",
        )
        try:
            df = load_settings(temp_path)

            # Extra columns should be filtered out
            assert "ExtraColumn1" not in df.columns
            assert "ExtraColumn2" not in df.columns
            # Only SETTINGS_COLUMNS should remain
            assert list(df.columns) == SETTINGS_COLUMNS
        finally:
            Path(temp_path).unlink(missing_ok=True)

    def test_load_settings_csv_with_special_characters(self):
        """Test CSV with special characters in values."""
        from mosaic.ui.utils import load_settings

        temp_path = self._write_csv(
            "Slide,Site Type,Sex,Cancer Subtype\n",
            # Special characters in slide name; one value per header column
            "slide-1_test@2024.svs,Primary,Male,Unknown\n",
        )
        try:
            df = load_settings(temp_path)

            # Should handle special characters correctly
            assert isinstance(df, pd.DataFrame)
            assert len(df) == 1
            assert df["Slide"].iloc[0] == "slide-1_test@2024.svs"
        finally:
            Path(temp_path).unlink(missing_ok=True)

    def test_load_settings_csv_with_quotes_in_values(self):
        """Test CSV with quoted values."""
        from mosaic.ui.utils import load_settings

        temp_path = self._write_csv(
            "Slide,Site Type,Sex,Cancer Subtype\n",
            # Value with commas inside quotes; one value per header column
            '"slide1,with,commas.svs",Primary,Male,Unknown\n',
        )
        try:
            df = load_settings(temp_path)

            # Should parse quoted values correctly
            assert isinstance(df, pd.DataFrame)
            assert len(df) == 1
            assert df["Slide"].iloc[0] == "slide1,with,commas.svs"
        finally:
            Path(temp_path).unlink(missing_ok=True)

    def test_load_settings_csv_with_missing_values(self):
        """Test CSV with missing/empty values in optional columns."""
        from mosaic.ui.utils import load_settings

        temp_path = self._write_csv(
            "Slide,Site Type,Sex,Cancer Subtype,Segmentation Config\n",
            # Empty value for the optional column
            "slide1.svs,Primary,Male,Unknown,\n",
        )
        try:
            df = load_settings(temp_path)

            # Should load CSV with empty values preserved
            assert isinstance(df, pd.DataFrame)
            assert len(df) == 1
            # Empty strings should be preserved (validation will handle defaults later)
            assert df["Segmentation Config"].iloc[0] == ""
            # NOTE: "IHC Subtype" is not yet in use, so it is not asserted here.
        finally:
            Path(temp_path).unlink(missing_ok=True)

    def test_load_settings_csv_with_whitespace(self):
        """Test CSV with extra whitespace around values."""
        from mosaic.ui.utils import load_settings

        temp_path = self._write_csv(
            "Slide,Site Type,Sex,Cancer Subtype\n",
            # Values with leading/trailing whitespace
            " slide1.svs , Primary , Male , Unknown \n",
        )
        try:
            df = load_settings(temp_path)

            assert isinstance(df, pd.DataFrame)
            assert len(df) == 1
            # Whitespace may be preserved or stripped, so only containment is checked
            slide_value = df["Slide"].iloc[0]
            assert "slide1.svs" in slide_value
        finally:
            Path(temp_path).unlink(missing_ok=True)

    def test_load_settings_csv_with_different_line_endings(self):
        """Test CSV with CRLF (Windows style) line endings."""
        from mosaic.ui.utils import load_settings

        # Binary mode so the \r\n bytes reach the file untranslated
        with tempfile.NamedTemporaryFile(mode="wb", suffix=".csv", delete=False) as f:
            f.write(b"Slide,Site Type,Sex,Cancer Subtype\r\n")
            f.write(b"slide1.svs,Primary,Male,Unknown\r\n")
        temp_path = f.name
        try:
            df = load_settings(temp_path)

            assert isinstance(df, pd.DataFrame)
            assert len(df) == 1
            assert df["Slide"].iloc[0] == "slide1.svs"
        finally:
            Path(temp_path).unlink(missing_ok=True)

    def test_load_settings_csv_with_tab_delimiter_fails(self):
        """Test that TSV (tab-delimited) file raises error."""
        from mosaic.ui.utils import load_settings

        # Same fields as a valid file, but separated by tabs instead of commas,
        # so the delimiter is the only thing wrong with the input.
        temp_path = self._write_csv(
            "Slide\tSite Type\tSex\tCancer Subtype\n",
            "slide1.svs\tPrimary\tMale\tUnknown\n",
        )
        try:
            # Should fail because columns won't be parsed correctly
            with pytest.raises(ValueError, match="Missing required column"):
                load_settings(temp_path)
        finally:
            Path(temp_path).unlink(missing_ok=True)

    def test_load_settings_large_csv(self):
        """Test loading CSV with many rows (performance test)."""
        from mosaic.ui.utils import load_settings

        num_rows = 1000
        temp_path = self._write_csv(
            "Slide,Site Type,Sex,Cancer Subtype\n",
            *(f"slide{i}.svs,Primary,Male,Unknown\n" for i in range(num_rows)),
        )
        try:
            df = load_settings(temp_path)

            # Should handle large CSV without issues
            assert isinstance(df, pd.DataFrame)
            assert len(df) == num_rows
        finally:
            Path(temp_path).unlink(missing_ok=True)
class TestEncodingEdgeCases:
    """Test CSV encoding edge cases."""

    def test_load_settings_utf8_csv(self):
        """Test loading UTF-8 encoded CSV (should work)."""
        from mosaic.ui.utils import load_settings

        with tempfile.NamedTemporaryFile(
            mode="w", encoding="utf-8", suffix=".csv", delete=False
        ) as f:
            f.write("Slide,Site Type,Sex,Cancer Subtype\n")
            f.write("slide1.svs,Primary,Male,Unknown\n")
        temp_path = f.name
        try:
            df = load_settings(temp_path)

            assert isinstance(df, pd.DataFrame)
            assert len(df) == 1
        finally:
            Path(temp_path).unlink(missing_ok=True)

    def test_load_settings_csv_with_unicode_characters(self):
        """Test CSV with Unicode characters in values."""
        from mosaic.ui.utils import load_settings

        slide_name = "slide_café_™_测试.svs"
        with tempfile.NamedTemporaryFile(
            mode="w", encoding="utf-8", suffix=".csv", delete=False
        ) as f:
            f.write("Slide,Site Type,Sex,Cancer Subtype\n")
            # Unicode characters in slide name; one value per header column
            f.write(f"{slide_name},Primary,Male,Unknown\n")
        temp_path = f.name
        try:
            df = load_settings(temp_path)

            # Should handle Unicode correctly: the name must round-trip intact
            assert isinstance(df, pd.DataFrame)
            assert len(df) == 1
            assert df["Slide"].iloc[0] == slide_name
        finally:
            Path(temp_path).unlink(missing_ok=True)
class TestErrorRecoveryScenarios:
    """Test error recovery and user experience flows.

    Fixture files are created inside a ``tempfile.TemporaryDirectory`` so they
    are all removed together, even when a later setup step or assertion fails.
    """

    def test_consecutive_csv_uploads(self):
        """Test uploading multiple CSVs consecutively."""
        from mosaic.ui.utils import load_settings

        with tempfile.TemporaryDirectory() as tmp_dir:
            # First CSV
            first_path = Path(tmp_dir) / "first.csv"
            first_path.write_text(
                "Slide,Site Type,Sex,Cancer Subtype\n"
                "slide1.svs,Primary,Male,Unknown\n"
            )
            # Second CSV (different data)
            second_path = Path(tmp_dir) / "second.csv"
            second_path.write_text(
                "Slide,Site Type,Sex,Cancer Subtype\n"
                "slide2.svs,Metastatic,Female,LUAD\n"
                "slide3.svs,Primary,Male,BRCA\n"
            )

            # Load first CSV
            df1 = load_settings(str(first_path))
            assert len(df1) == 1
            assert df1["Slide"].iloc[0] == "slide1.svs"

            # Load second CSV (should completely replace)
            df2 = load_settings(str(second_path))
            assert len(df2) == 2
            assert df2["Slide"].iloc[0] == "slide2.svs"
            assert df2["Slide"].iloc[1] == "slide3.svs"

            # Should be independent DataFrames
            assert len(df1) == 1  # df1 unchanged

    def test_load_settings_after_failed_upload(self):
        """Test successful load after a failed upload attempt."""
        from mosaic.ui.utils import load_settings

        with tempfile.TemporaryDirectory() as tmp_dir:
            # First attempt: invalid CSV (missing required columns)
            invalid_path = Path(tmp_dir) / "invalid.csv"
            invalid_path.write_text("InvalidColumn\nvalue\n")
            # Second attempt: valid CSV
            valid_path = Path(tmp_dir) / "valid.csv"
            valid_path.write_text(
                "Slide,Site Type,Sex,Cancer Subtype\n"
                "slide1.svs,Primary,Male,Unknown\n"
            )

            # First load should fail
            with pytest.raises(ValueError, match="Missing required column"):
                load_settings(str(invalid_path))

            # Second load should succeed
            df = load_settings(str(valid_path))
            assert isinstance(df, pd.DataFrame)
            assert len(df) == 1
            assert df["Slide"].iloc[0] == "slide1.svs"

    def test_load_settings_with_file_permission_error(self):
        """Test behavior when file cannot be read due to permissions."""
        from mosaic.ui.utils import load_settings
        import os

        if os.name == "nt":
            # Skip on Windows due to different permission model
            pytest.skip("Permission test not applicable on Windows")
        if os.geteuid() == 0:
            # Root bypasses file mode bits, so the read would succeed
            pytest.skip("Permission test not applicable when running as root")

        with tempfile.TemporaryDirectory() as tmp_dir:
            csv_path = Path(tmp_dir) / "settings.csv"
            csv_path.write_text(
                "Slide,Site Type,Sex,Cancer Subtype\n"
                "slide1.svs,Primary,Male,Unknown\n"
            )
            # Remove read permissions
            os.chmod(csv_path, 0o000)
            try:
                # Should raise permission error
                with pytest.raises(PermissionError):
                    load_settings(str(csv_path))
            finally:
                # Restore permissions for cleanup
                os.chmod(csv_path, 0o644)

    def test_load_settings_nonexistent_file(self):
        """Test loading from non-existent file path."""
        from mosaic.ui.utils import load_settings

        # A path inside a freshly created directory is guaranteed not to exist
        # and does not depend on a platform-specific location such as /tmp.
        with tempfile.TemporaryDirectory() as tmp_dir:
            nonexistent_path = str(Path(tmp_dir) / "this_file_does_not_exist_12345.csv")

            # Should raise FileNotFoundError
            with pytest.raises(FileNotFoundError):
                load_settings(nonexistent_path)
class TestValidationWithUpload:
    """Check load_settings output after it is passed through validate_settings."""

    def test_csv_upload_triggers_validation(self, mock_cancer_subtype_maps):
        """Invalid values in a loaded CSV are replaced with defaults by validation."""
        from mosaic.ui.utils import load_settings, validate_settings

        name_map, reverse_map, subtypes = mock_cancer_subtype_maps

        # Single data row in which every validated field holds an invalid value
        csv_lines = [
            "Slide,Site Type,Sex,Cancer Subtype,Segmentation Config\n",
            "slide1.svs,InvalidSite,Male,InvalidSubtype,InvalidConfig\n",
        ]
        with tempfile.NamedTemporaryFile(
            mode="w", suffix=".csv", delete=False
        ) as handle:
            handle.writelines(csv_lines)
            handle.flush()
            csv_path = handle.name

        try:
            loaded = load_settings(csv_path)
            checked = validate_settings(loaded, name_map, subtypes, reverse_map)

            # Each invalid value should have been replaced by its default
            expected_defaults = {
                "Site Type": "Primary",
                "Cancer Subtype": "Unknown",
                "Segmentation Config": "Biopsy",
            }
            for column, default in expected_defaults.items():
                assert checked[column].iloc[0] == default
        finally:
            Path(csv_path).unlink(missing_ok=True)

    def test_csv_upload_with_partial_invalid_data(self, mock_cancer_subtype_maps):
        """Valid rows pass through unchanged while an invalid row gets defaults."""
        from mosaic.ui.utils import load_settings, validate_settings

        name_map, reverse_map, subtypes = mock_cancer_subtype_maps

        csv_lines = [
            "Slide,Site Type,Sex,Cancer Subtype\n",
            "slide1.svs,Primary,Male,Unknown\n",  # Valid
            "slide2.svs,InvalidSite,Female,InvalidSubtype\n",  # Invalid
            "slide3.svs,Metastatic,Male,LUAD\n",  # Valid
        ]
        with tempfile.NamedTemporaryFile(
            mode="w", suffix=".csv", delete=False
        ) as handle:
            handle.writelines(csv_lines)
            handle.flush()
            csv_path = handle.name

        try:
            loaded = load_settings(csv_path)
            checked = validate_settings(loaded, name_map, subtypes, reverse_map)

            # All rows should be present
            assert len(checked) == 3

            expectations = [
                (0, "Site Type", "Primary"),  # valid row unchanged
                (2, "Site Type", "Metastatic"),  # valid row unchanged
                (1, "Site Type", "Primary"),  # invalid row -> default
                (1, "Cancer Subtype", "Unknown"),  # invalid row -> default
            ]
            for row_index, column, expected in expectations:
                assert checked.iloc[row_index][column] == expected
        finally:
            Path(csv_path).unlink(missing_ok=True)