"""Comprehensive tests for settings CSV upload functionality. This module tests edge cases and error scenarios for the settings upload feature, including: - read_settings event handler behavior - CSV format edge cases (empty, malformed, encoding issues) - File object edge cases - Error recovery scenarios """ import tempfile from pathlib import Path from unittest.mock import Mock, patch, MagicMock import pandas as pd import pytest class TestReadSettingsHandler: """Test the read_settings event handler directly.""" def test_read_settings_with_none_returns_none(self): """Test read_settings handler logic returns None when file is None.""" # Test the handler logic directly # Simulate the read_settings function from app.py def read_settings(file): if file is None: return None from mosaic.ui.utils import load_settings df = load_settings(file.name if hasattr(file, "name") else file) return df # In actual app, returns gr.Dataframe(df, visible=True) result = read_settings(None) assert result is None def test_read_settings_with_file_object_with_name(self): """Test read_settings handles file object with .name attribute.""" from mosaic.ui.utils import load_settings # Create temporary CSV with tempfile.NamedTemporaryFile(mode="w", suffix=".csv", delete=False) as f: f.write("Slide,Site Type,Sex,Cancer Subtype\n") f.write("slide1.svs,Primary,Male,Unknown\n") f.flush() temp_path = f.name try: # Create mock file object with .name attribute mock_file = Mock() mock_file.name = temp_path # Simulate read_settings handler def read_settings(file): if file is None: return None df = load_settings(file.name if hasattr(file, "name") else file) return df result = read_settings(mock_file) # Verify DataFrame was loaded assert isinstance(result, pd.DataFrame) assert len(result) == 1 assert result["Slide"].iloc[0] == "slide1.svs" finally: Path(temp_path).unlink(missing_ok=True) def test_read_settings_with_file_path_string(self): """Test read_settings handles direct file path string.""" from mosaic.ui.utils import load_settings # Create temporary CSV with tempfile.NamedTemporaryFile(mode="w", suffix=".csv", delete=False) as f: f.write("Slide,Site Type,Sex,Cancer Subtype\n") f.write("slide1.svs,Primary,Male,Unknown\n") f.flush() temp_path = f.name try: # Simulate read_settings handler with direct path def read_settings(file): if file is None: return None df = load_settings(file.name if hasattr(file, "name") else file) return df result = read_settings(temp_path) # Verify DataFrame was loaded assert isinstance(result, pd.DataFrame) assert len(result) == 1 assert result["Slide"].iloc[0] == "slide1.svs" finally: Path(temp_path).unlink(missing_ok=True) def test_read_settings_with_file_object_without_name_attribute(self): """Test read_settings handles file-like object without .name attribute.""" from mosaic.ui.utils import load_settings # Create temporary CSV with tempfile.NamedTemporaryFile(mode="w", suffix=".csv", delete=False) as f: f.write("Slide,Site Type,Sex,Cancer Subtype\n") f.write("slide1.svs,Primary,Male,Unknown\n") f.flush() temp_path = f.name try: # Simulate read_settings handler (file without .name falls back to using file directly) def read_settings(file): if file is None: return None df = load_settings(file.name if hasattr(file, "name") else file) return df # When file doesn't have .name, the function uses the file object directly # In practice, Gradio always provides .name, but test the fallback result = read_settings(temp_path) assert isinstance(result, pd.DataFrame) assert len(result) == 1 finally: Path(temp_path).unlink(missing_ok=True) class TestCsvFormatEdgeCases: """Test CSV format edge cases and error handling.""" def test_load_settings_empty_csv_file(self): """Test loading completely empty CSV file.""" from mosaic.ui.utils import load_settings with tempfile.NamedTemporaryFile(mode="w", suffix=".csv", delete=False) as f: # Write nothing - completely empty file f.flush() temp_path = f.name try: # Empty CSV should raise an error with pytest.raises(Exception): # Could be pd.errors.EmptyDataError or ValueError load_settings(temp_path) finally: Path(temp_path).unlink(missing_ok=True) def test_load_settings_headers_only_csv(self): """Test CSV with only headers but no data rows.""" from mosaic.ui.utils import load_settings with tempfile.NamedTemporaryFile(mode="w", suffix=".csv", delete=False) as f: f.write("Slide,Site Type,Sex,Cancer Subtype\n") # No data rows f.flush() temp_path = f.name try: df = load_settings(temp_path) # Should return empty DataFrame with correct columns assert isinstance(df, pd.DataFrame) assert len(df) == 0 assert "Slide" in df.columns assert "Site Type" in df.columns finally: Path(temp_path).unlink(missing_ok=True) def test_load_settings_csv_with_extra_columns(self): """Test CSV with extra unknown columns (should be filtered).""" from mosaic.ui.utils import load_settings, SETTINGS_COLUMNS with tempfile.NamedTemporaryFile(mode="w", suffix=".csv", delete=False) as f: f.write("Slide,Site Type,Sex,Cancer Subtype,ExtraColumn1,ExtraColumn2\n") f.write("slide1.svs,Primary,Male,Unknown,extra_value1,extra_value2\n") f.flush() temp_path = f.name try: df = load_settings(temp_path) # Extra columns should be filtered out assert "ExtraColumn1" not in df.columns assert "ExtraColumn2" not in df.columns # Only SETTINGS_COLUMNS should remain assert list(df.columns) == SETTINGS_COLUMNS finally: Path(temp_path).unlink(missing_ok=True) def test_load_settings_csv_with_special_characters(self): """Test CSV with special characters in values.""" from mosaic.ui.utils import load_settings with tempfile.NamedTemporaryFile(mode="w", suffix=".csv", delete=False) as f: f.write("Slide,Site Type,Sex,Cancer Subtype\n") # Special characters in slide name f.write("slide-1_test@2024.svs,Primary,Unknown\n") f.flush() temp_path = f.name try: df = load_settings(temp_path) # Should handle special characters correctly assert isinstance(df, pd.DataFrame) assert len(df) == 1 assert df["Slide"].iloc[0] == "slide-1_test@2024.svs" finally: Path(temp_path).unlink(missing_ok=True) def test_load_settings_csv_with_quotes_in_values(self): """Test CSV with quoted values.""" from mosaic.ui.utils import load_settings with tempfile.NamedTemporaryFile(mode="w", suffix=".csv", delete=False) as f: f.write("Slide,Site Type,Sex,Cancer Subtype\n") # Value with comma inside quotes f.write('"slide1,with,commas.svs",Primary,Unknown\n') f.flush() temp_path = f.name try: df = load_settings(temp_path) # Should parse quoted values correctly assert isinstance(df, pd.DataFrame) assert len(df) == 1 assert df["Slide"].iloc[0] == "slide1,with,commas.svs" finally: Path(temp_path).unlink(missing_ok=True) def test_load_settings_csv_with_missing_values(self): """Test CSV with missing/empty values in optional columns.""" from mosaic.ui.utils import load_settings with tempfile.NamedTemporaryFile(mode="w", suffix=".csv", delete=False) as f: f.write("Slide,Site Type,Sex,Cancer Subtype,IHC Subtype,Segmentation Config\n") # Empty values for optional columns f.write("slide1.svs,Primary,Male,Unknown,,\n") f.flush() temp_path = f.name try: df = load_settings(temp_path) # Should load CSV with empty values preserved assert isinstance(df, pd.DataFrame) assert len(df) == 1 # Empty strings should be preserved (validation will handle defaults later) assert df["Segmentation Config"].iloc[0] == "" assert df["IHC Subtype"].iloc[0] == "" finally: Path(temp_path).unlink(missing_ok=True) def test_load_settings_csv_with_whitespace(self): """Test CSV with extra whitespace around values.""" from mosaic.ui.utils import load_settings with tempfile.NamedTemporaryFile(mode="w", suffix=".csv", delete=False) as f: f.write("Slide,Site Type,Sex,Cancer Subtype\n") # Values with leading/trailing whitespace f.write(" slide1.svs , Primary , Male , Unknown \n") f.flush() temp_path = f.name try: df = load_settings(temp_path) # pandas should handle whitespace assert isinstance(df, pd.DataFrame) assert len(df) == 1 # Check if whitespace is preserved or stripped (depends on pandas behavior) slide_value = df["Slide"].iloc[0] assert "slide1.svs" in slide_value finally: Path(temp_path).unlink(missing_ok=True) def test_load_settings_csv_with_different_line_endings(self): """Test CSV with different line ending styles (CRLF, LF).""" from mosaic.ui.utils import load_settings # Test with CRLF (Windows style) with tempfile.NamedTemporaryFile(mode="wb", suffix=".csv", delete=False) as f: f.write(b"Slide,Site Type,Sex,Cancer Subtype\r\n") f.write(b"slide1.svs,Primary,Male,Unknown\r\n") f.flush() temp_path = f.name try: df = load_settings(temp_path) assert isinstance(df, pd.DataFrame) assert len(df) == 1 assert df["Slide"].iloc[0] == "slide1.svs" finally: Path(temp_path).unlink(missing_ok=True) def test_load_settings_csv_with_tab_delimiter_fails(self): """Test that TSV (tab-delimited) file raises error.""" from mosaic.ui.utils import load_settings with tempfile.NamedTemporaryFile(mode="w", suffix=".csv", delete=False) as f: # Use tabs instead of commas f.write("Slide\tSite Type\tCancer Subtype\n") f.write("slide1.svs\tPrimary\tUnknown\n") f.flush() temp_path = f.name try: # Should fail because columns won't be parsed correctly with pytest.raises(ValueError, match="Missing required column"): load_settings(temp_path) finally: Path(temp_path).unlink(missing_ok=True) def test_load_settings_large_csv(self): """Test loading CSV with many rows (performance test).""" from mosaic.ui.utils import load_settings num_rows = 1000 with tempfile.NamedTemporaryFile(mode="w", suffix=".csv", delete=False) as f: f.write("Slide,Site Type,Sex,Cancer Subtype\n") for i in range(num_rows): f.write(f"slide{i}.svs,Primary,Male,Unknown\n") f.flush() temp_path = f.name try: df = load_settings(temp_path) # Should handle large CSV without issues assert isinstance(df, pd.DataFrame) assert len(df) == num_rows finally: Path(temp_path).unlink(missing_ok=True) class TestEncodingEdgeCases: """Test CSV encoding edge cases.""" def test_load_settings_utf8_csv(self): """Test loading UTF-8 encoded CSV (should work).""" from mosaic.ui.utils import load_settings with tempfile.NamedTemporaryFile(mode="w", encoding="utf-8", suffix=".csv", delete=False) as f: f.write("Slide,Site Type,Sex,Cancer Subtype\n") f.write("slide1.svs,Primary,Male,Unknown\n") f.flush() temp_path = f.name try: df = load_settings(temp_path) assert isinstance(df, pd.DataFrame) assert len(df) == 1 finally: Path(temp_path).unlink(missing_ok=True) def test_load_settings_csv_with_unicode_characters(self): """Test CSV with Unicode characters in values.""" from mosaic.ui.utils import load_settings with tempfile.NamedTemporaryFile(mode="w", encoding="utf-8", suffix=".csv", delete=False) as f: f.write("Slide,Site Type,Sex,Cancer Subtype\n") # Unicode characters in slide name f.write("slide_café_™_测试.svs,Primary,Unknown\n") f.flush() temp_path = f.name try: df = load_settings(temp_path) # Should handle Unicode correctly assert isinstance(df, pd.DataFrame) assert len(df) == 1 assert "café" in df["Slide"].iloc[0] finally: Path(temp_path).unlink(missing_ok=True) class TestErrorRecoveryScenarios: """Test error recovery and user experience flows.""" def test_consecutive_csv_uploads(self): """Test uploading multiple CSVs consecutively.""" from mosaic.ui.utils import load_settings # First CSV with tempfile.NamedTemporaryFile(mode="w", suffix=".csv", delete=False) as f: f.write("Slide,Site Type,Sex,Cancer Subtype\n") f.write("slide1.svs,Primary,Male,Unknown\n") f.flush() temp_path1 = f.name # Second CSV (different data) with tempfile.NamedTemporaryFile(mode="w", suffix=".csv", delete=False) as f: f.write("Slide,Site Type,Sex,Cancer Subtype\n") f.write("slide2.svs,Metastatic,Female,LUAD\n") f.write("slide3.svs,Primary,Male,BRCA\n") f.flush() temp_path2 = f.name try: # Load first CSV df1 = load_settings(temp_path1) assert len(df1) == 1 assert df1["Slide"].iloc[0] == "slide1.svs" # Load second CSV (should completely replace) df2 = load_settings(temp_path2) assert len(df2) == 2 assert df2["Slide"].iloc[0] == "slide2.svs" assert df2["Slide"].iloc[1] == "slide3.svs" # Should be independent DataFrames assert len(df1) == 1 # df1 unchanged finally: Path(temp_path1).unlink(missing_ok=True) Path(temp_path2).unlink(missing_ok=True) def test_load_settings_after_failed_upload(self): """Test successful load after a failed upload attempt.""" from mosaic.ui.utils import load_settings # First attempt: invalid CSV (missing required columns) with tempfile.NamedTemporaryFile(mode="w", suffix=".csv", delete=False) as f: f.write("InvalidColumn\n") f.write("value\n") f.flush() invalid_path = f.name # Second attempt: valid CSV with tempfile.NamedTemporaryFile(mode="w", suffix=".csv", delete=False) as f: f.write("Slide,Site Type,Sex,Cancer Subtype\n") f.write("slide1.svs,Primary,Male,Unknown\n") f.flush() valid_path = f.name try: # First load should fail with pytest.raises(ValueError, match="Missing required column"): load_settings(invalid_path) # Second load should succeed df = load_settings(valid_path) assert isinstance(df, pd.DataFrame) assert len(df) == 1 assert df["Slide"].iloc[0] == "slide1.svs" finally: Path(invalid_path).unlink(missing_ok=True) Path(valid_path).unlink(missing_ok=True) def test_load_settings_with_file_permission_error(self): """Test behavior when file cannot be read due to permissions.""" from mosaic.ui.utils import load_settings import os if os.name == 'nt': # Skip on Windows due to different permission model pytest.skip("Permission test not applicable on Windows") with tempfile.NamedTemporaryFile(mode="w", suffix=".csv", delete=False) as f: f.write("Slide,Site Type,Sex,Cancer Subtype\n") f.write("slide1.svs,Primary,Male,Unknown\n") f.flush() temp_path = f.name try: # Remove read permissions os.chmod(temp_path, 0o000) # Should raise permission error with pytest.raises(PermissionError): load_settings(temp_path) finally: # Restore permissions for cleanup os.chmod(temp_path, 0o644) Path(temp_path).unlink(missing_ok=True) def test_load_settings_nonexistent_file(self): """Test loading from non-existent file path.""" from mosaic.ui.utils import load_settings nonexistent_path = "/tmp/this_file_does_not_exist_12345.csv" # Should raise FileNotFoundError with pytest.raises(FileNotFoundError): load_settings(nonexistent_path) class TestValidationWithUpload: """Test validation integration with CSV upload.""" def test_csv_upload_triggers_validation(self, mock_cancer_subtype_maps): """Test that uploaded CSV is automatically validated.""" from mosaic.ui.utils import load_settings, validate_settings cancer_subtype_name_map, reversed_map, cancer_subtypes = mock_cancer_subtype_maps # Create CSV with invalid values with tempfile.NamedTemporaryFile(mode="w", suffix=".csv", delete=False) as f: f.write("Slide,Site Type,Sex,Cancer Subtype,Segmentation Config\n") f.write("slide1.svs,InvalidSite,Male,InvalidSubtype,InvalidConfig\n") f.flush() temp_path = f.name try: # Load and validate df = load_settings(temp_path) validated_df = validate_settings( df, cancer_subtype_name_map, cancer_subtypes, reversed_map ) # Should apply defaults for invalid values assert validated_df["Site Type"].iloc[0] == "Primary" assert validated_df["Cancer Subtype"].iloc[0] == "Unknown" assert validated_df["Segmentation Config"].iloc[0] == "Biopsy" finally: Path(temp_path).unlink(missing_ok=True) def test_csv_upload_with_partial_invalid_data(self, mock_cancer_subtype_maps): """Test CSV with mix of valid and invalid rows.""" from mosaic.ui.utils import load_settings, validate_settings cancer_subtype_name_map, reversed_map, cancer_subtypes = mock_cancer_subtype_maps with tempfile.NamedTemporaryFile(mode="w", suffix=".csv", delete=False) as f: f.write("Slide,Site Type,Sex,Cancer Subtype\n") f.write("slide1.svs,Primary,Male,Unknown\n") # Valid f.write("slide2.svs,InvalidSite,Female,InvalidSubtype\n") # Invalid f.write("slide3.svs,Metastatic,Male,LUAD\n") # Valid f.flush() temp_path = f.name try: df = load_settings(temp_path) validated_df = validate_settings( df, cancer_subtype_name_map, cancer_subtypes, reversed_map ) # All rows should be present assert len(validated_df) == 3 # Valid rows unchanged assert validated_df.iloc[0]["Site Type"] == "Primary" assert validated_df.iloc[2]["Site Type"] == "Metastatic" # Invalid row corrected with defaults assert validated_df.iloc[1]["Site Type"] == "Primary" assert validated_df.iloc[1]["Cancer Subtype"] == "Unknown" finally: Path(temp_path).unlink(missing_ok=True)