Spaces:
Sleeping
Sleeping
| """Comprehensive tests for settings CSV upload functionality. | |
| This module tests edge cases and error scenarios for the settings upload feature, | |
| including: | |
| - read_settings event handler behavior | |
| - CSV format edge cases (empty, malformed, encoding issues) | |
| - File object edge cases | |
| - Error recovery scenarios | |
| """ | |
| import tempfile | |
| from pathlib import Path | |
| from unittest.mock import Mock, patch, MagicMock | |
| import pandas as pd | |
| import pytest | |
class TestReadSettingsHandler:
    """Test the read_settings event handler directly.

    The real handler is not imported here; ``_read_settings`` simulates the
    ``read_settings`` function from app.py so that its branching (``None``,
    object with ``.name``, plain value) can be exercised in isolation.
    """

    # One header row plus a single data row, shared by every file-based test.
    _CSV_TEXT = (
        "Slide,Site Type,Sex,Cancer Subtype\n"
        "slide1.svs,Primary,Male,Unknown\n"
    )

    @staticmethod
    def _read_settings(file):
        """Simulate the read_settings handler from app.py.

        Args:
            file: ``None``, an object exposing a ``.name`` attribute, or any
                other value, which is handed to ``load_settings`` unchanged.

        Returns:
            ``None`` when ``file`` is ``None``; otherwise the result of
            ``load_settings``. (In the actual app the DataFrame is returned
            as ``gr.Dataframe(df, visible=True)``.)
        """
        if file is None:
            return None
        # Imported lazily so the None path never needs the project package.
        from mosaic.ui.utils import load_settings

        return load_settings(file.name if hasattr(file, "name") else file)

    @classmethod
    def _write_csv(cls):
        """Write ``_CSV_TEXT`` to a new temporary ``.csv`` file.

        Returns:
            The path of the file as a string. The file is created with
            ``delete=False``, so the caller is responsible for unlinking it.
        """
        with tempfile.NamedTemporaryFile(
            mode="w", suffix=".csv", delete=False
        ) as handle:
            handle.write(cls._CSV_TEXT)
        return handle.name

    def test_read_settings_with_none_returns_none(self):
        """Test read_settings handler logic returns None when file is None."""
        assert self._read_settings(None) is None

    def test_read_settings_with_file_object_with_name(self):
        """Test read_settings handles file object with .name attribute."""
        temp_path = self._write_csv()
        try:
            # Mock file object whose .name points at the temporary CSV.
            mock_file = Mock()
            mock_file.name = temp_path

            result = self._read_settings(mock_file)

            # Verify DataFrame was loaded
            assert isinstance(result, pd.DataFrame)
            assert len(result) == 1
            assert result["Slide"].iloc[0] == "slide1.svs"
        finally:
            Path(temp_path).unlink(missing_ok=True)

    def test_read_settings_with_file_path_string(self):
        """Test read_settings handles direct file path string."""
        temp_path = self._write_csv()
        try:
            result = self._read_settings(temp_path)

            # Verify DataFrame was loaded
            assert isinstance(result, pd.DataFrame)
            assert len(result) == 1
            assert result["Slide"].iloc[0] == "slide1.svs"
        finally:
            Path(temp_path).unlink(missing_ok=True)

    def test_read_settings_with_file_object_without_name_attribute(self):
        """Test read_settings falls back to the raw value when it has no .name.

        In practice, Gradio always provides ``.name``; a plain path string is
        used here as the value lacking that attribute.
        """
        temp_path = self._write_csv()
        try:
            # Guard the premise of the test: the input must take the fallback
            # branch of the handler rather than the .name branch.
            assert not hasattr(temp_path, "name")

            result = self._read_settings(temp_path)

            assert isinstance(result, pd.DataFrame)
            assert len(result) == 1
        finally:
            Path(temp_path).unlink(missing_ok=True)
class TestCsvFormatEdgeCases:
    """Test CSV format edge cases and error handling."""

    # Header used by most fixtures in this class.
    _HEADER = "Slide,Site Type,Sex,Cancer Subtype\n"

    @staticmethod
    def _write_csv(content):
        """Write ``content`` to a new temporary ``.csv`` file.

        Args:
            content: ``str`` (written in text mode) or ``bytes`` (written in
                binary mode, so line endings are stored exactly as given).

        Returns:
            The path of the file as a string. The file is created with
            ``delete=False``, so the caller is responsible for unlinking it.
        """
        mode = "wb" if isinstance(content, bytes) else "w"
        with tempfile.NamedTemporaryFile(
            mode=mode, suffix=".csv", delete=False
        ) as handle:
            handle.write(content)
        return handle.name

    def test_load_settings_empty_csv_file(self):
        """Test loading completely empty CSV file."""
        from mosaic.ui.utils import load_settings

        temp_path = self._write_csv("")  # nothing written: zero-byte file
        try:
            # pd.errors.EmptyDataError subclasses ValueError, so ValueError
            # covers both outcomes the original broad check anticipated.
            with pytest.raises(ValueError):
                load_settings(temp_path)
        finally:
            Path(temp_path).unlink(missing_ok=True)

    def test_load_settings_headers_only_csv(self):
        """Test CSV with only headers but no data rows."""
        from mosaic.ui.utils import load_settings

        temp_path = self._write_csv(self._HEADER)
        try:
            df = load_settings(temp_path)

            # Should return empty DataFrame with correct columns
            assert isinstance(df, pd.DataFrame)
            assert len(df) == 0
            assert "Slide" in df.columns
            assert "Site Type" in df.columns
        finally:
            Path(temp_path).unlink(missing_ok=True)

    def test_load_settings_csv_with_extra_columns(self):
        """Test CSV with extra unknown columns (should be filtered)."""
        from mosaic.ui.utils import load_settings, SETTINGS_COLUMNS

        temp_path = self._write_csv(
            "Slide,Site Type,Sex,Cancer Subtype,ExtraColumn1,ExtraColumn2\n"
            "slide1.svs,Primary,Male,Unknown,extra_value1,extra_value2\n"
        )
        try:
            df = load_settings(temp_path)

            # Extra columns should be filtered out
            assert "ExtraColumn1" not in df.columns
            assert "ExtraColumn2" not in df.columns
            # Only SETTINGS_COLUMNS should remain
            assert list(df.columns) == SETTINGS_COLUMNS
        finally:
            Path(temp_path).unlink(missing_ok=True)

    def test_load_settings_csv_with_special_characters(self):
        """Test CSV with special characters in values."""
        from mosaic.ui.utils import load_settings

        # The row carries all four fields; previously Sex was omitted, which
        # shifted "Unknown" into the Sex column and left Cancer Subtype empty.
        temp_path = self._write_csv(
            self._HEADER + "slide-1_test@2024.svs,Primary,Male,Unknown\n"
        )
        try:
            df = load_settings(temp_path)

            # Should handle special characters correctly
            assert isinstance(df, pd.DataFrame)
            assert len(df) == 1
            assert df["Slide"].iloc[0] == "slide-1_test@2024.svs"
        finally:
            Path(temp_path).unlink(missing_ok=True)

    def test_load_settings_csv_with_quotes_in_values(self):
        """Test CSV with quoted values."""
        from mosaic.ui.utils import load_settings

        # Commas inside the quoted slide name must not split the field. The
        # row carries all four fields (Sex was previously missing).
        temp_path = self._write_csv(
            self._HEADER + '"slide1,with,commas.svs",Primary,Male,Unknown\n'
        )
        try:
            df = load_settings(temp_path)

            # Should parse quoted values correctly
            assert isinstance(df, pd.DataFrame)
            assert len(df) == 1
            assert df["Slide"].iloc[0] == "slide1,with,commas.svs"
        finally:
            Path(temp_path).unlink(missing_ok=True)

    def test_load_settings_csv_with_missing_values(self):
        """Test CSV with missing/empty values in optional columns."""
        from mosaic.ui.utils import load_settings

        temp_path = self._write_csv(
            "Slide,Site Type,Sex,Cancer Subtype,IHC Subtype,Segmentation Config\n"
            # Empty values for the two trailing columns
            "slide1.svs,Primary,Male,Unknown,,\n"
        )
        try:
            df = load_settings(temp_path)

            # Should load CSV with empty values preserved
            assert isinstance(df, pd.DataFrame)
            assert len(df) == 1
            # Empty strings should be preserved (validation will handle
            # defaults later)
            assert df["Segmentation Config"].iloc[0] == ""
            assert df["IHC Subtype"].iloc[0] == ""
        finally:
            Path(temp_path).unlink(missing_ok=True)

    def test_load_settings_csv_with_whitespace(self):
        """Test CSV with extra whitespace around values."""
        from mosaic.ui.utils import load_settings

        temp_path = self._write_csv(
            self._HEADER + " slide1.svs , Primary , Male , Unknown \n"
        )
        try:
            df = load_settings(temp_path)

            assert isinstance(df, pd.DataFrame)
            assert len(df) == 1
            # Substring check on purpose: the test does not pin whether the
            # surrounding whitespace is stripped or preserved.
            slide_value = df["Slide"].iloc[0]
            assert "slide1.svs" in slide_value
        finally:
            Path(temp_path).unlink(missing_ok=True)

    def test_load_settings_csv_with_different_line_endings(self):
        """Test CSV with different line ending styles (CRLF, LF)."""
        from mosaic.ui.utils import load_settings

        # Bytes are written in binary mode so the CRLF (Windows style)
        # terminators reach the file untouched.
        temp_path = self._write_csv(
            b"Slide,Site Type,Sex,Cancer Subtype\r\n"
            b"slide1.svs,Primary,Male,Unknown\r\n"
        )
        try:
            df = load_settings(temp_path)

            assert isinstance(df, pd.DataFrame)
            assert len(df) == 1
            assert df["Slide"].iloc[0] == "slide1.svs"
        finally:
            Path(temp_path).unlink(missing_ok=True)

    def test_load_settings_csv_with_tab_delimiter_fails(self):
        """Test that TSV (tab-delimited) file raises error."""
        from mosaic.ui.utils import load_settings

        # Tabs instead of commas
        temp_path = self._write_csv(
            "Slide\tSite Type\tCancer Subtype\n"
            "slide1.svs\tPrimary\tUnknown\n"
        )
        try:
            # Should fail because columns won't be parsed correctly
            with pytest.raises(ValueError, match="Missing required column"):
                load_settings(temp_path)
        finally:
            Path(temp_path).unlink(missing_ok=True)

    def test_load_settings_large_csv(self):
        """Test loading CSV with many rows (performance test)."""
        from mosaic.ui.utils import load_settings

        num_rows = 1000
        rows = "".join(
            f"slide{i}.svs,Primary,Male,Unknown\n" for i in range(num_rows)
        )
        temp_path = self._write_csv(self._HEADER + rows)
        try:
            df = load_settings(temp_path)

            # Should handle large CSV without issues
            assert isinstance(df, pd.DataFrame)
            assert len(df) == num_rows
        finally:
            Path(temp_path).unlink(missing_ok=True)
class TestEncodingEdgeCases:
    """Test CSV encoding edge cases."""

    @staticmethod
    def _write_utf8_csv(content):
        """Write ``content`` as UTF-8 to a new temporary ``.csv`` file.

        Returns:
            The path of the file as a string. The file is created with
            ``delete=False``, so the caller is responsible for unlinking it.
        """
        with tempfile.NamedTemporaryFile(
            mode="w", encoding="utf-8", suffix=".csv", delete=False
        ) as handle:
            handle.write(content)
        return handle.name

    def test_load_settings_utf8_csv(self):
        """Test loading UTF-8 encoded CSV (should work)."""
        from mosaic.ui.utils import load_settings

        temp_path = self._write_utf8_csv(
            "Slide,Site Type,Sex,Cancer Subtype\n"
            "slide1.svs,Primary,Male,Unknown\n"
        )
        try:
            df = load_settings(temp_path)

            assert isinstance(df, pd.DataFrame)
            assert len(df) == 1
        finally:
            Path(temp_path).unlink(missing_ok=True)

    def test_load_settings_csv_with_unicode_characters(self):
        """Test CSV with Unicode characters in values."""
        from mosaic.ui.utils import load_settings

        # Unicode characters in the slide name. The row carries all four
        # fields; previously Sex was omitted, which shifted "Unknown" into
        # the Sex column and left Cancer Subtype empty.
        temp_path = self._write_utf8_csv(
            "Slide,Site Type,Sex,Cancer Subtype\n"
            "slide_café_™_测试.svs,Primary,Male,Unknown\n"
        )
        try:
            df = load_settings(temp_path)

            # Should handle Unicode correctly
            assert isinstance(df, pd.DataFrame)
            assert len(df) == 1
            assert "café" in df["Slide"].iloc[0]
        finally:
            Path(temp_path).unlink(missing_ok=True)
class TestErrorRecoveryScenarios:
    """Test error recovery and user experience flows."""

    # Minimal valid settings file: header plus one data row.
    _VALID_CSV = (
        "Slide,Site Type,Sex,Cancer Subtype\n"
        "slide1.svs,Primary,Male,Unknown\n"
    )

    @staticmethod
    def _write_csv(content):
        """Write ``content`` to a new temporary ``.csv`` file.

        Returns:
            The path of the file as a string. The file is created with
            ``delete=False``, so the caller is responsible for unlinking it.
        """
        with tempfile.NamedTemporaryFile(
            mode="w", suffix=".csv", delete=False
        ) as handle:
            handle.write(content)
        return handle.name

    def test_consecutive_csv_uploads(self):
        """Test uploading multiple CSVs consecutively."""
        from mosaic.ui.utils import load_settings

        # Paths are registered as they are created so that a failure while
        # writing the second file still cleans up the first.
        created = []
        try:
            # First CSV
            temp_path1 = self._write_csv(self._VALID_CSV)
            created.append(temp_path1)
            # Second CSV (different data)
            temp_path2 = self._write_csv(
                "Slide,Site Type,Sex,Cancer Subtype\n"
                "slide2.svs,Metastatic,Female,LUAD\n"
                "slide3.svs,Primary,Male,BRCA\n"
            )
            created.append(temp_path2)

            # Load first CSV
            df1 = load_settings(temp_path1)
            assert len(df1) == 1
            assert df1["Slide"].iloc[0] == "slide1.svs"

            # Load second CSV (should completely replace)
            df2 = load_settings(temp_path2)
            assert len(df2) == 2
            assert df2["Slide"].iloc[0] == "slide2.svs"
            assert df2["Slide"].iloc[1] == "slide3.svs"

            # Should be independent DataFrames
            assert len(df1) == 1  # df1 unchanged
        finally:
            for path in created:
                Path(path).unlink(missing_ok=True)

    def test_load_settings_after_failed_upload(self):
        """Test successful load after a failed upload attempt."""
        from mosaic.ui.utils import load_settings

        created = []
        try:
            # First attempt: invalid CSV (missing required columns)
            invalid_path = self._write_csv("InvalidColumn\nvalue\n")
            created.append(invalid_path)
            # Second attempt: valid CSV
            valid_path = self._write_csv(self._VALID_CSV)
            created.append(valid_path)

            # First load should fail
            with pytest.raises(ValueError, match="Missing required column"):
                load_settings(invalid_path)

            # Second load should succeed
            df = load_settings(valid_path)
            assert isinstance(df, pd.DataFrame)
            assert len(df) == 1
            assert df["Slide"].iloc[0] == "slide1.svs"
        finally:
            for path in created:
                Path(path).unlink(missing_ok=True)

    def test_load_settings_with_file_permission_error(self):
        """Test behavior when file cannot be read due to permissions."""
        from mosaic.ui.utils import load_settings
        import os

        if os.name == "nt":
            # Skip on Windows due to different permission model
            pytest.skip("Permission test not applicable on Windows")
        if hasattr(os, "geteuid") and os.geteuid() == 0:
            # The superuser bypasses file mode bits, so chmod 000 would not
            # make the read fail and the expectation below could never hold.
            pytest.skip("Permission test not applicable when running as root")

        temp_path = self._write_csv(self._VALID_CSV)
        try:
            # Remove read permissions
            os.chmod(temp_path, 0o000)

            # Should raise permission error
            with pytest.raises(PermissionError):
                load_settings(temp_path)
        finally:
            # Restore permissions for cleanup
            os.chmod(temp_path, 0o644)
            Path(temp_path).unlink(missing_ok=True)

    def test_load_settings_nonexistent_file(self):
        """Test loading from non-existent file path."""
        from mosaic.ui.utils import load_settings

        # A freshly created temporary directory is empty, so the path below is
        # guaranteed not to exist and does not depend on a POSIX-style /tmp.
        with tempfile.TemporaryDirectory() as scratch_dir:
            nonexistent_path = str(
                Path(scratch_dir) / "this_file_does_not_exist_12345.csv"
            )

            # Should raise FileNotFoundError
            with pytest.raises(FileNotFoundError):
                load_settings(nonexistent_path)
class TestValidationWithUpload:
    """Test validation integration with CSV upload.

    Both tests load a CSV through ``load_settings`` and then pass the result
    to ``validate_settings`` together with the three maps supplied by the
    ``mock_cancer_subtype_maps`` fixture.
    """

    def test_csv_upload_triggers_validation(self, mock_cancer_subtype_maps):
        """Test that a row of invalid values comes back with defaults.

        The assertions expect "Primary", "Unknown" and "Biopsy" for the
        Site Type, Cancer Subtype and Segmentation Config columns.
        """
        from mosaic.ui.utils import load_settings, validate_settings

        name_map, reversed_map, subtypes = mock_cancer_subtype_maps

        # Single data row whose site, subtype and config are all invalid.
        with tempfile.NamedTemporaryFile(
            mode="w", suffix=".csv", delete=False
        ) as csv_file:
            csv_file.writelines(
                [
                    "Slide,Site Type,Sex,Cancer Subtype,Segmentation Config\n",
                    "slide1.svs,InvalidSite,Male,InvalidSubtype,InvalidConfig\n",
                ]
            )
            csv_file.flush()
            csv_path = csv_file.name

        try:
            loaded = load_settings(csv_path)
            validated = validate_settings(loaded, name_map, subtypes, reversed_map)

            # Each invalid value should have been replaced by its default.
            first_row = validated.iloc[0]
            assert first_row["Site Type"] == "Primary"
            assert first_row["Cancer Subtype"] == "Unknown"
            assert first_row["Segmentation Config"] == "Biopsy"
        finally:
            Path(csv_path).unlink(missing_ok=True)

    def test_csv_upload_with_partial_invalid_data(self, mock_cancer_subtype_maps):
        """Test CSV with mix of valid and invalid rows.

        Rows one and three are valid; row two carries an invalid site type
        and cancer subtype and is expected to be corrected, not dropped.
        """
        from mosaic.ui.utils import load_settings, validate_settings

        name_map, reversed_map, subtypes = mock_cancer_subtype_maps

        with tempfile.NamedTemporaryFile(
            mode="w", suffix=".csv", delete=False
        ) as csv_file:
            csv_file.writelines(
                [
                    "Slide,Site Type,Sex,Cancer Subtype\n",
                    "slide1.svs,Primary,Male,Unknown\n",  # valid
                    "slide2.svs,InvalidSite,Female,InvalidSubtype\n",  # invalid
                    "slide3.svs,Metastatic,Male,LUAD\n",  # valid
                ]
            )
            csv_file.flush()
            csv_path = csv_file.name

        try:
            loaded = load_settings(csv_path)
            validated = validate_settings(loaded, name_map, subtypes, reversed_map)

            # No row may be dropped by validation.
            assert len(validated) == 3

            # The valid rows keep their site type.
            assert validated["Site Type"].iloc[0] == "Primary"
            assert validated["Site Type"].iloc[2] == "Metastatic"

            # The invalid row is corrected with defaults.
            assert validated["Site Type"].iloc[1] == "Primary"
            assert validated["Cancer Subtype"].iloc[1] == "Unknown"
        finally:
            Path(csv_path).unlink(missing_ok=True)