Spaces:
Running
Running
| import pandas as pd | |
| from server import _read_any, clean_data | |
| from scrubdata import apply_plan, mock_plan | |
def test_clean_data_handles_zero_byte_upload(tmp_path):
    """Check that a zero-byte file is reported as unreadable, not cleaned.

    Writes an empty ``empty.csv`` under ``tmp_path``, runs ``clean_data``
    with ``use_model=False``, and expects empty "before"/"after" previews,
    a ``total_rows_before`` of 0, and a summary containing the
    "Couldn't read this file" message.
    """
    empty_csv = tmp_path / "empty.csv"
    empty_csv.write_bytes(b"")

    outcome = clean_data(str(empty_csv), use_model=False)

    # Both preview lists must be empty when nothing could be read.
    for preview_key in ("before", "after"):
        assert outcome[preview_key] == []
    assert outcome["total_rows_before"] == 0
    assert "Couldn't read this file" in outcome["summary"]
def test_clean_data_handles_headers_without_rows(tmp_path):
    """Check that a header-only CSV is reported as empty.

    Writes a file containing only the ``name,email`` header line, runs
    ``clean_data`` with ``use_model=False``, and expects empty
    "before"/"after" previews, a ``total_rows_before`` of 0, and the exact
    "file looks empty" summary text.
    """
    header_only_csv = tmp_path / "headers.csv"
    header_only_csv.write_text("name,email\n")

    outcome = clean_data(str(header_only_csv), use_model=False)

    # No data rows were written, so both previews must be empty.
    for preview_key in ("before", "after"):
        assert outcome[preview_key] == []
    assert outcome["total_rows_before"] == 0
    # The summary is compared exactly (note the em dash in the message).
    assert (
        outcome["summary"]
        == "That file looks empty — no rows or columns to clean."
    )
def test_sanitized_header_suffixes_remain_unique(tmp_path):
    """Check that colliding headers end up with unique column names.

    The fixture CSV has the headers ``a``, ``a.1`` and `` a`` (with a
    leading space) plus one data row. Both ``_read_any`` and ``clean_data``
    are expected to expose the columns as ``a``, ``a.1``, ``a.2``, and
    ``clean_data`` must not report a generic cleaning failure.
    """
    expected_columns = ["a", "a.1", "a.2"]

    csv_file = tmp_path / "colliding_headers.csv"
    # NOTE(review): presumably " a" collides with "a" once headers are
    # sanitized, which is what forces the ".2" suffix; confirm in server.
    csv_file.write_text("a,a.1, a\nx,y,z\n")

    frame = _read_any(str(csv_file))
    report = clean_data(str(csv_file), use_model=False)

    assert list(frame.columns) == expected_columns
    assert frame.columns.is_unique
    assert report["columns_before"] == expected_columns
    assert report["total_rows_before"] == 1
    assert "Something went wrong while cleaning" not in report["summary"]
def test_maria_sample_cleaning_is_preserved():
    """Check that reading the sample via ``_read_any`` cleans identically.

    Loads the Maria CRM sample twice: once directly with ``pd.read_csv``
    (all columns as ``str``, default NA parsing disabled) and once through
    ``_read_any``. It then applies the mock plan to each frame and requires
    the resulting frames and logs to match.

    The sample path is relative, so this test relies on being run from the
    directory that contains ``samples/``.
    """
    sample = "samples/maria_crm_export.csv"

    baseline_frame = pd.read_csv(sample, dtype=str, keep_default_na=False)
    server_frame = _read_any(sample)

    want_frame, want_log = apply_plan(baseline_frame, mock_plan(baseline_frame))
    got_frame, got_log = apply_plan(server_frame, mock_plan(server_frame))

    pd.testing.assert_frame_equal(got_frame, want_frame)
    assert got_log == want_log