scrubdata / tests /test_server_upload_safety.py
OpenAI Codex
deploy: add sponsor:openai tag (Best Use of Codex) + Codex-hardened build
16dc556
Raw
History Blame Contribute Delete
1.75 kB
import pandas as pd
from server import _read_any, clean_data
from scrubdata import apply_plan, mock_plan
def test_clean_data_handles_zero_byte_upload(tmp_path):
    """Check that ``clean_data`` copes with a completely empty (0-byte) file.

    The result is expected to carry empty ``before``/``after`` previews, a
    zero row count, and a summary that contains the "Couldn't read this file"
    message.
    """
    empty_file = tmp_path / "empty.csv"
    empty_file.write_bytes(b"")

    outcome = clean_data(str(empty_file), use_model=False)

    # Both previews must be empty lists; checked in the same order as before.
    for preview_key in ("before", "after"):
        assert outcome[preview_key] == []
    assert outcome["total_rows_before"] == 0
    assert "Couldn't read this file" in outcome["summary"]
def test_clean_data_handles_headers_without_rows(tmp_path):
    """Check that ``clean_data`` copes with a CSV that has only a header row.

    The file contains the single line ``name,email``; the result is expected
    to have empty previews, a zero row count, and the exact "looks empty"
    summary message.
    """
    header_only_file = tmp_path / "headers.csv"
    header_only_file.write_text("name,email\n")

    outcome = clean_data(str(header_only_file), use_model=False)

    # Both previews must be empty lists; checked in the same order as before.
    for preview_key in ("before", "after"):
        assert outcome[preview_key] == []
    assert outcome["total_rows_before"] == 0
    assert (
        outcome["summary"]
        == "That file looks empty — no rows or columns to clean."
    )
def test_sanitized_header_suffixes_remain_unique(tmp_path):
    """Check that colliding header names end up as distinct column names.

    The header row is ``a,a.1, a`` with one data row. Both ``_read_any`` and
    ``clean_data`` are expected to report the columns ``a``, ``a.1``, ``a.2``,
    and cleaning must not produce the generic failure summary.
    """
    # NOTE(review): presumably " a" collides with "a" once headers are
    # sanitized, and the naive suffix "a.1" is already taken — confirm
    # against the reader implementation.
    csv_file = tmp_path / "colliding_headers.csv"
    csv_file.write_text("a,a.1, a\nx,y,z\n")
    csv_location = str(csv_file)

    frame = _read_any(csv_location)
    outcome = clean_data(csv_location, use_model=False)

    unique_headers = ["a", "a.1", "a.2"]
    assert list(frame.columns) == unique_headers
    assert frame.columns.is_unique
    assert outcome["columns_before"] == unique_headers
    assert outcome["total_rows_before"] == 1
    assert "Something went wrong while cleaning" not in outcome["summary"]
def test_maria_sample_cleaning_is_preserved():
    """Check that loading the sample via ``_read_any`` does not alter cleaning.

    The sample CSV is loaded twice — directly with pandas (all columns as
    strings, no default NA conversion) and through ``_read_any`` — and the
    mock plan is applied to each. The cleaned frames and the logs must match.
    """
    # The path is relative to the working directory the tests are run from.
    sample_csv = "samples/maria_crm_export.csv"

    baseline_frame = pd.read_csv(sample_csv, dtype=str, keep_default_na=False)
    server_frame = _read_any(sample_csv)

    baseline_plan = mock_plan(baseline_frame)
    expected, expected_log = apply_plan(baseline_frame, baseline_plan)
    server_plan = mock_plan(server_frame)
    actual, actual_log = apply_plan(server_frame, server_plan)

    pd.testing.assert_frame_equal(actual, expected)
    assert actual_log == expected_log