Spaces:

build-small-hackathon
/

scrubdata

Running

scrubdata / tests / test_server_upload_safety.py

OpenAI Codex

deploy: add sponsor:openai tag (Best Use of Codex) + Codex-hardened build

16dc556 17 days ago

1.75 kB

	import pandas as pd

	from server import _read_any, clean_data
	from scrubdata import apply_plan, mock_plan


	def test_clean_data_handles_zero_byte_upload(tmp_path):
	    """Run ``clean_data`` on a zero-byte CSV and check the fallback result.

	    The result must carry empty ``before``/``after`` lists, a zero
	    ``total_rows_before`` count, and a summary containing the
	    "Couldn't read this file" message.
	    """
	    empty_upload = tmp_path / "empty.csv"
	    empty_upload.write_bytes(b"")  # a file with no content at all

	    outcome = clean_data(str(empty_upload), use_model=False)

	    # Neither preview list may contain any rows.
	    for preview_key in ("before", "after"):
	        assert outcome[preview_key] == []
	    assert outcome["total_rows_before"] == 0
	    assert "Couldn't read this file" in outcome["summary"]


	def test_clean_data_handles_headers_without_rows(tmp_path):
	    """Run ``clean_data`` on a CSV that has a header line but no data rows.

	    The result must carry empty ``before``/``after`` lists, a zero
	    ``total_rows_before`` count, and exactly the "looks empty" summary.
	    """
	    expected_summary = "That file looks empty — no rows or columns to clean."
	    header_only = tmp_path / "headers.csv"
	    header_only.write_text("name,email\n")  # header line only

	    outcome = clean_data(str(header_only), use_model=False)

	    # Neither preview list may contain any rows.
	    for preview_key in ("before", "after"):
	        assert outcome[preview_key] == []
	    assert outcome["total_rows_before"] == 0
	    assert outcome["summary"] == expected_summary


	def test_sanitized_header_suffixes_remain_unique(tmp_path):
	    """Check that the header row ``a,a.1, a`` loads as three distinct columns.

	    Both ``_read_any`` and ``clean_data`` must report the columns as
	    ``a``, ``a.1``, ``a.2``; the single data row must be counted, and the
	    summary must not contain the generic cleaning-failure message.
	    """
	    expected_columns = ["a", "a.1", "a.2"]
	    csv_file = tmp_path / "colliding_headers.csv"
	    # The third header (" a") has a leading space and otherwise matches the first.
	    csv_file.write_text("a,a.1, a\nx,y,z\n")

	    frame = _read_any(str(csv_file))
	    outcome = clean_data(str(csv_file), use_model=False)

	    # Loader-level expectations.
	    assert list(frame.columns) == expected_columns
	    assert frame.columns.is_unique

	    # End-to-end expectations.
	    assert outcome["columns_before"] == expected_columns
	    assert outcome["total_rows_before"] == 1
	    assert "Something went wrong while cleaning" not in outcome["summary"]


	def test_maria_sample_cleaning_is_preserved():
	    """Check that loading the Maria CRM sample via ``_read_any`` leaves cleaning unchanged.

	    The sample is read twice: directly with ``pd.read_csv`` (all columns as
	    strings, no default NA conversion) and through ``_read_any``. Applying
	    the mock plan to each frame must produce equal output frames and equal
	    logs.
	    """
	    from pathlib import Path

	    # Resolve the sample relative to this test file rather than the current
	    # working directory, so the test also passes when pytest is started
	    # outside the repository root.
	    # NOTE(review): assumes samples/ sits next to tests/ at the repo root — confirm.
	    repo_root = Path(__file__).resolve().parent.parent
	    sample_path = str(repo_root / "samples" / "maria_crm_export.csv")

	    raw = pd.read_csv(sample_path, dtype=str, keep_default_na=False)
	    via_server = _read_any(sample_path)

	    expected, expected_log = apply_plan(raw, mock_plan(raw))
	    actual, actual_log = apply_plan(via_server, mock_plan(via_server))

	    pd.testing.assert_frame_equal(actual, expected)
	    assert actual_log == expected_log