File size: 1,129 Bytes
d2f0b77 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 |
from pathlib import Path
from scripts.validate_normalization import validate_file
def _write(path: Path, content: str) -> Path:
path.write_text(content, encoding="utf-8")
return path
def test_validate_file_passes_with_valid_tsv(tmp_path: Path) -> None:
    """validate_file returns an empty list for a four-column, one-row TSV."""
    header = "id\traw_text\tnormalized_text\tnote\n"
    row = "n001\tfoo\tfoo\tok\n"
    tsv_path = _write(tmp_path / "valid.tsv", header + row)

    assert validate_file(tsv_path) == []
def test_validate_file_fails_on_duplicate_id(tmp_path: Path) -> None:
    """validate_file reports a "duplicate id" error when two rows share an id."""
    header = "id\traw_text\tnormalized_text\tnote\n"
    first_row = "n001\tfoo\tfoo\tok\n"
    repeated_row = "n001\tbar\tbar\tdup\n"
    tsv_path = _write(tmp_path / "dup.tsv", header + first_row + repeated_row)

    problems = validate_file(tsv_path)

    # Only require that some reported message mentions the duplicate;
    # its position in the list is not part of what this test pins down.
    assert any("duplicate id" in message for message in problems)
def test_validate_file_fails_on_missing_columns(tmp_path: Path) -> None:
    """validate_file's first error names the absent normalized_text column."""
    header = "id\traw_text\tnote\n"
    row = "n001\tfoo\tmissing normalized\n"
    tsv_path = _write(tmp_path / "missing.tsv", header + row)

    problems = validate_file(tsv_path)

    # Check non-emptiness first so an empty result fails with a plain
    # assertion instead of an IndexError on problems[0].
    assert problems
    assert "Missing required columns: normalized_text" in problems[0]
|