scrub / tests /test_replacement_decision.py
solidprivacy-nl
Add replacement decision helper tests
5e58572
Raw
History Blame Contribute Delete
9.14 kB
import pytest
from replacement_decision import (
ReplacementDecision,
build_replacement_audit,
build_replacement_decision,
matching_occurrence_ids,
normalize_match_text,
)
def test_build_default_decision_is_review_only_and_not_mapping_candidate():
decision = build_replacement_decision(
occurrence_id="occ-1",
source_text="SYNTHETISCH-PERSOON-A",
entity_type="PERSON",
display_label="Naam",
suggested_replacement="[PERSOON_1]",
)
assert decision.review_state == "needs_review"
assert decision.replacement_value is None
assert decision.creates_mapping is False
assert decision.as_dict()["report_only"] is None if "report_only" in decision.as_dict() else True
def test_accepted_decision_uses_suggested_replacement():
decision = build_replacement_decision(
occurrence_id="occ-1",
source_text="SYNTHETISCH-PERSOON-A",
entity_type="PERSON",
display_label="Naam",
suggested_replacement="[PERSOON_1]",
review_state="accepted",
)
assert decision.replacement_value == "[PERSOON_1]"
assert decision.creates_mapping is True
def test_edited_decision_uses_final_replacement():
decision = build_replacement_decision(
occurrence_id="occ-1",
source_text="SYNTHETISCH-PERSOON-A",
entity_type="PERSON",
display_label="Naam",
suggested_replacement="[PERSOON_1]",
final_replacement="[CLIËNT_A]",
review_state="edited",
)
assert decision.replacement_value == "[CLIËNT_A]"
assert decision.creates_mapping is True
def test_ignored_and_context_decisions_do_not_create_mapping():
ignored = build_replacement_decision(
occurrence_id="occ-1",
source_text="ROL-TERM",
entity_type="ROLE_LABEL",
display_label="Rolterm",
suggested_replacement="[ROL_1]",
review_state="ignored",
)
context = build_replacement_decision(
occurrence_id="occ-2",
source_text="GETUIGE",
entity_type="ROLE_LABEL",
display_label="Contextterm",
suggested_replacement="[ROL_2]",
review_state="preserve_context",
)
assert ignored.replacement_value is None
assert ignored.creates_mapping is False
assert context.replacement_value is None
assert context.creates_mapping is False
def test_invalid_state_scope_and_confidence_are_rejected():
with pytest.raises(ValueError, match="Unsupported review_state"):
build_replacement_decision(
occurrence_id="occ-1",
source_text="SYNTHETISCH",
entity_type="PERSON",
display_label="Naam",
suggested_replacement="[PERSOON_1]",
review_state="done",
)
with pytest.raises(ValueError, match="Unsupported scope"):
build_replacement_decision(
occurrence_id="occ-1",
source_text="SYNTHETISCH",
entity_type="PERSON",
display_label="Naam",
suggested_replacement="[PERSOON_1]",
scope="fuzzy_all",
)
with pytest.raises(ValueError, match="confidence"):
build_replacement_decision(
occurrence_id="occ-1",
source_text="SYNTHETISCH",
entity_type="PERSON",
display_label="Naam",
suggested_replacement="[PERSOON_1]",
confidence=1.5,
)
def test_this_occurrence_scope_only_selects_one_item():
decision = build_replacement_decision(
occurrence_id="occ-2",
source_text="SYNTHETISCH-PERSOON-A",
entity_type="PERSON",
display_label="Naam",
suggested_replacement="[PERSOON_1]",
review_state="accepted",
scope="this_occurrence",
)
occurrences = [
{"occurrence_id": "occ-1", "source_text": "SYNTHETISCH-PERSOON-A"},
{"occurrence_id": "occ-2", "source_text": "SYNTHETISCH-PERSOON-A"},
]
assert matching_occurrence_ids(decision, occurrences) == ["occ-2"]
def test_exact_scope_selects_exact_same_text_only():
decision = build_replacement_decision(
occurrence_id="occ-1",
source_text="SYNTHETISCH-PERSOON-A",
entity_type="PERSON",
display_label="Naam",
suggested_replacement="[PERSOON_1]",
review_state="accepted",
scope="all_exact",
)
occurrences = [
{"occurrence_id": "occ-1", "source_text": "SYNTHETISCH-PERSOON-A"},
{"occurrence_id": "occ-2", "source_text": "SYNTHETISCH-PERSOON-A"},
{"occurrence_id": "occ-3", "source_text": "synthetisch-persoon-a"},
{"occurrence_id": "occ-4", "source_text": "SYNTHETISCH-PERSOON-B"},
]
assert matching_occurrence_ids(decision, occurrences) == ["occ-1", "occ-2"]
def test_normalized_scope_is_conservative_not_fuzzy():
decision = build_replacement_decision(
occurrence_id="occ-1",
source_text="SYNTHETISCH PERSOON A",
entity_type="PERSON",
display_label="Naam",
suggested_replacement="[PERSOON_1]",
review_state="accepted",
scope="all_normalized",
)
occurrences = [
{"occurrence_id": "occ-1", "source_text": "SYNTHETISCH PERSOON A"},
{"occurrence_id": "occ-2", "source_text": "synthetisch persoon a"},
{"occurrence_id": "occ-3", "source_text": "SYNTHETISCH PERSOON B"},
]
assert normalize_match_text("SYNTHETISCH PERSOON A") == "synthetisch persoon a"
assert matching_occurrence_ids(decision, occurrences) == ["occ-1", "occ-2"]
def test_audit_counts_states_and_is_report_only():
decisions = [
build_replacement_decision(
occurrence_id="occ-1",
source_text="SYNTHETISCH-PERSOON-A",
entity_type="PERSON",
display_label="Naam",
suggested_replacement="[PERSOON_1]",
review_state="accepted",
scope="all_exact",
risk_flags=["same_value_scope_confirmed"],
),
build_replacement_decision(
occurrence_id="occ-2",
source_text="ROL-TERM",
entity_type="ROLE_LABEL",
display_label="Contextterm",
suggested_replacement="[ROL_1]",
review_state="preserve_context",
),
build_replacement_decision(
occurrence_id="occ-3",
source_text="SYNTHETISCH-MISSER",
entity_type="CUSTOM",
display_label="Handmatig",
suggested_replacement="[HANDMATIG_1]",
review_state="manual_added",
origin="manual",
),
]
audit = build_replacement_audit(decisions)
assert audit["total_decisions"] == 3
assert audit["state_counts"]["accepted"] == 1
assert audit["state_counts"]["preserve_context"] == 1
assert audit["state_counts"]["manual_added"] == 1
assert audit["context_preserved"] == ["occ-2"]
assert audit["manual_additions"] == ["occ-3"]
assert audit["apply_to_same_value_actions"] == ["occ-1"]
assert audit["risk_flags"] == ["same_value_scope_confirmed"]
assert audit["report_only"] is True
assert audit["export_blocking"] is False
def test_export_readiness_is_advisory_only():
ready = build_replacement_audit(
[
build_replacement_decision(
occurrence_id="occ-1",
source_text="SYNTHETISCH-PERSOON-A",
entity_type="PERSON",
display_label="Naam",
suggested_replacement="[PERSOON_1]",
review_state="accepted",
)
]
)
review = build_replacement_audit(
[
build_replacement_decision(
occurrence_id="occ-1",
source_text="SYNTHETISCH-PERSOON-A",
entity_type="PERSON",
display_label="Naam",
suggested_replacement="[PERSOON_1]",
)
]
)
unresolved = build_replacement_audit(
[
build_replacement_decision(
occurrence_id="occ-1",
source_text="SYNTHETISCH-PERSOON-A",
entity_type="PERSON",
display_label="Naam",
suggested_replacement="[PERSOON_1]",
review_state="unresolved",
)
]
)
assert ready["export_readiness"] == "ready_for_export"
assert review["export_readiness"] == "review_recommended"
assert unresolved["export_readiness"] == "high_risk_unresolved"
assert ready["export_blocking"] is False
assert review["export_blocking"] is False
assert unresolved["export_blocking"] is False
def test_helper_signature_does_not_require_streamlit_or_scrub_key_objects():
decision = ReplacementDecision(
occurrence_id="occ-1",
source_text="SYNTHETISCH-PERSOON-A",
entity_type="PERSON",
display_label="Naam",
suggested_replacement="[PERSOON_1]",
review_state="accepted",
)
assert decision.as_dict()["replacement_value"] == "[PERSOON_1]"
assert decision.as_dict()["creates_mapping"] is True