MemPrepMate / tests /unit /test_contact_utils.py
Christian Kniep
new webapp
1fff71f
"""
Unit tests for contact name normalization utility.
Feature: 001-refine-memory-producer-logic (T021)
"""
import sys
from pathlib import Path
# Add src to path
sys.path.insert(0, str(Path(__file__).parent.parent.parent / "src"))
import pytest
from utils.contact_utils import normalize_contact_name
class TestNormalizeContactName:
    """Behavioural checks for ``normalize_contact_name``.

    Most tests are small tables pairing a raw contact name with the exact
    string the function is expected to return for it.
    """

    @staticmethod
    def _expect(cases):
        """Assert every ``(raw, expected)`` pair in order; the first mismatch fails the test."""
        for raw, expected in cases:
            assert normalize_contact_name(raw) == expected

    def test_basic_lowercase(self):
        """Upper- and mixed-case input comes back lowercased."""
        self._expect([
            ("JohnDoe", "johndoe"),
            ("ALICE", "alice"),
            ("Bob", "bob"),
        ])

    def test_spaces_removed(self):
        """Interior and surrounding spaces are dropped."""
        self._expect([
            ("John Doe", "johndoe"),
            ("Alice Mary Smith", "alicemarysmith"),
            (" Bob ", "bob"),
        ])

    def test_apostrophes_removed(self):
        """Apostrophes do not survive normalization."""
        self._expect([
            ("O'Brien", "obrien"),
            ("D'Angelo", "dangelo"),
            ("O'Neil", "oneil"),
        ])

    def test_hyphens_removed(self):
        """Hyphenated names are joined into a single token."""
        self._expect([
            ("Jean-Pierre", "jeanpierre"),
            ("Mary-Kate", "marykate"),
            ("Wu-Tang", "wutang"),
        ])

    def test_dots_removed(self):
        """Periods (titles, initials) are stripped."""
        self._expect([
            ("Dr. Smith", "drsmith"),
            ("J.K. Rowling", "jkrowling"),
            ("Mr. Anderson", "mranderson"),
        ])

    def test_special_characters(self):
        """Accented / non-ASCII letters are removed, leaving only the ASCII remainder."""
        self._expect([
            ("José García", "josgarca"),
            ("François", "franois"),
            ("Müller", "mller"),
            ("Søren", "sren"),
        ])

    def test_mixed_special_characters(self):
        """Several kinds of punctuation in one name are all removed."""
        self._expect([
            ("O'Brien-Smith Jr.", "obriensmithjr"),
            ("Mary-Kate O'Neil", "marykateoneil"),
            ("Dr. Jean-Pierre D'Angelo", "drjeanpierredangelo"),
        ])

    def test_numbers_preserved(self):
        """Digits are kept: the result is alphanumeric, not letters-only."""
        self._expect([
            ("Agent007", "agent007"),
            ("User123", "user123"),
            ("R2D2", "r2d2"),
        ])

    def test_underscores_and_symbols(self):
        """Underscores and symbols such as ``@`` and ``#`` are removed."""
        self._expect([
            ("john_doe", "johndoe"),
            ("alice@example", "aliceexample"),
            ("user#123", "user123"),
        ])

    def test_empty_string(self):
        """Empty and blank input both normalize to the empty string."""
        self._expect([
            ("", ""),
            (" ", ""),
        ])

    def test_only_special_chars(self):
        """Input made solely of punctuation/symbols normalizes to the empty string."""
        self._expect([
            ("---", ""),
            ("...", ""),
            ("'", ""),
            ("@#$%", ""),
        ])

    def test_unicode_letters_preserved(self):
        """Pin the current handling of non-ASCII letters: they are dropped.

        Despite the method name (kept so existing test IDs stay stable), the
        assertions below document removal, not preservation.
        """
        # Per the original author's note: if Unicode letters should be kept,
        # the implementation's regex would need to become [\W_]+ instead.
        # NOTE(review): the implementation is not visible here - confirm in
        # utils.contact_utils before relying on that suggestion.
        self._expect([
            ("María", "mara"),  # current behavior
            ("François", "franois"),
            ("北京", ""),  # non-Latin removed
        ])

    def test_collision_scenarios(self):
        """Different spellings of one surname collapse to a single key (collision detection)."""
        # Every spelling below is expected to normalize to "obrien".
        spellings = ("O'Brien", "OBrien", "O Brien", "o'brien", "O'BRIEN", "O-Brien")
        results = list(map(normalize_contact_name, spellings))
        distinct = set(results)
        assert len(distinct) == 1
        assert results[0] == "obrien"

    def test_real_world_examples(self):
        """Realistic contact names, including names written in non-Latin scripts."""
        self._expect([
            ("Jane Doe", "janedoe"),
            ("Christian Kniep", "christiankniep"),
            ("Dr. Sarah Johnson-Smith", "drsarahjohnsonsmith"),
            ("José María García", "josmaragarca"),
            ("李明", ""),  # Chinese characters removed
            ("محمد", ""),  # Arabic characters removed
        ])

    def test_idempotency(self):
        """Normalizing an already-normalized name leaves it unchanged."""
        for original in ("John Doe", "O'Brien", "Mary-Kate", "Dr. Smith", "José"):
            first_pass = normalize_contact_name(original)
            second_pass = normalize_contact_name(first_pass)
            assert first_pass == second_pass

    def test_case_insensitivity(self):
        """Case variations of the same name produce the same normalized value."""
        upper = normalize_contact_name("JOHN DOE")
        lower = normalize_contact_name("john doe")
        title = normalize_contact_name("John Doe")
        mixed = normalize_contact_name("JoHn DoE")
        assert upper == lower == title == mixed

    def test_whitespace_variations(self):
        """Spaces, tabs and newlines - interior or surrounding - are all dropped."""
        self._expect([
            ("John Doe", "johndoe"),
            ("John\tDoe", "johndoe"),
            ("John\nDoe", "johndoe"),
            ("\n John Doe \n", "johndoe"),
        ])

    def test_leading_trailing_special_chars(self):
        """Punctuation at the very start or end of a name is removed as well."""
        self._expect([
            ("'John'", "john"),
            ("-Mary-", "mary"),
            (".Dr. Smith.", "drsmith"),
        ])