""" Unit tests for contact name normalization utility. Feature: 001-refine-memory-producer-logic (T021) """ import sys from pathlib import Path # Add src to path sys.path.insert(0, str(Path(__file__).parent.parent.parent / "src")) import pytest from utils.contact_utils import normalize_contact_name class TestNormalizeContactName: """Test suite for normalize_contact_name function.""" def test_basic_lowercase(self): """Test basic lowercase conversion.""" assert normalize_contact_name("JohnDoe") == "johndoe" assert normalize_contact_name("ALICE") == "alice" assert normalize_contact_name("Bob") == "bob" def test_spaces_removed(self): """Test that spaces are removed.""" assert normalize_contact_name("John Doe") == "johndoe" assert normalize_contact_name("Alice Mary Smith") == "alicemarysmith" assert normalize_contact_name(" Bob ") == "bob" def test_apostrophes_removed(self): """Test that apostrophes are removed.""" assert normalize_contact_name("O'Brien") == "obrien" assert normalize_contact_name("D'Angelo") == "dangelo" assert normalize_contact_name("O'Neil") == "oneil" def test_hyphens_removed(self): """Test that hyphens are removed.""" assert normalize_contact_name("Jean-Pierre") == "jeanpierre" assert normalize_contact_name("Mary-Kate") == "marykate" assert normalize_contact_name("Wu-Tang") == "wutang" def test_dots_removed(self): """Test that dots/periods are removed.""" assert normalize_contact_name("Dr. Smith") == "drsmith" assert normalize_contact_name("J.K. Rowling") == "jkrowling" assert normalize_contact_name("Mr. Anderson") == "mranderson" def test_special_characters(self): """Test that special characters are removed.""" assert normalize_contact_name("José García") == "josgarca" assert normalize_contact_name("François") == "franois" assert normalize_contact_name("Müller") == "mller" assert normalize_contact_name("Søren") == "sren" def test_mixed_special_characters(self): """Test combinations of special characters.""" assert normalize_contact_name("O'Brien-Smith Jr.") == "obriensmithjr" assert normalize_contact_name("Mary-Kate O'Neil") == "marykateoneil" assert normalize_contact_name("Dr. Jean-Pierre D'Angelo") == "drjeanpierredangelo" def test_numbers_preserved(self): """Test that numbers are preserved (alphanumeric).""" assert normalize_contact_name("Agent007") == "agent007" assert normalize_contact_name("User123") == "user123" assert normalize_contact_name("R2D2") == "r2d2" def test_underscores_and_symbols(self): """Test that underscores and symbols are removed.""" assert normalize_contact_name("john_doe") == "johndoe" assert normalize_contact_name("alice@example") == "aliceexample" assert normalize_contact_name("user#123") == "user123" def test_empty_string(self): """Test that empty string returns empty string.""" assert normalize_contact_name("") == "" assert normalize_contact_name(" ") == "" def test_only_special_chars(self): """Test strings with only special characters.""" assert normalize_contact_name("---") == "" assert normalize_contact_name("...") == "" assert normalize_contact_name("'") == "" assert normalize_contact_name("@#$%") == "" def test_unicode_letters_preserved(self): """Test that unicode letters are preserved.""" # Note: The current implementation removes non-ASCII, but we document expected behavior # If unicode support is needed, the regex should be updated to [\W_]+ instead assert normalize_contact_name("María") == "mara" # Current behavior assert normalize_contact_name("François") == "franois" assert normalize_contact_name("北京") == "" # Non-Latin removed def test_collision_scenarios(self): """Test names that should normalize to the same value (collision detection).""" # These should all normalize to "obrien" variants = ["O'Brien", "OBrien", "O Brien", "o'brien", "O'BRIEN", "O-Brien"] normalized = [normalize_contact_name(v) for v in variants] # All should normalize to the same value assert len(set(normalized)) == 1 assert normalized[0] == "obrien" def test_real_world_examples(self): """Test with realistic contact names.""" assert normalize_contact_name("Jane Doe") == "janedoe" assert normalize_contact_name("Christian Kniep") == "christiankniep" assert normalize_contact_name("Dr. Sarah Johnson-Smith") == "drsarahjohnsonsmith" assert normalize_contact_name("José María García") == "josmaragarca" assert normalize_contact_name("李明") == "" # Chinese characters removed assert normalize_contact_name("محمد") == "" # Arabic characters removed def test_idempotency(self): """Test that normalizing twice produces the same result.""" names = ["John Doe", "O'Brien", "Mary-Kate", "Dr. Smith", "José"] for name in names: normalized_once = normalize_contact_name(name) normalized_twice = normalize_contact_name(normalized_once) assert normalized_once == normalized_twice def test_case_insensitivity(self): """Test that case variations normalize to the same value.""" assert ( normalize_contact_name("JOHN DOE") == normalize_contact_name("john doe") == normalize_contact_name("John Doe") == normalize_contact_name("JoHn DoE") ) def test_whitespace_variations(self): """Test various whitespace scenarios.""" assert normalize_contact_name("John Doe") == "johndoe" assert normalize_contact_name("John\tDoe") == "johndoe" assert normalize_contact_name("John\nDoe") == "johndoe" assert normalize_contact_name("\n John Doe \n") == "johndoe" def test_leading_trailing_special_chars(self): """Test names with leading/trailing special characters.""" assert normalize_contact_name("'John'") == "john" assert normalize_contact_name("-Mary-") == "mary" assert normalize_contact_name(".Dr. Smith.") == "drsmith"