Spaces:
Sleeping
Sleeping
| """ | |
| Unit tests for contact name normalization utility. | |
| Feature: 001-refine-memory-producer-logic (T021) | |
| """ | |
| import sys | |
| from pathlib import Path | |
| # Add src to path | |
| sys.path.insert(0, str(Path(__file__).parent.parent.parent / "src")) | |
| import pytest | |
| from utils.contact_utils import normalize_contact_name | |
class TestNormalizeContactName:
    """Behavioural checks for ``normalize_contact_name``.

    For the inputs exercised here, the function is expected to lowercase
    its argument and drop whitespace, punctuation, underscores, symbols and
    non-ASCII letters, while keeping ASCII letters and digits.
    """

    @staticmethod
    def _assert_normalizes(cases):
        """Assert every ``(raw, expected)`` pair in *cases*, in order.

        The name has no ``test`` prefix, so pytest does not collect it as a
        test of its own.
        """
        for raw, expected in cases:
            assert normalize_contact_name(raw) == expected

    def test_basic_lowercase(self):
        """Mixed-case and upper-case input comes back lowercased."""
        self._assert_normalizes((
            ("JohnDoe", "johndoe"),
            ("ALICE", "alice"),
            ("Bob", "bob"),
        ))

    def test_spaces_removed(self):
        """Inner, leading and trailing spaces are all dropped."""
        self._assert_normalizes((
            ("John Doe", "johndoe"),
            ("Alice Mary Smith", "alicemarysmith"),
            (" Bob ", "bob"),
        ))

    def test_apostrophes_removed(self):
        """Apostrophes inside a name are dropped."""
        self._assert_normalizes((
            ("O'Brien", "obrien"),
            ("D'Angelo", "dangelo"),
            ("O'Neil", "oneil"),
        ))

    def test_hyphens_removed(self):
        """Hyphens joining name parts are dropped."""
        self._assert_normalizes((
            ("Jean-Pierre", "jeanpierre"),
            ("Mary-Kate", "marykate"),
            ("Wu-Tang", "wutang"),
        ))

    def test_dots_removed(self):
        """Periods (titles, initials) are dropped."""
        self._assert_normalizes((
            ("Dr. Smith", "drsmith"),
            ("J.K. Rowling", "jkrowling"),
            ("Mr. Anderson", "mranderson"),
        ))

    def test_special_characters(self):
        """Accented Latin letters are dropped, not transliterated."""
        self._assert_normalizes((
            ("José García", "josgarca"),
            ("François", "franois"),
            ("Müller", "mller"),
            ("Søren", "sren"),
        ))

    def test_mixed_special_characters(self):
        """Several kinds of punctuation in one name are all dropped."""
        self._assert_normalizes((
            ("O'Brien-Smith Jr.", "obriensmithjr"),
            ("Mary-Kate O'Neil", "marykateoneil"),
            ("Dr. Jean-Pierre D'Angelo", "drjeanpierredangelo"),
        ))

    def test_numbers_preserved(self):
        """Digits survive normalization alongside letters."""
        self._assert_normalizes((
            ("Agent007", "agent007"),
            ("User123", "user123"),
            ("R2D2", "r2d2"),
        ))

    def test_underscores_and_symbols(self):
        """Underscores and symbols such as ``@`` and ``#`` are dropped."""
        self._assert_normalizes((
            ("john_doe", "johndoe"),
            ("alice@example", "aliceexample"),
            ("user#123", "user123"),
        ))

    def test_empty_string(self):
        """Empty and whitespace-only input both yield the empty string."""
        self._assert_normalizes((
            ("", ""),
            (" ", ""),
        ))

    def test_only_special_chars(self):
        """Input made solely of punctuation/symbols yields the empty string."""
        self._assert_normalizes((
            ("---", ""),
            ("...", ""),
            ("'", ""),
            ("@#$%", ""),
        ))

    def test_unicode_letters_preserved(self):
        """Pin the current handling of non-ASCII letters: they are stripped.

        Despite the method name, the assertions below expect accented and
        non-Latin letters to be removed, not preserved.
        """
        # Note carried over from the original author: the current
        # implementation removes non-ASCII characters, and these cases document
        # that behaviour. If unicode support is needed, the regex should be
        # updated to [\W_]+ instead.
        self._assert_normalizes((
            ("María", "mara"),  # current behaviour
            ("François", "franois"),
            ("北京", ""),  # non-Latin removed
        ))

    def test_collision_scenarios(self):
        """Spelling variants of one surname collapse to a single key."""
        spellings = ("O'Brien", "OBrien", "O Brien", "o'brien", "O'BRIEN", "O-Brien")
        results = list(map(normalize_contact_name, spellings))

        # Exactly one distinct normalized form, and it is "obrien".
        distinct = set(results)
        assert len(distinct) == 1
        assert results[0] == "obrien"

    def test_real_world_examples(self):
        """Realistic contact names, including non-Latin scripts."""
        self._assert_normalizes((
            ("Jane Doe", "janedoe"),
            ("Christian Kniep", "christiankniep"),
            ("Dr. Sarah Johnson-Smith", "drsarahjohnsonsmith"),
            ("José María García", "josmaragarca"),
            ("李明", ""),  # Chinese characters removed
            ("محمد", ""),  # Arabic characters removed
        ))

    def test_idempotency(self):
        """Normalizing an already-normalized value changes nothing."""
        for sample in ("John Doe", "O'Brien", "Mary-Kate", "Dr. Smith", "José"):
            first_pass = normalize_contact_name(sample)
            assert first_pass == normalize_contact_name(first_pass)

    def test_case_insensitivity(self):
        """Differently-cased spellings of one name normalize identically."""
        # Compare each result with its predecessor, evaluating lazily in the
        # same order a chained ``a == b == c == d`` comparison would.
        previous = normalize_contact_name("JOHN DOE")
        for spelling in ("john doe", "John Doe", "JoHn DoE"):
            current = normalize_contact_name(spelling)
            assert previous == current
            previous = current

    def test_whitespace_variations(self):
        """Spaces, tabs and newlines are all treated as removable."""
        self._assert_normalizes((
            ("John Doe", "johndoe"),
            ("John\tDoe", "johndoe"),
            ("John\nDoe", "johndoe"),
            ("\n John Doe \n", "johndoe"),
        ))

    def test_leading_trailing_special_chars(self):
        """Punctuation wrapped around a name is dropped at both ends."""
        self._assert_normalizes((
            ("'John'", "john"),
            ("-Mary-", "mary"),
            (".Dr. Smith.", "drsmith"),
        ))