Spaces:

Memverge
/

MemPrepMate

Sleeping

MemPrepMate / tests /unit /test_contact_utils.py

Christian Kniep

new webapp

1fff71f 4 months ago

6.46 kB

	"""
	Unit tests for contact name normalization utility.
	Feature: 001-refine-memory-producer-logic (T021)
	"""

	import sys
	from pathlib import Path

	# Put the "src" directory that sits three levels above this file at the
	# front of sys.path so the ``utils.contact_utils`` import below resolves.
	sys.path.insert(0, str(Path(__file__).parent.parent.parent / "src"))

	import pytest
	from utils.contact_utils import normalize_contact_name


	class TestNormalizeContactName:
	    """Behavioral checks for ``normalize_contact_name``.

	    Each test lists literal input/expected pairs. Taken together they pin the
	    observable contract: ASCII letters come back lowercased, digits are kept,
	    and whitespace, punctuation, symbols and non-ASCII characters are dropped.
	    """

	    @staticmethod
	    def _check(expectations):
	        """Assert, in order, that each raw name normalizes to its paired value.

	        Args:
	            expectations: Iterable of ``(raw_name, expected_key)`` tuples.
	        """
	        for raw_name, expected_key in expectations:
	            assert normalize_contact_name(raw_name) == expected_key

	    def test_basic_lowercase(self):
	        """Upper- and mixed-case input is returned fully lowercased."""
	        self._check([
	            ("JohnDoe", "johndoe"),
	            ("ALICE", "alice"),
	            ("Bob", "bob"),
	        ])

	    def test_spaces_removed(self):
	        """Inner, leading and trailing spaces do not appear in the result."""
	        self._check([
	            ("John Doe", "johndoe"),
	            ("Alice Mary Smith", "alicemarysmith"),
	            (" Bob ", "bob"),
	        ])

	    def test_apostrophes_removed(self):
	        """Apostrophes are stripped from the name."""
	        self._check([
	            ("O'Brien", "obrien"),
	            ("D'Angelo", "dangelo"),
	            ("O'Neil", "oneil"),
	        ])

	    def test_hyphens_removed(self):
	        """Hyphens are stripped from the name."""
	        self._check([
	            ("Jean-Pierre", "jeanpierre"),
	            ("Mary-Kate", "marykate"),
	            ("Wu-Tang", "wutang"),
	        ])

	    def test_dots_removed(self):
	        """Dots/periods are stripped from the name."""
	        self._check([
	            ("Dr. Smith", "drsmith"),
	            ("J.K. Rowling", "jkrowling"),
	            ("Mr. Anderson", "mranderson"),
	        ])

	    def test_special_characters(self):
	        """Accented and other non-ASCII letters are dropped, not transliterated."""
	        self._check([
	            ("José García", "josgarca"),
	            ("François", "franois"),
	            ("Müller", "mller"),
	            ("Søren", "sren"),
	        ])

	    def test_mixed_special_characters(self):
	        """Several kinds of punctuation in one name are all stripped."""
	        self._check([
	            ("O'Brien-Smith Jr.", "obriensmithjr"),
	            ("Mary-Kate O'Neil", "marykateoneil"),
	            ("Dr. Jean-Pierre D'Angelo", "drjeanpierredangelo"),
	        ])

	    def test_numbers_preserved(self):
	        """Digits are kept alongside the lowercased letters."""
	        self._check([
	            ("Agent007", "agent007"),
	            ("User123", "user123"),
	            ("R2D2", "r2d2"),
	        ])

	    def test_underscores_and_symbols(self):
	        """Underscores and symbols such as ``@`` and ``#`` are stripped."""
	        self._check([
	            ("john_doe", "johndoe"),
	            ("alice@example", "aliceexample"),
	            ("user#123", "user123"),
	        ])

	    def test_empty_string(self):
	        """Empty and whitespace-only input both yield the empty string."""
	        for blank in ("", " "):
	            assert normalize_contact_name(blank) == ""

	    def test_only_special_chars(self):
	        """Input made solely of punctuation/symbols yields the empty string."""
	        for junk in ("---", "...", "'", "@#$%"):
	            assert normalize_contact_name(junk) == ""

	    def test_unicode_letters_preserved(self):
	        """Pin the current handling of non-ASCII letters: they are dropped.

	        Despite the test name, the assertions document that accented Latin
	        letters and non-Latin scripts are removed from the result.
	        """
	        # NOTE(review): the original author's note suggests switching the
	        # implementation's pattern to [\W_]+ if Unicode letters should be
	        # kept; the implementation is not visible here, so confirm against
	        # utils.contact_utils before relying on that.
	        self._check([
	            ("María", "mara"),  # current behavior
	            ("François", "franois"),
	            ("北京", ""),  # non-Latin script removed entirely
	        ])

	    def test_collision_scenarios(self):
	        """Spelling variants of one surname collapse to a single key."""
	        spellings = ("O'Brien", "OBrien", "O Brien", "o'brien", "O'BRIEN", "O-Brien")
	        keys = list(map(normalize_contact_name, spellings))

	        # Exactly one distinct key, and it is the expected one.
	        assert len(set(keys)) == 1
	        assert keys[0] == "obrien"

	    def test_real_world_examples(self):
	        """Realistic contact names, including non-Latin scripts."""
	        self._check([
	            ("Jane Doe", "janedoe"),
	            ("Christian Kniep", "christiankniep"),
	            ("Dr. Sarah Johnson-Smith", "drsarahjohnsonsmith"),
	            ("José María García", "josmaragarca"),
	            ("李明", ""),  # Chinese characters removed
	            ("محمد", ""),  # Arabic characters removed
	        ])

	    def test_idempotency(self):
	        """Feeding a normalized name back in leaves it unchanged."""
	        for sample in ("John Doe", "O'Brien", "Mary-Kate", "Dr. Smith", "José"):
	            first_pass = normalize_contact_name(sample)
	            second_pass = normalize_contact_name(first_pass)
	            assert first_pass == second_pass

	    def test_case_insensitivity(self):
	        """Differently-cased spellings of one name share a single key."""
	        casings = ("JOHN DOE", "john doe", "John Doe", "JoHn DoE")
	        reference, *remaining = [normalize_contact_name(c) for c in casings]
	        assert all(key == reference for key in remaining)

	    def test_whitespace_variations(self):
	        """Spaces, tabs and newlines are all treated as removable whitespace."""
	        for spaced in ("John Doe", "John\tDoe", "John\nDoe", "\n John Doe \n"):
	            assert normalize_contact_name(spaced) == "johndoe"

	    def test_leading_trailing_special_chars(self):
	        """Punctuation wrapped around a name is stripped at both ends."""
	        self._check([
	            ("'John'", "john"),
	            ("-Mary-", "mary"),
	            (".Dr. Smith.", "drsmith"),
	        ])