"""
Unit tests for contact name normalization utility.
Feature: 001-refine-memory-producer-logic (T021)
"""

import sys
from pathlib import Path

# Add src to path
sys.path.insert(0, str(Path(__file__).parent.parent.parent / "src"))

import pytest
from utils.contact_utils import normalize_contact_name


class TestNormalizeContactName:
    """Expectation tables for ``normalize_contact_name``.

    Most tests list ``(raw name, expected value)`` pairs and feed them to
    ``_check``; pairs are evaluated in order and the first mismatch fails
    the test.
    """

    @staticmethod
    def _check(pairs):
        """Assert that every raw name in *pairs* normalizes to its expected value."""
        for raw, expected in pairs:
            assert normalize_contact_name(raw) == expected

    def test_basic_lowercase(self):
        """Upper- and mixed-case input comes back lowercased."""
        self._check([
            ("JohnDoe", "johndoe"),
            ("ALICE", "alice"),
            ("Bob", "bob"),
        ])

    def test_spaces_removed(self):
        """Inner, leading and trailing spaces are all dropped."""
        self._check([
            ("John Doe", "johndoe"),
            ("Alice Mary Smith", "alicemarysmith"),
            ("  Bob  ", "bob"),
        ])

    def test_apostrophes_removed(self):
        """Apostrophes do not survive normalization."""
        self._check([
            ("O'Brien", "obrien"),
            ("D'Angelo", "dangelo"),
            ("O'Neil", "oneil"),
        ])

    def test_hyphens_removed(self):
        """Hyphenated names collapse into a single token."""
        self._check([
            ("Jean-Pierre", "jeanpierre"),
            ("Mary-Kate", "marykate"),
            ("Wu-Tang", "wutang"),
        ])

    def test_dots_removed(self):
        """Periods (titles, initials) are stripped."""
        self._check([
            ("Dr. Smith", "drsmith"),
            ("J.K. Rowling", "jkrowling"),
            ("Mr. Anderson", "mranderson"),
        ])

    def test_special_characters(self):
        """Accented letters are dropped rather than transliterated."""
        self._check([
            ("José García", "josgarca"),
            ("François", "franois"),
            ("Müller", "mller"),
            ("Søren", "sren"),
        ])

    def test_mixed_special_characters(self):
        """Several kinds of punctuation in one name are all removed."""
        self._check([
            ("O'Brien-Smith Jr.", "obriensmithjr"),
            ("Mary-Kate O'Neil", "marykateoneil"),
            ("Dr. Jean-Pierre D'Angelo", "drjeanpierredangelo"),
        ])

    def test_numbers_preserved(self):
        """Digits are kept alongside letters."""
        self._check([
            ("Agent007", "agent007"),
            ("User123", "user123"),
            ("R2D2", "r2d2"),
        ])

    def test_underscores_and_symbols(self):
        """Underscores and symbols such as ``@`` and ``#`` are removed."""
        self._check([
            ("john_doe", "johndoe"),
            ("alice@example", "aliceexample"),
            ("user#123", "user123"),
        ])

    def test_empty_string(self):
        """Empty and whitespace-only input both yield the empty string."""
        for blank in ("", "   "):
            assert normalize_contact_name(blank) == ""

    def test_only_special_chars(self):
        """Input made solely of punctuation/symbols yields the empty string."""
        for symbols_only in ("---", "...", "'", "@#$%"):
            assert normalize_contact_name(symbols_only) == ""

    def test_unicode_letters_preserved(self):
        """Pin the ASCII-only output for accented and non-Latin input.

        Despite this test's name, the assertions expect non-ASCII letters
        to be dropped, not preserved.
        """
        # The values below document the current behavior. The original note
        # here suggests switching the regex to [\W_]+ if unicode support is
        # ever needed; these expectations would then have to change.
        self._check([
            ("María", "mara"),  # current behavior
            ("François", "franois"),
            ("北京", ""),  # non-Latin removed
        ])

    def test_collision_scenarios(self):
        """Spelling variants of one surname all collapse to ``"obrien"``."""
        spellings = ["O'Brien", "OBrien", "O Brien", "o'brien", "O'BRIEN", "O-Brien"]
        distinct = set(map(normalize_contact_name, spellings))

        # Exactly one distinct result, and that result is the expected value.
        assert len(distinct) == 1
        assert distinct == {"obrien"}

    def test_real_world_examples(self):
        """Realistic contact names, including non-Latin scripts."""
        self._check([
            ("Jane Doe", "janedoe"),
            ("Christian Kniep", "christiankniep"),
            ("Dr. Sarah Johnson-Smith", "drsarahjohnsonsmith"),
            ("José María García", "josmaragarca"),
            ("李明", ""),  # Chinese characters removed
            ("محمد", ""),  # Arabic characters removed
        ])

    def test_idempotency(self):
        """Normalizing an already-normalized value changes nothing."""
        for sample in ("John Doe", "O'Brien", "Mary-Kate", "Dr. Smith", "José"):
            first_pass = normalize_contact_name(sample)
            assert normalize_contact_name(first_pass) == first_pass

    def test_case_insensitivity(self):
        """Differently-cased spellings of one name share a single result."""
        casings = ["JOHN DOE", "john doe", "John Doe", "JoHn DoE"]
        results = [normalize_contact_name(casing) for casing in casings]
        assert len(set(results)) == 1

    def test_whitespace_variations(self):
        """Runs of spaces, tabs and newlines are all stripped."""
        self._check([
            ("John   Doe", "johndoe"),
            ("John\tDoe", "johndoe"),
            ("John\nDoe", "johndoe"),
            ("\n  John  Doe  \n", "johndoe"),
        ])

    def test_leading_trailing_special_chars(self):
        """Punctuation wrapped around a name is removed too."""
        self._check([
            ("'John'", "john"),
            ("-Mary-", "mary"),
            (".Dr. Smith.", "drsmith"),
        ])