File size: 1,475 Bytes
1fff71f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
"""
Contact name normalization utilities for producer identifier generation.

This module provides functions to normalize contact names for use in producer
identifiers, ensuring consistent handling of special characters and collisions.
"""

import re
import unicodedata


def normalize_contact_name(name: str) -> str:
    """
    Normalize a contact name for use in a producer identifier.

    The name is composed to Unicode NFC form, lowercased, and stripped of
    every character that is not a letter or digit (spaces, punctuation,
    underscores, symbols). Letters and digits from any script are kept, so
    accented and non-Latin names remain distinguishable. Different names may
    still normalize to the same value; such collisions are expected to be
    resolved by the caller (e.g. with sequence numbers).

    Args:
        name: Contact name to normalize (e.g., "John O'Brien"). A falsy
            value (empty string or None) yields an empty string.

    Returns:
        Normalized name containing only lowercase alphanumeric characters
        (e.g., "johnobrien").

    Examples:
        >>> normalize_contact_name("John Smith")
        'johnsmith'
        >>> normalize_contact_name("O'Brien")
        'obrien'
        >>> normalize_contact_name("Mary-Ann")
        'maryann'
        >>> normalize_contact_name("María García")
        'maríagarcía'
        >>> normalize_contact_name("李明")
        '李明'
        >>> normalize_contact_name("John (Johnny) Smith")
        'johnjohnnysmith'
    """
    if not name:
        return ""

    # Compose to NFC first so that "í" typed as a single code point and "í"
    # typed as "i" + combining accent produce the same identifier; otherwise
    # the combining mark would be stripped below and the two would differ.
    composed = unicodedata.normalize("NFC", name)

    # For str patterns \W is Unicode-aware: it matches everything except
    # letters, digits and "_" in any script. The underscore is listed
    # explicitly so that only alphanumeric characters survive.
    return re.sub(r'[\W_]+', '', composed.lower())