""" Contact name normalization utilities for producer identifier generation. This module provides functions to normalize contact names for use in producer identifiers, ensuring consistent handling of special characters and collisions. """ import re def normalize_contact_name(name: str) -> str: """ Normalize contact name for producer identifier. Converts to lowercase and removes all non-alphanumeric characters, keeping only letters and digits. This enables consistent producer identifiers while allowing collisions to be handled by sequence numbers. Args: name: Contact name to normalize (e.g., "John O'Brien") Returns: Normalized name with only lowercase alphanumeric characters (e.g., "johnobrien") Examples: >>> normalize_contact_name("John Smith") 'johnsmith' >>> normalize_contact_name("O'Brien") 'obrien' >>> normalize_contact_name("Mary-Ann") 'maryann' >>> normalize_contact_name("María García") 'maríagarcía' >>> normalize_contact_name("李明") '李明' >>> normalize_contact_name("John (Johnny) Smith") 'johnjohnnysmith' """ if not name: return "" # Convert to lowercase and remove all non-alphanumeric characters # This regex keeps unicode letters/digits but removes spaces, punctuation, special chars return re.sub(r'[^a-z0-9]', '', name.lower())