Spaces:

Memverge
/

MemPrepMate

Sleeping

MemPrepMate / src /utils /contact_utils.py

Christian Kniep

new webapp

1fff71f 3 months ago

1.48 kB

	"""
	Contact name normalization utilities for producer identifier generation.

	This module provides functions to normalize contact names for use in producer
	identifiers, ensuring consistent handling of special characters and collisions.
	"""

	import re
	import unicodedata


	def normalize_contact_name(name: str) -> str:
	    """
	    Normalize a contact name for use in a producer identifier.

	    The name is composed to Unicode NFC, lowercased, and stripped of every
	    character that is not a letter or a digit (spaces, punctuation,
	    underscores, symbols). Letters and digits of any script are kept, so
	    accented and non-Latin names survive normalization. Distinct names may
	    still normalize to the same string; callers are expected to resolve
	    such collisions (e.g. with sequence numbers).

	    Args:
	        name: Contact name to normalize (e.g., "John O'Brien"). Falsy
	            values (empty string, None) yield an empty string.

	    Returns:
	        The normalized name containing only lowercase letters and digits
	        (e.g., "johnobrien"), or "" when nothing remains.

	    Examples:
	        >>> normalize_contact_name("John Smith")
	        'johnsmith'
	        >>> normalize_contact_name("O'Brien")
	        'obrien'
	        >>> normalize_contact_name("Mary-Ann")
	        'maryann'
	        >>> normalize_contact_name("María García")
	        'maríagarcía'
	        >>> normalize_contact_name("李明")
	        '李明'
	        >>> normalize_contact_name("John (Johnny) Smith")
	        'johnjohnnysmith'
	    """
	    if not name:
	        return ""

	    # Compose first so a decomposed accent (base letter + combining mark)
	    # produces the same identifier as its precomposed form; otherwise the
	    # combining mark would be stripped below and the accent silently lost.
	    composed = unicodedata.normalize("NFC", name)

	    # For str patterns \W is Unicode-aware: it matches anything that is not
	    # a letter, digit or underscore in any script. The underscore is added
	    # explicitly because \w would otherwise let it through.
	    return re.sub(r"[\W_]+", "", composed.lower())