ml-services / semantic_engine /normalize.py
ABHINAY2025
add FastAPI ML service with semantic engine
00ff675
raw
history blame contribute delete
448 Bytes
import re
def normalize_text(text: str) -> str:
    """Lower-case *text* and flatten list/heading markup into ``". "`` breaks.

    The steps, in order:

    1. Lower-case the text.
    2. Replace numbered-list markers (``1.``, ``2.`` ...) that open a line.
    3. Replace each run of newlines with ``". "``.
    4. Replace bullet characters and free-standing dashes with ``". "``.
    5. Replace colons with ``". "``.
    6. Drop parentheses but keep the text inside them.
    7. Collapse whitespace runs to one space and strip both ends.

    Digits followed by a period elsewhere in a line (``3.10``, ``in 2020.``)
    and dashes inside a word (``full-stack``) are kept, so no content is
    lost or split by the marker handling.

    Args:
        text: The raw input string.

    Returns:
        The normalized string; an empty input yields an empty string.
    """
    text = text.lower()

    # List numbering is only recognisable at the start of a line, so handle
    # it BEFORE newlines are collapsed. The (?!\d) guard keeps a decimal that
    # happens to open a line (e.g. "3.5 years") intact.
    text = re.sub(r'^[ \t]*\d+\.(?!\d)', '. ', text, flags=re.MULTILINE)

    # Every line break becomes a sentence-like separator.
    text = re.sub(r'\n+', '. ', text)

    # Bullets always count as separators. A hyphen / en dash counts only when
    # it is not flanked by word characters on both sides, which leaves
    # hyphenated terms such as "full-stack" untouched.
    text = re.sub(r'\u2022|(?<!\w)[\-\u2013]|[\-\u2013](?!\w)', '. ', text)

    # Colons become separators as well.
    text = re.sub(r':', '. ', text)

    # Remove brackets but keep content
    text = re.sub(r'[\(\)]', ' ', text)

    # Normalize spaces
    text = re.sub(r'\s+', ' ', text)
    return text.strip()