File size: 448 Bytes
00ff675
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
import re

def normalize_text(text: str) -> str:
    """Lower-case *text* and flatten list/heading markup into ". " separators.

    Steps, in order:

    1. Lower-case the input.
    2. Replace each run of newlines with ". ".
    3. Replace bullet characters, and dashes used as bullets or separators,
       with ". ". A hyphen or en dash sitting between two word characters
       ("state-of-the-art", "10–20") is left alone.
    4. Replace list numbering such as "1." with ". ". Digits that belong to
       a decimal or dotted version number ("3.14", "1.2.3") are left alone.
    5. Replace every colon with ". ".
    6. Replace round brackets with spaces, keeping the bracketed content.
    7. Collapse whitespace runs to a single space and strip both ends.

    Args:
        text: The raw text to normalize.

    Returns:
        The normalized text. Because markers are replaced rather than
        removed, the result may start with ". " or contain sequences such
        as ". . " where several markers were adjacent.
    """
    text = text.lower()

    # Line breaks become sentence-style separators.
    text = re.sub(r'\n+', '. ', text)

    # Bullets always count as separators. A dash (hyphen or en dash, \u2013)
    # only counts when it is not sandwiched between two word characters, so
    # hyphenated compounds and numeric ranges survive.
    text = re.sub(r'\u2022|(?<!\w)[-\u2013]|[-\u2013](?!\w)', '. ', text)

    # List numbering: a digit run followed by a dot. The lookbehind and
    # lookahead stop this from eating parts of decimals / version numbers.
    text = re.sub(r'(?<![\d.])\d+\.(?!\d)', '. ', text)

    # Heading-style colons.
    text = re.sub(r':', '. ', text)

    # Remove brackets but keep content
    text = re.sub(r'[()]', ' ', text)

    # Normalize spaces
    text = re.sub(r'\s+', ' ', text)

    return text.strip()