File size: 432 Bytes
19dc325
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
import re

class TextCleaner:
    """Namespace for whitespace-normalization helpers."""

    @staticmethod
    def clean_text(text: str) -> str:
        """Return *text* with line endings and whitespace normalized.

        Steps, in order:

        1. Convert ``\\r\\n`` and lone ``\\r`` line endings to ``\\n``.
        2. Collapse any run of newlines (optionally interleaved with other
           whitespace) into exactly one blank line (``\\n\\n``).
        3. Collapse each run of spaces/tabs into a single space.
        4. Strip leading and trailing whitespace from the whole result.

        A falsy input (empty string or ``None``) yields ``""``.
        """
        if not text:
            return ""

        # Normalize newlines. CRLF must be handled before lone CR; replacing
        # every '\r' directly would turn each '\r\n' into '\n\n' and so
        # convert ordinary Windows line breaks into paragraph breaks.
        text = text.replace('\r\n', '\n').replace('\r', '\n')

        # Collapse blank-line runs to a single blank line
        text = re.sub(r'\n\s*\n', '\n\n', text)

        # Remove extra spaces within lines
        text = re.sub(r'[ \t]+', ' ', text)

        return text.strip()