skillsync-backend/app/utils/text_cleaner.py
GitHub Actions
sync: github commit e4109213b5cedf256d6e30f65518976b7d530541 to HF Space
19dc325
Raw
History Blame Contribute Delete
432 Bytes
import re
class TextCleaner:
    """Namespace for whitespace-normalization helpers."""

    @staticmethod
    def clean_text(text: str) -> str:
        """Return *text* with line endings and whitespace normalized.

        Steps, in order:

        1. Convert ``\\r\\n`` and lone ``\\r`` line endings to ``\\n``.
        2. Collapse any run of newlines (optionally separated by other
           whitespace) into exactly one blank line (``\\n\\n``).
        3. Collapse runs of spaces/tabs into a single space.
        4. Strip leading and trailing whitespace.

        Args:
            text: The raw text to clean. Any falsy value (``None``, ``""``)
                yields an empty string.

        Returns:
            The cleaned text, or ``""`` when the input is falsy.
        """
        if not text:
            return ""

        # Treat CRLF as ONE line break. Replacing '\r' alone would turn
        # every Windows line ending into '\n\n', i.e. a spurious blank line.
        text = re.sub(r'\r\n?', '\n', text)

        # Two or more line breaks (possibly with whitespace-only lines in
        # between) become a single paragraph separator.
        text = re.sub(r'\n\s*\n', '\n\n', text)

        # Squeeze horizontal whitespace inside lines.
        text = re.sub(r'[ \t]+', ' ', text)

        return text.strip()