File size: 424 Bytes
bbd259b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 |
# Import-time side effect: writes a fixed "module loaded" line to stdout.
print("preprocessing module loaded")
import re
def clean_text(text):
    """Strip URLs from ``text`` and collapse its whitespace.

    NOTE(review): this file defines ``clean_text`` again further down; the
    later definition replaces this one when the module is imported.

    Args:
        text: The raw string to normalize.

    Returns:
        ``text`` with every ``http://`` / ``https://`` URL removed, each run
        of whitespace replaced by a single space, and leading/trailing
        whitespace stripped.
    """
    # Require the "://" scheme separator so ordinary words that merely
    # contain "http" (e.g. "httpx", "httpd") are not deleted as if they
    # were URLs.
    text = re.sub(r"https?://\S+", "", text)
    # Collapse any whitespace run (spaces, tabs, newlines) to one space;
    # this also closes the gap left where a URL was removed.
    text = re.sub(r"\s+", " ", text)
    return text.strip()
import re
def clean_text(text: str) -> str:
    """Apply basic text normalization for Reddit posts.

    Removes URLs, collapses whitespace runs, and trims the ends.

    Args:
        text: The raw post text to normalize.

    Returns:
        ``text`` with every ``http://`` / ``https://`` URL removed, each run
        of whitespace replaced by a single space, and leading/trailing
        whitespace stripped.
    """
    # Require the "://" scheme separator so ordinary words that merely
    # contain "http" (e.g. "httpx", "httpd") are not deleted as if they
    # were URLs.
    text = re.sub(r"https?://\S+", "", text)
    # Collapse any whitespace run (spaces, tabs, newlines) to one space;
    # this also closes the gap left where a URL was removed.
    text = re.sub(r"\s+", " ", text)
    return text.strip()
|