File size: 424 Bytes
bbd259b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
# Import-time side effect: writes a fixed message to stdout every time this
# module is executed or imported.
print("preprocessing module loaded")

import re

# Patterns are compiled once at import time so repeated calls reuse them.
# NOTE: the URL pattern matches any run of non-whitespace characters that
# starts with "http", so it also removes non-URL tokens such as "https"
# or "httpd".
_URL_RE = re.compile(r"http\S+")
_WHITESPACE_RE = re.compile(r"\s+")


def clean_text(text: str) -> str:
    """Return *text* with URLs removed and whitespace normalized.

    Basic text normalization for Reddit posts.

    Processing order:
      1. Every substring starting with ``http`` and running up to the next
         whitespace character is deleted.
      2. Each run of whitespace (spaces, tabs, newlines) is collapsed into a
         single space.
      3. Leading and trailing whitespace is stripped.

    Args:
        text: The raw text to normalize.

    Returns:
        The cleaned text; an empty string if nothing but URLs and
        whitespace was present.
    """
    without_urls = _URL_RE.sub("", text)
    # Collapse after URL removal so the gaps left by deleted URLs are merged.
    collapsed = _WHITESPACE_RE.sub(" ", without_urls)
    return collapsed.strip()