Diabetes_readmissions / data /preprocess.py
MP44's picture
Upload 14 files
d51135f verified
raw
history blame contribute delete
199 Bytes
import re
def clean_text(text: str) -> str:
    """Return a lowercased, single-line version of *text* without PHI markers.

    Steps, in order:

    1. Lowercase the whole string.
    2. Delete every bracketed ``[** ... **]`` placeholder (the PHI markers).
    3. Replace each run of one or more newlines with a single space.
    4. Strip leading and trailing whitespace.

    Only newline runs are collapsed; consecutive spaces (including the gap
    left behind by a removed placeholder) are kept as they are.
    """
    lowered = text.lower()
    # remove PHI -- DOTALL lets "." cross line breaks, so a placeholder that
    # was wrapped onto the next line is still removed instead of leaking
    # through once the newlines are collapsed below.
    without_phi = re.sub(r"\[\*\*.*?\*\*\]", "", lowered, flags=re.DOTALL)
    single_line = re.sub(r"\n+", " ", without_phi)
    return single_line.strip()