lynn-twinkl committed on
Commit
82e0bc6
·
1 Parent(s): 26ad793

for use in ML pipeline

Browse files
Files changed (1) hide show
  1. src/preprocess_text.py +12 -0
src/preprocess_text.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import string
2
+
3
def preprocess_text(text):
    """
    Normalise text for later use in a machine learning pipeline.

    Lowercases the input, removes every character listed in
    ``string.punctuation`` (ASCII punctuation only), and collapses runs of
    whitespace into single spaces, trimming both ends.

    Args:
        text: The raw value to normalise. Expected to be a ``str``.

    Returns:
        The normalised string. Non-string input (e.g. ``None`` or a float
        NaN standing in for a missing value) yields an empty string.
    """
    # Guard first: only str provides the lower/translate/split calls below,
    # so non-string values are mapped to '' instead of reaching them.
    if not isinstance(text, str):
        return ''

    text = text.lower()
    # Three-argument maketrans maps each punctuation character to None,
    # which deletes it during translate().
    text = text.translate(str.maketrans('', '', string.punctuation))

    # split() with no arguments splits on any whitespace run and discards
    # leading/trailing whitespace, so the join normalises spacing.
    return ' '.join(text.split())