jflo committed on
Commit
1fbeb9d
·
1 Parent(s): be8eaf8

Delete maven_text_preprocessing.py

Browse files
Files changed (1) hide show
  1. maven_text_preprocessing.py +0 -27
maven_text_preprocessing.py DELETED
@@ -1,27 +0,0 @@
1
- # import necessary libraries
2
- import pandas as pd
3
- import spacy
4
-
5
- # download the spacy model
6
- nlp = spacy.load("en_core_web_sm")
7
-
8
- # helper functions from text preprocessing section
9
- def lower_replace(series):
10
- output = series.str.lower()
11
- output = output.str.replace(r'\[.*?\]', '', regex=True)
12
- output = output.str.replace(r'[^\w\s]', '', regex=True)
13
- return output
14
-
15
- def token_lemma_nonstop(text):
16
- doc = nlp(text)
17
- output = [token.lemma_ for token in doc if not token.is_stop]
18
- return ' '.join(output)
19
-
20
- def clean_and_normalize(series):
21
- output = lower_replace(series)
22
- output = output.apply(token_lemma_nonstop)
23
- return output
24
-
25
- # allow command-line execution
26
- if __name__ == "__main__":
27
- print("Text preprocessing module ready to use.")