File size: 566 Bytes
1afb34d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
from re import sub


def clean_text(text: str) -> str:
    """
    Applies some pre-processing on the given text.

    Steps :
    - Lowering text
    - Removing backslashes
    - removes 'a.' out of the answers
    - replaces 'b.', 'c.', and 'd.' with comma ','
    - replaces every run of digits with the token '<num>'
    - collapses all whitespace runs to a single space and strips both ends

    Note: the 'a.' / 'b.' / 'c.' / 'd.' steps are plain substring
    replacements, so they also match those letters at the end of a word
    that is followed by a period.

    :param text: the raw string to normalise.
    :return: the normalised string.
    """
    text = text.lower()
    text = text.replace('\\', '')  # generic replace was advised by Danit
    text = text.replace('a.', '')
    for marker in ('b.', 'c.', 'd.'):
        text = text.replace(marker, ',')
    # Raw string: "\d" in a normal literal is an invalid escape sequence
    # and triggers a SyntaxWarning on recent Python versions.
    text = sub(r"\d+", "<num>", text)

    # split() with no argument splits on any whitespace and drops empties.
    return ' '.join(text.split())