File size: 370 Bytes
ae2ef1b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
import string



# Deletion table built once at import time: every ASCII punctuation character
# plus the Bangla danda (sentence terminator) maps to "delete".
_BN_PUNCT_TABLE = str.maketrans('', '', string.punctuation + '।')


def preprocess_bn(txt: str) -> str:
	"""
	Clean Bangla text by stripping punctuation and normalizing whitespace.

	Two steps are applied, in order:
		1. Punctuation removal: every character in ``string.punctuation``
		   and the Bangla danda ('।') is deleted. Characters are removed,
		   not replaced by a space, so ``"a,b"`` becomes ``"ab"``.
		2. Whitespace normalization: every run of whitespace is collapsed
		   to a single space and leading/trailing whitespace is dropped.

	Args:
		txt: The text to clean.

	Returns:
		The cleaned text; an empty string if nothing but punctuation and
		whitespace was present.
	"""

	# Single pass over the string instead of one str.replace per character.
	txt = txt.translate(_BN_PUNCT_TABLE)

	# split() with no argument splits on any whitespace run and discards
	# empty pieces, so re-joining yields single-space-separated tokens.
	return ' '.join(txt.split())