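# Text clean-up and number-to-word substitution script.
#
# Command-line arguments, as used by the code in this file:
#   argv[1]  path of a UTF-8 text file that is read in full
#   argv[2]  path of a workbook opened with openpyxl; in its active sheet,
#            column 1 holds the value to look for and column 2 its replacement
#   argv[3]  path of the UTF-8 text file the processed text is written to
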
import re
import sys

import nltk
import numpy
import openpyxl
import xlrd


# Characters deleted BEFORE the spaced en dash is collapsed.  The phases are
# kept separate because the original performed its replacements one after
# another: e.g. "a ?– b" loses the "?" first and therefore still collapses
# to "a b", while "a ,– b" keeps its dash because "," is removed later.
_PRE_DASH_TABLE = str.maketrans("", "", "?;)(!")

# Applied AFTER the spaced en dash is collapsed: a hyphen becomes a space;
# the danda, ampersand, curly single quotes, colon, comma, slash, full stop
# and vertical bar are deleted.
# NOTE(review): the original also replaced ' ' with ' ' (a no-op as written;
# possibly a non-breaking space was intended -- confirm) and removed ','
# twice (possibly a different comma character was intended -- confirm).
# Both are omitted here because, as written, they have no effect.
_POST_DASH_TABLE = str.maketrans("-", " ", "।&’‘:,/.|")


def strip_punctuation(text):
    """Return *text* with the script's punctuation clean-up applied.

    Steps, in order: delete ``? ; ) ( !``; replace an en dash that has a
    space on both sides with a single space; turn ``-`` into a space and
    delete the remaining punctuation listed in ``_POST_DASH_TABLE``.
    """
    text = text.translate(_PRE_DASH_TABLE)
    text = text.replace(" – ", " ")
    return text.translate(_POST_DASH_TABLE)


def apply_substitutions(text, pairs):
    """Replace every occurrence of each key in *text* with its word.

    Args:
        text: The string to process.
        pairs: Iterable of ``(key, word)`` tuples.  Both items are converted
            with ``str()``.  Pairs with a ``None`` key or word (empty cells)
            or an empty key are skipped instead of raising ``TypeError`` or
            matching the literal text ``"None"``.

    Returns:
        The text after all substitutions.

    Longer keys are applied first so that a short key (``"1"``) cannot
    destroy occurrences of a longer key that contains it (``"12"``).  Keys of
    equal length keep their original relative order (the sort is stable).
    """
    usable = [
        (str(key), str(word))
        for key, word in pairs
        if key is not None and word is not None and str(key) != ""
    ]
    usable.sort(key=lambda pair: len(pair[0]), reverse=True)
    for key, word in usable:
        text = text.replace(key, word)
    return text


def main(argv):
    """Run the script: read text, clean it, substitute, write the result.

    Args:
        argv: Argument vector; ``argv[1]`` is the input text file,
            ``argv[2]`` the workbook with the substitutions and ``argv[3]``
            the output text file.
    """
    if len(argv) < 4:
        raise SystemExit(
            "usage: {} INPUT_TEXT WORKBOOK OUTPUT_TEXT".format(argv[0])
        )
    input_path, workbook_path, output_path = argv[1:4]

    # Plain read mode: the input is never written to, so it does not need
    # to be writable.
    with open(input_path, "r", encoding="utf-8") as source:
        text = source.read()

    sheet = openpyxl.load_workbook(workbook_path).active
    pairs = [
        (
            sheet.cell(row=row, column=1).value,
            sheet.cell(row=row, column=2).value,
        )
        for row in range(1, sheet.max_row + 1)
    ]

    result = apply_substitutions(strip_punctuation(text), pairs)

    with open(output_path, "w", encoding="utf-8") as target:
        target.write(result)


if __name__ == "__main__":
    main(sys.argv)