| |
| |
|
|
| |
|
|
|
|
| |
| |
| |
| |
| |
| |
| |
|
|
|
|
| |
|
|
|
|
| import nltk |
| import numpy |
| import xlrd |
| import openpyxl |
| import re |
| import sys |
|
|
| |
|
|
| |
# Usage: <script> <input.txt> <mapping.xlsx> <output.txt>
#
# Reads UTF-8 text, strips a fixed set of punctuation marks, replaces every
# occurrence of the values in column 1 of the workbook's active sheet with the
# matching value from column 2, and writes the result as UTF-8 text.

# Punctuation clean-up rules as (old, new) pairs, applied in the listed order.
PUNCTUATION_RULES = (
    ('?', ''),
    # NOTE(review): as displayed, both arguments are a single space, which
    # makes this rule a no-op. The first one may originally have been a
    # non-breaking space or a double space -- confirm against the source file.
    (' ', ' '),
    (';', ''),
    (')', ''),
    ('(', ''),
    ('!', ''),
    (' – ', ' '),
    ('-', ' '),
    ('।', ''),
    ('&', ''),
    ('’', ''),
    ('‘', ''),
    (':', ''),
    (',', ''),
    ('/', ''),
    ('.', ''),
    ('|', ''),
)


def clean_text(text: str) -> str:
    """Return *text* with every rule in PUNCTUATION_RULES applied in order."""
    for old, new in PUNCTUATION_RULES:
        text = text.replace(old, new)
    return text


def replace_numbers(text: str, pairs) -> str:
    """Replace each key found in *text* with its paired word.

    *pairs* is an iterable of ``(key, word)`` tuples; both items are
    converted with ``str()`` before use.  Pairs where either item is
    ``None`` (an empty spreadsheet cell) or whose key is an empty string are
    skipped, because they would either raise ``TypeError`` or rewrite
    unrelated text.

    Longer keys are applied before shorter ones so that a key such as "1"
    cannot corrupt a longer key such as "10" before it is processed.  The
    sort is stable, so keys of equal length keep their original order.
    """
    usable = []
    for key, word in pairs:
        if key is None or word is None:
            continue
        key_text = str(key)
        if not key_text:
            # str.replace('', word) would insert word between every character.
            continue
        usable.append((key_text, str(word)))

    usable.sort(key=lambda pair: len(pair[0]), reverse=True)

    for key_text, word_text in usable:
        text = text.replace(key_text, word_text)
    return text


def read_mapping(path):
    """Return ``(column 1, column 2)`` value pairs for every row of the
    active worksheet of the workbook at *path*."""
    sheet = openpyxl.load_workbook(path).active
    return [
        (
            sheet.cell(row=row, column=1).value,
            sheet.cell(row=row, column=2).value,
        )
        for row in range(1, sheet.max_row + 1)
    ]


def main(argv=None):
    """Run the conversion using *argv* (defaults to ``sys.argv``).

    ``argv[1]`` is the input text file, ``argv[2]`` the mapping workbook and
    ``argv[3]`` the output text file.  Exits with a usage message when fewer
    than three arguments are supplied.
    """
    args = sys.argv if argv is None else argv
    if len(args) < 4:
        sys.exit("usage: <script> <input.txt> <mapping.xlsx> <output.txt>")

    # Plain read mode: the input is never written, so it need not be writable.
    with open(args[1], "r", encoding="utf-8") as source:
        text = source.read()

    pairs = read_mapping(args[2])
    result = replace_numbers(clean_text(text), pairs)

    with open(args[3], "w", encoding="utf-8") as target:
        target.write(result)


if __name__ == "__main__":
    main()
|
|
|
|