# Groundtruth CSV builder (non-code extraction header removed).
import pandas as pd
import numpy as np
import csv,pickle,json,bz2
from romtoslp import *
# Pre-computed DCS sentence objects, keyed by '<sentid>.p2'.
# Fix: the original `pickle.load(open(...))` leaked the file handle; a
# `with` block guarantees it is closed.
# NOTE(review): pickle.load on an on-disk file — only safe for trusted data.
with open('../Simultaneous_DCS_ho.p', 'rb') as _dcs_fh:
    loaded_DCS = pickle.load(_dcs_fh)
# Directory holding the per-sentence .ds.bz2 conflict/feature dumps.
folder = '../NewData/skt_dcs_DS.bz2_4K_bigram_mir_heldout/'
def open_dsbz2(filename):
    """Load a .ds.bz2 pickle dump and return its seven components.

    Returns a tuple in the fixed order:
    (nodelist_correct, conflicts_Dict_correct, featVMat_correct,
     nodelist_to_correct_mapping, nodelist, conflicts_Dict, featVMat)
    """
    with bz2.BZ2File(filename, 'r') as handle:
        payload = pickle.load(handle)
    # Pull the entries out in the exact order callers unpack them.
    keys = ('nodelist_correct', 'conflicts_Dict_correct', 'featVMat_correct',
            'nodelist_to_correct_mapping', 'nodelist', 'conflicts_Dict',
            'featVMat')
    return tuple(payload[k] for k in keys)
# ---------------------------------------------------------------------------
# Build groundtruth2.csv: one row per ground-truth (lemma, CNG) occurrence,
# scored against the predictions read from BM2_NLoss_proc{0..3}.csv.
# Each prediction file holds 6 rows per sentence: lemmas, CNGs, then 4 rows
# this script skips.
# Fix: the original loop caught StopIteration via `except Exception` and
# `continue`d, so an exhausted input file made it spin forever; we now break
# cleanly when the reader runs out of rows.
# ---------------------------------------------------------------------------


def _append_row(row):
    # Re-open in append mode for every row (as the original did) so partial
    # progress survives a mid-run crash.
    with open('groundtruth2.csv', 'a') as out_fh:
        csv.writer(out_fh).writerow(row)


# Write the header, truncating any previous run's output.
with open('groundtruth2.csv', 'w') as out_fh:
    csv.writer(out_fh).writerow(
        ['File', 'Lemma', 'CNG', 'lemmaCorr', 'lemmaCNGcorr', 'predCNG',
         'Conflicts'])

count = 0
for ii in range(4):
    with open("BM2_NLoss_proc" + str(ii) + ".csv", 'r') as in_fh:
        reader = csv.reader(in_fh)
        while True:
            try:
                print(count)
                count += 1
                x = next(reader)  # row 1 of 6: [sentid, predicted lemmas...]
                sentid = x[0]
                dcsobj = loaded_DCS[str(sentid) + '.p2']
                (nodelist_correct, conflicts_Dict_correct, featVMat_correct,
                 nodelist_to_correct_mapping, nodelist, conflicts_Dict,
                 featVMat) = open_dsbz2(folder + str(sentid) + '.ds.bz2')

                # Sanity check: flattened DCS lemma count should equal the
                # number of correct nodes; mismatches are only reported.
                dll = sum(len(chunk) for chunk in dcsobj.lemmas)
                if dll != len(nodelist_correct):
                    print('here')
                    print(dcsobj.lemmas)
                    print(nodelist_correct)

                # Ground-truth lemmas, flattened and transliterated to SLP.
                gtlemmas = [rom_slp(element)
                            for outerlist in dcsobj.lemmas
                            for element in outerlist]
                pdlemmas = x[1:]

                x = next(reader)  # row 2 of 6: predicted CNGs
                # Pair each ground-truth CNG with its node's conflict count.
                gtcngs = []
                i = 0
                for outerlist in dcsobj.cng:
                    for element in outerlist:
                        gtcngs.append(
                            (element, len(conflicts_Dict_correct[i])))
                        i += 1
                pdcngs = x[1:]

                for _ in range(4):  # rows 3-6: unused here
                    next(reader)

                # lemma -> list of CNG entries.  Index loops (not zip) so a
                # lemma/CNG length mismatch still raises IndexError and the
                # sentence is skipped by the handler below, as before.
                gtldict = dict()
                for idx in range(len(gtlemmas)):
                    gtldict.setdefault(gtlemmas[idx], []).append(gtcngs[idx])
                pdldict = dict()
                for idx in range(len(pdlemmas)):
                    pdldict.setdefault(pdlemmas[idx], []).append(pdcngs[idx])

                # Round 1: exact lemma+CNG matches consume a prediction;
                # lemma-only candidates are deferred to round 2.
                lemmaround2 = []
                cnground2 = []
                for gtl in gtldict.keys():
                    for gtlcng in gtldict[gtl]:
                        lemmacorr = 0
                        lemmaCNGcorr = 0
                        predictedcng = 'nil'
                        confcount = gtlcng[1]
                        gtlcng = gtlcng[0]
                        if gtl in pdldict and len(pdldict[gtl]) > 0:
                            if gtlcng in pdldict[gtl]:
                                # Exact match: consume that predicted CNG.
                                lemmacorr = 1
                                lemmaCNGcorr = 1
                                predictedcng = gtlcng
                                pdldict[gtl].remove(gtlcng)
                                _append_row([sentid, gtl, gtlcng, lemmacorr,
                                             lemmaCNGcorr, gtlcng, confcount])
                            else:
                                # Lemma predicted, CNG not (yet) — round 2.
                                lemmaround2.append(gtl)
                                cnground2.append((gtlcng, confcount))
                        else:
                            # Lemma missed entirely, or all its predictions
                            # were already consumed.
                            _append_row([sentid, gtl, gtlcng, lemmacorr,
                                         lemmaCNGcorr, predictedcng,
                                         confcount])

                # Round 2: lemma-only credit; consume one leftover predicted
                # CNG for the lemma if any remain.
                for idx in range(len(lemmaround2)):
                    gtl = lemmaround2[idx]
                    gtlcng, confcount = cnground2[idx]
                    lemmacorr = 0
                    lemmaCNGcorr = 0
                    predictedcng = 'nil'
                    if gtl in pdldict and len(pdldict[gtl]) > 0:
                        lemmacorr = 1
                        predictedcng = pdldict[gtl].pop(0)
                    _append_row([sentid, gtl, gtlcng, lemmacorr,
                                 lemmaCNGcorr, predictedcng, confcount])
            except StopIteration:
                # Input file exhausted — move on to the next proc file.
                break
            except Exception as e:
                # Best-effort: report and skip the malformed sentence.
                print(e)
                print('been there')
                continue