from tokenizer.ptbtokenizer import PTBTokenizer
from bleu.bleu import Bleu
from meteor.meteor import Meteor
from rouge.rouge import Rouge
from cider.cider import Cider
|
|
| """ |
| Input: refer and Res = [{ref_id, sent}] |
| |
| Things of interest |
| evalRefs - list of ['ref_id', 'CIDEr', 'Bleu_1', 'Bleu_2', 'Bleu_3', 'Bleu_4', 'ROUGE_L', 'METEOR'] |
| eval - dict of {metric: score} |
| refToEval - dict of {ref_id: ['ref_id', 'CIDEr', 'Bleu_1', 'Bleu_2', 'Bleu_3', 'Bleu_4', 'ROUGE_L', 'METEOR']} |
| """ |
|
|
class RefEvaluation:
    def __init__(self, refer, Res):
        """
        :param refer: refer class of current dataset
        :param Res: [{'ref_id', 'sent'}]
        """
        self.evalRefs = []
        self.eval = {}
        self.refToEval = {}
        self.refer = refer
        self.Res = Res
|
    def evaluate(self):
        evalRefIds = [ann['ref_id'] for ann in self.Res]

        # collect the ground-truth expressions for each evaluated ref_id,
        # dropping any non-ascii characters
        refToGts = {}
        for ref_id in evalRefIds:
            ref = self.refer.Refs[ref_id]
            gt_sents = [sent['sent'].encode('ascii', 'ignore').decode('ascii') for sent in ref['sentences']]
            refToGts[ref_id] = gt_sents
        # each predicted result is a single-sentence list keyed by ref_id
        refToRes = {ann['ref_id']: [ann['sent']] for ann in self.Res}

        print('tokenization...')
        tokenizer = PTBTokenizer()
        self.refToRes = tokenizer.tokenize(refToRes)
        self.refToGts = tokenizer.tokenize(refToGts)
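        # Note: assuming the local PTBTokenizer behaves like the pycocoevalcap
        # one, tokenize() lowercases and strips punctuation, e.g. a hypothetical
        # "The left Bottle." would come back as "the left bottle".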
|
|
        # =================================================
        # Set up scorers
        # =================================================
        print('setting up scorers...')
        scorers = [
            (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
            (Meteor(), "METEOR"),
            (Rouge(), "ROUGE_L"),
            (Cider(), "CIDEr")
        ]
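        # Assumed scoring convention (as in the pycocoevalcap scorers): each
        # compute_score(gts, res) call returns a pair
        # (overall_score, per_item_scores). For Bleu(4) both elements are lists
        # of four entries, one per n-gram order, hence the four method names.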
|
|
        # =================================================
        # Compute scores
        # =================================================
        for scorer, method in scorers:
            print('computing %s score...' % scorer.method())
            score, scores = scorer.compute_score(self.refToGts, self.refToRes)
            # per-item scores come back in the same order as the keys of the
            # gts dict passed in, so refToGts.keys() aligns refIds with scores
            if isinstance(method, list):
                for sc, scs, m in zip(score, scores, method):
                    self.setEval(sc, m)
                    self.setRefToEvalRefs(scs, self.refToGts.keys(), m)
                    print("%s: %0.3f" % (m, sc))
            else:
                self.setEval(score, method)
                self.setRefToEvalRefs(scores, self.refToGts.keys(), method)
                print("%s: %0.3f" % (method, score))
        self.setEvalRefs()
|
|
    def setEval(self, score, method):
        self.eval[method] = score

    def setRefToEvalRefs(self, scores, refIds, method):
        # record each per-ref score under its metric name, creating the
        # per-ref dict on first use
        for refId, score in zip(refIds, scores):
            if refId not in self.refToEval:
                self.refToEval[refId] = {}
                self.refToEval[refId]["ref_id"] = refId
            self.refToEval[refId][method] = score
|
|
    def setEvalRefs(self):
        # flatten refToEval into a list of per-ref result dicts
        self.evalRefs = [refEval for refId, refEval in self.refToEval.items()]
|
|
|
|
if __name__ == '__main__':

    import os.path as osp
    import sys
    ROOT_DIR = osp.abspath(osp.join(osp.dirname(__file__), '..', '..'))
    sys.path.insert(0, osp.join(ROOT_DIR, 'lib', 'datasets'))
    from refer import REFER
|
|
    # load refer of dataset
    dataset = 'refcoco'
    refer = REFER(dataset, splitBy='google')
|
|
    # mimic a predicted result for one ref_id from the test split
    val_refIds = refer.getRefIds(split='test')
    ref_id = 49767
    print("GD: %s" % refer.Refs[ref_id]['sentences'])
    Res = [{'ref_id': ref_id, 'sent': 'left bottle'}]
|
|
    # evaluate the predicted expression
    refEval = RefEvaluation(refer, Res)
    refEval.evaluate()
|
|
    # print overall evaluation scores
    for metric, score in refEval.eval.items():
        print('%s: %.3f' % (metric, score))
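
    # Sketch: scoring every ref in the split instead of one example. predict()
    # is a hypothetical stand-in for a model that returns a sentence per ref_id;
    # it is not part of this repo.
    #   Res = [{'ref_id': rid, 'sent': predict(rid)} for rid in val_refIds]
    #   RefEvaluation(refer, Res).evaluate()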
|