'''
Automatic generation evaluation metrics wrapper.

The most useful function here is:

    get_all_metrics(refs, cands)

See the usage sketch at the bottom of this file.
'''

from pycocoevalcap.tokenizer.ptbtokenizer import PTBTokenizer
from pycocoevalcap.bleu.bleu import Bleu
from pycocoevalcap.meteor.meteor import Meteor
from pycocoevalcap.rouge.rouge import Rouge
from pycocoevalcap.cider.cider import Cider
from pycocoevalcap.spice.spice import Spice


def get_all_metrics(refs, cands, return_per_cap=False):
    '''
    Compute BLEU, METEOR, ROUGE-L, CIDEr, and SPICE with pycocoevalcap.

    refs: list of lists of reference strings (one list per candidate)
    cands: list of candidate strings
    return_per_cap: if True, collect per-caption scores instead of the
        corpus-level score for each metric

    Returns a dict mapping metric name to its score(s); the 'bleu' entry
    covers BLEU-1 through BLEU-4.
    '''
    metrics = []
    names = []

    pycoco_eval_cap_scorers = [(Bleu(4), 'bleu'),
                               (Meteor(), 'meteor'),
                               (Rouge(), 'rouge'),
                               (Cider(), 'cider'),
                               (Spice(), 'spice')]

    for scorer, name in pycoco_eval_cap_scorers:
        overall, per_cap = pycoco_eval(scorer, refs, cands)
        if return_per_cap:
            metrics.append(per_cap)
        else:
            metrics.append(overall)
        names.append(name)

    metrics = dict(zip(names, metrics))
    return metrics


def tokenize(refs, cands, no_op=False):
    '''
    Reshape refs/cands into the dict format expected by the pycocoevalcap
    scorers and run the PTB tokenizer over them.

    no_op: if True, skip PTB tokenization and only reshape the inputs
        (useful for checking how much the tokenizer affects the scores).
    '''
    tokenizer = PTBTokenizer()

    if no_op:
        refs = {idx: [r for r in c_refs] for idx, c_refs in enumerate(refs)}
        cands = {idx: [c] for idx, c in enumerate(cands)}

    else:
        refs = {idx: [{'caption': r} for r in c_refs] for idx, c_refs in enumerate(refs)}
        cands = {idx: [{'caption': c}] for idx, c in enumerate(cands)}

        # PTBTokenizer expects {id: [{'caption': str}, ...]} and returns
        # {id: [tokenized str, ...]}, so it is only run on this branch.
        refs = tokenizer.tokenize(refs)
        cands = tokenizer.tokenize(cands)

    return refs, cands


def pycoco_eval(scorer, refs, cands):
    '''
    scorer is assumed to have a compute_score function.
    refs is a list of lists of reference strings.
    cands is a list of candidate strings, one per entry in refs.
    '''
    refs, cands = tokenize(refs, cands)
    average_score, scores = scorer.compute_score(refs, cands)
    return average_score, scores


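# Minimal usage sketch (assumes pycocoevalcap and the Java runtime its
# METEOR/SPICE scorers require are installed; the captions below are
# illustrative only).
if __name__ == '__main__':
    example_refs = [['a dog runs across the grass', 'a brown dog is running'],
                    ['a man rides a bicycle down the street']]
    example_cands = ['a dog running on grass',
                     'a person riding a bike']

    # Corpus-level scores; 'bleu' is a list of BLEU-1 through BLEU-4.
    scores = get_all_metrics(example_refs, example_cands)
    for name, value in scores.items():
        print(name, value)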