'''
Automatic generation evaluation metrics wrapper.

The most useful function here is get_all_metrics(refs, cands).
'''
from pycocoevalcap.tokenizer.ptbtokenizer import PTBTokenizer
from pycocoevalcap.spice.spice import Spice
from pycocoevalcap.meteor.meteor import Meteor
from pycocoevalcap.bleu.bleu import Bleu
from pycocoevalcap.cider.cider import Cider
from pycocoevalcap.rouge.rouge import Rouge


def get_all_metrics(refs, cands, return_per_cap=False):
    '''
    Computes BLEU-1..4, METEOR, ROUGE-L, CIDEr, and SPICE.

    refs is a list of lists of reference strings, one inner list per candidate.
    cands is a list of candidate strings.
    If return_per_cap is True, per-caption scores are returned instead of
    corpus-level averages.
    '''
    metrics = []
    names = []
    pycoco_eval_cap_scorers = [(Bleu(4), 'bleu'),
                               (Meteor(), 'meteor'),
                               (Rouge(), 'rouge'),
                               (Cider(), 'cider'),
                               (Spice(), 'spice')]
    for scorer, name in pycoco_eval_cap_scorers:
        overall, per_cap = pycoco_eval(scorer, refs, cands)
        if return_per_cap:
            metrics.append(per_cap)
        else:
            metrics.append(overall)
        names.append(name)

    return dict(zip(names, metrics))


def tokenize(refs, cands, no_op=False):
    # no_op is a debug option for checking how much skipping the PTB tokenizer
    # affects the scores.
    tokenizer = PTBTokenizer()

    if no_op:
        # Wrap the raw strings in the {index: [caption, ...]} format the
        # scorers expect, without tokenizing.
        refs = {idx: [r for r in c_refs] for idx, c_refs in enumerate(refs)}
        cands = {idx: [c] for idx, c in enumerate(cands)}
    else:
        # PTBTokenizer expects {index: [{'caption': str}, ...]} as input.
        refs = {idx: [{'caption': r} for r in c_refs]
                for idx, c_refs in enumerate(refs)}
        cands = {idx: [{'caption': c}] for idx, c in enumerate(cands)}
        refs = tokenizer.tokenize(refs)
        cands = tokenizer.tokenize(cands)

    return refs, cands


def pycoco_eval(scorer, refs, cands):
    '''
    scorer is assumed to have a compute_score function.
    refs is a list of lists of strings.
    cands is a list of predictions.
    '''
    refs, cands = tokenize(refs, cands)
    average_score, scores = scorer.compute_score(refs, cands)
    return average_score, scores
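

# --- Usage sketch (illustrative; not part of the original module) ---
# A minimal example of how get_all_metrics might be called. It assumes the
# pycocoevalcap Java dependencies (used by the PTB tokenizer, METEOR, and
# SPICE) are installed; the captions below are made-up placeholders.
if __name__ == '__main__':
    example_refs = [
        ['a dog runs on the beach', 'a dog is running along the shore'],
        ['two people ride bicycles', 'a pair of cyclists on a road'],
    ]
    example_cands = [
        'a dog running on a beach',
        'two cyclists riding on the road',
    ]
    scores = get_all_metrics(example_refs, example_cands)
    # 'bleu' maps to a list [BLEU-1, BLEU-2, BLEU-3, BLEU-4]; the other
    # entries are single floats.
    for name, value in scores.items():
        print(name, value)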