"""Score a hypothesis file against a reference file with METEOR and ROUGE.

Usage: python <this script> REFERENCE_FILE HYPOTHESIS_FILE

Both files are read as one sentence per line; line i of the hypothesis file
is compared with line i of the reference file.
"""
import sys

from nltk.translate.meteor_score import single_meteor_score
from rouge import Rouge

# One-time setup kept from the original file (presumably needed so that METEOR
# can use NLTK's WordNet data -- confirm for your NLTK installation):
#   import nltk
#   nltk.download('wordnet')
#   nltk.download('omw-1.4')


def _read_sentences(path):
    """Return the lines of *path*, each stripped and lower-cased."""
    # Explicit encoding so decoding does not depend on the platform default.
    with open(path, 'r', encoding='utf-8') as handle:
        return [line.strip().lower() for line in handle]


def _sentence_meteor(reference, hypothesis):
    """Return the METEOR score of one hypothesis string against one reference.

    NOTE(review): newer NLTK releases appear to require pre-tokenized input
    (lists of tokens) while older ones take raw strings -- confirm against
    the installed NLTK version. Token lists are tried first; if the installed
    version rejects them with a TypeError, the raw strings are passed exactly
    as the original code did.
    """
    try:
        return single_meteor_score(reference.split(), hypothesis.split())
    except TypeError:
        return single_meteor_score(reference, hypothesis)


def evaluate(ref, hyp):
    """Print corpus-level METEOR and ROUGE scores for a hypothesis file.

    Prints ``Meteor = <score>`` (mean of the per-sentence METEOR scores,
    times 100), followed by one ``<key> = <score>`` line per entry returned
    by ``Rouge.get_scores(..., avg=True)``, using that entry's ``'f'`` value
    times 100.

    Args:
        ref: Path to the reference file, one sentence per line.
        hyp: Path to the hypothesis file, one sentence per line.

    Raises:
        ValueError: If the files are empty or have different line counts.
    """
    hyps = _read_sentences(hyp)
    refs = _read_sentences(ref)

    # zip() would silently drop the unmatched tail, so METEOR would be
    # averaged over a truncated set; reject the mismatch up front instead.
    if len(hyps) != len(refs):
        raise ValueError(
            f"line count mismatch: {len(refs)} reference line(s) in {ref!r} "
            f"vs {len(hyps)} hypothesis line(s) in {hyp!r}"
        )
    # Avoid an opaque ZeroDivisionError when averaging over no sentences.
    if not hyps:
        raise ValueError(f"nothing to evaluate: {ref!r} and {hyp!r} are empty")

    sentence_scores = [
        _sentence_meteor(ref_sentence, gen_sentence)
        for ref_sentence, gen_sentence in zip(refs, hyps)
    ]
    stc_meteor = sum(sentence_scores) / len(sentence_scores)
    print("Meteor = " + str(stc_meteor * 100))

    rouge = Rouge()
    scores_rouge = rouge.get_scores(hyps=hyps, refs=refs, avg=True)
    for metric, values in scores_rouge.items():
        print(metric + ' = ' + str(values['f'] * 100))


if __name__ == '__main__':
    # Extra arguments are ignored, as before; only missing ones are an error.
    if len(sys.argv) < 3:
        sys.exit(f"usage: {sys.argv[0]} REFERENCE_FILE HYPOTHESIS_FILE")
    evaluate(sys.argv[1], sys.argv[2])