RogerFerrod
/

RSICRC

change-detection

image-captioning

Model card Files Files and versions

RSICRC / src /eval_func /rouge /rouge.py

RogerFerrod's picture

upload code

f6ffda2 verified 3 days ago

history blame contribute delete

3.82 kB

	#!/usr/bin/env python

	#

	# File Name : rouge.py

	#

	# Description : Computes ROUGE-L metric as described by Lin and Hovy (2004)

	#

	# Creation Date : 2015-01-07 06:03

	# Author : Ramakrishna Vedantam <vrama91@vt.edu>


	import numpy as np

	import pdb


	def my_lcs(string, sub):
	    """
	    Return the length of the longest common subsequence of two token lists.

	    :param string : list of str : tokens from a string split using whitespace
	    :param sub : list of str : tokens of the other string; if it is the longer
	                 of the two, the arguments are swapped internally
	    :returns: int : length of the longest common subsequence of the two inputs

	    Note: only the length is computed, the subsequence itself is not recovered.
	    """
	    # Put the longer sequence on the row axis of the table.
	    if len(string) < len(sub):
	        longer, shorter = sub, string
	    else:
	        longer, shorter = string, sub

	    n_rows = len(longer) + 1
	    n_cols = len(shorter) + 1

	    # table[r][c] holds the LCS length of longer[:r] and shorter[:c];
	    # row 0 and column 0 stay at zero (empty prefix).
	    table = [[0] * n_cols for _ in range(n_rows)]

	    for r, tok_long in enumerate(longer, start=1):
	        above = table[r - 1]
	        current = table[r]
	        for c, tok_short in enumerate(shorter, start=1):
	            if tok_long == tok_short:
	                # Matching tokens extend the LCS of both shorter prefixes.
	                current[c] = above[c - 1] + 1
	            else:
	                # Otherwise carry over the best result that drops one token.
	                current[c] = max(above[c], current[c - 1])

	    return table[n_rows - 1][n_cols - 1]


	class Rouge():
	    '''
	    Class for computing the ROUGE-L score of candidate sentences against
	    one or more reference sentences.

	    The score is an F-measure built from LCS-based precision and recall,
	    weighted by ``self.beta``.
	    '''

	    def __init__(self):
	        # vrama91: updated the value below based on discussion with Hovy
	        self.beta = 1.2

	    def calc_score(self, candidate, refs):
	        """
	        Compute ROUGE-L score given one candidate and its references

	        :param candidate: list of str : exactly one candidate sentence
	        :param refs: list of str : non-empty list of reference sentences
	        :returns score: float : ROUGE-L score of the candidate against the
	                        references (0.0 when there is no token overlap)
	        :raises AssertionError: if ``candidate`` does not hold exactly one
	                        sentence or ``refs`` is empty
	        """
	        assert (len(candidate) == 1)
	        assert (len(refs) > 0)

	        # Sentences are tokenized on single spaces only.
	        token_c = candidate[0].split(" ")

	        prec = []
	        rec = []
	        for reference in refs:
	            token_r = reference.split(" ")
	            # length of the longest common subsequence of the two token lists
	            lcs = my_lcs(token_r, token_c)
	            prec.append(lcs / float(len(token_c)))
	            rec.append(lcs / float(len(token_r)))

	        # Best precision and best recall are taken independently over refs.
	        prec_max = max(prec)
	        rec_max = max(rec)

	        if prec_max != 0 and rec_max != 0:
	            beta_sq = self.beta ** 2
	            score = ((1 + beta_sq) * prec_max * rec_max) / float(rec_max + beta_sq * prec_max)
	        else:
	            score = 0.0

	        return score

	    def compute_score(self, references, hypotheses):
	        """
	        Computes Rouge-L score for a set of reference and candidate sentences

	        ``references`` and ``hypotheses`` are indexed in parallel: entry ``i``
	        of one belongs to entry ``i`` of the other.

	        :param references: sequence of list of str : for each sample, a
	                           non-empty list of reference sentences
	        :param hypotheses: sequence of list of str : for each sample, a list
	                           holding exactly one candidate sentence
	        :returns: (average_score, scores) : the mean ROUGE-L score over all
	                  samples, and a numpy array with the per-sample scores
	        :raises AssertionError: if a sample fails the sanity checks below
	        """
	        scores = []

	        for i, hypo in enumerate(hypotheses):
	            ref = references[i]

	            # Sanity check the sample before it is scored, so malformed
	            # input fails here rather than somewhere inside calc_score.
	            assert isinstance(hypo, list)
	            assert (len(hypo) == 1)
	            assert isinstance(ref, list)
	            assert (len(ref) > 0)

	            scores.append(self.calc_score(hypo, ref))

	        scores = np.array(scores)
	        average_score = np.mean(scores)

	        return average_score, scores

	    def method(self):
	        """Return the name of this metric."""
	        return "Rouge"