Spaces:

markytools
/

strexp

Build error

App Files Files Community

strexp / captum_improve_trba.py

markytools

added strexp

d61b9c7 over 2 years ago

raw

history blame contribute delete

48.7 kB

	import os
	import time
	import string
	import argparse
	import re
	import sys
	import random
	import pickle

	import torch
	import torch.backends.cudnn as cudnn
	import torch.utils.data
	import torch.nn.functional as F
	from torch.autograd import Variable
	import numpy as np
	from skimage.color import gray2rgb
	from nltk.metrics.distance import edit_distance
	import cv2

	from utils import CTCLabelConverter, AttnLabelConverter, Averager
	from dataset_trba import hierarchical_dataset, AlignCollate
	from model_trba import Model, SuperPixler, CastNumpy, STRScore
	# import hiddenlayer as hl
	from lime import lime_image
	from lime.wrappers.scikit_image import SegmentationAlgorithm
	import matplotlib.pyplot as plt
	import random
	from captum._utils.models.linear_model import SkLearnLinearModel, SkLearnRidge
	import statistics
	import settings
	import sys
	import copy
	from captum_test import acquire_average_auc, saveAttrData
	from captum_improve_vitstr import rankedAttributionsBySegm
	from matplotlib import pyplot as plt
	from captum.attr._utils.visualization import visualize_image_attr

	device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

	from captum.attr import (
	GradientShap,
	DeepLift,
	DeepLiftShap,
	IntegratedGradients,
	LayerConductance,
	NeuronConductance,
	NoiseTunnel,
	Saliency,
	InputXGradient,
	GuidedBackprop,
	Deconvolution,
	GuidedGradCam,
	FeatureAblation,
	ShapleyValueSampling,
	Lime,
	KernelShap
	)

	from captum.metrics import (
	infidelity,
	sensitivity_max
	)

	def getPredAndConf(opt, model, scoring, image, converter, labels):
	    """Run the recognizer on ``image`` and return the decoded text and its score.

	    Args:
	        opt: Options object. Reads ``batch_max_length`` and ``Prediction``, and
	            ``baiduCTC`` when a CTC prediction head is configured.
	        model: Recognizer, called as ``model(image, text)`` for CTC heads and
	            ``model(image, text, is_train=False)`` otherwise.
	        scoring: Callable applied to the raw model output to get the score.
	        image: Input batch. Only the first sample's prediction is returned.
	        converter: Label converter providing ``encode`` and ``decode``.
	        labels: Ground-truth labels, passed to ``converter.encode``. In the
	            non-CTC branch only the width of the encoded tensor is used, to
	            trim the prediction to the same number of steps.

	    Returns:
	        Tuple ``(preds_str, confScore)``: the decoded string of the first
	        sample and the output of ``scoring`` converted to a numpy array.
	    """
	    batch_size = image.size(0)
	    length_for_pred = torch.IntTensor([opt.batch_max_length] * batch_size).to(device)
	    text_for_pred = torch.LongTensor(batch_size, opt.batch_max_length + 1).fill_(0).to(device)
	    text_for_loss, _ = converter.encode(labels, batch_max_length=opt.batch_max_length)

	    if 'CTC' in opt.Prediction:
	        preds = model(image, text_for_pred)
	        confScore = scoring(preds).detach().cpu().numpy()

	        preds_size = torch.IntTensor([preds.size(1)] * batch_size)

	        # Greedy decoding: take the most likely class at every step.
	        _, preds_index = preds.max(2)
	        if opt.baiduCTC:
	            # The baiduCTC path decodes from a flattened index tensor.
	            preds_index = preds_index.view(-1)
	        preds_str = converter.decode(preds_index.data, preds_size.data)[0]
	    else:
	        preds = model(image, text_for_pred, is_train=False)
	        confScore = scoring(preds).detach().cpu().numpy()

	        # Keep as many steps as the encoded label has, minus the [GO] symbol.
	        preds = preds[:, :text_for_loss.shape[1] - 1, :]

	        # Greedy decoding: take the most likely class at every step.
	        _, preds_index = preds.max(2)
	        preds_str = converter.decode(preds_index, length_for_pred)[0]

	        # Remove everything from the end-of-sequence marker '[s]' onwards.
	        # str.find returns -1 when the marker is absent; slicing with -1 would
	        # silently drop the last predicted character, so only cut on a hit.
	        eosPos = preds_str.find('[s]')
	        if eosPos != -1:
	            preds_str = preds_str[:eosPos]
	    return preds_str, confScore

	### Compute and save the segmentations of a single dataset
	def outputSegmOnly(opt):
	    """Segment every image of one dataset with quickshift and pickle the result.

	    For each sample a dict with the keys ``'segdata'`` (segmentation output)
	    and ``'label'`` (first label of the batch) is written to
	    ``<segmRootDir>/<index>.pkl``. The target dataset, image size and output
	    root are hard-coded below.

	    Args:
	        opt: Options object. ``opt.eval`` is set to True; ``outputOrigDir``,
	            ``eval_data``, ``imgH``, ``imgW``, ``PAD`` and ``workers`` are read.
	    """
	    ### one dataset only, e.g. SVTP (645 images) or CUTE80 (288 images)
	    # Valid names: ['IIIT5k_3000', 'SVT', 'IC03_867', 'IC13_1015', 'IC15_2077', 'SVTP', 'CUTE80']
	    targetDataset = "CUTE80"
	    targetHeight, targetWidth = 32, 100
	    segmRootDir = "/home/uclpc1/Documents/STR/datasets/segmentations/{}X{}/{}/".format(
	        targetHeight, targetWidth, targetDataset)
	    if not os.path.exists(segmRootDir):
	        os.makedirs(segmRootDir)

	    opt.eval = True
	    origImgDir = opt.outputOrigDir

	    ### Segmentation settings taken from LIME
	    quickshift = SegmentationAlgorithm(
	        'quickshift',
	        kernel_size=4,
	        max_dist=200,
	        ratio=0.2,
	        random_seed=random.randint(0, 1000),
	    )

	    for datasetName in [targetDataset]:
	        datasetPath = os.path.join(opt.eval_data, datasetName)
	        collate = AlignCollate(imgH=opt.imgH, imgW=opt.imgW, keep_ratio_with_pad=opt.PAD)
	        dataset, _ = hierarchical_dataset(root=datasetPath, opt=opt, targetDir=origImgDir)
	        # Batch size must stay 1: only the first image of each batch is segmented.
	        loader = torch.utils.data.DataLoader(
	            dataset,
	            batch_size=1,
	            shuffle=False,
	            num_workers=int(opt.workers),
	            collate_fn=collate,
	            pin_memory=True,
	        )
	        for imgIdx, (image_tensors, labels) in enumerate(loader):
	            # Rescale from [-1, 1] to [0, 255] before segmenting.
	            rescaled = ((image_tensors + 1.0) / 2.0) * 255.0
	            pixels = rescaled.cpu().detach().numpy()[0]
	            if pixels.shape[0] == 1:
	                # Single-channel image: expand to RGB for the segmenter.
	                pixels = gray2rgb(pixels[0])
	            record = {
	                'segdata': quickshift(pixels),
	                'label': labels[0],
	            }
	            with open(segmRootDir + "{}.pkl".format(imgIdx), 'wb') as f:
	                pickle.dump(record, f)

	### Returns the mean for each segmentation having shape as the same as the input
	### This function can only one attribution image at a time
	def averageSegmentsOut(attr, segments):
	averagedInput = torch.clone(attr)
	sortedDict = {}
	for x in np.unique(segments):
	segmentMean = torch.mean(attr[segments == x][:])
	sortedDict[x] = float(segmentMean.detach().cpu().numpy())
	averagedInput[segments == x] = segmentMean
	return averagedInput, sortedDict

	def acquireSelectivityHit(origImg, attributions, segmentations, model, converter, labels, scoring):
	    """Hide segments from most to least attributed and track the prediction.

	    Segments are ranked by their mean attribution (highest first). They are
	    zeroed out cumulatively in channel 0 of a clone of ``origImg``, and after
	    each removal the model is queried again.

	    Args:
	        origImg: Input image batch; it is cloned, not modified.
	        attributions: Attribution tensor; only ``attributions[0, 0]`` is used.
	        segmentations: Array of segment ids matching the image.
	        model: Recognizer forwarded to ``getPredAndConf``.
	        converter: Label converter forwarded to ``getPredAndConf``.
	        labels: Ground-truth labels; only ``labels[0]`` is used.
	        scoring: Scoring module forwarded to ``getPredAndConf``.

	    Returns:
	        Tuple ``(n_correct, confidenceList)``. Entry ``k`` of each list is the
	        result after ``k + 1`` segments were hidden: 1/0 for a correct/wrong
	        prediction, and the score ``confScore[0][0] * 100``.
	    """
	    # NOTE(review): `opt` is not a parameter; it is read from an enclosing or
	    # module scope that is not visible here - confirm it is defined before use.
	    # Only the per-segment means are needed; the averaged map is discarded.
	    _, segmentScores = averageSegmentsOut(attributions[0, 0], segmentations)
	    ascending = sorted(segmentScores.items(), key=lambda item: item[1])
	    # Sort ascending and then reverse, so the largest score comes first.
	    hideOrder = [segId for segId, _score in reversed(ascending)]

	    hits = []
	    confidences = []  # index 0: one segment removed, index 1: two removed, ...
	    maskedImg = torch.clone(origImg)
	    gt = str(labels[0])
	    for segId in hideOrder:
	        # Removal is cumulative: earlier segments stay hidden.
	        maskedImg[0, 0][segmentations == segId] = 0.0
	        pred, confScore = getPredAndConf(opt, model, scoring, maskedImg, converter, np.array([gt]))
	        # To evaluate a case-sensitive model with the alphanumeric,
	        # case-insensitive setting.
	        if opt.sensitive and opt.data_filtering_off:
	            nonAlnumPattern = '[^0123456789abcdefghijklmnopqrstuvwxyz]'
	            pred = re.sub(nonAlnumPattern, '', pred.lower())
	            gt = re.sub(nonAlnumPattern, '', gt.lower())
	        hits.append(1 if pred == gt else 0)
	        confidences.append(confScore[0][0] * 100)
	    return hits, confidences

	def _saveRankedHeatmap(attributions, segmDataNP, origImgNP, pngPath):
	    """Save a blended heat map of ``attributions`` over ``origImgNP`` to ``pngPath``.

	    The attributions are first passed through ``rankedAttributionsBySegm``
	    together with the segmentation ``segmDataNP``.
	    """
	    rankedAttr = rankedAttributionsBySegm(attributions, segmDataNP)
	    rankedAttr = rankedAttr.detach().cpu().numpy()[0][0]
	    rankedAttr = gray2rgb(rankedAttr)
	    mplotfig, _ = visualize_image_attr(rankedAttr, origImgNP, method='blended_heat_map')
	    mplotfig.savefig(pngPath)
	    # Clear and close explicitly so figures do not pile up across the dataset.
	    mplotfig.clear()
	    plt.close(mplotfig)

	def main(opt):
	    """Compute, save and evaluate attribution maps for ``settings.TARGET_DATASET``.

	    For every image the pre-computed segmentation is loaded from
	    ``<settings.SEGM_DIR>/<imgH>X<imgW>/<dataset>/<index>.pkl``. Attributions
	    are then computed in this order: Shapley Value Sampling (per-character,
	    whole-output, and the mean of both), Integrated Gradients, GradientShap,
	    DeepLift, Saliency, InputXGradient, GuidedBackprop, Deconvolution,
	    FeatureAblation, LIME and KernelSHAP.

	    Each attribution map is saved as a heat map PNG under
	    ``./attributionImgs/`` and as raw data under ``./attributionData/``. The
	    metric results of all images processed so far are re-written to
	    ``strexp_ave_<MODEL>_<dataset>.pkl`` after every image.

	    Args:
	        opt: Options object. ``num_class``, ``exp_name`` and (when ``opt.rgb``
	            is set) ``input_channel`` are written; model, data and path
	            options are read.
	    """
	    import shutil  # local import: only needed for the checkpoint copy below

	    # Valid names: 'IIIT5k_3000', 'SVT', 'IC03_860', 'IC03_867', 'IC13_857',
	    # 'IC13_1015', 'IC15_1811', 'IC15_2077', 'SVTP', 'CUTE80'
	    datasetName = settings.TARGET_DATASET
	    custom_segm_dataroot = "{}/{}X{}/{}/".format(settings.SEGM_DIR, opt.imgH, opt.imgW, datasetName)
	    outputSelectivityPkl = "strexp_ave_{}_{}.pkl".format(settings.MODEL, datasetName)
	    outputDir = "./attributionImgs/{}/{}/".format(settings.MODEL, datasetName)
	    attrOutputDir = "./attributionData/{}/{}/".format(settings.MODEL, datasetName)
	    acquireSelectivity = True
	    acquireInfidelity = False
	    acquireSensitivity = False  ### GPU error
	    imgHeight = 32
	    imgWidth = 100
	    os.makedirs(outputDir, exist_ok=True)
	    os.makedirs(attrOutputDir, exist_ok=True)

	    """ model configuration """
	    if 'CTC' in opt.Prediction:
	        converter = CTCLabelConverter(opt.character)
	    else:
	        converter = AttnLabelConverter(opt.character)
	    opt.num_class = len(converter.character)

	    if opt.rgb:
	        opt.input_channel = 3
	    model_obj = Model(opt, device)
	    print('model input parameters', opt.imgH, opt.imgW, opt.num_fiducial, opt.input_channel, opt.output_channel,
	          opt.hidden_size, opt.num_class, opt.batch_max_length, opt.Transformation, opt.FeatureExtraction,
	          opt.SequenceModeling, opt.Prediction)
	    model = torch.nn.DataParallel(model_obj).to(device)

	    # load model
	    print('loading pretrained model from %s' % opt.saved_model)
	    model.load_state_dict(torch.load(opt.saved_model, map_location=device))
	    opt.exp_name = '_'.join(opt.saved_model.split('/')[1:])

	    # Separate model copy whose scorer attributes one output character at a time.
	    modelCopy = copy.deepcopy(model)
	    scoring_singlechar = STRScore(opt=opt, converter=converter, device=device, enableSingleCharAttrAve=True)
	    super_pixel_model_singlechar = torch.nn.Sequential(
	        modelCopy,
	        scoring_singlechar
	    ).to(device)
	    # NOTE(review): all modules are kept in train() mode - presumably required
	    # by the attribution methods (backward pass); confirm before changing.
	    modelCopy.train()
	    scoring_singlechar.train()
	    super_pixel_model_singlechar.train()

	    scoring = STRScore(opt=opt, converter=converter, device=device)
	    super_pixel_model = torch.nn.Sequential(
	        model,
	        scoring
	    )
	    model.train()
	    scoring.train()
	    super_pixel_model.train()

	    """ keep evaluation model and result logs """
	    resultDir = f'./result/{opt.exp_name}/'
	    os.makedirs(f'./result/{opt.exp_name}', exist_ok=True)
	    # Copy without a shell so unusual characters in the path cannot break or
	    # alter the command. The copy stays best-effort: a failure is reported
	    # but does not abort the run.
	    try:
	        shutil.copy(opt.saved_model, resultDir)
	    except OSError as err:
	        print(f'could not copy {opt.saved_model} to {resultDir}: {err}')

	    target_output_orig = opt.outputOrigDir
	    eval_data_list = [datasetName]

	    dashed_line = '-' * 80
	    # Use a context manager so the log handle is closed again.
	    with open(f'./result/{opt.exp_name}/log_all_evaluation.txt', 'a') as log:
	        print(dashed_line)
	        log.write(dashed_line + '\n')

	    selectivity_eval_results = []
	    testImgCount = 0
	    imgResultDir = str(opt.Transformation) + "-" + str(opt.FeatureExtraction) + "-" + str(opt.SequenceModeling) + "-" + str(opt.Prediction) + "-" + str(opt.scorer)
	    os.makedirs(imgResultDir, exist_ok=True)

	    # define a perturbation function for the input (used for calculating infidelity)
	    def perturb_fn(modelInputs):
	        noise = torch.tensor(np.random.normal(0, 0.003, modelInputs.shape)).float()
	        noise = noise.to(device)
	        return noise, modelInputs - noise

	    def recordAttribution(results_dict, imgIdx, resultKey, fileTag, attributions, attrFn,
	                          img1, segmDataNP, origImgNP, labels, singleChar):
	        """Save one attribution map and store its metrics in ``results_dict``.

	        ``resultKey`` prefixes the result entries (``_acc``, ``_conf``,
	        ``_infid``, ``_sens``); ``fileTag`` names the output files.
	        ``singleChar`` selects the single-character model/scorer pair, which
	        is evaluated without infidelity normalization.
	        """
	        if singleChar:
	            evalModel, evalScoring, explainedModel = modelCopy, scoring_singlechar, super_pixel_model_singlechar
	        else:
	            evalModel, evalScoring, explainedModel = model, scoring, super_pixel_model
	        _saveRankedHeatmap(attributions, segmDataNP, origImgNP, outputDir + '{}_{}.png'.format(imgIdx, fileTag))
	        saveAttrData(attrOutputDir + f'{imgIdx}_{fileTag}.pkl', attributions, segmDataNP, origImgNP)
	        if acquireSelectivity:
	            n_correct, confidenceList = acquireSelectivityHit(img1, attributions, segmDataNP, evalModel, converter, labels, evalScoring)
	            results_dict[resultKey + "_acc"] = n_correct
	            results_dict[resultKey + "_conf"] = confidenceList
	        if acquireInfidelity:
	            infid = float(infidelity(explainedModel, perturb_fn, img1, attributions, normalize=not singleChar).detach().cpu().numpy())
	            results_dict[resultKey + "_infid"] = infid
	        if acquireSensitivity:
	            sens = float(sensitivity_max(attrFn, img1, target=0).detach().cpu().numpy())
	            results_dict[resultKey + "_sens"] = sens

	    for eval_data in eval_data_list:
	        eval_data_path = os.path.join(opt.eval_data, eval_data)
	        AlignCollate_evaluation = AlignCollate(imgH=opt.imgH, imgW=opt.imgW, keep_ratio_with_pad=opt.PAD)
	        eval_dataset, _ = hierarchical_dataset(root=eval_data_path, opt=opt, targetDir=target_output_orig)
	        # Batch size must stay 1: one segmentation pickle exists per image index.
	        evaluation_loader = torch.utils.data.DataLoader(
	            eval_dataset, batch_size=1,
	            shuffle=False,
	            num_workers=int(opt.workers),
	            collate_fn=AlignCollate_evaluation, pin_memory=True)
	        for i, (orig_img_tensors, labels) in enumerate(evaluation_loader):
	            results_dict = {}
	            pklFilename = custom_segm_dataroot + "{}.pkl".format(i)
	            # NOTE: pickle.load can execute arbitrary code - only load
	            # segmentation files that were produced locally.
	            with open(pklFilename, 'rb') as f:
	                pklData = pickle.load(f)
	            segmDataNP = pklData["segdata"]
	            # The stored segmentation must belong to the same sample.
	            assert pklData["label"] == labels[0]
	            segmTensor = torch.from_numpy(segmDataNP).unsqueeze(0).unsqueeze(0)
	            segmTensor = segmTensor.to(device)
	            img1 = orig_img_tensors.to(device)
	            img1.requires_grad = True
	            target = converter.encode(labels)
	            target = target[0][:, 1:]
	            charOffset = 0
	            origImgNP = torch.clone(orig_img_tensors).detach().cpu().numpy()[0][0]
	            origImgNP = gray2rgb(origImgNP)

	            ### Shapley Value Sampling, one output character at a time (local)
	            # NOTE(review): len(labels) is the batch size (1), not the number
	            # of characters in the label - confirm whether len(labels[0]) was
	            # intended. Left unchanged to keep results comparable.
	            collectedAttributions = []
	            for charIdx in range(0, len(labels)):
	                scoring_singlechar.setSingleCharOutput(charIdx + charOffset)
	                gtClassNum = target[0][charIdx + charOffset]
	                svs = ShapleyValueSampling(super_pixel_model_singlechar)
	                attributions = svs.attribute(img1, target=gtClassNum, feature_mask=segmTensor)
	                collectedAttributions.append(attributions)
	            aveAttributions = torch.mean(torch.cat(collectedAttributions, dim=0), dim=0).unsqueeze(0)
	            recordAttribution(results_dict, i, "shapley_local", "shapley_l", aveAttributions, svs.attribute,
	                              img1, segmDataNP, origImgNP, labels, singleChar=True)

	            ### Shapley Value Sampling on the whole output (global)
	            svs = ShapleyValueSampling(super_pixel_model)
	            attributions = svs.attribute(img1, target=0, feature_mask=segmTensor)
	            collectedAttributions.append(attributions)
	            recordAttribution(results_dict, i, "shapley", "shapley", attributions, svs.attribute,
	                              img1, segmDataNP, origImgNP, labels, singleChar=False)

	            ### Global + Local context: mean of the local and global maps
	            aveAttributions = torch.mean(torch.cat(collectedAttributions, dim=0), dim=0).unsqueeze(0)
	            recordAttribution(results_dict, i, "shapley_global_local", "shapley_gl", aveAttributions, svs.attribute,
	                              img1, segmDataNP, origImgNP, labels, singleChar=True)

	            # Baselines. Each entry: (result key / file tag, explainer class,
	            # constructor kwargs, attribute() kwargs). The list order is the
	            # execution order.
	            # GradientShap draws its baselines from this zero image.
	            baseline_dist = torch.zeros((1, 1, imgHeight, imgWidth))
	            baseline_dist = baseline_dist.to(device)
	            baselineMethods = [
	                ("intgrad", IntegratedGradients, {}, {}),
	                ("gradshap", GradientShap, {}, {"baselines": baseline_dist}),
	                ("deeplift", DeepLift, {}, {}),
	                ("saliency", Saliency, {}, {}),
	                ("inpxgrad", InputXGradient, {}, {}),
	                ("guidedbp", GuidedBackprop, {}, {}),
	                ("deconv", Deconvolution, {}, {}),
	                ("featablt", FeatureAblation, {}, {"feature_mask": segmTensor}),
	                # Ridge regression is the default surrogate used by LIME.
	                ("lime", Lime, {"interpretable_model": SkLearnRidge(alpha=1, fit_intercept=True)},
	                 {"feature_mask": segmTensor}),
	                ("kernelshap", KernelShap, {}, {"feature_mask": segmTensor}),
	            ]
	            for methodKey, explainerCls, ctorKwargs, attrKwargs in baselineMethods:
	                explainer = explainerCls(super_pixel_model, **ctorKwargs)
	                attributions = explainer.attribute(img1, target=0, **attrKwargs)
	                recordAttribution(results_dict, i, methodKey, methodKey, attributions, explainer.attribute,
	                                  img1, segmDataNP, origImgNP, labels, singleChar=False)

	            selectivity_eval_results.append(results_dict)

	            # Re-written after every image so partial results survive a crash.
	            with open(outputSelectivityPkl, 'wb') as f:
	                pickle.dump(selectivity_eval_results, f)

	            testImgCount += 1
	            print("testImgCount: ", testImgCount)

	def outputOrigImagesOnly(opt):
	    """Iterate once over one dataset with original-image output enabled.

	    Sets ``opt.output_orig`` and ``opt.outputOrigDir`` (plus the corruption
	    and image-range options below), loads the model checkpoint, and then
	    walks through the evaluation loader without running the model.
	    NOTE(review): presumably the dataset code writes the original images to
	    ``opt.outputOrigDir`` while it is iterated - confirm in ``dataset_trba``.

	    Args:
	        opt: Options object; several fields are overwritten, see the body.
	    """
	    import shutil  # local import: only needed for the checkpoint copy below

	    # Valid names: ['IIIT5k_3000', 'SVT', 'IC03_867', 'IC13_1015', 'IC15_2077', 'SVTP', 'CUTE80']
	    datasetName = "CUTE80"
	    opt.outputOrigDir = "./datasetOrigImgs/{}/".format(datasetName)
	    opt.output_orig = True
	    opt.corruption_num = 0
	    opt.apply_corruptions = False
	    opt.min_imgnum = 0
	    opt.max_imgnum = 1000

	    target_output_orig = opt.outputOrigDir
	    os.makedirs(target_output_orig, exist_ok=True)

	    """ model configuration """
	    if 'CTC' in opt.Prediction:
	        converter = CTCLabelConverter(opt.character)
	    else:
	        converter = AttnLabelConverter(opt.character)
	    opt.num_class = len(converter.character)

	    if opt.rgb:
	        opt.input_channel = 3
	    model_obj = Model(opt, device)
	    print('model input parameters', opt.imgH, opt.imgW, opt.num_fiducial, opt.input_channel, opt.output_channel,
	          opt.hidden_size, opt.num_class, opt.batch_max_length, opt.Transformation, opt.FeatureExtraction,
	          opt.SequenceModeling, opt.Prediction)
	    model = torch.nn.DataParallel(model_obj).to(device)

	    # load model
	    print('loading pretrained model from %s' % opt.saved_model)
	    model.load_state_dict(torch.load(opt.saved_model, map_location=device))
	    opt.exp_name = '_'.join(opt.saved_model.split('/')[1:])
	    scoring = STRScore(opt=opt, converter=converter, device=device)
	    model.train()
	    scoring.train()

	    """ keep evaluation model and result logs """
	    resultDir = f'./result/{opt.exp_name}/'
	    os.makedirs(f'./result/{opt.exp_name}', exist_ok=True)
	    # Copy without a shell so unusual characters in the path cannot break or
	    # alter the command. The copy stays best-effort: a failure is reported
	    # but does not abort the run.
	    try:
	        shutil.copy(opt.saved_model, resultDir)
	    except OSError as err:
	        print(f'could not copy {opt.saved_model} to {resultDir}: {err}')

	    eval_data_list = [datasetName]

	    dashed_line = '-' * 80
	    # Use a context manager so the log handle is closed again.
	    with open(f'./result/{opt.exp_name}/log_all_evaluation.txt', 'a') as log:
	        print(dashed_line)
	        log.write(dashed_line + '\n')

	    testImgCount = 0
	    imgResultDir = str(opt.Transformation) + "-" + str(opt.FeatureExtraction) + "-" + str(opt.SequenceModeling) + "-" + str(opt.Prediction) + "-" + str(opt.scorer)
	    os.makedirs(imgResultDir, exist_ok=True)

	    for eval_data in eval_data_list:
	        eval_data_path = os.path.join(opt.eval_data, eval_data)
	        AlignCollate_evaluation = AlignCollate(imgH=opt.imgH, imgW=opt.imgW, keep_ratio_with_pad=opt.PAD)
	        eval_dataset, _ = hierarchical_dataset(root=eval_data_path, opt=opt, targetDir=target_output_orig)
	        evaluation_loader = torch.utils.data.DataLoader(
	            eval_dataset, batch_size=1,
	            shuffle=False,
	            num_workers=int(opt.workers),
	            collate_fn=AlignCollate_evaluation, pin_memory=True)
	        # The samples are not used here; the loop only drives the loader.
	        for _ in evaluation_loader:
	            testImgCount += 1
	            print("testImgCount: ", testImgCount)

	def modelDatasetPredOnly(opt, targetDataset="CUTE80"):
	    """Record, per image, whether the greedy prediction equals the label.

	    Loads the checkpoint named by ``opt.saved_model``, runs it over the
	    dataset ``targetDataset`` located under ``opt.eval_data`` one image at a
	    time, and pickles a list of 0/1 flags (1 = predicted text equals
	    ``labels[0]``) to ``metrics_predictonly_results_<targetDataset>.pkl`` in
	    the current working directory. Flags are stored in data-loader order
	    (the loader uses ``shuffle=False`` and ``batch_size=1``).

	    A dashed separator line is also appended to
	    ``./result/<opt.exp_name>/log_all_evaluation.txt``; the directory is
	    created when missing.

	    Args:
	        opt: Parsed command-line namespace. Besides being read, it is
	            mutated: ``num_class``, ``input_channel`` (only when ``opt.rgb``),
	            ``exp_name`` (derived from ``opt.saved_model``) and ``eval``
	            (set to ``True``) are assigned here.
	        targetDataset: Name of the single dataset sub-directory to evaluate,
	            e.g. 'IIIT5k_3000', 'SVT', 'IC03_867', 'IC13_1015', 'IC15_2077',
	            'SVTP' or 'CUTE80'. Defaults to "CUTE80".

	    Returns:
	        None. The result is written to the pickle file.
	    """
	    outputSelectivityPkl = "metrics_predictonly_results_{}.pkl".format(targetDataset)

	    # model configuration
	    is_ctc = 'CTC' in opt.Prediction
	    if is_ctc:
	        converter = CTCLabelConverter(opt.character)
	    else:
	        converter = AttnLabelConverter(opt.character)
	    opt.num_class = len(converter.character)

	    if opt.rgb:
	        opt.input_channel = 3
	    model_obj = Model(opt, device)
	    print('model input parameters', opt.imgH, opt.imgW, opt.num_fiducial, opt.input_channel, opt.output_channel,
	          opt.hidden_size, opt.num_class, opt.batch_max_length, opt.Transformation, opt.FeatureExtraction,
	          opt.SequenceModeling, opt.Prediction)
	    model = torch.nn.DataParallel(model_obj).to(device)

	    # load model
	    print('loading pretrained model from %s' % opt.saved_model)
	    model.load_state_dict(torch.load(opt.saved_model, map_location=device))
	    opt.exp_name = '_'.join(opt.saved_model.split('/')[1:])
	    # NOTE(review): the network is deliberately left in train mode, as the
	    # other evaluation code in this file does; eval() is the usual choice for
	    # inference -- confirm this is intentional before changing it.
	    model.train()

	    opt.eval = True

	    # The log directory may not exist yet when this function is the entry
	    # point, so create it before opening the log file.
	    log_dir = f'./result/{opt.exp_name}'
	    os.makedirs(log_dir, exist_ok=True)
	    dashed_line = '-' * 80
	    print(dashed_line)
	    with open(f'{log_dir}/log_all_evaluation.txt', 'a') as log:
	        log.write(dashed_line + '\n')

	    eval_data_path = os.path.join(opt.eval_data, targetDataset)
	    AlignCollate_evaluation = AlignCollate(imgH=opt.imgH, imgW=opt.imgW, keep_ratio_with_pad=opt.PAD)
	    eval_data, _ = hierarchical_dataset(root=eval_data_path, opt=opt, targetDir=opt.outputOrigDir)
	    evaluation_loader = torch.utils.data.DataLoader(
	        eval_data, batch_size=1,
	        shuffle=False,
	        num_workers=int(opt.workers),
	        collate_fn=AlignCollate_evaluation, pin_memory=True)

	    predOutput = []
	    # Only forward passes are needed, so skip building the autograd graph.
	    with torch.no_grad():
	        for orig_img_tensors, labels in evaluation_loader:
	            image = orig_img_tensors.to(device)
	            batch_size = image.size(0)
	            text_for_pred = torch.LongTensor(batch_size, opt.batch_max_length + 1).fill_(0).to(device)

	            if is_ctc:
	                preds = model(image, text_for_pred)
	                preds_size = torch.IntTensor([preds.size(1)] * batch_size)

	                # Select max probability (greedy decoding) then decode index to character
	                _, preds_index = preds.max(2)
	                if opt.baiduCTC:
	                    preds_index = preds_index.view(-1)
	                preds_str = converter.decode(preds_index.data, preds_size.data)[0]
	            else:
	                length_for_pred = torch.IntTensor([opt.batch_max_length] * batch_size).to(device)
	                text_for_loss, _ = converter.encode(labels, batch_max_length=opt.batch_max_length)
	                preds = model(image, text_for_pred, is_train=False)
	                preds = preds[:, :text_for_loss.shape[1] - 1, :]

	                # Select max probability (greedy decoding) then decode index to character
	                _, preds_index = preds.max(2)
	                preds_str = converter.decode(preds_index, length_for_pred)[0]

	                # Remove all chars from '[s]' onwards. partition() keeps the
	                # whole string when '[s]' is absent, whereas slicing with
	                # str.find() would drop the last character (find returns -1).
	                preds_str = preds_str.partition('[s]')[0]

	            predOutput.append(1 if preds_str == labels[0] else 0)

	    with open(outputSelectivityPkl, 'wb') as f:
	        pickle.dump(predOutput, f)

	if __name__ == '__main__':
	    # Command-line entry point: parse the options, apply the character-set
	    # and cuDNN settings, then hand the namespace to main().
	    parser = argparse.ArgumentParser()
	    parser.add_argument('--eval_data', required=True, help='path to evaluation dataset')
	    parser.add_argument('--benchmark_all_eval', action='store_true', help='evaluate 10 benchmark evaluation datasets')
	    parser.add_argument('--workers', type=int, help='number of data loading workers', default=4)
	    parser.add_argument('--batch_size', type=int, default=192, help='input batch size')
	    parser.add_argument('--saved_model', required=True, help="path to saved_model to evaluation")

	    # Data processing
	    parser.add_argument('--batch_max_length', type=int, default=25, help='maximum-label-length')
	    parser.add_argument('--imgH', type=int, default=32, help='the height of the input image')
	    parser.add_argument('--imgW', type=int, default=100, help='the width of the input image')
	    parser.add_argument('--superHeight', type=int, default=5, help='the height of the superpixel')
	    parser.add_argument('--superWidth', type=int, default=2, help='the width of the superpixel')
	    parser.add_argument('--min_imgnum', type=int, default=0, help='set this to skip for loop index of specific image number')
	    parser.add_argument('--max_imgnum', type=int, default=2, help='set this to skip for loop index of specific image number')
	    parser.add_argument('--severity', type=int, default=1, help='severity level if apply corruptions')
	    # A plain '|' is used in help texts: '\|' is not a valid escape sequence.
	    parser.add_argument('--scorer', type=str, default='cumprod', help='See STRScore: cumprod | mean')
	    parser.add_argument('--corruption_num', type=int, default=0, help='corruption to apply')
	    parser.add_argument('--confidence_mode', type=int, default=0, help='0-sum of argmax; 1-edit distance')
	    parser.add_argument('--outputOrigDir', type=str, default="output_orig/",
	                        help='output directory to save original images. '
	                             'This will be automatically created. Needs --output_orig too.')
	    parser.add_argument('--output_orig', action='store_true', help='if true, output first original rgb image of each batch')
	    parser.add_argument('--compare_corrupt', action='store_true', help='set to true to output results across corruptions')
	    parser.add_argument('--is_shap', action='store_true', help='no need to call in command line')
	    parser.add_argument('--blackbg', action='store_true', help='if True, background color for covering features will be black(0)')
	    parser.add_argument('--rgb', action='store_true', help='use rgb input')
	    parser.add_argument('--character', type=str, default='0123456789abcdefghijklmnopqrstuvwxyz', help='character label')
	    parser.add_argument('--sensitive', action='store_true', help='for sensitive character mode')
	    parser.add_argument('--PAD', action='store_true', help='whether to keep ratio then pad for image resize')
	    parser.add_argument('--data_filtering_off', action='store_true', help='for data_filtering_off mode')
	    parser.add_argument('--apply_corruptions', action='store_true', help='apply corruptions to images')
	    parser.add_argument('--output_feat_maps', action='store_true', help='toggle this to output images of featmaps')
	    parser.add_argument('--baiduCTC', action='store_true',
	                        help='flatten the CTC prediction indices before decoding (baiduCTC decode path)')

	    # Model Architecture
	    parser.add_argument('--Transformation', type=str, required=True, help='Transformation stage. None|TPS')
	    parser.add_argument('--FeatureExtraction', type=str, required=True, help='FeatureExtraction stage. VGG|RCNN|ResNet')
	    parser.add_argument('--SequenceModeling', type=str, required=True, help='SequenceModeling stage. None|BiLSTM')
	    parser.add_argument('--Prediction', type=str, required=True, help='Prediction stage. CTC|Attn')
	    parser.add_argument('--num_fiducial', type=int, default=20, help='number of fiducial points of TPS-STN')
	    parser.add_argument('--input_channel', type=int, default=1, help='the number of input channel of Feature extractor')
	    parser.add_argument('--output_channel', type=int, default=512,
	                        help='the number of output channel of Feature extractor')
	    parser.add_argument('--hidden_size', type=int, default=256, help='the size of the LSTM hidden state')

	    opt = parser.parse_args()

	    # vocab / character number configuration
	    if opt.sensitive:
	        opt.character = string.printable[:-6]  # same with ASTER setting (use 94 char).

	    # NOTE(review): benchmark=True lets cuDNN auto-tune its algorithms, which
	    # can work against deterministic=True -- confirm both are intended.
	    cudnn.benchmark = True
	    cudnn.deterministic = True
	    opt.num_gpu = torch.cuda.device_count()

	    # Alternative entry points are kept commented out next to the active one.
	    # acquire_average_auc(opt)
	    main(opt)
	    # outputOrigImagesOnly(opt)