Spaces:
Build error
Build error
| import os | |
| import time | |
| import string | |
| import argparse | |
| import re | |
| import sys | |
| import random | |
| import pickle | |
| import torch | |
| import torch.backends.cudnn as cudnn | |
| import torch.utils.data | |
| import torch.nn.functional as F | |
| from torch.autograd import Variable | |
| import numpy as np | |
| from skimage.color import gray2rgb | |
| from nltk.metrics.distance import edit_distance | |
| import cv2 | |
| from utils import CTCLabelConverter, AttnLabelConverter, Averager | |
| from dataset_trba import hierarchical_dataset, AlignCollate | |
| from model_trba import Model, SuperPixler, CastNumpy, STRScore | |
| # import hiddenlayer as hl | |
| from lime import lime_image | |
| from lime.wrappers.scikit_image import SegmentationAlgorithm | |
| import matplotlib.pyplot as plt | |
| import random | |
| from captum._utils.models.linear_model import SkLearnLinearModel, SkLearnRidge | |
| import statistics | |
| import settings | |
| import sys | |
| import copy | |
| from captum_test import acquire_average_auc, saveAttrData | |
| from captum_improve_vitstr import rankedAttributionsBySegm | |
| from matplotlib import pyplot as plt | |
| from captum.attr._utils.visualization import visualize_image_attr | |
| device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') | |
| from captum.attr import ( | |
| GradientShap, | |
| DeepLift, | |
| DeepLiftShap, | |
| IntegratedGradients, | |
| LayerConductance, | |
| NeuronConductance, | |
| NoiseTunnel, | |
| Saliency, | |
| InputXGradient, | |
| GuidedBackprop, | |
| Deconvolution, | |
| GuidedGradCam, | |
| FeatureAblation, | |
| ShapleyValueSampling, | |
| Lime, | |
| KernelShap | |
| ) | |
| from captum.metrics import ( | |
| infidelity, | |
| sensitivity_max | |
| ) | |
def getPredAndConf(opt, model, scoring, image, converter, labels):
    """Run the recognizer on ``image`` and return the decoded text and its score.

    Args:
        opt: Options object. ``batch_max_length`` and ``Prediction`` are read;
            ``baiduCTC`` is read only for CTC models.
        model: Recognition network, called as ``model(image, text)`` for CTC
            models and ``model(image, text, is_train=False)`` otherwise.
        scoring: Callable applied to the raw model output; its result is
            detached and converted to a NumPy array.
        image: Input batch. Only the first item's decoded text is returned.
        converter: Label converter providing ``encode`` and ``decode``.
        labels: Ground-truth labels, forwarded to ``converter.encode``.

    Returns:
        Tuple ``(preds_str, confScore)``: the decoded string of the first batch
        item and the scoring output as a NumPy array.
    """
    batch_size = image.size(0)
    length_for_pred = torch.IntTensor([opt.batch_max_length] * batch_size).to(device)
    text_for_pred = torch.LongTensor(batch_size, opt.batch_max_length + 1).fill_(0).to(device)
    text_for_loss, _ = converter.encode(labels, batch_max_length=opt.batch_max_length)

    if 'CTC' in opt.Prediction:
        preds = model(image, text_for_pred)
        confScore = scoring(preds).detach().cpu().numpy()
        preds_size = torch.IntTensor([preds.size(1)] * batch_size)
        # Greedy decoding: take the most probable class at every time step.
        _, preds_index = preds.max(2)
        if opt.baiduCTC:
            preds_index = preds_index.view(-1)
        preds_str = converter.decode(preds_index.data, preds_size.data)[0]
    else:
        preds = model(image, text_for_pred, is_train=False)
        # The score is computed on the full output, before it is cropped below.
        confScore = scoring(preds).detach().cpu().numpy()
        preds = preds[:, :text_for_loss.shape[1] - 1, :]
        # Greedy decoding: take the most probable class at every time step.
        _, preds_index = preds.max(2)
        preds_str = converter.decode(preds_index, length_for_pred)[0]
        # Keep only the text before the end-of-sequence token. partition()
        # leaves the string intact when '[s]' is absent, whereas slicing with
        # find() == -1 would silently drop the last character.
        preds_str = preds_str.partition('[s]')[0]
    return preds_str, confScore
def outputSegmOnly(opt):
    """Compute quickshift segmentations for one dataset and pickle them.

    For every image of the hard-coded target dataset a pickle named
    ``<index>.pkl`` is written to the hard-coded segmentation directory. Each
    pickle holds a dict with the segmentation map (``'segdata'``) and the
    image label (``'label'``).

    Args:
        opt: Options object. ``eval_data``, ``imgH``, ``imgW``, ``PAD``,
            ``workers`` and ``outputOrigDir`` are read; ``opt.eval`` is set to
            ``True`` as a side effect.
    """
    # One dataset only, e.g. SVTP (645 images) or CUTE80 (288 images).
    # Choices: ['IIIT5k_3000', 'SVT', 'IC03_867', 'IC13_1015', 'IC15_2077', 'SVTP', 'CUTE80']
    datasetName = "CUTE80"
    segmHeight, segmWidth = 32, 100
    segmRootDir = "/home/uclpc1/Documents/STR/datasets/segmentations/{}X{}/{}/".format(
        segmHeight, segmWidth, datasetName)
    if not os.path.exists(segmRootDir):
        os.makedirs(segmRootDir)

    opt.eval = True
    origImgDir = opt.outputOrigDir

    # Quickshift settings taken from LIME; the seed differs on every run.
    segmenter = SegmentationAlgorithm(
        'quickshift', kernel_size=4, max_dist=200, ratio=0.2,
        random_seed=random.randint(0, 1000))

    # Only the single target dataset is processed.
    collate = AlignCollate(imgH=opt.imgH, imgW=opt.imgW, keep_ratio_with_pad=opt.PAD)
    dataset, _ = hierarchical_dataset(
        root=os.path.join(opt.eval_data, datasetName), opt=opt, targetDir=origImgDir)
    loader = torch.utils.data.DataLoader(
        dataset,
        batch_size=1,  # the code below reads only the first item of each batch
        shuffle=False,
        num_workers=int(opt.workers),
        collate_fn=collate,
        pin_memory=True)

    for imgIdx, (batch, labels) in enumerate(loader):
        # presumably maps tensors normalised to [-1, 1] onto [0, 255] — TODO confirm
        batch = ((batch + 1.0) / 2.0) * 255.0
        imgNP = batch.cpu().detach().numpy()[0]
        if imgNP.shape[0] == 1:
            # Single-channel image: expand to RGB before segmenting.
            imgNP = gray2rgb(imgNP[0])
        record = {'segdata': segmenter(imgNP), 'label': labels[0]}
        with open(segmRootDir + "{}.pkl".format(imgIdx), 'wb') as pklFile:
            pickle.dump(record, pklFile)
| ### Returns the mean for each segmentation having shape as the same as the input | |
| ### This function can only one attribution image at a time | |
| def averageSegmentsOut(attr, segments): | |
| averagedInput = torch.clone(attr) | |
| sortedDict = {} | |
| for x in np.unique(segments): | |
| segmentMean = torch.mean(attr[segments == x][:]) | |
| sortedDict[x] = float(segmentMean.detach().cpu().numpy()) | |
| averagedInput[segments == x] = segmentMean | |
| return averagedInput, sortedDict | |
def acquireSelectivityHit(origImg, attributions, segmentations, model, converter, labels, scoring):
    """Track accuracy and confidence while segments are masked out one by one.

    Segments are ranked by their mean attribution (highest first). They are
    zeroed cumulatively on a clone of ``origImg``, and after each removal the
    model is queried through ``getPredAndConf``.

    NOTE: this function reads the module-level ``opt`` (it is not a parameter),
    so it only works when that global has been defined.

    Args:
        origImg: Input image tensor; it is cloned, not modified.
        attributions: Attribution tensor; only ``attributions[0, 0]`` is used.
        segmentations: Array of segment ids used to mask the image.
        model: Recognition model forwarded to ``getPredAndConf``.
        converter: Label converter forwarded to ``getPredAndConf``.
        labels: Ground-truth labels; only ``labels[0]`` is used.
        scoring: Scoring module forwarded to ``getPredAndConf``.

    Returns:
        Tuple ``(n_correct, confidenceList)``. Entry ``k`` of each list is the
        result after ``k + 1`` segments were hidden: 1/0 for a correct/incorrect
        prediction, and the confidence score multiplied by 100.
    """
    _, meanBySegment = averageSegmentsOut(attributions[0, 0], segmentations)
    # Stable ascending sort followed by a reversal: highest mean comes first.
    hideOrder = sorted(meanBySegment, key=meanBySegment.get)
    hideOrder.reverse()

    alphanumeric = '0123456789abcdefghijklmnopqrstuvwxyz'
    nonAlphanumeric = f'[^{alphanumeric}]'

    hits = []
    confidences = []
    maskedImg = origImg.clone()
    gt = str(labels[0])
    for segId in hideOrder:
        # Masking accumulates: previously hidden segments stay hidden.
        maskedImg[0, 0][segmentations == segId] = 0.0
        pred, confScore = getPredAndConf(opt, model, scoring, maskedImg, converter, np.array([gt]))

        # Evaluate a case-sensitive model in the alphanumeric,
        # case-insensitive setting.
        if opt.sensitive and opt.data_filtering_off:
            pred = re.sub(nonAlphanumeric, '', pred.lower())
            gt = re.sub(nonAlphanumeric, '', gt.lower())

        hits.append(1 if pred == gt else 0)
        confidences.append(confScore[0][0] * 100)
    return hits, confidences
def _saveAttributionOutputs(attributions, segmDataNP, origImgNP, heatmapPath, attrPklPath):
    """Save the segment-ranked heat map image and the raw attribution data."""
    rankedAttr = rankedAttributionsBySegm(attributions, segmDataNP)
    rankedAttr = gray2rgb(rankedAttr.detach().cpu().numpy()[0][0])
    mplotfig, _ = visualize_image_attr(rankedAttr, origImgNP, method='blended_heat_map')
    mplotfig.savefig(heatmapPath)
    # Release the figure so figures do not pile up across images.
    mplotfig.clear()
    plt.close(mplotfig)
    saveAttrData(attrPklPath, attributions, segmDataNP, origImgNP)


def main(opt):
    """Compute, save and score attributions for every image of the target dataset.

    For each image of ``settings.TARGET_DATASET`` the precomputed segmentation
    pickle is loaded and attributions are computed with Shapley Value Sampling
    (per-character, whole-word and their average) and with the baseline methods
    (Integrated Gradients, GradientShap, DeepLift, Saliency, InputXGradient,
    GuidedBackprop, Deconvolution, FeatureAblation, LIME, KernelSHAP). For every
    method a heat map and the attribution data are saved and the enabled
    metrics are stored. The accumulated results list is re-written to the
    selectivity pickle after each image.

    Args:
        opt: Options object describing the model and the data (``saved_model``,
            ``eval_data``, ``imgH``, ``imgW``, ``Prediction``, ... are read;
            ``num_class``, ``exp_name`` and possibly ``input_channel`` are set).

    Raises:
        ValueError: If a segmentation pickle's label does not match the label
            of the image with the same index.
    """
    # Imported here because the module-level imports do not provide it.
    import shutil

    # 'IIIT5k_3000', 'SVT', 'IC03_860', 'IC03_867', 'IC13_857', 'IC13_1015',
    # 'IC15_1811', 'IC15_2077', 'SVTP', 'CUTE80'
    datasetName = settings.TARGET_DATASET
    custom_segm_dataroot = "{}/{}X{}/{}/".format(settings.SEGM_DIR, opt.imgH, opt.imgW, datasetName)
    outputSelectivityPkl = "strexp_ave_{}_{}.pkl".format(settings.MODEL, datasetName)
    outputDir = "./attributionImgs/{}/{}/".format(settings.MODEL, datasetName)
    attrOutputDir = "./attributionData/{}/{}/".format(settings.MODEL, datasetName)
    acquireSelectivity = True
    acquireInfidelity = False
    acquireSensitivity = False  # disabled; the original note here reads "GPU error"
    imgHeight = 32
    imgWidth = 100
    os.makedirs(outputDir, exist_ok=True)
    os.makedirs(attrOutputDir, exist_ok=True)

    # --- model configuration ---
    if 'CTC' in opt.Prediction:
        converter = CTCLabelConverter(opt.character)
    else:
        converter = AttnLabelConverter(opt.character)
    opt.num_class = len(converter.character)
    if opt.rgb:
        opt.input_channel = 3
    model_obj = Model(opt, device)
    print('model input parameters', opt.imgH, opt.imgW, opt.num_fiducial, opt.input_channel, opt.output_channel,
          opt.hidden_size, opt.num_class, opt.batch_max_length, opt.Transformation, opt.FeatureExtraction,
          opt.SequenceModeling, opt.Prediction)
    model = torch.nn.DataParallel(model_obj).to(device)

    # load model
    print('loading pretrained model from %s' % opt.saved_model)
    model.load_state_dict(torch.load(opt.saved_model, map_location=device))
    opt.exp_name = '_'.join(opt.saved_model.split('/')[1:])

    # Two pipelines: a deep copy of the model scored on one character at a
    # time, and the loaded model scored with the default STRScore settings.
    # NOTE(review): everything is left in train mode, presumably because
    # backpropagating through cuDNN RNNs requires it — TODO confirm.
    modelCopy = copy.deepcopy(model)
    scoring_singlechar = STRScore(opt=opt, converter=converter, device=device, enableSingleCharAttrAve=True)
    super_pixel_model_singlechar = torch.nn.Sequential(modelCopy, scoring_singlechar).to(device)
    modelCopy.train()
    scoring_singlechar.train()
    super_pixel_model_singlechar.train()

    scoring = STRScore(opt=opt, converter=converter, device=device)
    super_pixel_model = torch.nn.Sequential(model, scoring)
    model.train()
    scoring.train()
    super_pixel_model.train()

    # (recognizer, scorer, chained model, extra infidelity kwargs)
    singleCharPipeline = (modelCopy, scoring_singlechar, super_pixel_model_singlechar, {})
    fullWordPipeline = (model, scoring, super_pixel_model, {"normalize": True})

    # --- keep evaluation model and result logs ---
    os.makedirs(f'./result/{opt.exp_name}', exist_ok=True)
    # shutil.copy avoids building a shell command from the path.
    shutil.copy(opt.saved_model, f'./result/{opt.exp_name}/')
    dashed_line = '-' * 80
    print(dashed_line)
    # Context manager so the log handle is closed again.
    with open(f'./result/{opt.exp_name}/log_all_evaluation.txt', 'a') as log:
        log.write(dashed_line + '\n')

    target_output_orig = opt.outputOrigDir
    eval_data_list = [datasetName]
    selectivity_eval_results = []
    testImgCount = 0

    imgResultDir = str(opt.Transformation) + "-" + str(opt.FeatureExtraction) + "-" + str(opt.SequenceModeling) + "-" + str(opt.Prediction) + "-" + str(opt.scorer)
    os.makedirs(imgResultDir, exist_ok=True)

    # Perturbation function for the input (used for calculating infidelity).
    def perturb_fn(modelInputs):
        noise = torch.tensor(np.random.normal(0, 0.003, modelInputs.shape)).float()
        noise = noise.to(device)
        return noise, modelInputs - noise

    def makeLime(forwardModel):
        # Ridge regression is the interpretable model LIME uses by default.
        return Lime(forwardModel, interpretable_model=SkLearnRidge(alpha=1, fit_intercept=True))

    def recordAttribution(sample, fileTag, resultKey, attributions, explainer, pipeline):
        """Save one method's outputs and store its enabled metrics in the results."""
        imgIdx, img1, labels, segmDataNP, origImgNP, results_dict = sample
        strModel, scorer, chainedModel, infidKwargs = pipeline
        _saveAttributionOutputs(
            attributions, segmDataNP, origImgNP,
            outputDir + '{}_{}.png'.format(imgIdx, fileTag),
            attrOutputDir + '{}_{}.pkl'.format(imgIdx, fileTag))
        if acquireSelectivity:
            n_correct, confidenceList = acquireSelectivityHit(
                img1, attributions, segmDataNP, strModel, converter, labels, scorer)
            results_dict[resultKey + "_acc"] = n_correct
            results_dict[resultKey + "_conf"] = confidenceList
        if acquireInfidelity:
            infid = infidelity(chainedModel, perturb_fn, img1, attributions, **infidKwargs)
            results_dict[resultKey + "_infid"] = float(infid.detach().cpu().numpy())
        if acquireSensitivity:
            sens = sensitivity_max(explainer.attribute, img1, target=0)
            results_dict[resultKey + "_sens"] = float(sens.detach().cpu().numpy())

    # Zero baseline for GradientShap; it is the same for every image.
    baseline_dist = torch.zeros((1, 1, imgHeight, imgWidth)).to(device)

    for eval_data in eval_data_list:
        eval_data_path = os.path.join(opt.eval_data, eval_data)
        AlignCollate_evaluation = AlignCollate(imgH=opt.imgH, imgW=opt.imgW, keep_ratio_with_pad=opt.PAD)
        eval_dataset, _ = hierarchical_dataset(root=eval_data_path, opt=opt, targetDir=target_output_orig)
        evaluation_loader = torch.utils.data.DataLoader(
            eval_dataset, batch_size=1,
            shuffle=False,
            num_workers=int(opt.workers),
            collate_fn=AlignCollate_evaluation, pin_memory=True)

        for i, (orig_img_tensors, labels) in enumerate(evaluation_loader):
            results_dict = {}
            # NOTE: pickle.load executes arbitrary code; only use segmentation
            # files produced by a trusted run.
            with open(custom_segm_dataroot + "{}.pkl".format(i), 'rb') as f:
                pklData = pickle.load(f)
            segmDataNP = pklData["segdata"]
            if pklData["label"] != labels[0]:
                raise ValueError(
                    "segmentation file {} has label {!r} but image {} has label {!r}".format(
                        i, pklData["label"], i, labels[0]))
            segmTensor = torch.from_numpy(segmDataNP).unsqueeze(0).unsqueeze(0).to(device)

            img1 = orig_img_tensors.to(device)
            img1.requires_grad = True
            target = converter.encode(labels)[0][:, 1:]
            charOffset = 0
            origImgNP = gray2rgb(torch.clone(orig_img_tensors).detach().cpu().numpy()[0][0])
            sample = (i, img1, labels, segmDataNP, origImgNP, results_dict)

            ### Shapley Value Sampling, local (per-character) context
            # NOTE(review): len(labels) is the batch size (1), so only character
            # index 0 is attributed; len(labels[0]) may have been intended —
            # TODO confirm before changing.
            collectedAttributions = []
            for charIdx in range(0, len(labels)):
                scoring_singlechar.setSingleCharOutput(charIdx + charOffset)
                gtClassNum = target[0][charIdx + charOffset]
                svs = ShapleyValueSampling(super_pixel_model_singlechar)
                collectedAttributions.append(
                    svs.attribute(img1, target=gtClassNum, feature_mask=segmTensor))
            aveAttributions = torch.mean(torch.cat(collectedAttributions, dim=0), dim=0).unsqueeze(0)
            recordAttribution(sample, 'shapley_l', 'shapley_local', aveAttributions, svs, singleCharPipeline)

            ### Shapley Value Sampling, global (whole-word) context
            svs = ShapleyValueSampling(super_pixel_model)
            attributions = svs.attribute(img1, target=0, feature_mask=segmTensor)
            collectedAttributions.append(attributions)
            recordAttribution(sample, 'shapley', 'shapley', attributions, svs, fullWordPipeline)

            ### Global + local context: average of all attributions so far
            aveAttributions = torch.mean(torch.cat(collectedAttributions, dim=0), dim=0).unsqueeze(0)
            recordAttribution(sample, 'shapley_gl', 'shapley_global_local', aveAttributions, svs, singleCharPipeline)

            ### Baselines, in a fixed order; each explainer is created right
            ### before it is used. The tag doubles as file suffix and result key.
            baselineMethods = (
                ('intgrad', IntegratedGradients, {}),
                ('gradshap', GradientShap, {'baselines': baseline_dist}),
                ('deeplift', DeepLift, {}),
                ('saliency', Saliency, {}),
                ('inpxgrad', InputXGradient, {}),
                ('guidedbp', GuidedBackprop, {}),
                ('deconv', Deconvolution, {}),
                ('featablt', FeatureAblation, {'feature_mask': segmTensor}),
                ('lime', makeLime, {'feature_mask': segmTensor}),
                ('kernelshap', KernelShap, {'feature_mask': segmTensor}),
            )
            for tag, explainerFactory, extraArgs in baselineMethods:
                explainer = explainerFactory(super_pixel_model)
                attributions = explainer.attribute(img1, target=0, **extraArgs)
                recordAttribution(sample, tag, tag, attributions, explainer, fullWordPipeline)

            selectivity_eval_results.append(results_dict)
            # Re-written after every image so partial results survive a crash.
            with open(outputSelectivityPkl, 'wb') as f:
                pickle.dump(selectivity_eval_results, f)
            testImgCount += 1
            print("testImgCount: ", testImgCount)
def outputOrigImagesOnly(opt):
    """Walk one benchmark dataset with the original-image output options on.

    Overrides several fields of ``opt`` (output directory, ``output_orig``,
    corruption settings, image-number range), loads the checkpoint named by
    ``opt.saved_model``, copies it under ``./result/<exp_name>/`` and then
    iterates every sample of the hard-coded dataset once, printing a running
    count.  No prediction is made here.

    NOTE(review): the actual image dumping presumably happens inside
    ``hierarchical_dataset`` (it receives ``targetDir``) -- confirm in
    ``dataset_trba``.

    Args:
        opt: parsed command-line namespace; it is mutated in place.
    """
    import shutil  # local import: only needed for the checkpoint copy below

    datasetName = "CUTE80"  # ['IIIT5k_3000', 'SVT', 'IC03_867', 'IC13_1015', 'IC15_2077', 'SVTP', 'CUTE80']
    opt.outputOrigDir = "./datasetOrigImgs/{}/".format(datasetName)
    opt.output_orig = True
    opt.corruption_num = 0
    opt.apply_corruptions = False
    opt.min_imgnum = 0
    opt.max_imgnum = 1000
    target_output_orig = opt.outputOrigDir
    os.makedirs(target_output_orig, exist_ok=True)

    # --- model configuration ---
    if 'CTC' in opt.Prediction:
        converter = CTCLabelConverter(opt.character)
    else:
        converter = AttnLabelConverter(opt.character)
    opt.num_class = len(converter.character)

    if opt.rgb:
        opt.input_channel = 3
    model_obj = Model(opt, device)
    print('model input parameters', opt.imgH, opt.imgW, opt.num_fiducial, opt.input_channel, opt.output_channel,
          opt.hidden_size, opt.num_class, opt.batch_max_length, opt.Transformation, opt.FeatureExtraction,
          opt.SequenceModeling, opt.Prediction)
    model = torch.nn.DataParallel(model_obj).to(device)

    # Load the checkpoint.
    # NOTE(review): the model is never run in this function; loading it only
    # verifies that the checkpoint matches the configured architecture.
    print('loading pretrained model from %s' % opt.saved_model)
    model.load_state_dict(torch.load(opt.saved_model, map_location=device))
    opt.exp_name = '_'.join(opt.saved_model.split('/')[1:])

    # --- keep evaluation model and result logs ---
    result_dir = f'./result/{opt.exp_name}'
    os.makedirs(result_dir, exist_ok=True)
    try:
        # shutil.copy instead of `cp` through a shell: safe with spaces or
        # shell metacharacters in the path, and portable.
        shutil.copy(opt.saved_model, f'./result/{opt.exp_name}/')
    except shutil.SameFileError:
        # The checkpoint already lives in the result directory.
        pass

    eval_data_list = [datasetName]

    dashed_line = '-' * 80
    print(dashed_line)
    # Context manager so the log handle is closed (the original leaked it).
    with open(f'./result/{opt.exp_name}/log_all_evaluation.txt', 'a') as log:
        log.write(dashed_line + '\n')

    imgResultDir = (str(opt.Transformation) + "-" + str(opt.FeatureExtraction) + "-"
                    + str(opt.SequenceModeling) + "-" + str(opt.Prediction) + "-" + str(opt.scorer))
    os.makedirs(imgResultDir, exist_ok=True)

    testImgCount = 0
    for dataset_name in eval_data_list:
        eval_data_path = os.path.join(opt.eval_data, dataset_name)
        AlignCollate_evaluation = AlignCollate(imgH=opt.imgH, imgW=opt.imgW, keep_ratio_with_pad=opt.PAD)
        eval_dataset, _ = hierarchical_dataset(root=eval_data_path, opt=opt, targetDir=target_output_orig)
        evaluation_loader = torch.utils.data.DataLoader(
            eval_dataset, batch_size=1,
            shuffle=False,
            num_workers=int(opt.workers),
            collate_fn=AlignCollate_evaluation, pin_memory=True)
        # Only the act of iterating matters here; the batches are not used.
        for _ in evaluation_loader:
            testImgCount += 1
            print("testImgCount: ", testImgCount)
| ### Used to check whether the model predicted each image correctly. Outputs a pickle file holding one flag per image, in dataset order: 1 if the prediction equals the label, 0 otherwise. | |
def modelDatasetPredOnly(opt):
    """Record, per image, whether the model's prediction equals the label.

    Loads the checkpoint named by ``opt.saved_model``, runs it over every
    sample of the hard-coded dataset with batch size 1, and pickles a list
    with one entry per image (1 if the decoded prediction equals the label,
    0 otherwise) to ``metrics_predictonly_results_<dataset>.pkl`` in the
    current working directory.

    Args:
        opt: parsed command-line namespace; ``num_class``, ``input_channel``
            (when ``opt.rgb``), ``exp_name`` and ``eval`` are set on it.
    """
    ### targetDataset - one dataset only, CUTE80 has 288 samples
    targetDataset = "CUTE80"  # ['IIIT5k_3000', 'SVT', 'IC03_867', 'IC13_1015', 'IC15_2077', 'SVTP', 'CUTE80']
    outputSelectivityPkl = "metrics_predictonly_results_{}.pkl".format(targetDataset)

    # --- model configuration ---
    is_ctc = 'CTC' in opt.Prediction
    if is_ctc:
        converter = CTCLabelConverter(opt.character)
    else:
        converter = AttnLabelConverter(opt.character)
    opt.num_class = len(converter.character)

    if opt.rgb:
        opt.input_channel = 3
    model_obj = Model(opt, device)
    print('model input parameters', opt.imgH, opt.imgW, opt.num_fiducial, opt.input_channel, opt.output_channel,
          opt.hidden_size, opt.num_class, opt.batch_max_length, opt.Transformation, opt.FeatureExtraction,
          opt.SequenceModeling, opt.Prediction)
    model = torch.nn.DataParallel(model_obj).to(device)

    # Load the checkpoint.
    print('loading pretrained model from %s' % opt.saved_model)
    model.load_state_dict(torch.load(opt.saved_model, map_location=device))
    opt.exp_name = '_'.join(opt.saved_model.split('/')[1:])

    # NOTE(review): the model is deliberately left in train mode, as in the
    # original code; confirm this is intended for a prediction-only pass.
    model.train()

    opt.eval = True
    eval_data_list = [targetDataset]

    dashed_line = '-' * 80
    print(dashed_line)
    # The result directory may not exist yet when this entry point is run
    # first; create it before opening the log, and close the log afterwards.
    os.makedirs(f'./result/{opt.exp_name}', exist_ok=True)
    with open(f'./result/{opt.exp_name}/log_all_evaluation.txt', 'a') as log:
        log.write(dashed_line + '\n')

    target_output_orig = opt.outputOrigDir
    batch_size = 1
    predOutput = []
    for dataset_name in eval_data_list:
        eval_data_path = os.path.join(opt.eval_data, dataset_name)
        AlignCollate_evaluation = AlignCollate(imgH=opt.imgH, imgW=opt.imgW, keep_ratio_with_pad=opt.PAD)
        eval_dataset, _ = hierarchical_dataset(root=eval_data_path, opt=opt, targetDir=target_output_orig)
        evaluation_loader = torch.utils.data.DataLoader(
            eval_dataset, batch_size=batch_size,
            shuffle=False,
            num_workers=int(opt.workers),
            collate_fn=AlignCollate_evaluation, pin_memory=True)

        # Gradients are never used here, so do not build the autograd graph.
        with torch.no_grad():
            for orig_img_tensors, labels in evaluation_loader:
                image = orig_img_tensors.to(device)
                length_for_pred = torch.IntTensor([opt.batch_max_length] * batch_size).to(device)
                text_for_pred = torch.LongTensor(batch_size, opt.batch_max_length + 1).fill_(0).to(device)
                text_for_loss, _ = converter.encode(labels, batch_max_length=opt.batch_max_length)

                if is_ctc:
                    preds = model(image, text_for_pred)
                    preds_size = torch.IntTensor([preds.size(1)] * batch_size)
                    # Select max probability (greedy decoding), then decode
                    # index to character.
                    _, preds_index = preds.max(2)
                    if opt.baiduCTC:
                        preds_index = preds_index.view(-1)
                    preds_str = converter.decode(preds_index.data, preds_size.data)[0]
                else:
                    preds = model(image, text_for_pred, is_train=False)
                    preds = preds[:, :text_for_loss.shape[1] - 1, :]
                    # Select max probability (greedy decoding), then decode
                    # index to character.
                    _, preds_index = preds.max(2)
                    preds_str = converter.decode(preds_index, length_for_pred)[0]
                    # Keep only the text before the '[s]' end token.
                    # partition() returns the whole string when the token is
                    # absent, whereas slicing with find() == -1 would drop
                    # the last character.
                    preds_str = preds_str.partition('[s]')[0]

                # Both the prediction and the ground truth are lower-cased.
                predOutput.append(int(preds_str == labels[0]))

    with open(outputSelectivityPkl, 'wb') as f:
        pickle.dump(predOutput, f)
def build_arg_parser():
    """Build the command-line parser for this script.

    Returns:
        argparse.ArgumentParser: parser with the data, processing and model
        architecture options; ``--eval_data``, ``--saved_model`` and the four
        architecture stage options are required.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--eval_data', required=True, help='path to evaluation dataset')
    parser.add_argument('--benchmark_all_eval', action='store_true', help='evaluate 10 benchmark evaluation datasets')
    parser.add_argument('--workers', type=int, help='number of data loading workers', default=4)
    parser.add_argument('--batch_size', type=int, default=192, help='input batch size')
    parser.add_argument('--saved_model', required=True, help="path to saved_model to evaluation")

    # --- Data processing ---
    parser.add_argument('--batch_max_length', type=int, default=25, help='maximum-label-length')
    parser.add_argument('--imgH', type=int, default=32, help='the height of the input image')
    parser.add_argument('--imgW', type=int, default=100, help='the width of the input image')
    parser.add_argument('--superHeight', type=int, default=5, help='the height of the superpixel')
    parser.add_argument('--superWidth', type=int, default=2, help='the width of the superpixel')
    parser.add_argument('--min_imgnum', type=int, default=0, help='set this to skip for loop index of specific image number')
    parser.add_argument('--max_imgnum', type=int, default=2, help='set this to skip for loop index of specific image number')
    parser.add_argument('--severity', type=int, default=1, help='severity level if apply corruptions')
    parser.add_argument('--scorer', type=str, default='cumprod', help='See STRScore: cumprod | mean')
    parser.add_argument('--corruption_num', type=int, default=0, help='corruption to apply')
    parser.add_argument('--confidence_mode', type=int, default=0, help='0-sum of argmax; 1-edit distance')
    # Implicit string concatenation instead of a backslash continuation, so
    # source indentation cannot leak into the help text.
    parser.add_argument('--outputOrigDir', type=str, default="output_orig/",
                        help='output directory to save original '
                             'images. This will be automatically created. Needs --output_orig too.')
    parser.add_argument('--output_orig', action='store_true', help='if true, output first original rgb image of each batch')
    parser.add_argument('--compare_corrupt', action='store_true', help='set to true to output results across corruptions')
    parser.add_argument('--is_shap', action='store_true', help='no need to call in command line')
    parser.add_argument('--blackbg', action='store_true', help='if True, background color for covering features will be black(0)')
    parser.add_argument('--rgb', action='store_true', help='use rgb input')
    parser.add_argument('--character', type=str, default='0123456789abcdefghijklmnopqrstuvwxyz', help='character label')
    parser.add_argument('--sensitive', action='store_true', help='for sensitive character mode')
    parser.add_argument('--PAD', action='store_true', help='whether to keep ratio then pad for image resize')
    parser.add_argument('--data_filtering_off', action='store_true', help='for data_filtering_off mode')
    parser.add_argument('--apply_corruptions', action='store_true', help='apply corruptions to images')
    parser.add_argument('--output_feat_maps', action='store_true', help='toggle this to output images of featmaps')
    # The help text used to be a copy of --data_filtering_off's.
    parser.add_argument('--baiduCTC', action='store_true',
                        help='for baiduCTC mode (CTC predicted indices are flattened before decoding)')

    # --- Model Architecture ---
    parser.add_argument('--Transformation', type=str, required=True, help='Transformation stage. None|TPS')
    parser.add_argument('--FeatureExtraction', type=str, required=True, help='FeatureExtraction stage. VGG|RCNN|ResNet')
    parser.add_argument('--SequenceModeling', type=str, required=True, help='SequenceModeling stage. None|BiLSTM')
    parser.add_argument('--Prediction', type=str, required=True, help='Prediction stage. CTC|Attn')
    parser.add_argument('--num_fiducial', type=int, default=20, help='number of fiducial points of TPS-STN')
    parser.add_argument('--input_channel', type=int, default=1, help='the number of input channel of Feature extractor')
    parser.add_argument('--output_channel', type=int, default=512,
                        help='the number of output channel of Feature extractor')
    parser.add_argument('--hidden_size', type=int, default=256, help='the size of the LSTM hidden state')
    return parser


if __name__ == '__main__':
    opt = build_arg_parser().parse_args()

    # --- vocab / character number configuration ---
    if opt.sensitive:
        opt.character = string.printable[:-6]  # same with ASTER setting (use 94 char).

    cudnn.benchmark = True
    cudnn.deterministic = True
    opt.num_gpu = torch.cuda.device_count()

    # Alternative entry points; swap the call below to use them.
    # acquire_average_auc(opt)
    main(opt)
    # outputOrigImagesOnly(opt)