EDL / EDL.py

Upload EDL.py

a1588af verified 2 months ago

31.1 kB

	import keras
	from keras.datasets import mnist, cifar10, cifar100
	from keras import layers
	from keras.models import Sequential
	from keras.layers import Dense, Dropout, Flatten
	from keras.layers import Conv2D, MaxPooling2D
	from keras import backend as K
	import cv2
	import tensorflow as tf
	#import GPy
	#import gpflow, gpflux
	import time
	from tensorflow.keras.applications import VGG16,ResNet50
	from keras import regularizers

	import numpy as np

	import sklearn
	from sklearn.metrics import classification_report
	from sklearn.metrics import accuracy_score
	import sklearn.gaussian_process as gp
	from sklearn.gaussian_process import GaussianProcessClassifier
	from sklearn.gaussian_process.kernels import RBF, WhiteKernel
	import matplotlib.pyplot as plt
	# import official.nlp.modeling.layers as nlp_layers
	# from official.nlp.modeling.layers import SpectralNormalization
	import gp_layer
	from sklearn.metrics import roc_auc_score
	#%matplotlib inline
	import os

	# Restrict TensorFlow to the first CUDA device.
	os.environ["CUDA_VISIBLE_DEVICES"] = "0"

	# Load CIFAR-10 and rescale the pixel values from [0, 255] to [0, 1] (float32).
	(X_train, y_train), (X_test, y_test) = cifar10.load_data()
	X_train = X_train.astype('float32') / 255
	X_test = X_test.astype('float32') / 255

	# One-hot copies of the integer labels (the raw integer labels are kept as well).
	num_classes = 10
	y_train_one_hot, y_test_one_hot = (
	    keras.utils.to_categorical(split_labels, num_classes)
	    for split_labels in (y_train, y_test)
	)

	print('x_train shape:', X_train.shape)
	print(X_train.shape[0], 'train samples')
	print(X_test.shape[0], 'test samples')


	# kernel = gpflow.kernels.SquaredExponential()

	# inducing_variable = gpflow.inducing_variables.InducingPoints(
	# np.linspace(0, 1, 128*100).reshape(-1, 128)
	# )

	# mean = gpflow.mean_functions.Zero()

	# invlink = gpflow.likelihoods.RobustMax(10)
	# likelihood = gpflow.likelihoods.MultiClass(10, invlink=invlink)

	# likelihood_container = gpflux.layers.TrackableLayer()

	# likelihood_container.likelihood = likelihood

	# loss = gpflux.losses.LikelihoodLoss(likelihood)


	# Random-feature Gaussian-process output head with 10 units.
	# NOTE(review): this assignment rebinds the name of the imported `gp_layer`
	# module to the layer instance, so the module is no longer reachable under
	# that name afterwards. The instance is only referenced from commented-out
	# code in this file.
	# NOTE(review): the keyword semantics are defined by the project-local
	# `gp_layer` module — confirm them there.
	gp_layer = gp_layer.RandomFeatureGaussianProcess(
	    units=10,
	    num_inducing=2048,
	    normalize_input=True,
	    scale_random_features=False,
	    gp_cov_momentum=-1,
	    return_gp_cov=True,
	)

	def feature_extractor(inputs):
	    """Apply an ImageNet-initialised ResNet50 (without its top) to `inputs`.

	    A new ResNet50 expecting 224x224x3 images is instantiated on every call.

	    Returns:
	        The backbone's output tensor for `inputs`.
	    """
	    backbone = tf.keras.applications.resnet.ResNet50(
	        input_shape=(224, 224, 3),
	        include_top=False,
	        weights='imagenet',
	    )
	    return backbone(inputs)

	def classifier(inputs):
	    """Turn backbone feature maps into 10 unnormalised class scores.

	    The head is: global average pooling -> flatten -> Dense(256, relu)
	    -> Dense(128, relu) -> Dense(10, linear). The final layer is linear, so
	    the returned tensor holds logits rather than probabilities.
	    """
	    head_layers = (
	        tf.keras.layers.GlobalAveragePooling2D(),
	        tf.keras.layers.Flatten(),
	        tf.keras.layers.Dense(256, activation='relu'),
	        tf.keras.layers.Dense(128, activation='relu'),
	        tf.keras.layers.Dense(10, activation='linear'),
	    )

	    x = inputs
	    for layer in head_layers:
	        x = layer(x)
	    return x


	def final_model(inputs):
	    """Chain upsampling, the ResNet50 backbone and the classification head.

	    The input is enlarged by a factor of 7 along both spatial axes before it
	    is handed to `feature_extractor`; the resulting features go through
	    `classifier`, whose output is returned.
	    """
	    upscaled = tf.keras.layers.UpSampling2D(size=(7, 7))(inputs)
	    return classifier(feature_extractor(upscaled))


	# lr_schedule = tf.keras.optimizers.schedules.InverseTimeDecay(
	# 0.001,
	# decay_steps=20*50,
	# decay_rate=1,
	# staircase=False)


	# def get_optimizer():
	# return tf.keras.optimizers.Adam(lr_schedule)


	def define_compile_model():
	    """Build the Keras model for 32x32x3 inputs.

	    Despite the name, the returned model is NOT compiled here; only the
	    functional graph (input -> `final_model`) is assembled.
	    """
	    image_input = tf.keras.layers.Input(shape=(32, 32, 3))
	    logits = final_model(image_input)
	    return tf.keras.Model(inputs=image_input, outputs=logits)

	# inputs = tf.keras.Input(shape=(28, 28, 1))

	# x = tf.keras.layers.Conv2D(64, (3, 3), activation='relu', padding='same')(inputs)
	# x = tf.keras.layers.MaxPooling2D((1, 1))(x)
	# x = tf.keras.layers.Conv2D(128, (3, 3), activation='relu', padding='same')(x)
	# x = tf.keras.layers.MaxPooling2D((2, 2))(x)
	# x = tf.keras.layers.Conv2D(256, (3, 3), activation='relu', padding='same')(x)
	# x = tf.keras.layers.MaxPooling2D((2, 2))(x)
	# x = tf.keras.layers.Conv2D(512, (3, 3), activation='relu', padding='same')(x)
	# x = tf.keras.layers.MaxPooling2D((2, 2))(x)
	# x = tf.keras.layers.Conv2D(256, (3, 3), activation='relu', padding='same')(x)
	# x = tf.keras.layers.MaxPooling2D((2, 2))(x)
	# x = tf.keras.layers.Flatten()(x)
	# #x = tf.keras.layers.Dropout(0.5)(x)
	# x = tf.keras.layers.Dense(256, activation='linear')(x)
	# #x = tf.keras.layers.Dense(128, activation='linear')(x)
	# #l = tf.keras.layers.Dense(10, activation='linear')(x)
	# gp_output, gp_std= gp_layer(x)

	# model = tf.keras.Model(inputs=inputs, outputs=gp_output)

	# Instantiate the (uncompiled) network once at module level and print its
	# layer summary; every later training/evaluation step uses this `model`.
	model = define_compile_model()

	model.summary()

	# t = tf.expand_dims(X_train[0], axis=0)

	# model(t)[0]


	# from tensorflow.keras.callbacks import ReduceLROnPlateau

	# lr_schedule = tf.keras.optimizers.schedules.InverseTimeDecay(
	# 0.001,
	# decay_steps=20*50,
	# decay_rate=1,
	# staircase=False)

	# def get_optimizer():
	# return tf.keras.optimizers.Adam(lr_schedule)


	# #Compiling the model
	# model.compile(loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), optimizer = get_optimizer(), metrics=['accuracy'])
	# # early_stop = EarlyStopping(monitor='val_loss',patience=5)
	# # checkpoint = ModelCheckpoint("./Best_model/",save_best_only=True,)
	# rlrp = ReduceLROnPlateau(monitor='loss', factor=0.4, verbose=0, patience=2, min_lr=0.0000001)


	# # # # Train the model
	# model.fit(X_train, y_train, batch_size=32, epochs=20, validation_data=(X_test, y_test), callbacks=[rlrp])

	# predictions = np.argmax(model.predict(X_test), axis=1)

	# print(classification_report(y_test, predictions))

	# print(model(X_train[0].reshape(1,32,32,3)))

	#t = X_train[0].reshape(1,32,32,3)

	#model.predict(t)


	def relu_evidence(logits):
	    """Return non-negative evidence obtained by applying ReLU to `logits`."""
	    evidence = tf.nn.relu(logits)
	    return evidence

	def exp_evidence(logits):
	    """Return evidence as exp(logits), with logits clipped to [-10, 10] first."""
	    clipped = tf.clip_by_value(logits, clip_value_min=-10, clip_value_max=10)
	    return tf.exp(clipped)


	def softplus_evidence(logits):
	    """Return evidence as softplus((logits + 1)^2 / 2)."""
	    shifted_square = (logits + 1) ** 2
	    return tf.nn.softplus(shifted_square / 2)

	# # # def log_marginal_likelihood_gp_layer(model, X_train, y_train):
	# # # """Compute the log marginal likelihood for a GP layer within the model."""
	# # # gp_layer = model.layers[-1]


	# # # kernel = gp_layer.kernel
	# # # inducing_points = gp_layer.inducing_variable.Z.numpy()
	# # # mean = gp_layer.mean_function


	# # # y_train_subset = y_train[:inducing_points.shape[0]].astype(np.float64) # Ensure float64 dtype


	# # # K = kernel.K(inducing_points)
	# # # K += np.eye(inducing_points.shape[0]) * 1e-6


	# # # L = tf.linalg.cholesky(K)


	# # # alpha = tf.linalg.cholesky_solve(L, y_train_subset)


	# # # log_likelihood = -0.5 * tf.reduce_sum(tf.matmul(tf.transpose(y_train_subset), alpha)) - tf.reduce_sum(tf.math.log(tf.linalg.diag_part(L))) - 0.5 * inducing_points.shape[0] * np.log(2 * np.pi)

	# # # return tf.squeeze(log_likelihood)



	def kl_divergence(alpha):
	    """KL divergence between Dirichlet(alpha) and the all-ones Dirichlet.

	    All reductions run over axis 1 with `keepdims=True`, so the result has
	    one value per row of `alpha`.
	    """
	    ones = tf.ones_like(alpha)
	    alpha_total = tf.reduce_sum(alpha, axis=1, keepdims=True)
	    ones_total = tf.reduce_sum(ones, axis=1, keepdims=True)

	    # Log-normaliser terms of the two Dirichlet densities.
	    log_norm_alpha = tf.math.lgamma(alpha_total) - tf.reduce_sum(
	        tf.math.lgamma(alpha), axis=1, keepdims=True
	    )
	    log_norm_ones = tf.reduce_sum(
	        tf.math.lgamma(ones), axis=1, keepdims=True
	    ) - tf.math.lgamma(ones_total)

	    digamma_gap = tf.math.digamma(alpha) - tf.math.digamma(alpha_total)
	    weighted_gap = tf.reduce_sum((alpha - ones) * digamma_gap, axis=1, keepdims=True)

	    return weighted_gap + log_norm_alpha + log_norm_ones



	def loglikelihood_loss(y, alpha):
	    """Per-row squared-error term plus variance term of the Dirichlet mean.

	    Args:
	        y: targets, broadcastable against `alpha` (cast to float32).
	        alpha: Dirichlet parameters, reduced along axis 1 (cast to float32).

	    Returns:
	        Tensor with one value per row (`keepdims=True`).
	    """
	    # The total is taken before the casts, exactly as the inputs arrive.
	    strength = tf.cast(tf.reduce_sum(alpha, axis=1, keepdims=True), tf.float32)
	    y = tf.cast(y, tf.float32)
	    alpha = tf.cast(alpha, tf.float32)

	    squared_error = tf.reduce_sum(
	        tf.square(y - (alpha / strength)), axis=1, keepdims=True
	    )
	    variance = tf.reduce_sum(
	        alpha * (strength - alpha) / (strength * strength * (strength + 1)),
	        axis=1,
	        keepdims=True,
	    )
	    return squared_error + variance


	def mse_loss(y, alpha, epoch_num, num_classes=10, annealing_step=10):
	    """Evidential squared-error loss with an annealed KL regulariser.

	    Args:
	        y: one-hot targets.
	        alpha: Dirichlet parameters, reduced along axis 1.
	        epoch_num: current epoch; the KL weight is min(1, epoch_num / annealing_step).
	        num_classes: numerator of the vacuity term.
	        annealing_step: number of epochs over which the KL weight ramps to 1.

	    Returns:
	        Tuple `(loss, vacuity)`: the per-row loss and `num_classes / sum(alpha)`
	        computed with the gradient through the sum stopped.
	    """
	    data_term = loglikelihood_loss(y, alpha)

	    ramp = tf.cast(epoch_num / annealing_step, dtype=tf.float32)
	    annealing_coef = tf.minimum(tf.constant(1.0, dtype=tf.float32), ramp)

	    # Reset the target-class parameter to 1 so only the evidence assigned to
	    # the other classes is penalised by the KL term.
	    kl_alpha = (alpha - 1) * (1 - y) + 1
	    kl_term = annealing_coef * kl_divergence(kl_alpha)

	    strength = tf.reduce_sum(alpha, axis=1, keepdims=True)
	    vacuity = tf.identity(num_classes / tf.stop_gradient(strength), name="vacuity")

	    return data_term + kl_term, vacuity


	# # # def edl_loss(func, y, alpha, epoch_num, num_classes, annealing_step, device=None):
	# # # y = tf.convert_to_tensor(y, dtype=tf.float32)
	# # # alpha = tf.convert_to_tensor(alpha, dtype=tf.float32)
	# # # S = tf.reduce_sum(alpha, axis=1, keepdims=True)

	# # # A = tf.reduce_sum(y * (func(S) - func(alpha)), axis=1, keepdims=True)

	# # # annealing_coef = tf.minimum(
	# # # tf.constant(1.0, dtype=tf.float32),
	# # # tf.constant(epoch_num / annealing_step, dtype=tf.float32),
	# # # )

	# # # kl_alpha = (alpha - 1) * (1 - y) + 1
	# # # kl_div = annealing_coef * kl_divergence(kl_alpha)

	# # # S = tf.reduce_sum(alpha, axis=1, keepdims=True)
	# # # with tf.GradientTape() as tape:
	# # # vacuity = num_classes / tf.stop_gradient(S)

	# # # return A + kl_div, vacuity


	def compute_metrics(logits, Y, epoch, global_step, annealing_step, lmb=0.0005):
	    """Compute the evidential training loss, vacuity and class probabilities.

	    Args:
	        logits: raw model outputs, shape (batch, classes). A single rank-1
	            vector of class scores is accepted and treated as a batch of one.
	        Y: integer class labels (anything `tf.one_hot` accepts).
	        epoch: current epoch, forwarded to `mse_loss` for KL annealing.
	        global_step: unused; kept for call-site compatibility.
	        annealing_step: number of epochs over which the KL weight ramps up.
	        lmb: unused; kept for call-site compatibility.

	    Returns:
	        Tuple `(loss, u, prob)`:
	        loss is a scalar, u is `classes / sum(alpha)` per row (keepdims),
	        prob is `alpha / sum(alpha)` per row.
	    """
	    logits = tf.cast(logits, tf.float32)
	    if len(logits.shape) == 1:
	        # Every reduction below (and in mse_loss) runs over axis 1, so a lone
	        # score vector must become a batch of one or those calls would raise.
	        logits = tf.expand_dims(logits, axis=0)

	    # Take the class count from the logits instead of hard-coding it.
	    n_classes = logits.shape[-1]

	    alpha = exp_evidence(logits) + 1
	    Y_onehot = tf.one_hot(Y, depth=n_classes)

	    strength = tf.reduce_sum(alpha, axis=1, keepdims=True)
	    u = n_classes / strength  # vacuity-style uncertainty
	    prob = alpha / strength   # expected class probabilities

	    mse_loss_val, vacuity = mse_loss(Y_onehot, alpha, epoch, n_classes, annealing_step)
	    loss = tf.reduce_mean(mse_loss_val)

	    # Extra term: reward a large true-class logit, weighted by the
	    # (gradient-stopped) vacuity, averaged over the batch.
	    output_correct = logits * Y_onehot
	    batch_count = tf.cast(tf.shape(output_correct)[0], tf.float32)
	    loss -= tf.reduce_sum(vacuity * output_correct) / batch_count

	    return loss, u, prob


	# NumPy copies of the training arrays; `x_train` is read later for
	# dataset-size bookkeeping.
	x_train = np.array(X_train)
	y_train = np.array(y_train)


	optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001)
	model.compile(optimizer=optimizer)
	num_epochs = 15
	batch_size = 32

	train_dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train))
	train_dataset = train_dataset.shuffle(buffer_size=len(X_train)).batch(batch_size)

	# Evaluation data must keep the order of X_test / y_test: the per-sample
	# outputs collected from this dataset are later indexed side by side with
	# `y_test` and with `model.predict(X_test)`. Shuffling here would silently
	# pair each sample with another sample's uncertainty.
	test_dataset = tf.data.Dataset.from_tensor_slices((X_test, y_test)).batch(batch_size)

	# # # def get_multiple_samples(model, inputs, num_samples=5):
	# # # samples = [model(inputs, training=True) for _ in range(num_samples)]
	# # # mean_output = tf.reduce_mean(samples, axis=0)
	# # # return mean_output

	# Custom training loop: one optimizer step per batch on the evidential loss,
	# with running loss/accuracy reported once per epoch.
	for epoch in range(num_epochs):
	    total_loss = 0.0
	    correct = 0
	    total = 0
	    num_batches = 0

	    for inputs, labels in train_dataset:
	        # Flatten (batch, 1) labels to (batch,). Unlike tf.squeeze this keeps
	        # a batch of size one as a length-1 vector instead of a scalar.
	        labels = tf.reshape(labels, [-1])

	        with tf.GradientTape() as tape:
	            outputs = model(inputs, training=True)
	            loss, _, _ = compute_metrics(outputs, labels, epoch, global_step=epoch, annealing_step=10)

	        gradients = tape.gradient(loss, model.trainable_variables)
	        optimizer.apply_gradients(zip(gradients, model.trainable_variables))

	        total_loss += loss.numpy()
	        num_batches += 1

	        predicted = tf.cast(tf.argmax(outputs, axis=1), tf.int32)
	        total += labels.shape[0]
	        correct += tf.reduce_sum(tf.cast(predicted == tf.cast(labels, tf.int32), tf.float32)).numpy()

	    # Average over the batches and samples actually seen. The dataset size is
	    # not a multiple of the batch size, so a floor division would under-count
	    # the number of batches whose loss was summed.
	    avg_loss = total_loss / max(num_batches, 1)
	    accuracy = 100 * correct / max(total, 1)

	    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {avg_loss:.4f}, Accuracy: {accuracy:.2f}%')

	    if avg_loss < 0.05:
	        print(f'Stopping training. Loss ({avg_loss:.4f}) is below threshold ({0.05}).')
	        break

	# Hard class predictions for the whole test set (in X_test order), followed
	# by the per-class precision/recall report.
	_test_logits = model.predict(X_test)
	predictions = np.argmax(_test_logits, axis=1)

	print(classification_report(y_test, predictions))

	# # # #model.save('test_sngp.keras')

	def test(model, test_dataset):
	    """Evaluate `model` on `test_dataset` and collect predictions and uncertainties.

	    Args:
	        model: callable as `model(inputs, training=False)` returning logits.
	        test_dataset: iterable of `(inputs, labels)` batches.

	    Returns:
	        Tuple `(accuracy, all_predictions, all_uncertainties)` where accuracy is
	        a percentage and the two arrays are the per-batch results concatenated
	        in iteration order.

	    Side effects:
	        Prints the accuracy and array shapes, and writes `predictions.npy` and
	        `uncertainties.npy` to the current working directory.
	    """
	    n_correct = 0
	    n_seen = 0
	    prediction_chunks = []
	    uncertainty_chunks = []

	    for batch_inputs, batch_labels in test_dataset:
	        batch_labels = tf.squeeze(batch_labels)
	        logits = model(batch_inputs, training=False)
	        batch_pred = tf.cast(tf.argmax(logits, axis=1), tf.int32)

	        # Only the uncertainty is needed here; the loss and probabilities are dropped.
	        _, batch_u, _ = compute_metrics(logits, batch_labels, epoch=0, global_step=0, annealing_step=10)

	        prediction_chunks.append(batch_pred.numpy())
	        uncertainty_chunks.append(batch_u.numpy())

	        n_seen += batch_labels.shape[0]
	        hits = tf.cast(batch_pred == tf.cast(batch_labels, tf.int32), tf.float32)
	        n_correct += tf.reduce_sum(hits).numpy()

	    accuracy = 100 * n_correct / n_seen
	    all_predictions = np.concatenate(prediction_chunks)
	    all_uncertainties = np.concatenate(uncertainty_chunks)

	    print(f'Test Accuracy: {accuracy:.2f}%')
	    print(f'Shape of predictions array: {all_predictions.shape}')
	    print(f'Shape of uncertainties array: {all_uncertainties.shape}')

	    np.save('predictions.npy', all_predictions)
	    np.save('uncertainties.npy', all_uncertainties)

	    return accuracy, all_predictions, all_uncertainties


	# def add_gaussian_noise_to_image(image, noise_stddev=0.3):
	# noise = tf.random.normal(shape=tf.shape(image), mean=0.0, stddev=noise_stddev)
	# corrupted_image = tf.clip_by_value(image + noise, 0.0, 1.0) # Clip values to [0, 1]
	# return corrupted_image

	# # Corrupt the test dataset images with Gaussian noise
	# corrupted_test_dataset = test_dataset.map(lambda x, y: (add_gaussian_noise_to_image(x), y))

	# X, y = corrupted_test_dataset

	# predictions = np.argmax(model.predict(X), axis=1)

	# print(classification_report(y, predictions))


	# _,u,_ = compute_metrics(predictions, y_test, 1, global_step=1, annealing_step=10)
	test_accuracy, predictions_1, uncertainties = test(model, test_dataset)

	# A sample counts as "certain" when its uncertainty is below this value.
	UNCERTAINTY_THRESHOLD = 0.3

	TC_indices = []  # True Certainty (TC): correct and certain
	TU_indices = []  # True Uncertainty (TU): incorrect and uncertain
	FU_indices = []  # False Uncertainty (FU): correct and uncertain
	FC_indices = []  # False Certainty (FC): incorrect and certain


	def _safe_ratio(numerator, denominator):
	    """Return numerator / denominator, or NaN when the denominator is zero."""
	    return numerator / denominator if denominator else float('nan')


	# NOTE: `predictions` and `y_test` follow X_test order, while `uncertainties`
	# follows the iteration order of `test_dataset`. The pairing below is only
	# meaningful when that dataset yields samples in X_test order.
	# Labels and uncertainties are flattened so each comparison yields one plain
	# boolean per sample.
	_flat_labels = np.ravel(y_test)
	_flat_uncertainties = np.ravel(uncertainties)

	for i, (_pred, _label, _unc) in enumerate(zip(predictions, _flat_labels, _flat_uncertainties)):
	    _is_correct = _pred == _label
	    _is_certain = _unc < UNCERTAINTY_THRESHOLD

	    if _is_correct and _is_certain:
	        TC_indices.append(i)
	    elif _is_correct:
	        FU_indices.append(i)
	    elif _is_certain:
	        FC_indices.append(i)
	    else:
	        TU_indices.append(i)


	# Uncertainty sensitivity / specificity / precision / accuracy. A ratio is
	# reported as NaN when its denominator is empty (e.g. no uncertain samples).
	print('USen:', _safe_ratio(len(TU_indices), len(TU_indices) + len(FC_indices)))

	print('USpe:', _safe_ratio(len(TC_indices), len(TC_indices) + len(FU_indices)))

	print('UPre:', _safe_ratio(len(TU_indices), len(TU_indices) + len(FU_indices)))

	print('UAcc:', _safe_ratio(len(TU_indices) + len(TC_indices), len(TU_indices) + len(TC_indices) + len(FU_indices) + len(FC_indices)))



	# def combine_images_with_padding(img_index_1, img_index_2, padding_type="top_bottom"):
	# """
	# Combines two CIFAR-10 images with padding and normalization.

	# Args:
	# img_index_1: Index of the first image in the dataset.
	# img_index_2: Index of the second image in the dataset.
	# padding_type: Type of padding to use ("top_bottom" or "left_right").

	# Returns:
	# A combined image tensor.
	# """

	# def combine_images_with_padding(img_index_1, img_index_2, padding_type):

	# (train_images, train_labels), (test_images, test_labels) = cifar10.load_data()


	# img_1 = tf.convert_to_tensor(test_images[img_index_1], dtype=tf.float32) / 255.0
	# img_2 = tf.convert_to_tensor(test_images[img_index_2], dtype=tf.float32) / 255.0


	# if padding_type == "top_bottom":
	# padding_amount = (img_2.shape[0] - img_1.shape[0]) // 2
	# top_bottom_padding = tf.zeros((padding_amount, img_1.shape[1], 3))
	# padded_img_1 = tf.concat([top_bottom_padding, img_1, top_bottom_padding], axis=0)
	# padded_img_2 = img_2
	# elif padding_type == "left_right":
	# padding_amount = (img_2.shape[1] - img_1.shape[1]) // 2
	# left_right_padding = tf.zeros((img_1.shape[0], padding_amount, 3))
	# padded_img_1 = tf.concat([left_right_padding, img_1, left_right_padding], axis=1)
	# padded_img_2 = img_2
	# else:
	# raise ValueError("Invalid padding type. Choose 'top_bottom' or 'left_right'.")


	# combined_img = tf.concat([padded_img_1, padded_img_2], axis=0)


	# combined_img_resized = tf.image.resize(combined_img, [32, 32])

	# return combined_img_resized



	# img_index_1 = 50
	# img_index_2 = 100
	# padding_type = "top_bottom"

	# combined_img = combine_images_with_padding(img_index_1, img_index_2, padding_type)

	# combined_img = np.expand_dims(combined_img, axis=0)

	# image1_index = 10
	# image2_index = 21


	# combined_img = np.zeros((32, 32))
	# combined_img[:, :-6] += x_train[image1_index][:, 6:]
	# combined_img[:, 14:] += x_train[image2_index][:, 5:19]
	# combined_img /= combined_img.max()

	# combined_img = combined_img.reshape(1, 32, 32, 3)


	# Out-of-distribution probe: compare the model's uncertainty on one CIFAR-10
	# test image with its uncertainty on a randomly chosen MNIST digit.
	(train_images, _), (_, _) = mnist.load_data()

	mnist_image = train_images[np.random.randint(0, train_images.shape[0])]

	# Bring the digit to the model's 32x32x3 input format.
	rescaled_image = cv2.resize(mnist_image, (32, 32))
	rgb_image = cv2.cvtColor(rescaled_image, cv2.COLOR_GRAY2RGB)

	# Apply the same preprocessing as the CIFAR-10 data (float32 in [0, 1]);
	# feeding raw 0..255 values would confound the comparison with a pure
	# input-scale mismatch.
	rgb_image = rgb_image.astype('float32') / 255
	rgb_image = np.expand_dims(rgb_image, axis=0)

	pred = model(X_test[0].reshape(1, 32, 32, 3))
	pred_rgb = model(rgb_image)

	# The label only affects the returned loss; `u` and `p` depend on the logits
	# alone, so reusing y_test[0] for the MNIST image is harmless for them.
	l, u, p = compute_metrics(pred, y_test[0], 0, global_step=0, annealing_step=10)
	l_rgb, u_rgb, p_rgb = compute_metrics(pred_rgb, y_test[0], 0, global_step=0, annealing_step=10)

	print('u:', u)
	print('p:', p)
	print('pred:', pred)

	print('u_rgb:', u_rgb)
	print('p_rgb:', p_rgb)
	print('preds:', pred_rgb)

	#----------------------------------------------------------------------------------------------------
	#Variance based EDL

	def uncertainty(alpha, reduce=True):
	    """Split the variance of the Dirichlet mean into two parts.

	    Args:
	        alpha: Dirichlet parameters, reduced along axis 1.
	        reduce: when true, each part is summed over all entries and divided
	            by the size of the first axis of `alpha`.

	    Returns:
	        Tuple `(AU, EU)`: `EU` is p * (1 - p) / (S + 1) and `AU` is the
	        remainder p - p^2 - EU, with p = alpha / S and S = sum(alpha).
	    """
	    strength = tf.reduce_sum(alpha, axis=1, keepdims=True)
	    mean = alpha / strength

	    total_variance = mean - tf.square(mean)
	    EU = mean * (1 - mean) / (strength + 1)
	    AU = total_variance - EU

	    if not reduce:
	        return AU, EU

	    batch = alpha.shape[0]
	    return tf.reduce_sum(AU) / batch, tf.reduce_sum(EU) / batch

	pred_var = model(rgb_image)
	# `uncertainty` expects Dirichlet parameters, i.e. evidence + 1 (the same
	# construction compute_metrics uses), not the bare evidence.
	pred_var = exp_evidence(pred_var) + 1

	unc_ale, unc_eps = uncertainty(pred_var)
	print('u_ale:', unc_ale)
	print('p_eps:', unc_eps)

	# NOTE: despite its name, `y_pred_probs` holds the model's raw logits (the
	# final layer is linear), not normalised probabilities.
	y_pred_probs = model.predict(X_test)
	y_pred = np.argmax(y_pred_probs, axis=1)

	#-----------------------------------------------------------------------------------------------------


	#-----------------------------------------------------------------------------------------------------
	#Different Variance based unc

	# def total_uncertainty_variance(probs):
	# if isinstance(probs, tf.Tensor):
	# mean = tf.reduce_mean(probs, axis=2)
	# t_u = tf.reduce_sum(mean * (1 - mean), axis=1)
	# else:
	# probs = tf.convert_to_tensor(probs, dtype=tf.float32)
	# mean = tf.reduce_mean(probs, axis=2)
	# t_u = tf.reduce_sum(mean * (1 - mean), axis=1)
	# return t_u

	# def aleatoric_uncertainty_variance(probs):
	# if isinstance(probs, tf.Tensor):
	# a_u = tf.reduce_mean(tf.reduce_sum(probs * (1 - probs), axis=1), axis=1)
	# else:
	# probs = tf.convert_to_tensor(probs, dtype=tf.float32)
	# a_u = tf.reduce_mean(tf.reduce_sum(probs * (1 - probs), axis=1), axis=1)
	# return a_u

	# def epistemic_uncertainty_variance(probs):
	# if isinstance(probs, tf.Tensor):
	# mean = tf.reduce_mean(probs, axis=2, keepdims=True)
	# e_u = tf.reduce_mean(tf.reduce_sum(probs * (probs - mean), axis=1), axis=1)
	# else:
	# probs = tf.convert_to_tensor(probs, dtype=tf.float32)
	# mean = tf.reduce_mean(probs, axis=2, keepdims=True)
	# e_u = tf.reduce_mean(tf.reduce_sum(probs * (probs - mean), axis=1), axis=1)
	# return e_u

	# eu = epistemic_uncertainty_variance(pred_rgb)
	# au = aleatoric_uncertainty_variance(pred_rgb)

	# print('eu:', eu)
	# print('au:', au)


	#------------------------------------------------------------------------------------------------------

	def softmax(vector):
	    """Return the softmax of `vector` along its last axis.

	    A 1-D input is normalised as a whole; for higher-rank input every row
	    along the last axis is normalised independently, so each sums to 1. The
	    maximum is subtracted before exponentiation so large scores do not
	    overflow.
	    """
	    scores = np.asarray(vector)
	    if scores.size == 0:
	        # Nothing to normalise; avoid reducing over an empty axis.
	        return np.exp(scores)

	    shifted = scores - np.max(scores, axis=-1, keepdims=True)
	    e = np.exp(shifted)
	    return e / e.sum(axis=-1, keepdims=True)

	def expected_calibration_error(samples, true_labels, M=5):
	    """Expected calibration error with `M` equal-width confidence bins on (0, 1].

	    Args:
	        samples: array of shape (N, classes) holding per-class probabilities.
	        true_labels: integer labels, shape (N,) or (N, 1).
	        M: number of bins.

	    Returns:
	        Array of shape (1,): sum over bins of |conf(B) - acc(B)| * |B| / N.

	    Raises:
	        ValueError: if the number of labels differs from the number of samples.
	    """
	    samples = np.asarray(samples)
	    # Flatten column-vector labels. Comparing (N,) predictions with (N, 1)
	    # labels would broadcast to an (N, N) matrix and corrupt the accuracies.
	    true_labels = np.ravel(true_labels)
	    if true_labels.shape[0] != samples.shape[0]:
	        raise ValueError(
	            f"got {samples.shape[0]} samples but {true_labels.shape[0]} labels"
	        )

	    # Uniform binning with M bins.
	    bin_boundaries = np.linspace(0, 1, M + 1)

	    confidences = np.max(samples, axis=1)         # top probability per sample
	    predicted_label = np.argmax(samples, axis=1)  # its class index
	    accuracies = predicted_label == true_labels   # per-sample correctness

	    ece = np.zeros(1)
	    for bin_lower, bin_upper in zip(bin_boundaries[:-1], bin_boundaries[1:]):
	        # Samples whose confidence falls in (bin_lower, bin_upper].
	        in_bin = np.logical_and(confidences > bin_lower, confidences <= bin_upper)
	        prob_in_bin = in_bin.mean()

	        if prob_in_bin > 0:
	            accuracy_in_bin = accuracies[in_bin].mean()
	            avg_confidence_in_bin = confidences[in_bin].mean()
	            ece += np.abs(avg_confidence_in_bin - accuracy_in_bin) * prob_in_bin
	    return ece

	# `y_pred_probs` holds raw logits, but the calibration bins span (0, 1].
	# Convert to the expected Dirichlet class probabilities alpha / sum(alpha),
	# with alpha = exp(clip(logits, -10, 10)) + 1 as in exp_evidence and
	# compute_metrics, and pass the labels as a flat vector.
	_ece_alpha = np.exp(np.clip(y_pred_probs, -10, 10)) + 1
	_ece_probs = _ece_alpha / _ece_alpha.sum(axis=1, keepdims=True)

	ece = expected_calibration_error(_ece_probs, np.ravel(y_test))
	print("Expected Calibration Error:", ece)

	# xtest = X_test[0]

	# xtest = tf.convert_to_tensor([xtest])

	# # Define the FGSM attack function
	# def fgsm_attack(image, label, epsilon):
	# with tf.GradientTape() as tape:
	# tape.watch(image)
	# prediction = model(image)
	# prediction = exp_evidence(prediction) + 1
	# loss,_ = mse_loss(label, prediction, epoch_num=1, num_classes=10, annealing_step=10)
	# #loss = tf.keras.losses.sparse_categorical_crossentropy(label, prediction)
	# gradient = tape.gradient(loss, image)
	# signed_grad = tf.sign(gradient)
	# adversarial_image = image + epsilon * signed_grad
	# adversarial_image = tf.clip_by_value(adversarial_image, -1, 1)
	# return adversarial_image

	# # Create the adversarial image
	# epsilon = 0.5
	# label = tf.convert_to_tensor([y_test[0]], dtype=tf.int64)
	# adversarial_image = fgsm_attack(xtest, label, epsilon)


	# # Get the model predictions for both images
	# original_pred = model(xtest)
	# adversarial_pred = model(adversarial_image)

	# l1, u1, p1 = compute_metrics(adversarial_pred, y_test[0], 0, global_step=0, annealing_step=10)

	# print('u_rgb:', u1)
	# print('p_rgb:', p1)
	#print('preds:', pred_rgb)

	# # # def plot_reliability_diagram(confidences, true_labels, M=5):
	# # # """Plots the reliability diagram for the given data."""
	# # # bin_boundaries = np.linspace(0, 1, M + 1)
	# # # bin_centers = (bin_boundaries[:-1] + bin_boundaries[1:]) / 2

	# # # # Get binned accuracy (average accuracy for each confidence bin)
	# # # binned_accuracy = np.zeros(M)
	# # # for i, bin_lower in enumerate(bin_boundaries[:-1]):
	# # # bin_upper = bin_boundaries[i + 1]
	# # # in_bin = np.logical_and(confidences >= bin_lower, confidences < bin_upper)
	# # # if in_bin.sum() > 0:
	# # # binned_accuracy[i] = true_labels[in_bin].mean()

	# # # # Perfect calibration line (y = x)
	# # # perfect_calibration = np.linspace(0, 1, M)

	# # # plt.plot(bin_centers, binned_accuracy, 'o', label='Binned Accuracy')
	# # # plt.plot(perfect_calibration, perfect_calibration, '-', label='Perfect Calibration')
	# # # plt.xlabel('Predicted Probability')
	# # # plt.ylabel('Observed Accuracy')
	# # # plt.title('Reliability Diagram')
	# # # plt.legend()
	# # # plt.grid(True)
	# # # plt.show()


	# # #plot_reliability_diagram(y_pred_probs, y_test)



	# # # def fgsm_attack(image, epsilon, data_grad):
	# # # # Collect the element-wise sign of the data gradient
	# # # sign_data_grad = tf.sign(data_grad)
	# # # # Create the perturbed image by adjusting each pixel of the input image
	# # # perturbed_image = image + epsilon * sign_data_grad
	# # # # Adding clipping to maintain [0,1] range
	# # # perturbed_image = tf.clip_by_value(perturbed_image, 0, 1)
	# # # # Return the perturbed image
	# # # return perturbed_image

	# # # # Restores the tensors to their original scale
	# # # def denorm(batch, mean=[0.1307], std=[0.3081]):
	# # # mean = tf.convert_to_tensor(mean)
	# # # std = tf.convert_to_tensor(std)

	# # # return batch * std + mean


	# # # def test(model, test_dataset, epsilon):

	# # # # Accuracy counter
	# # # correct = 0
	# # # adv_examples = []

	# # # # Loop over all examples in test set
	# # # for data, target in test_dataset:

	# # # # Send the data and label to the device
	# # # data, target = data.numpy(), target.numpy()

	# # # # Set requires_grad attribute of tensor. Important for Attack
	# # # data = tf.convert_to_tensor(data, dtype=tf.float32)
	# # # with tf.GradientTape() as tape:
	# # # tape.watch(data)
	# # # # Forward pass the data through the model
	# # # output = model(data)
	# # # init_pred = tf.argmax(output, axis=1, output_type=tf.int32)

	# # # # If the initial prediction is wrong, don't bother attacking, just move on
	# # # if not np.array_equal(init_pred.numpy(), target):
	# # # continue

	# # # # Calculate the loss
	# # # loss, _, _ = compute_metrics(outputs, target, epoch=1, global_step=0, annealing_step=10)

	# # # # Calculate gradients of model in backward pass
	# # # data_grad = tape.gradient(loss, data)

	# # # # Call FGSM Attack
	# # # perturbed_data = fgsm_attack(data, epsilon, data_grad)

	# # # # Re-classify the perturbed image
	# # # output = model(perturbed_data)

	# # # # Check for success
	# # # final_pred = tf.argmax(output, axis=1, output_type=tf.int32)
	# # # if np.array_equal(final_pred.numpy(), target):
	# # # correct += 1
	# # # # Special case for saving 0 epsilon examples
	# # # if epsilon == 0 and len(adv_examples) < 5:
	# # # adv_examples.append((init_pred.numpy()[0], final_pred.numpy()[0], perturbed_data.numpy()))
	# # # else:
	# # # # Save some adv examples for visualization later
	# # # if len(adv_examples) < 5:
	# # # adv_examples.append((init_pred.numpy()[0], final_pred.numpy()[0], perturbed_data.numpy()))

	# # # # Calculate final accuracy for this epsilon
	# # # final_acc = correct / float(len(test_dataset))
	# # # print(f"Epsilon: {epsilon}\tTest Accuracy = {correct} / {len(test_dataset)} = {final_acc}")

	# # # # Return the accuracy and adversarial examples
	# # # return final_acc, adv_examples


	# # # accuracies = []
	# # # examples = []
	# # # epsilons = [0,0.05, 0.1, 0.15,0.2,0.25,0.3]

	# # # # Run test for each epsilon
	# # # for eps in epsilons:
	# # # acc, ex = test(model, test_dataset, eps)
	# # # accuracies.append(acc)
	# # # examples.append(ex)


	# # # import matplotlib.pyplot as plt

	# # # # Plot accuracy vs epsilon
	# # # plt.figure(figsize=(5,5))
	# # # plt.plot(epsilons, accuracies, "*-")
	# # # plt.yticks(np.arange(0, 1.1, step=0.1))
	# # # plt.xticks(np.arange(0, .35, step=0.05))
	# # # plt.title("Accuracy vs Epsilon")
	# # # plt.xlabel("Epsilon")
	# # # plt.ylabel("Accuracy")
	# # # plt.grid(True)
	# # # plt.show()

	# # # # Save the plot as a PNG file
	# # # plt.savefig('accuracy_vs_epsilon.png')