# Style-Transfer / style_transfer.py
# (HuggingFace Spaces upload by hari31416, commit 44f9699 "Added files")
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from keras import backend as K
class StyleTransfer:
    """Neural style transfer built on InceptionV3 feature extraction.

    Content and style features are taken from intermediate convolutional
    layers of a pretrained (ImageNet) InceptionV3 network. A float copy of
    the content image is optimized with Adam to minimize a weighted sum of
    a content loss (MSE on content-layer activations) and a style loss
    (MSE on gram matrices of style-layer activations).
    """

    # Inception layer names used as feature taps.
    # NOTE(review): these names assume the exact auto-generated layer naming
    # produced by tf.keras.applications.InceptionV3 in this TF version — confirm.
    content_layers = ["conv2d_88", "conv2d_91", "conv2d_92", "conv2d_85", "conv2d_93"]
    style_layers = ["conv2d", "conv2d_1", "conv2d_2", "conv2d_3", "conv2d_4"]
    # Model output order: content layers first, then style layers.
    content_and_style_layers = content_layers + style_layers
    NUM_CONTENT_LAYERS = len(content_layers)
    NUM_STYLE_LAYERS = len(style_layers)

    def __init__(self, content_image_path, style_image_path) -> None:
        """Initializes the class.

        Parameters
        ----------
        content_image_path : str
            Path to the content image.
        style_image_path : str
            Path to the style image.

        Returns
        -------
        None
        """
        self.content_image_path = content_image_path
        self.style_image_path = style_image_path
        self.model = None  # built lazily by `load_model`

    def tensor_to_image(self, tensor):
        """Converts a (possibly batched) tensor to a PIL image.

        If the tensor has a leading batch dimension it must contain exactly
        one image; that dimension is stripped before conversion.
        """
        tensor_shape = tf.shape(tensor)
        # Rank of the tensor = length of its shape vector (scalar, unlike the
        # original 1-element tensor comparison, but equivalent in eager mode).
        rank = tf.shape(tensor_shape)[0]
        if rank > 3:
            assert tensor_shape[0] == 1, "There are more than one image"
            tensor = tensor[0]
        return tf.keras.preprocessing.image.array_to_img(tensor)

    def load_image(self, path_to_img):
        """Loads an image as a tensor and scales its longest side to 512 px.

        Parameters
        ----------
        path_to_img : str
            Path to a JPEG image on disk.

        Returns
        -------
        tensor
            Batched (1, H, W, C) uint8 image tensor.
        """
        max_dim = 512
        image = tf.io.read_file(path_to_img)
        image = tf.image.decode_jpeg(image)
        image = tf.image.convert_image_dtype(image, tf.float32)
        # Resize so the longest side becomes `max_dim`, preserving aspect ratio.
        # (A dead duplicate of the next line was removed here.)
        shape = tf.cast(tf.shape(image)[:-1], tf.float32)
        long_dim = tf.reduce_max(shape)  # graph-safe, unlike Python max(shape)
        scale = max_dim / long_dim
        new_shape = tf.cast(shape * scale, tf.int32)
        image = tf.image.resize(image, new_shape)
        image = image[tf.newaxis, :]  # add batch dimension
        # convert_image_dtype rescales the float [0, 1] range to uint8 [0, 255].
        image = tf.image.convert_image_dtype(image, tf.uint8)
        return image

    def imshow(self, image, title=""):
        """Displays an image, squeezing a leading batch dimension if present."""
        if len(image.shape) > 3:
            image = tf.squeeze(image, axis=0)
        plt.imshow(image)
        plt.title(title)

    def show_images_with_style(self, images, titles=None):
        """Displays a row of images with corresponding titles.

        Silently does nothing when the number of titles does not match the
        number of images (preserves the original best-effort behavior).
        """
        # `None` default avoids the mutable-default-argument pitfall of `titles=[]`.
        titles = [] if titles is None else titles
        if len(images) != len(titles):
            return
        plt.figure(figsize=(20, 12))
        for idx, (image, title) in enumerate(zip(images, titles)):
            plt.subplot(1, len(images), idx + 1)
            plt.xticks([])
            plt.yticks([])
            self.imshow(image, title)
        plt.show()

    def preprocess_image(self, image):
        """Preprocesses an image for the Inception model: maps [0, 255] to [-1, 1]."""
        image = tf.cast(image, dtype=tf.float32)
        image = (image / 127.5) - 1.0
        return image

    def display_images(self):
        """Displays the content and style images side by side."""
        content_image = self.load_image(self.content_image_path)
        style_image = self.load_image(self.style_image_path)
        self.show_images_with_style(
            [content_image, style_image],
            titles=["Content image", "Style image"],
        )

    def gram_matrix(self, input_tensor):
        """Calculates the gram matrix and divides by the number of locations.

        Parameters
        ----------
        input_tensor : tensor
            Batched (b, i, j, c) feature map to calculate the gram matrix from.

        Returns
        -------
        tensor
            (b, c, c) gram matrix, normalized by the i*j spatial locations.
        """
        gram = tf.linalg.einsum("bijc,bijd->bcd", input_tensor, input_tensor)
        input_shape = tf.shape(input_tensor)
        num_locations = tf.cast(input_shape[1] * input_shape[2], tf.float32)
        scaled_gram = gram / num_locations
        return scaled_gram

    def get_features(self, image, type=None):
        """Returns the features of the image.

        Parameters
        ----------
        image : tensor
            Image to extract features from.
        type : str
            Type of features to extract, either "style" or "content". Any
            other value (including `None` or "all") returns both, content
            features first. (Name shadows the builtin but is kept for
            caller compatibility.)

        Returns
        -------
        list
            Content-layer activations, gram matrices of style-layer
            activations, or their concatenation (content first).
        """
        preprocessed_image = self.preprocess_image(image)
        # Model outputs follow `content_and_style_layers`: content layers
        # occupy the first NUM_CONTENT_LAYERS slots.
        outputs = self.model(preprocessed_image)
        if type == "style":
            style_outputs = outputs[self.NUM_CONTENT_LAYERS :]
            gram_style_features = [
                self.gram_matrix(style_output) for style_output in style_outputs
            ]
            return gram_style_features
        elif type == "content":
            content_outputs = outputs[: self.NUM_CONTENT_LAYERS]
            return content_outputs
        else:
            style_outputs = outputs[self.NUM_CONTENT_LAYERS :]
            content_outputs = outputs[: self.NUM_CONTENT_LAYERS]
            gram_style_features = [
                self.gram_matrix(style_output) for style_output in style_outputs
            ]
            return content_outputs + gram_style_features

    def _loss(self, features, targets, type="style"):
        """Returns the loss of feature and target: the mean squared error,
        halved for content losses.

        Parameters
        ----------
        features : tensor
            Features of the generated image for one layer.
        targets : tensor
            Target features for the same layer.
        type : str
            Type of loss to calculate, either "style" or "content".
        """
        loss = tf.reduce_mean(tf.square(features - targets))
        if type == "content":
            loss = loss * 0.5  # conventional 1/2 factor on the content MSE
        return loss

    def get_loss(self, features, target, alpha, beta):
        """Returns the total loss of the style and content images.

        Parameters
        ----------
        features : list
            Features of the generated image (content first, then style grams).
        target : list
            Target features in the same order.
        alpha : float
            Weight of the content loss.
        beta : float
            Weight of the style loss.

        Returns
        -------
        loss : float
            Weighted total loss, each part averaged over its layers.
        """
        style_features = features[self.NUM_CONTENT_LAYERS :]
        content_features = features[: self.NUM_CONTENT_LAYERS]
        style_targets = target[self.NUM_CONTENT_LAYERS :]
        content_targets = target[: self.NUM_CONTENT_LAYERS]
        style_loss = 0
        content_loss = 0
        for i in range(self.NUM_STYLE_LAYERS):
            style_loss += self._loss(style_features[i], style_targets[i], type="style")
        for i in range(self.NUM_CONTENT_LAYERS):
            content_loss += self._loss(
                content_features[i], content_targets[i], type="content"
            )
        style_loss = beta * style_loss / self.NUM_STYLE_LAYERS
        content_loss = alpha * content_loss / self.NUM_CONTENT_LAYERS
        loss = content_loss + style_loss
        return loss

    def calculate_gradients(self, image, target, alpha, beta):
        """Calculates the gradients of the loss with respect to the image.

        `image` must be a `tf.Variable` so the tape tracks it automatically.
        """
        with tf.GradientTape() as tape:
            # "all" falls through to get_features' else-branch: content + style.
            features = self.get_features(image, "all")
            loss = self.get_loss(features, target, alpha, beta)
        gradients = tape.gradient(loss, image)
        return gradients, loss

    def update_image(self, image, target, alpha, beta, optimizer):
        """Performs one optimization step on `image` (a tf.Variable) and
        clips it back into the valid [0, 255] pixel range."""
        gradients, loss = self.calculate_gradients(image, target, alpha, beta)
        optimizer.apply_gradients([(gradients, image)])
        image.assign(tf.clip_by_value(image, clip_value_min=0.0, clip_value_max=255.0))
        return loss

    def load_model(self):
        """Creates an Inception model that returns a list of intermediate
        output values (content layers first, then style layers) and stores
        it on `self.model`."""
        K.clear_session()
        inception = tf.keras.applications.InceptionV3(
            include_top=False, weights="imagenet"
        )
        inception.trainable = False  # feature extractor only; never trained
        output_layers = self.content_and_style_layers
        model = tf.keras.models.Model(
            [inception.input],
            [inception.get_layer(name).output for name in output_layers],
        )
        self.model = model
        return model

    def stylize_image(
        self,
        alpha=1,
        beta=0.1,
        epochs=10,
        steps_per_epoch=10,
        show_images=True,
        image_frequency=2,
        notebook=False,
        lr=None,
    ):
        """Stylizes the content image using the style image.

        Parameters
        ----------
        alpha : float, optional
            Content weight, by default 1
        beta : float, optional
            Style weight, by default 0.1
        epochs : int, optional
            Number of epochs, by default 10
        steps_per_epoch : int, optional
            Number of steps per epoch, by default 10
        show_images : bool, optional
            If True, periodically save the in-progress image to ".img.jpg",
            by default True
        image_frequency : int, optional
            Save every `image_frequency` steps, by default 2
        notebook : bool, optional
            Kept for backward compatibility; currently unused.
        lr : float, optional
            Initial learning rate (exponentially decayed), by default 40.0

        Returns
        -------
        PIL.Image
            The final stylized image. (The original docstring promised a
            list of images, but that code path was commented out.)
        """
        if self.model is None:
            K.clear_session()
            _ = self.load_model()
        style_image = self.load_image(self.style_image_path)
        content_image = self.load_image(self.content_image_path)
        # Targets are fixed features of the originals; concatenation order
        # (content first) matches get_features' combined output.
        style_target = self.get_features(style_image, "style")
        content_target = self.get_features(content_image, "content")
        target = content_target + style_target
        # Optimize a float copy of the content image.
        image = tf.Variable(tf.cast(content_image, dtype=tf.float32))
        if lr is None:
            lr = 40.0
        optimizer = tf.optimizers.Adam(
            tf.keras.optimizers.schedules.ExponentialDecay(
                initial_learning_rate=lr, decay_steps=100, decay_rate=0.80
            )
        )
        for epoch in range(epochs):
            for step in range(steps_per_epoch):
                self.update_image(image, target, alpha, beta, optimizer)
                if show_images and step % image_frequency == 0:
                    # Persist the current result so callers can watch progress.
                    # (Conversion hoisted here: no need to build a PIL image
                    # on steps that are never displayed or saved.)
                    self.tensor_to_image(image).save(".img.jpg")
        return self.tensor_to_image(image)