hari31416 committed on
Commit
44f9699
·
1 Parent(s): 4caaf22

Added files

Browse files
.gitattributes CHANGED
@@ -32,3 +32,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
32
  *.zip filter=lfs diff=lfs merge=lfs -text
33
  *.zst filter=lfs diff=lfs merge=lfs -text
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
32
  *.zip filter=lfs diff=lfs merge=lfs -text
33
  *.zst filter=lfs diff=lfs merge=lfs -text
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
35
+ *.jpg filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ __pycache__
Dockerfile ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.9

WORKDIR /code

# Copy the requirements first so the dependency layer is cached independently
# of the application sources.
COPY ./requirements.txt /code/requirements.txt

RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt

COPY . .

# app.py is a plain script: it builds the Gradio interface and calls
# launch(server_name="0.0.0.0", server_port=7860) itself. There is no
# app/main.py module exposing an ASGI object named "app", so run the script
# directly instead of pointing uvicorn at a non-existent target.
CMD ["python", "app.py"]
app.py ADDED
@@ -0,0 +1,176 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from style_transfer import StyleTransfer
3
+ import tensorflow as tf
4
+ from tensorflow.keras import backend as K
5
+ import numpy as np
6
+
7
+
8
def validate_inputs(epochs, steps_per_epoch, image_frequency, alpha, beta, lr):
    """Convert the raw UI values to their numeric types and range-check them.

    Parameters
    ----------
    epochs, steps_per_epoch, image_frequency
        Values convertible with ``int``; each must be at least 1 after
        conversion (``image_frequency`` is later used as a modulo divisor).
    alpha, beta
        Values convertible with ``float``; each must be non-negative.
    lr
        Value convertible with ``float``; must be strictly positive.

    Returns
    -------
    tuple
        ``(epochs, steps_per_epoch, image_frequency, alpha, beta, lr)`` with
        the first three as ``int`` and the last three as ``float``.

    Raises
    ------
    ValueError
        If a value cannot be converted or lies outside its allowed range.
    """
    epochs = int(epochs)
    steps_per_epoch = int(steps_per_epoch)
    image_frequency = int(image_frequency)
    alpha = float(alpha)
    beta = float(beta)
    lr = float(lr)

    counts = (
        ("epochs", epochs),
        ("steps_per_epoch", steps_per_epoch),
        ("image_frequency", image_frequency),
    )
    for name, value in counts:
        if value < 1:
            raise ValueError(f"{name} must be at least 1, got {value}")

    for name, value in (("alpha", alpha), ("beta", beta)):
        if value < 0:
            raise ValueError(f"{name} must be non-negative, got {value}")

    if lr <= 0:
        raise ValueError(f"lr must be positive, got {lr}")

    return epochs, steps_per_epoch, image_frequency, alpha, beta, lr
17
+
18
+
19
def stylize_image(
    content_image_path,
    style_image_path,
    epochs,
    steps_per_epoch,
    image_frequency,
    alpha,
    beta,
    lr,
):
    """Run style transfer and stream intermediate results to the UI.

    This is a generator: it yields a progress tuple while the image is being
    optimised, so it can be used as a streaming Gradio function.

    Parameters
    ----------
    content_image_path : str
        Path to the content image
    style_image_path : str
        Path to the style image
    epochs : int
        Number of epochs
    steps_per_epoch : int
        Number of steps per epoch
    image_frequency : int
        An image is yielded every ``image_frequency`` steps within an epoch
    alpha : float
        Content weight
    beta : float
        Style weight
    lr : float
        Initial learning rate of the exponentially decaying schedule

    Yields
    ------
    tuple
        ``(image, epoch, step, loss)`` where ``image`` is a NumPy array of the
        current stylized image, ``epoch`` and ``step`` are 1-based counters and
        ``loss`` is the current total loss as a float.

    Raises
    ------
    gr.Error
        If either image is missing.
    """
    if content_image_path is None or style_image_path is None:
        # Fail with a readable message instead of an obscure file-read error.
        raise gr.Error("Please provide both a content image and a style image.")

    epochs, steps_per_epoch, image_frequency, alpha, beta, lr = validate_inputs(
        epochs, steps_per_epoch, image_frequency, alpha, beta, lr
    )
    style_transfer = StyleTransfer(
        content_image_path=content_image_path,
        style_image_path=style_image_path,
    )
    if style_transfer.model is None:
        K.clear_session()
        _ = style_transfer.load_model()
    style_image = style_transfer.load_image(style_transfer.style_image_path)
    content_image = style_transfer.load_image(style_transfer.content_image_path)

    style_target = style_transfer.get_features(style_image, "style")
    content_target = style_transfer.get_features(content_image, "content")

    # Content targets come first, then style targets (same order as the
    # features returned for the generated image).
    target = content_target + style_target
    # The optimisation starts from the content image itself.
    image = tf.Variable(tf.cast(content_image, dtype=tf.float32))
    optimizer = tf.optimizers.Adam(
        tf.keras.optimizers.schedules.ExponentialDecay(
            initial_learning_rate=lr, decay_steps=100, decay_rate=0.80
        )
    )
    for epoch in range(epochs):
        for step in range(steps_per_epoch):
            loss = style_transfer.update_image(image, target, alpha, beta, optimizer)
            is_final_step = epoch == epochs - 1 and step == steps_per_epoch - 1
            # Always emit the very last step so the UI ends on the finished
            # image, even when it does not fall on an image_frequency boundary.
            if step % image_frequency == 0 or is_final_step:
                # Convert only when something is actually yielded.
                display_image = style_transfer.tensor_to_image(image)
                yield np.array(display_image), epoch + 1, step + 1, float(loss)
86
+
87
+
88
def main():
    """Build the Gradio interface for the style-transfer demo and launch it."""
    image_shape = (512, 512)

    # Input components, in the order expected by ``stylize_image``.
    content_image = gr.Image(type="filepath", label="Content Image", shape=image_shape)
    style_image = gr.Image(type="filepath", label="Style Image", shape=image_shape)
    epochs = gr.Slider(minimum=1, maximum=20, label="Epochs", value=10)
    steps_per_epoch = gr.Slider(
        minimum=1, maximum=20, label="Steps per Epoch", value=10
    )
    image_frequency = gr.Slider(
        minimum=1, maximum=10, label="Show Image Frequency", value=2
    )
    alpha = gr.Slider(minimum=0, maximum=1, label="Alpha", value=1)
    beta = gr.Slider(minimum=0, maximum=1, label="Beta", value=0.1)
    lr = gr.Slider(minimum=0.1, maximum=100, label="Learning Rate", value=40.0)

    # Output components, matching the tuple yielded by ``stylize_image``.
    output_image = gr.Image(type="numpy", label="Output Image", shape=image_shape)
    current_epoch = gr.Number(label="Current Epoch")
    current_step = gr.Number(label="Current Step")
    current_loss = gr.Number(label="Current Loss")

    description = """### This is a demo of neural style transfer. Upload a content image and a style image, and see the result! You can play around with the parameters to see how they affect the result.
    """

    # Every example uses the same hyper-parameters:
    # epochs, steps per epoch, image frequency, alpha, beta, learning rate.
    example_settings = [10, 10, 1, 1, 0.1, 30.0]
    example_pairs = [
        ("examples/landscape_1.jpg", "examples/van_gogh.jpg"),
        ("examples/landscape_1.jpg", "examples/picaso.jpg"),
        ("examples/landscape_2.jpg", "examples/van_gogh.jpg"),
        ("examples/landscape_2.jpg", "examples/picaso.jpg"),
    ]
    examples = [
        [content_path, style_path, *example_settings]
        for content_path, style_path in example_pairs
    ]

    interface = gr.Interface(
        fn=stylize_image,
        inputs=[
            content_image,
            style_image,
            epochs,
            steps_per_epoch,
            image_frequency,
            alpha,
            beta,
            lr,
        ],
        outputs=[output_image, current_epoch, current_step, current_loss],
        title="Style Transfer",
        description=description,
        examples=examples,
        theme="gstaff/xkcd",
    )
    interface.queue().launch(server_name="0.0.0.0", server_port=7860)


# The app is started as soon as this module is executed or imported.
main()
examples/landscape_1.jpg ADDED

Git LFS Details

  • SHA256: fb0b6b5941d82cea0265c8700f1dd49a28a74fe952f67ead5301fa0f0362d5b0
  • Pointer size: 132 Bytes
  • Size of remote file: 1.72 MB
examples/landscape_2.jpg ADDED

Git LFS Details

  • SHA256: e9b450567310e77ddbc4c7dfc9e8b5b4bc40f00efc8c4b933e56790c60e8558e
  • Pointer size: 131 Bytes
  • Size of remote file: 180 kB
examples/picaso.jpg ADDED

Git LFS Details

  • SHA256: 4c902b413d16787450cd5235c911fb020fa2e233cae8ea88843b7ba6ec7e50c8
  • Pointer size: 131 Bytes
  • Size of remote file: 122 kB
examples/van_gogh.jpg ADDED

Git LFS Details

  • SHA256: 554c042b9ad5b1ca2c30e0035bdcaf34472a9602a4151c3224cc34196b6e92bf
  • Pointer size: 131 Bytes
  • Size of remote file: 356 kB
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
tensorflow-cpu
gradio
# style_transfer.py imports matplotlib.pyplot at module level, so declare it
# explicitly instead of relying on another package to pull it in.
matplotlib
style_transfer.py ADDED
@@ -0,0 +1,330 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import tensorflow as tf
2
+ import numpy as np
3
+ import matplotlib.pyplot as plt
4
+ from keras import backend as K
5
+
6
+
7
class StyleTransfer:
    """A class for neural style transfer.

    Uses the Inception model to extract features from the content and style
    images, and optimises a copy of the content image so that its features
    match the content targets and its gram matrices match the style targets.
    """

    # Layer names are the auto-generated names of the InceptionV3 graph built
    # in ``load_model``. Content layers come first in the model outputs.
    content_layers = ["conv2d_88", "conv2d_91", "conv2d_92", "conv2d_85", "conv2d_93"]
    style_layers = ["conv2d", "conv2d_1", "conv2d_2", "conv2d_3", "conv2d_4"]
    content_and_style_layers = content_layers + style_layers

    NUM_CONTENT_LAYERS = len(content_layers)
    NUM_STYLE_LAYERS = len(style_layers)

    def __init__(self, content_image_path, style_image_path) -> None:
        """Initializes the class

        Parameters
        ----------
        content_image_path : str
            path to the content image
        style_image_path : str
            path to the style image

        Returns
        -------
        None
        """
        self.content_image_path = content_image_path
        self.style_image_path = style_image_path
        # The feature extractor is created by ``load_model``.
        self.model = None

    def tensor_to_image(self, tensor):
        """Converts a tensor to a PIL image.

        A tensor with more than three dimensions must hold exactly one image;
        its leading dimension is dropped before conversion.

        Raises
        ------
        ValueError
            If the tensor has more than three dimensions and its first
            dimension is not 1.
        """
        if tf.rank(tensor) > 3:
            if tf.shape(tensor)[0] != 1:
                raise ValueError("There are more than one image")
            tensor = tensor[0]
        return tf.keras.preprocessing.image.array_to_img(tensor)

    def load_image(self, path_to_img):
        """Loads an image as a uint8 tensor whose longer side is 512 pixels.

        The image is always decoded to three channels, so grayscale or RGBA
        files still produce an input the feature extractor accepts.

        Returns
        -------
        tensor
            uint8 tensor with a leading dimension of size 1 added
        """
        max_dim = 512
        image = tf.io.read_file(path_to_img)
        # expand_animations=False keeps the result 3-D even for GIF input.
        image = tf.image.decode_image(image, channels=3, expand_animations=False)
        image = tf.image.convert_image_dtype(image, tf.float32)

        # Height and width only; the channel dimension is excluded.
        shape = tf.cast(tf.shape(image)[:-1], tf.float32)
        scale = max_dim / tf.reduce_max(shape)
        new_shape = tf.cast(shape * scale, tf.int32)

        image = tf.image.resize(image, new_shape)
        image = image[tf.newaxis, :]
        image = tf.image.convert_image_dtype(image, tf.uint8)

        return image

    def imshow(self, image, title=""):
        """Displays an image with matplotlib, dropping a leading dimension if present."""
        if len(image.shape) > 3:
            image = tf.squeeze(image, axis=0)

        plt.imshow(image)
        plt.title(title)

    def show_images_with_style(self, images, titles=None):
        """Displays a row of images with corresponding titles.

        Nothing is displayed when the number of titles differs from the number
        of images.
        """
        titles = [] if titles is None else titles
        if len(images) != len(titles):
            return

        plt.figure(figsize=(20, 12))
        for idx, (image, title) in enumerate(zip(images, titles)):
            plt.subplot(1, len(images), idx + 1)
            plt.xticks([])
            plt.yticks([])
            self.imshow(image, title)
        plt.show()

    def preprocess_image(self, image):
        """Preprocesses a given image to use with the Inception model.

        Casts to float32 and maps values with ``x / 127.5 - 1`` (i.e. 0..255
        becomes -1..1).
        """
        image = tf.cast(image, dtype=tf.float32)
        image = (image / 127.5) - 1.0

        return image

    def display_images(self):
        """Displays the content and style images side by side."""
        content_image = self.load_image(self.content_image_path)
        style_image = self.load_image(self.style_image_path)

        self.show_images_with_style(
            [content_image, style_image],
            titles=["Content image", "Style image"],
        )

    def gram_matrix(self, input_tensor):
        """Calculates the gram matrix and divides by the number of locations

        Parameters
        ----------
        input_tensor : tensor
            tensor to calculate the gram matrix from; the einsum below expects
            four dimensions

        Returns
        -------
        tensor
            gram matrix of the input tensor
        """
        gram = tf.linalg.einsum("bijc,bijd->bcd", input_tensor, input_tensor)

        input_shape = tf.shape(input_tensor)
        # Number of positions summed over by the einsum (dims 1 and 2).
        num_locations = tf.cast(input_shape[1] * input_shape[2], tf.float32)
        scaled_gram = gram / num_locations

        return scaled_gram

    def get_features(self, image, type=None):
        """Returns the features of the image

        The model is loaded on first use if ``load_model`` has not been called.

        Parameters
        ----------
        image : tensor
            image to extract features from
        type : str
            type of features to extract. Either "style" or "content". For any
            other value (including `None`), both content and style features
            are returned

        Returns
        -------
        list
            content features, gram matrices of the style features, or the
            content features followed by the style gram matrices
        """
        if self.model is None:
            self.load_model()

        preprocessed_image = self.preprocess_image(image)
        outputs = self.model(preprocessed_image)

        # Model outputs are ordered as content layers followed by style layers.
        content_outputs = outputs[: self.NUM_CONTENT_LAYERS]
        if type == "content":
            return content_outputs

        gram_style_features = [
            self.gram_matrix(style_output)
            for style_output in outputs[self.NUM_CONTENT_LAYERS :]
        ]
        if type == "style":
            return gram_style_features

        return content_outputs + gram_style_features

    def _loss(self, features, targets, type="style"):
        """Returns the loss of feature and target. This is just the mean square error.

        features : tensor
            features of the generated image for one layer
        targets : tensor
            target features for the same layer
        type : str
            type of loss to calculate. Either "style" or "content"; the
            content loss is additionally multiplied by 0.5
        """
        loss = tf.reduce_mean(tf.square(features - targets))
        if type == "content":
            loss = loss * 0.5
        return loss

    def get_loss(self, features, target, alpha, beta):
        """Returns the total loss of the style and content images

        Parameters
        ----------
        features : list
            list of features of the content and style images
        target : list
            list of features of the content and style images
        alpha : float
            weight of the content loss
        beta : float
            weight of the style loss

        Returns
        -------
        loss : float
            total loss of the style and content images
        """
        style_features = features[self.NUM_CONTENT_LAYERS :]
        content_features = features[: self.NUM_CONTENT_LAYERS]
        style_targets = target[self.NUM_CONTENT_LAYERS :]
        content_targets = target[: self.NUM_CONTENT_LAYERS]

        style_loss = 0
        for feature, style_target in zip(style_features, style_targets):
            style_loss += self._loss(feature, style_target, type="style")

        content_loss = 0
        for feature, content_target in zip(content_features, content_targets):
            content_loss += self._loss(feature, content_target, type="content")

        # Average over the layers before applying the weights.
        style_loss = beta * style_loss / self.NUM_STYLE_LAYERS
        content_loss = alpha * content_loss / self.NUM_CONTENT_LAYERS
        return content_loss + style_loss

    def calculate_gradients(self, image, target, alpha, beta):
        """Calculates the gradients of the loss with respect to the image

        Returns
        -------
        tuple
            ``(gradients, loss)``
        """
        with tf.GradientTape() as tape:
            # "all" matches neither "style" nor "content", so both kinds of
            # features are returned.
            features = self.get_features(image, "all")
            loss = self.get_loss(features, target, alpha, beta)
        gradients = tape.gradient(loss, image)
        return gradients, loss

    def update_image(self, image, target, alpha, beta, optimizer):
        """Updates the image by calculating the gradients and applying them to the image

        The image is modified in place and clipped to the range 0..255.

        Returns
        -------
        tensor
            loss computed before the update was applied
        """
        gradients, loss = self.calculate_gradients(image, target, alpha, beta)
        optimizer.apply_gradients([(gradients, image)])
        image.assign(tf.clip_by_value(image, clip_value_min=0.0, clip_value_max=255.0))
        return loss

    def load_model(self):
        """Creates an Inception model that returns a list of intermediate output values

        Returns
        -------
        tf.keras.Model
            model whose outputs are the content layers followed by the style
            layers; it is also stored in ``self.model``
        """
        # NOTE(review): presumably the session is cleared so the auto-generated
        # layer names (conv2d, conv2d_1, ...) match the names listed on the
        # class - confirm against the Keras naming behaviour.
        K.clear_session()
        inception = tf.keras.applications.InceptionV3(
            include_top=False, weights="imagenet"
        )
        inception.trainable = False
        output_layers = self.content_and_style_layers

        model = tf.keras.models.Model(
            [inception.input],
            [inception.get_layer(name).output for name in output_layers],
        )
        self.model = model
        return model

    def stylize_image(
        self,
        alpha=1,
        beta=0.1,
        epochs=10,
        steps_per_epoch=10,
        show_images=True,
        image_frequency=2,
        notebook=False,
        lr=None,
    ):
        """Stylizes the image using the style and content images

        Parameters
        ----------
        alpha : float, optional
            Content weight, by default 1
        beta : float, optional
            Style weight, by default 0.1
        epochs : int, optional
            Number of epochs, by default 10
        steps_per_epoch : int, optional
            Number of steps per epoch, by default 10
        show_images : bool, optional
            If True, every collected image is also written to ``.img.jpg``
            (overwriting the previous one), by default True
        image_frequency : int, optional
            An image is collected every ``image_frequency`` steps within an
            epoch, by default 2
        notebook : bool, optional
            If the code is running on a notebook, by default False.
            Currently unused.
        lr : float, optional
            Initial learning rate, by default None (40.0 is used)

        Returns
        -------
        [PIL.Image]
            List of images collected during the optimisation; the last
            element is always the final stylized image

        Raises
        ------
        ValueError
            If ``image_frequency`` is smaller than 1.
        """
        # Checked first: image_frequency is used as a modulo divisor below.
        if image_frequency < 1:
            raise ValueError("image_frequency must be a positive integer")

        if self.model is None:
            self.load_model()
        style_image = self.load_image(self.style_image_path)
        content_image = self.load_image(self.content_image_path)

        style_target = self.get_features(style_image, "style")
        content_target = self.get_features(content_image, "content")

        target = content_target + style_target
        # The optimisation starts from the content image itself.
        image = tf.Variable(tf.cast(content_image, dtype=tf.float32))
        if lr is None:
            lr = 40.0
        optimizer = tf.optimizers.Adam(
            tf.keras.optimizers.schedules.ExponentialDecay(
                initial_learning_rate=lr, decay_steps=100, decay_rate=0.80
            )
        )

        images = []
        for epoch in range(epochs):
            for step in range(steps_per_epoch):
                self.update_image(image, target, alpha, beta, optimizer)
                is_final_step = epoch == epochs - 1 and step == steps_per_epoch - 1
                # Always keep the very last step so the finished image is
                # returned even when it is not on an image_frequency boundary.
                if step % image_frequency == 0 or is_final_step:
                    display_image = self.tensor_to_image(image)
                    images.append(display_image)
                    if show_images:
                        display_image.save(".img.jpg")
        return images