Created Neural Style Transfer from scratch
- .gitattributes +1 -0
- Dockerfile +19 -0
- app.py +307 -0
- examples/content_1.jpg +3 -0
- examples/content_2.jpg +3 -0
- examples/content_3.jpg +3 -0
- examples/style_1.jpg +3 -0
- examples/style_2.jpg +3 -0
- examples/style_3.jpg +3 -0
- model.py +279 -0
- requirements.txt +5 -0
.gitattributes
CHANGED
@@ -32,3 +32,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+*.jpg filter=lfs diff=lfs merge=lfs -text
Dockerfile
ADDED
@@ -0,0 +1,19 @@
FROM python:3.9

WORKDIR /code

COPY ./requirements.txt /code/requirements.txt

RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt

COPY . .
RUN useradd -m -u 1000 user
USER user
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH

WORKDIR $HOME/app

COPY --chown=user . $HOME/app

# app.py launches its own Gradio server on port 7860; there is no FastAPI
# "app.main:app" for uvicorn to serve, so run the script directly.
CMD ["python", "app.py"]
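Once built, the container serves the Gradio UI on port 7860. A typical local run, with an image tag of your choosing, is docker build -t nst-space . followed by docker run -p 7860:7860 nst-space.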
app.py
ADDED
@@ -0,0 +1,307 @@
import gradio as gr
from model import NeuralStyleTransfer
import tensorflow as tf
from keras import backend as K
import numpy as np


def change_dtype_inputs(
    n_style_layers,
    n_content_layers,
    epochs,
    learning_rate,
    steps_per_epoch,
    style_weight,
    content_weight,
    var_weight,
):
    # Gradio delivers every slider value as a float; cast each input to the
    # dtype the model expects.
    return (
        int(n_style_layers),
        int(n_content_layers),
        int(epochs),
        float(learning_rate),
        int(steps_per_epoch),
        float(style_weight),
        float(content_weight),
        float(var_weight),
    )


def fit_style_transfer(
    style_image,
    content_image,
    extractor="inception_v3",
    n_style_layers=2,
    n_content_layers=3,
    epochs=4,
    learning_rate=60.0,
    steps_per_epoch=100,
    style_weight=0.3,
    content_weight=0.5,
    var_weight=1e-12,
):
    """
    Fit the style transfer model to the content and style images.

    Parameters
    ----------
    style_image: str
        The path to the style image.
    content_image: str
        The path to the content image.
    extractor: str
        The name of the feature extractor to use. Options are
        "inception_v3", "vgg19", "resnet50", and "mobilenet_v2".
    n_style_layers: int
        The number of layers to use for the style loss.
    n_content_layers: int
        The number of layers to use for the content loss.
    epochs: int
        The number of epochs to train the model for.
    learning_rate: float
        The learning rate to use for the Adam optimizer.
    steps_per_epoch: int
        The number of steps to take per epoch.
    style_weight: float
        The weight to use for the style loss.
    content_weight: float
        The weight to use for the content loss.
    var_weight: float
        The weight to use for the total variation loss.

    Yields
    ------
    display_image: np.ndarray
        The current stylized image, followed by the style, content, and
        total variation losses and the current epoch and step.
    """
    (
        n_style_layers,
        n_content_layers,
        epochs,
        learning_rate,
        steps_per_epoch,
        style_weight,
        content_weight,
        var_weight,
    ) = change_dtype_inputs(
        n_style_layers,
        n_content_layers,
        epochs,
        learning_rate,
        steps_per_epoch,
        style_weight,
        content_weight,
        var_weight,
    )

    model = NeuralStyleTransfer(
        style_image=style_image,
        content_image=content_image,
        extractor=extractor,
        n_style_layers=n_style_layers,
        n_content_layers=n_content_layers,
    )

    style_image = model.style_image
    content_image = model.content_image

    content_and_style_layers = model.get_output_layers()

    # build the model with the layers we need to extract the features from
    K.clear_session()
    model.build(content_and_style_layers)

    style_features = model.get_features(style_image, type="style")
    content_features = model.get_features(content_image, type="content")

    optimizer = tf.optimizers.Adam(
        tf.keras.optimizers.schedules.ExponentialDecay(
            initial_learning_rate=learning_rate, decay_steps=100, decay_rate=0.80
        )
    )

    # the generated image starts as a copy of the content image and is the
    # only variable being optimized
    generated_image = tf.cast(content_image, tf.float32)
    generated_image = tf.Variable(generated_image)

    step = 0

    for epoch in range(epochs):
        for step in range(steps_per_epoch):
            losses = model._update_image_with_style(
                generated_image,
                style_features,
                content_features,
                style_weight,
                content_weight,
                optimizer,
                var_weight,
            )

            display_image = model.tensor_to_image(generated_image)

            step += 1

            style_loss, content_loss, var_loss = losses

            yield np.array(display_image), style_loss, content_loss, var_loss, epoch, step


def main():
    content_image = gr.Image(type="filepath", label="Content Image", shape=(512, 512))
    style_image = gr.Image(type="filepath", label="Style Image", shape=(512, 512))

    extractor = gr.Dropdown(
        ["inception_v3", "vgg19", "resnet50", "mobilenet_v2"],
        label="Feature Extractor",
        value="inception_v3",
    )

    n_content_layers = gr.Slider(1, 5, value=3, step=1, label="Content Layers")
    n_style_layers = gr.Slider(1, 5, value=2, step=1, label="Style Layers")
    epochs = gr.Slider(2, 20, value=4, step=1, label="Epochs")
    learning_rate = gr.Slider(1, 100, value=60, step=1, label="Learning Rate")
    steps_per_epoch = gr.Slider(1, 100, value=80, step=1, label="Steps Per Epoch")
    style_weight = gr.Slider(1e-4, 0.5, value=0.3, step=1e-4, label="Style Weight")
    content_weight = gr.Slider(1e-3, 0.5, value=0.5, step=1e-4, label="Content Weight")
    var_weight = gr.Slider(0, 1e-5, value=1e-7, step=1e-12, label="Total Variation Weight")

    inputs = [
        style_image,
        content_image,
        extractor,
        n_style_layers,
        n_content_layers,
        epochs,
        learning_rate,
        steps_per_epoch,
        style_weight,
        content_weight,
        var_weight,
    ]

    # each example row matches the order of `inputs`
    examples = [
        ["examples/style_1.jpg", "examples/content_1.jpg", "inception_v3", 3, 2, 4, 60, 100, 0.3, 0.5, 1e-8],
        ["examples/style_2.jpg", "examples/content_2.jpg", "inception_v3", 3, 2, 4, 60, 100, 0.3, 0.5, 1e-5],
        ["examples/style_3.jpg", "examples/content_3.jpg", "inception_v3", 3, 2, 4, 60, 100, 0.5, 0.3, 1e-10],
    ]

    output_image = gr.Image(type="numpy", label="Output Image", shape=(512, 512))
    style_loss = gr.Number(label="Current Style Loss")
    content_loss = gr.Number(label="Current Content Loss")
    var_loss = gr.Number(label="Current Total Variation Loss")
    curr_epoch = gr.Number(label="Current Epoch")
    curr_step = gr.Number(label="Current Step")

    outputs = [output_image, style_loss, content_loss, var_loss, curr_epoch, curr_step]

    interface = gr.Interface(
        fn=fit_style_transfer,
        inputs=inputs,
        outputs=outputs,
        examples=examples,
    )

    # queue() enables streaming of the intermediate frames from the generator
    interface.queue().launch(server_name="0.0.0.0", server_port=7860)


if __name__ == "__main__":
    main()
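Because fit_style_transfer is a generator, Gradio's queue streams each yielded frame and its losses to the UI while training runs. A minimal sketch of driving the same generator outside the interface, using the repo's bundled example images and deliberately small epoch/step counts (arbitrary choices for a quick smoke test):

from app import fit_style_transfer

for frame, style_loss, content_loss, var_loss, epoch, step in fit_style_transfer(
    style_image="examples/style_1.jpg",
    content_image="examples/content_1.jpg",
    epochs=1,
    steps_per_epoch=5,
):
    # frame is a numpy array; the losses are scalar tensors
    print(f"epoch {epoch}, step {step}: style loss {float(style_loss):.4f}")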
examples/content_1.jpg
ADDED
Git LFS Details

examples/content_2.jpg
ADDED
Git LFS Details

examples/content_3.jpg
ADDED
Git LFS Details

examples/style_1.jpg
ADDED
Git LFS Details

examples/style_2.jpg
ADDED
Git LFS Details

examples/style_3.jpg
ADDED
Git LFS Details
model.py
ADDED
@@ -0,0 +1,279 @@
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from keras import backend as K


class NeuralStyleTransfer:
    def __init__(
        self, style_image, content_image, extractor, n_style_layers=5, n_content_layers=5
    ):
        # load the feature extractor: either a known architecture by name or
        # a user-supplied Keras model
        if extractor == "inception_v3":
            self.feature_extractor = tf.keras.applications.InceptionV3(
                include_top=False, weights="imagenet"
            )
        elif extractor == "vgg19":
            self.feature_extractor = tf.keras.applications.VGG19(
                include_top=False, weights="imagenet"
            )
        elif extractor == "resnet50":
            self.feature_extractor = tf.keras.applications.ResNet50(
                include_top=False, weights="imagenet"
            )
        elif extractor == "mobilenet_v2":
            self.feature_extractor = tf.keras.applications.MobileNetV2(
                include_top=False, weights="imagenet"
            )
        elif isinstance(extractor, tf.keras.Model):
            self.feature_extractor = extractor
        else:
            raise ValueError("Feature extractor not found")

        # freeze the extractor; only the generated image gets updated
        self.feature_extractor.trainable = False

        # define the style and content depth
        self.n_style_layers = n_style_layers
        self.n_content_layers = n_content_layers

        self.style_image = self._load_img(style_image)
        self.content_image = self._load_img(content_image)

    def tensor_to_image(self, tensor):
        """converts a tensor to an image, dropping the batch dimension"""
        tensor_shape = tf.shape(tensor)
        number_elem_shape = tf.shape(tensor_shape)
        if number_elem_shape > 3:
            assert tensor_shape[0] == 1
            tensor = tensor[0]
        return tf.keras.preprocessing.image.array_to_img(tensor)

    def _load_img(self, image):
        """loads an image from disk and resizes it so its longest side is 512px"""
        max_dim = 512

        image = tf.io.read_file(image)
        image = tf.image.decode_image(image)
        image = tf.image.convert_image_dtype(image, tf.float32)

        shape = tf.cast(tf.shape(image)[:-1], tf.float32)
        long_dim = tf.reduce_max(shape)
        scale = max_dim / long_dim

        new_shape = tf.cast(shape * scale, tf.int32)

        image = tf.image.resize(image, new_shape)
        image = image[tf.newaxis, :]
        image = tf.image.convert_image_dtype(image, tf.uint8)

        return image

    def imshow(self, image, title=None):
        """displays an image with a corresponding title"""
        if len(image.shape) > 3:
            image = tf.squeeze(image, axis=0)

        plt.imshow(image)
        if title:
            plt.title(title)

    def show_images_with_objects(self, images, titles=[]):
        """displays a row of images with corresponding titles"""
        if len(images) != len(titles):
            return

        plt.figure(figsize=(20, 12))
        for idx, (image, title) in enumerate(zip(images, titles)):
            plt.subplot(1, len(images), idx + 1)
            plt.xticks([])
            plt.yticks([])
            self.imshow(image, title)

    def _preprocess_image(self, image):
        # scale pixel values from [0, 255] to [-1, 1]
        image = tf.cast(image, dtype=tf.float32)
        image = (image / 127.5) - 1.0

        return image

    def get_output_layers(self):
        # get all the layers which contain "conv" in their name
        all_layers = [
            layer.name
            for layer in self.feature_extractor.layers
            if "conv" in layer.name
        ]

        # the style layers are the shallowest conv layers
        style_layers = all_layers[: self.n_style_layers]

        # the content layers are taken backwards from the second-to-last conv layer
        content_layers = all_layers[-2 : -self.n_content_layers - 2 : -1]

        content_and_style_layers = content_layers + style_layers

        return content_and_style_layers

    def build(self, layers_name):
        # replace the extractor with a multi-output model that returns the
        # activations of the requested layers
        output_layers = [
            self.feature_extractor.get_layer(name).output for name in layers_name
        ]

        self.feature_extractor = tf.keras.Model(
            self.feature_extractor.input, output_layers
        )

    def _loss(self, target_img, features_img, type):
        """
        Calculates one layer's loss for the style transfer

        target_img:
            the target image (style or content) features

        features_img:
            the generated image features (style or content)
        """
        loss = tf.reduce_mean(tf.square(features_img - target_img))

        if type == "content":
            return 0.5 * loss

        return loss

    def _gram_matrix(self, input_tensor):
        """
        Calculates the Gram matrix and divides by the number of locations

        input_tensor:
            the output of a conv layer on the style image,
            shape = (batch_size, height, width, channels)
        """
        result = tf.linalg.einsum("bijc,bijd->bcd", input_tensor, input_tensor)
        input_shape = tf.shape(input_tensor)
        num_locations = tf.cast(input_shape[1] * input_shape[2], tf.float32)
        return result / num_locations

    def get_features(self, image, type):
        preprocessed_image = self._preprocess_image(image)

        outputs = self.feature_extractor(preprocessed_image)

        # the multi-output model returns content layers first, then style layers
        if type == "style":
            outputs = outputs[self.n_content_layers :]
            features = [self._gram_matrix(style_output) for style_output in outputs]
        elif type == "content":
            features = outputs[: self.n_content_layers]

        return features

    def _style_content_loss(
        self,
        style_targets,
        style_outputs,
        content_targets,
        content_outputs,
        style_weight,
        content_weight,
    ):
        """
        Calculates the total loss of the style transfer

        style_targets:
            the style features of the style image

        style_outputs:
            the style features of the generated image

        content_targets:
            the content features of the content image

        content_outputs:
            the content features of the generated image

        style_weight:
            the weight of the style loss

        content_weight:
            the weight of the content loss
        """
        # adding the loss of each layer
        style_loss = style_weight * tf.add_n(
            [
                self._loss(style_target, style_output, type="style")
                for style_target, style_output in zip(style_targets, style_outputs)
            ]
        )
        content_loss = content_weight * tf.add_n(
            [
                self._loss(content_target, content_output, type="content")
                for content_target, content_output in zip(
                    content_targets, content_outputs
                )
            ]
        )
        total_loss = style_loss + content_loss
        return total_loss, style_loss, content_loss

    def _grad_loss(
        self,
        generated_image,
        style_target,
        content_target,
        style_weight,
        content_weight,
        var_weight,
    ):
        """
        Calculates the gradients of the loss function with respect to the
        generated image

        generated_image:
            the generated image
        """
        with tf.GradientTape() as tape:
            style_features = self.get_features(generated_image, type="style")
            content_features = self.get_features(generated_image, type="content")
            loss, style_loss, content_loss = self._style_content_loss(
                style_target,
                style_features,
                content_target,
                content_features,
                style_weight,
                content_weight,
            )

            # the total variation term is computed inside the tape so its
            # gradient reaches the generated image
            variational_loss = var_weight * tf.image.total_variation(generated_image)
            loss += variational_loss

        grads = tape.gradient(loss, generated_image)
        return grads, loss, [style_loss, content_loss, variational_loss]

    def _update_image_with_style(
        self,
        generated_image,
        style_target,
        content_target,
        style_weight,
        content_weight,
        optimizer,
        var_weight,
    ):
        grads, loss, loss_list = self._grad_loss(
            generated_image,
            style_target,
            content_target,
            style_weight,
            content_weight,
            var_weight,
        )

        optimizer.apply_gradients([(grads, generated_image)])

        # keep the image in the valid [0, 255] pixel range
        generated_image.assign(
            tf.clip_by_value(generated_image, clip_value_min=0.0, clip_value_max=255.0)
        )
        return loss_list
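The einsum in _gram_matrix is the textbook Gram matrix: flatten the height-by-width grid of feature vectors into F and compute F-transpose times F, averaged over the number of spatial locations. A self-contained check with arbitrary toy shapes:

import tensorflow as tf

features = tf.random.uniform((1, 4, 4, 8))  # (batch, height, width, channels)

# einsum form used by the model: sum over the spatial indices i, j
gram_einsum = tf.linalg.einsum("bijc,bijd->bcd", features, features) / 16.0

# equivalent matrix form: flatten space, then F^T F over the 16 locations
flat = tf.reshape(features, (1, 16, 8))
gram_matmul = tf.matmul(flat, flat, transpose_a=True) / 16.0

print(tf.reduce_max(tf.abs(gram_einsum - gram_matmul)).numpy())  # ~0, up to float error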
requirements.txt
ADDED
@@ -0,0 +1,5 @@
tensorflow-cpu
gradio
keras
matplotlib
numpy
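None of the requirements are pinned. The UI code relies on Gradio-3-era APIs (gr.Image takes a shape argument, which Gradio 4 removed), so if the Space stops building, a constraint along these lines is a reasonable assumption to try:

gradio<4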