# =============================================================================
# Project: WHITE-BOX-CARTOONIZATION
# Authors: Amey Thakur & Mega Satish
# Date: 2021-08-28
# Repository: https://github.com/Amey-Thakur/WHITE-BOX-CARTOONIZATION
# Profiles: https://github.com/Amey-Thakur | https://github.com/msatmod
# =============================================================================
"""
network.py
=============================================================================
This file defines the Neural Network architecture used for cartoonization.
It uses a "U-Net" based Generator with Residual Blocks.

Key Components:
1. Convolutional Layers: To extract features (edges, textures) from the image.
2. Leaky ReLU: Activation function to introduce non-linearity.
3. Residual Blocks (ResBlock): To help the network learn complex
   transformations without losing original details.
4. U-Net Structure: Downsamples the image to understand global context,
   then upsamples it back to original size.
=============================================================================
"""

import tensorflow as tf
import numpy as np

# tf_slim is the standalone successor of tensorflow.contrib.slim (removed in
# TF 2.x). Try the modern package first, then the legacy TF 1.x location.
try:
    import tf_slim as slim
except ImportError:
    try:
        import tensorflow.contrib.slim as slim
    except ImportError:
        # Fail loudly at import time: every layer below depends on `slim`,
        # so continuing would only defer the failure to a confusing NameError.
        raise ImportError(
            "Error: Could not import slim. Please install tf-slim."
        )


def resblock(inputs, out_channel=32, name='resblock'):
    """Define a Residual Block.

    Input -> [Conv -> ReLU -> Conv] + Input -> Output

    This "skip connection" (+ Input) prevents the gradient from vanishing
    and allows the network to learn "residuals" (changes) rather than
    constructing from scratch.

    Args:
        inputs: 4-D feature tensor (NHWC assumed by the 2-D convolutions —
            TODO confirm against caller).
        out_channel: Number of output channels for both convolutions. Must
            match the channel count of `inputs` for the skip addition.
        name: Variable scope name for this block.

    Returns:
        Tensor of the same shape as `inputs`.
    """
    with tf.compat.v1.variable_scope(name):
        # First convolution (no built-in activation; LeakyReLU applied next).
        x = slim.convolution2d(inputs, out_channel, [3, 3],
                               activation_fn=None, scope='conv1')
        x = tf.nn.leaky_relu(x)

        # Second convolution (linear output so the skip addition stays clean).
        x = slim.convolution2d(x, out_channel, [3, 3],
                               activation_fn=None, scope='conv2')

        # Add the original input back to the result (skip connection).
        return x + inputs


def unet_generator(inputs, channel=32, num_blocks=4, name='generator',
                   reuse=False):
    """Define the Generator Network.

    Structure: Encoder -> Bottleneck (ResBlocks) -> Decoder

    Args:
        inputs: 4-D input image tensor (NHWC assumed — TODO confirm).
        channel: Base channel count; deeper layers use 2x and 4x multiples.
        num_blocks: Number of residual blocks in the bottleneck.
        name: Variable scope name for the whole generator.
        reuse: Whether to reuse variables in the scope (TF1-style sharing).

    Returns:
        4-D tensor with 3 channels (RGB) at the input's spatial resolution
        (assuming the input height/width are divisible by 4).
    """
    with tf.compat.v1.variable_scope(name, reuse=reuse):
        # --- ENCODER (Downsampling) ---
        # Reduce the spatial size (Height/Width) but increase the depth
        # (Channels).

        # Initial convolution (7x7 kernel to capture large features).
        x0 = slim.convolution2d(inputs, channel, [7, 7], activation_fn=None)
        x0 = tf.nn.leaky_relu(x0)

        # Downsample 1: stride-2 conv halves H/W, then widen to 2x channels.
        x1 = slim.convolution2d(x0, channel, [3, 3], stride=2,
                                activation_fn=None)
        x1 = tf.nn.leaky_relu(x1)
        x1 = slim.convolution2d(x1, channel * 2, [3, 3], activation_fn=None)
        x1 = tf.nn.leaky_relu(x1)

        # Downsample 2: halve H/W again, widen to 4x channels.
        x2 = slim.convolution2d(x1, channel * 2, [3, 3], stride=2,
                                activation_fn=None)
        x2 = tf.nn.leaky_relu(x2)
        x2 = slim.convolution2d(x2, channel * 4, [3, 3], activation_fn=None)
        x2 = tf.nn.leaky_relu(x2)

        # --- BOTTLENECK (Processing) ---
        # Apply multiple residual blocks to process the image features
        # (the "cartoonizing" logic).
        for idx in range(num_blocks):
            x2 = resblock(x2, out_channel=channel * 4,
                          name='block_{}'.format(idx))

        # --- DECODER (Upsampling) ---
        # Increase spatial size back to original resolution.
        x2 = slim.convolution2d(x2, channel * 2, [3, 3], activation_fn=None)
        x2 = tf.nn.leaky_relu(x2)

        # Upsample 1: bilinear resize to double H/W (dynamic shape-safe).
        h1, w1 = tf.shape(x2)[1], tf.shape(x2)[2]
        x3 = tf.compat.v1.image.resize_bilinear(x2, (h1 * 2, w1 * 2))
        # +x1 is a skip connection from the encoder.
        x3 = slim.convolution2d(x3 + x1, channel * 2, [3, 3],
                                activation_fn=None)
        x3 = tf.nn.leaky_relu(x3)
        x3 = slim.convolution2d(x3, channel, [3, 3], activation_fn=None)
        x3 = tf.nn.leaky_relu(x3)

        # Upsample 2: double H/W again, back to the input resolution.
        h2, w2 = tf.shape(x3)[1], tf.shape(x3)[2]
        x4 = tf.compat.v1.image.resize_bilinear(x3, (h2 * 2, w2 * 2))
        # +x0 is a skip connection from the first encoder layer.
        x4 = slim.convolution2d(x4 + x0, channel, [3, 3], activation_fn=None)
        x4 = tf.nn.leaky_relu(x4)

        # Final convolution to produce an RGB image (3 channels). Linear
        # output — any range squashing (e.g. tanh) is the caller's job.
        x4 = slim.convolution2d(x4, 3, [7, 7], activation_fn=None)

        return x4


if __name__ == '__main__':
    pass