Spaces:
Sleeping
Sleeping
pages omitted
Browse files- AI_Decoded__Yin_2024.pdf +0 -0
- app.py +2 -2
- pages/1_Main_Deck.py +0 -38
- pages/2_Artificial_Neural_Networks.py +0 -441
- pages/3_Convolutional_Neural_Networks.py +0 -197
- pages/4_Convolutional_Operation.py +0 -148
- pages/5_Convolutional_Neural_Networks_Advanced.py +0 -651
- pages/6_Autoencoders.py +0 -154
- pages/AI_Decoded__Yin_2024.pdf +0 -0
AI_Decoded__Yin_2024.pdf
DELETED
|
Binary file (633 kB)
|
|
|
app.py
CHANGED
|
@@ -15,7 +15,7 @@ st.markdown(
|
|
| 15 |
Streamlit is an open-source app framework built specifically for
|
| 16 |
Machine Learning and Data Science projects.
|
| 17 |
|
| 18 |
-
|
| 19 |
-
|
| 20 |
"""
|
| 21 |
)
|
|
|
|
| 15 |
Streamlit is an open-source app framework built specifically for
|
| 16 |
Machine Learning and Data Science projects.
|
| 17 |
|
| 18 |
+
- Go to `docs/notebooks` for access to the Python notebooks
|
| 19 |
+
- For more information, go [here](https://wyn-education.streamlit.app/)
|
| 20 |
"""
|
| 21 |
)
|
pages/1_Main_Deck.py
DELETED
|
@@ -1,38 +0,0 @@
|
|
| 1 |
-
import streamlit as st
|
| 2 |
-
import base64
|
| 3 |
-
|
| 4 |
-
def get_pdf_file_content_as_base64(file_path: str):
|
| 5 |
-
"""
|
| 6 |
-
Converts a PDF file to base64 for embedding.
|
| 7 |
-
Args:
|
| 8 |
-
file_path (str): The file path to the PDF file.
|
| 9 |
-
|
| 10 |
-
Returns:
|
| 11 |
-
str: The base64 encoded string of the PDF file.
|
| 12 |
-
"""
|
| 13 |
-
with open(file_path, "rb") as pdf_file:
|
| 14 |
-
base64_encoded_data = base64.b64encode(pdf_file.read()).decode('utf-8')
|
| 15 |
-
return base64_encoded_data
|
| 16 |
-
|
| 17 |
-
def main():
|
| 18 |
-
st.title('PDF Display in Streamlit')
|
| 19 |
-
|
| 20 |
-
# Path to the PDF file
|
| 21 |
-
pdf_path = "AI_Decoded__Yin_2024.pdf"
|
| 22 |
-
|
| 23 |
-
# Display the PDF by downloading it
|
| 24 |
-
with open(pdf_path, "rb") as file:
|
| 25 |
-
btn = st.download_button(
|
| 26 |
-
label="Download PDF",
|
| 27 |
-
data=file,
|
| 28 |
-
file_name="xyz.pdf",
|
| 29 |
-
mime="application/octet-stream"
|
| 30 |
-
)
|
| 31 |
-
|
| 32 |
-
# Display the PDF embedded in the page
|
| 33 |
-
base64_pdf = get_pdf_file_content_as_base64(pdf_path)
|
| 34 |
-
pdf_display = f'<iframe src="data:application/pdf;base64,{base64_pdf}" width="700" height="1000" type="application/pdf"></iframe>'
|
| 35 |
-
st.markdown(pdf_display, unsafe_allow_html=True)
|
| 36 |
-
|
| 37 |
-
if __name__ == "__main__":
|
| 38 |
-
main()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
pages/2_Artificial_Neural_Networks.py
DELETED
|
@@ -1,441 +0,0 @@
|
|
| 1 |
-
def page_for_ann():
|
| 2 |
-
import streamlit as st
|
| 3 |
-
|
| 4 |
-
st.write("# Welcome to Artificial Neural Network! 👋")
|
| 5 |
-
st.sidebar.success("Select a session from the menu of content.")
|
| 6 |
-
|
| 7 |
-
st.markdown(
|
| 8 |
-
r"""
|
| 9 |
-
# Artificial Neural Networks Overview
|
| 10 |
-
|
| 11 |
-
Artificial Neural Networks (ANNs) are a cornerstone of modern machine learning, offering powerful tools for learning representations and making predictions. This document covers key aspects of ANNs, including representation learning, forward propagation, loss functions, optimization techniques, and a practical application example using TensorFlow.
|
| 12 |
-
|
| 13 |
-
## What is Representation Learning and the Importance of Neural Networks?
|
| 14 |
-
|
| 15 |
-
Representation learning is a set of methods that allows a machine to automatically discover the representations needed for feature detection or classification from raw data. Neural networks are crucial in representation learning because they can learn complex patterns and relationships in the data. By adjusting weights and biases, neural networks transform input data into more useful representations for a specific task.
|
| 16 |
-
"""
|
| 17 |
-
)
|
| 18 |
-
|
| 19 |
-
video_file_ann = open("./videos/4_ann.mp4", "rb")
|
| 20 |
-
video_bytes_ann = video_file_ann.read()
|
| 21 |
-
st.video(video_bytes_ann)
|
| 22 |
-
|
| 23 |
-
st.markdown(
|
| 24 |
-
r"""
|
| 25 |
-
## Forward Propagation in Neural Networks
|
| 26 |
-
|
| 27 |
-
Forward propagation is the process of calculating the output of a neural network by applying a set of weights to the input data and using activation functions in the network layers.
|
| 28 |
-
"""
|
| 29 |
-
)
|
| 30 |
-
|
| 31 |
-
st.image(
|
| 32 |
-
"https://miro.medium.com/v2/resize:fit:640/1*sPg-0hha7o3iNPjY4n-vow.jpeg",
|
| 33 |
-
caption="Visualization of One Neuron (a.k.a. 'unit' or 'node' in the community) in an Artificial Neural Network (ANN)",
|
| 34 |
-
use_column_width=True,
|
| 35 |
-
)
|
| 36 |
-
|
| 37 |
-
st.markdown(
|
| 38 |
-
r"""
|
| 39 |
-
### Fully Connected Dense Layers
|
| 40 |
-
|
| 41 |
-
In a fully connected dense layer, each neuron is connected to every neuron in the previous layer. The output of each neuron is a weighted sum of its inputs, passed through an activation function.
|
| 42 |
-
|
| 43 |
-
### Activation Functions
|
| 44 |
-
|
| 45 |
-
- **ReLU (Rectified Linear Unit)**: ReLU is defined as $ f(x) = \max(0, x) $. It introduces non-linearity in the model, allowing the network to learn complex patterns. In the context of linear regression, ReLU can be seen as an extension that allows for more complex, non-linear relationships.
|
| 46 |
-
|
| 47 |
-
- **Sigmoid**: The sigmoid function outputs values between 0 and 1, making it suitable for binary classification. It is defined as $ f(x) = \frac{1}{1 + e^{-x}} $.
|
| 48 |
-
|
| 49 |
-
- **Softmax**: Used in multi-class classification, softmax converts a vector of values into a probability distribution. For a vector $ \mathbf{z} $, softmax is defined as $ \sigma(\mathbf{z})_i = \frac{e^{z_i}}{\sum_{j=1}^{K} e^{z_j}} $ for $ i = 1, ..., K $ and $ K $ being the number of classes.
|
| 50 |
-
"""
|
| 51 |
-
)
|
| 52 |
-
|
| 53 |
-
video_file_ann_forwardpropagation = open(
|
| 54 |
-
"./videos/4_ann_forwardpropagation.mp4", "rb"
|
| 55 |
-
)
|
| 56 |
-
video_bytes_ann_forwardpropagation = video_file_ann_forwardpropagation.read()
|
| 57 |
-
st.video(video_bytes_ann_forwardpropagation)
|
| 58 |
-
|
| 59 |
-
st.markdown(
|
| 60 |
-
r"""
|
| 61 |
-
## Loss Functions
|
| 62 |
-
|
| 63 |
-
Loss functions measure how well the model performs on the given data.
|
| 64 |
-
|
| 65 |
-
- **Mean Square Error (MSE)**: Used for regression tasks, it's defined as $ MSE = \frac{1}{n} \sum_{i=1}^{n} (y_i - \hat{y_i})^2 $, where $ y_i $ is the true value, $ \hat{y_i} $ is the predicted value, and $ n $ is the number of samples.
|
| 66 |
-
|
| 67 |
-
- **Binary Cross-Entropy**: Used for binary classification, it's defined as $ -\frac{1}{n} \sum_{i=1}^{n} [y_i \log(\hat{y_i}) + (1 - y_i) \log(1 - \hat{y_i})] $, where $ y_i $ is the true label, and $ \hat{y_i} $ is the predicted probability.
|
| 68 |
-
|
| 69 |
-
## Optimization and Gradient Descent
|
| 70 |
-
|
| 71 |
-
Optimization in neural networks is about finding the best set of weights and biases to minimize the loss function.
|
| 72 |
-
|
| 73 |
-
- **Gradient Descent**: This is an optimization algorithm used to minimize the loss function by iteratively moving in the direction of steepest descent. The update rule is $ \theta = \theta - \alpha \nabla_\theta J(\theta) $, where $ \theta $ represents parameters, $ \alpha $ is the learning rate, and $ \nabla_\theta J(\theta) $ is the gradient of the loss function.
|
| 74 |
-
"""
|
| 75 |
-
)
|
| 76 |
-
|
| 77 |
-
video_file_ann_loss = open("./videos/4_ann_loss.mp4", "rb")
|
| 78 |
-
video_bytes_ann_loss = video_file_ann_loss.read()
|
| 79 |
-
st.video(video_bytes_ann_loss)
|
| 80 |
-
|
| 81 |
-
st.markdown(
|
| 82 |
-
r"""
|
| 83 |
-
## Practical Application: 3-Layer Neural Network in TensorFlow
|
| 84 |
-
|
| 85 |
-
Here's an example of implementing a 3-layer neural network using TensorFlow:
|
| 86 |
-
|
| 87 |
-
```python
|
| 88 |
-
import tensorflow as tf
|
| 89 |
-
|
| 90 |
-
# Define the model
|
| 91 |
-
model = tf.keras.Sequential([
|
| 92 |
-
tf.keras.layers.Dense(128, activation='relu', input_shape=(input_shape,)),
|
| 93 |
-
tf.keras.layers.Dense(64, activation='relu'),
|
| 94 |
-
tf.keras.layers.Dense(num_classes, activation='softmax')
|
| 95 |
-
])
|
| 96 |
-
|
| 97 |
-
# Compile the model
|
| 98 |
-
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
|
| 99 |
-
|
| 100 |
-
# Train the model
|
| 101 |
-
model.fit(X_train, y_train, epochs=10, validation_data=(X_val, y_val))
|
| 102 |
-
```
|
| 103 |
-
|
| 104 |
-
- The model is defined with three layers: two hidden layers with ReLU activation and an output layer with softmax activation.
|
| 105 |
-
- The model is compiled with the Adam optimizer and categorical cross-entropy loss function.
|
| 106 |
-
- `model.fit` trains the model with the training data, specifying epochs and validation data.
|
| 107 |
-
|
| 108 |
-
This TensorFlow example demonstrates the construction, compilation, and training of a basic ANN for classification tasks.
|
| 109 |
-
|
| 110 |
-
"""
|
| 111 |
-
)
|
| 112 |
-
|
| 113 |
-
st.markdown(
|
| 114 |
-
r"""
|
| 115 |
-
# Tensorflow Implementation on Colab
|
| 116 |
-
|
| 117 |
-
# MNIST Data
|
| 118 |
-
|
| 119 |
-
According to Yann LeCun on this [site](http://yann.lecun.com/exdb/mnist/#:~:text=It%20is%20a%20good%20database,efforts%20on%20preprocessing%20and%20formatting.), the MNIST database of handwritten digits, available from this page, has a training set of 60,000 examples, and a test set of 10,000 examples. It is a subset of a larger set available from NIST. The digits have been size-normalized and centered in a fixed-size image.
|
| 120 |
-
|
| 121 |
-
It is a good database for people who want to try learning techniques and pattern recognition methods on real-world data while spending minimal efforts on preprocessing and formatting.
|
| 122 |
-
|
| 123 |
-
## Research Question
|
| 124 |
-
|
| 125 |
-
Can AI recognize pictures?
|
| 126 |
-
|
| 127 |
-
## Agenda
|
| 128 |
-
|
| 129 |
-
This is the standard image data for developing neural network models and convolutional neural network models.
|
| 130 |
-
|
| 131 |
-
- Neural Networks (1st meeting)
|
| 132 |
-
- Convolutional Neural Networks (2nd meeting)
|
| 133 |
-
|
| 134 |
-
Once you have the basic knowledge, there are two tasks:
|
| 135 |
-
- First, you build a model to make good predictions on MNIST dataset
|
| 136 |
-
- Next, you will get a new data set that is similar and you will build advanced models to make good predictions on it. Plan: We use the MNIST Fashion dataset.
|
| 137 |
-
|
| 138 |
-
Goal: To develop state-of-the-art CNN models.
|
| 139 |
-
|
| 140 |
-
## Notes
|
| 141 |
-
|
| 142 |
-
Most of the notes are taken using markdown area. To learn more diverse editing techniques for you to take quick notes using markdown, please click [here](https://colab.research.google.com/notebooks/markdown_guide.ipynb).
|
| 143 |
-
|
| 144 |
-
An interesting visualization is [here](https://media.licdn.com/dms/image/C4D12AQFKRQOp_aXz0g/article-cover_image-shrink_600_2000/0/1577211633664?e=2147483647&v=beta&t=pZ5TdXalXICIqPXTDBX6NP-CYlGS3wE2Kn6y1XiYjHI)
|
| 145 |
-
"""
|
| 146 |
-
)
|
| 147 |
-
|
| 148 |
-
st.image(
|
| 149 |
-
"https://media.licdn.com/dms/image/C4D12AQFKRQOp_aXz0g/article-cover_image-shrink_600_2000/0/1577211633664?e=2147483647&v=beta&t=pZ5TdXalXICIqPXTDBX6NP-CYlGS3wE2Kn6y1XiYjHI",
|
| 150 |
-
caption="Neural Network Architecture",
|
| 151 |
-
use_column_width=True,
|
| 152 |
-
)
|
| 153 |
-
|
| 154 |
-
st.markdown(
|
| 155 |
-
r"""
|
| 156 |
-
|
| 157 |
-
Given a picture, we can look at the pixel values. These pixel values provide information of the content/object in the image. These values are propagated into a network of neurons that are connected with lines (weights or also known as parameters) to be concluded to a final labeled category. The network architecture including width and length is completely dependent on the scientists and the data.
|
| 158 |
-
```python
|
| 159 |
-
# import library
|
| 160 |
-
import tensorflow as tf
|
| 161 |
-
|
| 162 |
-
# use tf API to get data
|
| 163 |
-
mnist = tf.keras.datasets.mnist
|
| 164 |
-
(training_images, training_labels), (test_images, test_labels) = mnist.load_data()
|
| 165 |
-
print(training_images.shape)
|
| 166 |
-
print(training_images[0].shape)
|
| 167 |
-
|
| 168 |
-
# import
|
| 169 |
-
import os
|
| 170 |
-
import pickle
|
| 171 |
-
|
| 172 |
-
# dump: this means save the desired file into a pickle file
|
| 173 |
-
# note: a pickle file is a file ends with .pkl format
|
| 174 |
-
# b = (0, 1)
|
| 175 |
-
a = (training_images, training_labels), (test_images, test_labels)
|
| 176 |
-
with open('mnist_data.pkl', 'wb') as handle:
|
| 177 |
-
pickle.dump(a, handle, protocol=pickle.HIGHEST_PROTOCOL)
|
| 178 |
-
|
| 179 |
-
# load: this means start loading the desired file from the pickle file
|
| 180 |
-
# note: this means that you do a with-open clause, and grab whatever
|
| 181 |
-
# inside of the .pkl file and defined it as a new object
|
| 182 |
-
# please make sure the name and the directory are correct
|
| 183 |
-
with open('mnist_data.pkl', 'rb') as handle:
|
| 184 |
-
b = pickle.load(handle)
|
| 185 |
-
|
| 186 |
-
# preview
|
| 187 |
-
type(b), len(b)
|
| 188 |
-
|
| 189 |
-
# library
|
| 190 |
-
import matplotlib.pyplot as plt
|
| 191 |
-
|
| 192 |
-
# plot one sample
|
| 193 |
-
plt.imshow(training_images[0])
|
| 194 |
-
plt.title('title: this is digit ' + str(training_labels[0]))
|
| 195 |
-
plt.colorbar()
|
| 196 |
-
|
| 197 |
-
# library
|
| 198 |
-
import numpy as np
|
| 199 |
-
|
| 200 |
-
# unique digits
|
| 201 |
-
np.unique(training_labels)
|
| 202 |
-
```
|
| 203 |
-
|
| 204 |
-
### One hot encode
|
| 205 |
-
|
| 206 |
-
We need to use *to_categorical* function to do one-hot encode. The operation of one-hot encode will check each unique level of the data and create a new binary vector for that level.
|
| 207 |
-
|
| 208 |
-
For example, the vector below has 3 levels: 1, 3, 4. Hence, for each level (for each of the value 1, 3, 4), there will be a new vector created that only takes values 1 or 0.
|
| 209 |
-
|
| 210 |
-
$$
|
| 211 |
-
\begin{bmatrix}
|
| 212 |
-
1 \\
|
| 213 |
-
3 \\
|
| 214 |
-
1 \\
|
| 215 |
-
4 \\
|
| 216 |
-
\end{bmatrix}
|
| 217 |
-
\rightarrow_\text{one hot}
|
| 218 |
-
\begin{bmatrix}
|
| 219 |
-
1 & 0 & 0\\
|
| 220 |
-
0 & 1 & 0\\
|
| 221 |
-
1 & 0 & 0\\
|
| 222 |
-
0 & 0 & 1\\
|
| 223 |
-
\end{bmatrix}
|
| 224 |
-
$$
|
| 225 |
-
|
| 226 |
-
Source is [here](https://www.tensorflow.org/api_docs/python/tf/keras/utils/to_categorical).
|
| 227 |
-
|
| 228 |
-
```python
|
| 229 |
-
# one hot encode
|
| 230 |
-
training_labels_one_hot_encoded = tf.keras.utils.to_categorical(training_labels, num_classes=10)
|
| 231 |
-
test_labels_one_hot_encoded = tf.keras.utils.to_categorical(test_labels, num_classes=10)
|
| 232 |
-
|
| 233 |
-
# rescale
|
| 234 |
-
training_images = training_images / 255
|
| 235 |
-
test_images = test_images / 255
|
| 236 |
-
|
| 237 |
-
```
|
| 238 |
-
|
| 239 |
-
## Activation Functions
|
| 240 |
-
|
| 241 |
-
An Activation Function decides whether a neuron should be activated or not. This means that it will decide whether the neuron's input to the network is important or not in the process of prediction using simpler mathematical operations. See [here](https://miro.medium.com/max/1400/1*p_hyqAtyI8pbt2kEl6siOQ.png).
|
| 242 |
-
"""
|
| 243 |
-
)
|
| 244 |
-
|
| 245 |
-
st.image(
|
| 246 |
-
"https://miro.medium.com/max/1400/1*p_hyqAtyI8pbt2kEl6siOQ.png",
|
| 247 |
-
caption="Activation Functions",
|
| 248 |
-
use_column_width=True,
|
| 249 |
-
)
|
| 250 |
-
|
| 251 |
-
st.markdown(
|
| 252 |
-
r"""
|
| 253 |
-
Why use activation function? It is because we need to have non-linear component in each neuron.
|
| 254 |
-
|
| 255 |
-
## Softmax Function
|
| 256 |
-
|
| 257 |
-
The softmax function is used as the activation function in the output layer of neural network models that predict a multinomial probability distribution. That is, softmax is used as the activation function for multi-class classification problems where class membership is required on more than two class labels. See [here](https://miro.medium.com/max/805/1*ReYpdIZ3ZSAPb2W8cJpkBg.jpeg).
|
| 258 |
-
"""
|
| 259 |
-
)
|
| 260 |
-
|
| 261 |
-
st.image(
|
| 262 |
-
"https://miro.medium.com/max/805/1*ReYpdIZ3ZSAPb2W8cJpkBg.jpeg",
|
| 263 |
-
caption="Softmax Functions",
|
| 264 |
-
use_column_width=True,
|
| 265 |
-
)
|
| 266 |
-
|
| 267 |
-
st.markdown(
|
| 268 |
-
r"""
|
| 269 |
-
## Regularization
|
| 270 |
-
|
| 271 |
-
Regularization refers to techniques that are used to calibrate machine learning models in order to minimize the adjusted loss function and prevent overfitting or underfitting. Using Regularization, we can fit our machine learning model appropriately on a given test set and hence reduce the errors in it. See [here](https://miro.medium.com/proxy/1*mh-QVwSKbe2kYCBh9mwA0Q.png).
|
| 272 |
-
|
| 273 |
-
If you are using L1 (this is the square one on the left), you are more likely to have sparse results (this means some of your weights are zeros).
|
| 274 |
-
"""
|
| 275 |
-
)
|
| 276 |
-
|
| 277 |
-
st.image(
|
| 278 |
-
"https://miro.medium.com/proxy/1*mh-QVwSKbe2kYCBh9mwA0Q.png",
|
| 279 |
-
caption="Regularizations",
|
| 280 |
-
use_column_width=True,
|
| 281 |
-
)
|
| 282 |
-
|
| 283 |
-
st.markdown(
|
| 284 |
-
r"""
|
| 285 |
-
### Question: Is this good or bad?
|
| 286 |
-
|
| 287 |
-
It depends. If you want to avoid overfitting way more than you do, use L1. If you don't really care about it, you can use L2.
|
| 288 |
-
|
| 289 |
-
## Build a Neural Network
|
| 290 |
-
|
| 291 |
-
```python
|
| 292 |
-
# use sequential api to build model
|
| 293 |
-
model = tf.keras.models.Sequential([
|
| 294 |
-
tf.keras.layers.Flatten(), # neural network requires the input layer to be a vector instead of 2D array
|
| 295 |
-
## Your Changes Start Here ##
|
| 296 |
-
# Things to change:
|
| 297 |
-
# number of hidden layers
|
| 298 |
-
# number of neurons per hidden layer
|
| 299 |
-
# the activation functions: Please refer this website: https://keras.io/api/layers/activations/
|
| 300 |
-
# the kernel_regularizer: Please refer this website: https://www.tensorflow.org/api_docs/python/tf/keras/regularizers/Regularizer
|
| 301 |
-
tf.keras.layers.Dense(1024, activation='relu', use_bias=True), # input units (usually starts with 128) and activation (it's a choice, usually relu)
|
| 302 |
-
tf.keras.layers.Dense(512, activation='relu', use_bias=True), # create more hidden layer if desired
|
| 303 |
-
tf.keras.layers.Dense(64, activation='relu', use_bias=True), # create more hidden layer if desired
|
| 304 |
-
# ... you can have however many you want
|
| 305 |
-
## Your Changes Ends Here ##
|
| 306 |
-
tf.keras.layers.Dense(10, activation='softmax') # output layer or end layer | you have to match the number of classes
|
| 307 |
-
])
|
| 308 |
-
```
|
| 309 |
-
|
| 310 |
-
### Loss Function: MSE
|
| 311 |
-
|
| 312 |
-
What is a loss function? A loss function helps you to identify how many mistakes the model is making. The most common one (originated from linear regression) is L2 loss or also known as the square loss. This loss function is written below
|
| 313 |
-
|
| 314 |
-
$$\mathcal{L}(Y, \hat{Y}) = \frac{1}{n} \sum_{i=1}^n (y_i - \hat{y}_i)^2$$
|
| 315 |
-
|
| 316 |
-
where the $Y$ is ground truth at a vector level and the $\hat{Y}$ is the educated guess or estimate from the model at a vector level. The left hand side of the formula is defined by the right hand side which means that the vectors can be expanded using sub-index $i$. This sub-index $i$ is a running index indicating which sample the function is referring to. For example, let us take a look at the baby version below.
|
| 317 |
-
|
| 318 |
-
For example, suppose we have a few data points below. Denote $y_i - \hat{y}_i$ as "diff" for short hand nickname. Then the table below walks you through how to execute the above formula mathematically.
|
| 319 |
-
|
| 320 |
-
| $i$ | $y_i$ | $\hat{y}_i$ | $\text{diff}$ | $\text{diff}^2$ |
|
| 321 |
-
| --- | --- | --- | --- | --- |
|
| 322 |
-
| 1 | 10 | 20 | -10 | 100 |
|
| 323 |
-
| 2 | 10 | 5 | 5 | 25 |
|
| 324 |
-
|
| 325 |
-
If you watch carefully, the formula is essentially $\text{something}^2$ (we read "something square" or "square of something"), this means that it is a [convex function](https://en.wikipedia.org/wiki/Convex_function). Hence, there always exists an optimal point!!!
|
| 326 |
-
|
| 327 |
-
### Loss Function: BCE
|
| 328 |
-
|
| 329 |
-
If you are training a binary classifier, chances are you are using binary cross-entropy / log loss as your loss function. Loss Function: Binary Cross-Entropy / Log Loss takes the following form
|
| 330 |
-
|
| 331 |
-
$$\mathcal{L}(y, \hat{y}) = -\frac{1}{n} \sum_{i=1}^n \left[ y_i \log(\hat{y}_i) + (1 - y_i) \log (1-\hat{y}_i) \right]$$
|
| 332 |
-
|
| 333 |
-
where $y$ is the label and $\hat{y}$ is the predicted probability of the data point for all $n$ points.
|
| 334 |
-
|
| 335 |
-
The plot below gives us a clear picture — as the predicted probability of the true class gets closer to zero, the loss increases exponentially. See [here](https://miro.medium.com/max/836/1*T8KWtAn8FkAcsg8RsjiZ6Q.png).
|
| 336 |
-
"""
|
| 337 |
-
)
|
| 338 |
-
|
| 339 |
-
st.image(
|
| 340 |
-
"https://miro.medium.com/max/836/1*T8KWtAn8FkAcsg8RsjiZ6Q.png",
|
| 341 |
-
caption="Visualization of Cross-entropy",
|
| 342 |
-
use_column_width=True,
|
| 343 |
-
)
|
| 344 |
-
|
| 345 |
-
st.markdown(
|
| 346 |
-
r"""
|
| 347 |
-
Consider the following scenarios:
|
| 348 |
-
- If predicted probabilities $\hat{y}_i$ is small (close to 0) and truth $y_i = 0$, what happens?
|
| 349 |
-
- If predicted probabilities $\hat{y}_i$ is small (close to 0) and truth $y_i = 1$, what happens?
|
| 350 |
-
- If predicted probabilities $\hat{y}_i$ is large (close to 1) and truth $y_i = 0$, what happens?
|
| 351 |
-
- If predicted probabilities $\hat{y}_i$ is large (close to 1) and truth $y_i = 1$, what happens?
|
| 352 |
-
|
| 353 |
-
Answers:
|
| 354 |
-
- If prediction $\hat{y}_i = 0.001$ and $y_i = 0$, then the loss is $-(1-0)\log(1-0.001)=-\log(0.999) \approx 0$, i.e. error is small so the model is good
|
| 355 |
-
- If prediction $\hat{y}_i = 0.001$ and $y_i = 1$, then the loss is $-\log(0.001) = 3$, i.e. the error is large so the model is bad
|
| 356 |
-
- If prediction $\hat{y}_i = 0.999$ and $y_i = 0$, then the loss is $-(1-0)\log(1-0.999)=-\log(0.001)=3$, i.e. the error is large so the model is bad
|
| 357 |
-
- If prediction $\hat{y}_i = 0.999$ and $y_i = 1$, then the loss is $-\log(0.999) \approx 0$, i.e. error is small so the model is good
|
| 358 |
-
|
| 359 |
-
A good paper to refer to is [here](https://www.semanticscholar.org/paper/On-Loss-Functions-for-Deep-Neural-Networks-in-Janocha-Czarnecki/d589392ab606a3d2861988ebcba95176517939ec). See [here](https://d3i71xaburhd42.cloudfront.net/d589392ab606a3d2861988ebcba95176517939ec/2-Table1-1.png).
|
| 360 |
-
|
| 361 |
-
### Optimization
|
| 362 |
-
|
| 363 |
-
An optimizer is an algorithm to search for the most optimal (some minimum value) on a loss surface. Recall the loss surface is how many mistakes the model is making, so it is intuitive that the desired point (the optimal point) is where the loss is small. The location where the loss is small is the "best" parameters we need in our model. See [here](https://miro.medium.com/max/1400/1*47skUygd3tWf3yB9A10QHg.gif).
|
| 364 |
-
"""
|
| 365 |
-
)
|
| 366 |
-
|
| 367 |
-
st.image(
|
| 368 |
-
"https://towardsdatascience.com/a-visual-explanation-of-gradient-descent-methods-momentum-adagrad-rmsprop-adam-f898b102325c",
|
| 369 |
-
caption="Visualization of Gradient Descent",
|
| 370 |
-
use_column_width=True,
|
| 371 |
-
)
|
| 372 |
-
|
| 373 |
-
st.markdown(
|
| 374 |
-
r"""
|
| 375 |
-
- Animation of gradient descent is [here](https://towardsdatascience.com/a-visual-explanation-of-gradient-descent-methods-momentum-adagrad-rmsprop-adam-f898b102325c)
|
| 376 |
-
- Other animation can be found [here](https://towardsai.net/p/machine-learning/analysis-of-learning-rate-in-gradient-descent-algorithm-using-python)
|
| 377 |
-
Today, there are many different upgraded optimization algorithms. Please see [here](https://user-images.githubusercontent.com/11681225/49325458-fc785480-f585-11e8-8d2a-9012d6024c6e.gif).
|
| 378 |
-
|
| 379 |
-
Source of this animation is [here](https://github.com/ilguyi/optimizers.numpy).
|
| 380 |
-
"""
|
| 381 |
-
)
|
| 382 |
-
|
| 383 |
-
with st.expander("Code💻"):
|
| 384 |
-
st.markdown(
|
| 385 |
-
r"""
|
| 386 |
-
|
| 387 |
-
```python
|
| 388 |
-
# compile
|
| 389 |
-
model.compile(
|
| 390 |
-
## Your Changes Start Here ##
|
| 391 |
-
# optimizer: https://keras.io/api/optimizers/
|
| 392 |
-
# loss: https://keras.io/api/losses/
|
| 393 |
-
optimizer="adam",
|
| 394 |
-
loss="categorical_crossentropy",
|
| 395 |
-
## Your Changes Ends Here ##
|
| 396 |
-
metrics=['accuracy']
|
| 397 |
-
)
|
| 398 |
-
|
| 399 |
-
# check gpu
|
| 400 |
-
%tensorflow_version 2.x
|
| 401 |
-
import tensorflow as tf
|
| 402 |
-
device_name = tf.test.gpu_device_name()
|
| 403 |
-
if device_name != '/device:GPU:0':
|
| 404 |
-
raise SystemError('GPU device not found')
|
| 405 |
-
print('Found GPU at: {}'.format(device_name))
|
| 406 |
-
|
| 407 |
-
# callbacks
|
| 408 |
-
callbacks = [ tf.keras.callbacks.ModelCheckpoint("toy_mnist_model.h5", save_best_only=True) ]
|
| 409 |
-
|
| 410 |
-
%%time
|
| 411 |
-
# fit | we train the model here
|
| 412 |
-
# use GPU
|
| 413 |
-
with tf.device('/device:GPU:0'):
|
| 414 |
-
history = model.fit(
|
| 415 |
-
training_images, training_labels_one_hot_encoded,
|
| 416 |
-
validation_split=0.2,
|
| 417 |
-
## Your Changes Start Here ##
|
| 418 |
-
# there is a sweet spot, you don't want to overfit, but you also want to make sure your model learned something
|
| 419 |
-
epochs=10
|
| 420 |
-
## Your Changes Ends Here ##
|
| 421 |
-
)
|
| 422 |
-
|
| 423 |
-
# use CPU
|
| 424 |
-
history = model.fit(
|
| 425 |
-
training_images, # features | explanatory variables
|
| 426 |
-
training_labels_one_hot_encoded, # label <= create yourself (it's just a vector of 1's and 0's)
|
| 427 |
-
validation_split=0.2,
|
| 428 |
-
## Your Changes Start Here ##
|
| 429 |
-
# there is a sweet spot, you don't want to overfit, but you also want to make sure your model learned something
|
| 430 |
-
epochs=10,
|
| 431 |
-
## Your Changes Ends Here ##
|
| 432 |
-
callbacks=callbacks
|
| 433 |
-
)
|
| 434 |
-
|
| 435 |
-
# save model: it must be .h5 format
|
| 436 |
-
model.save('your_neuralnet_model.h5')
|
| 437 |
-
```
|
| 438 |
-
"""
|
| 439 |
-
)
|
| 440 |
-
|
| 441 |
-
page_for_ann()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
pages/3_Convolutional_Neural_Networks.py
DELETED
|
@@ -1,197 +0,0 @@
|
|
| 1 |
-
def page_for_cnn():
|
| 2 |
-
import streamlit as st
|
| 3 |
-
|
| 4 |
-
st.write("# Welcome to Convolutional Neural Network! 👋")
|
| 5 |
-
st.sidebar.success("Select a session from the menu of content.")
|
| 6 |
-
|
| 7 |
-
st.markdown(
|
| 8 |
-
r"""
|
| 9 |
-
# Convolutional Neural Network (CNN) Overview
|
| 10 |
-
|
| 11 |
-
This document provides a comprehensive explanation of Convolutional Neural Networks (CNNs), a class of deep neural networks, widely used for analyzing visual imagery.
|
| 12 |
-
|
| 13 |
-
## Motivation for CNNs
|
| 14 |
-
|
| 15 |
-
### Difference from Tabular Data
|
| 16 |
-
Convolutional Neural Networks are specifically designed for processing data that comes in the form of multiple arrays, such as images. Unlike tabular data where each feature column can be independent, in image data, neighboring pixels are closely related, suggesting the presence of spatial hierarchy in features - a trait CNNs capitalize on.
|
| 17 |
-
|
| 18 |
-
### Advantage Over Traditional Neural Networks
|
| 19 |
-
CNNs preserve the spatial relationship between pixels by learning internal feature representations using small squares of input data. They require fewer parameters compared to fully connected networks with the same depth, making them more efficient and reducing the risk of overfitting.
|
| 20 |
-
"""
|
| 21 |
-
)
|
| 22 |
-
|
| 23 |
-
video_file_cnn = open("./videos/6_cnn.mp4", "rb")
|
| 24 |
-
video_bytes_cnn = video_file_cnn.read()
|
| 25 |
-
st.video(video_bytes_cnn)
|
| 26 |
-
|
| 27 |
-
st.markdown(
|
| 28 |
-
r"""
|
| 29 |
-
## Convolution Operation in CNNs
|
| 30 |
-
|
| 31 |
-
### Convolution with an Image and Filter
|
| 32 |
-
A convolution operation involves a filter (also known as a kernel) which is a small matrix used to apply effects like blurring, sharpening, edge detection, etc. The convolution operation is an element-wise multiplication of the filter matrix and the part of the image it is covering, followed by summing up these multiplication results into a single output pixel.
|
| 33 |
-
"""
|
| 34 |
-
)
|
| 35 |
-
|
| 36 |
-
st.image(
|
| 37 |
-
"https://miro.medium.com/v2/resize:fit:1052/0*jLoqqFsO-52KHTn9.gif",
|
| 38 |
-
caption="Visualization of Convolutional Operation",
|
| 39 |
-
use_column_width=True,
|
| 40 |
-
)
|
| 41 |
-
|
| 42 |
-
st.markdown(
|
| 43 |
-
r"""
|
| 44 |
-
To illustrate a convolution operation using element-wise matrix multiplication, let's define two matrices and perform the operation. The convolution operation is typically denoted by $ \otimes $.
|
| 45 |
-
|
| 46 |
-
Let's consider a 3x3 matrix $ A $ and a 2x2 filter $ B $, and we'll perform the convolution operation on them. For simplicity, we'll use a stride of 1 and no padding.
|
| 47 |
-
|
| 48 |
-
Matrix $ A $ (the image):
|
| 49 |
-
$$ A = \begin{bmatrix} 1 & 2 & 3 \\ 4 & 5 & 6 \\ 7 & 8 & 9 \end{bmatrix} $$
|
| 50 |
-
|
| 51 |
-
Filter $ B $ (the kernel):
|
| 52 |
-
$$ B = \begin{bmatrix} -1 & 0 \\ 0 & 1 \end{bmatrix} $$
|
| 53 |
-
|
| 54 |
-
The convolution operation $ A \otimes B $ is calculated as follows:
|
| 55 |
-
|
| 56 |
-
1. Place the filter on the top-left corner of the matrix $ A $, and perform element-wise multiplication followed by a sum:
|
| 57 |
-
|
| 58 |
-
$$ \begin{bmatrix} 1 & 2 \\ 4 & 5 \end{bmatrix} \otimes \begin{bmatrix} -1 & 0 \\ 0 & 1 \end{bmatrix} = (1 \times -1) + (2 \times 0) + (4 \times 0) + (5 \times 1) = -1 + 0 + 0 + 5 = 4 $$
|
| 59 |
-
|
| 60 |
-
2. Move the filter one stride to the right and repeat:
|
| 61 |
-
|
| 62 |
-
$$ \begin{bmatrix} 2 & 3 \\ 5 & 6 \end{bmatrix} \otimes \begin{bmatrix} -1 & 0 \\ 0 & 1 \end{bmatrix} = (2 \times -1) + (3 \times 0) + (5 \times 0) + (6 \times 1) = -2 + 0 + 0 + 6 = 4 $$
|
| 63 |
-
|
| 64 |
-
3. Move the filter down one stride and repeat for the left column:
|
| 65 |
-
|
| 66 |
-
$$ \begin{bmatrix} 4 & 5 \\ 7 & 8 \end{bmatrix} \otimes \begin{bmatrix} -1 & 0 \\ 0 & 1 \end{bmatrix} = (4 \times -1) + (5 \times 0) + (7 \times 0) + (8 \times 1) = -4 + 0 + 0 + 8 = 4 $$
|
| 67 |
-
|
| 68 |
-
4. Finally, move the filter one stride to the right:
|
| 69 |
-
|
| 70 |
-
$$ \begin{bmatrix} 5 & 6 \\ 8 & 9 \end{bmatrix} \otimes \begin{bmatrix} -1 & 0 \\ 0 & 1 \end{bmatrix} = (5 \times -1) + (6 \times 0) + (8 \times 0) + (9 \times 1) = -5 + 0 + 0 + 9 = 4 $$
|
| 71 |
-
|
| 72 |
-
So, the resulting matrix after the convolution operation is:
|
| 73 |
-
$$ \begin{bmatrix} 4 & 4 \\ 4 & 4 \end{bmatrix} $$
|
| 74 |
-
|
| 75 |
-
This numerical example demonstrates a basic convolution operation with a stride of 1, no padding, and element-wise multiplication followed by a sum.
|
| 76 |
-
"""
|
| 77 |
-
)
|
| 78 |
-
|
| 79 |
-
st.image(
|
| 80 |
-
"https://indiantechwarrior.com/wp-content/uploads/2022/05/conv3.gif",
|
| 81 |
-
caption="Visualization of Convolutional Operation (multiple filters)",
|
| 82 |
-
use_column_width=True,
|
| 83 |
-
)
|
| 84 |
-
|
| 85 |
-
st.markdown(
|
| 86 |
-
r"""
|
| 87 |
-
### Filter Size and Stride
|
| 88 |
-
- **Filter Size**: The size of the filter (e.g., 3x3, 5x5) determines the number of pixels in the image to consider at a time for applying the convolution operation.
|
| 89 |
-
- **Stride**: Stride defines the number of pixels by which we slide the filter across the image. A larger stride results in a smaller output size.
|
| 90 |
-
|
| 91 |
-
## Pooling Layers in CNNs
|
| 92 |
-
|
| 93 |
-
### Max Pooling and Average Pooling
|
| 94 |
-
- **Max Pooling**: This operation reduces the spatial size (width, height) of the input volume for the next convolutional layer. It takes the maximum value over a window defined by the filter size.
|
| 95 |
-
|
| 96 |
-
- **Average Pooling**: Similar to max pooling, but it takes the average of the elements in the filter window.
|
| 97 |
-
"""
|
| 98 |
-
)
|
| 99 |
-
|
| 100 |
-
st.image(
|
| 101 |
-
"https://i.stack.imgur.com/pLlwx.png",
|
| 102 |
-
caption="Visualization of Impact of Filter on Image",
|
| 103 |
-
use_column_width=True,
|
| 104 |
-
)
|
| 105 |
-
|
| 106 |
-
st.markdown(
|
| 107 |
-
r"""
|
| 108 |
-
## What are filters in image processing?
|
| 109 |
-
|
| 110 |
-
In the context of image processing (and, in general, signal processing), the kernels (also known as filters) are used to perform some specific operation on the image. For example, you can use a [Gaussian filter](https://homepages.inf.ed.ac.uk/rbf/HIPR2/gsmooth.htm) to smooth the image (including its edges).
|
| 111 |
-
|
| 112 |
-
## What are filters in CNNs?
|
| 113 |
-
|
| 114 |
-
In the context of convolutional neural networks (CNNs), the filters (or kernels) are the learnable parameters of the model.
|
| 115 |
-
|
| 116 |
-
Before training, the kernels are usually randomly initialised (so they are not usually hardcoded). During training, depending on the loss or error of the network (according to the loss function), the kernels (or filters) are updated, so that to minimise the loss (or error). After training, they are typically fixed. Incredibly, the filters learned by CNNs can be similar to the [Gabor filter](https://en.wikipedia.org/wiki/Gabor_filter) (which is thought to be related to our visual system [this paper](http://www.cns.nyu.edu/%7Etony/vns/readings/olshausen-field-1996.pdf)). See [figure 9.19 of chapter 9 (p. 365) of the Deep Learning book by Goodfellow et al.](https://www.deeplearningbook.org/contents/convnets.html).
|
| 117 |
-
|
| 118 |
-
## What are the differences and similarities?
|
| 119 |
-
|
| 120 |
-
In both contexts, the words "kernel" and "filter" are roughly synonymous, so they are often used interchangeably. Furthermore, in both cases, the kernels are related to the [convolution (or cross-correlation) operation](https://ai.stackexchange.com/a/22000/2444). More specifically, the application of a filter, which is a function $h$ to an input, which is another function $f$, is equivalent to the convolution of $f$ and $h$. In mathematics, this is often denoted by $f \otimes h = g$, where $\otimes$ is the convolution operator and $g$ is the result of the convolution operation and is often called the convolution (of $f$ and $h$) as we discussed above. In the case of image processing, $g$ is the filtered image. In the case of CNNs $g$ is often called an [activation map](https://stats.stackexchange.com/a/292064/82135).
|
| 121 |
-
|
| 122 |
-
## Convolution Operation on Color Images
|
| 123 |
-
|
| 124 |
-
Color images have three channels (RGB). The convolution operation is applied to each channel separately with the same filter, and the results are summed up to produce the final output.
|
| 125 |
-
|
| 126 |
-
## Convolution in 3D Data (e.g., MRI Scans)
|
| 127 |
-
|
| 128 |
-
In the case of 3D data, like MRI scans, the convolution operation is extended to three dimensions. The filter used for convolution is also a 3D matrix, moving along the depth of the input volume in addition to the height and width.
|
| 129 |
-
"""
|
| 130 |
-
)
|
| 131 |
-
|
| 132 |
-
st.image(
|
| 133 |
-
"https://d3i71xaburhd42.cloudfront.net/ce3e49a4abd5b3ce6b615b60b4ffe6d09cb76a90/2-Figure1-1.png",
|
| 134 |
-
caption="MRI Classification using 3D Convolutional Neural Networks (credits to https://arxiv.org/abs/2003.08818)",
|
| 135 |
-
use_column_width=True,
|
| 136 |
-
)
|
| 137 |
-
|
| 138 |
-
st.markdown(
|
| 139 |
-
r"""
|
| 140 |
-
## Example: LeNet Style CNN in TensorFlow
|
| 141 |
-
|
| 142 |
-
Below is an example of a LeNet-style CNN model implemented using TensorFlow:
|
| 143 |
-
|
| 144 |
-
```python
|
| 145 |
-
import tensorflow as tf
|
| 146 |
-
|
| 147 |
-
model = tf.keras.Sequential([
|
| 148 |
-
# C1 Convolutional Layer
|
| 149 |
-
tf.keras.layers.Conv2D(filters=6, kernel_size=(3, 3), activation='relu', input_shape=(32,32,1)),
|
| 150 |
-
|
| 151 |
-
# S2 Pooling Layer
|
| 152 |
-
tf.keras.layers.MaxPooling2D(pool_size=(2, 2), strides=2),
|
| 153 |
-
|
| 154 |
-
# C3 Convolutional Layer
|
| 155 |
-
tf.keras.layers.Conv2D(filters=16, kernel_size=(3, 3), activation='relu'),
|
| 156 |
-
|
| 157 |
-
# S4 Pooling Layer
|
| 158 |
-
tf.keras.layers.MaxPooling2D(pool_size=(2, 2), strides=2),
|
| 159 |
-
|
| 160 |
-
# Flatten the CNN output to feed it with fully connected layers
|
| 161 |
-
tf.keras.layers.Flatten(),
|
| 162 |
-
|
| 163 |
-
# C5 Fully Connected Layer
|
| 164 |
-
tf.keras.layers.Dense(units=120, activation='relu'),
|
| 165 |
-
|
| 166 |
-
# F6 Fully Connected Layer
|
| 167 |
-
tf.keras.layers.Dense(units=84, activation='relu'),
|
| 168 |
-
|
| 169 |
-
# Output Layer with softmax activation
|
| 170 |
-
tf.keras.layers.Dense(units=10, activation='softmax')
|
| 171 |
-
])
|
| 172 |
-
|
| 173 |
-
# Compile the model
|
| 174 |
-
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
|
| 175 |
-
|
| 176 |
-
# Model summary
|
| 177 |
-
model.summary()
|
| 178 |
-
```
|
| 179 |
-
|
| 180 |
-
### Explanation of the Code
|
| 181 |
-
- The model starts with a `Sequential` container as it is a linear stack of layers.
|
| 182 |
-
- The first layer, `Conv2D`, performs the convolution operation with 6 filters each of size 3x3 and ReLU activation. The input shape (32,32,1) corresponds to 32x32 pixels with 1 channel (grayscale).
|
| 183 |
-
- The `MaxPooling2D` layer performs max pooling with a 2x2 window and stride of 2, reducing the spatial dimensions.
|
| 184 |
-
- Another convolutional and max pooling layer follow, with more filters but the same approach.
|
| 185 |
-
- `Flatten` is used to flatten the output from the convolutional layers to feed into fully connected layers.
|
| 186 |
-
- Two `Dense` layers follow, representing fully connected layers with ReLU activation.
|
| 187 |
-
- The final `Dense` layer has a softmax activation function, used for multi-class classification.
|
| 188 |
-
|
| 189 |
-
This model demonstrates the basic architecture of a CNN, closely resembling
|
| 190 |
-
|
| 191 |
-
the LeNet architecture, one of the earliest and most influential CNN architectures.
|
| 192 |
-
|
| 193 |
-
"""
|
| 194 |
-
)
|
| 195 |
-
|
| 196 |
-
|
| 197 |
-
page_for_cnn()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
pages/4_Convolutional_Operation.py
DELETED
|
@@ -1,148 +0,0 @@
|
|
| 1 |
-
def page_for_convop():
|
| 2 |
-
import streamlit as st
|
| 3 |
-
|
| 4 |
-
st.write("# Welcome to Convolutional Operation! 👋")
|
| 5 |
-
st.sidebar.success("Select a session from the menu of content.")
|
| 6 |
-
|
| 7 |
-
st.markdown(
|
| 8 |
-
r"""
|
| 9 |
-
# Convolution Operation in Statistics
|
| 10 |
-
|
| 11 |
-
This document provides an in-depth explanation of the convolution operation in statistics, its application in image data, and the mathematical principles behind it.
|
| 12 |
-
"""
|
| 13 |
-
)
|
| 14 |
-
|
| 15 |
-
st.image(
|
| 16 |
-
"https://quincyaflint.weebly.com/uploads/2/6/5/0/26500868/convolution-1-fast_orig.gif",
|
| 17 |
-
caption="Convolutional Operation (the convolution is the product of the overlapping areas as the signal is shifted)",
|
| 18 |
-
use_column_width=True,
|
| 19 |
-
)
|
| 20 |
-
|
| 21 |
-
st.markdown(
|
| 22 |
-
r"""
|
| 23 |
-
## Convolution Operation: Integration and Summation
|
| 24 |
-
|
| 25 |
-
### Definition
|
| 26 |
-
In statistics and signal processing, convolution is a mathematical operation that combines two functions to produce a third function. It represents the amount of overlap between one function as it is shifted over another.
|
| 27 |
-
|
| 28 |
-
### In Continuous and Discrete Forms
|
| 29 |
-
- **Continuous Convolution**: In continuous domains, it is defined as the integral of the product of the two functions after one is reversed and shifted. For functions $ f $ and $ g $, the convolution is written as:
|
| 30 |
-
|
| 31 |
-
$$ (f * g)(t) = \int_{-\infty}^{\infty} f(\tau) g(t - \tau) d\tau $$
|
| 32 |
-
|
| 33 |
-
- **Discrete Convolution**: In discrete domains, such as in signal processing or image analysis, convolution is typically expressed as a summation:
|
| 34 |
-
|
| 35 |
-
$$ (f * g)[n] = \sum_{m=-\infty}^{\infty} f[m] g[n - m] $$
|
| 36 |
-
"""
|
| 37 |
-
)
|
| 38 |
-
|
| 39 |
-
video_file_convop = open("./videos/5_convop.mp4", "rb")
|
| 40 |
-
video_bytes_convop = video_file_convop.read()
|
| 41 |
-
st.video(video_bytes_convop)
|
| 42 |
-
|
| 43 |
-
st.markdown(
|
| 44 |
-
r"""
|
| 45 |
-
## Image Data and Its Dimensions
|
| 46 |
-
|
| 47 |
-
### Image Data Structure
|
| 48 |
-
Image data is fundamentally different from tabular data. An image can be represented as a matrix of pixel values. In terms of dimensions:
|
| 49 |
-
|
| 50 |
-
- **Width and Height**: These are the dimensions of the image in pixels.
|
| 51 |
-
- **3 in (Width, Height, 3)**: This represents the three color channels, typically Red, Green, and Blue (RGB). Each pixel has three values corresponding to the intensity of each color channel.
|
| 52 |
-
|
| 53 |
-
### Contrast with Tabular Data
|
| 54 |
-
Tabular data is structured in rows and columns, each column potentially being a different feature type (numerical, categorical, etc.), whereas image data is a consistent array of pixel intensity values across its dimensions.
|
| 55 |
-
|
| 56 |
-
## Motivation for Convolution Operation in Image Processing
|
| 57 |
-
|
| 58 |
-
Convolution in image processing is used for various purposes such as:
|
| 59 |
-
|
| 60 |
-
- **Feature Detection**: Detecting features like edges, corners, or textures.
|
| 61 |
-
- **Image Blurring and Sharpening**: Smoothing or emphasizing details in an image.
|
| 62 |
-
- **Noise Reduction**: Eliminating random variations in the image's intensity.
|
| 63 |
-
|
| 64 |
-
These operations are achieved by applying different convolution kernels (filters) over the image.
|
| 65 |
-
"""
|
| 66 |
-
)
|
| 67 |
-
|
| 68 |
-
video_file_convop_imagedata = open("./videos/5_convop_imagedata.mp4", "rb")
|
| 69 |
-
video_bytes_convop_imagedata = video_file_convop_imagedata.read()
|
| 70 |
-
st.video(video_bytes_convop_imagedata)
|
| 71 |
-
|
| 72 |
-
st.markdown(
|
| 73 |
-
r"""
|
| 74 |
-
## Applying Convolution Operation to 2D Data
|
| 75 |
-
|
| 76 |
-
### Convolution with Matrices
|
| 77 |
-
Convolution can be applied to 2D data like images using matrices. If $ A $ is an image matrix and $ B $ is a kernel matrix, the convolution $ C $ is computed as follows:
|
| 78 |
-
|
| 79 |
-
- For each element $ C[i, j] $ of the convolution matrix:
|
| 80 |
-
$$ C[i, j] = \sum_{u} \sum_{v} A[i + u, j + v] \cdot B[u, v] $$
|
| 81 |
-
|
| 82 |
-
- Here, $ A[i + u, j + v] $ is a pixel value from the image, and $ B[u, v] $ is a value from the kernel. The kernel is slid over the image, and at each position, the product of the overlapped values of $ A $ and $ B $ is computed and summed to produce an entry in the output matrix $ C $.
|
| 83 |
-
|
| 84 |
-
This operation effectively applies a filter to the image, transforming it in a way that depends on the properties of the kernel.
|
| 85 |
-
"""
|
| 86 |
-
)
|
| 87 |
-
|
| 88 |
-
video_file_convop_matrices = open("./videos/5_convop_matrices.mp4", "rb")
|
| 89 |
-
video_bytes_convop_matrices = video_file_convop_matrices.read()
|
| 90 |
-
st.video(video_bytes_convop_matrices)
|
| 91 |
-
|
| 92 |
-
st.markdown(
|
| 93 |
-
r"""
|
| 94 |
-
## TensorFlow Conv1D and Conv2D API Explanation
|
| 95 |
-
|
| 96 |
-
This document provides a brief explanation of TensorFlow's Conv1D and Conv2D APIs, with example code snippets to illustrate their usage.
|
| 97 |
-
|
| 98 |
-
### Conv1D in TensorFlow
|
| 99 |
-
|
| 100 |
-
`Conv1D` is used for convolutional operations on 1-dimensional data, commonly seen in time-series analysis or natural language processing.
|
| 101 |
-
|
| 102 |
-
#### Example Code:
|
| 103 |
-
|
| 104 |
-
```python
|
| 105 |
-
import tensorflow as tf
|
| 106 |
-
|
| 107 |
-
# Example of a Conv1D layer
|
| 108 |
-
model = tf.keras.Sequential([
|
| 109 |
-
tf.keras.layers.Conv1D(filters=32, kernel_size=3, activation='relu', input_shape=(10, 64))
|
| 110 |
-
])
|
| 111 |
-
```
|
| 112 |
-
|
| 113 |
-
- `filters=32`: Specifies the number of output filters in the convolution.
|
| 114 |
-
- `kernel_size=3`: The length of the 1D convolution window.
|
| 115 |
-
- `activation='relu'`: The activation function used after the convolution.
|
| 116 |
-
- `input_shape=(10, 64)`: The shape of the input data (10 time steps, 64 features per step).
|
| 117 |
-
|
| 118 |
-
`Conv1D` is ideal for data where the relationship between consecutive data points is significant.
|
| 119 |
-
|
| 120 |
-
### Conv2D in TensorFlow
|
| 121 |
-
|
| 122 |
-
`Conv2D` is used for convolutional operations on 2-dimensional data, like images.
|
| 123 |
-
|
| 124 |
-
#### Example Code:
|
| 125 |
-
|
| 126 |
-
```python
|
| 127 |
-
# Example of a Conv2D layer
|
| 128 |
-
model = tf.keras.Sequential([
|
| 129 |
-
tf.keras.layers.Conv2D(filters=64, kernel_size=(3, 3), activation='relu', input_shape=(28, 28, 1))
|
| 130 |
-
])
|
| 131 |
-
```
|
| 132 |
-
|
| 133 |
-
- `filters=64`: Number of output filters in the convolution.
|
| 134 |
-
- `kernel_size=(3, 3)`: The height and width of the 2D convolution window.
|
| 135 |
-
- `activation='relu'`: Activation function after the convolution.
|
| 136 |
-
- `input_shape=(28, 28, 1)`: Shape of input data (28x28 pixels, 1 color channel).
|
| 137 |
-
|
| 138 |
-
`Conv2D` is widely used in image processing tasks as it helps the model learn spatial hierarchies in images.
|
| 139 |
-
|
| 140 |
-
---
|
| 141 |
-
|
| 142 |
-
Both `Conv1D` and `Conv2D` are crucial layers in building neural networks for processing sequential data and images, respectively. They help in extracting important features from the data, which is essential for tasks like classification, recognition, and prediction.
|
| 143 |
-
|
| 144 |
-
"""
|
| 145 |
-
)
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
page_for_convop()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
pages/5_Convolutional_Neural_Networks_Advanced.py
DELETED
|
@@ -1,651 +0,0 @@
|
|
| 1 |
-
def page_for_cnnadvanced():
|
| 2 |
-
import streamlit as st
|
| 3 |
-
|
| 4 |
-
st.write("# Welcome to Advanced Convolutional Neural Networks! 👋")
|
| 5 |
-
st.sidebar.success("Select a session from the menu of content.")
|
| 6 |
-
|
| 7 |
-
st.markdown(
|
| 8 |
-
r"""
|
| 9 |
-
"""
|
| 10 |
-
)
|
| 11 |
-
|
| 12 |
-
st.markdown(
|
| 13 |
-
r"""
|
| 14 |
-
## VGG16
|
| 15 |
-
|
| 16 |
-
VGG16 is a deep convolutional neural network (CNN) architecture developed by the Visual Geometry Group at the University of Oxford. Here's a simple implementation of the VGG16 model using TensorFlow 2.x and Keras Sequential API:
|
| 17 |
-
"""
|
| 18 |
-
)
|
| 19 |
-
|
| 20 |
-
st.image(
|
| 21 |
-
"https://datagen.tech/wp-content/uploads/2022/11/image2-1.png",
|
| 22 |
-
caption="VGG16",
|
| 23 |
-
use_column_width=True,
|
| 24 |
-
)
|
| 25 |
-
|
| 26 |
-
with st.expander("Code💻"):
|
| 27 |
-
st.markdown(
|
| 28 |
-
r"""
|
| 29 |
-
```py
|
| 30 |
-
import tensorflow as tf
|
| 31 |
-
from tensorflow.keras import layers, models
|
| 32 |
-
|
| 33 |
-
def build_vgg16_model(input_shape=(224, 224, 3), num_classes=1000):
|
| 34 |
-
model = models.Sequential()
|
| 35 |
-
|
| 36 |
-
# Block 1: 2x Convolutional Layers + Max Pooling
|
| 37 |
-
model.add(layers.Conv2D(64, kernel_size=(3, 3), padding='same', activation='relu', input_shape=input_shape))
|
| 38 |
-
model.add(layers.Conv2D(64, kernel_size=(3, 3), padding='same', activation='relu'))
|
| 39 |
-
model.add(layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
|
| 40 |
-
|
| 41 |
-
# Block 2: 2x Convolutional Layers + Max Pooling
|
| 42 |
-
model.add(layers.Conv2D(128, kernel_size=(3, 3), padding='same', activation='relu'))
|
| 43 |
-
model.add(layers.Conv2D(128, kernel_size=(3, 3), padding='same', activation='relu'))
|
| 44 |
-
model.add(layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
|
| 45 |
-
|
| 46 |
-
# Block 3: 3x Convolutional Layers + Max Pooling
|
| 47 |
-
model.add(layers.Conv2D(256, kernel_size=(3, 3), padding='same', activation='relu'))
|
| 48 |
-
model.add(layers.Conv2D(256, kernel_size=(3, 3), padding='same', activation='relu'))
|
| 49 |
-
model.add(layers.Conv2D(256, kernel_size=(3, 3), padding='same', activation='relu'))
|
| 50 |
-
model.add(layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
|
| 51 |
-
|
| 52 |
-
# Block 4: 3x Convolutional Layers + Max Pooling
|
| 53 |
-
model.add(layers.Conv2D(512, kernel_size=(3, 3), padding='same', activation='relu'))
|
| 54 |
-
model.add(layers.Conv2D(512, kernel_size=(3, 3), padding='same', activation='relu'))
|
| 55 |
-
model.add(layers.Conv2D(512, kernel_size=(3, 3), padding='same', activation='relu'))
|
| 56 |
-
model.add(layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
|
| 57 |
-
|
| 58 |
-
# Block 5: 3x Convolutional Layers + Max Pooling
|
| 59 |
-
model.add(layers.Conv2D(512, kernel_size=(3, 3), padding='same', activation='relu'))
|
| 60 |
-
model.add(layers.Conv2D(512, kernel_size=(3, 3), padding='same', activation='relu'))
|
| 61 |
-
model.add(layers.Conv2D(512, kernel_size=(3, 3), padding='same', activation='relu'))
|
| 62 |
-
model.add(layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
|
| 63 |
-
|
| 64 |
-
# Flatten the output
|
| 65 |
-
model.add(layers.Flatten())
|
| 66 |
-
|
| 67 |
-
# Fully Connected Layers
|
| 68 |
-
model.add(layers.Dense(4096, activation='relu'))
|
| 69 |
-
model.add(layers.Dropout(0.5))
|
| 70 |
-
model.add(layers.Dense(4096, activation='relu'))
|
| 71 |
-
model.add(layers.Dropout(0.5))
|
| 72 |
-
|
| 73 |
-
# Output Layer
|
| 74 |
-
model.add(layers.Dense(num_classes, activation='softmax'))
|
| 75 |
-
|
| 76 |
-
return model
|
| 77 |
-
|
| 78 |
-
# Build the VGG16 model
|
| 79 |
-
vgg16_model = build_vgg16_model(input_shape=(224, 224, 3), num_classes=1000)
|
| 80 |
-
vgg16_model.summary
|
| 81 |
-
```
|
| 82 |
-
"""
|
| 83 |
-
)
|
| 84 |
-
|
| 85 |
-
st.markdown(
|
| 86 |
-
r"""
|
| 87 |
-
VGG16 is introduced in a paper titled "Very Deep Convolutional Networks for Large-Scale Image Recognition" by K. Simonyan and A. Zisserman.
|
| 88 |
-
|
| 89 |
-
Here is the link to the VGG16 paper:
|
| 90 |
-
|
| 91 |
-
[Very Deep Convolutional Networks for Large-Scale Image Recognition](https://arxiv.org/abs/1409.1556)
|
| 92 |
-
"""
|
| 93 |
-
)
|
| 94 |
-
|
| 95 |
-
st.markdown(
|
| 96 |
-
r"""
|
| 97 |
-
## ResNet
|
| 98 |
-
|
| 99 |
-
ResNet (Residual Network) is a deep convolutional neural network (CNN) architecture that introduces residual connections to improve learning and avoid the vanishing gradient problem in deep networks. Here's a simple implementation of the ResNet-50 model using TensorFlow 2.x and Keras functional API:
|
| 100 |
-
"""
|
| 101 |
-
)
|
| 102 |
-
|
| 103 |
-
st.image(
|
| 104 |
-
"https://production-media.paperswithcode.com/methods/resnet-e1548261477164_2_mD02h5A.png",
|
| 105 |
-
caption="ResBlock",
|
| 106 |
-
use_column_width=True,
|
| 107 |
-
)
|
| 108 |
-
|
| 109 |
-
with st.expander("Code💻"):
|
| 110 |
-
st.markdown(
|
| 111 |
-
r"""
|
| 112 |
-
```py
|
| 113 |
-
import tensorflow as tf
|
| 114 |
-
from tensorflow.keras import layers, models, Input
|
| 115 |
-
|
| 116 |
-
def identity_block(input_tensor, kernel_size, filters):
|
| 117 |
-
filters1, filters2, filters3 = filters
|
| 118 |
-
|
| 119 |
-
x = layers.Conv2D(filters1, (1, 1))(input_tensor)
|
| 120 |
-
x = layers.BatchNormalization()(x)
|
| 121 |
-
x = layers.Activation('relu')(x)
|
| 122 |
-
|
| 123 |
-
x = layers.Conv2D(filters2, kernel_size, padding='same')(x)
|
| 124 |
-
x = layers.BatchNormalization()(x)
|
| 125 |
-
x = layers.Activation('relu')(x)
|
| 126 |
-
|
| 127 |
-
x = layers.Conv2D(filters3, (1, 1))(x)
|
| 128 |
-
x = layers.BatchNormalization()(x)
|
| 129 |
-
|
| 130 |
-
x = layers.add([x, input_tensor])
|
| 131 |
-
x = layers.Activation('relu')(x)
|
| 132 |
-
return x
|
| 133 |
-
|
| 134 |
-
def conv_block(input_tensor, kernel_size, filters, strides=(2, 2)):
|
| 135 |
-
filters1, filters2, filters3 = filters
|
| 136 |
-
|
| 137 |
-
x = layers.Conv2D(filters1, (1, 1), strides=strides)(input_tensor)
|
| 138 |
-
x = layers.BatchNormalization()(x)
|
| 139 |
-
x = layers.Activation('relu')(x)
|
| 140 |
-
|
| 141 |
-
x = layers.Conv2D(filters2, kernel_size, padding='same')(x)
|
| 142 |
-
x = layers.BatchNormalization()(x)
|
| 143 |
-
x = layers.Activation('relu')(x)
|
| 144 |
-
|
| 145 |
-
x = layers.Conv2D(filters3, (1, 1))(x)
|
| 146 |
-
x = layers.BatchNormalization()(x)
|
| 147 |
-
|
| 148 |
-
shortcut = layers.Conv2D(filters3, (1, 1), strides=strides)(input_tensor)
|
| 149 |
-
shortcut = layers.BatchNormalization()(shortcut)
|
| 150 |
-
|
| 151 |
-
x = layers.add([x, shortcut])
|
| 152 |
-
x = layers.Activation('relu')(x)
|
| 153 |
-
return x
|
| 154 |
-
|
| 155 |
-
def build_resnet50(input_shape=(224, 224, 3), num_classes=1000):
|
| 156 |
-
input_tensor = Input(shape=input_shape)
|
| 157 |
-
|
| 158 |
-
# Initial convolution layer
|
| 159 |
-
x = layers.ZeroPadding2D(padding=(3, 3))(input_tensor)
|
| 160 |
-
x = layers.Conv2D(64, (7, 7), strides=(2, 2))(x)
|
| 161 |
-
x = layers.BatchNormalization()(x)
|
| 162 |
-
x = layers.Activation('relu')(x)
|
| 163 |
-
x = layers.ZeroPadding2D(padding=(1, 1))(x)
|
| 164 |
-
x = layers.MaxPooling2D((3, 3), strides=(2, 2))(x)
|
| 165 |
-
|
| 166 |
-
# Residual blocks
|
| 167 |
-
x = conv_block(x, 3, [64, 64, 256], strides=(1, 1))
|
| 168 |
-
x = identity_block(x, 3, [64, 64, 256])
|
| 169 |
-
x = identity_block(x, 3, [64, 64, 256])
|
| 170 |
-
|
| 171 |
-
x = conv_block(x, 3, [128, 128, 512])
|
| 172 |
-
x = identity_block(x, 3, [128, 128, 512])
|
| 173 |
-
x = identity_block(x, 3, [128, 128, 512])
|
| 174 |
-
x = identity_block(x, 3, [128, 128, 512])
|
| 175 |
-
|
| 176 |
-
x = conv_block(x, 3, [256, 256, 1024])
|
| 177 |
-
x = identity_block(x, 3, [256, 256, 1024])
|
| 178 |
-
x = identity_block(x, 3, [256, 256, 1024])
|
| 179 |
-
x = identity_block(x, 3, [256, 256, 1024])
|
| 180 |
-
x = identity_block(x, 3, [256, 256, 1024])
|
| 181 |
-
|
| 182 |
-
x = conv_block(x, 3, [512, 512, 2048])
|
| 183 |
-
x = identity_block(x, 3, [512, 512, 2048])
|
| 184 |
-
x = identity_block(x, 3, [512, 512, 2048])
|
| 185 |
-
|
| 186 |
-
# Final part
|
| 187 |
-
x = layers.GlobalAveragePooling2D()(x)
|
| 188 |
-
x = layers.Dense(num_classes, activation='softmax')(x)
|
| 189 |
-
|
| 190 |
-
# Create model
|
| 191 |
-
model = Model(input_tensor, x)
|
| 192 |
-
|
| 193 |
-
return model
|
| 194 |
-
```
|
| 195 |
-
"""
|
| 196 |
-
)
|
| 197 |
-
|
| 198 |
-
st.markdown(
|
| 199 |
-
r"""
|
| 200 |
-
ResNet was introduced in the paper "Deep Residual Learning for Image Recognition" by Kaiming He, Xiangyu Zhang, Shaoqing Ren, and Jian Sun. Here's the link to the ResNet paper: [Deep Residual Learning for Image Recognition](https://arxiv.org/pdf/1512.03385.pdf)
|
| 201 |
-
|
| 202 |
-
## DenseNet
|
| 203 |
-
|
| 204 |
-
DenseNet (Densely Connected Convolutional Networks) is a deep convolutional neural network (CNN) architecture that introduces dense connections between layers to improve learning and parameter efficiency. Here's a simple implementation of the DenseNet-121 model using TensorFlow 2.x and Keras functional API:
|
| 205 |
-
"""
|
| 206 |
-
)
|
| 207 |
-
|
| 208 |
-
st.image(
|
| 209 |
-
"https://pytorch.org/assets/images/densenet1.png",
|
| 210 |
-
caption="DenseNet",
|
| 211 |
-
use_column_width=True,
|
| 212 |
-
)
|
| 213 |
-
|
| 214 |
-
with st.expander("Code💻"):
|
| 215 |
-
st.markdown(
|
| 216 |
-
r"""
|
| 217 |
-
```py
|
| 218 |
-
import tensorflow as tf
|
| 219 |
-
from tensorflow.keras import layers, models, Input
|
| 220 |
-
|
| 221 |
-
def dense_block(x, num_layers, growth_rate):
|
| 222 |
-
for _ in range(num_layers):
|
| 223 |
-
output = layers.BatchNormalization()(x)
|
| 224 |
-
output = layers.Activation('relu')(output)
|
| 225 |
-
output = layers.Conv2D(4 * growth_rate, (1, 1), padding='same', kernel_initializer='he_normal')(output)
|
| 226 |
-
output = layers.BatchNormalization()(output)
|
| 227 |
-
output = layers.Activation('relu')(output)
|
| 228 |
-
output = layers.Conv2D(growth_rate, (3, 3), padding='same', kernel_initializer='he_normal')(output)
|
| 229 |
-
x = layers.Concatenate()([x, output])
|
| 230 |
-
return x
|
| 231 |
-
|
| 232 |
-
def transition_layer(x, compression_factor):
|
| 233 |
-
num_filters = int(x.shape[-1] * compression_factor)
|
| 234 |
-
x = layers.BatchNormalization()(x)
|
| 235 |
-
x = layers.Activation('relu')(x)
|
| 236 |
-
x = layers.Conv2D(num_filters, (1, 1), padding='same', kernel_initializer='he_normal')(x)
|
| 237 |
-
x = layers.AveragePooling2D((2, 2), strides=(2, 2))(x)
|
| 238 |
-
return x
|
| 239 |
-
|
| 240 |
-
def build_densenet121(input_shape=(224, 224, 3), num_classes=1000, growth_rate=32, compression_factor=0.5):
|
| 241 |
-
input_tensor = Input(shape=input_shape)
|
| 242 |
-
|
| 243 |
-
# Initial convolution layer
|
| 244 |
-
x = layers.Conv2D(2 * growth_rate, (7, 7), strides=(2, 2), padding='same', kernel_initializer='he_normal')(input_tensor)
|
| 245 |
-
x = layers.BatchNormalization()(x)
|
| 246 |
-
x = layers.Activation('relu')(x)
|
| 247 |
-
x = layers.MaxPooling2D((3, 3), strides=(2, 2), padding='same')(x)
|
| 248 |
-
|
| 249 |
-
# Dense blocks and transition layers
|
| 250 |
-
x = dense_block(x, 6, growth_rate)
|
| 251 |
-
x = transition_layer(x, compression_factor)
|
| 252 |
-
|
| 253 |
-
x = dense_block(x, 12, growth_rate)
|
| 254 |
-
x = transition_layer(x, compression_factor)
|
| 255 |
-
|
| 256 |
-
x = dense_block(x, 24, growth_rate)
|
| 257 |
-
x = transition_layer(x, compression_factor)
|
| 258 |
-
|
| 259 |
-
x = dense_block(x, 16, growth_rate)
|
| 260 |
-
|
| 261 |
-
# Global average pooling and output layer
|
| 262 |
-
x = layers.GlobalAveragePooling2D()(x)
|
| 263 |
-
x = layers.Dense(num_classes, activation='softmax')(x)
|
| 264 |
-
|
| 265 |
-
model = models.Model(input_tensor, x, name='densenet121')
|
| 266 |
-
return model
|
| 267 |
-
|
| 268 |
-
# Build the DenseNet-121 model
|
| 269 |
-
densenet121_model = build_densenet121(input_shape=(224, 224, 3), num_classes=1000)
|
| 270 |
-
densenet121_model.summary()
|
| 271 |
-
```
|
| 272 |
-
"""
|
| 273 |
-
)
|
| 274 |
-
|
| 275 |
-
st.markdown(
|
| 276 |
-
r"""
|
| 277 |
-
DenseNet was introduced in the paper "Densely Connected Convolutional Networks" by Gao Huang, Zhuang Liu, Laurens van der Maaten, and Kilian Q. Weinberger. Here is the link to the original paper for Densenet: [Densely Connected Convolutional Networks](https://arxiv.org/abs/1608.06993)
|
| 278 |
-
"""
|
| 279 |
-
)
|
| 280 |
-
|
| 281 |
-
st.markdown(
|
| 282 |
-
r"""
|
| 283 |
-
## Inception
|
| 284 |
-
|
| 285 |
-
Inception is a deep convolutional neural network (CNN) architecture that was introduced in the GoogLeNet model. It uses inception modules to efficiently learn different features at multiple scales. Here's a simple implementation of the Inception V1 (GoogLeNet) model using TensorFlow 2.x and Keras functional API:
|
| 286 |
-
"""
|
| 287 |
-
)
|
| 288 |
-
|
| 289 |
-
st.image(
|
| 290 |
-
"https://media5.datahacker.rs/2018/11/Featured-Image-017-CNN-Inception-Network-1.jpg",
|
| 291 |
-
caption="Inception",
|
| 292 |
-
use_column_width=True,
|
| 293 |
-
)
|
| 294 |
-
|
| 295 |
-
with st.expander("Code💻"):
|
| 296 |
-
st.markdown(
|
| 297 |
-
r"""
|
| 298 |
-
```py
|
| 299 |
-
import tensorflow as tf
|
| 300 |
-
from tensorflow.keras import layers, models, Input
|
| 301 |
-
|
| 302 |
-
def inception_module(x, filters_1x1, filters_3x3_reduce, filters_3x3, filters_5x5_reduce, filters_5x5, filters_pool_proj):
|
| 303 |
-
conv_1x1 = layers.Conv2D(filters_1x1, (1, 1), padding='same', activation='relu', kernel_initializer='he_normal')(x)
|
| 304 |
-
|
| 305 |
-
conv_3x3_reduce = layers.Conv2D(filters_3x3_reduce, (1, 1), padding='same', activation='relu', kernel_initializer='he_normal')(x)
|
| 306 |
-
conv_3x3 = layers.Conv2D(filters_3x3, (3, 3), padding='same', activation='relu', kernel_initializer='he_normal')(conv_3x3_reduce)
|
| 307 |
-
|
| 308 |
-
conv_5x5_reduce = layers.Conv2D(filters_5x5_reduce, (1, 1), padding='same', activation='relu', kernel_initializer='he_normal')(x)
|
| 309 |
-
conv_5x5 = layers.Conv2D(filters_5x5, (5, 5), padding='same', activation='relu', kernel_initializer='he_normal')(conv_5x5_reduce)
|
| 310 |
-
|
| 311 |
-
max_pool = layers.MaxPooling2D((3, 3), strides=(1, 1), padding='same')(x)
|
| 312 |
-
pool_proj = layers.Conv2D(filters_pool_proj, (1, 1), padding='same', activation='relu', kernel_initializer='he_normal')(max_pool)
|
| 313 |
-
|
| 314 |
-
output = layers.Concatenate(axis=-1)([conv_1x1, conv_3x3, conv_5x5, pool_proj])
|
| 315 |
-
return output
|
| 316 |
-
|
| 317 |
-
def build_inception_v1(input_shape=(224, 224, 3), num_classes=1000):
|
| 318 |
-
input_tensor = Input(shape=input_shape)
|
| 319 |
-
|
| 320 |
-
# Initial convolution and max pooling layers
|
| 321 |
-
x = layers.Conv2D(64, (7, 7), strides=(2, 2), padding='same', activation='relu', kernel_initializer='he_normal')(input_tensor)
|
| 322 |
-
x = layers.MaxPooling2D((3, 3), strides=(2, 2), padding='same')(x)
|
| 323 |
-
|
| 324 |
-
# Local response normalization
|
| 325 |
-
x = layers.BatchNormalization()(x)
|
| 326 |
-
|
| 327 |
-
# Convolution and max pooling layers
|
| 328 |
-
x = layers.Conv2D(64, (1, 1), strides=(1, 1), padding='same', activation='relu', kernel_initializer='he_normal')(x)
|
| 329 |
-
x = layers.Conv2D(192, (3, 3), strides=(1, 1), padding='same', activation='relu', kernel_initializer='he_normal')(x)
|
| 330 |
-
|
| 331 |
-
# Local response normalization
|
| 332 |
-
x = layers.BatchNormalization()(x)
|
| 333 |
-
|
| 334 |
-
x = layers.MaxPooling2D((3, 3), strides=(2, 2), padding='same')(x)
|
| 335 |
-
|
| 336 |
-
# Inception modules
|
| 337 |
-
x = inception_module(x, filters_1x1=64, filters_3x3_reduce=96, filters_3x3=128, filters_5x5_reduce=16, filters_5x5=32, filters_pool_proj=32)
|
| 338 |
-
x = inception_module(x, filters_1x1=128, filters_
|
| 339 |
-
x = inception_module(x, filters_1x1=128, filters_3x3_reduce=128, filters_3x3=192, filters_5x5_reduce=32, filters_5x5=96, filters_pool_proj=64)
|
| 340 |
-
x = layers.MaxPooling2D((3, 3), strides=(2, 2), padding='same')(x)
|
| 341 |
-
|
| 342 |
-
x = inception_module(x, filters_1x1=192, filters_3x3_reduce=96, filters_3x3=208, filters_5x5_reduce=16, filters_5x5=48, filters_pool_proj=64)
|
| 343 |
-
x = inception_module(x, filters_1x1=160, filters_3x3_reduce=112, filters_3x3=224, filters_5x5_reduce=24, filters_5x5=64, filters_pool_proj=64)
|
| 344 |
-
x = inception_module(x, filters_1x1=128, filters_3x3_reduce=128, filters_3x3=256, filters_5x5_reduce=24, filters_5x5=64, filters_pool_proj=64)
|
| 345 |
-
x = inception_module(x, filters_1x1=112, filters_3x3_reduce=144, filters_3x3=288, filters_5x5_reduce=32, filters_5x5=64, filters_pool_proj=64)
|
| 346 |
-
x = inception_module(x, filters_1x1=256, filters_3x3_reduce=160, filters_3x3=320, filters_5x5_reduce=32, filters_5x5=128, filters_pool_proj=128)
|
| 347 |
-
x = layers.MaxPooling2D((3, 3), strides=(2, 2), padding='same')(x)
|
| 348 |
-
|
| 349 |
-
x = inception_module(x, filters_1x1=256, filters_3x3_reduce=160, filters_3x3=320, filters_5x5_reduce=32, filters_5x5=128, filters_pool_proj=128)
|
| 350 |
-
x = inception_module(x, filters_1x1=384, filters_3x3_reduce=192, filters_3x3=384, filters_5x5_reduce=48, filters_5x5=128, filters_pool_proj=128)
|
| 351 |
-
|
| 352 |
-
# Global average pooling and output layer
|
| 353 |
-
x = layers.GlobalAveragePooling2D()(x)
|
| 354 |
-
x = layers.Dropout(0.4)(x)
|
| 355 |
-
x = layers.Dense(num_classes, activation='softmax')(x)
|
| 356 |
-
|
| 357 |
-
model = models.Model(input_tensor, x, name='inception_v1')
|
| 358 |
-
return model
|
| 359 |
-
|
| 360 |
-
# Build the Inception V1 (GoogLeNet) model
|
| 361 |
-
inception_v1_model = build_inception_v1(input_shape=(224, 224, 3), num_classes=1000)
|
| 362 |
-
inception_v1_model.summary()
|
| 363 |
-
```
|
| 364 |
-
"""
|
| 365 |
-
)
|
| 366 |
-
|
| 367 |
-
st.markdown(
|
| 368 |
-
r"""
|
| 369 |
-
The paper that introduced the Inception model is titled "Going deeper with convolutions" by Christian Szegedy, Wei Liu, Yangqing Jia, Pierre Sermanet, Scott Reed, Dragomir Anguelov, Dumitru Erhan, Vincent Vanhoucke, and Andrew Rabinovich. The paper for Inception model can be found at this link: [Going deeper with convolutions](https://arxiv.org/abs/1409.4842)
|
| 370 |
-
|
| 371 |
-
"""
|
| 372 |
-
)
|
| 373 |
-
|
| 374 |
-
st.markdown(
|
| 375 |
-
r"""
|
| 376 |
-
## Vision Transformer (ViT)
|
| 377 |
-
|
| 378 |
-
The [Vision Transformer (ViT)](https://arxiv.org/pdf/2010.11929.pdf) model represents a significant leap in the field of computer vision and was first introduced in a groundbreaking research paper at the International Conference on Learning Representations (ICLR) 2021. The paper, titled "An Image is Worth 16*16 Words: Transformers for Image Recognition at Scale," not only presented this innovative model but also laid the foundation for its widespread application in various image recognition tasks. The ViT model is a testament to the evolving landscape of neural network architectures, particularly in how they handle image data.
|
| 379 |
-
|
| 380 |
-
Unlike traditional approaches that primarily rely on convolutional neural networks (CNNs), the ViT model adopts the transformer architecture, which has been immensely successful in natural language processing. This approach treats an image as a sequence of patches, much like words in a sentence, allowing the model to interpret and analyze images in a novel way.
|
| 381 |
-
|
| 382 |
-
Key steps in the Vision Transformer architecture include:
|
| 383 |
-
1. Splitting an image into fixed-size patches.
|
| 384 |
-
2. Flattening these patches.
|
| 385 |
-
3. Transforming the flattened patches into lower-dimensional linear embeddings.
|
| 386 |
-
4. Adding positional embeddings to retain the order of the patches.
|
| 387 |
-
5. Feeding these sequences into a standard transformer encoder, similar to those used in language models.
|
| 388 |
-
|
| 389 |
-
The initial training of ViT models involves leveraging large datasets like ImageNet and ImageNet-21k. This process, known as pretraining, is fully supervised and utilizes image labels to teach the model the fundamentals of image recognition. Subsequently, the model undergoes finetuning, where it is further trained on specific downstream datasets for tasks such as image classification.
|
| 390 |
-
|
| 391 |
-
The flexibility and efficiency of Vision Transformers have led to their adoption in a variety of applications beyond mere image classification. These include object detection, image segmentation, action recognition, and even in more complex tasks like generative modeling, visual grounding, visual-question answering, and visual reasoning.
|
| 392 |
-
|
| 393 |
-
Overall, the introduction of the Vision Transformer model marks a significant advancement in the way neural networks approach visual data, offering a fresh perspective and new possibilities in the field of computer vision.
|
| 394 |
-
"""
|
| 395 |
-
)
|
| 396 |
-
|
| 397 |
-
st.image(
|
| 398 |
-
"https://production-media.paperswithcode.com/methods/Screen_Shot_2021-01-26_at_9.43.31_PM_uI4jjMq.png",
|
| 399 |
-
caption="Vision Transformer (ViT)",
|
| 400 |
-
use_column_width=True,
|
| 401 |
-
)
|
| 402 |
-
|
| 403 |
-
with st.expander("Code💻"):
|
| 404 |
-
st.markdown(
|
| 405 |
-
r"""
|
| 406 |
-
```python
|
| 407 |
-
from typing import Any, Dict
|
| 408 |
-
|
| 409 |
-
import matplotlib.pyplot as plt
|
| 410 |
-
import numpy as np
|
| 411 |
-
import tensorflow as tf
|
| 412 |
-
import tensorflow_addons as tfa
|
| 413 |
-
from tensorflow import keras
|
| 414 |
-
from tensorflow.keras import layers
|
| 415 |
-
|
| 416 |
-
|
| 417 |
-
# learning_rate = 0.001
|
| 418 |
-
# weight_decay = 0.0001
|
| 419 |
-
# batch_size = 256
|
| 420 |
-
# num_epochs = 800
|
| 421 |
-
# image_size = 72 # We'll resize input images to this size
|
| 422 |
-
# patch_size = 6 # Size of the patches to be extract from the input images
|
| 423 |
-
# num_patches = (image_size // patch_size) ** 2
|
| 424 |
-
# projection_dim = 64
|
| 425 |
-
# num_heads = 4
|
| 426 |
-
# transformer_units = [
|
| 427 |
-
# projection_dim * 2,
|
| 428 |
-
# projection_dim,
|
| 429 |
-
# ] # Size of the transformer layers
|
| 430 |
-
# transformer_layers = 10
|
| 431 |
-
# mlp_head_units = [2048, 1024] # Size of the dense layers of the final classifier
|
| 432 |
-
|
| 433 |
-
|
| 434 |
-
def mlp(x: tf.Tensor, hidden_units: list[int], dropout_rate: float) -> tf.Tensor:
|
| 435 |
-
# Constructs a multi-layer perceptron with Gelu activation and dropout layers.
|
| 436 |
-
|
| 437 |
-
# Args:
|
| 438 |
-
# x (tf.Tensor): Input tensor to the MLP.
|
| 439 |
-
# hidden_units (list[int]): A list of integers for the number of units in each hidden layer.
|
| 440 |
-
# dropout_rate (float): The rate of dropout to apply after each hidden layer.
|
| 441 |
-
|
| 442 |
-
# Returns:
|
| 443 |
-
# A tensor representing the output of the MLP.
|
| 444 |
-
# For each specified number of hidden units,
|
| 445 |
-
# add a dense layer with gelu activation followed by dropout.
|
| 446 |
-
for units in hidden_units:
|
| 447 |
-
x = layers.Dense(units, activation=tf.nn.gelu)(x)
|
| 448 |
-
x = layers.Dropout(dropout_rate)(x)
|
| 449 |
-
|
| 450 |
-
# Return the final output after passing through all the hidden layers
|
| 451 |
-
return x
|
| 452 |
-
|
| 453 |
-
|
| 454 |
-
class Patches(layers.Layer):
|
| 455 |
-
def __init__(self, patch_size: int):
|
| 456 |
-
# A layer that extracts patches from an image tensor.
|
| 457 |
-
|
| 458 |
-
# Args:
|
| 459 |
-
# patch_size (int): The size of each patch to extract.
|
| 460 |
-
super(Patches, self).__init__()
|
| 461 |
-
self.patch_size = patch_size
|
| 462 |
-
|
| 463 |
-
def call(self, images: tf.Tensor) -> tf.Tensor:
|
| 464 |
-
# Extracts patches from the input image tensor.
|
| 465 |
-
|
| 466 |
-
# Args:
|
| 467 |
-
# images (tf.Tensor): A tensor representing a batch of images.
|
| 468 |
-
|
| 469 |
-
# Returns:
|
| 470 |
-
# A tensor representing the extracted patches.
|
| 471 |
-
batch_size = tf.shape(images)[0]
|
| 472 |
-
patches = tf.image.extract_patches(
|
| 473 |
-
images=images,
|
| 474 |
-
sizes=[1, self.patch_size, self.patch_size, 1],
|
| 475 |
-
strides=[1, self.patch_size, self.patch_size, 1],
|
| 476 |
-
rates=[1, 1, 1, 1],
|
| 477 |
-
padding="VALID",
|
| 478 |
-
)
|
| 479 |
-
patch_dims = patches.shape[-1]
|
| 480 |
-
patches = tf.reshape(patches, [batch_size, -1, patch_dims])
|
| 481 |
-
return patches
|
| 482 |
-
|
| 483 |
-
|
| 484 |
-
def plot_patches(sample_index: int) -> None:
|
| 485 |
-
# Plots the original image and patches extracted from it.
|
| 486 |
-
|
| 487 |
-
# Args:
|
| 488 |
-
# sample_index (int): The index of the image to plot from the training set.
|
| 489 |
-
plt.figure(figsize=(4, 4))
|
| 490 |
-
# Select a random image from the training set
|
| 491 |
-
image = x_train[np.random.choice(range(x_train.shape[sample_index]))]
|
| 492 |
-
plt.imshow(image.astype("uint8"))
|
| 493 |
-
plt.axis("off")
|
| 494 |
-
|
| 495 |
-
resized_image = tf.image.resize(
|
| 496 |
-
tf.convert_to_tensor([image]), size=(image_size, image_size)
|
| 497 |
-
)
|
| 498 |
-
# Extract patches from the resized image
|
| 499 |
-
patches = Patches(patch_size)(resized_image)
|
| 500 |
-
print(f"Image size: {image_size} X {image_size}")
|
| 501 |
-
print(f"Patch size: {patch_size} X {patch_size}")
|
| 502 |
-
print(f"Patches per image: {patches.shape[1]}")
|
| 503 |
-
print(f"Elements per patch: {patches.shape[-1]}")
|
| 504 |
-
|
| 505 |
-
n = int(np.sqrt(patches.shape[1]))
|
| 506 |
-
plt.figure(figsize=(4, 4))
|
| 507 |
-
# Plot all patches as subplots on a grid
|
| 508 |
-
for i, patch in enumerate(patches[sample_index]):
|
| 509 |
-
ax = plt.subplot(n, n, i + 1)
|
| 510 |
-
patch_img = tf.reshape(patch, (patch_size, patch_size, 3))
|
| 511 |
-
plt.imshow(patch_img.numpy().astype("uint8"))
|
| 512 |
-
plt.axis("off")
|
| 513 |
-
|
| 514 |
-
|
| 515 |
-
class PatchEncoder(layers.Layer):
|
| 516 |
-
def __init__(self, num_patches: int, projection_dim: int):
|
| 517 |
-
# Initializes the PatchEncoder layer.
|
| 518 |
-
|
| 519 |
-
# Args:
|
| 520 |
-
# num_patches (int): The number of patches to extract from an image.
|
| 521 |
-
# projection_dim (int): The dimensionality of the encoding space.
|
| 522 |
-
super(PatchEncoder, self).__init__()
|
| 523 |
-
self.num_patches = num_patches
|
| 524 |
-
# A Dense layer to project the patches into the encoding space
|
| 525 |
-
self.projection = layers.Dense(units=projection_dim)
|
| 526 |
-
# An Embedding layer to provide position embeddings for each patch
|
| 527 |
-
self.position_embedding = layers.Embedding(
|
| 528 |
-
input_dim=num_patches, output_dim=projection_dim
|
| 529 |
-
)
|
| 530 |
-
|
| 531 |
-
def call(self, patch):
|
| 532 |
-
# Encodes a patch by projecting it into the encoding space and adding a
|
| 533 |
-
# position embedding.
|
| 534 |
-
|
| 535 |
-
# Args:
|
| 536 |
-
# patch (tf.Tensor): A patch extracted from an image.
|
| 537 |
-
|
| 538 |
-
# Returns:
|
| 539 |
-
# tf.Tensor: The encoded patch, with shape (batch_size, projection_dim).
|
| 540 |
-
positions = tf.range(start=0, limit=self.num_patches, delta=1)
|
| 541 |
-
encoded = self.projection(patch) + self.position_embedding(positions)
|
| 542 |
-
return encoded
|
| 543 |
-
|
| 544 |
-
|
| 545 |
-
def create_vit_classifier() -> keras.Model:
|
| 546 |
-
# Creates a Vision Transformer (ViT) classifier model.
|
| 547 |
-
|
| 548 |
-
# Returns:
|
| 549 |
-
# keras.Model: A ViT classifier model.
|
| 550 |
-
inputs = layers.Input(shape=input_shape)
|
| 551 |
-
# Augment data.
|
| 552 |
-
augmented = data_augmentation(inputs)
|
| 553 |
-
# Create patches.
|
| 554 |
-
patches = Patches(patch_size)(augmented)
|
| 555 |
-
# Encode patches.
|
| 556 |
-
encoded_patches = PatchEncoder(num_patches, projection_dim)(patches)
|
| 557 |
-
|
| 558 |
-
# Create multiple layers of the Transformer block.
|
| 559 |
-
for _ in range(transformer_layers):
|
| 560 |
-
# Layer normalization 1.
|
| 561 |
-
x1 = layers.LayerNormalization(epsilon=1e-6)(encoded_patches)
|
| 562 |
-
# Create a multi-head attention layer.
|
| 563 |
-
attention_output = layers.MultiHeadAttention(
|
| 564 |
-
num_heads=num_heads, key_dim=projection_dim, dropout=0.1
|
| 565 |
-
)(x1, x1)
|
| 566 |
-
# Skip connection 1.
|
| 567 |
-
x2 = layers.Add()([attention_output, encoded_patches])
|
| 568 |
-
# Layer normalization 2.
|
| 569 |
-
x3 = layers.LayerNormalization(epsilon=1e-6)(x2)
|
| 570 |
-
# MLP.
|
| 571 |
-
x3 = mlp(x3, hidden_units=transformer_units, dropout_rate=0.1)
|
| 572 |
-
# Skip connection 2.
|
| 573 |
-
encoded_patches = layers.Add()([x3, x2])
|
| 574 |
-
|
| 575 |
-
# Create a [batch_size, projection_dim] tensor.
|
| 576 |
-
representation = layers.LayerNormalization(epsilon=1e-6)(encoded_patches)
|
| 577 |
-
representation = layers.Flatten()(representation)
|
| 578 |
-
representation = layers.Dropout(0.5)(representation)
|
| 579 |
-
# Add MLP.
|
| 580 |
-
features = mlp(representation, hidden_units=mlp_head_units, dropout_rate=0.5)
|
| 581 |
-
# Classify outputs.
|
| 582 |
-
logits = layers.Dense(num_classes)(features)
|
| 583 |
-
# Create the Keras model.
|
| 584 |
-
model = keras.Model(inputs=inputs, outputs=logits)
|
| 585 |
-
return model
|
| 586 |
-
|
| 587 |
-
|
| 588 |
-
def run_experiment(model: keras.Model) -> Dict[str, Any]:
|
| 589 |
-
# Trains a given Keras model on training data, evaluates it on test data,
|
| 590 |
-
# and returns the training history.
|
| 591 |
-
|
| 592 |
-
# Args:
|
| 593 |
-
# model (keras.Model): A Keras model to train and evaluate.
|
| 594 |
-
|
| 595 |
-
# Returns:
|
| 596 |
-
# dict: A dictionary containing the training history.
|
| 597 |
-
optimizer = tfa.optimizers.AdamW(
|
| 598 |
-
learning_rate=learning_rate, weight_decay=weight_decay
|
| 599 |
-
)
|
| 600 |
-
|
| 601 |
-
model.compile(
|
| 602 |
-
optimizer=optimizer,
|
| 603 |
-
loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
|
| 604 |
-
metrics=[
|
| 605 |
-
keras.metrics.SparseCategoricalAccuracy(name="accuracy"),
|
| 606 |
-
keras.metrics.SparseTopKCategoricalAccuracy(5, name="top-5-accuracy"),
|
| 607 |
-
],
|
| 608 |
-
)
|
| 609 |
-
|
| 610 |
-
checkpoint_filepath = "/tmp/checkpoint"
|
| 611 |
-
checkpoint_callback = keras.callbacks.ModelCheckpoint(
|
| 612 |
-
checkpoint_filepath,
|
| 613 |
-
monitor="val_accuracy",
|
| 614 |
-
save_best_only=True,
|
| 615 |
-
save_weights_only=True,
|
| 616 |
-
)
|
| 617 |
-
|
| 618 |
-
history = model.fit(
|
| 619 |
-
x=x_train,
|
| 620 |
-
y=y_train,
|
| 621 |
-
batch_size=batch_size,
|
| 622 |
-
epochs=num_epochs,
|
| 623 |
-
validation_split=0.1,
|
| 624 |
-
callbacks=[checkpoint_callback],
|
| 625 |
-
)
|
| 626 |
-
|
| 627 |
-
model.load_weights(checkpoint_filepath)
|
| 628 |
-
_, accuracy, top_5_accuracy = model.evaluate(x_test, y_test)
|
| 629 |
-
print(f"Test accuracy: {round(accuracy * 100, 2)}%")
|
| 630 |
-
print(f"Test top 5 accuracy: {round(top_5_accuracy * 100, 2)}%")
|
| 631 |
-
|
| 632 |
-
return history
|
| 633 |
-
```
|
| 634 |
-
"""
|
| 635 |
-
)
|
| 636 |
-
|
| 637 |
-
st.markdown(
|
| 638 |
-
r"""
|
| 639 |
-
Related work driving the momentum of ViT:
|
| 640 |
-
- [Attention Is All You Need](https://arxiv.org/abs/1706.03762)
|
| 641 |
-
- [An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale](https://arxiv.org/abs/2010.11929)
|
| 642 |
-
- [Do Vision Transformers See Like Convolutional Neural Networks?](https://arxiv.org/abs/2108.08810)
|
| 643 |
-
- [Vision Transformers for Dense Prediction](https://arxiv.org/abs/2103.13413)
|
| 644 |
-
- [VT-ADL: A Vision Transformer Network for Image Anomaly Detection and Localization](https://arxiv.org/abs/2104.10036)
|
| 645 |
-
- [ViViT: A Video Vision Transformer](https://openaccess.thecvf.com/content/ICCV2021/papers/Arnab_ViViT_A_Video_Vision_Transformer_ICCV_2021_paper.pdf)
|
| 646 |
-
- [Autonomous Driving: Tesla FSD - image-to-BEV transform + multi-camera fusion](https://www.youtube.com/live/ODSJsviD_SU?si=14Q3If7cBME69hkc)
|
| 647 |
-
"""
|
| 648 |
-
)
|
| 649 |
-
|
| 650 |
-
|
| 651 |
-
page_for_cnnadvanced()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
pages/6_Autoencoders.py
DELETED
|
@@ -1,154 +0,0 @@
|
|
| 1 |
-
def page_for_ae():
|
| 2 |
-
import streamlit as st
|
| 3 |
-
|
| 4 |
-
st.write("# Welcome to Autoencoder and Variational Autoencoder! 👋")
|
| 5 |
-
st.sidebar.success("Select a session from the menu of content.")
|
| 6 |
-
|
| 7 |
-
st.markdown(
|
| 8 |
-
r"""
|
| 9 |
-
# Understanding Autoencoders and Variational Autoencoders
|
| 10 |
-
|
| 11 |
-
## Why Use Autoencoders?
|
| 12 |
-
|
| 13 |
-
Autoencoders are a class of neural network models aimed at learning efficient representations (encodings) of the input data, typically for the purpose of dimensionality reduction or feature extraction. By learning to ignore "noise" in the input data, autoencoders focus on reconstructing the most important aspects, effectively learning the most salient features of the training data. They are particularly useful in unsupervised learning tasks where you're dealing with unlabeled data. Autoencoders have wide applications ranging from anomaly detection to image denoising, and they serve as a building block for more complex models such as variational autoencoders.
|
| 14 |
-
|
| 15 |
-
## What is an Autoencoder?
|
| 16 |
-
|
| 17 |
-
An autoencoder is a type of artificial neural network used to learn efficient codings of input data. It consists of two main parts: the encoder and the decoder. The encoder compresses the input into a latent-space representation, and the decoder reconstructs the input from this representation.
|
| 18 |
-
"""
|
| 19 |
-
)
|
| 20 |
-
|
| 21 |
-
st.image(
|
| 22 |
-
"https://blog.keras.io/img/ae/autoencoder_schema.jpg",
|
| 23 |
-
caption="Visualization of an Autoencoder (intuition)",
|
| 24 |
-
use_column_width=True,
|
| 25 |
-
)
|
| 26 |
-
|
| 27 |
-
st.markdown(
|
| 28 |
-
r"""
|
| 29 |
-
### Mathematical Form
|
| 30 |
-
|
| 31 |
-
The mathematical formulation of a simple autoencoder can be represented as follows:
|
| 32 |
-
|
| 33 |
-
1. **Encoder**: This maps the input $ x $ to a latent representation $ h $, denoted as $ h = f(x) $. Here, $ f $ is the encoding function.
|
| 34 |
-
2. **Latent Layer**: This layer represents the compressed knowledge of the input data.
|
| 35 |
-
3. **Decoder**: The decoder maps the latent representation $ h $ back to a reconstruction $ r $, which is as close as possible to the original input $ x $. This is denoted as $ r = g(h) $, where $ g $ is the decoding function.
|
| 36 |
-
|
| 37 |
-
For instance, in an autoencoder trained on the MNIST dataset, the encoder would compress the 28x28 pixel input images into a smaller representation, and the decoder would attempt to reconstruct the original images from this compressed form.
|
| 38 |
-
|
| 39 |
-
"""
|
| 40 |
-
)
|
| 41 |
-
|
| 42 |
-
st.image(
|
| 43 |
-
"https://miro.medium.com/max/1400/0*_K4yRm8ILDZ02vPq.png",
|
| 44 |
-
caption="Visualization of an Autoencoder (mathematical)",
|
| 45 |
-
use_column_width=True,
|
| 46 |
-
)
|
| 47 |
-
|
| 48 |
-
video_file_manytomany_ae = open("./videos/7_manytomany_ae.mp4", "rb")
|
| 49 |
-
video_bytes_manytomany_ae = video_file_manytomany_ae.read()
|
| 50 |
-
st.video(video_bytes_manytomany_ae)
|
| 51 |
-
|
| 52 |
-
st.markdown(
|
| 53 |
-
r"""
|
| 54 |
-
## Autoencoder Implementation in TensorFlow
|
| 55 |
-
|
| 56 |
-
```python
|
| 57 |
-
import tensorflow as tf
|
| 58 |
-
|
| 59 |
-
# Define the encoder part
|
| 60 |
-
encoder = tf.keras.layers.Dense(units=64, activation='relu', input_shape=(784,))
|
| 61 |
-
|
| 62 |
-
# Define the latent space
|
| 63 |
-
latent_space = tf.keras.layers.Dense(units=32, activation='relu')
|
| 64 |
-
|
| 65 |
-
# Define the decoder part
|
| 66 |
-
decoder = tf.keras.layers.Dense(units=784, activation='sigmoid')
|
| 67 |
-
|
| 68 |
-
# Build the autoencoder model
|
| 69 |
-
autoencoder_model = tf.keras.Sequential([encoder, latent_space, decoder])
|
| 70 |
-
|
| 71 |
-
# Compile the model
|
| 72 |
-
autoencoder_model.compile(optimizer='adam', loss='binary_crossentropy')
|
| 73 |
-
|
| 74 |
-
# Summary of the model
|
| 75 |
-
autoencoder_model.summary()
|
| 76 |
-
```
|
| 77 |
-
|
| 78 |
-
- This code sets up a basic autoencoder in TensorFlow for the MNIST dataset.
|
| 79 |
-
- The input data is first flattened (since MNIST images are 28x28 pixels, the input shape is 784).
|
| 80 |
-
- The encoder compresses the input to a 64-unit layer, then to a smaller 32-unit latent layer.
|
| 81 |
-
- The decoder then reconstructs the input from the latent space representation back to the original 784 units.
|
| 82 |
-
- The model is compiled using the Adam optimizer and binary cross-entropy loss, suitable for binary image reconstruction.
|
| 83 |
-
|
| 84 |
-
## Motivation for Variational Autoencoders
|
| 85 |
-
|
| 86 |
-
Variational Autoencoders (VAEs) go a step further than traditional autoencoders by not only learning to encode and decode data but also by learning the distribution of the data. They are generative models, which means they can generate new instances that are similar to the input data. This capability makes VAEs incredibly powerful for tasks like image generation, style transfer, and more advanced forms of anomaly detection. They introduce a probabilistic approach to the encoding process, making them robust and versatile.
|
| 87 |
-
|
| 88 |
-
## Architecture of Variational Autoencoders
|
| 89 |
-
|
| 90 |
-
The architecture of a Variational Autoencoder differs from a standard autoencoder in that it produces two outputs for the encoder: a mean and a variance. These parameters define a distribution for each latent attribute.
|
| 91 |
-
|
| 92 |
-
- The encoder outputs parameters to a statistical distribution, rather than outputting a fixed value for each attribute in the latent space.
|
| 93 |
-
- During training, a sample is drawn from this distribution and passed to the decoder, which then reconstructs the input data.
|
| 94 |
-
- This approach forces the model to learn efficient representations of the data in a structured latent space, often leading to better and more controlled generation of new data.
|
| 95 |
-
"""
|
| 96 |
-
)
|
| 97 |
-
|
| 98 |
-
video_file_manytomany_vae = open("./videos/7_manytomany_vae.mp4", "rb")
|
| 99 |
-
video_bytes_manytomany_vae = video_file_manytomany_vae.read()
|
| 100 |
-
st.video(video_bytes_manytomany_vae)
|
| 101 |
-
|
| 102 |
-
st.markdown(
|
| 103 |
-
r"""
|
| 104 |
-
## Variational Autoencoder Implementation in TensorFlow
|
| 105 |
-
|
| 106 |
-
```python
|
| 107 |
-
class Sampling(tf.keras.layers.Layer):
|
| 108 |
-
# Sampling layer to sample from the distribution
|
| 109 |
-
def call(self, inputs):
|
| 110 |
-
mean, log_var = inputs
|
| 111 |
-
batch = tf.shape(mean)[0]
|
| 112 |
-
dim = tf.shape(mean)[1]
|
| 113 |
-
epsilon = tf.keras.backend.random_normal(shape=(batch, dim))
|
| 114 |
-
return mean + tf.exp(0.5 * log_var) * epsilon
|
| 115 |
-
|
| 116 |
-
# Encoder
|
| 117 |
-
vae_encoder_input = tf.keras.Input(shape=(784,))
|
| 118 |
-
x = tf.keras.layers.Dense(64, activation='relu')(vae_encoder_input)
|
| 119 |
-
mean = tf.keras.layers.Dense(32)(x)
|
| 120 |
-
log_var = tf.keras.layers.Dense(
|
| 121 |
-
|
| 122 |
-
32)(x)
|
| 123 |
-
z = Sampling()([mean, log_var])
|
| 124 |
-
|
| 125 |
-
# Decoder
|
| 126 |
-
vae_decoder_input = tf.keras.Input(shape=(32,))
|
| 127 |
-
x = tf.keras.layers.Dense(64, activation='relu')(vae_decoder_input)
|
| 128 |
-
vae_decoder_output = tf.keras.layers.Dense(784, activation='sigmoid')(x)
|
| 129 |
-
|
| 130 |
-
# VAE model
|
| 131 |
-
vae_encoder = tf.keras.Model(vae_encoder_input, [mean, log_var, z])
|
| 132 |
-
vae_decoder = tf.keras.Model(vae_decoder_input, vae_decoder_output)
|
| 133 |
-
vae_output = vae_decoder(vae_encoder(vae_encoder_input)[2])
|
| 134 |
-
vae = tf.keras.Model(vae_encoder_input, vae_output)
|
| 135 |
-
|
| 136 |
-
# Compile VAE
|
| 137 |
-
vae.compile(optimizer='adam', loss='binary_crossentropy')
|
| 138 |
-
|
| 139 |
-
# VAE Summary
|
| 140 |
-
vae.summary()
|
| 141 |
-
```
|
| 142 |
-
|
| 143 |
-
- This TensorFlow code implements a Variational Autoencoder for the MNIST dataset.
|
| 144 |
-
- The `Sampling` layer samples from the distribution defined by the encoder output.
|
| 145 |
-
- The encoder outputs the mean and log variance, which are used to sample a point in the latent space.
|
| 146 |
-
- The decoder reconstructs the input from the sampled latent point.
|
| 147 |
-
- The model is compiled using the Adam optimizer and binary cross-entropy loss.
|
| 148 |
-
|
| 149 |
-
This Variational Autoencoder model can generate new data similar to the input data, offering a powerful tool for generative modeling tasks.
|
| 150 |
-
"""
|
| 151 |
-
)
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
page_for_ae()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
pages/AI_Decoded__Yin_2024.pdf
DELETED
|
Binary file (633 kB)
|
|
|