|
|
import numpy as np
|
|
|
|
|
|
|
|
|
from tensorflow.keras.datasets import mnist
|
|
|
from tensorflow.keras.utils import to_categorical
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def load_mnist_data():
    """Load the MNIST dataset and preprocess it for the CNN.

    Returns:
        (x_train, y_train, x_test, y_test) where images are float32 in
        [0, 1] with shape (N, 28, 28, 1) and labels are one-hot vectors
        over 10 classes.
    """
    (x_train, y_train), (x_test, y_test) = mnist.load_data()

    # Scale pixel intensities from [0, 255] to [0, 1].
    x_train = x_train.astype('float32') / 255.0
    x_test = x_test.astype('float32') / 255.0

    # Append a trailing channel axis: (N, 28, 28) -> (N, 28, 28, 1).
    x_train = x_train[..., np.newaxis]
    x_test = x_test[..., np.newaxis]

    # One-hot encode the integer class labels.
    y_train = to_categorical(y_train, 10)
    y_test = to_categorical(y_test, 10)

    return x_train, y_train, x_test, y_test
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class ConvLayer:
    """2-D convolution layer: valid padding, stride 1, multi-channel input."""

    def __init__(self, num_filters, filter_size, input_channels):
        self.num_filters = num_filters
        self.filter_size = filter_size
        self.input_channels = input_channels
        # He initialization: scale standard normals by sqrt(2 / fan_in).
        fan_in = filter_size * filter_size * input_channels
        self.filters = np.random.randn(filter_size, filter_size, input_channels, num_filters) * np.sqrt(2. / fan_in)
        self.biases = np.zeros(self.num_filters)
        self.last_input = None  # cached forward input, needed by backward

    def forward(self, input_image):
        """Convolve the batch with every filter.

        Args:
            input_image: array of shape (B, H, W, C).
        Returns:
            Array of shape (B, H - k + 1, W - k + 1, num_filters).
        """
        self.last_input = input_image
        n, height, width, _ = input_image.shape
        k = self.filter_size
        out_h = height - k + 1
        out_w = width - k + 1
        result = np.zeros((n, out_h, out_w, self.num_filters))

        for sample in range(n):
            for row in range(out_h):
                for col in range(out_w):
                    patch = input_image[sample, row:row + k, col:col + k, :]
                    for fidx in range(self.num_filters):
                        result[sample, row, col, fidx] = np.sum(patch * self.filters[:, :, :, fidx]) + self.biases[fidx]
        return result

    def backward(self, d_loss_d_output, learning_rate):
        """Backward pass: apply an SGD step and return the input gradient.

        Args:
            d_loss_d_output: gradient of the loss w.r.t. forward output.
            learning_rate: SGD step size (gradients averaged over the batch).
        Returns:
            Gradient of the loss w.r.t. the forward input.
        """
        n = self.last_input.shape[0]
        grad_filters = np.zeros_like(self.filters)
        grad_input = np.zeros_like(self.last_input)
        k = self.filter_size
        out_h = d_loss_d_output.shape[1]
        out_w = d_loss_d_output.shape[2]

        for sample in range(n):
            for row in range(out_h):
                for col in range(out_w):
                    patch = self.last_input[sample, row:row + k, col:col + k, :]
                    for fidx in range(self.num_filters):
                        g = d_loss_d_output[sample, row, col, fidx]
                        # Filter gradient accumulates the input patch scaled
                        # by the upstream gradient at this output position.
                        grad_filters[:, :, :, fidx] += g * patch
                        # Input gradient scatters the filter weights back
                        # onto the receptive field of this output position.
                        grad_input[sample, row:row + k, col:col + k, :] += g * self.filters[:, :, :, fidx]

        grad_biases = np.sum(d_loss_d_output, axis=(0, 1, 2))

        # Batch-averaged SGD update.
        self.filters -= learning_rate * grad_filters / n
        self.biases -= learning_rate * grad_biases / n

        return grad_input
|
|
|
|
|
|
class ReLULayer:
    """Element-wise ReLU activation."""

    def __init__(self):
        self.last_input = None  # pre-activation cached for the backward pass

    def forward(self, input_data):
        """Return max(0, x) element-wise."""
        self.last_input = input_data
        return np.maximum(0, input_data)

    def backward(self, d_loss_d_output):
        """Pass gradient through only where the forward input was positive."""
        mask = np.where(self.last_input > 0, 1, 0)
        return d_loss_d_output * mask
|
|
|
|
|
|
class MaxPoolingLayer:
    """Non-overlapping max pooling (window size == stride == pool_size)."""

    def __init__(self, pool_size):
        self.pool_size = pool_size
        self.last_input = None  # cached forward input, needed by backward

    def forward(self, input_image):
        """Downsample each feature map by the max over pool windows.

        Args:
            input_image: array of shape (B, H, W, F).
        Returns:
            Array of shape (B, H // pool_size, W // pool_size, F).
        """
        self.last_input = input_image
        batch_size, input_height, input_width, num_filters = input_image.shape
        p = self.pool_size
        output_height = input_height // p
        output_width = input_width // p

        output = np.zeros((batch_size, output_height, output_width, num_filters))

        for b in range(batch_size):
            for i in range(output_height):
                for j in range(output_width):
                    for f in range(num_filters):
                        region = input_image[b, i * p:(i + 1) * p,
                                             j * p:(j + 1) * p, f]
                        output[b, i, j, f] = np.max(region)
        return output

    def backward(self, d_loss_d_output):
        """Route each output gradient to the max position of its window.

        Bug fix: the previous implementation added the full upstream
        gradient to EVERY element tied for the window maximum
        (`region == max_val` mask), which inflates the total gradient
        whenever a window contains duplicate max values. The gradient is
        now sent to exactly one argmax position, preserving the gradient
        sum regardless of ties.
        """
        d_loss_d_input = np.zeros_like(self.last_input)
        p = self.pool_size

        for b in range(d_loss_d_output.shape[0]):
            for i in range(d_loss_d_output.shape[1]):
                for j in range(d_loss_d_output.shape[2]):
                    for f in range(d_loss_d_output.shape[3]):
                        region = self.last_input[b, i * p:(i + 1) * p,
                                                 j * p:(j + 1) * p, f]
                        # First flat index of the max, unraveled to 2-D offsets.
                        r, c = np.unravel_index(np.argmax(region), region.shape)
                        d_loss_d_input[b, i * p + r, j * p + c, f] += d_loss_d_output[b, i, j, f]
        return d_loss_d_input
|
|
|
|
|
|
class FlattenLayer:
    """Reshape (B, H, W, C) activations into (B, H*W*C) row vectors."""

    def __init__(self):
        self.last_input_shape = None  # remembered so backward can undo the reshape

    def forward(self, input_data):
        """Collapse every non-batch dimension into one."""
        self.last_input_shape = input_data.shape
        return input_data.reshape(len(input_data), -1)

    def backward(self, d_loss_d_output):
        """Restore the gradient to the original forward input shape."""
        return d_loss_d_output.reshape(self.last_input_shape)
|
|
|
|
|
|
class DenseLayer:
    """Fully connected layer computing y = x @ W + b."""

    def __init__(self, input_size, output_size):
        # He initialization for the weight matrix.
        self.weights = np.random.randn(input_size, output_size) * np.sqrt(2. / input_size)
        self.biases = np.zeros(output_size)
        self.last_input = None        # activations cached for backward
        self.last_input_shape = None  # shape of the forward input

    def forward(self, input_data):
        """Affine transform of a (B, input_size) batch -> (B, output_size)."""
        self.last_input_shape = input_data.shape
        self.last_input = input_data
        return input_data @ self.weights + self.biases

    def backward(self, d_loss_d_output, learning_rate):
        """Apply a batch-averaged SGD step; return the input gradient."""
        n = self.last_input.shape[0]
        grad_input = d_loss_d_output @ self.weights.T
        grad_weights = self.last_input.T @ d_loss_d_output
        grad_biases = d_loss_d_output.sum(axis=0)
        self.weights -= learning_rate * grad_weights / n
        self.biases -= learning_rate * grad_biases / n
        return grad_input
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def softmax(logits):
    """Row-wise softmax, shifted by the row max for numerical stability."""
    shifted = logits - logits.max(axis=1, keepdims=True)
    weights = np.exp(shifted)
    return weights / weights.sum(axis=1, keepdims=True)
|
|
|
|
|
|
def cross_entropy_loss(y_pred, y_true):
    """Mean categorical cross-entropy over the batch.

    Predictions are clipped away from 0 and 1 so log() never produces
    -inf for a fully confident wrong prediction.
    """
    n_samples = y_true.shape[0]
    eps = 1e-12
    clipped = np.clip(y_pred, eps, 1. - eps)
    return -np.sum(y_true * np.log(clipped)) / n_samples
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class SimpleCNN:
    """Two conv/ReLU/pool stages followed by a single dense classifier."""

    def __init__(self):
        # Stage 1: 28x28x1 -> conv(3x3, 8) -> 26x26x8 -> pool(2) -> 13x13x8
        self.conv1 = ConvLayer(num_filters=8, filter_size=3, input_channels=1)
        self.relu1 = ReLULayer()
        self.pool1 = MaxPoolingLayer(pool_size=2)
        # Stage 2: 13x13x8 -> conv(3x3, 16) -> 11x11x16 -> pool(2) -> 5x5x16
        self.conv2 = ConvLayer(num_filters=16, filter_size=3, input_channels=8)
        self.relu2 = ReLULayer()
        self.pool2 = MaxPoolingLayer(pool_size=2)
        # Classifier head: flatten 5*5*16 = 400 features, project to 10 logits.
        self.flatten = FlattenLayer()
        self.dense = DenseLayer(5 * 5 * 16, 10)

    def forward(self, image):
        """Run the full pipeline; returns raw logits of shape (B, 10)."""
        activation = image
        for layer in (self.conv1, self.relu1, self.pool1,
                      self.conv2, self.relu2, self.pool2,
                      self.flatten, self.dense):
            activation = layer.forward(activation)
        return activation

    def backward(self, d_loss_d_out, learning_rate):
        """Propagate the loss gradient through every layer in reverse order.

        Parameterized layers (dense, conv) take the learning rate and
        update their weights in place as the gradient flows through.
        """
        grad = self.dense.backward(d_loss_d_out, learning_rate)
        grad = self.flatten.backward(grad)
        grad = self.pool2.backward(grad)
        grad = self.relu2.backward(grad)
        grad = self.conv2.backward(grad, learning_rate)
        grad = self.pool1.backward(grad)
        grad = self.relu1.backward(grad)
        self.conv1.backward(grad, learning_rate)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
    # Train and evaluate on small subsets so the pure-NumPy convolution
    # loops finish in a reasonable time.
    x_train, y_train, x_test, y_test = load_mnist_data()
    x_train_small, y_train_small = x_train[:1000], y_train[:1000]
    x_test_small, y_test_small = x_test[:500], y_test[:500]

    model = SimpleCNN()

    # Training hyperparameters.
    learning_rate = 0.01
    epochs = 5
    batch_size = 32

    print("νμ΅ μμ...")  # NOTE(review): mojibake; originally Korean for "training start..."
    for epoch in range(epochs):
        epoch_loss = 0

        # Mini-batch SGD over the training subset.
        for i in range(0, x_train_small.shape[0], batch_size):
            x_batch = x_train_small[i:i+batch_size]
            y_batch = y_train_small[i:i+batch_size]

            # Forward pass: raw logits -> probabilities -> scalar loss.
            logits = model.forward(x_batch)
            predictions = softmax(logits)

            loss = cross_entropy_loss(predictions, y_batch)
            epoch_loss += loss

            # Gradient of cross-entropy w.r.t. the logits for softmax
            # outputs simplifies to (p - y).
            d_loss_d_out = (predictions - y_batch)
            model.backward(d_loss_d_out, learning_rate)

        # Average loss per batch for the epoch (last batch may be smaller).
        avg_loss = epoch_loss / (len(x_train_small) / batch_size)
        print(f"Epoch {epoch + 1}/{epochs}, Loss: {avg_loss:.4f}")

    print("\nνμ€νΈ μμ...")  # NOTE(review): mojibake; originally Korean for "test start..."
    test_logits = model.forward(x_test_small)
    test_predictions = softmax(test_logits)

    # Accuracy: compare argmax of predictions against one-hot labels.
    predicted_labels = np.argmax(test_predictions, axis=1)
    true_labels = np.argmax(y_test_small, axis=1)
    accuracy = np.mean(predicted_labels == true_labels)

    print(f"νμ€νΈ μ νλ: {accuracy * 100:.2f}%")  # NOTE(review): mojibake; originally "test accuracy: ..%"