from abc import ABC, abstractmethod

import numpy as np


class Layer(ABC):
    """Abstract base for a layer that owns a weight matrix and a bias row.

    Weights have shape ``(input_size, output_size)`` and are drawn from a
    standard normal scaled by ``sqrt(2 / input_size)``; the bias is a
    ``(1, output_size)`` row of zeros.
    """

    def __init__(self, input_size, output_size):
        # Cached by feedforward() so backpropagation() can reuse them.
        self.input = None
        self.output = None
        self.input_size = input_size
        self.output_size = output_size
        self.weights = np.random.randn(input_size, output_size) * np.sqrt(2. / input_size)
        self.bias = np.zeros((1, output_size))

    @staticmethod
    def activate(z, activation: str):
        """Apply the named activation element-wise to ``z``.

        Args:
            z: Pre-activation values.
            activation: ``'sigmoid'`` or ``'relu'``.

        Returns:
            The activated array.

        Raises:
            ValueError: If ``activation`` is not a supported name.
        """
        if activation == 'sigmoid':
            return 1 / (1 + np.exp(-z))
        if activation == 'relu':
            return np.maximum(0, z)
        # Fail loudly: returning None here would only surface later as an
        # unrelated TypeError inside np.dot or the error arithmetic.
        raise ValueError(f"Undefined activation type: {activation!r}")

    @staticmethod
    def derivative(activation: str, z):
        """Return the activation's derivative given the activation OUTPUT.

        ``z`` here is the value already produced by :meth:`activate` (that is
        what ``Dense.backpropagation`` passes in), so the sigmoid derivative
        is ``z * (1 - z)``. For relu the output is positive exactly where the
        pre-activation was, so ``z > 0`` selects the same elements.

        Args:
            activation: ``'sigmoid'`` or ``'relu'``.
            z: Output of the activation function.

        Returns:
            Array of element-wise derivatives.

        Raises:
            ValueError: If ``activation`` is not a supported name.
        """
        if activation == 'sigmoid':
            return z * (1 - z)
        if activation == 'relu':
            return np.where(z > 0, 1, 0)
        raise ValueError(f"Undefined activation type: {activation!r}")

    @abstractmethod
    def feedforward(self, x_train):
        """Compute and return the layer output for the given input."""
        pass

    # NOTE(review): the concrete Dense layer and MLP.fit use the signature
    # backpropagation(error, learning_rate); this abstract signature is kept
    # unchanged for compatibility but does not match that usage.
    @abstractmethod
    def backpropagation(self, x_train, y_train, learning_rate):
        """Update the layer's parameters and return the propagated error."""
        pass


class Dense(Layer):
    """Fully connected layer: ``activate(input @ weights + bias)``."""

    def __init__(self, input_size, output_size, activation: str):
        super().__init__(input_size, output_size)
        self.activation = activation

    def feedforward(self, input):
        """Run the forward pass and cache input/output for backpropagation.

        Args:
            input: Array whose last dimension equals ``input_size``.

        Returns:
            The activated output of the layer.
        """
        self.input = input
        z = np.dot(self.input, self.weights) + self.bias
        self.output = self.activate(z, self.activation)
        return self.output

    def backpropagation(self, error, learning_rate):
        """Apply one gradient-descent step and return the upstream error.

        Must be called after :meth:`feedforward`, since it relies on the
        cached ``self.input`` and ``self.output``.

        Args:
            error: Error signal with respect to this layer's output.
            learning_rate: Step size for the parameter update.

        Returns:
            Error signal with respect to this layer's input.
        """
        delta = error * self.derivative(self.activation, self.output)
        db = np.sum(delta, axis=0, keepdims=True)
        dW = np.dot(self.input.T, delta)
        # Propagate through the weights as they were during the forward pass,
        # i.e. before the update below modifies them.
        input_error = np.dot(delta, self.weights.T)
        self.weights -= learning_rate * dW
        self.bias -= learning_rate * db
        return input_error


class MLP:
    """Sequential stack of layers trained with mini-batch gradient descent."""

    def __init__(self):
        self.layers = []

    @staticmethod
    def loss_MSE(y_train, yhat):
        """Return the mean of the squared element-wise differences."""
        loss = np.mean(np.square(yhat - y_train))
        return loss

    @staticmethod
    def loss_cross(y_train, yhat):
        """Return ``-sum(y * log(yhat + eps))`` divided by the first dimension of ``y_train``."""
        epsilon = 1e-9  # avoid log(0)
        loss = -np.sum(y_train * np.log(yhat + epsilon)) / y_train.shape[0]
        return loss

    def addlayer(self, layer: Dense):
        """Append ``layer`` to the end of the network."""
        self.layers.append(layer)

    def predict(self, input):
        """Feed ``input`` through every layer in order and return the result."""
        for layer in self.layers:
            input = layer.feedforward(input)
        return input

    def fit(self, x_train, y_train, learning_rate=0.01, batch_size=8, epochs=10, loss_type: str = 'MSE'):
        """Train the network and report the mean batch loss of every epoch.

        Each epoch reshuffles the samples, walks over them in mini-batches,
        and back-propagates ``output - y_batch`` through the layers in
        reverse order.

        Args:
            x_train: Training inputs, indexed by sample along axis 0.
            y_train: Training targets, aligned with ``x_train`` along axis 0.
            learning_rate: Step size passed to every layer.
            batch_size: Number of samples per mini-batch; must be >= 1.
            epochs: Number of passes over the training data.
            loss_type: ``'MSE'`` selects :meth:`loss_MSE`; any other value
                selects :meth:`loss_cross`.

        Returns:
            A list with one entry per epoch: the mean of the per-batch losses
            (``nan`` when the training set is empty).

        Raises:
            ValueError: If ``batch_size`` is smaller than 1.
        """
        if batch_size < 1:
            raise ValueError(f"batch_size must be a positive integer, got {batch_size}")

        num_samples = x_train.shape[0]
        loss_fn = self.loss_MSE if loss_type == 'MSE' else self.loss_cross
        history = []

        for epoch in range(epochs):
            indices = np.arange(num_samples)
            np.random.shuffle(indices)
            x_train = x_train[indices]
            y_train = y_train[indices]

            loss = 0
            num_batches = 0
            for i in range(0, num_samples, batch_size):
                x_batch = x_train[i: i + batch_size]
                y_batch = y_train[i: i + batch_size]

                output = self.predict(input=x_batch)
                error = output - y_batch
                loss += loss_fn(y_batch, output)
                num_batches += 1

                for layer in reversed(self.layers):
                    error = layer.backpropagation(error, learning_rate)

            # Average over the batches actually processed; the trailing
            # partial batch counts too, and the divisor is never zero.
            epoch_loss = loss / num_batches if num_batches else float('nan')
            history.append(epoch_loss)
            print(f'Epoch {epoch + 1}/{epochs}, Loss: {epoch_loss}')

        return history