Upload 3 files

Browse files

Update 3 project files: Conventional_Neural_Network, Single_Layer_Perceptron, Multi_Layer_Perceptron

Files changed (3) hide show

Multi_Layer_Perceptron.py +129 -0
Single_Layer_Perceptron.py +141 -0
conventional_neural_network.py +103 -0

Multi_Layer_Perceptron.py ADDED Viewed

	@@ -0,0 +1,129 @@

+import numpy as np
+# --- 1. Dữ liệu XOR gate ---
+X = np.array([[0, 0],
+              [0, 1],
+              [1, 0],
+              [1, 1]])
+y = np.array([[0], [1], [1], [0]])  # output XOR
+# --- 2. Hàm sigmoid & derivative ---
+def sigmoid(x):
+    return 1 / (1 + np.exp(-x))
+def sigmoid_derivative(x):
+    return x * (1 - x)
+# --- 3. Khởi tạo weights & bias ---
+np.random.seed(42)
+input_dim = 2
+hidden_dim = 2 # thử đổi thành 3, 4, 5...
+'''
+Ảnh hưởng:
+2 neurons: học XOR ổn
+1 neuron: không học được
+2 neurons: học nhanh hơn, loss giảm mạnh 👉 Đây là chỗ dễ thấy sự thay đổi nhất.
+'''
+output_dim = 1
+lr = 0.5 # thử 0.1, 1.0, 2.0
+'''
+Ảnh hưởng:
+lr nhỏ → học chậm, mượt
+lr lớn → lúc học rất nhanh, lúc bị “nhảy loạn”, dễ diverge 👉 Thay đổi learning rate luôn thấy kết quả khác.
+'''
+epochs = 10000 # thử 3000, 50000
+'''
+Ảnh hưởng:
+ít epoch → chưa học hết, dự đoán sai
+nhiều epoch → XOR học hoàn hảo hơn
+'''
+# weights: input -> hidden
+w1 = np.random.randn(input_dim, hidden_dim) # Có thể thử nhân thêm weight với 0.1 hoặc 0.001
+'''
+Hiệu ứng: khi nhân với 0.1
+Train rất mượt
+Loss giảm đều
+Tốc độ học nhanh
+Đây là “sweet spot”.
+Hiệu ứng: khi nhân với 0.001
+Activation gần 0 → mô hình học chậm
+Loss giảm nhưng rất từ từ
+'''
+b1 = np.zeros((1, hidden_dim))
+# weights: hidden -> output
+w2 = np.random.randn(hidden_dim, output_dim)
+b2 = np.zeros((1, output_dim)) # Có thể thử Bias random:  b1 = np.random.randn((1, output_dim))
+'''
+Hiệu ứng CÓ THỂ THẤY RÕ:
+Decision boundary bắt đầu lệch → học XOR nhanh hơn
+Loss giảm nhanh từ những bước đầu tiên
+Output có thể ra đúng từ rất sớm (epoch 10–20)
+'''
+# --- 4. Huấn luyện bằng Backpropagation ---
+for epoch in range(epochs):
+    # Forward pass
+    z1 = np.dot(X, w1) + b1
+    h = sigmoid(z1)
+    z2 = np.dot(h, w2) + b2
+    y_pred = sigmoid(z2)
+    # Tính lỗi
+    error = y - y_pred
+    # Backward pass
+    d_y_pred = error * sigmoid_derivative(y_pred)
+    d_h = d_y_pred.dot(w2.T) * sigmoid_derivative(h)
+    # Cập nhật weights & bias
+    w2 += h.T.dot(d_y_pred) * lr
+    b2 += np.sum(d_y_pred, axis=0, keepdims=True) * lr
+    w1 += X.T.dot(d_h) * lr
+    b1 += np.sum(d_h, axis=0, keepdims=True) * lr
+# --- 5. Test MLP ---
+print("Testing trained MLP:")
+z1 = np.dot(X, w1) + b1
+h = sigmoid(z1)
+z2 = np.dot(h, w2) + b2
+y_pred = sigmoid(z2)
+print(np.round(y_pred))
+'''
+✅ Tóm tắt học thuật:
+| Tiêu chí                  | Single Layer Perceptron (SLP)         | Multi-Layer Perceptron (MLP)                                           |
+| ------------------------- | ------------------------------------- | ---------------------------------------------------------------------- |
+| **Số lớp**                | 1 lớp (input → output)                | Nhiều lớp (input → hidden → output)                                    |
+| **Hàm học**               | Tuyến tính                            | Phi tuyến tính (nhờ lớp ẩn và activation)                              |
+| **Hàm kích hoạt**         | Step function                         | Sigmoid, ReLU, Tanh hoặc các hàm phi tuyến khác                        |
+| **Khả năng học XOR**      | Không                                 | Có                                                                     |
+| **Thuật toán huấn luyện** | Perceptron learning rule              | Backpropagation + gradient descent                                     |
+| **Ứng dụng**              | Phân loại tuyến tính cơ bản (AND, OR) | Classification, regression, nhận dạng hình ảnh, NLP, dữ liệu phi tuyến |
+| **Ưu điểm**               | Đơn giản, dễ hiểu                     | Học được phi tuyến, khả năng biểu diễn cao                             |
+| **Hạn chế**               | Chỉ học tuyến tính                    | Dễ overfitting, cần tuning hyperparameters, tốn tài nguyên             |
+'''
+'''
+Epoch không phải là weight hay bias, nhưng về thuật toán học, nó được coi là hyperparameter.
+Hyperparameter = tham số do người đặt trước khi huấn luyện (khác với parameter là giá trị học được từ dữ liệu).
+Các hyperparameters phổ biến:
+  Learning rate (𝜂)
+  Batch size
+  Số epoch
+  Số lớp ẩn, số neuron
+  Hàm kích hoạt
+Như vậy: Epoch → hyperparameter, ảnh hưởng trực tiếp đến quá trình huấn luyện.
+3. Epoch ảnh hưởng đến quá trình học như thế nào
+Quá ít epoch → underfitting, model chưa học đủ.
+Quá nhiều epoch → overfitting, model nhớ dữ liệu training quá mức, generalization kém.
+Kết hợp với learning rate → số epoch quyết định model có hội tụ hay không.
+4. Kết luận
+Epoch là hyperparameter, nhưng nó không phải parameter học được từ dữ liệu.
+Chọn epoch phù hợp = một phần quan trọng trong tuning hyperparameters để đạt hiệu quả tối ưu.
+'''

Single_Layer_Perceptron.py ADDED Viewed

	@@ -0,0 +1,141 @@

+import numpy as np
+# --- 1. Dữ liệu AND gate ---
+X = np.array([[0,0],
+              [0,1],
+              [1,0],
+              [1,1]])
+y = np.array([0, 0, 0, 1])  # output AND
+# --- 2. Hàm kích hoạt step function ---
+def step(x):
+    return 1 if x > 0 else 0
+# --- 3. Khởi tạo weights và bias ---
+np.random.seed(42)
+weights = np.random.randn(2)
+bias = np.random.randn()
+learning_rate = 0.1
+epochs = 10
+# --- 4. Huấn luyện Perceptron ---
+for epoch in range(epochs):
+    print(f"Epoch {epoch+1}")
+    for xi, yi in zip(X, y):
+        z = np.dot(xi, weights) + bias
+        y_pred = step(z)
+        error = yi - y_pred
+        weights += learning_rate * error * xi
+        bias += learning_rate * error
+        print(f"Input: {xi}, Pred: {y_pred}, Error: {error}, Weights: {weights}, Bias: {bias}")
+    print("-"*50)
+# --- 5. Test Perceptron ---
+print("Testing trained perceptron (AND):")
+for xi in X:
+    z = np.dot(xi, weights) + bias
+    y_pred = step(z)
+    print(f"Input: {xi}, Predicted: {y_pred}")
+# ===============================================================
+# ===============================================================
+#                   THÊM CHO OR GATE
+# ===============================================================
+# ===============================================================
+print("\n\n===== OR GATE =====")
+# Dữ liệu OR gate
+X_or = np.array([[0,0],
+                 [0,1],
+                 [1,0],
+                 [1,1]])
+y_or = np.array([0, 1, 1, 1])  # output OR
+# Khởi tạo mới weights, bias
+weights = np.random.randn(2)
+bias = np.random.randn()
+# Huấn luyện OR
+for epoch in range(epochs):
+    for xi, yi in zip(X_or, y_or):
+        z = np.dot(xi, weights) + bias
+        y_pred = step(z)
+        error = yi - y_pred
+        weights += learning_rate * error * xi
+        bias += learning_rate * error
+# Test OR
+print("Testing trained perceptron (OR):")
+for xi in X_or:
+    z = np.dot(xi, weights) + bias
+    y_pred = step(z)
+    print(f"Input: {xi}, Predicted: {y_pred}")
+# ===============================================================
+# ===============================================================
+#                   THÊM CHO NOT GATE
+# ===============================================================
+# ===============================================================
+print("\n\n===== NOT GATE =====")
+# Dữ liệu NOT gate (1 input → output đảo)
+X_not = np.array([[0],
+                  [1]])
+y_not = np.array([1, 0])  # NOT
+# Khởi tạo weight, bias (1 chiều)
+weight = np.random.randn(1)
+bias = np.random.randn()
+# Huấn luyện NOT
+for epoch in range(epochs):
+    for xi, yi in zip(X_not, y_not):
+        z = np.dot(xi, weight) + bias
+        y_pred = step(z)
+        error = yi - y_pred
+        weight += learning_rate * error * xi
+        bias += learning_rate * error
+# Test NOT
+print("Testing trained perceptron (NOT):")
+for xi in X_not:
+    z = np.dot(xi, weight) + bias
+    y_pred = step(z)
+    print(f"Input: {xi}, Predicted: {y_pred}")
+'''
+✅ Tóm tắt học thuật:
+| Tiêu chí                  | Single Layer Perceptron (SLP)         | Multi-Layer Perceptron (MLP)                                           |
+| ------------------------- | ------------------------------------- | ---------------------------------------------------------------------- |
+| **Số lớp**                | 1 lớp (input → output)                | Nhiều lớp (input → hidden → output)                                    |
+| **Hàm học**               | Tuyến tính                            | Phi tuyến tính (nhờ lớp ẩn và activation)                              |
+| **Hàm kích hoạt**         | Step function                         | Sigmoid, ReLU, Tanh hoặc các hàm phi tuyến khác                        |
+| **Khả năng học XOR**      | Không                                 | Có                                                                     |
+| **Thuật toán huấn luyện** | Perceptron learning rule              | Backpropagation + gradient descent                                     |
+| **Ứng dụng**              | Phân loại tuyến tính cơ bản (AND, OR, NOT) | Classification, regression, nhận dạng hình ảnh, NLP, dữ liệu phi tuyến |
+| **Ưu điểm**               | Đơn giản, dễ hiểu                     | Học được phi tuyến, khả năng biểu diễn cao                             |
+| **Hạn chế**               | Chỉ học tuyến tính                    | Dễ overfitting, cần tuning hyperparameters, tốn tài nguyên             |
+'''
+'''
+Epoch không phải là weight hay bias, nhưng về thuật toán học, nó được coi là hyperparameter.
+Hyperparameter = tham số do con người đặt trước khi huấn luyện (khác với parameter là giá trị học được từ dữ liệu).
+Các hyperparameters phổ biến:
+  Learning rate (𝜂)
+  Batch size
+  Số epoch
+  Số lớp ẩn, số neuron
+  Hàm kích hoạt
+Như vậy: Epoch → hyperparameter, ảnh hưởng trực tiếp đến quá trình huấn luyện.
+3. Epoch ảnh hưởng đến quá trình học như thế nào
+Quá ít epoch → underfitting, model chưa học đủ.
+Quá nhiều epoch → overfitting, model nhớ dữ liệu training quá mức, generalization kém.
+Kết hợp với learning rate → số epoch quyết định model có hội tụ hay không.
+4. Kết luận
+Epoch là hyperparameter, nhưng nó không phải parameter học được từ dữ liệu.
+Chọn epoch phù hợp = một phần quan trọng trong tuning hyperparameters để đạt hiệu quả tối ưu.
+'''

conventional_neural_network.py ADDED Viewed

	@@ -0,0 +1,103 @@

+"""
+Contain the implementation of a simple neural network
+Author: Son Phat Tran
+"""
+import numpy as np
+from utils import sigmoid, sigmoid_derivative
+class ConventionalNeuralNetwork:
+    def __init__(self, input_size, hidden_size):
+        """
+        Create a two-layer neural network
+        NOTE:
+        - The network does not include any bias b
+        - The network uses the sigmoid activation function
+        :param input_size: size of the input vector
+        :param hidden_size: size of the hidden layer
+        :return:
+        """
+        # Cache the size
+        self.input_size = input_size
+        self.hidden_size = hidden_size
+        # Create the layer
+        self.W1 = np.random.normal(size=(self.input_size, self.hidden_size))
+        self.W2 = np.random.normal(size=(self.hidden_size, 1))
+        # Create a cache
+        self.cache = {}
+    def forward(self, x_train, y_train):
+        """
+        Perform the forward pass of the neural network
+        :param x_train: the training input of the neural network
+        :param y_train: the training
+        :return: the output of the neural network
+        """
+        # Calculate the output of the first layer
+        a1 = x_train @ self.W1
+        z1 = sigmoid(a1)
+        # Calculate the output of the second layer
+        a2 = z1 @ self.W2
+        # Cache the values
+        self.cache = {
+            "x_train": x_train,
+            "y_train": y_train,
+            "a1": a1,
+            "z1": z1,
+            "a2": a2
+        }
+        # Calculate the error function
+        score = (1 / 2) * np.sum((y_train.reshape(-1) - a2.reshape(-1)) ** 2) / y_train.shape[0]
+        return a2, score
+    def predict(self, x_test):
+        """
+        Perform the prediction
+        :param x_test: the test points
+        :return: the output prediction
+        """
+        # Calculate the output of the first layer
+        a1 = x_test @ self.W1
+        z1 = sigmoid(a1)
+        # Calculate the output of the second layer
+        a2 = z1 @ self.W2
+        return a2
+    def backward(self, learning_rate):
+        """
+        Perform back-propagation
+        :param learning_rate: Learning rate of back-propagation
+        :return: None
+        """
+        # Get cached values
+        x_train, y_train, a1, z1, a2 = self.cache["x_train"], self.cache["y_train"], \
+            self.cache["a1"], self.cache["z1"], self.cache["a2"]
+        # Calculate the gradient w.r.t a2
+        d_a2 = (a2 - y_train.reshape(-1, 1)).reshape(-1, 1)
+        # Calculate the gradient w.r.t z1
+        d_z1 = d_a2 @ self.W2.T
+        # Calculate the gradient w.r.t W2
+        d_W2 = z1.T @ d_a2
+        # Calculate the gradient w.r.t a1
+        d_a1 = d_z1 * sigmoid_derivative(a1)
+        # Calculate the gradient w.r.t W1
+        d_W1 = x_train.T @ d_a1
+        # Perform back-prop
+        self.W1 -= learning_rate * d_W1
+        self.W2 -= learning_rate * d_W2