// ===========================================================================
// newnet — Neural Network from Scratch
//
// Compile: g++ -std=c++17 -O2 -pthread -o newnet main.cpp
// Run: ./newnet
//
// This trains a small network on the XOR problem to prove:
// 1. Forward pass works (matmul + bias + activation)
// 2. Backward pass works (chain rule, gradient computation)
// 3. Optimizer works (weights update, loss decreases)
// 4. Non-linear problems are solvable (XOR needs hidden layers)
// ===========================================================================
#include "core/tensor.hpp"
#include "core/backend.hpp"
#include "layers/dense.hpp"
#include "graph/graph.hpp"
#include "graph/optimizer.hpp"
#include "loss/loss.hpp"
#include <chrono>
#include <iomanip>
#include <iostream>
#include <string>
#include <thread>
using namespace newnet;
// --- Progress bar ---
/// Redraws a single-line training progress bar on stdout.
///
/// Writes a carriage return first so each call overwrites the previous bar,
/// then the bar itself, the 1-based epoch counter, the loss (6 decimals) and
/// the elapsed time (1 decimal, in milliseconds). No newline is written; the
/// stream is flushed so the partial line becomes visible immediately.
///
/// Note: std::fixed and a precision of 1 remain set on std::cout afterwards.
///
/// @param epoch        Zero-based index of the epoch that just finished.
/// @param total_epochs Total number of epochs; values <= 0 draw an empty bar.
/// @param loss         Loss value to display.
/// @param elapsed_ms   Elapsed time to display, in milliseconds.
void print_progress(int epoch, int total_epochs, float loss, float elapsed_ms) {
    constexpr int bar_width = 30;

    // Guard the division: with total_epochs == 0 the ratio would be inf/NaN,
    // and converting that to int is undefined behaviour.
    float progress = 0.0f;
    if (total_epochs > 0) {
        progress = static_cast<float>(epoch + 1) / static_cast<float>(total_epochs);
    }
    // Keep the fill level inside the bar even for out-of-range epochs.
    if (progress < 0.0f) progress = 0.0f;
    if (progress > 1.0f) progress = 1.0f;
    const int filled = static_cast<int>(bar_width * progress);

    std::cout << "\r [";
    for (int i = 0; i < bar_width; i++) {
        // Filled and empty cells must be different glyphs, otherwise the bar
        // looks identical at every stage of training.
        std::cout << (i < filled ? "█" : "░");
    }
    std::cout << "] "
              << std::setw(4) << epoch + 1 << "/" << total_epochs
              << " | loss: " << std::fixed << std::setprecision(6) << loss
              << " | " << std::fixed << std::setprecision(1) << elapsed_ms << "ms"
              << std::flush;
}
// --- Print predictions ---
/// Runs the network on `input` and prints a per-sample results table followed
/// by the overall accuracy to stdout.
///
/// A sample counts as correct when the prediction and the target fall on the
/// same side of the 0.5 threshold. Only column 0 of the output and target and
/// columns 0 and 1 of the input are read.
///
/// Note: std::fixed and the last precision used remain set on std::cout.
///
/// @param net    Model to evaluate; forward() is called once on `input`.
/// @param input  Samples, one per row (assumes at least two columns — confirm
///               against callers).
/// @param target Expected values (assumes at least input.rows() rows —
///               confirm against callers).
void print_predictions(Sequential& net, const Tensor& input, const Tensor& target) {
    Tensor output = net.forward(input);
    const auto rows = input.rows();

    std::cout << "\n ββββββββββββββββ¬βββββββββββββ¬βββββββββββββ¬ββββββββββ\n";
    std::cout << " β Input β Predicted β Target β Correct β\n";
    std::cout << " ββββββββββββββββΌβββββββββββββΌβββββββββββββΌββββββββββ€\n";

    int correct = 0;
    for (int i = 0; i < rows; i++) {
        const float pred = output(i, 0);
        const float tgt = target(i, 0);
        const bool is_correct = (pred > 0.5f) == (tgt > 0.5f);
        if (is_correct) correct++;

        std::cout << " β ["
                  << std::fixed << std::setprecision(0) << input(i, 0) << ", "
                  << input(i, 1) << "]"
                  << " β " << std::fixed << std::setprecision(4) << pred
                  << " β " << std::fixed << std::setprecision(4) << tgt
                  // The two outcomes need distinct marks; with identical
                  // glyphs a wrong prediction cannot be told from a right one.
                  << " β " << (is_correct ? "✓" : "✗") << " β\n";
    }
    std::cout << " ββββββββββββββββ΄βββββββββββββ΄βββββββββββββ΄ββββββββββ\n";

    // Avoid 0/0 (NaN) when there are no samples, and set the precision
    // explicitly instead of inheriting whatever the last table row left behind.
    const float accuracy_pct = rows > 0 ? (100.0f * correct / rows) : 0.0f;
    std::cout << " Accuracy: " << correct << "/" << rows
              << " (" << std::fixed << std::setprecision(1) << accuracy_pct << "%)\n";
}
/// Entry point: builds the XOR dataset, trains a 2-16-8-1 Dense network on it
/// with Adam + MSE, and prints progress, timing and final predictions.
///
/// @return 0 after the run completes.
int main() {
    std::cout << "\n";
    std::cout << " βββββββββββββββββββββββββββββββββββββββββββββββββ\n";
    std::cout << " β newnet v0.1 β Training Demo β\n";
    std::cout << " β Neural Network Engine from Scratch (C++) β\n";
    std::cout << " βββββββββββββββββββββββββββββββββββββββββββββββββ\n\n";

    // hardware_concurrency() is only a hint and returns 0 when the value
    // cannot be determined; do not report that as a real thread count.
    const unsigned int hw_threads = std::thread::hardware_concurrency();
    std::cout << " Hardware threads: ";
    if (hw_threads == 0) {
        std::cout << "unknown";
    } else {
        std::cout << hw_threads;
    }
    std::cout << "\n";
    std::cout << " Backend: CPU (multi-threaded)\n\n";

    // =========================================================================
    // Dataset: XOR
    // This is the simplest non-linear classification problem.
    // A single layer (linear model) CANNOT solve this.
    // You need at least one hidden layer — proving our NN works.
    // =========================================================================
    std::cout << " ββ Dataset: XOR ββββββββββββββββββββββββββββββββββ\n\n";

    Tensor input({4, 2});
    input(0,0) = 0; input(0,1) = 0;  // 0 XOR 0 = 0
    input(1,0) = 0; input(1,1) = 1;  // 0 XOR 1 = 1
    input(2,0) = 1; input(2,1) = 0;  // 1 XOR 0 = 1
    input(3,0) = 1; input(3,1) = 1;  // 1 XOR 1 = 0

    Tensor target({4, 1});
    target(0,0) = 0;
    target(1,0) = 1;
    target(2,0) = 1;
    target(3,0) = 0;

    std::cout << " Samples: 4 | Features: 2 | Output: 1 (binary)\n\n";

    // =========================================================================
    // Model: 2 -> 16 (relu) -> 8 (relu) -> 1 (sigmoid)
    // =========================================================================
    std::cout << " ββ Model Architecture ββββββββββββββββββββββββββββ\n\n";
    std::cout << " Input(2) β Dense(16, relu) β Dense(8, relu) β Dense(1, sigmoid)\n";
    // Weights + biases per layer: in*out + out.
    std::cout << " Parameters: " << (2*16+16) + (16*8+8) + (8*1+1) << " total\n\n";

    Sequential net;
    // NOTE(review): presumably Sequential::add takes ownership of these raw
    // pointers — confirm against its declaration, otherwise they leak.
    net.add(new Dense(2, 16, "relu"));
    net.add(new Dense(16, 8, "relu"));
    net.add(new Dense(8, 1, "sigmoid"));

    // =========================================================================
    // Training
    // =========================================================================
    // Single source of truth for the hyperparameters, so the banner below
    // cannot drift from the values actually used.
    constexpr float learning_rate = 0.01f;
    constexpr int epochs = 2000;

    std::cout << " ββ Training ββββββββββββββββββββββββββββββββββββββ\n\n";
    std::cout << " Optimizer: Adam (lr=" << learning_rate << ")\n";
    std::cout << " Loss: MSE\n";
    std::cout << " Epochs: " << epochs << "\n\n";

    Adam optimizer(learning_rate);
    MSELoss loss_fn;

    auto train_start = std::chrono::high_resolution_clock::now();
    float final_loss = 0.0f;

    for (int epoch = 0; epoch < epochs; epoch++) {
        // 1. Zero gradients from previous iteration
        optimizer.zero_grad(net.parameters());

        // 2. Forward pass
        Tensor output = net.forward(input);

        // 3. Compute loss
        float loss = loss_fn.forward(output, target);
        final_loss = loss;

        // 4. Backward pass — compute gradients via chain rule
        Tensor grad = loss_fn.backward();
        net.backward(grad);

        // 5. Update weights
        optimizer.step(net.parameters());

        // Redraw the progress bar every 50 epochs and on the final epoch.
        if (epoch % 50 == 0 || epoch == epochs - 1) {
            auto now = std::chrono::high_resolution_clock::now();
            float elapsed = std::chrono::duration<float, std::milli>(now - train_start).count();
            print_progress(epoch, epochs, loss, elapsed);
        }
    }

    auto train_end = std::chrono::high_resolution_clock::now();
    float total_ms = std::chrono::duration<float, std::milli>(train_end - train_start).count();

    std::cout << "\n\n Training complete in " << std::fixed << std::setprecision(1)
              << total_ms << "ms\n";
    std::cout << " Final loss: " << std::fixed << std::setprecision(6) << final_loss << "\n\n";

    // =========================================================================
    // Results
    // =========================================================================
    std::cout << " ββ Predictions βββββββββββββββββββββββββββββββββββ\n";
    print_predictions(net, input, target);

    std::cout << "\n ββ Summary βββββββββββββββββββββββββββββββββββββββ\n\n";
    std::cout << " If predictions are close to targets (>0.5 = 1, <0.5 = 0),\n";
    std::cout << " then forward pass, backward pass, and optimizer all work.\n";
    std::cout << " XOR is non-linear β a single Dense layer cannot solve it.\n";
    std::cout << " The hidden layers learned the non-linear decision boundary.\n\n";
    return 0;
}
|