File size: 8,284 Bytes
e5dcb32
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
// ===========================================================================
// newnet — Neural Network from Scratch
// 
// Compile: g++ -std=c++17 -O2 -pthread -o newnet main.cpp
// Run:     ./newnet
//
// This trains a small network on the XOR problem to prove:
// 1. Forward pass works (matmul + bias + activation)
// 2. Backward pass works (chain rule, gradient computation)
// 3. Optimizer works (weights update, loss decreases)
// 4. Non-linear problems are solvable (XOR needs hidden layers)
// ===========================================================================

#include "core/tensor.hpp"
#include "core/backend.hpp"
#include "layers/dense.hpp"
#include "graph/graph.hpp"
#include "graph/optimizer.hpp"
#include "loss/loss.hpp"

#include <chrono>
#include <iomanip>
#include <iostream>
#include <string>
#include <thread>

using namespace newnet;

// --- Progress bar ---
// Redraws a one-line training progress bar on std::cout.
//
// The line starts with '\r' and ends with a flush (no newline), so repeated
// calls overwrite the same terminal line.
//
// Parameters:
//   epoch        - zero-based index of the epoch just completed; shown as epoch + 1.
//   total_epochs - total number of epochs. A non-positive value draws an empty
//                  bar instead of dividing by zero.
//   loss         - current loss, printed with 6 decimals.
//   elapsed_ms   - elapsed time in milliseconds, printed with 1 decimal.
//
// Side effect: std::cout is left in std::fixed mode with precision 1.
void print_progress(int epoch, int total_epochs, float loss, float elapsed_ms) {
    constexpr int bar_width = 30;

    // Compute the number of filled cells. The ratio is only formed for a
    // positive total and is clamped to [0, 1]: converting an infinite, NaN or
    // out-of-range float to int is undefined behaviour.
    int filled = 0;
    if (total_epochs > 0) {
        float progress = static_cast<float>(epoch + 1) / total_epochs;
        if (progress < 0.0f) progress = 0.0f;
        if (progress > 1.0f) progress = 1.0f;
        filled = static_cast<int>(bar_width * progress);
    }

    std::cout << "\r  [";
    for (int cell = 0; cell < bar_width; ++cell) {
        std::cout << (cell < filled ? "█" : "░");
    }
    std::cout << "] "
              << std::setw(4) << epoch + 1 << "/" << total_epochs
              << " | loss: " << std::fixed << std::setprecision(6) << loss
              << " | " << std::fixed << std::setprecision(1) << elapsed_ms << "ms"
              << std::flush;
}

// --- Print predictions ---
// Runs one forward pass of `net` over `input` and prints a table with one row
// per input row: the two input values, the prediction, the target and a
// check/cross mark, followed by an accuracy line.
//
// Parameters:
//   net    - network to evaluate; forward() is called once.
//   input  - samples; columns 0 and 1 of every row are printed.
//   target - expected values; column 0 of every row is read.
//
// A row counts as correct when prediction and target fall on the same side of
// the 0.5 threshold.
//
// Side effect: std::cout is left in std::fixed mode with precision 1.
void print_predictions(Sequential& net, const Tensor& input, const Tensor& target) {
    // Character widths of the four table columns; the header text and the row
    // cells below are padded to exactly these widths.
    constexpr int kInputCol = 14;
    constexpr int kPredictedCol = 12;
    constexpr int kTargetCol = 12;
    constexpr int kCorrectCol = 9;

    // Builds a run of `count` horizontal box-drawing characters.
    const auto dashes = [](int count) {
        std::string run;
        for (int k = 0; k < count; ++k) run += "─";
        return run;
    };
    // Builds one horizontal border line from its corner and joint glyphs.
    const auto rule = [&](const char* left, const char* joint, const char* right) {
        return "  " + std::string(left)
             + dashes(kInputCol) + joint
             + dashes(kPredictedCol) + joint
             + dashes(kTargetCol) + joint
             + dashes(kCorrectCol) + right + "\n";
    };

    Tensor output = net.forward(input);
    const auto row_count = input.rows();

    std::cout << "\n" << rule("┌", "┬", "┐");
    std::cout << "  │    Input     │  Predicted │   Target   │ Correct │\n";
    std::cout << rule("├", "┼", "┤");

    int correct = 0;
    for (int i = 0; i < row_count; i++) {
        const float pred = output(i, 0);
        const float tgt = target(i, 0);
        const bool is_correct = (pred > 0.5f) == (tgt > 0.5f);
        if (is_correct) correct++;

        // Inputs are printed without decimals, prediction and target with four.
        // The last cell is padded to the 9-character "Correct" column so the
        // right border lines up with the header.
        std::cout << "  │  ["
                  << std::fixed << std::setprecision(0) << input(i, 0) << ", "
                  << input(i, 1) << "]"
                  << "      │   " << std::fixed << std::setprecision(4) << pred
                  << "   │   " << std::fixed << std::setprecision(4) << tgt
                  << "   │    " << (is_correct ? "✓" : "✗") << "    │\n";
    }

    std::cout << rule("└", "┴", "┘");

    // Avoid 0/0 for an empty input. Precision is set explicitly because the
    // four-decimal setting from the row loop would otherwise carry over.
    const float accuracy_pct = row_count > 0 ? 100.0f * correct / row_count : 0.0f;
    std::cout << "  Accuracy: " << correct << "/" << row_count
              << " (" << std::fixed << std::setprecision(1) << accuracy_pct << "%)\n";
}

int main() {
    std::cout << "\n";
    std::cout << "  ╔═══════════════════════════════════════════════╗\n";
    std::cout << "  β•‘         newnet v0.1 β€” Training Demo           β•‘\n";
    std::cout << "  β•‘    Neural Network Engine from Scratch (C++)   β•‘\n";
    std::cout << "  β•šβ•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•\n\n";
    
    std::cout << "  Hardware threads: " << std::thread::hardware_concurrency() << "\n";
    std::cout << "  Backend: CPU (multi-threaded)\n\n";
    
    // =========================================================================
    // Dataset: XOR
    // This is the simplest non-linear classification problem.
    // A single layer (linear model) CANNOT solve this.
    // You need at least one hidden layer β€” proving our NN works.
    // =========================================================================
    
    std::cout << "  ── Dataset: XOR ──────────────────────────────────\n\n";
    
    Tensor input({4, 2});
    input(0,0) = 0; input(0,1) = 0;  // 0 XOR 0 = 0
    input(1,0) = 0; input(1,1) = 1;  // 0 XOR 1 = 1
    input(2,0) = 1; input(2,1) = 0;  // 1 XOR 0 = 1
    input(3,0) = 1; input(3,1) = 1;  // 1 XOR 1 = 0
    
    Tensor target({4, 1});
    target(0,0) = 0;
    target(1,0) = 1;
    target(2,0) = 1;
    target(3,0) = 0;
    
    std::cout << "  Samples: 4  |  Features: 2  |  Output: 1 (binary)\n\n";
    
    // =========================================================================
    // Model: 2 β†’ 16 (relu) β†’ 8 (relu) β†’ 1 (sigmoid)
    // =========================================================================
    
    std::cout << "  ── Model Architecture ────────────────────────────\n\n";
    std::cout << "  Input(2) β†’ Dense(16, relu) β†’ Dense(8, relu) β†’ Dense(1, sigmoid)\n";
    std::cout << "  Parameters: " << (2*16+16) + (16*8+8) + (8*1+1) << " total\n\n";
    
    Sequential net;
    net.add(new Dense(2, 16, "relu"));
    net.add(new Dense(16, 8, "relu"));
    net.add(new Dense(8, 1, "sigmoid"));
    
    // =========================================================================
    // Training
    // =========================================================================
    
    std::cout << "  ── Training ──────────────────────────────────────\n\n";
    std::cout << "  Optimizer: Adam (lr=0.01)\n";
    std::cout << "  Loss: MSE\n";
    std::cout << "  Epochs: 2000\n\n";
    
    Adam optimizer(0.01f);
    MSELoss loss_fn;
    
    int epochs = 2000;
    auto train_start = std::chrono::high_resolution_clock::now();
    
    float final_loss = 0.0f;
    
    for (int epoch = 0; epoch < epochs; epoch++) {
        // 1. Zero gradients from previous iteration
        optimizer.zero_grad(net.parameters());
        
        // 2. Forward pass
        Tensor output = net.forward(input);
        
        // 3. Compute loss
        float loss = loss_fn.forward(output, target);
        final_loss = loss;
        
        // 4. Backward pass β€” compute gradients via chain rule
        Tensor grad = loss_fn.backward();
        net.backward(grad);
        
        // 5. Update weights
        optimizer.step(net.parameters());
        
        // Progress bar update
        if (epoch % 50 == 0 || epoch == epochs - 1) {
            auto now = std::chrono::high_resolution_clock::now();
            float elapsed = std::chrono::duration<float, std::milli>(now - train_start).count();
            print_progress(epoch, epochs, loss, elapsed);
        }
    }
    
    auto train_end = std::chrono::high_resolution_clock::now();
    float total_ms = std::chrono::duration<float, std::milli>(train_end - train_start).count();
    
    std::cout << "\n\n  Training complete in " << std::fixed << std::setprecision(1) 
              << total_ms << "ms\n";
    std::cout << "  Final loss: " << std::fixed << std::setprecision(6) << final_loss << "\n\n";
    
    // =========================================================================
    // Results
    // =========================================================================
    
    std::cout << "  ── Predictions ───────────────────────────────────\n";
    print_predictions(net, input, target);
    
    std::cout << "\n  ── Summary ───────────────────────────────────────\n\n";
    std::cout << "  If predictions are close to targets (>0.5 = 1, <0.5 = 0),\n";
    std::cout << "  then forward pass, backward pass, and optimizer all work.\n";
    std::cout << "  XOR is non-linear β€” a single Dense layer cannot solve it.\n";
    std::cout << "  The hidden layers learned the non-linear decision boundary.\n\n";
    
    return 0;
}