yrrhall committed on
Commit
c55d2d3
·
verified ·
1 Parent(s): 77f4c50

Create self c++

Browse files
Files changed (1) hide show
  1. self c++ +474 -0
self c++ ADDED
@@ -0,0 +1,474 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#include <algorithm> // std::max, std::min, std::max_element
#include <cmath>
#include <cstdint>
#include <cstdlib>   // std::rand, RAND_MAX
#include <fstream>
#include <iostream>
#include <limits>
#include <memory> // Add this for std::shared_ptr and std::make_shared
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>
8
+
9
/// Rank-3 tensor stored as nested vectors and indexed as data[depth][row][col].
///
/// All arithmetic members return a new tensor and leave `*this` untouched.
/// `data`, `depth`, `rows` and `cols` are public; the members below assume the
/// dimension fields agree with the actual extents of `data`.
template <typename T>
class Tensor {
public:
    std::vector<std::vector<std::vector<T>>> data;
    int depth, rows, cols;

    /// Creates a zero-filled tensor of shape (d, r, c).
    /// @throws std::invalid_argument if any dimension is negative.
    Tensor(int d = 1, int r = 1, int c = 1) : depth(d), rows(r), cols(c) {
        if (d < 0 || r < 0 || c < 0) {
            throw std::invalid_argument("Tensor dimensions must be non-negative.");
        }
        data.resize(depth, std::vector<std::vector<T>>(rows, std::vector<T>(cols, static_cast<T>(0))));
    }

    /// Overwrites every element with a value derived from std::rand(), scaled
    /// into [0, 0.1]. The expression is written for floating-point T; with an
    /// integral T the division is an integer division.
    void randomize() {
        for (auto& slice : data) {
            for (auto& row : slice) {
                for (auto& value : row) {
                    value = static_cast<T>(std::rand()) / RAND_MAX * 0.1f; // Small random values
                }
            }
        }
    }

    /// Element-wise sum.
    /// @throws std::invalid_argument if the shapes differ.
    Tensor<T> add(const Tensor<T>& other) const {
        return zipElements(other, "Tensor dimensions do not match for addition.",
                           [](const T& lhs, const T& rhs) { return lhs + rhs; });
    }

    /// Element-wise difference (`*this - other`).
    /// @throws std::invalid_argument if the shapes differ.
    Tensor<T> subtract(const Tensor<T>& other) const {
        return zipElements(other, "Tensor dimensions do not match for subtraction.",
                           [](const T& lhs, const T& rhs) { return lhs - rhs; });
    }

    /// Element-wise (Hadamard) product.
    /// @throws std::invalid_argument if the shapes differ.
    Tensor<T> multiply(const Tensor<T>& other) const {
        return zipElements(other, "Tensor dimensions do not match for element-wise multiplication.",
                           [](const T& lhs, const T& rhs) { return lhs * rhs; });
    }

    /// Multiplies every element by `scalar`.
    Tensor<T> multiply(T scalar) const {
        return mapElements([scalar](const T& value) { return value * scalar; });
    }

    /// Matrix product over the last two dimensions, one product per depth slice.
    ///
    /// When `other.depth == 1` its single slice is reused for every slice of
    /// `*this`. Otherwise slice i of `*this` is multiplied by slice i of
    /// `other`; surplus slices in `other` are ignored.
    ///
    /// @throws std::invalid_argument if `cols != other.rows`, or if `other`
    ///         has fewer slices than `*this` and is not a single-slice tensor.
    Tensor<T> matmul(const Tensor<T>& other) const {
        if (cols != other.rows) {
            throw std::invalid_argument("Matrix dimensions do not match for multiplication.");
        }
        const bool shareSlice = (other.depth == 1);
        if (!shareSlice && other.depth < depth) {
            // Without this guard the loop below would index past other.data.
            throw std::invalid_argument("Tensor depths do not match for matrix multiplication.");
        }

        Tensor<T> result(depth, rows, other.cols);
        for (int i = 0; i < depth; ++i) {
            const auto& rhs = other.data[shareSlice ? 0 : i];
            for (int j = 0; j < rows; ++j) {
                auto& outRow = result.data[i][j];
                // For each output element the terms are still accumulated in
                // increasing l order; only the loop nesting changed so that
                // both operand rows are walked contiguously.
                for (int l = 0; l < cols; ++l) {
                    const T lhs = data[i][j][l];
                    const auto& rhsRow = rhs[l];
                    for (int k = 0; k < other.cols; ++k) {
                        outRow[k] += lhs * rhsRow[k];
                    }
                }
            }
        }
        return result;
    }

    /// Swaps rows and columns within every depth slice; the result has shape
    /// (depth, cols, rows).
    Tensor<T> transpose() const {
        Tensor<T> result(depth, cols, rows);
        for (int i = 0; i < depth; ++i) {
            for (int j = 0; j < rows; ++j) {
                for (int k = 0; k < cols; ++k) {
                    result.data[i][k][j] = data[i][j][k];
                }
            }
        }
        return result;
    }

    /// Applies max(0, x) to every element.
    Tensor<T> relu() const {
        return mapElements([](const T& value) { return std::max(static_cast<T>(0), value); });
    }

    /// Softmax computed jointly over all rows and columns of each depth slice
    /// (each slice sums to 1). The slice maximum is subtracted before
    /// exponentiation. Slices with no elements are left empty.
    Tensor<T> softmax() const {
        Tensor<T> result(depth, rows, cols);
        if (rows == 0 || cols == 0) {
            return result; // nothing to normalise; avoids reading data[i][0][0]
        }
        for (int i = 0; i < depth; ++i) {
            T maxVal = data[i][0][0];
            for (int j = 0; j < rows; ++j) {
                for (int k = 0; k < cols; ++k) {
                    maxVal = std::max(maxVal, data[i][j][k]);
                }
            }

            T sumExp = static_cast<T>(0);
            for (int j = 0; j < rows; ++j) {
                for (int k = 0; k < cols; ++k) {
                    const T shifted = std::exp(data[i][j][k] - maxVal);
                    result.data[i][j][k] = shifted; // kept so exp() runs once per element
                    sumExp += shifted;
                }
            }

            for (int j = 0; j < rows; ++j) {
                for (int k = 0; k < cols; ++k) {
                    result.data[i][j][k] /= sumExp;
                }
            }
        }
        return result;
    }

private:
    /// Returns a tensor of the same shape with `op` applied to each element.
    template <typename Op>
    Tensor<T> mapElements(Op op) const {
        Tensor<T> result(depth, rows, cols);
        for (int i = 0; i < depth; ++i) {
            for (int j = 0; j < rows; ++j) {
                for (int k = 0; k < cols; ++k) {
                    result.data[i][j][k] = op(data[i][j][k]);
                }
            }
        }
        return result;
    }

    /// Combines `*this` and `other` element by element with `op`, throwing
    /// std::invalid_argument(mismatchMessage) when the shapes differ.
    template <typename Op>
    Tensor<T> zipElements(const Tensor<T>& other, const char* mismatchMessage, Op op) const {
        if (depth != other.depth || rows != other.rows || cols != other.cols) {
            throw std::invalid_argument(mismatchMessage);
        }
        Tensor<T> result(depth, rows, cols);
        for (int i = 0; i < depth; ++i) {
            for (int j = 0; j < rows; ++j) {
                for (int k = 0; k < cols; ++k) {
                    result.data[i][j][k] = op(data[i][j][k], other.data[i][j][k]);
                }
            }
        }
        return result;
    }
};
168
+
169
+ // Loss Functions
170
+ float crossEntropyLoss(const Tensor<float>& predictions, const Tensor<float>& labels) {
171
+ float loss = 0.0f;
172
+ for (int i = 0; i < predictions.depth; ++i) {
173
+ for (int j = 0; j < predictions.rows; ++j) {
174
+ for (int k = 0; k < predictions.cols; ++k) {
175
+ float pred = predictions.data[i][j][k];
176
+ float label = labels.data[i][j][k];
177
+
178
+ // Ensure predictions are within valid range [epsilon, 1 - epsilon]
179
+ pred = std::max(1e-8f, std::min(1.0f - 1e-8f, pred));
180
+
181
+ // Validate labels
182
+ if (label < 0.0f || label > 1.0f) {
183
+ throw std::runtime_error("Invalid label value in cross entropy loss calculation.");
184
+ }
185
+
186
+ loss -= label * std::log(pred);
187
+ }
188
+ }
189
+ }
190
+ float avgLoss = loss / (predictions.depth * predictions.rows * predictions.cols);
191
+ return avgLoss;
192
+ }
193
+
194
+ // Neural Network Layer Base Class
195
+ class Layer {
196
+ public:
197
+ virtual Tensor<float> forward(const Tensor<float>& input) = 0;
198
+ virtual Tensor<float> backward(const Tensor<float>& outputGradient, float learningRate) = 0;
199
+ };
200
+
201
+ // Dense Layer
202
+ class DenseLayer : public Layer {
203
+ private:
204
+ Tensor<float> weights, biases;
205
+ Tensor<float> input;
206
+
207
+ public:
208
+ DenseLayer(int inputSize, int outputSize) {
209
+ weights = Tensor<float>(1, inputSize, outputSize);
210
+ biases = Tensor<float>(1, 1, outputSize);
211
+ weights.randomize();
212
+ biases.randomize();
213
+ }
214
+
215
+ Tensor<float> forward(const Tensor<float>& input) override {
216
+ this->input = input;
217
+ Tensor<float> result = input.matmul(weights).add(biases);
218
+ return result.relu(); // Use ReLU for hidden layers
219
+ }
220
+
221
+ Tensor<float> backward(const Tensor<float>& outputGradient, float learningRate) override {
222
+ Tensor<float> transposedWeights = weights.transpose();
223
+ Tensor<float> inputGradient = outputGradient.matmul(transposedWeights);
224
+
225
+ Tensor<float> weightGradient = input.transpose().matmul(outputGradient);
226
+ Tensor<float> biasGradient = outputGradient;
227
+
228
+ weights = weights.subtract(weightGradient.multiply(learningRate));
229
+ biases = biases.subtract(biasGradient.multiply(learningRate));
230
+
231
+ return inputGradient;
232
+ }
233
+ };
234
+
235
+ // Batch Normalization Layer
236
+ class BatchNormLayer : public Layer {
237
+ private:
238
+ Tensor<float> gamma, beta;
239
+ Tensor<float> runningMean, runningVariance;
240
+ float momentum;
241
+
242
+ public:
243
+ BatchNormLayer(int size, float momentum = 0.9f) : momentum(momentum) {
244
+ gamma = Tensor<float>(1, 1, size);
245
+ beta = Tensor<float>(1, 1, size);
246
+ runningMean = Tensor<float>(1, 1, size);
247
+ runningVariance = Tensor<float>(1, 1, size);
248
+ gamma.randomize();
249
+ beta.randomize();
250
+ }
251
+
252
+ Tensor<float> forward(const Tensor<float>& input) override {
253
+ // Calculate mean and variance
254
+ Tensor<float> mean = Tensor<float>(1, 1, input.cols);
255
+ Tensor<float> variance = Tensor<float>(1, 1, input.cols);
256
+ for (int k = 0; k < input.cols; ++k) {
257
+ float sum = 0.0f;
258
+ for (int i = 0; i < input.depth; ++i) {
259
+ for (int j = 0; j < input.rows; ++j) {
260
+ sum += input.data[i][j][k];
261
+ }
262
+ }
263
+ mean.data[0][0][k] = sum / (input.depth * input.rows);
264
+
265
+ float varSum = 0.0f;
266
+ for (int i = 0; i < input.depth; ++i) {
267
+ for (int j = 0; j < input.rows; ++j) {
268
+ varSum += std::pow(input.data[i][j][k] - mean.data[0][0][k], 2);
269
+ }
270
+ }
271
+ variance.data[0][0][k] = varSum / (input.depth * input.rows);
272
+ }
273
+
274
+ // Update running mean and variance
275
+ runningMean = runningMean.multiply(momentum).add(mean.multiply(1.0f - momentum));
276
+ runningVariance = runningVariance.multiply(momentum).add(variance.multiply(1.0f - momentum));
277
+
278
+ // Normalize input
279
+ Tensor<float> normalized = input;
280
+ for (int k = 0; k < input.cols; ++k) {
281
+ for (int i = 0; i < input.depth; ++i) {
282
+ for (int j = 0; j < input.rows; ++j) {
283
+ normalized.data[i][j][k] = (input.data[i][j][k] - mean.data[0][0][k]) /
284
+ std::sqrt(variance.data[0][0][k] + 1e-8f);
285
+ }
286
+ }
287
+ }
288
+
289
+ // Scale and shift
290
+ Tensor<float> result = normalized.multiply(gamma).add(beta);
291
+ return result;
292
+ }
293
+
294
+ Tensor<float> backward(const Tensor<float>& outputGradient, float learningRate) override {
295
+ // Simplified backpropagation for batch normalization
296
+ return outputGradient;
297
+ }
298
+ };
299
+
300
+ // Neural Network
301
+ class NeuralNetwork {
302
+ private:
303
+ std::vector<std::shared_ptr<Layer>> layers; // Define layers as a vector of shared pointers
304
+
305
+ public:
306
+ void addLayer(std::shared_ptr<Layer> layer) {
307
+ layers.push_back(layer);
308
+ }
309
+
310
+ Tensor<float> forward(const Tensor<float>& input) {
311
+ Tensor<float> output = input;
312
+ for (const auto& layer : layers) {
313
+ output = layer->forward(output);
314
+ }
315
+ return output;
316
+ }
317
+
318
+ void train(const std::vector<Tensor<float>>& inputs, const std::vector<Tensor<float>>& labels, int epochs, float learningRate) {
319
+ if (inputs.empty() || labels.empty() || inputs.size() != labels.size()) {
320
+ throw std::invalid_argument("Inputs and labels must be non-empty and have the same size.");
321
+ }
322
+
323
+ std::vector<float> losses; // To store loss values for plotting
324
+
325
+ for (int epoch = 0; epoch < epochs; ++epoch) {
326
+ float totalLoss = 0.0f;
327
+ for (size_t i = 0; i < inputs.size(); ++i) {
328
+ Tensor<float> output = forward(inputs[i]);
329
+ float loss = ::crossEntropyLoss(output, labels[i]);
330
+ totalLoss += loss;
331
+
332
+ // Compute gradients (example)
333
+ Tensor<float> gradients = output.subtract(labels[i]);
334
+
335
+ // Backpropagation
336
+ for (auto it = layers.rbegin(); it != layers.rend(); ++it) {
337
+ gradients = (*it)->backward(gradients, learningRate);
338
+ }
339
+ }
340
+ float avgLoss = totalLoss / inputs.size();
341
+ losses.push_back(avgLoss);
342
+ std::cout << "Epoch " << epoch + 1 << ", Loss: " << avgLoss << std::endl;
343
+ }
344
+
345
+ // Save losses to a file for plotting
346
+ std::ofstream lossFile("losses.txt");
347
+ for (float loss : losses) {
348
+ lossFile << loss << "\n";
349
+ }
350
+ lossFile.close();
351
+ }
352
+
353
+ // Evaluate the model on test data
354
+ float evaluate(const std::vector<Tensor<float>>& inputs, const std::vector<Tensor<float>>& labels) {
355
+ if (inputs.empty() || labels.empty() || inputs.size() != labels.size()) {
356
+ throw std::invalid_argument("Inputs and labels must be non-empty and have the same size.");
357
+ }
358
+
359
+ int correctPredictions = 0;
360
+ for (size_t i = 0; i < inputs.size(); ++i) {
361
+ Tensor<float> output = forward(inputs[i]);
362
+ Tensor<float> label = labels[i];
363
+
364
+ // Find the index of the maximum value in the output and label
365
+ int predictedClass = 0, trueClass = 0;
366
+ float maxOutput = output.data[0][0][0], maxLabel = label.data[0][0][0];
367
+ for (int k = 0; k < output.cols; ++k) {
368
+ if (output.data[0][0][k] > maxOutput) {
369
+ maxOutput = output.data[0][0][k];
370
+ predictedClass = k;
371
+ }
372
+ if (label.data[0][0][k] > maxLabel) {
373
+ maxLabel = label.data[0][0][k];
374
+ trueClass = k;
375
+ }
376
+ }
377
+
378
+ if (predictedClass == trueClass) {
379
+ ++correctPredictions;
380
+ }
381
+ }
382
+
383
+ float accuracy = static_cast<float>(correctPredictions) / inputs.size();
384
+ std::cout << "Accuracy: " << accuracy * 100.0f << "%" << std::endl;
385
+ return accuracy;
386
+ }
387
+ };
388
+
389
+ // Function to load MNIST dataset from binary files
390
+ std::pair<std::vector<Tensor<float>>, std::vector<Tensor<float>>> loadMNIST(const std::string& imageFile, const std::string& labelFile) {
391
+ std::vector<Tensor<float>> images;
392
+ std::vector<Tensor<float>> labels;
393
+
394
+ // Load images
395
+ std::ifstream imageStream(imageFile, std::ios::binary);
396
+ if (!imageStream) {
397
+ throw std::runtime_error("Failed to open image file.");
398
+ }
399
+
400
+ // Read image file header
401
+ uint32_t magicNumber, numImages, numRows, numCols;
402
+ imageStream.read(reinterpret_cast<char*>(&magicNumber), sizeof(magicNumber));
403
+ imageStream.read(reinterpret_cast<char*>(&numImages), sizeof(numImages));
404
+ imageStream.read(reinterpret_cast<char*>(&numRows), sizeof(numRows));
405
+ imageStream.read(reinterpret_cast<char*>(&numCols), sizeof(numCols));
406
+
407
+ magicNumber = __builtin_bswap32(magicNumber); // Convert from big-endian to little-endian
408
+ numImages = __builtin_bswap32(numImages);
409
+ numRows = __builtin_bswap32(numRows);
410
+ numCols = __builtin_bswap32(numCols);
411
+
412
+ for (uint32_t i = 0; i < numImages; ++i) {
413
+ Tensor<float> image(1, 1, numRows * numCols);
414
+ for (int j = 0; j < numRows * numCols; ++j) {
415
+ unsigned char pixel;
416
+ imageStream.read(reinterpret_cast<char*>(&pixel), sizeof(pixel));
417
+ image.data[0][0][j] = static_cast<float>(pixel) / 255.0f; // Normalize to [0, 1]
418
+ }
419
+ images.push_back(image);
420
+ }
421
+
422
+ // Load labels
423
+ std::ifstream labelStream(labelFile, std::ios::binary);
424
+ if (!labelStream) {
425
+ throw std::runtime_error("Failed to open label file.");
426
+ }
427
+
428
+ // Read label file header
429
+ uint32_t labelMagicNumber, numLabels;
430
+ labelStream.read(reinterpret_cast<char*>(&labelMagicNumber), sizeof(labelMagicNumber));
431
+ labelStream.read(reinterpret_cast<char*>(&numLabels), sizeof(numLabels));
432
+
433
+ labelMagicNumber = __builtin_bswap32(labelMagicNumber);
434
+ numLabels = __builtin_bswap32(numLabels);
435
+
436
+ for (uint32_t i = 0; i < numLabels; ++i) {
437
+ Tensor<float> label(1, 1, 10); // One-hot encoding for 10 classes
438
+ unsigned char labelValue;
439
+ labelStream.read(reinterpret_cast<char*>(&labelValue), sizeof(labelValue));
440
+ label.data[0][0][labelValue] = 1.0f; // Set the corresponding class to 1
441
+ labels.push_back(label);
442
+ }
443
+
444
+ return {images, labels};
445
+ }
446
+
447
+ // Main Function
448
+ int main() {
449
+ try {
450
+ // Load MNIST dataset
451
+ auto [images, labels] = loadMNIST("train-images.idx3-ubyte", "train-labels.idx1-ubyte");
452
+
453
+ // Create neural network
454
+ NeuralNetwork nn;
455
+ nn.addLayer(std::make_shared<DenseLayer>(784, 256)); // Hidden layer with 256 neurons
456
+ nn.addLayer(std::make_shared<BatchNormLayer>(256)); // Batch Normalization layer
457
+ nn.addLayer(std::make_shared<DenseLayer>(256, 128)); // Another hidden layer with 128 neurons
458
+ nn.addLayer(std::make_shared<BatchNormLayer>(128)); // Batch Normalization layer
459
+ nn.addLayer(std::make_shared<DenseLayer>(128, 64)); // Another hidden layer with 64 neurons
460
+ nn.addLayer(std::make_shared<DenseLayer>(64, 10)); // Output layer with 10 neurons
461
+
462
+ // Train neural network
463
+ nn.train(images, labels, 20, 0.001); // Train for 20 epochs with learning rate 0.001
464
+
465
+ // Evaluate the model
466
+ nn.evaluate(images, labels);
467
+
468
+ // Note: Plot the losses using Python's Matplotlib by reading "losses.txt"
469
+ } catch (const std::exception& e) {
470
+ std::cerr << "Error: " << e.what() << std::endl;
471
+ }
472
+
473
+ return 0;
474
+ }