yrrhall
/

yazan_2

Model card Files Files and versions

xet

Community

yrrhall committed on Feb 8, 2025

Commit

c55d2d3

verified ·

1 Parent(s): 77f4c50

Create self c++

Browse files

Files changed (1) hide show

self c++ +474 -0

self c++ ADDED Viewed

	@@ -0,0 +1,474 @@

+#include <algorithm>
+#include <cmath>
+#include <cstddef>
+#include <cstdint>
+#include <cstdlib>
+#include <fstream>
+#include <iostream>
+#include <limits>
+#include <memory> // std::shared_ptr and std::make_shared
+#include <stdexcept>
+#include <string>
+#include <utility>
+#include <vector>
+// Template-based Tensor Class
+// A dense 3-D tensor (depth x rows x cols) stored as nested std::vector.
+// Arithmetic members return a new tensor and never modify their operands.
+template <typename T>
+class Tensor {
+public:
+    std::vector<std::vector<std::vector<T>>> data; // indexed as data[depth][row][col]
+    int depth, rows, cols;
+    // Builds a zero-filled tensor with the given dimensions.
+    // Throws std::invalid_argument for a negative dimension, which would
+    // otherwise be converted to an enormous unsigned size by resize().
+    Tensor(int d = 1, int r = 1, int c = 1) : depth(d), rows(r), cols(c) {
+        if (d < 0 || r < 0 || c < 0) {
+            throw std::invalid_argument("Tensor dimensions must be non-negative.");
+        }
+        data.resize(depth, std::vector<std::vector<T>>(rows, std::vector<T>(cols, static_cast<T>(0))));
+    }
+    // Overwrites every element with a small pseudo-random value in [0, 0.1] taken from rand().
+    void randomize() {
+        for (int i = 0; i < depth; ++i) {
+            for (int j = 0; j < rows; ++j) {
+                for (int k = 0; k < cols; ++k) {
+                    data[i][j][k] = static_cast<T>(rand()) / RAND_MAX * 0.1f;
+                }
+            }
+        }
+    }
+    // Element-wise addition. Throws std::invalid_argument when the shapes differ.
+    Tensor<T> add(const Tensor<T>& other) const {
+        requireSameShape(other, "Tensor dimensions do not match for addition.");
+        return generate([&](int i, int j, int k) { return data[i][j][k] + other.data[i][j][k]; });
+    }
+    // Element-wise subtraction. Throws std::invalid_argument when the shapes differ.
+    Tensor<T> subtract(const Tensor<T>& other) const {
+        requireSameShape(other, "Tensor dimensions do not match for subtraction.");
+        return generate([&](int i, int j, int k) { return data[i][j][k] - other.data[i][j][k]; });
+    }
+    // Element-wise (Hadamard) product. Throws std::invalid_argument when the shapes differ.
+    Tensor<T> multiply(const Tensor<T>& other) const {
+        requireSameShape(other, "Tensor dimensions do not match for element-wise multiplication.");
+        return generate([&](int i, int j, int k) { return data[i][j][k] * other.data[i][j][k]; });
+    }
+    // Multiplies every element by `scalar`.
+    Tensor<T> multiply(T scalar) const {
+        return generate([&](int i, int j, int k) { return data[i][j][k] * scalar; });
+    }
+    // Matrix product over the last two dimensions, computed once per depth slice.
+    // `other` must have either the same depth or depth 1; a depth-1 operand is
+    // reused for every slice. Throws std::invalid_argument when cols != other.rows
+    // or when the depths are incompatible (previously an out-of-bounds read).
+    Tensor<T> matmul(const Tensor<T>& other) const {
+        if (cols != other.rows) {
+            throw std::invalid_argument("Matrix dimensions do not match for multiplication.");
+        }
+        if (other.depth != depth && other.depth != 1) {
+            throw std::invalid_argument("Tensor depths do not match for matrix multiplication.");
+        }
+        Tensor<T> result(depth, rows, other.cols);
+        for (int i = 0; i < depth; ++i) {
+            const auto& rhs = other.data[other.depth == 1 ? 0 : i];
+            for (int j = 0; j < rows; ++j) {
+                // l-outer / k-inner walks both operands row by row; each output
+                // element still accumulates its terms in increasing l order.
+                for (int l = 0; l < cols; ++l) {
+                    const T lhs = data[i][j][l];
+                    for (int k = 0; k < other.cols; ++k) {
+                        result.data[i][j][k] += lhs * rhs[l][k];
+                    }
+                }
+            }
+        }
+        return result;
+    }
+    // Swaps rows and columns inside every depth slice; the depth is preserved.
+    Tensor<T> transpose() const {
+        Tensor<T> result(depth, cols, rows);
+        for (int i = 0; i < depth; ++i) {
+            for (int j = 0; j < rows; ++j) {
+                for (int k = 0; k < cols; ++k) {
+                    result.data[i][k][j] = data[i][j][k];
+                }
+            }
+        }
+        return result;
+    }
+    // Applies ReLU, max(0, x), to every element.
+    Tensor<T> relu() const {
+        return generate([this](int i, int j, int k) { return std::max(static_cast<T>(0), data[i][j][k]); });
+    }
+    // Applies softmax across ALL elements of each depth slice (rows and columns
+    // together), so every slice of the result sums to 1. The slice maximum is
+    // subtracted before exponentiation for numerical stability. Slices without
+    // elements are returned unchanged.
+    Tensor<T> softmax() const {
+        Tensor<T> result(depth, rows, cols);
+        if (rows == 0 || cols == 0) {
+            return result;
+        }
+        for (int i = 0; i < depth; ++i) {
+            T maxVal = data[i][0][0];
+            for (int j = 0; j < rows; ++j) {
+                for (int k = 0; k < cols; ++k) {
+                    if (data[i][j][k] > maxVal) {
+                        maxVal = data[i][j][k];
+                    }
+                }
+            }
+            T sumExp = static_cast<T>(0);
+            for (int j = 0; j < rows; ++j) {
+                for (int k = 0; k < cols; ++k) {
+                    result.data[i][j][k] = std::exp(data[i][j][k] - maxVal);
+                    sumExp += result.data[i][j][k];
+                }
+            }
+            for (int j = 0; j < rows; ++j) {
+                for (int k = 0; k < cols; ++k) {
+                    result.data[i][j][k] /= sumExp;
+                }
+            }
+        }
+        return result;
+    }
+private:
+    // Throws std::invalid_argument carrying `message` unless `other` has exactly this shape.
+    void requireSameShape(const Tensor<T>& other, const char* message) const {
+        if (depth != other.depth || rows != other.rows || cols != other.cols) {
+            throw std::invalid_argument(message);
+        }
+    }
+    // Builds a tensor of this shape whose element (i, j, k) equals op(i, j, k).
+    template <typename Op>
+    Tensor<T> generate(Op op) const {
+        Tensor<T> result(depth, rows, cols);
+        for (int i = 0; i < depth; ++i) {
+            for (int j = 0; j < rows; ++j) {
+                for (int k = 0; k < cols; ++k) {
+                    result.data[i][j][k] = op(i, j, k);
+                }
+            }
+        }
+        return result;
+    }
+};
+// Loss Functions
+// Cross-entropy between `predictions` and `labels`, averaged over every tensor
+// element (depth * rows * cols).
+// Predictions are clamped to [1e-8, 1 - 1e-8] before the logarithm so that a
+// prediction of exactly 0 cannot produce -infinity.
+// Returns 0 for tensors without elements.
+// Throws std::invalid_argument when the two shapes differ and
+// std::runtime_error when a label lies outside [0, 1].
+float crossEntropyLoss(const Tensor<float>& predictions, const Tensor<float>& labels) {
+    if (predictions.depth != labels.depth || predictions.rows != labels.rows || predictions.cols != labels.cols) {
+        throw std::invalid_argument("Prediction and label dimensions do not match in cross entropy loss calculation.");
+    }
+    const int elementCount = predictions.depth * predictions.rows * predictions.cols;
+    if (elementCount == 0) {
+        return 0.0f; // nothing to average; avoids the 0/0 NaN
+    }
+    const float epsilon = 1e-8f;
+    float loss = 0.0f;
+    for (int i = 0; i < predictions.depth; ++i) {
+        for (int j = 0; j < predictions.rows; ++j) {
+            for (int k = 0; k < predictions.cols; ++k) {
+                const float label = labels.data[i][j][k];
+                if (label < 0.0f || label > 1.0f) {
+                    throw std::runtime_error("Invalid label value in cross entropy loss calculation.");
+                }
+                const float pred = std::max(epsilon, std::min(1.0f - epsilon, predictions.data[i][j][k]));
+                loss -= label * std::log(pred);
+            }
+        }
+    }
+    return loss / elementCount;
+}
+// Neural Network Layer Base Class
+// Interface shared by every layer that can be stacked in a NeuralNetwork.
+class Layer {
+public:
+    // Virtual destructor so derived layers are destroyed correctly through a Layer pointer.
+    virtual ~Layer() = default;
+    // Computes the layer output for `input`.
+    virtual Tensor<float> forward(const Tensor<float>& input) = 0;
+    // Receives the loss gradient with respect to the layer output, lets the layer
+    // update its own parameters using `learningRate`, and returns the gradient
+    // with respect to the layer input.
+    virtual Tensor<float> backward(const Tensor<float>& outputGradient, float learningRate) = 0;
+};
+// Dense Layer
+// Fully connected layer followed by a ReLU activation:
+//   output = relu(input * weights + biases)
+// The input is a depth-1 tensor whose rows are samples and whose columns are features.
+class DenseLayer : public Layer {
+private:
+    Tensor<float> weights, biases; // weights: (1, inputSize, outputSize), biases: (1, 1, outputSize)
+    Tensor<float> input;           // input of the most recent forward() call
+    Tensor<float> output;          // activation of the most recent forward() call
+public:
+    // Creates the layer with small random weights and biases.
+    DenseLayer(int inputSize, int outputSize)
+        : weights(1, inputSize, outputSize), biases(1, 1, outputSize) {
+        weights.randomize();
+        biases.randomize();
+    }
+    // Computes relu(input * weights + biases); the bias row is added to every sample row.
+    // Throws std::invalid_argument when the input depth is not 1 or its column
+    // count differs from the layer's input size.
+    Tensor<float> forward(const Tensor<float>& input) override {
+        if (input.depth != 1) {
+            throw std::invalid_argument("DenseLayer expects an input tensor of depth 1.");
+        }
+        Tensor<float> result = input.matmul(weights);
+        for (int j = 0; j < result.rows; ++j) {
+            for (int k = 0; k < result.cols; ++k) {
+                result.data[0][j][k] += biases.data[0][0][k];
+            }
+        }
+        this->input = input;
+        output = result.relu();
+        return output;
+    }
+    // Back-propagates through the ReLU and the affine map, applies one gradient
+    // descent step to the weights and biases, and returns the input gradient.
+    // Throws std::invalid_argument when `outputGradient` does not have the shape
+    // of the output produced by the last forward() call.
+    Tensor<float> backward(const Tensor<float>& outputGradient, float learningRate) override {
+        if (outputGradient.depth != output.depth || outputGradient.rows != output.rows ||
+            outputGradient.cols != output.cols || output.cols != biases.cols) {
+            throw std::invalid_argument("Output gradient dimensions do not match the last DenseLayer output.");
+        }
+        // ReLU derivative: no gradient flows through units that were clamped to zero.
+        Tensor<float> activeGradient = outputGradient;
+        // The bias is shared by every sample row, so its gradient is the column sum.
+        Tensor<float> biasGradient(1, 1, biases.cols);
+        for (int j = 0; j < output.rows; ++j) {
+            for (int k = 0; k < output.cols; ++k) {
+                if (output.data[0][j][k] <= 0.0f) {
+                    activeGradient.data[0][j][k] = 0.0f;
+                }
+                biasGradient.data[0][0][k] += activeGradient.data[0][j][k];
+            }
+        }
+        // The input gradient must use the weights as they were during forward(),
+        // so it is computed before the update below.
+        Tensor<float> inputGradient = activeGradient.matmul(weights.transpose());
+        Tensor<float> weightGradient = input.transpose().matmul(activeGradient);
+        weights = weights.subtract(weightGradient.multiply(learningRate));
+        biases = biases.subtract(biasGradient.multiply(learningRate));
+        return inputGradient;
+    }
+};
+// Batch Normalization Layer
+// Normalizes each feature (column) and then applies a learned scale (gamma) and
+// shift (beta). Samples are all rows of all depth slices of the input.
+// - More than one sample: the batch mean/variance are used and folded into the
+//   running statistics.
+// - A single sample: batch statistics are degenerate (variance 0 would erase
+//   the input), so the running statistics are used and left unchanged.
+class BatchNormLayer : public Layer {
+private:
+    Tensor<float> gamma, beta;                  // per-feature scale and shift, shape (1, 1, size)
+    Tensor<float> runningMean, runningVariance; // moving averages, shape (1, 1, size)
+    float momentum;                             // weight kept by the old running value on each update
+    Tensor<float> normalized;                   // normalized input cached by forward() for backward()
+    std::vector<float> invStd;                  // 1 / sqrt(variance + epsilon) per feature, cached by forward()
+    bool usedBatchStats = false;                // whether the last forward() normalized with batch statistics
+public:
+    // Starts as an identity transform: gamma = 1, beta = 0, running mean 0, running variance 1.
+    BatchNormLayer(int size, float momentum = 0.9f)
+        : gamma(1, 1, size), beta(1, 1, size), runningMean(1, 1, size), runningVariance(1, 1, size),
+          momentum(momentum) {
+        for (int k = 0; k < size; ++k) {
+            gamma.data[0][0][k] = 1.0f;
+            runningVariance.data[0][0][k] = 1.0f;
+        }
+    }
+    // Returns gamma * normalized(input) + beta, applied per feature column.
+    // Throws std::invalid_argument when input.cols differs from the layer size.
+    Tensor<float> forward(const Tensor<float>& input) override {
+        const int features = input.cols;
+        if (features != gamma.cols) {
+            throw std::invalid_argument("BatchNormLayer input width does not match the layer size.");
+        }
+        const int samples = input.depth * input.rows;
+        const float epsilon = 1e-8f; // keeps the division finite when the variance is 0
+        Tensor<float> mean(1, 1, features);
+        Tensor<float> variance(1, 1, features);
+        usedBatchStats = samples > 1;
+        if (usedBatchStats) {
+            for (int k = 0; k < features; ++k) {
+                float sum = 0.0f;
+                for (int i = 0; i < input.depth; ++i) {
+                    for (int j = 0; j < input.rows; ++j) {
+                        sum += input.data[i][j][k];
+                    }
+                }
+                mean.data[0][0][k] = sum / samples;
+                float varSum = 0.0f;
+                for (int i = 0; i < input.depth; ++i) {
+                    for (int j = 0; j < input.rows; ++j) {
+                        const float diff = input.data[i][j][k] - mean.data[0][0][k];
+                        varSum += diff * diff;
+                    }
+                }
+                variance.data[0][0][k] = varSum / samples;
+            }
+            // Update running mean and variance
+            runningMean = runningMean.multiply(momentum).add(mean.multiply(1.0f - momentum));
+            runningVariance = runningVariance.multiply(momentum).add(variance.multiply(1.0f - momentum));
+        } else {
+            mean = runningMean;
+            variance = runningVariance;
+        }
+        invStd.assign(features, 0.0f);
+        for (int k = 0; k < features; ++k) {
+            invStd[k] = 1.0f / std::sqrt(variance.data[0][0][k] + epsilon);
+        }
+        // Normalize, then scale and shift; gamma and beta are applied to every sample row.
+        normalized = input;
+        Tensor<float> result = input;
+        for (int i = 0; i < input.depth; ++i) {
+            for (int j = 0; j < input.rows; ++j) {
+                for (int k = 0; k < features; ++k) {
+                    const float xhat = (input.data[i][j][k] - mean.data[0][0][k]) * invStd[k];
+                    normalized.data[i][j][k] = xhat;
+                    result.data[i][j][k] = gamma.data[0][0][k] * xhat + beta.data[0][0][k];
+                }
+            }
+        }
+        return result;
+    }
+    // Back-propagates through the scale/shift and the normalization, applies one
+    // gradient descent step to gamma and beta, and returns the input gradient.
+    // Throws std::invalid_argument when `outputGradient` does not have the shape
+    // of the output produced by the last forward() call.
+    Tensor<float> backward(const Tensor<float>& outputGradient, float learningRate) override {
+        if (outputGradient.depth != normalized.depth || outputGradient.rows != normalized.rows ||
+            outputGradient.cols != normalized.cols ||
+            invStd.size() != static_cast<std::size_t>(normalized.cols)) {
+            throw std::invalid_argument("Output gradient dimensions do not match the last BatchNormLayer output.");
+        }
+        const int features = normalized.cols;
+        const int samples = normalized.depth * normalized.rows;
+        Tensor<float> inputGradient(normalized.depth, normalized.rows, features);
+        for (int k = 0; k < features; ++k) {
+            const float scale = gamma.data[0][0][k]; // value used by forward(), read before the update
+            float gradGamma = 0.0f;                  // sum of g * xhat over the samples
+            float gradBeta = 0.0f;                   // sum of g over the samples
+            for (int i = 0; i < normalized.depth; ++i) {
+                for (int j = 0; j < normalized.rows; ++j) {
+                    const float g = outputGradient.data[i][j][k];
+                    gradBeta += g;
+                    gradGamma += g * normalized.data[i][j][k];
+                }
+            }
+            for (int i = 0; i < normalized.depth; ++i) {
+                for (int j = 0; j < normalized.rows; ++j) {
+                    float gradNorm = outputGradient.data[i][j][k] * scale;
+                    if (usedBatchStats) {
+                        // The batch mean and variance depend on every sample, which adds
+                        // these two correction terms to the gradient.
+                        gradNorm -= scale * (gradBeta + normalized.data[i][j][k] * gradGamma) / samples;
+                    }
+                    inputGradient.data[i][j][k] = gradNorm * invStd[k];
+                }
+            }
+            gamma.data[0][0][k] -= learningRate * gradGamma;
+            beta.data[0][0][k] -= learningRate * gradBeta;
+        }
+        return inputGradient;
+    }
+};
+// Neural Network
+// A sequential stack of layers trained one sample at a time with softmax
+// cross-entropy and plain gradient descent.
+class NeuralNetwork {
+private:
+    std::vector<std::shared_ptr<Layer>> layers; // applied in insertion order
+    // Turns every row of `logits` into a probability distribution. The row
+    // maximum is subtracted before exponentiation for numerical stability.
+    static Tensor<float> softmaxRows(const Tensor<float>& logits) {
+        Tensor<float> probabilities = logits;
+        for (auto& slice : probabilities.data) {
+            for (auto& row : slice) {
+                if (row.empty()) {
+                    continue;
+                }
+                const float maxLogit = *std::max_element(row.begin(), row.end());
+                float sumExp = 0.0f;
+                for (float& value : row) {
+                    value = std::exp(value - maxLogit);
+                    sumExp += value;
+                }
+                for (float& value : row) {
+                    value /= sumExp;
+                }
+            }
+        }
+        return probabilities;
+    }
+public:
+    // Appends `layer` to the end of the network.
+    // Throws std::invalid_argument when `layer` is null.
+    void addLayer(std::shared_ptr<Layer> layer) {
+        if (!layer) {
+            throw std::invalid_argument("Cannot add a null layer.");
+        }
+        layers.push_back(std::move(layer));
+    }
+    // Runs `input` through every layer in order and returns the final output.
+    Tensor<float> forward(const Tensor<float>& input) {
+        Tensor<float> output = input;
+        for (const auto& layer : layers) {
+            output = layer->forward(output);
+        }
+        return output;
+    }
+    // Trains for `epochs` passes over the data, updating the layers after every
+    // sample. Prints the mean loss of each epoch and finally writes one loss per
+    // line to "losses.txt".
+    // Throws std::invalid_argument when inputs/labels are empty or differ in size.
+    void train(const std::vector<Tensor<float>>& inputs, const std::vector<Tensor<float>>& labels, int epochs, float learningRate) {
+        if (inputs.empty() || labels.empty() || inputs.size() != labels.size()) {
+            throw std::invalid_argument("Inputs and labels must be non-empty and have the same size.");
+        }
+        std::vector<float> losses; // To store loss values for plotting
+        for (int epoch = 0; epoch < epochs; ++epoch) {
+            float totalLoss = 0.0f;
+            for (size_t i = 0; i < inputs.size(); ++i) {
+                // Cross-entropy needs probabilities, and "probabilities - labels" is
+                // the gradient of softmax cross-entropy with respect to the network
+                // output, so the softmax is applied here before both.
+                const Tensor<float> probabilities = softmaxRows(forward(inputs[i]));
+                totalLoss += ::crossEntropyLoss(probabilities, labels[i]);
+                Tensor<float> gradients = probabilities.subtract(labels[i]);
+                // Backpropagation
+                for (auto it = layers.rbegin(); it != layers.rend(); ++it) {
+                    gradients = (*it)->backward(gradients, learningRate);
+                }
+            }
+            const float avgLoss = totalLoss / inputs.size();
+            losses.push_back(avgLoss);
+            std::cout << "Epoch " << epoch + 1 << ", Loss: " << avgLoss << std::endl;
+        }
+        // Save losses to a file for plotting
+        std::ofstream lossFile("losses.txt");
+        if (!lossFile) {
+            // The trained network is still usable, so report instead of throwing.
+            std::cerr << "Warning: could not open losses.txt for writing." << std::endl;
+            return;
+        }
+        for (float loss : losses) {
+            lossFile << loss << "\n";
+        }
+    }
+    // Evaluate the model on test data
+    // Compares the arg-max of the first output row with the arg-max of the first
+    // label row for every sample, prints the accuracy as a percentage and
+    // returns it as a fraction in [0, 1].
+    // Throws std::invalid_argument when inputs/labels are empty or differ in
+    // size, or when an output and its label do not have the same width.
+    float evaluate(const std::vector<Tensor<float>>& inputs, const std::vector<Tensor<float>>& labels) {
+        if (inputs.empty() || labels.empty() || inputs.size() != labels.size()) {
+            throw std::invalid_argument("Inputs and labels must be non-empty and have the same size.");
+        }
+        int correctPredictions = 0;
+        for (size_t i = 0; i < inputs.size(); ++i) {
+            const Tensor<float> output = forward(inputs[i]);
+            const Tensor<float>& label = labels[i];
+            if (output.depth < 1 || output.rows < 1 || output.cols < 1 ||
+                label.depth < 1 || label.rows < 1 || label.cols != output.cols) {
+                throw std::invalid_argument("Output and label dimensions do not match for evaluation.");
+            }
+            // max_element returns the first maximum, so ties resolve to the lowest index.
+            const std::vector<float>& scores = output.data[0][0];
+            const std::vector<float>& target = label.data[0][0];
+            const auto predictedClass = std::max_element(scores.begin(), scores.end()) - scores.begin();
+            const auto trueClass = std::max_element(target.begin(), target.end()) - target.begin();
+            if (predictedClass == trueClass) {
+                ++correctPredictions;
+            }
+        }
+        const float accuracy = static_cast<float>(correctPredictions) / inputs.size();
+        std::cout << "Accuracy: " << accuracy * 100.0f << "%" << std::endl;
+        return accuracy;
+    }
+};
+// Reads one 32-bit unsigned integer stored most-significant byte first (the
+// encoding used by IDX headers), independent of the host byte order.
+// Throws std::runtime_error when fewer than four bytes can be read.
+static uint32_t readBigEndianUint32(std::istream& stream) {
+    unsigned char bytes[4];
+    if (!stream.read(reinterpret_cast<char*>(bytes), sizeof(bytes))) {
+        throw std::runtime_error("Unexpected end of MNIST file while reading header.");
+    }
+    return (static_cast<uint32_t>(bytes[0]) << 24) | (static_cast<uint32_t>(bytes[1]) << 16) |
+           (static_cast<uint32_t>(bytes[2]) << 8) | static_cast<uint32_t>(bytes[3]);
+}
+// Function to load MNIST dataset from binary files
+// Returns {images, labels}. Each image is a (1, 1, rows * cols) tensor with
+// pixels scaled to [0, 1]; each label is a (1, 1, 10) one-hot tensor.
+// Throws std::runtime_error when a file cannot be opened, has the wrong magic
+// number, is truncated, contains a label above 9, or when the image and label
+// counts differ.
+std::pair<std::vector<Tensor<float>>, std::vector<Tensor<float>>> loadMNIST(const std::string& imageFile, const std::string& labelFile) {
+    const uint32_t imageMagic = 2051; // IDX3 magic number of MNIST image files
+    const uint32_t labelMagic = 2049; // IDX1 magic number of MNIST label files
+    const unsigned numClasses = 10;
+    std::vector<Tensor<float>> images;
+    std::vector<Tensor<float>> labels;
+    // Load images
+    std::ifstream imageStream(imageFile, std::ios::binary);
+    if (!imageStream) {
+        throw std::runtime_error("Failed to open image file.");
+    }
+    if (readBigEndianUint32(imageStream) != imageMagic) {
+        throw std::runtime_error("Invalid magic number in image file.");
+    }
+    const uint32_t numImages = readBigEndianUint32(imageStream);
+    const uint32_t numRows = readBigEndianUint32(imageStream);
+    const uint32_t numCols = readBigEndianUint32(imageStream);
+    // Tensor dimensions are int, so reject sizes that would not fit.
+    const uint64_t pixelCount = static_cast<uint64_t>(numRows) * numCols;
+    if (pixelCount > static_cast<uint64_t>(std::numeric_limits<int>::max())) {
+        throw std::runtime_error("Image dimensions in image file are too large.");
+    }
+    std::vector<unsigned char> pixels(static_cast<std::size_t>(pixelCount));
+    for (uint32_t i = 0; i < numImages; ++i) {
+        // One read per image instead of one per pixel.
+        if (!imageStream.read(reinterpret_cast<char*>(pixels.data()), static_cast<std::streamsize>(pixels.size()))) {
+            throw std::runtime_error("Unexpected end of image file.");
+        }
+        Tensor<float> image(1, 1, static_cast<int>(pixelCount));
+        std::vector<float>& row = image.data[0][0];
+        for (std::size_t j = 0; j < pixels.size(); ++j) {
+            row[j] = static_cast<float>(pixels[j]) / 255.0f; // Normalize to [0, 1]
+        }
+        images.push_back(std::move(image));
+    }
+    // Load labels
+    std::ifstream labelStream(labelFile, std::ios::binary);
+    if (!labelStream) {
+        throw std::runtime_error("Failed to open label file.");
+    }
+    if (readBigEndianUint32(labelStream) != labelMagic) {
+        throw std::runtime_error("Invalid magic number in label file.");
+    }
+    const uint32_t numLabels = readBigEndianUint32(labelStream);
+    if (numLabels != numImages) {
+        throw std::runtime_error("Image and label counts do not match.");
+    }
+    for (uint32_t i = 0; i < numLabels; ++i) {
+        char raw = 0;
+        if (!labelStream.get(raw)) {
+            throw std::runtime_error("Unexpected end of label file.");
+        }
+        const unsigned labelValue = static_cast<unsigned char>(raw);
+        if (labelValue >= numClasses) {
+            throw std::runtime_error("Label value out of range in label file.");
+        }
+        Tensor<float> label(1, 1, static_cast<int>(numClasses)); // One-hot encoding for 10 classes
+        label.data[0][0][labelValue] = 1.0f; // Set the corresponding class to 1
+        labels.push_back(std::move(label));
+    }
+    return {std::move(images), std::move(labels)};
+}
+// Main Function
+// Loads the MNIST training set from the working directory, trains a small
+// fully connected network on it and reports the accuracy on that same data.
+// Returns EXIT_SUCCESS on success and EXIT_FAILURE when an exception is caught.
+int main() {
+    try {
+        // Load MNIST dataset
+        const auto [images, labels] = loadMNIST("train-images.idx3-ubyte", "train-labels.idx1-ubyte");
+        // Create neural network
+        NeuralNetwork nn;
+        nn.addLayer(std::make_shared<DenseLayer>(784, 256)); // Hidden layer with 256 neurons
+        nn.addLayer(std::make_shared<BatchNormLayer>(256));  // Batch Normalization layer
+        nn.addLayer(std::make_shared<DenseLayer>(256, 128)); // Another hidden layer with 128 neurons
+        nn.addLayer(std::make_shared<BatchNormLayer>(128));  // Batch Normalization layer
+        nn.addLayer(std::make_shared<DenseLayer>(128, 64));  // Another hidden layer with 64 neurons
+        nn.addLayer(std::make_shared<DenseLayer>(64, 10));   // Output layer with 10 neurons
+        // Train neural network
+        nn.train(images, labels, 20, 0.001f); // Train for 20 epochs with learning rate 0.001
+        // Evaluate the model (on the training data, so this is training accuracy)
+        nn.evaluate(images, labels);
+        // Note: Plot the losses using Python's Matplotlib by reading "losses.txt"
+    } catch (const std::exception& e) {
+        std::cerr << "Error: " << e.what() << std::endl;
+        return EXIT_FAILURE; // let the calling shell see that the run failed
+    }
+    return EXIT_SUCCESS;
+}