Spaces:

appvoid
/

carbono

Running

App Files Files Community

appvoid committed on Nov 13, 2024

Commit

9bfdf90

verified ·

1 Parent(s): 7666f30

Update index.html

Browse files

Files changed (1) hide show

index.html +144 -477

index.html CHANGED Viewed

@@ -318,486 +318,170 @@
   </div>
   <script>
-class ReinforcementModule {
-  constructor(network, options = {}) {
-    this.network = network;
-    this.options = {
-      memorySize: options.memorySize || 128,
-      batchSize: options.batchSize || 16,
-      learningRate: options.learningRate || 0.01,
-      gamma: options.gamma || 0.9,
-      epsilon: options.epsilon || 1,
-      epsilonMin: options.epsilonMin || 0.01,
-      epsilonDecay: options.epsilonDecay || 0.95,
-      weightUpdateRange: options.weightUpdateRange || 0.02,
-      actionSpace: options.actionSpace || 2048,
-      memoryLayerSize: options.memoryLayerSize || 32,
-      predictionHorizon: options.predictionHorizon || 16,
-      memoryCellDecay: options.memoryCellDecay || 0.9
-    };
-    // Initialize memory cells
-    this.memoryCells = {
-      shortTerm: new Array(this.options.memoryLayerSize).fill(0),
-      longTerm: new Array(this.options.memoryLayerSize).fill(0),
-      cellState: new Array(this.options.memoryLayerSize).fill(0)
-    };
-    // Initialize gates and networks
-    this.gates = {
-      forget: this.createGateNetwork(this.options.memoryLayerSize),
-      input: this.createGateNetwork(this.options.memoryLayerSize),
-      output: this.createGateNetwork(this.options.memoryLayerSize),
-      candidates: this.createGateNetwork(this.options.memoryLayerSize)
-    };
-    this.memory = [];
-    this.currentState = this.getNetworkState();
-    this.bestWeights = this.cloneWeights(network.weights);
-    this.bestLoss = Infinity;
-    this.epsilon = this.options.epsilon;
-    this.qNetwork = this.createQNetwork();
-    this.outcomePredictor = this.createOutcomePredictor();
-  }
-  createGateNetwork(size) {
-    const gate = new carbono(false);
-    gate.layer(this.getFlattenedStateSize(), size, "sigmoid");
-    return gate;
-  }
-  createQNetwork() {
-    const qNet = new carbono(false);
-    const stateSize = this.getFlattenedStateSize();
-    const actionSize = this.getActionSpaceSize();
-    qNet.layer(stateSize + actionSize, 16, "selu");
-    qNet.layer(16, 16, "selu");
-    qNet.layer(16, 1, "selu");
-    return qNet;
-  }
-  createOutcomePredictor() {
-    const predictor = new carbono(false);
-    const inputSize =
-      this.getFlattenedStateSize() + this.options.memoryLayerSize * 3;
-    predictor.layer(inputSize, 8, "tanh");
-    predictor.layer(8, 8, "tanh");
-    predictor.layer(8, this.options.predictionHorizon, "tanh");
-    return predictor;
-  }
-  getFlattenedStateSize() {
-    let size = 0;
-    this.network.weights.forEach((layer) => {
-      size += layer.flat().length;
-    });
-    return size + 3;
-  }
-  getActionSpaceSize() {
-    let size = 0;
-    this.network.weights.forEach((layer) => {
-      size += layer.flat().length * this.options.actionSpace;
-    });
-    return size;
-  }
-  getNetworkState() {
-    const flatWeights = this.network.weights
-      .map((layer) => layer.flat())
-      .flat();
-    return [...flatWeights, this.bestLoss, this.getCurrentLoss(), this.epsilon];
-  }
-  async getCurrentLoss() {
-    let totalLoss = 0;
-    for (const data of this.network.trainingData) {
-      const prediction = this.network.predict(data.input);
-      totalLoss += Math.abs(prediction[0] - data.output[0]);
-    }
-    return totalLoss / this.network.trainingData.length;
-  }
-  async updateMemoryCells(state) {
-    const forgetGate = this.gates.forget.predict(state);
-    const inputGate = this.gates.input.predict(state);
-    const outputGate = this.gates.output.predict(state);
-    const candidates = this.gates.candidates.predict(state);
-    for (let i = 0; i < this.options.memoryLayerSize; i++) {
-      this.memoryCells.cellState[i] *= forgetGate[i];
-      this.memoryCells.cellState[i] += inputGate[i] * candidates[i];
-      this.memoryCells.shortTerm[i] =
-        Math.tanh(this.memoryCells.cellState[i]) * outputGate[i];
-      this.memoryCells.longTerm[i] =
-        this.memoryCells.longTerm[i] * this.options.memoryCellDecay +
-        this.memoryCells.shortTerm[i] * (1 - this.options.memoryCellDecay);
-    }
-  }
-  async predictOutcomes(state) {
-    const input = [
-      ...state,
-      ...this.memoryCells.shortTerm,
-      ...this.memoryCells.longTerm,
-      ...this.memoryCells.cellState
-    ];
-    return this.outcomePredictor.predict(input);
-  }
-  encodeAction(action) {
-    const encoded = new Array(this.getActionSpaceSize()).fill(0);
-    encoded[action] = 1;
-    return encoded;
-  }
-  async predictQValue(state, action) {
-    const encoded = this.encodeAction(action);
-    const input = [...state, ...encoded];
-    const qValue = this.qNetwork.predict(input);
-    return qValue[0];
-  }
-  simulateAction(state, action) {
-    const simState = [...state];
-    const updates = this.actionToWeightUpdates(action);
-    let stateIndex = 0;
-    for (const layer of updates) {
-      for (const row of layer) {
-        for (const update of row) {
-          simState[stateIndex] += update;
-          stateIndex++;
-        }
-      }
-    }
-    return simState;
-  }
-  async selectAction() {
-    if (Math.random() < this.epsilon) {
-      return Math.floor(Math.random() * this.getActionSpaceSize());
-    }
-    const state = this.getNetworkState();
-    await this.updateMemoryCells(state);
-    let bestAction = 0;
-    let bestOutcome = -Infinity;
-    for (let action = 0; action < this.getActionSpaceSize(); action++) {
-      const simState = this.simulateAction(state, action);
-      const outcomes = await this.predictOutcomes(simState);
-      const expectedValue = outcomes.reduce((sum, val, i) => {
-        return sum + val * Math.pow(this.options.gamma, i);
-      }, 0);
-      if (expectedValue > bestOutcome) {
-        bestOutcome = expectedValue;
-        bestAction = action;
-      }
-    }
-    return bestAction;
-  }
-  actionToWeightUpdates(action) {
-    const updates = [];
-    let actionIndex = action;
-    for (const layer of this.network.weights) {
-      const layerUpdate = [];
-      for (let i = 0; i < layer.length; i++) {
-        const rowUpdate = [];
-        for (let j = 0; j < layer[i].length; j++) {
-          const actionValue = actionIndex % this.options.actionSpace;
-          actionIndex = Math.floor(actionIndex / this.options.actionSpace);
-          const update =
-            ((actionValue / (this.options.actionSpace - 1)) * 2 - 1) *
-            this.options.weightUpdateRange;
-          rowUpdate.push(update);
-        }
-        layerUpdate.push(rowUpdate);
-      }
-      updates.push(layerUpdate);
-    }
-    return updates;
-  }
-  async applyAction(action) {
-    const updates = this.actionToWeightUpdates(action);
-    for (let i = 0; i < this.network.weights.length; i++) {
-      for (let j = 0; j < this.network.weights[i].length; j++) {
-        for (let k = 0; k < this.network.weights[i][j].length; k++) {
-          this.network.weights[i][j][k] += updates[i][j][k];
-        }
-      }
-    }
-  }
-  calculateReward(oldLoss, newLoss) {
-    const improvement = oldLoss - newLoss;
-    const bestReward = newLoss < this.bestLoss ? 1.0 : 0.0;
-    return improvement + bestReward;
-  }
-  async getActualOutcomes(state, steps) {
-    const outcomes = [];
-    let currentState = state;
-    for (let i = 0; i < steps; i++) {
-      const loss = await this.getCurrentLoss();
-      outcomes.push(loss);
-      const action = await this.selectAction();
-      currentState = this.simulateAction(currentState, action);
-    }
-    return outcomes;
-  }
-  async trainOutcomePredictor(experience) {
-    const { state, nextState } = experience;
-    const actualOutcomes = await this.getActualOutcomes(
-      nextState,
-      this.options.predictionHorizon
-    );
-    const input = [
-      ...state,
-      ...this.memoryCells.shortTerm,
-      ...this.memoryCells.longTerm,
-      ...this.memoryCells.cellState
-    ];
-    await this.outcomePredictor.train(
-      [
-        {
-          input: input,
-          output: actualOutcomes
-        }
-      ],
-      {
-        epochs: 10,
-        learningRate: this.options.learningRate
-      }
-    );
-  }
-  async trainQNetwork(batch) {
-    for (const experience of batch) {
-      const { state, action, reward, nextState } = experience;
-      const currentQ = await this.predictQValue(state, action);
-      let maxNextQ = -Infinity;
-      for (let a = 0; a < this.getActionSpaceSize(); a++) {
-        const nextQ = await this.predictQValue(nextState, a);
-        maxNextQ = Math.max(maxNextQ, nextQ);
-      }
-      const targetQ = reward + this.options.gamma * maxNextQ;
-      const input = [...state, ...this.encodeAction(action)];
-      await this.qNetwork.train(
-        [
-          {
-            input: input,
-            output: [targetQ]
-          }
-        ],
-        {
-          epochs: 10,
-          learningRate: this.options.learningRate
-        }
-      );
-    }
-  }
-  async update(currentLoss) {
-    const state = this.getNetworkState();
-    const action = await this.selectAction();
-    await this.applyAction(action);
-    const nextState = this.getNetworkState();
-    const newLoss = await this.getCurrentLoss();
-    const reward = this.calculateReward(currentLoss, newLoss);
-    const experience = {
-      state,
-      action,
-      reward,
-      nextState
-    };
-    this.memory.push(experience);
-    await this.trainOutcomePredictor(experience);
-    if (this.memory.length > this.options.memorySize) {
-      this.memory.shift();
-    }
-    if (this.memory.length >= this.options.batchSize) {
-      const batch = [];
-      for (let i = 0; i < this.options.batchSize; i++) {
-        const index = Math.floor(Math.random() * this.memory.length);
-        batch.push(this.memory[index]);
-      }
-      await this.trainQNetwork(batch);
-    }
-    if (newLoss < this.bestLoss) {
-      this.bestLoss = newLoss;
-      this.bestWeights = this.cloneWeights(this.network.weights);
-    }
-    this.epsilon = Math.max(
-      this.options.epsilonMin,
-      this.epsilon * this.options.epsilonDecay
-    );
-    return {
-      loss: newLoss,
-      bestLoss: this.bestLoss,
-      epsilon: this.epsilon
-    };
-  }
-  cloneWeights(weights) {
-    return weights.map((layer) => layer.map((row) => [...row]));
-  }
-}
-// 🧠 carbono: A Fun and Friendly Neural Network Class 🧠
-// This micro-library wraps everything you need to have
-// This is the simplest yet functional feedforward mlp in js
-class carbono {
   constructor(debug = true) {
-    this.layers = []; // 📚 Stores info about each layer
-    this.weights = []; // ⚖️ Stores weights for each layer
-    this.biases = []; // 🔧 Stores biases for each layer
-    this.activations = []; // 🚀 Stores activation functions for each layer
-    this.details = {}; // 📊 Stores details about the model
-    this.debug = debug; // 🐛 Enables or disables debug messages
   }
-  // 🎮 Initialize reinforcement learning module
-  play(options = {}) {
-    console.log("Reinforcement Learning Activated");
-    this.rl = new ReinforcementModule(this, options);
-    return this.rl;
-  }
-  // 🏗️ Add a new layer to the neural network
-  layer(inputSize, outputSize, activation = "tanh") {
-    // 🧱 Store layer information
     this.layers.push({
       inputSize,
       outputSize,
       activation
     });
-    // 🔍 Check if the new layer's input size matches the previous layer's output size
     if (this.weights.length > 0) {
-      const lastLayerOutputSize = this.layers[this.layers.length - 2]
-        .outputSize;
       if (inputSize !== lastLayerOutputSize) {
-        throw new Error(
-          "Oops! The input size of the new layer must match the output size of the previous layer."
-        );
       }
     }
-    // 🎲 Initialize weights using Xavier/Glorot initialization
     const weights = [];
     for (let i = 0; i < outputSize; i++) {
       const row = [];
       for (let j = 0; j < inputSize; j++) {
-        row.push(
-          (Math.random() - 0.5) * 2 * Math.sqrt(6 / (inputSize + outputSize))
-        );
       }
       weights.push(row);
     }
     this.weights.push(weights);
-    // 🎚️ Initialize biases with small positive values
     const biases = Array(outputSize).fill(0.01);
     this.biases.push(biases);
-    // 🚀 Store the activation function for this layer
     this.activations.push(activation);
   }
-  // 🧮 Apply the activation function
   activationFunction(x, activation) {
     switch (activation) {
-      case "tanh":
-        return Math.tanh(x); // 〰️ Hyperbolic tangent
-      case "sigmoid":
-        return 1 / (1 + Math.exp(-x)); // 📈 S-shaped curve
-      case "relu":
-        return Math.max(0, x); // 📐 Rectified Linear Unit
-      case "selu":
         const alpha = 1.67326;
         const scale = 1.0507;
-        return x > 0 ? scale * x : scale * alpha * (Math.exp(x) - 1); // 🚀 Scaled Exponential Linear Unit
       default:
-        throw new Error("Whoops! We don't know that activation function.");
     }
   }
-  // 📐 Calculate the derivative of the activation function
   activationDerivative(x, activation) {
     switch (activation) {
-      case "tanh":
         return 1 - Math.pow(Math.tanh(x), 2);
-      case "sigmoid":
         const sigmoid = 1 / (1 + Math.exp(-x));
         return sigmoid * (1 - sigmoid);
-      case "relu":
         return x > 0 ? 1 : 0;
-      case "selu":
         const alpha = 1.67326;
         const scale = 1.0507;
         return x > 0 ? scale : scale * alpha * Math.exp(x);
       default:
-        throw new Error(
-          "Oops! We don't know the derivative of that activation function."
-        );
     }
   }
-  // 🏋️‍♀️ Train the neural network
   async train(trainSet, options = {}) {
-    // 🎛️ Set up training options with default values
     const {
-      epochs = 200, // 🔄 Number of times to go through the entire dataset
-      learningRate = 0.212, // 📏 How big of steps to take when adjusting weights
-      batchSize = 16, // 📦 Number of samples to process before updating weights
-      printEveryEpochs = 100, // 🖨️ How often to print progress
-      earlyStopThreshold = 1e-6, // 🛑 When to stop if the error is small enough
-      testSet = null, // 🧪 Optional test set for evaluation
-      callback = null // 📡 Callback function for real-time updates
     } = options;
-    const start = Date.now(); // ⏱️ Start the timer
-    // 🛡️ Make sure batch size is at least 2
     if (batchSize < 1) batchSize = 2;
-    // 🏗️ Automatically create layers if none exist
     if (this.layers.length === 0) {
       const numInputs = trainSet[0].input.length;
-      this.layer(numInputs, numInputs, "tanh");
-      this.layer(numInputs, 1, "tanh");
     }
     let lastTrainLoss = 0;
     let lastTestLoss = null;
-    // 🔄 Main training loop
     for (let epoch = 0; epoch < epochs; epoch++) {
       let trainError = 0;
-      // 📦 Process data in batches
       for (let b = 0; b < trainSet.length; b += batchSize) {
         const batch = trainSet.slice(b, b + batchSize);
         let batchError = 0;
-        // 🧠 Forward pass and backward pass for each item in the batch
         for (const data of batch) {
-          // 🏃‍♂️ Forward pass
           const layerInputs = [data.input];
           for (let i = 0; i < this.weights.length; i++) {
             const inputs = layerInputs[i];
@@ -815,7 +499,6 @@ class carbono {
             }
             layerInputs.push(outputs);
           }
-          // 🔙 Backward pass
           const outputLayerIndex = this.weights.length - 1;
           const outputLayerInputs = layerInputs[layerInputs.length - 1];
           const outputErrors = [];
@@ -835,17 +518,10 @@ class carbono {
               for (let k = 0; k < this.layers[i + 1].outputSize; k++) {
                 error += nextLayerErrors[k] * nextLayerWeights[k][j];
               }
-              errors.push(
-                error *
-                  this.activationDerivative(
-                    currentLayerInputs[j],
-                    currentActivation
-                  )
-              );
             }
             layerErrors.unshift(errors);
           }
-          // 🔧 Update weights and biases
           for (let i = 0; i < this.weights.length; i++) {
             const inputs = layerInputs[i];
             const errors = layerErrors[i];
@@ -859,16 +535,11 @@ class carbono {
               biases[j] += learningRate * errors[j];
             }
           }
-          batchError += Math.abs(outputErrors[0]); // Assuming binary output
         }
         trainError += batchError;
       }
       lastTrainLoss = trainError / trainSet.length;
-      // 🎮 Apply reinforcement learning if initialized
-      if (this.rl) {
-        this.rl.update(lastTrainLoss);
-      }
-      // 🧪 Evaluate on test set if provided
       if (testSet) {
         let testError = 0;
         for (const data of testSet) {
@@ -877,44 +548,38 @@ class carbono {
         }
         lastTestLoss = testError / testSet.length;
       }
-      // 📢 Print progress if needed
       if ((epoch + 1) % printEveryEpochs === 0 && this.debug === true) {
-        console.log(
-          `Epoch ${epoch + 1}, Train Loss: ${lastTrainLoss.toFixed(6)}${
-            testSet ? `, Test Loss: ${lastTestLoss.toFixed(6)}` : ""
-          }`
-        );
       }
-      // 📡 Call the callback function with current progress
       if (callback) {
-        await callback(epoch + 1, lastTrainLoss, lastTestLoss);
       }
-      // Add a small delay to prevent UI freezing
-      await new Promise((resolve) => setTimeout(resolve, 0));
-      // 🛑 Check for early stopping
       if (lastTrainLoss < earlyStopThreshold) {
-        console.log(
-          `We stopped at epoch ${
-            epoch + 1
-          } with train loss: ${lastTrainLoss.toFixed(6)}${
-            testSet ? ` and test loss: ${lastTestLoss.toFixed(6)}` : ""
-          }`
-        );
         break;
       }
     }
-    const end = Date.now(); // ⏱️ Stop the timer
-    // 🧮 Calculate total number of parameters
     let totalParams = 0;
     for (let i = 0; i < this.weights.length; i++) {
       const weightLayer = this.weights[i];
       const biasLayer = this.biases[i];
       totalParams += weightLayer.flat().length + biasLayer.length;
     }
-    // 📊 Create a summary of the training
     const trainingSummary = {
       trainLoss: lastTrainLoss,
       testLoss: lastTestLoss,
       parameters: totalParams,
       training: {
         time: end - start,
@@ -922,7 +587,7 @@ class carbono {
         learningRate,
         batchSize
       },
-      layers: this.layers.map((layer) => ({
         inputSize: layer.inputSize,
         outputSize: layer.outputSize,
         activation: layer.activation
@@ -931,11 +596,12 @@ class carbono {
     this.details = trainingSummary;
     return trainingSummary;
   }
-  // 🔮 Use the trained network to make predictions
   predict(input) {
     let layerInput = input;
-    const allActivations = [input]; // Track all activations through layers
-    const allRawValues = []; // Track pre-activation values
     for (let i = 0; i < this.weights.length; i++) {
       const weights = this.weights[i];
       const biases = this.biases[i];
@@ -955,13 +621,13 @@ class carbono {
       allActivations.push(layerOutput);
       layerInput = layerOutput;
     }
-    // Store last activation values for visualization
     this.lastActivations = allActivations;
     this.lastRawValues = allRawValues;
     return layerInput;
   }
-  // 💾 Save the model to a file
-  save(name = "model") {
     const data = {
       weights: this.weights,
       biases: this.biases,
@@ -970,16 +636,17 @@ class carbono {
       details: this.details
     };
     const blob = new Blob([JSON.stringify(data)], {
-      type: "application/json"
     });
     const url = URL.createObjectURL(blob);
-    const a = document.createElement("a");
     a.href = url;
     a.download = `${name}.json`;
     a.click();
     URL.revokeObjectURL(url);
   }
-  // 📂 Load a saved model from a file
   load(callback) {
     const handleListener = (event) => {
       const file = event.target.files[0];
@@ -995,23 +662,23 @@ class carbono {
           this.layers = data.layers;
           this.details = data.details;
           callback();
-          if (this.debug === true) console.log("Model loaded successfully!");
-          input.removeEventListener("change", handleListener);
           input.remove();
         } catch (e) {
-          input.removeEventListener("change", handleListener);
           input.remove();
-          if (this.debug === true) console.error("Failed to load model:", e);
         }
       };
       reader.readAsText(file);
     };
-    const input = document.createElement("input");
-    input.type = "file";
-    input.accept = ".json";
-    input.style.opacity = "0";
     document.body.append(input);
-    input.addEventListener("change", handleListener.bind(this));
     input.click();
   }
 }

   </div>
   <script>
+    class carbono {
   constructor(debug = true) { // Start with an empty network; `debug` is stored and later gates console logging.
+    this.layers = []; // One { inputSize, outputSize, activation } record per layer, appended by layer().
+    this.weights = []; // Per-layer weight matrices, appended by layer().
+    this.biases = []; // Per-layer bias vectors, appended by layer().
+    this.activations = []; // Per-layer activation names, appended by layer().
+    this.details = {}; // Replaced by the training summary at the end of train().
+    this.debug = debug; // When true, progress and load messages are logged.
+    this.fewShotSamples = []; // Overwritten at the start of train() with samples drawn from the training set.
   }
+  // Add a new layer to the neural network: record its shape, check it against the previous layer, then append freshly initialised weights, biases and the activation name.
+  layer(inputSize, outputSize, activation = 'tanh') {
     this.layers.push({ // NOTE(review): recorded before validation, so a size-mismatch throw below leaves this entry in this.layers with no matching weights.
       inputSize,
       outputSize,
       activation
     });
     if (this.weights.length > 0) { // Only layers after the first have a predecessor to match.
+      const lastLayerOutputSize = this.layers[this.layers.length - 2].outputSize; // length - 2 because the new layer was already pushed above.
       if (inputSize !== lastLayerOutputSize) {
+        throw new Error('Oops! The input size of the new layer must match the output size of the previous layer.');
       }
     }
     const weights = []; // outputSize rows, each holding inputSize values.
     for (let i = 0; i < outputSize; i++) {
       const row = [];
       for (let j = 0; j < inputSize; j++) {
+        row.push((Math.random() - 0.5) * 2 * Math.sqrt(6 / (inputSize + outputSize))); // Uniform in [-limit, limit) with limit = sqrt(6 / (inputSize + outputSize)), i.e. Xavier/Glorot-style uniform initialisation.
       }
       weights.push(row);
     }
     this.weights.push(weights);
     const biases = Array(outputSize).fill(0.01); // Every bias starts at 0.01.
     this.biases.push(biases);
     this.activations.push(activation); // The name is not validated here; an unknown name only fails later in activationFunction / activationDerivative.
   }
+  // Apply the activation function named by `activation` to the scalar x and return the result; throws for an unknown name.
   activationFunction(x, activation) {
     switch (activation) {
+      case 'tanh':
+        return Math.tanh(x);
+      case 'sigmoid':
+        return 1 / (1 + Math.exp(-x)); // Logistic function.
+      case 'relu':
+        return Math.max(0, x);
+      case 'selu':
         const alpha = 1.67326; // NOTE(review): declared directly in the switch scope (no braces around the case body).
         const scale = 1.0507;
+        return x > 0 ? scale * x : scale * alpha * (Math.exp(x) - 1); // Linear for x > 0, scaled exponential otherwise.
       default:
+        throw new Error('Whoops! We don\'t know that activation function.');
     }
   }
+  // Calculate the derivative of the activation function named by `activation`, evaluated at x; throws for an unknown name.
   activationDerivative(x, activation) { // The formulas below differentiate with respect to x, so x is treated as the value fed INTO the activation.
     switch (activation) {
+      case 'tanh':
         return 1 - Math.pow(Math.tanh(x), 2); // d/dx tanh(x) = 1 - tanh(x)^2.
+      case 'sigmoid':
         const sigmoid = 1 / (1 + Math.exp(-x)); // NOTE(review): declared directly in the switch scope, shared with the other cases.
         return sigmoid * (1 - sigmoid);
+      case 'relu':
         return x > 0 ? 1 : 0; // The derivative at exactly x = 0 is taken as 0.
+      case 'selu':
         const alpha = 1.67326; // Same constants as the 'selu' branch of activationFunction.
         const scale = 1.0507;
         return x > 0 ? scale : scale * alpha * Math.exp(x);
       default:
+        throw new Error('Oops! We don\'t know the derivative of that activation function.');
+    }
+  }
+  // Generate few-shot samples
+  generateFewShotSamples(trainSet, numSamples = 10) {
+    const fewShotSamples = [];
+    for (let i = 0; i < numSamples; i++) {
+      const randomIndex = Math.floor(Math.random() * trainSet.length);
+      fewShotSamples.push(trainSet[randomIndex]);
+    }
+    return fewShotSamples;
+  }
+  // Positional Encoding
+  positionalEncoding(input, maxLen) {
+    const pe = new Array(maxLen).fill(0).map((_, pos) => {
+      return new Array(input[0].length).fill(0).map((_, i) => {
+        const angle = pos / Math.pow(10000, 2 * i / input[0].length);
+        return pos % 2 === 0 ? Math.sin(angle) : Math.cos(angle);
+      });
+    });
+    return input.map((seq, idx) => seq.map((val, i) => val + pe[idx][i]));
+  }
+  // Simplified Multi-Head Self-Attention
+  multiHeadSelfAttention(input, numHeads = 2) {
+    const headSize = input[0].length / numHeads;
+    const heads = new Array(numHeads).fill(0).map(() => new Array(input.length).fill(0).map(() => new Array(headSize).fill(0)));
+    for (let h = 0; h < numHeads; h++) {
+      for (let i = 0; i < input.length; i++) {
+        for (let j = 0; j < headSize; j++) {
+          heads[h][i][j] = input[i][h * headSize + j];
+        }
+      }
     }
+    const attentionScores = new Array(numHeads).fill(0).map(() => new Array(input.length).fill(0).map(() => new Array(input.length).fill(0)));
+    for (let h = 0; h < numHeads; h++) {
+      for (let i = 0; i < input.length; i++) {
+        for (let j = 0; j < input.length; j++) {
+          let score = 0;
+          for (let k = 0; k < headSize; k++) {
+            score += heads[h][i][k] * heads[h][j][k];
+          }
+          attentionScores[h][i][j] = score;
+        }
+      }
+    }
+    const attentionWeights = attentionScores.map(head => head.map(row => row.map(score => Math.exp(score) / row.reduce((sum, s) => sum + Math.exp(s), 0))));
+    const output = new Array(input.length).fill(0).map(() => new Array(input[0].length).fill(0));
+    for (let h = 0; h < numHeads; h++) {
+      for (let i = 0; i < input.length; i++) {
+        for (let j = 0; j < headSize; j++) {
+          for (let k = 0; k < input.length; k++) {
+            output[i][h * headSize + j] += attentionWeights[h][i][k] * heads[h][k][j];
+          }
+        }
+      }
+    }
+    return output;
   }
+  // Train the neural network
   async train(trainSet, options = {}) {
     const {
+      epochs = 200,
+      learningRate = 0.212,
+      batchSize = 16,
+      printEveryEpochs = 100,
+      earlyStopThreshold = 1e-6,
+      testSet = null,
+      callback = null
     } = options;
+    const start = Date.now();
     if (batchSize < 1) batchSize = 2;
     if (this.layers.length === 0) {
       const numInputs = trainSet[0].input.length;
+      this.layer(numInputs, numInputs, 'tanh');
+      this.layer(numInputs, 1, 'tanh');
     }
     let lastTrainLoss = 0;
     let lastTestLoss = null;
+    let lastFewShotLoss = null;
+    // Generate few-shot samples
+    this.fewShotSamples = this.generateFewShotSamples(trainSet);
     for (let epoch = 0; epoch < epochs; epoch++) {
       let trainError = 0;
       for (let b = 0; b < trainSet.length; b += batchSize) {
         const batch = trainSet.slice(b, b + batchSize);
         let batchError = 0;
         for (const data of batch) {
           const layerInputs = [data.input];
           for (let i = 0; i < this.weights.length; i++) {
             const inputs = layerInputs[i];
             }
             layerInputs.push(outputs);
           }
           const outputLayerIndex = this.weights.length - 1;
           const outputLayerInputs = layerInputs[layerInputs.length - 1];
           const outputErrors = [];
               for (let k = 0; k < this.layers[i + 1].outputSize; k++) {
                 error += nextLayerErrors[k] * nextLayerWeights[k][j];
               }
+              errors.push(error * this.activationDerivative(currentLayerInputs[j], currentActivation));
             }
             layerErrors.unshift(errors);
           }
           for (let i = 0; i < this.weights.length; i++) {
             const inputs = layerInputs[i];
             const errors = layerErrors[i];
               biases[j] += learningRate * errors[j];
             }
           }
+          batchError += Math.abs(outputErrors[0]);
         }
         trainError += batchError;
       }
       lastTrainLoss = trainError / trainSet.length;
       if (testSet) {
         let testError = 0;
         for (const data of testSet) {
         }
         lastTestLoss = testError / testSet.length;
       }
+      // Evaluate on few-shot samples
+      let fewShotError = 0;
+      for (const data of this.fewShotSamples) {
+        const prediction = this.predict(data.input);
+        fewShotError += Math.abs(data.output[0] - prediction[0]);
+      }
+      lastFewShotLoss = fewShotError / this.fewShotSamples.length;
       if ((epoch + 1) % printEveryEpochs === 0 && this.debug === true) {
+        console.log(`Epoch ${epoch + 1}, Train Loss: ${lastTrainLoss.toFixed(6)}${testSet ? `, Test Loss: ${lastTestLoss.toFixed(6)}` : ''}, Few-Shot Loss: ${lastFewShotLoss.toFixed(6)}`);
       }
       if (callback) {
+        await callback(epoch + 1, lastTrainLoss, lastTestLoss, lastFewShotLoss);
       }
+      await new Promise(resolve => setTimeout(resolve, 0));
       if (lastTrainLoss < earlyStopThreshold) {
+        console.log(`We stopped at epoch ${epoch + 1} with train loss: ${lastTrainLoss.toFixed(6)}${testSet ? ` and test loss: ${lastTestLoss.toFixed(6)}` : ''} and few-shot loss: ${lastFewShotLoss.toFixed(6)}`);
         break;
       }
     }
+    const end = Date.now();
     let totalParams = 0;
     for (let i = 0; i < this.weights.length; i++) {
       const weightLayer = this.weights[i];
       const biasLayer = this.biases[i];
       totalParams += weightLayer.flat().length + biasLayer.length;
     }
     const trainingSummary = {
       trainLoss: lastTrainLoss,
       testLoss: lastTestLoss,
+      fewShotLoss: lastFewShotLoss,
       parameters: totalParams,
       training: {
         time: end - start,
         learningRate,
         batchSize
       },
+      layers: this.layers.map(layer => ({
         inputSize: layer.inputSize,
         outputSize: layer.outputSize,
         activation: layer.activation
     this.details = trainingSummary;
     return trainingSummary;
   }
+  // Use the trained network to make predictions
   predict(input) {
     let layerInput = input;
+    const allActivations = [input];
+    const allRawValues = [];
     for (let i = 0; i < this.weights.length; i++) {
       const weights = this.weights[i];
       const biases = this.biases[i];
       allActivations.push(layerOutput);
       layerInput = layerOutput;
     }
     this.lastActivations = allActivations;
     this.lastRawValues = allRawValues;
     return layerInput;
   }
+  // Save the model to a file
+  save(name = 'model') {
     const data = {
       weights: this.weights,
       biases: this.biases,
       details: this.details
     };
     const blob = new Blob([JSON.stringify(data)], {
+      type: 'application/json'
     });
     const url = URL.createObjectURL(blob);
+    const a = document.createElement('a');
     a.href = url;
     a.download = `${name}.json`;
     a.click();
     URL.revokeObjectURL(url);
   }
+  // Load a saved model from a file
   load(callback) {
     const handleListener = (event) => {
       const file = event.target.files[0];
           this.layers = data.layers;
           this.details = data.details;
           callback();
+          if (this.debug === true) console.log('Model loaded successfully!');
+          input.removeEventListener('change', handleListener);
           input.remove();
         } catch (e) {
+          input.removeEventListener('change', handleListener);
           input.remove();
+          if (this.debug === true) console.error('Failed to load model:', e);
         }
       };
       reader.readAsText(file);
     };
+    const input = document.createElement('input');
+    input.type = 'file';
+    input.accept = '.json';
+    input.style.opacity = '0';
     document.body.append(input);
+    input.addEventListener('change', handleListener.bind(this));
     input.click();
   }
 }