Spaces:

josh1234566532
/

brainybot-builder

Running

App Files Files Community

josh1234566532 committed on Oct 8, 2025

Commit

476b208

verified ·

1 Parent(s): f6956ee

Create a LLM from scratch in js, and a tokenizer, that allows the user to input their training data, and labels (text box), and then after training, test it out. And visualize the training loss, etc.

Browse files

Files changed (2) hide show

README.md +8 -5
index.html +473 -18

README.md CHANGED Viewed

@@ -1,10 +1,13 @@
 ---
-title: Brainybot Builder
-emoji: 🔥
-colorFrom: yellow
-colorTo: indigo
 sdk: static
 pinned: false
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: BrainyBot Builder 🧠
+colorFrom: blue
+colorTo: yellow
+emoji: 🐳
 sdk: static
 pinned: false
+tags:
+  - deepsite-v3
 ---
+# Welcome to your new DeepSite project!
+This project was created with [DeepSite](https://deepsite.hf.co).

index.html CHANGED Viewed

@@ -1,19 +1,474 @@
-<!doctype html>
-<html>
-	<head>
-		<meta charset="utf-8" />
-		<meta name="viewport" content="width=device-width" />
-		<title>My static Space</title>
-		<link rel="stylesheet" href="style.css" />
-	</head>
-	<body>
-		<div class="card">
-			<h1>Welcome to your static Space!</h1>
-			<p>You can modify this app directly by editing <i>index.html</i> in the Files and versions tab.</p>
-			<p>
-				Also don't forget to check the
-				<a href="https://huggingface.co/docs/hub/spaces" target="_blank">Spaces documentation</a>.
-			</p>
-		</div>
-	</body>
 </html>

+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>BrainyBot Builder - Custom LLM Creator</title>
+    <script src="https://cdn.tailwindcss.com"></script>
+    <script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
+    <script src="https://unpkg.com/feather-icons"></script>
+    <!-- Vanta's NET effect draws through three.js and looks for a global THREE, so three.js has to load before it. -->
+    <script src="https://cdnjs.cloudflare.com/ajax/libs/three.js/r134/three.min.js"></script>
+    <script src="https://cdn.jsdelivr.net/npm/vanta@latest/dist/vanta.net.min.js"></script>
+    <style>
+        /* Purple/indigo gradient used as the fill of the page title */
+        .gradient-bg {
+            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
+        }
+        /* Monospace panel used for tokenizer and prediction output */
+        .code-block {
+            font-family: 'Courier New', monospace;
+            background-color: rgba(0,0,0,0.7);
+            color: #f8f8f2;
+            border-radius: 0.5rem;
+            padding: 1rem;
+            overflow-x: auto;
+        }
+        /* Blue halo around each card */
+        .neon-glow {
+            box-shadow: 0 0 10px rgba(59, 130, 246, 0.8);
+        }
+        /* Pill rendered for each token id in the tokenizer output */
+        .token {
+            padding: 0.2rem 0.4rem;
+            background-color: rgba(59, 130, 246, 0.2);
+            border-radius: 0.25rem;
+            margin-right: 0.25rem;
+            display: inline-block;
+            margin-bottom: 0.25rem;
+        }
+    </style>
+</head>
+<body class="min-h-screen bg-gray-900 text-gray-100">
+    <div id="vanta-bg" class="fixed inset-0 opacity-20"></div>
+    <div class="relative z-10 container mx-auto px-4 py-12">
+        <!-- Page heading: title filled with the .gradient-bg gradient (bg-clip-text + text-transparent) and a tagline -->
+        <header class="text-center mb-12">
+            <h1 class="text-5xl font-bold mb-4 bg-clip-text text-transparent gradient-bg">BrainyBot Builder</h1>
+            <p class="text-xl text-gray-300">Create your own miniature LLM from scratch!</p>
+        </header>
+        <div class="grid grid-cols-1 lg:grid-cols-2 gap-8">
+            <!-- Training Section: every label is bound to its control via for/id so clicking it focuses the field and assistive tech announces it -->
+            <div class="bg-gray-800 rounded-xl p-6 shadow-lg neon-glow">
+                <h2 class="text-2xl font-semibold mb-4 flex items-center">
+                    <i data-feather="cpu" class="mr-2"></i> Model Training
+                </h2>
+                <div class="mb-6">
+                    <label for="training-data" class="block mb-2 text-sm font-medium">Training Data (one sample per line)</label>
+                    <textarea id="training-data" rows="6" class="w-full bg-gray-700 rounded-lg p-4 text-gray-100 border border-gray-600 focus:border-blue-500 focus:ring-blue-500" placeholder="Enter your training text here..."></textarea>
+                </div>
+                <div class="mb-6">
+                    <label for="training-labels" class="block mb-2 text-sm font-medium">Labels (one per line, matching training data)</label>
+                    <textarea id="training-labels" rows="3" class="w-full bg-gray-700 rounded-lg p-4 text-gray-100 border border-gray-600 focus:border-blue-500 focus:ring-blue-500" placeholder="Enter corresponding labels..."></textarea>
+                </div>
+                <div class="grid grid-cols-2 gap-4 mb-6">
+                    <div>
+                        <label for="epochs" class="block mb-2 text-sm font-medium">Epochs</label>
+                        <input id="epochs" type="number" min="1" max="1000" value="10" class="w-full bg-gray-700 rounded-lg p-2 text-gray-100 border border-gray-600">
+                    </div>
+                    <div>
+                        <label for="learning-rate" class="block mb-2 text-sm font-medium">Learning Rate</label>
+                        <input id="learning-rate" type="number" step="0.001" min="0.0001" max="1" value="0.01" class="w-full bg-gray-700 rounded-lg p-2 text-gray-100 border border-gray-600">
+                    </div>
+                </div>
+                <!-- Explicit type="button": the default type is "submit", which would submit any form this markup is later wrapped in -->
+                <button id="train-btn" type="button" class="w-full py-3 px-4 bg-blue-600 hover:bg-blue-700 rounded-lg font-medium transition-colors flex items-center justify-center">
+                    <i data-feather="activity" class="mr-2"></i> Train Model
+                </button>
+            </div>
+            <!-- Tokenizer & Testing Section -->
+            <div class="bg-gray-800 rounded-xl p-6 shadow-lg neon-glow">
+                <h2 class="text-2xl font-semibold mb-4 flex items-center">
+                    <i data-feather="code" class="mr-2"></i> Tokenizer &amp; Testing
+                </h2>
+                <div class="mb-6">
+                    <!-- Caption for an output region, not a form control, so it is a paragraph referenced by aria-labelledby rather than a <label> -->
+                    <p id="tokenizer-output-label" class="block mb-2 text-sm font-medium">Tokenizer Output</p>
+                    <div id="tokenizer-output" class="code-block min-h-20 p-4" role="group" aria-labelledby="tokenizer-output-label">
+                        Tokens will appear here...
+                    </div>
+                </div>
+                <div class="mb-6">
+                    <label for="test-input" class="block mb-2 text-sm font-medium">Test Input</label>
+                    <input id="test-input" class="w-full bg-gray-700 rounded-lg p-3 text-gray-100 border border-gray-600 focus:border-blue-500 focus:ring-blue-500" placeholder="Type something to test...">
+                </div>
+                <div class="mb-6">
+                    <p id="model-output-label" class="block mb-2 text-sm font-medium">Model Prediction</p>
+                    <!-- role="status" makes this a polite live region, so a new prediction is announced after the Test button is pressed -->
+                    <div id="model-output" class="code-block min-h-20 p-4" role="status" aria-labelledby="model-output-label">
+                        Predictions will appear here...
+                    </div>
+                </div>
+                <!-- Explicit type="button": the default type is "submit" -->
+                <button id="test-btn" type="button" class="w-full py-3 px-4 bg-purple-600 hover:bg-purple-700 rounded-lg font-medium transition-colors flex items-center justify-center">
+                    <i data-feather="play" class="mr-2"></i> Test Model
+                </button>
+            </div>
+        </div>
+        <!-- Training Progress Section: starts with the "hidden" class; the train handler removes it when training begins -->
+        <div id="progress-section" class="mt-8 bg-gray-800 rounded-xl p-6 shadow-lg neon-glow hidden">
+            <h2 class="text-2xl font-semibold mb-4 flex items-center">
+                <i data-feather="bar-chart-2" class="mr-2"></i> Training Progress
+            </h2>
+            <div class="mb-4">
+                <div class="flex justify-between mb-1">
+                    <span id="epoch-progress" class="text-sm font-medium">Epoch: 0/0</span>
+                    <span id="loss-value" class="text-sm font-medium">Loss: -</span>
+                </div>
+                <div class="w-full bg-gray-700 rounded-full h-2.5">
+                    <div id="progress-bar" class="bg-blue-600 h-2.5 rounded-full" style="width: 0%"></div>
+                </div>
+            </div>
+            <!-- Chart.js sizes a responsive chart from the canvas's parent, which should be a positioned container holding only the canvas -->
+            <div class="relative w-full">
+                <!-- role/aria-label plus fallback text give the canvas an accessible name; a bare canvas exposes nothing to assistive tech -->
+                <canvas id="loss-chart" class="w-full h-64" role="img" aria-label="Line chart of training loss">Training loss chart</canvas>
+            </div>
+        </div>
+    </div>
+    <script>
+        // Initialize Vanta.js background
+        VANTA.NET({
+            el: "#vanta-bg",
+            mouseControls: true,
+            touchControls: true,
+            gyroControls: false,
+            minHeight: 200.00,
+            minWidth: 200.00,
+            scale: 1.00,
+            scaleMobile: 1.00,
+            color: 0x3b82f6,
+            backgroundColor: 0x111827,
+            points: 10.00,
+            maxDistance: 20.00,
+            spacing: 15.00
+        });
+        // Initialize Feather Icons
+        feather.replace();
+        // Simple Tokenizer
+        class SimpleTokenizer {
+            constructor() {
+                this.vocab = {};
+                this.inverseVocab = {};
+                this.vocabSize = 0;
+            }
+            fit(texts) {
+                const allText = texts.join(' ');
+                const tokens = allText.toLowerCase().match(/\b\w+\b/g) || [];
+                const uniqueTokens = [...new Set(tokens)];
+                uniqueTokens.forEach((token, index) => {
+                    this.vocab[token] = index;
+                    this.inverseVocab[index] = token;
+                });
+                this.vocabSize = uniqueTokens.length;
+            }
+            tokenize(text) {
+                const tokens = text.toLowerCase().match(/\b\w+\b/g) || [];
+                return tokens.map(token => this.vocab[token] || -1);
+            }
+            detokenize(indices) {
+                return indices.map(idx => this.inverseVocab[idx] || '[UNK]').join(' ');
+            }
+        }
+        // Simple Neural Network
+        class SimpleLLM {
+            constructor(inputSize, outputSize) {
+                this.inputSize = inputSize;
+                this.outputSize = outputSize;
+                this.weights = Array(inputSize).fill().map(() =>
+                    Array(outputSize).fill().map(() => Math.random() * 0.2 - 0.1)
+                );
+                this.bias = Array(outputSize).fill(0);
+            }
+            softmax(logits) {
+                const maxLogit = Math.max(...logits);
+                const exps = logits.map(l => Math.exp(l - maxLogit));
+                const sumExps = exps.reduce((a, b) => a + b, 0);
+                return exps.map(exp => exp / sumExps);
+            }
+            forward(input) {
+                const output = Array(this.outputSize).fill(0);
+                for (let j = 0; j < this.outputSize; j++) {
+                    for (let i = 0; i < this.inputSize; i++) {
+                        if (input[i]) {
+                            output[j] += input[i] * this.weights[i][j];
+                        }
+                    }
+                    output[j] += this.bias[j];
+                }
+                return this.softmax(output);
+            }
+            trainStep(input, target, learningRate) {
+                const prediction = this.forward(input);
+                const error = prediction.map((p, i) => p - (i === target ? 1 : 0));
+                // Update weights
+                for (let i = 0; i < this.inputSize; i++) {
+                    for (let j = 0; j < this.outputSize; j++) {
+                        if (input[i]) {
+                            this.weights[i][j] -= learningRate * error[j] * input[i];
+                        }
+                    }
+                }
+                // Update bias
+                for (let j = 0; j < this.outputSize; j++) {
+                    this.bias[j] -= learningRate * error[j];
+                }
+                // Calculate loss (cross entropy)
+                const loss = -Math.log(prediction[target] + 1e-10);
+                return loss;
+            }
+        }
+        // DOM Elements
+        const trainBtn = document.getElementById('train-btn');
+        const testBtn = document.getElementById('test-btn');
+        const trainingData = document.getElementById('training-data');
+        const trainingLabels = document.getElementById('training-labels');
+        const testInput = document.getElementById('test-input');
+        const tokenizerOutput = document.getElementById('tokenizer-output');
+        const modelOutput = document.getElementById('model-output');
+        const progressSection = document.getElementById('progress-section');
+        const progressBar = document.getElementById('progress-bar');
+        const epochProgress = document.getElementById('epoch-progress');
+        const lossValue = document.getElementById('loss-value');
+        // Initialize chart
+        const ctx = document.getElementById('loss-chart').getContext('2d');
+        const lossChart = new Chart(ctx, {
+            type: 'line',
+            data: {
+                labels: [],
+                datasets: [{
+                    label: 'Training Loss',
+                    data: [],
+                    borderColor: 'rgb(59, 130, 246)',
+                    tension: 0.1,
+                    fill: false
+                }]
+            },
+            options: {
+                responsive: true,
+                plugins: {
+                    legend: {
+                        position: 'top',
+                        labels: {
+                            color: 'rgb(209, 213, 219)'
+                        }
+                    }
+                },
+                scales: {
+                    y: {
+                        beginAtZero: true,
+                        grid: {
+                            color: 'rgba(255, 255, 255, 0.1)'
+                        },
+                        ticks: {
+                            color: 'rgb(209, 213, 219)'
+                        }
+                    },
+                    x: {
+                        grid: {
+                            color: 'rgba(255, 255, 255, 0.1)'
+                        },
+                        ticks: {
+                            color: 'rgb(209, 213, 219)'
+                        }
+                    }
+                }
+            }
+        });
+        // Global variables
+        let tokenizer = new SimpleTokenizer();
+        let model = null;
+        let labelMap = {};
+        let inverseLabelMap = {};
+        let isTraining = false;
+        // Event Listeners
+        trainBtn.addEventListener('click', async () => {
+            if (isTraining) return;
+            const dataText = trainingData.value.trim();
+            const labelsText = trainingLabels.value.trim();
+            if (!dataText || !labelsText) {
+                alert('Please provide both training data and labels');
+                return;
+            }
+            const dataLines = dataText.split('\n').filter(line => line.trim());
+            const labelLines = labelsText.split('\n').filter(line => line.trim());
+            if (dataLines.length !== labelLines.length) {
+                alert('Number of training samples must match number of labels');
+                return;
+            }
+            // Create label mapping
+            const uniqueLabels = [...new Set(labelLines)];
+            labelMap = {};
+            inverseLabelMap = {};
+            uniqueLabels.forEach((label, idx) => {
+                labelMap[label] = idx;
+                inverseLabelMap[idx] = label;
+            });
+            // Initialize tokenizer and model
+            tokenizer.fit(dataLines);
+            model = new SimpleLLM(tokenizer.vocabSize, uniqueLabels.length);
+            // Prepare training data
+            const trainingSet = dataLines.map((text, idx) => ({
+                input: tokenizer.tokenize(text),
+                label: labelMap[labelLines[idx]]
+            }));
+            // Training parameters
+            const epochs = parseInt(document.getElementById('epochs').value);
+            const learningRate = parseFloat(document.getElementById('learning-rate').value);
+            // Show progress section
+            progressSection.classList.remove('hidden');
+            lossChart.data.labels = [];
+            lossChart.data.datasets[0].data = [];
+            lossChart.update();
+            // Train model
+            isTraining = true;
+            trainBtn.disabled = true;
+            trainBtn.innerHTML = '<i data-feather="loader" class="animate-spin mr-2"></i> Training...';
+            feather.replace();
+            let totalLoss = 0;
+            let totalSteps = 0;
+            for (let epoch = 0; epoch < epochs; epoch++) {
+                epochProgress.textContent = `Epoch: ${epoch + 1}/${epochs}`;
+                let epochLoss = 0;
+                const shuffledSet = [...trainingSet].sort(() => Math.random() - 0.5);
+                for (let i = 0; i < shuffledSet.length; i++) {
+                    const {input, label} = shuffledSet[i];
+                    const loss = model.trainStep(input, label, learningRate);
+                    epochLoss += loss;
+                    totalLoss += loss;
+                    totalSteps++;
+                    // Update progress bar
+                    const progress = ((i + 1) / shuffledSet.length) * 100;
+                    progressBar.style.width = `${progress}%`;
+                    // Update loss value periodically
+                    if (i % 5 === 0 || i === shuffledSet.length - 1) {
+                        lossValue.textContent = `Loss: ${(epochLoss / (i + 1)).toFixed(4)}`;
+                        // Add data point to chart every 5 epochs or last epoch
+                        if (epoch % 5 === 0 || epoch === epochs - 1) {
+                            lossChart.data.labels.push(`Epoch ${epoch + 1}`);
+                            lossChart.data.datasets[0].data.push(epochLoss / (i + 1));
+                            lossChart.update();
+                        }
+                        // Small delay to allow UI updates
+                        await new Promise(resolve => setTimeout(resolve, 0));
+                    }
+                }
+            }
+            // Training complete
+            isTraining = false;
+            trainBtn.disabled = false;
+            trainBtn.innerHTML = '<i data-feather="activity" class="mr-2"></i> Train Model';
+            feather.replace();
+            // Show tokenizer output
+            updateTokenizerOutput();
+        });
+        testBtn.addEventListener('click', () => {
+            if (!model) {
+                alert('Please train the model first');
+                return;
+            }
+            const testText = testInput.value.trim();
+            if (!testText) {
+                alert('Please enter some text to test');
+                return;
+            }
+            // Tokenize input
+            const tokens = tokenizer.tokenize(testText);
+            // Make prediction
+            const prediction = model.forward(tokens);
+            const maxIdx = prediction.indexOf(Math.max(...prediction));
+            const predictedLabel = inverseLabelMap[maxIdx];
+            const confidence = prediction[maxIdx];
+            // Display results
+            modelOutput.innerHTML = `
+                <div class="mb-2">Predicted: <span class="font-bold">${predictedLabel}</span></div>
+                <div class="mb-2">Confidence: <span class="font-bold">${(confidence * 100).toFixed(2)}%</span></div>
+                <div class="text-sm">Probabilities:</div>
+                <div class="mt-2">
+                    ${prediction.map((p, idx) => `
+                        <div class="flex items-center mb-1">
+                            <div class="w-24">${inverseLabelMap[idx]}:</div>
+                            <div class="flex-1 bg-gray-700 h-4 rounded-full overflow-hidden">
+                                <div class="bg-blue-500 h-full" style="width: ${p * 100}%"></div>
+                            </div>
+                            <div class="w-16 text-right">${(p * 100).toFixed(1)}%</div>
+                        </div>
+                    `).join('')}
+                </div>
+            `;
+        });
+        function updateTokenizerOutput() {
+            const testText = testInput.value.trim() || "example text to tokenize";
+            const tokens = tokenizer.tokenize(testText);
+            tokenizerOutput.innerHTML = `
+                <div class="mb-2">Text: "${testText}"</div>
+                <div class="mb-2">Tokens:</div>
+                <div class="flex flex-wrap">
+                    ${tokens.map(t => t === -1 ?
+                        '<span class="token bg-red-900">[UNK]</span>' :
+                        `<span class="token">${t}</span>`
+                    ).join('')}
+                </div>
+                <div class="mt-4">Vocabulary size: ${tokenizer.vocabSize}</div>
+            `;
+        }
+        testInput.addEventListener('input', () => {
+            if (tokenizer.vocabSize > 0) {
+                updateTokenizerOutput();
+            }
+        });
+        // Initialize tokenizer output with default text
+        updateTokenizerOutput();
+    </script>
+</body>
 </html>