<!-- voice-command / index.html — uploaded by LukasBe ("Add 2 files", commit c58894c, verified) -->
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Neural Audio Command Recognizer</title>
<script src="https://cdn.tailwindcss.com"></script>
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css">
<style>
/* Gentle scale pulse used to draw attention to the primary record button */
@keyframes pulse {
0% { transform: scale(1); }
50% { transform: scale(1.05); }
100% { transform: scale(1); }
}
.pulse-animation {
animation: pulse 2s infinite;
}
/* Shared blue/purple brand gradient (buttons, logo badge, neurons) */
.gradient-bg {
background: linear-gradient(135deg, #6e8efb, #a777e3);
}
/* Command cards lift on hover */
.command-card {
transition: all 0.3s ease;
transform-style: preserve-3d;
}
.command-card:hover {
transform: translateY(-5px);
box-shadow: 0 20px 25px -5px rgba(0, 0, 0, 0.1), 0 10px 10px -5px rgba(0, 0, 0, 0.04);
}
.waveform {
height: 60px;
position: relative;
overflow: hidden;
}
/* Thin meter showing per-command sample progress on each command card */
.confidence-meter {
height: 6px;
background: rgba(255, 255, 255, 0.2);
border-radius: 3px;
overflow: hidden;
}
.confidence-fill {
height: 100%;
background: linear-gradient(90deg, #4ade80, #3b82f6);
transition: width 0.5s ease;
}
/* Purple halo marking the currently selected command card */
.glow {
box-shadow: 0 0 15px rgba(167, 119, 227, 0.5);
}
.spectrogram {
height: 120px;
background: #1f2937;
border-radius: 6px;
margin-top: 10px;
}
/* Training-progress / prediction-confidence bars */
.progress-bar {
height: 8px;
background: rgba(255, 255, 255, 0.1);
border-radius: 4px;
overflow: hidden;
}
.progress-fill {
height: 100%;
background: linear-gradient(90deg, #a777e3, #6e8efb);
}
/* Circles in the network visualisation; .active marks a high activation */
.neuron {
display: inline-block;
width: 20px;
height: 20px;
border-radius: 50%;
background: linear-gradient(135deg, #6e8efb, #a777e3);
margin: 0 2px;
transition: all 0.3s;
}
.neuron.active {
transform: scale(1.3);
box-shadow: 0 0 10px rgba(167, 119, 227, 0.7);
}
.network-visualization {
display: flex;
justify-content: center;
align-items: center;
height: 200px;
margin: 20px 0;
position: relative;
}
/* Absolutely-positioned lines between layers in the network visualisation */
.connection {
position: absolute;
background: rgba(110, 142, 251, 0.4);
transform-origin: left center;
height: 2px;
}
</style>
</head>
<body class="bg-gray-900 text-white min-h-screen">
<div class="container mx-auto px-4 py-8">
<!-- Header -->
<header class="flex justify-between items-center mb-8">
<div class="flex items-center space-x-2">
<div class="gradient-bg rounded-full w-10 h-10 flex items-center justify-center">
<i class="fas fa-robot text-xl"></i>
</div>
<h1 class="text-2xl font-bold">Neural Audio Command Recognizer</h1>
</div>
<div class="flex space-x-4">
<button id="clearStorageBtn" class="bg-gray-700 hover:bg-gray-600 px-4 py-2 rounded-lg transition">
<i class="fas fa-trash-alt mr-2"></i>Clear Data
</button>
</div>
</header>
<!-- Main Content -->
<div class="grid grid-cols-1 lg:grid-cols-3 gap-8">
<!-- Left Panel - Command List -->
<div class="lg:col-span-1 bg-gray-800 rounded-xl p-6">
<h2 class="text-xl font-semibold mb-4 flex items-center">
<i class="fas fa-list-ul mr-2"></i> Your Commands
</h2>
<div id="commandList" class="space-y-4">
<!-- Commands will be dynamically added here -->
</div>
<div class="mt-6">
<h3 class="font-medium mb-2">Add New Command</h3>
<div class="flex">
<input id="newCommandInput" type="text" placeholder="Command word" class="flex-1 bg-gray-700 border border-gray-600 rounded-l-lg px-4 py-2 focus:outline-none focus:border-purple-500">
<button id="addCommandBtn" class="gradient-bg hover:opacity-90 px-4 py-2 rounded-r-lg font-medium transition">
<i class="fas fa-plus"></i>
</button>
</div>
</div>
<div class="mt-6 bg-gray-700 rounded-lg p-4">
<h3 class="font-medium mb-2">Model Status</h3>
<div class="flex items-center mb-2">
<span class="text-sm">Training Progress:</span>
<span id="trainingProgressText" class="ml-auto text-sm">No data</span>
</div>
<div class="progress-bar">
<div id="trainingProgressBar" class="progress-fill" style="width: 0%"></div>
</div>
</div>
</div>
<!-- Center Panel - Training Interface -->
<div class="lg:col-span-2 space-y-6">
<div class="bg-gray-800 rounded-xl p-6">
<h2 class="text-xl font-semibold mb-4 flex items-center">
<i class="fas fa-microphone-alt mr-2"></i> Training Mode
</h2>
<div class="grid grid-cols-1 md:grid-cols-2 gap-4 mb-6">
<div id="currentCommandDisplay" class="bg-gray-700 rounded-lg p-4">
<h3 class="font-medium mb-2">Training Command</h3>
<div id="currentCommand" class="text-2xl font-bold bg-clip-text text-transparent bg-gradient-to-r from-blue-400 to-purple-500">
None selected
</div>
</div>
<div class="bg-gray-700 rounded-lg p-4">
<h3 class="font-medium mb-2">Training Samples</h3>
<div id="sampleCount" class="text-2xl font-bold">0</div>
<div class="text-sm text-gray-300">Minimum 5 samples needed</div>
</div>
</div>
<!-- Live waveform + spectrogram canvases, drawn while recording -->
<div id="audioVisualization" class="spectrogram relative">
<canvas id="waveformCanvas" class="absolute inset-0 w-full h-full"></canvas>
<canvas id="spectrogramCanvas" class="absolute inset-0 w-full h-full"></canvas>
</div>
<div class="network-visualization" id="networkVisualization">
<!-- Network visualization will be dynamically generated here -->
</div>
<div class="flex flex-col sm:flex-row space-y-4 sm:space-y-0 sm:space-x-4 mt-4">
<button id="recordTrainBtn" class="gradient-bg hover:opacity-90 flex-1 py-3 rounded-lg font-medium transition flex items-center justify-center">
<i class="fas fa-microphone mr-2"></i> Record Sample
</button>
<button id="trainBtn" class="bg-gray-700 hover:bg-gray-600 flex-1 py-3 rounded-lg font-medium transition flex items-center justify-center">
<i class="fas fa-brain mr-2"></i> Train Model
</button>
<button id="testBtn" class="border border-purple-500 text-purple-400 hover:bg-purple-900 hover:bg-opacity-30 flex-1 py-3 rounded-lg font-medium transition flex items-center justify-center">
<i class="fas fa-vial mr-2"></i> Test Model
</button>
</div>
</div>
<!-- Recognition Panel -->
<div class="bg-gray-800 rounded-xl p-6">
<h2 class="text-xl font-semibold mb-4 flex items-center">
<i class="fas fa-robot mr-2"></i> Recognition Mode
</h2>
<!-- Prediction output: recognized command word plus confidence meter -->
<div id="predictionResult" class="bg-gray-700 rounded-lg p-4 mb-4">
<div class="flex justify-between items-center mb-2">
<h3 class="font-medium">Predicted Command</h3>
<div id="predictionConfidence" class="text-sm font-medium">--% confidence</div>
</div>
<div id="recognizedCommand" class="text-3xl font-bold text-center py-4">
Waiting for command...
</div>
<div class="progress-bar">
<div id="confidenceBar" class="progress-fill" style="width: 0%"></div>
</div>
</div>
<div class="flex flex-col sm:flex-row space-y-4 sm:space-y-0 sm:space-x-4">
<button id="recordPredictBtn" class="gradient-bg hover:opacity-90 flex-1 py-3 rounded-lg font-medium transition flex items-center justify-center pulse-animation">
<i class="fas fa-microphone mr-2"></i> Record Command
</button>
<button id="continuousBtn" class="bg-gray-700 hover:bg-gray-600 flex-1 py-3 rounded-lg font-medium transition flex items-center justify-center">
<i class="fas fa-circle-notch mr-2"></i> Continuous Mode
</button>
</div>
</div>
</div>
</div>
</div>
<script>
// Neural Network Implementation
// Minimal fully-connected feed-forward network with one hidden layer,
// sigmoid activations on both layers, trained by per-sample stochastic
// gradient descent with backpropagation.
class NeuralNetwork {
  /**
   * @param {number} inputSize  number of input features
   * @param {number} hiddenSize number of hidden-layer neurons
   * @param {number} outputSize number of output classes
   */
  constructor(inputSize, hiddenSize, outputSize) {
    this.inputSize = inputSize;
    this.hiddenSize = hiddenSize;
    this.outputSize = outputSize;
    // Random init scaled by 1/sqrt(fanIn) (LeCun-style); keeps early
    // pre-activations in the sigmoid's non-saturated range.
    const initScale = (fanIn) => Math.sqrt(1.0 / fanIn);
    // Input -> hidden weights, shape [hiddenSize][inputSize]
    this.weights1 = Array(hiddenSize).fill().map(() =>
      Array(inputSize).fill().map(() => initScale(inputSize) * (Math.random() * 2 - 1))
    );
    this.bias1 = Array(hiddenSize).fill(0.1);
    // Hidden -> output weights, shape [outputSize][hiddenSize]
    this.weights2 = Array(outputSize).fill().map(() =>
      Array(hiddenSize).fill().map(() => initScale(hiddenSize) * (Math.random() * 2 - 1))
    );
    this.bias2 = Array(outputSize).fill(0.1);
    this.learningRate = 0.01;
  }
  // Logistic sigmoid activation.
  sigmoid(x) {
    return 1 / (1 + Math.exp(-x));
  }
  // Derivative of the sigmoid with respect to its pre-activation input x.
  // Note: for an already-activated value a = sigmoid(x) the derivative is
  // simply a * (1 - a); do NOT pass activated values to this method.
  sigmoidDerivative(x) {
    const s = this.sigmoid(x);
    return s * (1 - s);
  }
  /**
   * Forward pass.
   * @param {number[]} input feature vector of length inputSize
   * @returns {{output: number[], hidden: number[]}} post-sigmoid output and
   *          hidden-layer activations
   */
  forward(input) {
    // Input to hidden
    const hiddenInput = Array(this.hiddenSize).fill(0);
    for (let i = 0; i < this.hiddenSize; i++) {
      for (let j = 0; j < this.inputSize; j++) {
        hiddenInput[i] += this.weights1[i][j] * input[j];
      }
      hiddenInput[i] += this.bias1[i];
      hiddenInput[i] = this.sigmoid(hiddenInput[i]);
    }
    // Hidden to output
    const output = Array(this.outputSize).fill(0);
    for (let i = 0; i < this.outputSize; i++) {
      for (let j = 0; j < this.hiddenSize; j++) {
        output[i] += this.weights2[i][j] * hiddenInput[j];
      }
      output[i] += this.bias2[i];
      output[i] = this.sigmoid(output[i]);
    }
    return {
      output,
      hidden: hiddenInput
    };
  }
  /**
   * One SGD step on a single (input, target) pair.
   * @param {number[]} input  feature vector of length inputSize
   * @param {number[]} target one-hot target of length outputSize
   * @returns {number} mean absolute output error for this sample (pre-update)
   */
  train(input, target) {
    // Forward pass
    const { output, hidden } = this.forward(input);
    // Backpropagation: output layer deltas.
    // BUGFIX: `output` and `hidden` are already sigmoid-activated, so the
    // correct derivative is a * (1 - a). The old code called
    // sigmoidDerivative() on them, applying the sigmoid a second time and
    // producing systematically wrong gradients.
    const outputErrors = Array(this.outputSize).fill(0);
    const outputDeltas = Array(this.outputSize).fill(0);
    for (let i = 0; i < this.outputSize; i++) {
      outputErrors[i] = target[i] - output[i];
      outputDeltas[i] = outputErrors[i] * output[i] * (1 - output[i]);
    }
    // Hidden layer deltas (backpropagated through weights2).
    const hiddenErrors = Array(this.hiddenSize).fill(0);
    const hiddenDeltas = Array(this.hiddenSize).fill(0);
    for (let i = 0; i < this.hiddenSize; i++) {
      for (let j = 0; j < this.outputSize; j++) {
        hiddenErrors[i] += outputDeltas[j] * this.weights2[j][i];
      }
      hiddenDeltas[i] = hiddenErrors[i] * hidden[i] * (1 - hidden[i]);
    }
    // Gradient step on weights and biases.
    for (let i = 0; i < this.outputSize; i++) {
      for (let j = 0; j < this.hiddenSize; j++) {
        this.weights2[i][j] += this.learningRate * outputDeltas[i] * hidden[j];
      }
      this.bias2[i] += this.learningRate * outputDeltas[i];
    }
    for (let i = 0; i < this.hiddenSize; i++) {
      for (let j = 0; j < this.inputSize; j++) {
        this.weights1[i][j] += this.learningRate * hiddenDeltas[i] * input[j];
      }
      this.bias1[i] += this.learningRate * hiddenDeltas[i];
    }
    // Mean absolute error of this sample before the update.
    return outputErrors.reduce((sum, err) => sum + Math.abs(err), 0) / outputErrors.length;
  }
  // Serialize all weights/biases and layer sizes to a plain object.
  toJSON() {
    return {
      inputSize: this.inputSize,
      hiddenSize: this.hiddenSize,
      outputSize: this.outputSize,
      weights1: this.weights1,
      weights2: this.weights2,
      bias1: this.bias1,
      bias2: this.bias2
    };
  }
  // Reconstruct a network from toJSON() output (random init is overwritten).
  static fromJSON(json) {
    const net = new NeuralNetwork(json.inputSize, json.hiddenSize, json.outputSize);
    net.weights1 = json.weights1;
    net.weights2 = json.weights2;
    net.bias1 = json.bias1;
    net.bias2 = json.bias2;
    return net;
  }
}
// Audio Feature Extractor
// Captures microphone audio through the Web Audio API, keeps a rolling
// buffer of frequency frames for the spectrogram display, and derives a
// small feature vector from the analyser's frequency data.
class AudioFeatureExtractor {
constructor() {
// webkit prefix kept for older Safari.
this.audioContext = new (window.AudioContext || window.webkitAudioContext)();
this.analyser = this.audioContext.createAnalyser();
// 512-point FFT -> 256 frequency bins per frame (frequencyBinCount).
this.analyser.fftSize = 512;
this.bufferLength = this.analyser.frequencyBinCount;
this.dataArray = new Uint8Array(this.bufferLength);
this.sampleRate = this.audioContext.sampleRate;
// For spectrogram
this.spectrogramBuffer = [];
this.maxSpectrogramLength = 30; // Number of frames to keep
}
// Starts capturing from `stream`: records it with MediaRecorder and, once
// per animation frame, snapshots the analyser's frequency data into the
// spectrogram buffer and passes it to `onAudioProcess(dataArray)`.
async startRecording(stream, onAudioProcess) {
this.audioSource = this.audioContext.createMediaStreamSource(stream);
this.audioSource.connect(this.analyser);
// For recording audio data
this.recorder = new MediaRecorder(stream);
this.chunks = [];
this.recorder.ondataavailable = e => this.chunks.push(e.data);
this.recorder.start();
// Process audio
const process = () => {
this.analyser.getByteFrequencyData(this.dataArray);
// Add to spectrogram buffer (copy: dataArray is reused each frame).
this.spectrogramBuffer.push(new Uint8Array(this.dataArray));
if (this.spectrogramBuffer.length > this.maxSpectrogramLength) {
this.spectrogramBuffer.shift();
}
onAudioProcess(this.dataArray);
// rafId is kept so stopRecording() can cancel the loop.
this.rafId = requestAnimationFrame(process);
};
process();
}
// Stops the rAF loop and the recorder; resolves with the decoded
// AudioBuffer of the captured audio, or null when nothing was recording.
// NOTE(review): the blob is labeled 'audio/wav' but MediaRecorder output
// format is browser-chosen — decodeAudioData sniffs the real format.
stopRecording() {
if (this.rafId) {
cancelAnimationFrame(this.rafId);
}
return new Promise((resolve) => {
if (!this.recorder) {
resolve(null);
return;
}
this.recorder.onstop = async () => {
const blob = new Blob(this.chunks, { type: 'audio/wav' });
const audioBuffer = await this.decodeAudioData(blob);
resolve(audioBuffer);
};
this.recorder.stop();
if (this.audioSource) {
this.audioSource.disconnect();
}
});
}
// Decodes a recorded Blob into an AudioBuffer (callback API wrapped in a
// promise for broad browser support).
async decodeAudioData(blob) {
const arrayBuffer = await blob.arrayBuffer();
return new Promise((resolve, reject) => {
this.audioContext.decodeAudioData(arrayBuffer, resolve, reject);
});
}
// Derives a 26-value feature vector: 13 frequency-band means plus 13
// first-difference "delta" values.
// NOTE(review): the audioBuffer parameter is ignored — features come from
// the analyser's *live* frequency snapshot at call time, not from the
// recorded buffer. TODO confirm this is intended.
extractMFCC(audioBuffer) {
// Simplified MFCC feature extraction
// In a real application, you'd want a full MFCC implementation
// First get FFT data
this.analyser.getByteFrequencyData(this.dataArray);
// Convert to power spectrum
const powerSpectrum = Array.from(this.dataArray).map(val => val / 255);
// Simple feature extraction - using mean of bands as approximation
const bands = 13; // Standard number of MFCC coefficients
const bandSize = Math.floor(powerSpectrum.length / bands);
const features = [];
for (let i = 0; i < bands; i++) {
const start = i * bandSize;
const end = (i + 1) * bandSize;
const band = powerSpectrum.slice(start, end);
const mean = band.reduce((sum, val) => sum + val, 0) / band.length;
features.push(mean);
}
// Add delta features (approximation)
if (features.length > 1) {
for (let i = 1; i < features.length; i++) {
features.push(features[i] - features[i-1]);
}
}
return features;
}
// Rolling buffer of recent frequency frames (oldest first, newest last).
getSpectrogramData() {
return this.spectrogramBuffer;
}
}
// Main Application
// Wires the DOM UI to AudioFeatureExtractor and NeuralNetwork: manages the
// command list, sample recording, model training/testing, live prediction,
// localStorage persistence and the canvas/network visualisations.
class AudioCommandApp {
  constructor() {
    this.featureExtractor = new AudioFeatureExtractor();
    this.model = null;            // trained NeuralNetwork, or null
    this.commands = [];           // every command word the user added
    this.trainingData = {};       // command -> array of feature vectors
    this.currentCommand = null;   // command currently selected for training
    this.isRecording = false;
    this.isTraining = false;
    this.isPredicting = false;
    this.minSamples = 5;  // Minimum samples per command needed for training
    this.inputSize = 26;  // Number of MFCC features (13 + 13 deltas)
    this.hiddenSize = 16; // Size of hidden layer
    // Commands the current model was actually trained on, in output-index
    // order; may be a subset of this.commands when some commands lack
    // enough samples.
    this.trainedCommands = null;
    // DOM elements
    this.commandList = document.getElementById('commandList');
    this.newCommandInput = document.getElementById('newCommandInput');
    this.addCommandBtn = document.getElementById('addCommandBtn');
    this.recordTrainBtn = document.getElementById('recordTrainBtn');
    this.trainBtn = document.getElementById('trainBtn');
    this.testBtn = document.getElementById('testBtn');
    this.recordPredictBtn = document.getElementById('recordPredictBtn');
    this.continuousBtn = document.getElementById('continuousBtn');
    this.currentCommandDisplay = document.getElementById('currentCommand');
    this.sampleCount = document.getElementById('sampleCount');
    this.trainingProgressBar = document.getElementById('trainingProgressBar');
    this.trainingProgressText = document.getElementById('trainingProgressText');
    this.recognizedCommand = document.getElementById('recognizedCommand');
    this.predictionConfidence = document.getElementById('predictionConfidence');
    this.confidenceBar = document.getElementById('confidenceBar');
    this.clearStorageBtn = document.getElementById('clearStorageBtn');
    // BUGFIX: this container was never looked up, so setupCanvas() threw
    // "this.audioVisualization is undefined" on startup.
    this.audioVisualization = document.getElementById('audioVisualization');
    // Visualization canvases
    this.waveformCanvas = document.getElementById('waveformCanvas');
    this.waveformCtx = this.waveformCanvas.getContext('2d');
    this.spectrogramCanvas = document.getElementById('spectrogramCanvas');
    this.spectrogramCtx = this.spectrogramCanvas.getContext('2d');
    this.networkVisualization = document.getElementById('networkVisualization');
    // Setup UI
    this.setupCanvas();
    this.setupEventListeners();
    this.loadFromStorage();
    this.renderCommandList();
    this.visualizeNetwork();
  }
  // Sizes both canvases to the visualization container and clears them.
  setupCanvas() {
    const width = this.audioVisualization.clientWidth;
    const height = this.audioVisualization.clientHeight;
    this.waveformCanvas.width = width;
    this.waveformCanvas.height = height;
    this.spectrogramCanvas.width = width;
    this.spectrogramCanvas.height = height;
    // Initially clear canvases
    this.clearVisualizations();
  }
  // Binds all button / window event handlers.
  setupEventListeners() {
    // Add new command (empty input and duplicates are ignored).
    this.addCommandBtn.addEventListener('click', () => {
      const command = this.newCommandInput.value.trim().toLowerCase();
      if (command && !this.commands.includes(command)) {
        this.commands.push(command);
        this.trainingData[command] = [];
        this.newCommandInput.value = '';
        this.saveToStorage();
        this.renderCommandList();
      }
    });
    // Record training sample (requires a selected command).
    this.recordTrainBtn.addEventListener('click', () => {
      if (this.currentCommand) {
        this.toggleTrainRecording();
      } else {
        alert('Please select a command to train first');
      }
    });
    // Train model
    this.trainBtn.addEventListener('click', () => this.trainModel());
    // Test model
    this.testBtn.addEventListener('click', () => this.testModel());
    // Record prediction
    this.recordPredictBtn.addEventListener('click', () => this.togglePredictRecording());
    // Continuous recognition mode
    this.continuousBtn.addEventListener('click', () => this.toggleContinuousMode());
    // Clear storage and reset all in-memory state.
    this.clearStorageBtn.addEventListener('click', () => {
      if (confirm('Clear all training data and commands?')) {
        localStorage.clear();
        this.commands = [];
        this.trainingData = {};
        this.model = null;
        this.currentCommand = null;
        this.trainedCommands = null;
        this.saveToStorage();
        this.renderCommandList();
        this.updateTrainingUI();
        this.clearVisualizations();
        this.visualizeNetwork();
      }
    });
    // Re-size canvases when the window changes.
    window.addEventListener('resize', () => {
      this.setupCanvas();
      if (this.isRecording) {
        this.drawVisualizations(this.featureExtractor.getSpectrogramData());
      }
    });
  }
  // Shows a transient toast in the bottom-right corner (shared by sample
  // recording and training completion).
  _showNotification(message) {
    const notification = document.createElement('div');
    notification.className = 'fixed bottom-4 right-4 bg-green-600 text-white px-4 py-2 rounded-lg shadow-lg transition transform translate-y-10 opacity-0';
    notification.innerHTML = message;
    document.body.appendChild(notification);
    setTimeout(() => {
      notification.classList.add('opacity-100', 'translate-y-0');
      setTimeout(() => {
        notification.classList.remove('opacity-100', 'translate-y-0');
        setTimeout(() => notification.remove(), 300);
      }, 2000);
    }, 10);
  }
  // Starts/stops recording of one training sample for the selected command;
  // on stop, extracts features and stores them under the command.
  async toggleTrainRecording() {
    try {
      if (this.isRecording) {
        // Stop recording
        this.isRecording = false;
        this.recordTrainBtn.innerHTML = '<i class="fas fa-microphone mr-2"></i> Record Sample';
        this.recordTrainBtn.classList.remove('bg-red-600', 'hover:bg-red-500');
        this.recordTrainBtn.classList.add('gradient-bg');
        const audioBuffer = await this.featureExtractor.stopRecording();
        if (audioBuffer) {
          const features = this.featureExtractor.extractMFCC(audioBuffer);
          this.trainingData[this.currentCommand].push(features);
          this.saveToStorage();
          this.updateTrainingUI();
          this._showNotification('Sample recorded successfully');
        }
        this.clearVisualizations();
      } else {
        // Start recording
        this.isRecording = true;
        this.recordTrainBtn.innerHTML = '<i class="fas fa-stop mr-2"></i> Stop Recording';
        this.recordTrainBtn.classList.add('bg-red-600', 'hover:bg-red-500');
        this.recordTrainBtn.classList.remove('gradient-bg');
        const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
        this.featureExtractor.startRecording(stream, (data) => {
          this.drawVisualizations(this.featureExtractor.getSpectrogramData());
        });
      }
    } catch (error) {
      // Microphone/permission failure: restore the button and surface it.
      console.error('Recording error:', error);
      this.isRecording = false;
      this.recordTrainBtn.innerHTML = '<i class="fas fa-microphone mr-2"></i> Record Sample';
      this.recordTrainBtn.classList.add('gradient-bg');
      this.recordTrainBtn.classList.remove('bg-red-600', 'hover:bg-red-500');
      alert('Error accessing microphone: ' + error.message);
    }
  }
  // Starts/stops live prediction; while recording, each animation frame
  // extracts features from the live analyser snapshot and classifies them.
  async togglePredictRecording() {
    try {
      if (this.isPredicting) {
        // Stop recording
        this.isPredicting = false;
        this.recordPredictBtn.innerHTML = '<i class="fas fa-microphone mr-2"></i> Record Command';
        this.recordPredictBtn.classList.remove('bg-red-600', 'hover:bg-red-500');
        this.recordPredictBtn.classList.add('gradient-bg', 'pulse-animation');
        await this.featureExtractor.stopRecording();
        this.clearVisualizations();
      } else {
        // Start recording
        this.isPredicting = true;
        this.recordPredictBtn.innerHTML = '<i class="fas fa-stop mr-2"></i> Stop Recording';
        this.recordPredictBtn.classList.add('bg-red-600', 'hover:bg-red-500');
        this.recordPredictBtn.classList.remove('gradient-bg', 'pulse-animation');
        this.recognizedCommand.textContent = 'Listening...';
        this.predictionConfidence.textContent = '--% confidence';
        this.confidenceBar.style.width = '0%';
        const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
        this.featureExtractor.startRecording(stream, (data) => {
          this.drawVisualizations(this.featureExtractor.getSpectrogramData());
          if (this.model) {
            // extractMFCC reads the live analyser state; the argument is
            // unused, so none is passed here.
            const features = this.featureExtractor.extractMFCC();
            this.predictCommand(features);
          }
        });
      }
    } catch (error) {
      console.error('Prediction error:', error);
      this.isPredicting = false;
      this.recordPredictBtn.innerHTML = '<i class="fas fa-microphone mr-2"></i> Record Command';
      this.recordPredictBtn.classList.add('gradient-bg', 'pulse-animation');
      this.recordPredictBtn.classList.remove('bg-red-600', 'hover:bg-red-500');
      alert('Error accessing microphone: ' + error.message);
    }
  }
  // Placeholder: hands-free always-on recognition is not implemented yet.
  toggleContinuousMode() {
    alert('Continuous mode coming soon!');
  }
  // Trains the network on all commands that have >= minSamples samples.
  trainModel() {
    if (this.commands.length < 1) {
      alert('Please add at least one command first');
      return;
    }
    // Only commands with enough samples take part in training.
    const commandsWithEnoughSamples = this.commands.filter(cmd =>
      this.trainingData[cmd] && this.trainingData[cmd].length >= this.minSamples
    );
    if (commandsWithEnoughSamples.length < 1) {
      alert(`Please record at least ${this.minSamples} samples for each command you want to train`);
      return;
    }
    this.isTraining = true;
    this.trainBtn.disabled = true;
    this.recordTrainBtn.disabled = true;
    // Remember the label order the model is trained with so predictions
    // can be mapped back to the right command word.
    // BUGFIX: prediction previously indexed into this.commands, which is
    // wrong whenever some commands were excluded from training.
    this.trainedCommands = commandsWithEnoughSamples.slice();
    // Prepare training data with one-hot targets in trainedCommands order.
    const trainingData = [];
    const targets = [];
    commandsWithEnoughSamples.forEach((cmd, idx) => {
      this.trainingData[cmd].forEach(features => {
        trainingData.push(features);
        const target = Array(commandsWithEnoughSamples.length).fill(0);
        target[idx] = 1;
        targets.push(target);
      });
    });
    // Initialize the model; rebuild it when the number of output classes
    // changed since the last run.
    // BUGFIX: the old code reused a stale model with the wrong outputSize.
    if (!this.model || this.model.outputSize !== commandsWithEnoughSamples.length) {
      this.model = new NeuralNetwork(this.inputSize, this.hiddenSize, commandsWithEnoughSamples.length);
    }
    // Train asynchronously (one epoch per animation frame) so the UI stays
    // responsive during the run.
    const epochs = 200;
    const batchSize = 16;
    const progressStep = Math.ceil(epochs / 20);
    const train = async (epoch = 0) => {
      if (epoch >= epochs) {
        // Training complete
        this.isTraining = false;
        this.trainBtn.disabled = false;
        this.recordTrainBtn.disabled = false;
        // BUGFIX: persist the freshly trained model and label order; the
        // model was never saved after training before.
        this.saveToStorage();
        // Visualize the trained network
        this.visualizeNetwork();
        this._showNotification('Training complete! Model is ready');
        return;
      }
      // Fisher-Yates shuffle of sample indices for this epoch.
      const shuffledIndices = Array.from({ length: trainingData.length }, (_, i) => i);
      for (let i = shuffledIndices.length - 1; i > 0; i--) {
        const j = Math.floor(Math.random() * (i + 1));
        [shuffledIndices[i], shuffledIndices[j]] = [shuffledIndices[j], shuffledIndices[i]];
      }
      // Train in mini-batches
      let totalError = 0;
      for (let i = 0; i < Math.ceil(trainingData.length / batchSize); i++) {
        const batchIndices = shuffledIndices.slice(i * batchSize, (i + 1) * batchSize);
        for (const idx of batchIndices) {
          const error = this.model.train(trainingData[idx], targets[idx]);
          totalError += error;
        }
      }
      const avgError = totalError / trainingData.length;
      // Update progress UI periodically.
      if (epoch % progressStep === 0 || epoch === epochs - 1) {
        const progress = Math.floor((epoch / epochs) * 100);
        this.trainingProgressBar.style.width = `${progress}%`;
        this.trainingProgressText.textContent = `Epoch ${epoch + 1}/${epochs} (Error: ${avgError.toFixed(4)})`;
        // Visualize network occasionally
        if (epoch % (progressStep * 2) === 0) {
          this.visualizeNetwork();
        }
      }
      // Yield to the event loop, then schedule the next epoch.
      await new Promise(resolve => setTimeout(resolve, 0));
      requestAnimationFrame(() => train(epoch + 1));
    };
    // Start training
    train();
  }
  // Evaluates the model against its own training data and reports
  // per-command and overall accuracy in an alert.
  testModel() {
    if (!this.model || this.commands.length < 1) {
      alert('Please train at least one command first');
      return;
    }
    // Labels in the order the model was trained with.
    const labels = this.trainedCommands || this.commands;
    const summary = {};
    let totalCorrect = 0;
    let totalSamples = 0;
    labels.forEach(cmd => {
      if (!this.trainingData[cmd] || this.trainingData[cmd].length === 0) return;
      summary[cmd] = { correct: 0, total: this.trainingData[cmd].length };
      totalSamples += this.trainingData[cmd].length;
      this.trainingData[cmd].forEach(features => {
        const prediction = this.model.forward(features).output;
        const predictedIndex = prediction.indexOf(Math.max(...prediction));
        // BUGFIX: compare against the trained label order, not this.commands.
        const actualIndex = labels.indexOf(cmd);
        if (predictedIndex === actualIndex) {
          summary[cmd].correct++;
          totalCorrect++;
        }
      });
    });
    // Guard against a division by zero when no samples exist.
    if (totalSamples === 0) {
      alert('No training samples available to test against');
      return;
    }
    // Display test results
    let resultText = 'Model Test Results\n\n';
    labels.forEach(cmd => {
      if (!summary[cmd]) return;
      const accuracy = Math.round((summary[cmd].correct / summary[cmd].total) * 100);
      resultText += `${cmd}: ${summary[cmd].correct}/${summary[cmd].total} (${accuracy}%)\n`;
    });
    resultText += `\nOverall Accuracy: ${Math.round((totalCorrect / totalSamples) * 100)}%`;
    alert(resultText);
  }
  // Classifies one feature vector and updates the recognition UI; results
  // below the 30% confidence threshold show as "Not recognized".
  predictCommand(features) {
    const labels = this.trainedCommands || this.commands;
    if (!this.model || labels.length < 1) return;
    const { output, hidden } = this.model.forward(features);
    const maxConfidence = Math.max(...output);
    const predictedIndex = output.indexOf(maxConfidence);
    const confidence = Math.round(maxConfidence * 100);
    if (confidence > 30) { // Minimum confidence threshold
      // BUGFIX: map the output index through the trained label order.
      const predictedCommand = labels[predictedIndex];
      this.recognizedCommand.textContent = predictedCommand;
      this.predictionConfidence.textContent = `${confidence}% confidence`;
      this.confidenceBar.style.width = `${confidence}%`;
      // Visualize network activation
      this.visualizeNetwork(hidden, predictedIndex, confidence);
    } else {
      this.recognizedCommand.textContent = 'Not recognized';
      this.predictionConfidence.textContent = 'Low confidence';
      this.confidenceBar.style.width = '0%';
    }
  }
  // Draws the latest frequency frame as a line (waveform canvas) and the
  // whole rolling buffer as columns (spectrogram canvas).
  drawVisualizations(spectrogramBuffer) {
    if (!spectrogramBuffer || spectrogramBuffer.length === 0) return;
    const width = this.waveformCanvas.width;
    const height = this.waveformCanvas.height;
    // Clear canvases
    this.waveformCtx.clearRect(0, 0, width, height);
    this.spectrogramCtx.clearRect(0, 0, width, height);
    // Draw the newest frame as a simple line plot.
    this.waveformCtx.beginPath();
    this.waveformCtx.strokeStyle = '#a777e3';
    this.waveformCtx.lineWidth = 2;
    const currentData = spectrogramBuffer[spectrogramBuffer.length - 1];
    const sliceWidth = width / currentData.length;
    for (let i = 0; i < currentData.length; i++) {
      const v = currentData[i] / 255.0;
      const y = (1 - v) * height;
      if (i === 0) {
        this.waveformCtx.moveTo(0, y);
      } else {
        this.waveformCtx.lineTo(i * sliceWidth, y);
      }
    }
    this.waveformCtx.stroke();
    // Draw spectrogram: one 1px column per buffered frame, newest at right.
    const binHeight = height / currentData.length;
    for (let i = 0; i < spectrogramBuffer.length; i++) {
      const colData = spectrogramBuffer[i];
      const x = width - (spectrogramBuffer.length - i);
      for (let j = 0; j < colData.length; j++) {
        const value = colData[j] / 255;
        // Blue hue; lightness encodes magnitude.
        this.spectrogramCtx.fillStyle = `hsl(240, 100%, ${value * 100}%)`;
        this.spectrogramCtx.fillRect(x, j * binHeight, 1, binHeight);
      }
    }
  }
  // Resets both canvases to the idle "No audio data" state.
  clearVisualizations() {
    this.waveformCtx.clearRect(0, 0, this.waveformCanvas.width, this.waveformCanvas.height);
    this.spectrogramCtx.clearRect(0, 0, this.spectrogramCanvas.width, this.spectrogramCanvas.height);
    // Draw empty state
    this.waveformCtx.fillStyle = 'rgba(255, 255, 255, 0.05)';
    this.waveformCtx.fillRect(0, 0, this.waveformCanvas.width, this.waveformCanvas.height);
    this.spectrogramCtx.fillStyle = 'rgba(255, 255, 255, 0.05)';
    this.spectrogramCtx.fillRect(0, 0, this.spectrogramCanvas.width, this.spectrogramCanvas.height);
    this.waveformCtx.fillStyle = 'white';
    this.waveformCtx.font = '14px Arial';
    this.waveformCtx.textAlign = 'center';
    this.waveformCtx.fillText('No audio data', this.waveformCanvas.width / 2, this.waveformCanvas.height / 2);
  }
  // Renders a schematic input/hidden/output diagram; when activations are
  // supplied (during prediction) neurons are shaded accordingly.
  visualizeNetwork(hiddenActivations = null, outputIndex = -1, confidence = 0) {
    // Clear network visualization
    this.networkVisualization.innerHTML = '';
    if (!this.model) {
      // Show placeholder if no model exists
      const placeholder = document.createElement('div');
      placeholder.className = 'text-gray-400 text-center py-12';
      placeholder.textContent = 'No trained model. Train with at least 5 samples per command.';
      this.networkVisualization.appendChild(placeholder);
      return;
    }
    // Labels matching the model's output order.
    const labels = this.trainedCommands || this.commands;
    // Create layers container
    const layersContainer = document.createElement('div');
    layersContainer.className = 'flex items-center justify-center h-full';
    this.networkVisualization.appendChild(layersContainer);
    // Input layer
    const inputLayer = document.createElement('div');
    inputLayer.className = 'flex flex-col items-center mx-2';
    const inputLabel = document.createElement('div');
    inputLabel.className = 'text-xs text-gray-400 mb-1';
    inputLabel.textContent = 'Input Features';
    inputLayer.appendChild(inputLabel);
    const inputNeurons = document.createElement('div');
    inputNeurons.className = 'flex flex-col items-center';
    for (let i = 0; i < this.model.inputSize; i++) {
      const neuron = document.createElement('div');
      neuron.className = 'neuron';
      inputNeurons.appendChild(neuron);
    }
    inputLayer.appendChild(inputNeurons);
    layersContainer.appendChild(inputLayer);
    // Decorative connections between input and hidden (positions are
    // approximate; these are visual filler, not exact geometry).
    for (let i = 0; i < this.model.inputSize; i++) {
      for (let j = 0; j < this.model.hiddenSize; j++) {
        const connection = document.createElement('div');
        connection.className = 'connection';
        connection.style.width = '60px';
        connection.style.left = (30 + i * 0) + 'px'; // Adjusted for display
        connection.style.top = (20 + i * 10) + 'px'; // Simplified positioning
        layersContainer.appendChild(connection);
      }
    }
    // Hidden layer
    const hiddenLayer = document.createElement('div');
    hiddenLayer.className = 'flex flex-col items-center mx-2';
    const hiddenLabel = document.createElement('div');
    hiddenLabel.className = 'text-xs text-gray-400 mb-1';
    hiddenLabel.textContent = 'Hidden Layer';
    hiddenLayer.appendChild(hiddenLabel);
    const hiddenNeurons = document.createElement('div');
    hiddenNeurons.className = 'flex flex-col items-center';
    for (let i = 0; i < this.model.hiddenSize; i++) {
      const neuron = document.createElement('div');
      neuron.className = 'neuron';
      if (hiddenActivations) {
        // Shade each neuron by its sigmoid activation (0..1).
        const activation = hiddenActivations[i];
        neuron.style.backgroundColor = `rgba(167, 119, 227, ${activation})`;
        if (activation > 0.6) neuron.classList.add('active');
      }
      hiddenNeurons.appendChild(neuron);
    }
    hiddenLayer.appendChild(hiddenNeurons);
    layersContainer.appendChild(hiddenLayer);
    // Decorative connections between hidden and output.
    for (let i = 0; i < this.model.hiddenSize; i++) {
      for (let j = 0; j < this.model.outputSize; j++) {
        const connection = document.createElement('div');
        connection.className = 'connection';
        connection.style.width = '60px';
        layersContainer.appendChild(connection);
      }
    }
    // Output layer
    const outputLayer = document.createElement('div');
    outputLayer.className = 'flex flex-col items-center mx-2';
    const outputLabel = document.createElement('div');
    outputLabel.className = 'text-xs text-gray-400 mb-1';
    outputLabel.textContent = 'Output';
    outputLayer.appendChild(outputLabel);
    const outputNeurons = document.createElement('div');
    outputNeurons.className = 'flex flex-col items-center';
    for (let i = 0; i < this.model.outputSize; i++) {
      const neuron = document.createElement('div');
      neuron.className = 'neuron';
      if (outputIndex >= 0) {
        if (i === outputIndex) {
          // Highlight the predicted class, dim the rest.
          neuron.style.backgroundColor = `rgba(74, 222, 128, ${confidence / 100})`;
          if (confidence > 50) neuron.classList.add('active');
        } else {
          neuron.style.opacity = '0.3';
        }
      }
      outputNeurons.appendChild(neuron);
      // Add command labels in the model's trained output order.
      if (labels[i]) {
        const label = document.createElement('div');
        label.className = 'text-xs text-center mt-1';
        label.textContent = labels[i];
        outputNeurons.appendChild(label);
      }
    }
    outputLayer.appendChild(outputNeurons);
    layersContainer.appendChild(outputLayer);
  }
  // Rebuilds the left-hand command card list from current state.
  renderCommandList() {
    this.commandList.innerHTML = '';
    this.commands.forEach(cmd => {
      const samples = this.trainingData[cmd] ? this.trainingData[cmd].length : 0;
      const statusColor = samples >= this.minSamples ? 'bg-green-500' :
        samples > 0 ? 'bg-yellow-500' : 'bg-red-500';
      const statusText = samples >= this.minSamples ? 'Ready' :
        samples > 0 ? `${samples}/${this.minSamples}` : 'New';
      const card = document.createElement('div');
      card.className = `command-card bg-gray-700 rounded-lg p-4 cursor-pointer ${this.currentCommand === cmd ? 'glow' : ''}`;
      card.innerHTML = `
<div class="flex justify-between items-center">
<h3 class="font-medium">${cmd}</h3>
<span class="text-xs ${statusColor} px-2 py-1 rounded-full">${statusText}</span>
</div>
<div class="waveform mt-2 rounded"></div>
<div class="confidence-meter mt-2">
<div class="confidence-fill" style="width: ${Math.min(100, samples / this.minSamples * 100)}%"></div>
</div>
<div class="text-xs text-gray-400 mt-1">${samples} samples</div>
`;
      // Selecting a card makes it the active training command.
      card.addEventListener('click', () => {
        this.currentCommand = cmd;
        this.currentCommandDisplay.textContent = `"${cmd}"`;
        this.updateTrainingUI();
        // Highlight selected card
        document.querySelectorAll('.command-card').forEach(c => c.classList.remove('glow'));
        card.classList.add('glow');
      });
      this.commandList.appendChild(card);
    });
    if (this.commands.length === 0) {
      this.commandList.innerHTML = '<div class="text-center py-8 text-gray-400">No commands added yet</div>';
    }
  }
  // Refreshes the sample counter and train-button enablement.
  updateTrainingUI() {
    if (!this.currentCommand) {
      this.sampleCount.textContent = '0';
      return;
    }
    const samples = this.trainingData[this.currentCommand] ? this.trainingData[this.currentCommand].length : 0;
    this.sampleCount.textContent = samples;
    // Enable training only when at least one command has enough samples.
    this.trainBtn.disabled = this.commands.every(cmd =>
      !this.trainingData[cmd] || this.trainingData[cmd].length < this.minSamples
    );
  }
  // Persists commands, samples, trained label order and model weights.
  saveToStorage() {
    try {
      localStorage.setItem('audioCommands', JSON.stringify(this.commands));
      localStorage.setItem('trainingData', JSON.stringify(this.trainingData));
      if (this.trainedCommands) {
        localStorage.setItem('trainedCommands', JSON.stringify(this.trainedCommands));
      }
      if (this.model) {
        localStorage.setItem('nnModel', JSON.stringify(this.model.toJSON()));
      }
    } catch (e) {
      // localStorage may be full or blocked; keep running without persistence.
      console.error('Failed to save data:', e);
    }
  }
  // Restores persisted state; corrupt entries are logged and ignored.
  loadFromStorage() {
    try {
      const commands = localStorage.getItem('audioCommands');
      const trainingData = localStorage.getItem('trainingData');
      const trainedCommands = localStorage.getItem('trainedCommands');
      const modelData = localStorage.getItem('nnModel');
      if (commands) this.commands = JSON.parse(commands);
      if (trainingData) this.trainingData = JSON.parse(trainingData);
      if (trainedCommands) this.trainedCommands = JSON.parse(trainedCommands);
      if (modelData) this.model = NeuralNetwork.fromJSON(JSON.parse(modelData));
    } catch (e) {
      console.error('Failed to load data:', e);
    }
  }
}
// Bootstrap: create the application once the DOM is ready, bailing out
// early on browsers without microphone capture support.
document.addEventListener('DOMContentLoaded', () => {
  const canCapture = navigator.mediaDevices && navigator.mediaDevices.getUserMedia;
  if (!canCapture) {
    alert('Your browser doesn\'t support audio recording. Please try Chrome or Firefox.');
    return;
  }
  window.app = new AudioCommandApp(); // exposed globally for debugging
});
</script>
<p style="border-radius: 8px; text-align: center; font-size: 12px; color: #fff; margin-top: 16px;position: fixed; left: 8px; bottom: 8px; z-index: 10; background: rgba(0, 0, 0, 0.8); padding: 4px 8px;">Made with <img src="https://enzostvs-deepsite.hf.space/logo.svg" alt="DeepSite Logo" style="width: 16px; height: 16px; vertical-align: middle;display:inline-block;margin-right:3px;filter:brightness(0) invert(1);"><a href="https://enzostvs-deepsite.hf.space" style="color: #fff;text-decoration: underline;" target="_blank" >DeepSite</a> - <a href="https://enzostvs-deepsite.hf.space?remix=LukasBe/voice-command" style="color: #fff;text-decoration: underline;" target="_blank" >🧬 Remix</a></p></body>
</html>