const path = require("path");
const { promisify } = require("util");

// Native addon binary, resolved relative to this file. It has to exist at
// build/Release/addon.node two directories up, otherwise this require()
// throws while the module is loading.
const { whisper } = require(
  path.join(__dirname, "../../build/Release/addon.node")
);

/**
 * Promise-returning wrapper around the addon's `whisper` function.
 *
 * `util.promisify` assumes `whisper` takes a Node-style error-first callback
 * as its last argument; the wrapper is called with a single params object.
 */
const whisperAsync = promisify(whisper);
/**
 * Options for the transcription run that has voice activity detection (VAD)
 * switched on.
 *
 * The object is passed as-is to the native `whisper` binding, which defines
 * the meaning of each key; the values below are this example's choices.
 */
const vadParams = {
  language: "en",
  model: path.join(__dirname, "../../models/ggml-base.en.bin"),
  fname_inp: path.join(__dirname, "../../samples/jfk.wav"),
  use_gpu: true,
  flash_attn: false,
  no_prints: false,
  comma_in_time: true,
  translate: false,
  no_timestamps: false,
  detect_language: false,
  audio_ctx: 0,
  max_len: 0,

  // VAD-specific settings. runVADExample() checks that the file at
  // `vad_model` exists and falls back to the non-VAD run when it does not.
  vad: true,
  vad_model: path.join(__dirname, "../../models/ggml-silero-v6.2.0.bin"),
  vad_threshold: 0.5,
  vad_min_speech_duration_ms: 250,
  vad_min_silence_duration_ms: 100,
  vad_max_speech_duration_s: 30.0,
  vad_speech_pad_ms: 30,
  vad_samples_overlap: 0.1,

  // Logs each progress value it is called with, formatted as a percentage.
  progress_callback: (progress) => {
    console.log(`VAD Transcription progress: ${progress}%`);
  },
};
/**
 * Options for the comparison run with voice activity detection turned off
 * (`vad: false`). Model, input file and the remaining general settings carry
 * the same values as in `vadParams`.
 *
 * The object is passed as-is to the native `whisper` binding, which defines
 * the meaning of each key.
 */
const traditionalParams = {
  language: "en",
  model: path.join(__dirname, "../../models/ggml-base.en.bin"),
  fname_inp: path.join(__dirname, "../../samples/jfk.wav"),
  use_gpu: true,
  flash_attn: false,
  no_prints: false,
  comma_in_time: true,
  translate: false,
  no_timestamps: false,
  detect_language: false,
  audio_ctx: 0,
  max_len: 0,
  vad: false,

  // Logs each progress value it is called with, formatted as a percentage.
  progress_callback: (progress) => {
    console.log(`Traditional transcription progress: ${progress}%`);
  },
};
/**
 * Transcribes the sample audio once with VAD enabled and once without it,
 * printing both results and a wall-clock timing comparison.
 *
 * If the VAD model file named by `vadParams.vad_model` does not exist, a
 * download hint is printed, only the non-VAD transcription is run, and the
 * function returns early.
 *
 * Errors thrown inside the function are logged with `console.error` and are
 * not rethrown.
 *
 * @returns {Promise<void>} Resolves once all output has been printed.
 */
async function runVADExample() {
  try {
    console.log("=== Whisper.cpp Node.js VAD Example ===\n");

    // Required locally: nothing else in this file needs `fs`.
    const fs = require("fs");
    if (!fs.existsSync(vadParams.vad_model)) {
      console.log("⚠️ VAD model not found. Please download the VAD model first:");
      console.log(" ./models/download-vad-model.sh silero-v6.2.0");
      console.log(" Or run: python models/convert-silero-vad-to-ggml.py");
      console.log("\n Falling back to traditional transcription without VAD...\n");

      // Without the VAD model only the plain run is possible.
      console.log("🎵 Running traditional transcription...");
      const traditionalResult = await whisperAsync(traditionalParams);
      console.log("\n📝 Traditional transcription result:");
      console.log(traditionalResult);
      return;
    }

    console.log("🎵 Running transcription with VAD enabled...");
    console.log("VAD Parameters:");
    console.log(` - Threshold: ${vadParams.vad_threshold}`);
    console.log(` - Min speech duration: ${vadParams.vad_min_speech_duration_ms}ms`);
    console.log(` - Min silence duration: ${vadParams.vad_min_silence_duration_ms}ms`);
    console.log(` - Max speech duration: ${vadParams.vad_max_speech_duration_s}s`);
    console.log(` - Speech padding: ${vadParams.vad_speech_pad_ms}ms`);
    console.log(` - Samples overlap: ${vadParams.vad_samples_overlap}\n`);

    // Wall-clock time of the whole call, in milliseconds.
    const startTime = Date.now();
    const vadResult = await whisperAsync(vadParams);
    const vadDuration = Date.now() - startTime;

    console.log("\n✅ VAD transcription completed!");
    console.log(`⏱️ Processing time: ${vadDuration}ms`);
    console.log("\n📝 VAD transcription result:");
    console.log(vadResult);

    // Same input again with VAD off, timed the same way, for comparison.
    console.log("\n🔄 Running traditional transcription for comparison...");
    const traditionalStartTime = Date.now();
    const traditionalResult = await whisperAsync(traditionalParams);
    const traditionalDuration = Date.now() - traditionalStartTime;

    console.log("\n✅ Traditional transcription completed!");
    console.log(`⏱️ Processing time: ${traditionalDuration}ms`);
    console.log("\n📝 Traditional transcription result:");
    console.log(traditionalResult);

    console.log("\n📊 Performance Comparison:");
    console.log(`VAD: ${vadDuration}ms`);
    console.log(`Traditional: ${traditionalDuration}ms`);

    // Ratio above 1 means the VAD run took less time than the plain run.
    const speedup = traditionalDuration / vadDuration;
    if (speedup > 1) {
      console.log(`🚀 VAD is ${speedup.toFixed(2)}x faster!`);
    } else {
      console.log(`ℹ️ Traditional approach was ${(1 / speedup).toFixed(2)}x faster in this case.`);
    }
  } catch (error) {
    console.error("❌ Error during transcription:", error);
  }
}
// Run the example only when this file is the program's entry point, not when
// it is loaded through require() by another module. runVADExample() catches
// and logs its own errors, so its promise is deliberately not awaited here.
if (require.main === module) {
  runVADExample();
}
module.exports = { |
|
|
runVADExample, |
|
|
vadParams, |
|
|
traditionalParams |
|
|
}; |