// Node_Server / index.js
// Last update by freealise (commit ba90d1c, verified)
// Lazily-initialised singleton wrapper around the Whisper
// automatic-speech-recognition pipeline from transformers.js.
class MyPipeline {
  static task = 'automatic-speech-recognition';
  static model = 'Xenova/whisper-tiny.en';
  // Holds the pipeline promise after the first call; null until then.
  static instance = null;

  /**
   * Return the shared transcriber, creating it on first use.
   * @param {Function|null} progress_callback - forwarded to the pipeline
   *   loader to report download/initialisation progress.
   * @returns {Promise} resolves to the transcriber function.
   */
  static async getInstance(progress_callback = null) {
    if (this.instance !== null) {
      return this.instance;
    }
    const { pipeline, env } = await import('@huggingface/transformers');
    env.cacheDir = './.cache'; // model files are cached here; change to relocate
    // The un-awaited promise is stored, so later callers reuse the same load.
    this.instance = pipeline(this.task, this.model, { progress_callback });
    return this.instance;
  }
}
// Optionally warm the model at startup instead of on first request:
//MyPipeline.getInstance();
const http = require('http');
const url = require('url');
const wavefile = require('wavefile');
// HTTP server on :8080. GET /?q=<wav-url> fetches the WAV, transcribes it
// with the Whisper pipeline above, and returns timing plus the JSON output.
http.createServer(async (req, res) => {
  const u = url.parse(req.url, true);
  if (!u.query.q) {
    res.writeHead(200, { 'Content-Type': 'text/html' });
    res.end('Empty query');
    return;
  }
  try {
    const transcriber = await MyPipeline.getInstance();
    // SECURITY NOTE(review): `q` is an arbitrary, client-controlled URL
    // (SSRF risk) — restrict to an allow-list before exposing publicly.
    const response = await fetch(u.query.q);
    if (!response.ok) {
      // Previously an error page body was fed straight into the WAV decoder.
      throw new Error(`Fetch failed: ${response.status} ${response.statusText}`);
    }
    const buffer = Buffer.from(await response.arrayBuffer());
    // Read .wav file and convert it to required format
    const wav = new wavefile.WaveFile(buffer);
    wav.toBitDepth('32f'); // Pipeline expects input as a Float32Array
    wav.toSampleRate(16000); // Whisper expects audio with a sampling rate of 16000
    let audioData = wav.getSamples();
    if (Array.isArray(audioData)) {
      if (audioData.length > 1) {
        const SCALING_FACTOR = Math.sqrt(2);
        // Merge channels (into first channel to save memory)
        for (let i = 0; i < audioData[0].length; ++i) {
          audioData[0][i] = SCALING_FACTOR * (audioData[0][i] + audioData[1][i]) / 2;
        }
      }
      // Select first channel
      audioData = audioData[0];
    }
    // Run model, timing the inference only (not the download/decode).
    const start = performance.now();
    const output = await transcriber(audioData);
    const end = performance.now();
    // Headers are written only once transcription succeeded, so failures
    // can still report a proper error status below.
    res.writeHead(200, { 'Content-Type': 'text/html' });
    res.write(`Execution duration: ${(end - start) / 1000} seconds <br/>`);
    res.end(JSON.stringify(output));
  } catch (err) {
    // Previously any fetch/decode/inference error became an unhandled
    // rejection and the request hung; now the client gets a 500.
    res.writeHead(500, { 'Content-Type': 'text/plain' });
    res.end(`Error: ${err.message}`);
  }
}).listen(8080);