File size: 2,070 Bytes
ba90d1c
 
 
05437e3
 
 
 
 
d854861
 
05437e3
31d6614
05437e3
 
 
 
 
 
 
ba90d1c
05437e3
e4d4409
d854861
 
ba90d1c
d60b4e5
0db4df0
07a209a
05437e3
 
 
ba90d1c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
05437e3
ba90d1c
05437e3
 
0db4df0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
class MyPipeline {
  // Lazily-constructed singleton wrapper around a transformers.js
  // automatic-speech-recognition pipeline (Whisper tiny, English-only).
  static task = 'automatic-speech-recognition';
  static model = 'Xenova/whisper-tiny.en';
  static instance = null;

  /**
   * Returns the shared pipeline instance, constructing it on first call.
   *
   * @param {Function|null} progress_callback - Forwarded to `pipeline()` to
   *   report model download/load progress; may be null.
   * @returns {Promise<Function>} The ready-to-use transcription pipeline.
   * @throws Re-throws any model-load failure (and clears the cached
   *   instance so the next call can retry).
   */
  static async getInstance(progress_callback = null) {
    if (this.instance === null) {
      // Dynamic import so this CommonJS file can load the ESM-only package.
      const { pipeline, env } = await import('@huggingface/transformers');

      // Cache downloaded model weights in a local directory instead of the
      // library default.
      env.cacheDir = './.cache';

      try {
        // Await here: the original cached the *pending* promise, so a
        // rejected load was cached forever and every later call failed.
        this.instance = await pipeline(this.task, this.model, { progress_callback });
      } catch (err) {
        this.instance = null; // allow a retry on the next call
        throw err;
      }
    }

    return this.instance;
  }
}
//MyPipeline.getInstance();


const http = require('http');
const url = require('url');
const wavefile = require('wavefile');

http.createServer(async (req, res) => {
  res.writeHead(200, {'Content-Type': 'text/html'});
  var u = url.parse(req.url, true);

  if (u.query.q) {
    const transcriber = await MyPipeline.getInstance();

    //let url = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/jfk.wav';
    let buffer = Buffer.from(await fetch(u.query.q).then(x => x.arrayBuffer()))

    // Read .wav file and convert it to required format
    let wav = new wavefile.WaveFile(buffer);
    wav.toBitDepth('32f'); // Pipeline expects input as a Float32Array
    wav.toSampleRate(16000); // Whisper expects audio with a sampling rate of 16000
    let audioData = wav.getSamples();
    if (Array.isArray(audioData)) {
        if (audioData.length > 1) {
            const SCALING_FACTOR = Math.sqrt(2);

            // Merge channels (into first channel to save memory)
            for (let i = 0; i < audioData[0].length; ++i) {
                audioData[0][i] = SCALING_FACTOR * (audioData[0][i] + audioData[1][i]) / 2;
            }
        }

        // Select first channel
        audioData = audioData[0];
    }

    // Run model
    let start = performance.now();
    let output = await transcriber(audioData);
    let end = performance.now();
    
    res.write(`Execution duration: ${(end - start) / 1000} seconds <br/>`);
    res.end(JSON.stringify(output));
  } else {
    res.end("Empty query");
  }
  
}).listen(8080);