File size: 2,082 Bytes
5b59318
 
 
3072378
6f2690e
 
 
 
 
5b59318
 
 
 
 
 
3072378
144e087
 
 
6f2690e
 
144e087
 
 
 
447139a
 
 
 
144e087
 
 
 
 
 
 
 
447139a
144e087
 
 
 
 
447139a
144e087
 
 
447139a
144e087
 
3072378
ed03d47
144e087
447139a
144e087
 
447139a
144e087
 
 
 
 
5b59318
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
// Request a single WASM thread via the environment before the runtime packages are loaded.
// NOTE: static `import` declarations are hoisted in ES modules, so express/path/url are
// evaluated before this assignment; it does still run before the dynamic `import()` calls below.
process.env.ORT_WASM_NUM_THREADS = '1';

import express from 'express';
import path from 'path';
import { fileURLToPath } from 'url';

// ES modules have no built-in __dirname; derive it from this module's URL.
const __dirname = path.dirname(fileURLToPath(import.meta.url));

// Configure onnxruntime-web's WASM settings (one thread, SIMD enabled) before the
// pipeline library is imported.
// NOTE(review): presumably @xenova/transformers resolves this same onnxruntime-web
// instance and therefore sees these settings — confirm against the installed versions.
const ort = await import('onnxruntime-web');
ort.default.env.wasm.numThreads = 1;
ort.default.env.wasm.simd = true;

// Imported dynamically (not statically) so it is loaded only after the settings above are applied.
const { pipeline, env } = await import('@xenova/transformers');

// Listening port and model id; both can be overridden through environment variables.
const PORT = process.env.PORT || 3000;
const MODEL = process.env.MODEL || 'Xenova/bge-small-en-v1.5';

// Point the library's cacheDir at a `.cache` folder next to this file.
env.cacheDir = path.join(__dirname, '.cache');

// Loaded feature-extraction pipeline; stays null until a load has fully completed
// (the /health route reads this to report 'ready' vs 'loading').
let extractor = null;

// Promise for the load currently in flight, shared by every concurrent caller;
// null when no load is running.
let extractorLoading = null;

/**
 * Return the feature-extraction pipeline for MODEL, loading it on first use.
 *
 * Concurrent callers that arrive while the model is still loading all await the
 * same in-flight promise, so `pipeline()` is invoked at most once per load
 * attempt. If a load fails, the in-flight promise is cleared so that a later
 * call can retry instead of being stuck with a permanently rejected promise.
 *
 * @returns {Promise<Function>} Resolves to the loaded pipeline.
 * @throws Rejects with whatever error `pipeline()` rejects with.
 */
async function getExtractor() {
  if (extractor) {
    return extractor;
  }

  if (!extractorLoading) {
    console.log(`Loading ${MODEL}...`);
    const startedAt = Date.now();
    extractorLoading = pipeline('feature-extraction', MODEL)
      .then((loaded) => {
        extractor = loaded;
        console.log(`Loaded in ${Date.now() - startedAt}ms`);
        return loaded;
      })
      .finally(() => {
        // Success or failure, this attempt is over; on failure this enables a retry.
        extractorLoading = null;
      });
  }

  return extractorLoading;
}

// Express application; JSON request bodies are parsed up to a 10mb limit.
const app = express();
const jsonBodyParser = express.json({ limit: '10mb' });
app.use(jsonBodyParser);

// GET /health — reports 'ready' once the extractor has been loaded, 'loading'
// otherwise, together with the configured model id.
app.get('/health', (_req, res) => {
  const status = extractor ? 'ready' : 'loading';
  res.json({ status, model: MODEL });
});

/**
 * POST /embed — compute embeddings for one string or an array of strings.
 *
 * Request body (JSON): `{ "text": string | string[] }`.
 *
 * Responses:
 *   200 `{ embeddings, dims, model, elapsed_ms }` — one vector per input, in
 *       input order; `elapsed_ms` is measured after the extractor is obtained,
 *       so it covers the inference call but not model loading.
 *   400 `{ error }` — `text` is missing/empty, or is not a string or a
 *       non-empty array of strings.
 *   500 `{ error }` — obtaining the extractor or running inference threw.
 */
app.post('/embed', async (req, res) => {
  try {
    // Tolerate an absent body (e.g. no JSON payload was parsed) so the client
    // gets a 400 instead of a destructuring TypeError surfacing as a 500.
    const { text } = req.body ?? {};
    if (!text) return res.status(400).json({ error: 'Missing "text"' });

    const inputs = Array.isArray(text) ? text : [text];
    const allStrings = inputs.every((item) => typeof item === 'string');
    if (inputs.length === 0 || !allStrings) {
      // Reject bad input up front rather than letting it fail inside the model.
      return res
        .status(400)
        .json({ error: '"text" must be a string or a non-empty array of strings' });
    }

    const pipe = await getExtractor();
    const startedAt = Date.now();
    const output = await pipe(inputs, { pooling: 'mean', normalize: true });

    // The last dimension is the embedding width; output.data is read as a flat
    // buffer holding one embDim-long row per input.
    const embDim = output.dims[output.dims.length - 1];
    const embeddings = inputs.map((_, row) =>
      Array.from(output.data.slice(row * embDim, (row + 1) * embDim)),
    );

    res.json({ embeddings, dims: embDim, model: MODEL, elapsed_ms: Date.now() - startedAt });
  } catch (err) {
    // Keep a server-side record; the client only receives the message.
    console.error(err);
    res.status(500).json({ error: err?.message ?? String(err) });
  }
});

// Bind the HTTP listener on all interfaces right away; once it is listening,
// kick off the model load in the background and log the outcome.
app.listen(PORT, '0.0.0.0', async () => {
  console.log(`Server on :${PORT} — model will load on first request`);
  try {
    await getExtractor();
    console.log('Model ready!');
  } catch (err) {
    console.error(err);
  }
});