// Set single-thread BEFORE anything else loads
// NOTE: static `import` declarations are hoisted by the ES module loader, so
// express/path/url are evaluated before this assignment actually runs. It still
// executes ahead of the dynamic `await import(...)` calls further down the file.
// Presumably this variable is read by the ONNX runtime to cap WASM threads —
// TODO confirm against the runtime's documentation.
process.env.ORT_WASM_NUM_THREADS = '1';
import express from 'express';
import path from 'path';
import { fileURLToPath } from 'url';
// ES modules do not provide __dirname; derive it from this module's own URL.
const __dirname = path.dirname(fileURLToPath(import.meta.url));
// Configure the ONNX runtime's WASM flags first, then load transformers.
// Both modules are imported dynamically so they are evaluated in this exact
// order, after the thread-count environment variable has been assigned.
const ort = await import('onnxruntime-web');
Object.assign(ort.default.env.wasm, { numThreads: 1, simd: true });
const { pipeline, env } = await import('@xenova/transformers');
// Cached feature-extraction pipeline; stays null until a load has completed.
let extractor = null;
// Keep downloaded model files in a `.cache` folder next to this module.
env.cacheDir = path.join(__dirname, '.cache');
// Model id and listening port, both overridable through the environment.
// PORT is a string when it comes from the environment and the number 3000 otherwise.
const MODEL = process.env.MODEL || 'Xenova/bge-small-en-v1.5';
const PORT = process.env.PORT || 3000;
// Promise for a model load that is currently in flight. Sharing it between
// concurrent callers guarantees the pipeline is constructed at most once.
let extractorLoad = null;

/**
 * Return the feature-extraction pipeline for MODEL, loading it on first use.
 *
 * Concurrent calls made while the model is still loading all receive the same
 * pending promise instead of each starting their own load. The module-level
 * `extractor` is only assigned once loading has succeeded.
 *
 * @returns {Promise<Function>} Resolves to the loaded pipeline.
 * @throws Rejects with whatever error `pipeline()` rejects with; the in-flight
 *   promise is then cleared so that a later call can retry the load.
 */
async function getExtractor() {
  if (extractor) {
    return extractor;
  }
  if (!extractorLoad) {
    extractorLoad = (async () => {
      console.log(`Loading ${MODEL}...`);
      const startedAt = Date.now();
      const loaded = await pipeline('feature-extraction', MODEL);
      console.log(`Loaded in ${Date.now() - startedAt}ms`);
      extractor = loaded;
      return loaded;
    })();
    // Forget a failed load so it is not cached forever. This handler is
    // registered before any caller's, so the reset happens before callers
    // observe the rejection.
    extractorLoad.catch(() => {
      extractorLoad = null;
    });
  }
  return extractorLoad;
}
// HTTP application; JSON request bodies of up to 10 MB are parsed for every route.
const app = express();
const jsonBodyParser = express.json({ limit: '10mb' });
app.use(jsonBodyParser);

// GET /health — reports whether the pipeline has finished loading and which model is configured.
app.get('/health', (_req, res) => {
  const status = extractor ? 'ready' : 'loading';
  res.json({ status, model: MODEL });
});
/**
 * POST /embed — compute mean-pooled, normalized embeddings for the given text(s).
 *
 * Request body (JSON): { "text": string | string[] }
 * 200 response: { embeddings: number[][], dims, model, elapsed_ms }
 *   - one embedding per input, in input order
 *   - elapsed_ms covers inference only, not model loading
 * 400 response: "text" is missing, empty, or contains non-string items.
 * 500 response: model loading or inference failed; the error is also logged.
 */
app.post('/embed', async (req, res) => {
  try {
    // req.body can be absent when no JSON body was parsed (this depends on the
    // Express version), so guard before destructuring to answer 400, not 500.
    const { text } = req.body ?? {};
    if (!text) return res.status(400).json({ error: 'Missing "text"' });

    const inputs = Array.isArray(text) ? text : [text];
    // Reject empty batches and non-string items up front rather than letting
    // them fail inside the model and surface as a 500.
    if (inputs.length === 0 || !inputs.every((item) => typeof item === 'string')) {
      return res
        .status(400)
        .json({ error: '"text" must be a non-empty string or a non-empty array of strings' });
    }

    const pipe = await getExtractor();
    const startedAt = Date.now();
    const output = await pipe(inputs, { pooling: 'mean', normalize: true });

    // The output buffer is flat: split it into one row of `embDim` values per
    // input, using the last entry of `dims` as the embedding width.
    const embDim = output.dims.at(-1);
    const embeddings = inputs.map((_, row) =>
      Array.from(output.data.slice(row * embDim, (row + 1) * embDim)),
    );

    res.json({ embeddings, dims: embDim, model: MODEL, elapsed_ms: Date.now() - startedAt });
  } catch (err) {
    // Keep a server-side record; previously failures were only sent to the client.
    console.error(err);
    res.status(500).json({ error: err.message });
  }
});
// Begin accepting connections right away. The model preload is started in the
// background as soon as the socket is listening, so requests are not blocked
// on it; a failed preload is only logged.
app.listen(PORT, '0.0.0.0', async () => {
  console.log(`Server on :${PORT} — model will load on first request`);
  try {
    await getExtractor();
    console.log('Model ready!');
  } catch (preloadError) {
    console.error(preloadError);
  }
});
|