// Embedding HTTP server: exposes GET /health and POST /embed on top of a
// @xenova/transformers feature-extraction pipeline.
//
// Force single-threaded ONNX Runtime WASM. NOTE: static `import` declarations
// are hoisted and evaluated before this assignment runs, which is why
// onnxruntime-web and @xenova/transformers are loaded with dynamic `import()`
// further down, after the setting is in place.
process.env.ORT_WASM_NUM_THREADS = '1';

import express from 'express';
import path from 'path';
import { fileURLToPath } from 'url';

const __dirname = path.dirname(fileURLToPath(import.meta.url));

// Configure the ORT WASM backend before the pipeline module is imported.
const ort = await import('onnxruntime-web');
ort.default.env.wasm.numThreads = 1;
ort.default.env.wasm.simd = true;

const { pipeline, env } = await import('@xenova/transformers');

const PORT = process.env.PORT || 3000;
const MODEL = process.env.MODEL || 'Xenova/bge-small-en-v1.5';

// Keep downloaded model files next to this script.
env.cacheDir = path.join(__dirname, '.cache');

// Resolved pipeline; stays null until loading has finished (read by /health).
let extractor = null;
// In-flight (or settled) load, shared by all callers so the model is only
// loaded once even when requests arrive while the preload is still running.
let extractorPromise = null;

/**
 * Loads the feature-extraction pipeline for MODEL and records it in
 * `extractor`.
 *
 * @returns {Promise<Function>} The loaded pipeline.
 */
async function loadExtractor() {
  console.log(`Loading ${MODEL}...`);
  const startedAt = Date.now();
  extractor = await pipeline('feature-extraction', MODEL);
  console.log(`Loaded in ${Date.now() - startedAt}ms`);
  return extractor;
}

/**
 * Returns the shared feature-extraction pipeline, starting the load on the
 * first call. Concurrent callers await the same load instead of each
 * triggering their own. If the load fails, the cached promise is cleared so a
 * later call can retry.
 *
 * @returns {Promise<Function>} The loaded pipeline.
 */
async function getExtractor() {
  if (!extractorPromise) {
    extractorPromise = loadExtractor().catch((err) => {
      extractorPromise = null;
      throw err;
    });
  }
  return extractorPromise;
}

const app = express();
app.use(express.json({ limit: '10mb' }));

// Liveness/readiness probe: reports whether the model has finished loading.
app.get('/health', (req, res) => {
  res.json({ status: extractor ? 'ready' : 'loading', model: MODEL });
});

// POST /embed  { "text": string | string[] }
// Responds with one mean-pooled, normalized embedding per input text.
app.post('/embed', async (req, res) => {
  try {
    // Tolerate a missing/unparsed body so it yields a 400 instead of a 500.
    const { text } = req.body ?? {};
    if (!text) {
      return res.status(400).json({ error: 'Missing "text"' });
    }

    const inputs = Array.isArray(text) ? text : [text];
    const isValid =
      inputs.length > 0 && inputs.every((item) => typeof item === 'string');
    if (!isValid) {
      return res
        .status(400)
        .json({ error: '"text" must be a string or a non-empty array of strings' });
    }

    const pipe = await getExtractor();

    // Timing covers inference only, not model loading.
    const startedAt = Date.now();
    const output = await pipe(inputs, { pooling: 'mean', normalize: true });

    // The last dimension is taken as the embedding width; the flat data
    // buffer is then cut into one row per input.
    const embDim = output.dims[output.dims.length - 1];
    const embeddings = inputs.map((_, i) =>
      Array.from(output.data.slice(i * embDim, (i + 1) * embDim)),
    );

    res.json({
      embeddings,
      dims: embDim,
      model: MODEL,
      elapsed_ms: Date.now() - startedAt,
    });
  } catch (err) {
    console.error(err);
    res.status(500).json({ error: err.message });
  }
});

// Start listening immediately; the model is preloaded in the background and
// any request that arrives earlier simply awaits the same load.
app.listen(PORT, '0.0.0.0', () => {
  console.log(`Server on :${PORT} — model will load on first request`);
  getExtractor()
    .then(() => console.log('Model ready!'))
    .catch(console.error);
});