Upload serve_gemma.js with huggingface_hub
Browse files- serve_gemma.js +158 -0
serve_gemma.js
ADDED
|
@@ -0,0 +1,158 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
const http = require('http'), fs = require('fs'), path = require('path');
|
| 2 |
+
const fetch = globalThis.fetch || require('node-fetch'); // Node 18+ has built-in fetch
|
| 3 |
+
// TCP port the HTTP server below listens on.
const PORT = 8150;

// Directory containing this script; non-model request paths are resolved against it.
const ROOT = __dirname;

// Folder that `/model/...` request paths are mapped onto.
const SPLITS = path.join(ROOT, 'model_splits');

// Sibling directory (next to ROOT) whose text files grepEntity() scans.
const ENTITY_DIR = path.join(ROOT, '..', 'entity');

// File extension -> Content-Type header value. Extensions that are not listed
// here are served as 'application/octet-stream' by the request handler.
const MIME = {
  '.html': 'text/html',
  '.js': 'text/javascript',
  '.mjs': 'text/javascript',
  '.ts': 'text/javascript',
  '.json': 'application/json',
  '.wasm': 'application/wasm',
  '.gguf': 'application/octet-stream',
};
|
| 14 |
+
|
| 15 |
+
/**
 * Extract a few lowercase keyword phrases from a user message.
 *
 * The local xLAM chat-completions endpoint at 127.0.0.1:8093 is asked first.
 * If the request fails, the response is not OK, or no parsable
 * `{"keywords": [...]}` object is found in the reply, a regex-based fallback
 * picks up to five words of 4+ letters that are not in a small stop list.
 *
 * @param {string} text - The user message to analyse.
 * @returns {Promise<string[]>} Trimmed, lowercased, non-empty keywords. Empty
 *   when `text` is not a string or is blank.
 */
async function extractKeywords(text) {
  // A missing or non-string message has no keywords. Bail out here so the
  // fallback's `text.toLowerCase()` cannot throw a TypeError.
  if (typeof text !== 'string' || text.trim() === '') return [];

  // Try local xLAM at :8093 for intent-based keyword extraction
  try {
    const resp = await fetch('http://127.0.0.1:8093/v1/chat/completions', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        model: 'xlam',
        messages: [{ role: 'user', content: `Extract 3-5 keyword phrases (1-3 words each) from this message that capture the user's intent. Return JSON: {"keywords":["k1","k2","k3"]}\n\nMessage: "${text}"` }],
        temperature: 0.1,
        max_tokens: 60,
      }),
    });
    if (resp.ok) {
      const data = await resp.json();
      const content = data.choices?.[0]?.message?.content || '';
      // The model may wrap the JSON in prose; grab the outermost {...} span
      // that mentions "keywords".
      const match = content.match(/\{[\s\S]*"keywords"[\s\S]*\}/);
      if (match) {
        const parsed = JSON.parse(match[0]);
        if (Array.isArray(parsed.keywords)) {
          // Keep only usable entries: one stray number/null must not discard
          // the whole answer, and an empty keyword would match every line
          // when used as a substring needle.
          return parsed.keywords
            .filter((k) => typeof k === 'string')
            .map((k) => k.trim().toLowerCase())
            .filter((k) => k !== '');
        }
      }
    }
  } catch (e) { /* xLAM not available or reply unparsable, fall back */ }

  // Fallback: extract names and nouns (4+ chars, skip common words)
  const stop = new Set(['what','that','this','with','from','have','your','been','does','were','they','their','about','would','could','should','there','where','which','these','those','before','after','other','being','still','never','always','remember']);
  return (text.toLowerCase().match(/[a-z]{4,}/g) || []).filter((w) => !stop.has(w)).slice(0, 5);
}
|
| 42 |
+
|
| 43 |
+
/**
 * Collect lines from text files under ENTITY_DIR that mention any keyword.
 *
 * The directory tree is walked recursively. Only `.md`, `.json` and `.txt`
 * files are read, and only trimmed lines of 10-300 characters are considered.
 * Matching is a case-insensitive substring test.
 *
 * @param {string} text - Original user message. Not used by the search; kept
 *   so existing callers keep working.
 * @param {string[]} keywords - Keyword phrases to look for. Non-string, blank
 *   and duplicate entries are ignored.
 * @returns {string[]} Up to 6 distinct matching lines, in discovery order.
 */
function grepEntity(text, keywords) {
  const MAX_RESULTS = 6;

  // Normalise the needles once. An empty string would match every line via
  // includes(''), and lines are compared lowercased, so needles must be too.
  const cleaned = (Array.isArray(keywords) ? keywords : [])
    .filter((kw) => typeof kw === 'string')
    .map((kw) => kw.trim().toLowerCase())
    .filter((kw) => kw !== '');
  const needles = [...new Set(cleaned)];
  if (needles.length === 0) return [];

  // A Set keeps first-seen order and dedupes, so the scan can stop as soon as
  // MAX_RESULTS distinct lines are found instead of reading the whole tree.
  const found = new Set();

  function scanFile(fp) {
    let content;
    try {
      content = fs.readFileSync(fp, 'utf8');
    } catch (e) {
      return; // best effort: unreadable files are skipped
    }
    for (const rawLine of content.split('\n')) {
      const line = rawLine.trim();
      if (line.length < 10 || line.length > 300) continue;
      const lower = line.toLowerCase();
      if (needles.some((kw) => lower.includes(kw))) {
        found.add(line);
        if (found.size >= MAX_RESULTS) return;
      }
    }
  }

  function scanDir(dir) {
    let entries;
    try {
      entries = fs.readdirSync(dir, { withFileTypes: true });
    } catch (e) {
      return; // best effort: a missing or unreadable directory yields no hits
    }
    for (const entry of entries) {
      if (found.size >= MAX_RESULTS) return;
      const fp = path.join(dir, entry.name);
      if (entry.isDirectory()) {
        scanDir(fp);
      } else if (/\.(md|json|txt)$/.test(entry.name)) {
        scanFile(fp);
      }
    }
  }

  scanDir(ENTITY_DIR);
  return [...found];
}
|
| 76 |
+
|
| 77 |
+
/** Largest POST /api/grep body accepted, in bytes; larger requests get a 413. */
const MAX_GREP_BODY_BYTES = 1024 * 1024;

/** CORS + cross-origin-isolation headers sent on static and preflight responses. */
const CROSS_ORIGIN_HEADERS = {
  'Access-Control-Allow-Origin': '*',
  'Cross-Origin-Embedder-Policy': 'require-corp',
  'Cross-Origin-Opener-Policy': 'same-origin',
};

/** Send `payload` as a JSON response that is readable cross-origin. */
function sendJson(res, status, payload) {
  res.writeHead(status, { 'Content-Type': 'application/json', 'Access-Control-Allow-Origin': '*' });
  res.end(JSON.stringify(payload));
}

/**
 * Read a request body as UTF-8 text, rejecting once it exceeds `limit` bytes.
 * Chunks are kept as Buffers and decoded once, so a multi-byte character that
 * is split across two chunks is not corrupted.
 */
function readBody(req, limit) {
  return new Promise((resolve, reject) => {
    const chunks = [];
    let size = 0;
    let tooLarge = false;
    req.on('data', (chunk) => {
      if (tooLarge) return; // keep draining so the error response can be sent
      size += chunk.length;
      if (size > limit) {
        tooLarge = true;
        chunks.length = 0;
        const err = new Error('request body too large');
        err.statusCode = 413;
        reject(err);
        return;
      }
      chunks.push(chunk);
    });
    req.on('end', () => {
      if (!tooLarge) resolve(Buffer.concat(chunks).toString('utf8'));
    });
    req.on('error', reject);
  });
}

/** Handle POST /api/grep: body `{ "text": string }` -> `{ results, keywords }`. */
async function handleGrep(req, res) {
  try {
    const raw = await readBody(req, MAX_GREP_BODY_BYTES);

    let payload;
    try {
      payload = JSON.parse(raw);
    } catch (e) {
      // A malformed body is the client's fault, not a server error.
      return sendJson(res, 400, { error: 'invalid JSON body: ' + e.message });
    }
    const text = payload?.text;
    if (typeof text !== 'string') {
      return sendJson(res, 400, { error: '"text" must be a string' });
    }

    const keywords = await extractKeywords(text);
    console.log('[grep] keywords:', keywords);
    const results = grepEntity(text, keywords);
    console.log('[grep] found:', results.length, 'snippets');
    return sendJson(res, 200, { results, keywords });
  } catch (e) {
    if (res.headersSent) return res.end();
    return sendJson(res, e.statusCode || 500, { error: e.message });
  }
}

/**
 * Map a decoded URL path to a file path, or return null when it must not be served.
 *
 * `/model/...` is confined to SPLITS. Everything else resolves against ROOT
 * and may reach sibling folders of ROOT (e.g. `../mamba_webgpu/`), but nothing
 * above ROOT's parent directory. The containment check is lexical only; it
 * does not resolve symlinks.
 */
function resolveStaticPath(urlPath) {
  // fs calls throw synchronously on paths containing a NUL byte.
  if (urlPath.includes('\0')) return null;

  const MODEL_PREFIX = '/model/';
  let base;
  let target;
  if (urlPath.startsWith(MODEL_PREFIX)) {
    base = SPLITS;
    target = path.join(SPLITS, urlPath.slice(MODEL_PREFIX.length));
  } else {
    base = path.resolve(ROOT, '..');
    target = path.resolve(ROOT, '.' + urlPath);
  }

  const rel = path.relative(base, target);
  const escapes = rel === '..' || rel.startsWith('..' + path.sep) || path.isAbsolute(rel);
  return escapes ? null : target;
}

/**
 * Interpret a `Range` header for a file of `total` bytes.
 *
 * @returns {null | {start: number, end: number} | {unsatisfiable: true}}
 *   null when the header is not a `bytes=<start>-[<end>]` range (the caller
 *   then sends the whole file); `unsatisfiable` when the range lies outside
 *   the file or is inverted.
 */
function parseRange(header, total) {
  const m = String(header).match(/bytes=(\d+)-(\d*)/);
  if (!m) return null;
  const start = Number.parseInt(m[1], 10);
  // Clamp the end so Content-Length never promises bytes past the end of file.
  const end = m[2] ? Math.min(Number.parseInt(m[2], 10), total - 1) : total - 1;
  if (start >= total || start > end) return { unsatisfiable: true };
  return { start, end };
}

/** Stream `fp` to the client, honouring single byte ranges and HEAD requests. */
function serveFile(req, res, fp, urlPath) {
  fs.stat(fp, (err, st) => {
    // Directories and other non-files are reported as missing; streaming one
    // would emit an unhandled 'error' event and take the process down.
    if (err || !st.isFile()) {
      res.writeHead(404);
      // Echo the request path, not the absolute filesystem path.
      return res.end('not found: ' + urlPath);
    }

    const total = st.size;
    const headers = {
      'Content-Type': MIME[path.extname(fp)] || 'application/octet-stream',
      'Accept-Ranges': 'bytes',
      ...CROSS_ORIGIN_HEADERS,
    };

    const range = req.headers.range ? parseRange(req.headers.range, total) : null;
    if (range && range.unsatisfiable) {
      headers['Content-Range'] = 'bytes */' + total;
      res.writeHead(416, headers);
      return res.end();
    }

    let streamOptions;
    if (range) {
      headers['Content-Range'] = 'bytes ' + range.start + '-' + range.end + '/' + total;
      headers['Content-Length'] = range.end - range.start + 1;
      res.writeHead(206, headers);
      streamOptions = { start: range.start, end: range.end };
    } else {
      headers['Content-Length'] = total;
      res.writeHead(200, headers);
    }

    // HEAD gets the headers only; there is no point reading the file.
    if (req.method === 'HEAD') return res.end();

    const stream = fs.createReadStream(fp, streamOptions);
    // Headers are already out, so the only safe reaction to a read error is
    // to drop the connection instead of crashing on an unhandled 'error'.
    stream.on('error', () => res.destroy());
    // Release the file descriptor if the client disconnects mid-transfer.
    res.on('close', () => stream.destroy());
    stream.pipe(res);
    return undefined;
  });
}

http.createServer((req, res) => {
  const rawPath = req.url.split('?')[0];

  // Handle grep API
  if (req.method === 'POST' && rawPath === '/api/grep') {
    handleGrep(req, res);
    return;
  }

  if (req.method === 'OPTIONS') {
    res.writeHead(200, {
      ...CROSS_ORIGIN_HEADERS,
      // POST must be listed or browsers block the JSON POST to /api/grep
      // from other origins at the preflight stage.
      'Access-Control-Allow-Methods': 'GET,HEAD,POST,OPTIONS',
      'Access-Control-Allow-Headers': 'Content-Type,Range',
    });
    return res.end();
  }

  let p;
  try {
    p = decodeURIComponent(rawPath);
  } catch (e) {
    // Malformed percent-escape (e.g. "/%E0%A4%A"): reject instead of crashing.
    res.writeHead(400);
    return res.end('bad request');
  }
  if (p === '/') p = '/index.html';

  const fp = resolveStaticPath(p);
  if (fp === null) {
    res.writeHead(403);
    return res.end('forbidden');
  }

  serveFile(req, res, fp, p);
  return undefined;
}).listen(PORT, () => {
  console.log('Gemma WebGPU on :' + PORT);
  console.log('Model splits: ' + SPLITS);
  try {
    const files = fs.readdirSync(SPLITS).filter((f) => f.endsWith('.gguf'));
    console.log('Split files: ' + files.length);
  } catch (e) {
    // A missing splits folder should not kill a server that is already listening.
    console.warn('Split files: unavailable (' + e.message + ')');
  }
  console.log('CORS + COEP/COOP headers enabled for wllama multi-threading');
});
|