LJTSG committed on
Commit
f510c6d
·
verified ·
1 Parent(s): 211644e

Upload serve_gemma.js with huggingface_hub

Browse files
Files changed (1) hide show
  1. serve_gemma.js +158 -0
serve_gemma.js ADDED
@@ -0,0 +1,158 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ const http = require('http'), fs = require('fs'), path = require('path');
2
+ const fetch = globalThis.fetch || require('node-fetch'); // Node 18+ has built-in fetch
3
// TCP port the HTTP server listens on.
const PORT = 8150;

// Directory containing this script; non-model request paths are resolved against it.
const ROOT = __dirname;

// Directory holding the split model files, exposed under the /model/ URL prefix.
const SPLITS = path.join(ROOT, 'model_splits');

// Sibling "entity" directory that grepEntity scans for matching text lines.
const ENTITY_DIR = path.join(ROOT, '..', 'entity');

// File extension -> Content-Type. Extensions missing from this table are
// served as application/octet-stream by the static file handler.
const MIME = {
  '.html': 'text/html',
  '.js': 'text/javascript',
  '.mjs': 'text/javascript',
  '.ts': 'text/javascript',
  '.json': 'application/json',
  '.wasm': 'application/wasm',
  '.gguf': 'application/octet-stream',
};
14
+
15
/**
 * Derive a short list of lowercase search keywords from a user message.
 *
 * The local xLAM chat-completions endpoint on 127.0.0.1:8093 is asked first.
 * If it is unreachable, too slow, answers with a non-OK status, or yields no
 * usable keywords, a regex/stop-word fallback is applied to the message.
 *
 * @param {string} text - Raw user message. Non-string values are coerced with
 *   String(); null/undefined are treated as an empty message.
 * @returns {Promise<string[]>} Trimmed, lowercase, non-empty keywords. The
 *   fallback path returns at most five words.
 */
async function extractKeywords(text) {
  // Upper bound on how long the request may wait for xLAM before falling back.
  const XLAM_TIMEOUT_MS = 10000;
  const message = typeof text === 'string' ? text : String(text ?? '');

  // Try local xLAM at :8093 for intent-based keyword extraction.
  try {
    const resp = await fetch('http://127.0.0.1:8093/v1/chat/completions', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        model: 'xlam',
        messages: [{ role: 'user', content: `Extract 3-5 keyword phrases (1-3 words each) from this message that capture the user's intent. Return JSON: {"keywords":["k1","k2","k3"]}\n\nMessage: "${message}"` }],
        temperature: 0.1,
        max_tokens: 60,
      }),
      // AbortSignal.timeout is absent on older runtimes; then no timeout applies.
      signal: globalThis.AbortSignal?.timeout?.(XLAM_TIMEOUT_MS),
    });
    if (resp.ok) {
      const data = await resp.json();
      const content = data.choices?.[0]?.message?.content || '';
      // The model may wrap the JSON in prose, so pull out the object literal.
      const match = content.match(/\{[\s\S]*"keywords"[\s\S]*\}/);
      if (match) {
        const parsed = JSON.parse(match[0]);
        if (Array.isArray(parsed.keywords)) {
          // Drop non-strings and blanks: an empty keyword would match every
          // line in a substring search, and a non-string has no toLowerCase().
          const cleaned = parsed.keywords
            .filter((k) => typeof k === 'string')
            .map((k) => k.trim().toLowerCase())
            .filter((k) => k.length > 0);
          if (cleaned.length > 0) return cleaned;
        }
      }
    }
  } catch (e) {
    // Deliberate best-effort: xLAM missing, timed out, or returned malformed
    // output. Fall through to the local extraction below.
  }

  // Fallback: extract names and nouns (4+ chars, skip common words).
  const stop = new Set(['what','that','this','with','from','have','your','been','does','were','they','their','about','would','could','should','there','where','which','these','those','before','after','other','being','still','never','always','remember']);
  return (message.toLowerCase().match(/[a-z]{4,}/g) || []).filter((w) => !stop.has(w)).slice(0, 5);
}
42
+
43
/**
 * Collect up to six distinct lines that mention any of the given keywords.
 *
 * Recursively walks `rootDir`, reads every `.md`, `.json` and `.txt` file, and
 * keeps trimmed lines of 10-300 characters that contain at least one keyword
 * (case-insensitive substring match). Scanning stops as soon as six distinct
 * lines have been found.
 *
 * @param {string} text - Original user message. Not used by the search; kept
 *   so existing call sites keep working.
 * @param {string[]} keywords - Candidate keywords. Non-strings and blank
 *   entries are ignored; the rest are trimmed and lowercased.
 * @param {string} [rootDir=ENTITY_DIR] - Directory tree to scan.
 * @returns {string[]} Matching lines in scan order, without duplicates.
 */
function grepEntity(text, keywords, rootDir = ENTITY_DIR) {
  const MAX_RESULTS = 6;

  // An empty needle would make String.prototype.includes match every line,
  // so blanks are removed before searching.
  const needles = [...new Set(
    (Array.isArray(keywords) ? keywords : [])
      .filter((kw) => typeof kw === 'string')
      .map((kw) => kw.trim().toLowerCase())
      .filter((kw) => kw.length > 0),
  )];
  if (needles.length === 0) return [];

  // A Set keeps first-seen order and de-duplicates as lines are added.
  const hits = new Set();

  function scanDir(dir) {
    let entries;
    try {
      entries = fs.readdirSync(dir, { withFileTypes: true });
    } catch (err) {
      // A missing directory is an expected "nothing to search" case.
      if (err.code !== 'ENOENT') console.warn('[grep] cannot read directory:', dir, err.message);
      return;
    }

    for (const entry of entries) {
      if (hits.size >= MAX_RESULTS) return;
      const fp = path.join(dir, entry.name);
      if (entry.isDirectory()) { scanDir(fp); continue; }
      if (!/\.(md|json|txt)$/.test(entry.name)) continue;

      let content;
      try {
        content = fs.readFileSync(fp, 'utf8');
      } catch (err) {
        // Best-effort search: skip unreadable files instead of failing the request.
        console.warn('[grep] cannot read file:', fp, err.message);
        continue;
      }

      for (const rawLine of content.split('\n')) {
        const line = rawLine.trim();
        if (line.length < 10 || line.length > 300) continue;
        const lower = line.toLowerCase();
        if (needles.some((kw) => lower.includes(kw))) {
          hits.add(line);
          if (hits.size >= MAX_RESULTS) return;
        }
      }
    }
  }

  scanDir(rootDir);
  return [...hits];
}
76
+
77
// Largest /api/grep request body accepted, in bytes.
const MAX_GREP_BODY_BYTES = 1024 * 1024;

// Headers sent with every static response and preflight: open CORS plus the
// COEP/COOP pair announced in the startup log for wllama multi-threading.
const CORS_ISOLATION_HEADERS = {
  'Access-Control-Allow-Origin': '*',
  'Cross-Origin-Embedder-Policy': 'require-corp',
  'Cross-Origin-Opener-Policy': 'same-origin',
};

/** Send `payload` as a JSON response with an open CORS header. */
function sendJson(res, status, payload) {
  res.writeHead(status, { 'Content-Type': 'application/json', 'Access-Control-Allow-Origin': '*' });
  res.end(JSON.stringify(payload));
}

/**
 * Handle POST /api/grep: read a JSON body of the form {"text": "..."}, derive
 * keywords, search the entity files and reply with {results, keywords}.
 */
function handleGrepRequest(req, res) {
  const chunks = [];
  let received = 0;
  let rejected = false;

  req.on('data', (chunk) => {
    if (rejected) return; // keep draining, but stop buffering
    received += chunk.length;
    if (received > MAX_GREP_BODY_BYTES) {
      rejected = true;
      chunks.length = 0;
      sendJson(res, 413, { error: 'request body too large' });
      return;
    }
    chunks.push(chunk);
  });

  req.on('end', async () => {
    if (rejected) return;

    let text;
    try {
      // Decode once over the whole body so multi-byte characters that
      // straddle chunk boundaries are not corrupted.
      ({ text } = JSON.parse(Buffer.concat(chunks).toString('utf8')));
    } catch (err) {
      return sendJson(res, 400, { error: 'invalid JSON body' });
    }
    if (typeof text !== 'string') {
      return sendJson(res, 400, { error: '"text" must be a string' });
    }

    try {
      const keywords = await extractKeywords(text);
      console.log('[grep] keywords:', keywords);
      const results = grepEntity(text, keywords);
      console.log('[grep] found:', results.length, 'snippets');
      sendJson(res, 200, { results, keywords });
    } catch (err) {
      sendJson(res, 500, { error: err.message });
    }
  });
}

/**
 * Map a decoded URL path to an absolute file path, or null when the path
 * escapes its allowed base directory.
 *
 * /model/... is confined to SPLITS. Everything else resolves against ROOT
 * and may reach sibling directories of ROOT (e.g. ../mamba_webgpu/), but
 * nothing above ROOT's parent.
 */
function resolveStaticPath(urlPath) {
  const isModel = urlPath.startsWith('/model/');
  const base = isModel ? SPLITS : path.dirname(ROOT);
  const target = isModel
    ? path.join(SPLITS, urlPath.slice('/model/'.length))
    : path.resolve(ROOT, '.' + urlPath);

  const rel = path.relative(base, target);
  const escapes = rel === '..' || rel.startsWith('..' + path.sep) || path.isAbsolute(rel);
  return escapes ? null : target;
}

/** Stream the file at `fp` to the client, honouring single `bytes=a-b` ranges. */
function serveFile(req, res, fp, urlPath) {
  fs.stat(fp, (err, st) => {
    // Directories and other non-files cannot be streamed; treat them as missing.
    if (err || !st.isFile()) {
      res.writeHead(404);
      return res.end('not found: ' + urlPath);
    }

    const total = st.size;
    const headers = {
      'Content-Type': MIME[path.extname(fp)] || 'application/octet-stream',
      'Accept-Ranges': 'bytes',
      ...CORS_ISOLATION_HEADERS,
    };

    let status = 200;
    let streamOptions;
    headers['Content-Length'] = total;

    const m = req.headers.range?.match(/bytes=(\d+)-(\d*)/);
    if (m) {
      const start = Number.parseInt(m[1], 10);
      const requestedEnd = m[2] ? Number.parseInt(m[2], 10) : total - 1;
      const end = Math.min(requestedEnd, total - 1); // never read past EOF

      if (start > end) {
        // Unsatisfiable range (includes any range on an empty file).
        res.writeHead(416, { ...headers, 'Content-Range': 'bytes */' + total, 'Content-Length': 0 });
        return res.end();
      }

      status = 206;
      streamOptions = { start, end };
      headers['Content-Range'] = 'bytes ' + start + '-' + end + '/' + total;
      headers['Content-Length'] = end - start + 1;
    }

    res.writeHead(status, headers);
    if (req.method === 'HEAD') return res.end(); // headers only, skip the read

    const stream = fs.createReadStream(fp, streamOptions);
    stream.on('error', (streamErr) => {
      // Headers are already out, so the only honest signal is to drop the connection.
      console.error('[serve] read failed:', fp, streamErr.message);
      res.destroy(streamErr);
    });
    // pipe() does not close the source when the client goes away.
    res.on('close', () => stream.destroy());
    stream.pipe(res);
  });
}

http.createServer((req, res) => {
  // Handle grep API
  if (req.method === 'POST' && req.url === '/api/grep') {
    handleGrepRequest(req, res);
    return;
  }

  // CORS preflight. POST is listed so cross-origin JSON calls to /api/grep pass.
  if (req.method === 'OPTIONS') {
    res.writeHead(200, {
      ...CORS_ISOLATION_HEADERS,
      'Access-Control-Allow-Methods': 'GET,HEAD,POST,OPTIONS',
      'Access-Control-Allow-Headers': 'Content-Type,Range',
    });
    return res.end();
  }

  let p;
  try {
    p = decodeURIComponent(req.url.split('?')[0]);
  } catch (err) {
    // Malformed percent-encoding would otherwise throw and take the server down.
    res.writeHead(400);
    return res.end('bad request');
  }
  if (p.includes('\0')) {
    res.writeHead(400);
    return res.end('bad request');
  }
  if (p === '/') p = '/index.html';

  const fp = resolveStaticPath(p);
  if (fp === null) {
    res.writeHead(403);
    return res.end('forbidden');
  }

  serveFile(req, res, fp, p);
}).listen(PORT, () => {
  console.log('Gemma WebGPU on :' + PORT);
  console.log('Model splits: ' + SPLITS);
  try {
    const files = fs.readdirSync(SPLITS).filter((f) => f.endsWith('.gguf'));
    console.log('Split files: ' + files.length);
  } catch (err) {
    // A missing splits directory should not abort an already-listening server.
    console.warn('Split files: unavailable (' + err.message + ')');
  }
  console.log('CORS + COEP/COOP headers enabled for wllama multi-threading');
});