Upload serve_mamba.js with huggingface_hub
Browse files- serve_mamba.js +86 -0
serve_mamba.js
ADDED
|
@@ -0,0 +1,86 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
const http = require('http');
const fs = require('fs');
const path = require('path');
const os = require('os');

// TCP port number for the local server.
const PORT = 8140;

// Directory containing this script.
const ROOT = __dirname;

// Local Hugging Face hub cache snapshot holding the falcon-mamba-7b-instruct
// files. os.homedir() is used instead of process.env.USERPROFILE: on Windows
// it resolves from USERPROFILE when that variable is set, and elsewhere it
// still yields a string, so path.join() no longer throws at startup when
// USERPROFILE is undefined.
const HF = path.join(
  os.homedir(), '.cache', 'huggingface', 'hub',
  'models--tiiuae--falcon-mamba-7b-instruct', 'snapshots',
  'b250fc9399d14f56aca18e9ea70bbfb1f73479eb'
);

// File extension -> Content-Type lookup table.
const MIME = {
  '.html': 'text/html', '.js': 'text/javascript', '.wgsl': 'text/plain',
  '.json': 'application/json', '.safetensors': 'application/octet-stream'
};
|
| 12 |
+
|
| 13 |
+
const { execFile } = require('child_process');
|
| 14 |
+
|
| 15 |
+
/**
 * Handle the tokenizer API: reads a JSON request body, pipes it on stdin to a
 * one-shot Python process running the Hugging Face tokenizer, and replies with
 * `{ result }` on success or `{ error }` on failure.
 *
 * The Python script is chosen from `req.url`: '/tokenize' applies the chat
 * template to `text` and encodes it; any other URL decodes `tokens`.
 *
 * @param {object} req - Incoming HTTP request (readable stream with `url`).
 * @param {object} res - HTTP response that receives the JSON reply.
 * @returns {void}
 */
function handleAPI(req, res) {
  // Single exit point for replies; the headersSent guard prevents a second
  // reply if more than one failure path fires for the same request.
  const sendJSON = (status, payload) => {
    if (res.headersSent) return;
    res.writeHead(status, { 'Content-Type': 'application/json', 'Access-Control-Allow-Origin': '*' });
    res.end(JSON.stringify(payload));
  };

  // Decode at the stream level so a multi-byte UTF-8 character that straddles
  // two chunks is not corrupted by per-chunk string conversion.
  req.setEncoding('utf8');
  let body = '';
  req.on('data', (chunk) => { body += chunk; });
  req.on('error', (err) => sendJSON(400, { error: err.message }));
  req.on('end', () => {
    let data;
    try {
      data = JSON.parse(body);
    } catch (err) {
      // A malformed body used to throw inside this callback and take the
      // whole server down; answer with 400 instead.
      sendJSON(400, { error: 'invalid JSON body: ' + err.message });
      return;
    }

    const script = req.url === '/tokenize'
      ? `import sys,json;from transformers import AutoTokenizer;t=AutoTokenizer.from_pretrained('tiiuae/falcon-mamba-7b-instruct');d=json.loads(sys.stdin.read());text=d['text'];msgs=[{'role':'user','content':text}];templated=t.apply_chat_template(msgs,tokenize=False,add_generation_prompt=True);print(json.dumps(t.encode(templated)))`
      : `import sys,json;from transformers import AutoTokenizer;t=AutoTokenizer.from_pretrained('tiiuae/falcon-mamba-7b-instruct');print(json.dumps(t.decode(json.loads(sys.stdin.read())['tokens'])))`;

    const child = execFile('python', ['-c', script], { maxBuffer: 10 * 1024 * 1024 }, (err, stdout) => {
      // Status stays 200 on a Python failure, exactly as before, in case
      // existing clients only inspect the `error` field.
      if (err) { sendJSON(200, { error: err.message }); return; }

      let result;
      try {
        result = JSON.parse(stdout.trim());
      } catch (parseErr) {
        // Anything on stdout besides the JSON line would otherwise throw here
        // and crash the process.
        sendJSON(500, { error: 'unexpected tokenizer output: ' + parseErr.message });
        return;
      }
      sendJSON(200, { result });
    });

    // stdin can be missing when the process failed to spawn; that failure is
    // reported through the execFile callback above.
    if (child.stdin) {
      // If Python exits before consuming stdin the pipe emits EPIPE; without a
      // listener that is an unhandled 'error' event. The callback above already
      // reports the underlying failure, so the pipe error is dropped on purpose.
      child.stdin.on('error', () => {});
      child.stdin.end(JSON.stringify(data));
    }
  });
}
|
| 33 |
+
|
| 34 |
+
// HTTP entry point. Answers CORS preflight, routes POST /tokenize and
// POST /detokenize to handleAPI, and serves every other request as a file:
// paths under /weights/ come from HF, everything else from ROOT.
// Single-range "bytes=start-end" requests are honoured with 206 responses.
http.createServer((req, res) => {
  if (req.method === 'OPTIONS') {
    res.writeHead(200, { 'Access-Control-Allow-Origin': '*', 'Access-Control-Allow-Methods': 'GET,POST', 'Access-Control-Allow-Headers': 'Content-Type' });
    return res.end();
  }
  if (req.method === 'POST' && (req.url === '/tokenize' || req.url === '/detokenize')) {
    return handleAPI(req, res);
  }

  let p;
  try {
    p = decodeURIComponent(req.url.split('?')[0]);
  } catch (e) {
    // decodeURIComponent throws URIError on malformed escapes such as "%E0%A4%A".
    res.writeHead(400);
    return res.end('bad request');
  }
  // fs functions throw synchronously on paths containing a NUL byte.
  if (p.includes('\0')) {
    res.writeHead(400);
    return res.end('bad request');
  }
  if (p === '/') p = '/index.html';

  const isWeights = p.startsWith('/weights/');
  const base = isWeights ? HF : ROOT;
  const fp = path.join(base, isWeights ? p.slice('/weights/'.length) : p);

  // Refuse any path that resolves outside its base directory ("..", including
  // percent-encoded forms). The check is lexical on purpose: symlinks that
  // live inside the base directory are still followed by fs.stat below.
  const rel = path.relative(base, fp);
  if (rel === '..' || rel.startsWith('..' + path.sep) || path.isAbsolute(rel)) {
    res.writeHead(403);
    return res.end('forbidden');
  }

  // Stream the file (or a byte range of it) and make sure a read error or a
  // client disconnect cannot crash the process or leave the stream open.
  const streamFile = (options) => {
    const stream = fs.createReadStream(fp, options);
    stream.on('error', () => res.destroy());
    res.on('close', () => stream.destroy());
    stream.pipe(res);
  };

  fs.stat(fp, (e, st) => {
    // Directories stat successfully but cannot be streamed, so treat anything
    // that is not a regular file as missing. The request path is echoed rather
    // than the absolute filesystem path.
    if (e || !st.isFile()) { res.writeHead(404); return res.end('not found: ' + p); }
    const total = st.size;
    const ct = MIME[path.extname(fp)] || 'application/octet-stream';

    const range = req.headers.range;
    const m = range ? range.match(/bytes=(\d+)-(\d*)/) : null;
    if (m) {
      const start = parseInt(m[1], 10);
      // Clamp the end to the last byte so Content-Length matches what is sent.
      const end = m[2] ? Math.min(parseInt(m[2], 10), total - 1) : total - 1;
      if (start >= total || start > end) {
        // createReadStream throws when start > end; report the range as
        // unsatisfiable instead.
        res.writeHead(416, { 'Content-Range': 'bytes */' + total, 'Access-Control-Allow-Origin': '*' });
        return res.end();
      }
      res.writeHead(206, {
        'Content-Type': ct,
        'Content-Range': 'bytes ' + start + '-' + end + '/' + total,
        'Content-Length': end - start + 1,
        'Accept-Ranges': 'bytes',
        'Access-Control-Allow-Origin': '*'
      });
      streamFile({ start, end });
      return;
    }

    res.writeHead(200, {
      'Content-Type': ct, 'Content-Length': total,
      'Accept-Ranges': 'bytes', 'Access-Control-Allow-Origin': '*'
    });
    streamFile();
  });
}).listen(PORT, () => {
  console.log('Mamba WebGPU on :' + PORT);
  console.log('Weights from: ' + HF);
  console.log('HF exists: ' + fs.existsSync(HF));
});
|