LJTSG committed on
Commit
48d826b
·
verified ·
1 Parent(s): 12bf339

Upload serve_mamba.js with huggingface_hub

Browse files
Files changed (1) hide show
  1. serve_mamba.js +86 -0
serve_mamba.js ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
const http = require('http');
const fs = require('fs');
const os = require('os');
const path = require('path');
const { execFile } = require('child_process');

// TCP port the server listens on.
const PORT = 8140;

// Static files are resolved relative to the directory containing this script.
const ROOT = __dirname;

// USERPROFILE is only defined on Windows. Passing `undefined` to path.join
// throws a TypeError, so fall back to os.homedir() on other platforms.
const HOME_DIR = process.env.USERPROFILE || os.homedir();

// Pinned snapshot directory of the model inside the local Hugging Face hub
// cache; requests under /weights/ are served from here.
const HF = path.join(HOME_DIR, '.cache', 'huggingface', 'hub',
  'models--tiiuae--falcon-mamba-7b-instruct', 'snapshots',
  'b250fc9399d14f56aca18e9ea70bbfb1f73479eb');

// File extension -> Content-Type; anything not listed is served as
// application/octet-stream by the request handler.
const MIME = {
  '.html': 'text/html', '.js': 'text/javascript', '.wgsl': 'text/plain',
  '.json': 'application/json', '.safetensors': 'application/octet-stream'
};
/**
 * Handle POST /tokenize and POST /detokenize.
 *
 * The JSON request body is forwarded on stdin to a short-lived `python -c`
 * process that loads the tokenizer and prints a JSON result on stdout.
 *
 * Responses are always JSON with a permissive CORS header:
 *   - 400 `{ error }`  when the request body is not valid JSON;
 *   - 200 `{ error }`  when the Python process fails or prints non-JSON
 *                      (status kept at 200 so existing clients that only
 *                      inspect the `error` field keep working);
 *   - 200 `{ result }` on success.
 *
 * @param {import('http').IncomingMessage} req - request whose `url` is
 *   '/tokenize' (body `{ text }`) or anything else, treated as detokenize
 *   (body `{ tokens }`).
 * @param {import('http').ServerResponse} res - response to write to.
 */
function handleAPI(req, res) {
  const headers = { 'Content-Type': 'application/json', 'Access-Control-Allow-Origin': '*' };
  let replied = false;

  // Several failure paths can fire for one request; only the first may answer.
  const reply = (status, payload) => {
    if (replied) return;
    replied = true;
    res.writeHead(status, headers);
    res.end(JSON.stringify(payload));
  };

  // Collect raw chunks and decode once at the end: concatenating chunks as
  // strings corrupts multi-byte UTF-8 characters split across two chunks.
  const chunks = [];
  req.on('data', (chunk) => chunks.push(Buffer.from(chunk)));
  req.on('error', (err) => reply(400, { error: err.message }));

  req.on('end', () => {
    let data;
    try {
      data = JSON.parse(Buffer.concat(chunks).toString('utf8'));
    } catch (err) {
      // A malformed body must not take the whole server down.
      reply(400, { error: 'invalid JSON body: ' + err.message });
      return;
    }

    const script = req.url === '/tokenize'
      ? `import sys,json;from transformers import AutoTokenizer;t=AutoTokenizer.from_pretrained('tiiuae/falcon-mamba-7b-instruct');d=json.loads(sys.stdin.read());text=d['text'];msgs=[{'role':'user','content':text}];templated=t.apply_chat_template(msgs,tokenize=False,add_generation_prompt=True);print(json.dumps(t.encode(templated)))`
      : `import sys,json;from transformers import AutoTokenizer;t=AutoTokenizer.from_pretrained('tiiuae/falcon-mamba-7b-instruct');print(json.dumps(t.decode(json.loads(sys.stdin.read())['tokens'])))`;

    const child = execFile('python', ['-c', script], { maxBuffer: 10 * 1024 * 1024 }, (err, stdout) => {
      if (err) {
        reply(200, { error: err.message });
        return;
      }
      let result;
      try {
        result = JSON.parse(stdout.trim());
      } catch (parseErr) {
        reply(200, { error: 'tokenizer produced invalid JSON: ' + parseErr.message });
        return;
      }
      reply(200, { result });
    });

    if (child.stdin) {
      // If python is missing or exits before reading its input, the write
      // fails with EPIPE. Without a listener that stream error is fatal; the
      // execFile callback above already reports the underlying failure.
      child.stdin.on('error', () => {});
      child.stdin.end(JSON.stringify(data));
    }
  });
}
/**
 * Join a request path onto `base` and return the resulting file path, or
 * null when the result would lie outside `base` (e.g. via `..` segments).
 *
 * @param {string} base - directory the result must stay inside.
 * @param {string} requestPath - decoded URL path, with or without leading '/'.
 * @returns {string|null} joined path, or null if it escapes `base`.
 */
function resolveWithin(base, requestPath) {
  const target = path.join(base, requestPath);
  const rel = path.relative(base, target);
  const escapes = rel === '..' || rel.startsWith('..' + path.sep) || path.isAbsolute(rel);
  return escapes ? null : target;
}

/**
 * Parse the first `bytes=start-[end]` spec of a Range header.
 *
 * @param {string} header - raw Range header value.
 * @param {number} total - size of the file in bytes.
 * @returns {{start: number, end: number, satisfiable: boolean}|null} null when
 *   the header has no recognisable spec; `end` is clamped to the last byte and
 *   `satisfiable` is false when no byte of the file falls inside the range.
 */
function parseRange(header, total) {
  const m = header.match(/bytes=(\d+)-(\d*)/);
  if (!m) return null;
  const start = parseInt(m[1], 10);
  const requestedEnd = m[2] ? parseInt(m[2], 10) : total - 1;
  const end = Math.min(requestedEnd, total - 1);
  return { start, end, satisfiable: start <= end };
}

// Static file server: everything under /weights/ comes from the Hugging Face
// snapshot directory (HF), everything else from the script directory (ROOT).
// POST /tokenize and POST /detokenize are delegated to handleAPI.
http.createServer((req, res) => {
  if (req.method === 'OPTIONS') {
    res.writeHead(200, { 'Access-Control-Allow-Origin': '*', 'Access-Control-Allow-Methods': 'GET,POST', 'Access-Control-Allow-Headers': 'Content-Type' });
    return res.end();
  }
  if (req.method === 'POST' && (req.url === '/tokenize' || req.url === '/detokenize')) {
    return handleAPI(req, res);
  }

  let p;
  try {
    p = decodeURIComponent(req.url.split('?')[0]);
  } catch (err) {
    // Malformed percent-escapes make decodeURIComponent throw.
    res.writeHead(400);
    return res.end('bad request');
  }
  // fs calls reject paths containing NUL bytes by throwing synchronously.
  if (p.includes('\0')) {
    res.writeHead(400);
    return res.end('bad request');
  }
  if (p === '/') p = '/index.html';

  const fp = p.startsWith('/weights/')
    ? resolveWithin(HF, p.slice('/weights/'.length))
    : resolveWithin(ROOT, p);
  if (fp === null) {
    // The path climbed out of its root via '..' segments.
    res.writeHead(403);
    return res.end('forbidden');
  }

  fs.stat(fp, (e, st) => {
    // Directories stat fine but cannot be streamed; treat them as missing.
    if (e || !st.isFile()) { res.writeHead(404); return res.end('not found: ' + fp); }
    const total = st.size;
    const ct = MIME[path.extname(fp)] || 'application/octet-stream';

    // Pipe the file (or a slice of it) to the client; drop the connection if
    // the read fails midway, since the headers have already been sent.
    const send = (options) => {
      const stream = fs.createReadStream(fp, options);
      stream.on('error', () => res.destroy());
      stream.pipe(res);
    };

    const range = req.headers.range ? parseRange(req.headers.range, total) : null;
    if (range && !range.satisfiable) {
      res.writeHead(416, {
        'Content-Range': 'bytes */' + total,
        'Access-Control-Allow-Origin': '*'
      });
      return res.end();
    }
    if (range) {
      const { start, end } = range;
      res.writeHead(206, {
        'Content-Type': ct,
        'Content-Range': 'bytes ' + start + '-' + end + '/' + total,
        'Content-Length': end - start + 1,
        'Accept-Ranges': 'bytes',
        'Access-Control-Allow-Origin': '*'
      });
      send({ start, end });
      return;
    }

    res.writeHead(200, {
      'Content-Type': ct, 'Content-Length': total,
      'Accept-Ranges': 'bytes', 'Access-Control-Allow-Origin': '*'
    });
    send();
  });
}).listen(PORT, () => {
  console.log('Mamba WebGPU on :' + PORT);
  console.log('Weights from: ' + HF);
  console.log('HF exists: ' + fs.existsSync(HF));
});