File size: 3,515 Bytes
48d826b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f15bae9
 
 
48d826b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
const http = require('http'), fs = require('fs'), path = require('path');
// TCP port the HTTP server listens on.
const PORT = 8140;
// Directory that contains this script; requests outside /weights/ are resolved against it.
const ROOT = __dirname;
// Local Hugging Face hub snapshot directory for falcon-mamba-7b-instruct;
// requests under /weights/ are resolved against it.
// USERPROFILE is normally only defined on Windows. Falling back to
// os.homedir() keeps path.join() from throwing a TypeError (undefined
// argument) when the server is started on other platforms.
const HF = path.join(process.env.USERPROFILE || require('os').homedir(),
  '.cache', 'huggingface', 'hub',
  'models--tiiuae--falcon-mamba-7b-instruct', 'snapshots',
  'b250fc9399d14f56aca18e9ea70bbfb1f73479eb');

// Content-Type lookup keyed by file extension (including the leading dot).
// Extensions that are not listed here are handled by the caller's fallback.
const MIME = {
  // Browser-facing assets.
  '.html': 'text/html',
  '.js': 'text/javascript',
  // WGSL shader sources are delivered as plain text.
  '.wgsl': 'text/plain',
  // Model metadata and weight shards.
  '.json': 'application/json',
  '.safetensors': 'application/octet-stream'
};

const { execFile } = require('child_process');

/**
 * Serves the POST /tokenize and POST /detokenize endpoints.
 *
 * The JSON request body is buffered, then forwarded on stdin to a one-shot
 * `python -c` process that loads the falcon-mamba tokenizer through
 * `transformers`. Whatever JSON the script prints on stdout is returned to
 * the client as `{ result }`; failures are returned as `{ error }`.
 *
 * Status codes: 200 on success, 400 when the request body is not valid JSON,
 * 500 when the Python process fails or prints something that is not JSON.
 *
 * @param {import('http').IncomingMessage} req - Incoming request; `req.url`
 *   selects the script ('/tokenize' runs the tokenizer, anything else the
 *   detokenizer).
 * @param {import('http').ServerResponse} res - Response that receives the
 *   JSON reply.
 * @returns {void}
 */
function handleAPI(req, res) {
  const reply = (status, payload) => {
    res.writeHead(status, { 'Content-Type': 'application/json', 'Access-Control-Allow-Origin': '*' });
    res.end(JSON.stringify(payload));
  };

  // Collect raw chunks and decode once at the end: decoding each Buffer on
  // its own would corrupt multi-byte UTF-8 characters split across chunks.
  const chunks = [];
  req.on('data', c => chunks.push(Buffer.isBuffer(c) ? c : Buffer.from(c)));
  req.on('end', () => {
    let data;
    try {
      data = JSON.parse(Buffer.concat(chunks).toString('utf8'));
    } catch (parseErr) {
      // A malformed body must not throw out of the event listener, which
      // would take the whole server down.
      reply(400, { error: 'invalid JSON body: ' + parseErr.message });
      return;
    }

    // The Python sources are static; request data only ever travels via
    // stdin, never through the command line. Continuation lines must stay
    // at column 0 because Python is indentation-sensitive.
    const script = req.url === '/tokenize'
      ? `import sys,json;from transformers import AutoTokenizer;t=AutoTokenizer.from_pretrained('tiiuae/falcon-mamba-7b-instruct');d=json.loads(sys.stdin.read());text=d['text'];system=d.get('system','');msgs=[];
if system:msgs.append({'role':'system','content':system})
msgs.append({'role':'user','content':text});templated=t.apply_chat_template(msgs,tokenize=False,add_generation_prompt=True);print(json.dumps(t.encode(templated)))`
      : `import sys,json;from transformers import AutoTokenizer;t=AutoTokenizer.from_pretrained('tiiuae/falcon-mamba-7b-instruct');print(json.dumps(t.decode(json.loads(sys.stdin.read())['tokens'])))`;

    const child = execFile('python', ['-c', script], { maxBuffer: 10*1024*1024 }, (err, stdout) => {
      if (err) {
        reply(500, { error: err.message });
        return;
      }
      let result;
      try {
        result = JSON.parse(stdout.trim());
      } catch (outErr) {
        reply(500, { error: 'tokenizer produced invalid JSON: ' + outErr.message });
        return;
      }
      reply(200, { result });
    });

    if (child.stdin) {
      // If the process could not start or exits early, the pipe reports
      // EPIPE-style errors. The failure itself is already delivered through
      // the execFile callback above, so the stream error only needs a
      // listener to avoid becoming an uncaught exception.
      child.stdin.on('error', () => {});
      child.stdin.end(JSON.stringify(data));
    }
  });
}

/**
 * Joins a request path onto a base directory and confirms the result is
 * still located inside that directory.
 *
 * @param {string} baseDir - Directory the request is allowed to read from.
 * @param {string} urlPath - Decoded request path (may contain `..` segments).
 * @returns {string|null} Absolute file path, or null when the path would
 *   escape `baseDir`.
 */
function resolveInside(baseDir, urlPath) {
  const base = path.resolve(baseDir);
  // path.join normalises `..` segments, so a simple prefix check is enough.
  const target = path.join(base, urlPath);
  const prefix = base.endsWith(path.sep) ? base : base + path.sep;
  if (target !== base && !target.startsWith(prefix)) return null;
  return target;
}

/**
 * Interprets a `Range` header of the form `bytes=START-[END]`.
 *
 * @param {string} header - Raw Range header value.
 * @param {number} total - Size of the file in bytes.
 * @returns {null|{satisfiable: false}|{satisfiable: true, start: number, end: number}}
 *   null when the header is not in the supported form (the caller then sends
 *   the whole file); otherwise the inclusive byte window, with `end` clamped
 *   to the last byte of the file, or a marker that the range cannot be served.
 */
function parseByteRange(header, total) {
  const m = header.match(/bytes=(\d+)-(\d*)/);
  if (!m) return null;
  const start = Number.parseInt(m[1], 10);
  const requestedEnd = m[2] ? Number.parseInt(m[2], 10) : total - 1;
  const end = Math.min(requestedEnd, total - 1);
  if (start >= total || start > end) return { satisfiable: false };
  return { satisfiable: true, start, end };
}

/**
 * Pipes a file (or a byte window of it) into the response.
 *
 * @param {string} fp - Absolute path of the file to send.
 * @param {import('http').ServerResponse} res - Response whose headers have
 *   already been written.
 * @param {{start: number, end: number}} [window] - Optional inclusive byte range.
 * @returns {void}
 */
function streamFile(fp, res, window) {
  const stream = fs.createReadStream(fp, window);
  // Headers are already out, so the only option on a read failure is to
  // abort the connection; without a listener the error would be uncaught.
  stream.on('error', (err) => {
    console.error('read failed: ' + fp + ': ' + err.message);
    res.destroy();
  });
  // Release the file descriptor if the client goes away mid-transfer.
  res.on('close', () => stream.destroy());
  stream.pipe(res);
}

// HTTP entry point: answers CORS preflights, forwards the tokenizer API to
// handleAPI, and serves files from ROOT (or from HF for /weights/ paths)
// with single-range support.
http.createServer((req, res) => {
  if (req.method === 'OPTIONS') {
    res.writeHead(200, { 'Access-Control-Allow-Origin': '*', 'Access-Control-Allow-Methods': 'GET,POST', 'Access-Control-Allow-Headers': 'Content-Type' });
    return res.end();
  }
  if (req.method === 'POST' && (req.url === '/tokenize' || req.url === '/detokenize')) {
    return handleAPI(req, res);
  }

  let p;
  try {
    p = decodeURIComponent(req.url.split('?')[0]);
  } catch (err) {
    // decodeURIComponent throws URIError on malformed percent-escapes.
    if (!(err instanceof URIError)) throw err;
    res.writeHead(400);
    return res.end('bad request');
  }
  // fs functions throw synchronously on paths containing a NUL byte.
  if (p.includes('\0')) {
    res.writeHead(400);
    return res.end('bad request');
  }
  if (p === '/') p = '/index.html';

  const fp = p.startsWith('/weights/')
    ? resolveInside(HF, p.slice('/weights/'.length))
    : resolveInside(ROOT, p);
  if (fp === null) {
    // The decoded path tried to climb out of the served directory.
    res.writeHead(403);
    return res.end('forbidden');
  }

  fs.stat(fp, (e, st) => {
    // Directories and other non-regular files cannot be streamed; report
    // them like missing files. The request path is echoed rather than the
    // absolute filesystem path to avoid disclosing the server's layout.
    if (e || !st.isFile()) { res.writeHead(404); return res.end('not found: ' + p); }
    const total = st.size;
    const ct = MIME[path.extname(fp)] || 'application/octet-stream';
    const range = req.headers.range ? parseByteRange(req.headers.range, total) : null;

    if (range && !range.satisfiable) {
      res.writeHead(416, {
        'Content-Range': 'bytes */' + total,
        'Accept-Ranges': 'bytes',
        'Access-Control-Allow-Origin': '*'
      });
      return res.end();
    }

    if (range) {
      const { start, end } = range;
      res.writeHead(206, {
        'Content-Type': ct,
        'Content-Range': 'bytes ' + start + '-' + end + '/' + total,
        'Content-Length': end - start + 1,
        'Accept-Ranges': 'bytes',
        'Access-Control-Allow-Origin': '*'
      });
      streamFile(fp, res, { start, end });
      return;
    }

    res.writeHead(200, {
      'Content-Type': ct, 'Content-Length': total,
      'Accept-Ranges': 'bytes', 'Access-Control-Allow-Origin': '*'
    });
    streamFile(fp, res);
  });
}).listen(PORT, () => {
  console.log('Mamba WebGPU on :' + PORT);
  console.log('Weights from: ' + HF);
  console.log('HF exists: ' + fs.existsSync(HF));
});