// gemma-webgpu / serve_gemma.js
// LJTSG's picture
// Upload serve_gemma.js with huggingface_hub
// e8efdb5 verified
const http = require('http'), fs = require('fs'), path = require('path');
// TCP port the HTTP server listens on.
const PORT = 8150;
// Directory containing this script; request paths outside /model/ are
// resolved relative to it.
const ROOT = __dirname;
// Directory whose contents are exposed under the /model/ URL prefix; the
// startup log counts the .gguf files found here.
const SPLITS = path.join(ROOT, 'model_splits');
// Content-Type lookup table keyed by file extension, including the leading
// dot (the form path.extname() returns).
const MIME = {
  // Pages and scripts.
  '.html': 'text/html',
  '.js': 'text/javascript',
  '.mjs': 'text/javascript',

  // Data and binary payloads.
  '.json': 'application/json',
  '.gguf': 'application/octet-stream',
  '.wasm': 'application/wasm',

  // TypeScript sources are labelled as JavaScript.
  '.ts': 'text/javascript',
};
// Headers attached to every response. The COEP/COOP pair puts the page in a
// cross-origin-isolated context, which the startup log notes is needed for
// wllama multi-threading; the wildcard CORS header lets any origin fetch files.
const COMMON_HEADERS = {
  'Access-Control-Allow-Origin': '*',
  'Cross-Origin-Embedder-Policy': 'require-corp',
  'Cross-Origin-Opener-Policy': 'same-origin',
};

/**
 * Send a short plain-text response (used for error statuses).
 *
 * @param {import('http').ServerResponse} res - Response to write to.
 * @param {number} status - HTTP status code.
 * @param {string} message - Response body.
 */
function sendText(res, status, message) {
  res.writeHead(status, Object.assign(
    { 'Content-Type': 'text/plain; charset=utf-8' },
    COMMON_HEADERS
  ));
  res.end(message);
}

/**
 * Map a decoded URL path to an absolute path on disk.
 *
 * Paths under /model/ are resolved inside SPLITS, everything else inside ROOT.
 *
 * @param {string} urlPath - Decoded request path, starting with '/'.
 * @returns {string|null} Absolute file path, or null when the path would
 *   escape its base directory (e.g. through ".." segments).
 */
function resolveRequestPath(urlPath) {
  const isModel = urlPath.startsWith('/model/');
  const base = isModel ? SPLITS : ROOT;
  const relative = isModel ? urlPath.slice('/model/'.length) : urlPath;
  const resolved = path.join(base, relative);

  // path.join() collapses ".." segments, so a target outside the base shows
  // up as a relative path that climbs out of it (or as an absolute path when
  // it lands on a different Windows drive).
  const fromBase = path.relative(base, resolved);
  const escapes = fromBase === '..'
    || fromBase.startsWith('..' + path.sep)
    || path.isAbsolute(fromBase);
  return escapes ? null : resolved;
}

/**
 * Parse a single-range "bytes=start-[end]" Range header.
 *
 * @param {string} header - Raw Range header value.
 * @param {number} total - Size of the file in bytes.
 * @returns {null|{satisfiable: false}|{satisfiable: true, start: number, end: number}}
 *   null when the header should be ignored (unsupported or inverted range),
 *   an unsatisfiable marker when start lies at or beyond the end of the file,
 *   otherwise the inclusive byte range with end clamped to the last byte.
 */
function parseByteRange(header, total) {
  const m = header.match(/bytes=(\d+)-(\d*)/);
  if (!m) return null;

  const start = Number.parseInt(m[1], 10);
  if (start >= total) return { satisfiable: false };

  const requestedEnd = m[2] ? Number.parseInt(m[2], 10) : total - 1;
  // An end before the start is an invalid range; ignore the header and let
  // the caller serve the whole file.
  if (requestedEnd < start) return null;

  return { satisfiable: true, start, end: Math.min(requestedEnd, total - 1) };
}

/**
 * Stream a file (or a byte range of it) into the response.
 *
 * @param {import('http').ServerResponse} res - Response whose headers have
 *   already been written.
 * @param {string} filePath - Absolute path of the file to read.
 * @param {{start: number, end: number}} [options] - Optional inclusive range.
 */
function streamFile(res, filePath, options) {
  const stream = fs.createReadStream(filePath, options);
  // Headers are already sent, so a read failure can only abort the response.
  // Without this listener the error would crash the process.
  stream.on('error', () => res.destroy());
  // pipe() does not close the source when the client disconnects early;
  // destroy it explicitly so the file descriptor is released.
  res.on('close', () => stream.destroy());
  stream.pipe(res);
}

/**
 * Static file server with CORS, COEP/COOP and single-range support.
 *
 * - OPTIONS requests get a preflight response.
 * - "/" maps to /index.html; /model/* is served from SPLITS, anything else
 *   from ROOT.
 * - Malformed URLs yield 400, paths escaping the served directories 403,
 *   missing files and non-regular files 404, unsatisfiable ranges 416.
 */
http.createServer((req, res) => {
  if (req.method === 'OPTIONS') {
    res.writeHead(200, Object.assign({
      'Access-Control-Allow-Methods': 'GET,HEAD',
      'Access-Control-Allow-Headers': 'Content-Type,Range',
    }, COMMON_HEADERS));
    return res.end();
  }

  let p;
  try {
    p = decodeURIComponent(req.url.split('?')[0]);
  } catch (err) {
    // decodeURIComponent throws URIError on malformed percent-escapes.
    return sendText(res, 400, 'bad request');
  }
  // A NUL byte makes fs calls throw synchronously instead of reporting an
  // error through the callback.
  if (p.includes('\0')) return sendText(res, 400, 'bad request');
  if (p === '/') p = '/index.html';

  const fp = resolveRequestPath(p);
  if (fp === null) return sendText(res, 403, 'forbidden');

  fs.stat(fp, (e, st) => {
    // Directories and other non-regular files cannot be streamed; report the
    // requested URL path rather than the absolute path on disk.
    if (e || !st.isFile()) return sendText(res, 404, 'not found: ' + p);

    const total = st.size;
    const isHead = req.method === 'HEAD';
    const headers = Object.assign({
      'Content-Type': MIME[path.extname(fp)] || 'application/octet-stream',
      'Accept-Ranges': 'bytes',
    }, COMMON_HEADERS);

    const range = req.headers.range
      ? parseByteRange(req.headers.range, total)
      : null;

    if (range && !range.satisfiable) {
      headers['Content-Range'] = 'bytes */' + total;
      res.writeHead(416, headers);
      return res.end();
    }

    if (range) {
      headers['Content-Range'] = 'bytes ' + range.start + '-' + range.end + '/' + total;
      headers['Content-Length'] = range.end - range.start + 1;
      res.writeHead(206, headers);
      // HEAD responses carry no body, so skip reading the file.
      if (isHead) return res.end();
      return streamFile(res, fp, { start: range.start, end: range.end });
    }

    headers['Content-Length'] = total;
    res.writeHead(200, headers);
    if (isHead) return res.end();
    streamFile(res, fp);
  });
}).listen(PORT, () => {
  console.log('Gemma WebGPU on :' + PORT);
  console.log('Model splits: ' + SPLITS);
  try {
    const files = fs.readdirSync(SPLITS).filter(f => f.endsWith('.gguf'));
    console.log('Split files: ' + files.length);
  } catch (err) {
    // A missing or unreadable splits directory should not take the server
    // down; requests under /model/ will simply return 404.
    console.warn('Could not read model splits directory: ' + err.message);
  }
  console.log('CORS + COEP/COOP headers enabled for wllama multi-threading');
});