Spaces:
Running
Running
Yang Gu
committed on
Commit
·
e4aee93
1
Parent(s):
1322fe0
Add ort-phi2
Browse files
demo/ort-phi2/index.html
ADDED
|
@@ -0,0 +1,378 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!DOCTYPE html>
|
| 2 |
+
<html>
|
| 3 |
+
|
| 4 |
+
<head>
|
| 5 |
+
<title>Example</title>
|
| 6 |
+
</head>
|
| 7 |
+
|
| 8 |
+
<body>
|
| 9 |
+
<!-- <script src="https://cdn.jsdelivr.net/npm/onnxruntime-web@dev/dist/ort.webgpu.min.js"> </script> -->
|
| 10 |
+
<script src="https://wp-27.sh.intel.com/workspace/project/onnxruntime/js/web/dist/ort.webgpu.min.js"> </script>
|
| 11 |
+
|
| 12 |
+
<script type="module">
|
| 13 |
+
import { AutoTokenizer, env } from '../../transformers/transformers.js';
|
| 14 |
+
|
| 15 |
+
// Append a message to the console and to the on-page #status element.
function log(i) {
    console.log(i);
    document.getElementById('status').innerText += `\n${i}`;
}
|
| 16 |
+
|
| 17 |
+
// Registry of runnable models; `path` is resolved either locally under
// models/ or against the Hugging Face hub (see LLM.load).
const MODELS = {
    "tinyllama": { name: "tinyllama", path: "schmuell/TinyLlama-1.1B-Chat-v1.0-int4" },
    "tinyllama_fp16": { name: "tinyllama-fp16", path: "schmuell/TinyLlama-1.1B-Chat-v1.0-fp16", externaldata: true },
    "phi2": { name: "phi2", path: "phi2-int4" },
    "phi2-mb": { name: "phi2-mb", path: "schmuell/phi2-mb", externaldata: true },
    "stablelm": { name: "stablelm", path: "schmuell/stablelm-2-zephyr-1_6b-int4" },
}

// Parse runtime options from a query string ("key=value&key=value").
// `query` defaults to the page's own search string; accepting it as a
// parameter is backward-compatible and lets the parser be exercised
// outside a browser. Unknown keys throw; numeric defaults coerce the
// supplied value with parseInt. The model short-name is replaced by its
// MODELS entry when one exists.
function getConfig(query) {
    if (query === undefined) {
        query = window.location.search.substring(1);
    }
    const config = {
        model: "phi2",
        provider: "webgpu",
        profiler: 0,
        verbose: 0,
        threads: 1,
        trace: 0,
        csv: 0,
        max_tokens: 256,
        local: 1,
    };
    for (const pair of query.split("&").map((v) => v.split("="))) {
        const key = pair[0];
        if (key.length === 0) {
            continue;  // empty query string yields one empty segment
        }
        if (!(key in config)) {
            throw new Error("unknown argument: " + key);
        }
        const value = decodeURIComponent(pair[1]);
        // BUG FIX: parseInt was called without a radix; force base 10.
        config[key] = (typeof config[key] === "number") ? parseInt(value, 10) : value;
    }
    if (MODELS[config.model] !== undefined) {
        config.model = MODELS[config.model];
    }
    return config;
}
|
| 59 |
+
|
| 60 |
+
// Minimal wrapper around an onnxruntime-web InferenceSession for greedy,
// token-by-token decoding of a decoder-only LLM with a kv-cache feed.
class LLM {
    sess = undefined;           // ort.InferenceSession, created in load()
    profiler = false;           // collect ort profiling data
    trace = false;              // emit console timestamps / ort trace events
    feed = {};                  // model inputs, including past kv-cache tensors
    output_tokens = [];         // prompt + generated token ids (BigInt)
    eos = 2;                    // end-of-sequence id, overwritten from config.json
    need_position_ids = true;   // model expects an explicit position_ids input
    stop = false;               // set via abort() to end generation
    kv_dims = [];               // [batch, num_kv_heads, seq_len, head_dim]
    dtype = "float16";          // kv-cache element type

    constructor() {
    }

    // Fetch config + weights and create the inference session.
    // `model` is an entry of MODELS; `options` carries provider/verbose/
    // profiler/trace/local straight from the page config.
    async load(model, options) {
        const provider = options.provider || "webgpu";
        const verbose = options.verbose;
        const local = options.local;
        this.profiler = options.profiler;
        this.trace = options.trace;

        const model_path = (local) ? "models/" + model.path : "https://huggingface.co/" + model.path + "/resolve/main";

        log(`loading... ${model.name}, ${provider}`);
        const json_bytes = await fetchAndCache(model_path + "/config.json");
        let textDecoder = new TextDecoder();
        const model_config = JSON.parse(textDecoder.decode(json_bytes));

        // NOTE(review): the weight file name is hard-coded to phi2-int4.onnx,
        // which cannot work for the other MODELS entries — confirm intent.
        const model_bytes = await fetchAndCache(model_path + "/phi2-int4.onnx");
        const externaldata = (model.externaldata) ? await fetchAndCache(model_path + '/onnx/decoder_model_merged.onnx.data') : false;
        let modelSize = model_bytes.byteLength;
        if (externaldata) {
            modelSize += externaldata.byteLength;
        }

        log(`model size ${Math.round(modelSize / 1024 / 1024)} MB`);

        const opt = {
            executionProviders: [provider],
            preferredOutputLocation: {},
        };

        switch (provider) {
            case "webgpu":
                if (!("gpu" in navigator)) {
                    throw new Error("webgpu is NOT supported");
                }
                // Keep kv-cache outputs on the GPU so they can be fed back
                // as inputs without a CPU round trip.
                for (let i = 0; i < model_config.num_hidden_layers; ++i) {
                    opt.preferredOutputLocation[`present.${i}.key`] = 'gpu-buffer';
                    opt.preferredOutputLocation[`present.${i}.value`] = 'gpu-buffer';
                }
                break;
            case "webnn":
                if (!("ml" in navigator)) {
                    throw new Error("webnn is NOT supported");
                }
                break;
        }

        // BUG FIX: this used to test `externaldata !== undefined`, which is
        // always true (externaldata is `false` when absent), registering a
        // bogus external-data entry for models that have none.
        if (externaldata) {
            opt.externalData = [
                {
                    data: externaldata,
                    path: 'decoder_model_merged.onnx.data'
                },
            ]
        }
        if (verbose) {
            opt.logSeverityLevel = 0;
            opt.logVerbosityLevel = 0;
            ort.env.logLevel = "verbose";
            ort.env.debug = true;
        }

        ort.env.webgpu.profiling = {};
        if (this.profiler) {
            opt.enableProfiling = true;
            ort.env.webgpu.profilingMode = 'default';
            ort.env.webgpu.profiling.mode = 'default';
        }

        this.sess = await ort.InferenceSession.create(model_bytes, opt);

        if (this.trace) {
            ort.env.trace = true;
            // Install an (empty) profiling callback so ort emits the events.
            ort.env.webgpu.profiling.ondata = (version, inputsMetadata, outputsMetadata, kernelId, kernelType,
                kernelName, programName, startTime, endTime) => { };
        }

        this.eos = model_config.eos_token_id;
        this.kv_dims = [1, model_config.num_key_value_heads, 0, model_config.hidden_size / model_config.num_attention_heads];
        // BUG FIX: read dtype from the `model` argument instead of reaching
        // out to the page-global `config` (same object for the current
        // caller, but load() should not depend on globals).
        this.dtype = model.dtype || "float16";
        this.num_layers = model_config.num_hidden_layers;
        this.initilize_feed();
    }

    // Reset the feed to empty kv-cache tensors and clear output tokens.
    // (Method name keeps the original "initilize" spelling — renaming it
    // would break external callers.)
    initilize_feed() {
        this.feed = {};
        const empty = (this.dtype === "float16") ? new Uint16Array() : [];
        for (let i = 0; i < this.num_layers; ++i) {
            this.feed[`past_key_values.${i}.key`] = new ort.Tensor(this.dtype, empty, this.kv_dims)
            this.feed[`past_key_values.${i}.value`] = new ort.Tensor(this.dtype, empty, this.kv_dims)
        }
        this.output_tokens = [];
    }


    // Greedy sampling: index of the largest logit at the last sequence
    // position of tensor t (dims [batch, seq, vocab]). Throws on any
    // non-finite logit.
    argmax(t) {
        const arr = t.data;
        const start = t.dims[2] * (t.dims[1] - 1);
        let max = arr[start];
        let maxidx = 0;

        for (let i = 0; i < t.dims[2]; i++) {
            const val = arr[i + start];
            if (!isFinite(val)) {
                throw new Error("found infinitive in logits");
            }
            if (val > max) {
                max = arr[i + start];
                maxidx = i;
            }
        }
        return maxidx;
    }

    // Move this step's `present.*` outputs into next step's
    // `past_key_values.*` inputs, disposing replaced GPU buffers.
    update_kv_cache(feed, outputs) {
        for (const name in outputs) {
            if (name.startsWith('present')) {
                let newName = name.replace('present', 'past_key_values');
                // free old gpu buffer
                const t = feed[newName];
                if (t.location === 'gpu-buffer') {
                    t.dispose();
                }
                feed[newName] = outputs[name];
            }
        }
    }

    // Request generation to stop after the current step.
    abort() {
        this.stop = true;
    }

    // Greedily generate up to options.max_tokens tokens starting from
    // `tokens` (array of numeric ids). `callback(output_tokens)` is
    // invoked after each step unless profiling. Returns the full list.
    async generate(tokens, callback, options) {
        const keep_cache = options.keep_cache;
        const max_tokens = options.max_tokens || 256;
        const feed = this.feed;
        const input_ids = new ort.Tensor('int64', BigInt64Array.from(tokens.map(BigInt)), [1, tokens.length]);
        feed['input_ids'] = input_ids;
        this.stop = false;

        if (keep_cache) {
            // BUG FIX: previously spread the input_ids Tensor itself, which
            // appended garbage; append the raw token ids instead.
            this.output_tokens.push(...tokens.map(BigInt))
        } else {
            this.initilize_feed();
            this.output_tokens = Array.from(feed['input_ids'].data);
        }

        let last_token = 0n;
        let seqlen = this.output_tokens.length;
        if (this.need_position_ids) {
            if (keep_cache) {
                // BUG FIX: dims used `input_ids.length`, which is undefined
                // (Tensor has no .length). Feed positions for just the newly
                // appended tokens. TODO(review): confirm against a
                // keep_cache caller — none exists in this file.
                feed['position_ids'] = new ort.Tensor('int64', BigInt64Array.from({ length: tokens.length }, (_, i) => BigInt(seqlen - tokens.length + i)), [1, tokens.length]);
            } else {
                feed['position_ids'] = new ort.Tensor('int64', BigInt64Array.from({ length: seqlen }, (_, i) => BigInt(i)), [1, seqlen]);
            }
        }

        // Loose != is deliberate: last_token is a BigInt, this.eos a Number.
        while (last_token != this.eos && seqlen < max_tokens && !this.stop) {
            seqlen = this.output_tokens.length;
            feed['attention_mask'] = new ort.Tensor('int64', BigInt64Array.from({ length: seqlen }, () => 1n), [1, seqlen]);
            let outputs;
            if (this.trace) {
                console.timeStamp("RUN-BEGIN");
                outputs = await this.sess.run(feed);
                console.timeStamp("RUN-END");
            } else {
                outputs = await this.sess.run(feed);
            }
            last_token = BigInt(this.argmax(outputs.logits));
            this.output_tokens.push(last_token);
            if (callback && !this.profiler) {
                callback(this.output_tokens);
            }
            this.update_kv_cache(feed, outputs);
            feed['input_ids'] = new ort.Tensor('int64', BigInt64Array.from([last_token]), [1, 1]);
            if (this.need_position_ids) {
                feed['position_ids'] = new ort.Tensor('int64', BigInt64Array.from([BigInt(seqlen)]), [1, 1]);
            }
        }
        if (this.profiler) {
            this.sess.endProfiling();
        }
        return this.output_tokens;
    }
}
|
| 258 |
+
|
| 259 |
+
// Runtime configuration parsed from the page's query string.
const config = getConfig();

// transformers.js model resolution: local models/ dir vs. remote hub,
// driven by the `local` query flag.
env.localModelPath = 'models';
env.allowRemoteModels = config.local == 0;
env.allowLocalModels = config.local == 1;

// onnxruntime wasm backend settings.
ort.env.wasm.numThreads = config.threads;
ort.env.wasm.simd = true;

// Console output captured while profiling; turned into a download link
// by create_download_link() when non-empty.
const cons_log = [];

if (config.profiler === 2) {
    // Redirect console.log so profiler records can be downloaded later;
    // '_fence_' lines are dropped as noise.
    console.log = function (message) {
        if (!message.includes('_fence_')) {
            cons_log.push(message);
        }
    };
}
|
| 275 |
+
|
| 276 |
+
// Tokenizer matching the selected model, loaded via transformers.js.
const tokenizer = await AutoTokenizer.from_pretrained(config.model.path);
|
| 277 |
+
|
| 278 |
+
// Expose the captured profiler output (cons_log) as a downloadable
// base64 data: link inside the #download element. No-op when nothing
// was captured. Reuses an existing link on repeated calls.
function create_download_link(cons_log) {
    if (cons_log.length > 0) {
        let link = document.getElementById('download').childNodes[0];
        if (link === undefined) {
            // BUG FIX: createElement's second argument must be an options
            // object ({ is: ... }); the old string "download-link" was
            // silently ignored, so it is dropped.
            link = document.createElement("a");
            link.download = "profiler.log";
            link.innerText = "Download";
            document.getElementById('download').appendChild(link);
        }
        // NOTE(review): btoa() throws on characters outside Latin-1;
        // profiler output is assumed to be ASCII — confirm.
        const base64 = btoa(cons_log.join('\n'));
        link.href = `data:application/json;base64,${base64}`;
    }
}
|
| 291 |
+
|
| 292 |
+
// Fetch `url` through the CacheStorage bucket "onnx" and return its
// bytes as an ArrayBuffer. On a cache miss the response is stored
// first; if the Cache API is unavailable or anything fails, fall back
// to a plain network fetch.
async function fetchAndCache(url) {
    try {
        const cache = await caches.open("onnx");
        let response = await cache.match(url);
        if (response == undefined) {
            await cache.add(url);
            response = await cache.match(url);
            log(`${url} (network)`);
        } else {
            log(`${url} (cached)`);
        }
        return await response.arrayBuffer();
    } catch (error) {
        log(`${url} (network)`);
        const response = await fetch(url);
        return await response.arrayBuffer();
    }
}
|
| 310 |
+
|
| 311 |
+
// Decode tokens[startidx:] back into text, dropping special tokens.
function token_to_text(tokenizer, tokens, startidx) {
    return tokenizer.decode(tokens.slice(startidx), { skip_special_tokens: true, });
}
|
| 315 |
+
|
| 316 |
+
// Single shared LLM instance used by main().
const llm = new LLM();
|
| 317 |
+
|
| 318 |
+
// Run one generation pass: load the configured model, build a prompt,
// tokenize it, generate greedily, and report text + tokens/sec on the
// page. Streaming partials are written to #result as tokens arrive.
async function main() {

    const model = config.model;

    await llm.load(model, {
        provider: config.provider,
        verbose: config.verbose,
        profiler: config.profiler,
        trace: config.trace,
        local: config.local,
    });


    document.getElementById('status').innerText = "";
    const query = "Tell me about Constantinople.";
    let prompt;

    // phi2 uses a plain User/Assistant template; the other models use the
    // chatml-style template below.
    if (model.name.includes('phi2')) {
        prompt = `User:${query}\nAssistant:`;
    } else {
        prompt = `"<|system|>\nYou are a friendly assistant.</s>\n<|user|>\n${query}</s>\n<|assistant|>\n`;
    }
    const { input_ids } = await tokenizer(prompt, { return_tensor: false, padding: true, truncation: true });

    const start_timer = performance.now();
    // Callback re-decodes the running token list so the page shows text
    // as it is generated (skipped internally while profiling).
    const output_tokens = await llm.generate(input_ids, (output_tokens) => {
        document.getElementById('result').innerText = token_to_text(tokenizer, output_tokens, input_ids.length);
    }, {});
    const took = (performance.now() - start_timer) / 1000;
    const txt = token_to_text(tokenizer, output_tokens, input_ids.length);
    const seqlen = output_tokens.length;
    document.getElementById('result').innerText = txt;
    const perf = `${seqlen} tokens in ${took.toFixed(1)}sec, ${(seqlen / took).toFixed(2)} tokens/sec`;
    console.log(perf + " @@1");
    document.getElementById('perf').innerText = perf;
    // Optional machine-readable one-liner (csv=1) for benchmark scraping.
    if (config.csv) {
        log(`${model.name},${took.toFixed(2)},${(seqlen / took).toFixed(3)},${seqlen},@@2`);
    }
}
|
| 357 |
+
// Entry point: surface any failure in #result, and always emit the
// profiler download link (a no-op when nothing was captured).
try {
    await main();
} catch (error) {
    console.error(error);
    document.getElementById('result').innerText = error.message;
} finally {
    create_download_link(cons_log);
}
|
| 365 |
+
</script>
|
| 366 |
+
|
| 367 |
+
<div id="status"></div>
|
| 368 |
+
<br />
|
| 369 |
+
<div id="result"></div>
|
| 370 |
+
<br />
|
| 371 |
+
<div id="perf"></div>
|
| 372 |
+
<br />
|
| 373 |
+
<div id="download"></div>
|
| 374 |
+
<br />
|
| 375 |
+
|
| 376 |
+
</body>
|
| 377 |
+
|
| 378 |
+
</html>
|
demo/ort-phi2/models/phi2-int4/config.json
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_name_or_path": "microsoft/phi-2",
|
| 3 |
+
"architectures": [
|
| 4 |
+
"PhiForCausalLM"
|
| 5 |
+
],
|
| 6 |
+
"auto_map": {
|
| 7 |
+
"AutoConfig": "configuration_phi.PhiConfig",
|
| 8 |
+
"AutoModelForCausalLM": "modeling_phi.PhiForCausalLM"
|
| 9 |
+
},
|
| 10 |
+
"attention_dropout": 0.0,
|
| 11 |
+
"bos_token_id": 50256,
|
| 12 |
+
"embd_pdrop": 0.0,
|
| 13 |
+
"eos_token_id": 50256,
|
| 14 |
+
"hidden_act": "gelu_new",
|
| 15 |
+
"hidden_size": 2560,
|
| 16 |
+
"initializer_range": 0.02,
|
| 17 |
+
"intermediate_size": 10240,
|
| 18 |
+
"layer_norm_eps": 1e-05,
|
| 19 |
+
"max_position_embeddings": 2048,
|
| 20 |
+
"model_type": "phi",
|
| 21 |
+
"num_attention_heads": 32,
|
| 22 |
+
"num_hidden_layers": 32,
|
| 23 |
+
"num_key_value_heads": 32,
|
| 24 |
+
"partial_rotary_factor": 0.4,
|
| 25 |
+
"qk_layernorm": false,
|
| 26 |
+
"resid_pdrop": 0.1,
|
| 27 |
+
"rope_scaling": null,
|
| 28 |
+
"rope_theta": 10000.0,
|
| 29 |
+
"tie_word_embeddings": false,
|
| 30 |
+
"torch_dtype": "float16",
|
| 31 |
+
"transformers_version": "4.37.0",
|
| 32 |
+
"use_cache": true,
|
| 33 |
+
"vocab_size": 51200
|
| 34 |
+
}
|
demo/ort-phi2/models/phi2-int4/phi2-int4.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3d4321d1b34279940c9ba43aa984f6090ea5656380f415b7c87e71b6e3cbf977
|
| 3 |
+
size 1770018731
|
demo/ort-phi2/models/phi2-int4/tokenizer.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
demo/ort-phi2/models/phi2-int4/tokenizer_config.json
ADDED
|
@@ -0,0 +1,323 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"add_prefix_space": false,
|
| 3 |
+
"added_tokens_decoder": {
|
| 4 |
+
"50256": {
|
| 5 |
+
"content": "<|endoftext|>",
|
| 6 |
+
"lstrip": false,
|
| 7 |
+
"normalized": false,
|
| 8 |
+
"rstrip": false,
|
| 9 |
+
"single_word": false,
|
| 10 |
+
"special": true
|
| 11 |
+
},
|
| 12 |
+
"50257": {
|
| 13 |
+
"content": " ",
|
| 14 |
+
"lstrip": false,
|
| 15 |
+
"normalized": true,
|
| 16 |
+
"rstrip": false,
|
| 17 |
+
"single_word": false,
|
| 18 |
+
"special": false
|
| 19 |
+
},
|
| 20 |
+
"50258": {
|
| 21 |
+
"content": " ",
|
| 22 |
+
"lstrip": false,
|
| 23 |
+
"normalized": true,
|
| 24 |
+
"rstrip": false,
|
| 25 |
+
"single_word": false,
|
| 26 |
+
"special": false
|
| 27 |
+
},
|
| 28 |
+
"50259": {
|
| 29 |
+
"content": " ",
|
| 30 |
+
"lstrip": false,
|
| 31 |
+
"normalized": true,
|
| 32 |
+
"rstrip": false,
|
| 33 |
+
"single_word": false,
|
| 34 |
+
"special": false
|
| 35 |
+
},
|
| 36 |
+
"50260": {
|
| 37 |
+
"content": " ",
|
| 38 |
+
"lstrip": false,
|
| 39 |
+
"normalized": true,
|
| 40 |
+
"rstrip": false,
|
| 41 |
+
"single_word": false,
|
| 42 |
+
"special": false
|
| 43 |
+
},
|
| 44 |
+
"50261": {
|
| 45 |
+
"content": " ",
|
| 46 |
+
"lstrip": false,
|
| 47 |
+
"normalized": true,
|
| 48 |
+
"rstrip": false,
|
| 49 |
+
"single_word": false,
|
| 50 |
+
"special": false
|
| 51 |
+
},
|
| 52 |
+
"50262": {
|
| 53 |
+
"content": " ",
|
| 54 |
+
"lstrip": false,
|
| 55 |
+
"normalized": true,
|
| 56 |
+
"rstrip": false,
|
| 57 |
+
"single_word": false,
|
| 58 |
+
"special": false
|
| 59 |
+
},
|
| 60 |
+
"50263": {
|
| 61 |
+
"content": " ",
|
| 62 |
+
"lstrip": false,
|
| 63 |
+
"normalized": true,
|
| 64 |
+
"rstrip": false,
|
| 65 |
+
"single_word": false,
|
| 66 |
+
"special": false
|
| 67 |
+
},
|
| 68 |
+
"50264": {
|
| 69 |
+
"content": " ",
|
| 70 |
+
"lstrip": false,
|
| 71 |
+
"normalized": true,
|
| 72 |
+
"rstrip": false,
|
| 73 |
+
"single_word": false,
|
| 74 |
+
"special": false
|
| 75 |
+
},
|
| 76 |
+
"50265": {
|
| 77 |
+
"content": " ",
|
| 78 |
+
"lstrip": false,
|
| 79 |
+
"normalized": true,
|
| 80 |
+
"rstrip": false,
|
| 81 |
+
"single_word": false,
|
| 82 |
+
"special": false
|
| 83 |
+
},
|
| 84 |
+
"50266": {
|
| 85 |
+
"content": " ",
|
| 86 |
+
"lstrip": false,
|
| 87 |
+
"normalized": true,
|
| 88 |
+
"rstrip": false,
|
| 89 |
+
"single_word": false,
|
| 90 |
+
"special": false
|
| 91 |
+
},
|
| 92 |
+
"50267": {
|
| 93 |
+
"content": " ",
|
| 94 |
+
"lstrip": false,
|
| 95 |
+
"normalized": true,
|
| 96 |
+
"rstrip": false,
|
| 97 |
+
"single_word": false,
|
| 98 |
+
"special": false
|
| 99 |
+
},
|
| 100 |
+
"50268": {
|
| 101 |
+
"content": " ",
|
| 102 |
+
"lstrip": false,
|
| 103 |
+
"normalized": true,
|
| 104 |
+
"rstrip": false,
|
| 105 |
+
"single_word": false,
|
| 106 |
+
"special": false
|
| 107 |
+
},
|
| 108 |
+
"50269": {
|
| 109 |
+
"content": " ",
|
| 110 |
+
"lstrip": false,
|
| 111 |
+
"normalized": true,
|
| 112 |
+
"rstrip": false,
|
| 113 |
+
"single_word": false,
|
| 114 |
+
"special": false
|
| 115 |
+
},
|
| 116 |
+
"50270": {
|
| 117 |
+
"content": " ",
|
| 118 |
+
"lstrip": false,
|
| 119 |
+
"normalized": true,
|
| 120 |
+
"rstrip": false,
|
| 121 |
+
"single_word": false,
|
| 122 |
+
"special": false
|
| 123 |
+
},
|
| 124 |
+
"50271": {
|
| 125 |
+
"content": " ",
|
| 126 |
+
"lstrip": false,
|
| 127 |
+
"normalized": true,
|
| 128 |
+
"rstrip": false,
|
| 129 |
+
"single_word": false,
|
| 130 |
+
"special": false
|
| 131 |
+
},
|
| 132 |
+
"50272": {
|
| 133 |
+
"content": " ",
|
| 134 |
+
"lstrip": false,
|
| 135 |
+
"normalized": true,
|
| 136 |
+
"rstrip": false,
|
| 137 |
+
"single_word": false,
|
| 138 |
+
"special": false
|
| 139 |
+
},
|
| 140 |
+
"50273": {
|
| 141 |
+
"content": " ",
|
| 142 |
+
"lstrip": false,
|
| 143 |
+
"normalized": true,
|
| 144 |
+
"rstrip": false,
|
| 145 |
+
"single_word": false,
|
| 146 |
+
"special": false
|
| 147 |
+
},
|
| 148 |
+
"50274": {
|
| 149 |
+
"content": " ",
|
| 150 |
+
"lstrip": false,
|
| 151 |
+
"normalized": true,
|
| 152 |
+
"rstrip": false,
|
| 153 |
+
"single_word": false,
|
| 154 |
+
"special": false
|
| 155 |
+
},
|
| 156 |
+
"50275": {
|
| 157 |
+
"content": " ",
|
| 158 |
+
"lstrip": false,
|
| 159 |
+
"normalized": true,
|
| 160 |
+
"rstrip": false,
|
| 161 |
+
"single_word": false,
|
| 162 |
+
"special": false
|
| 163 |
+
},
|
| 164 |
+
"50276": {
|
| 165 |
+
"content": " ",
|
| 166 |
+
"lstrip": false,
|
| 167 |
+
"normalized": true,
|
| 168 |
+
"rstrip": false,
|
| 169 |
+
"single_word": false,
|
| 170 |
+
"special": false
|
| 171 |
+
},
|
| 172 |
+
"50277": {
|
| 173 |
+
"content": " ",
|
| 174 |
+
"lstrip": false,
|
| 175 |
+
"normalized": true,
|
| 176 |
+
"rstrip": false,
|
| 177 |
+
"single_word": false,
|
| 178 |
+
"special": false
|
| 179 |
+
},
|
| 180 |
+
"50278": {
|
| 181 |
+
"content": " ",
|
| 182 |
+
"lstrip": false,
|
| 183 |
+
"normalized": true,
|
| 184 |
+
"rstrip": false,
|
| 185 |
+
"single_word": false,
|
| 186 |
+
"special": false
|
| 187 |
+
},
|
| 188 |
+
"50279": {
|
| 189 |
+
"content": " ",
|
| 190 |
+
"lstrip": false,
|
| 191 |
+
"normalized": true,
|
| 192 |
+
"rstrip": false,
|
| 193 |
+
"single_word": false,
|
| 194 |
+
"special": false
|
| 195 |
+
},
|
| 196 |
+
"50280": {
|
| 197 |
+
"content": " ",
|
| 198 |
+
"lstrip": false,
|
| 199 |
+
"normalized": true,
|
| 200 |
+
"rstrip": false,
|
| 201 |
+
"single_word": false,
|
| 202 |
+
"special": false
|
| 203 |
+
},
|
| 204 |
+
"50281": {
|
| 205 |
+
"content": " ",
|
| 206 |
+
"lstrip": false,
|
| 207 |
+
"normalized": true,
|
| 208 |
+
"rstrip": false,
|
| 209 |
+
"single_word": false,
|
| 210 |
+
"special": false
|
| 211 |
+
},
|
| 212 |
+
"50282": {
|
| 213 |
+
"content": " ",
|
| 214 |
+
"lstrip": false,
|
| 215 |
+
"normalized": true,
|
| 216 |
+
"rstrip": false,
|
| 217 |
+
"single_word": false,
|
| 218 |
+
"special": false
|
| 219 |
+
},
|
| 220 |
+
"50283": {
|
| 221 |
+
"content": " ",
|
| 222 |
+
"lstrip": false,
|
| 223 |
+
"normalized": true,
|
| 224 |
+
"rstrip": false,
|
| 225 |
+
"single_word": false,
|
| 226 |
+
"special": false
|
| 227 |
+
},
|
| 228 |
+
"50284": {
|
| 229 |
+
"content": " ",
|
| 230 |
+
"lstrip": false,
|
| 231 |
+
"normalized": true,
|
| 232 |
+
"rstrip": false,
|
| 233 |
+
"single_word": false,
|
| 234 |
+
"special": false
|
| 235 |
+
},
|
| 236 |
+
"50285": {
|
| 237 |
+
"content": " ",
|
| 238 |
+
"lstrip": false,
|
| 239 |
+
"normalized": true,
|
| 240 |
+
"rstrip": false,
|
| 241 |
+
"single_word": false,
|
| 242 |
+
"special": false
|
| 243 |
+
},
|
| 244 |
+
"50286": {
|
| 245 |
+
"content": " ",
|
| 246 |
+
"lstrip": false,
|
| 247 |
+
"normalized": true,
|
| 248 |
+
"rstrip": false,
|
| 249 |
+
"single_word": false,
|
| 250 |
+
"special": false
|
| 251 |
+
},
|
| 252 |
+
"50287": {
|
| 253 |
+
"content": "\t\t\t\t\t\t\t\t\t",
|
| 254 |
+
"lstrip": false,
|
| 255 |
+
"normalized": true,
|
| 256 |
+
"rstrip": false,
|
| 257 |
+
"single_word": false,
|
| 258 |
+
"special": false
|
| 259 |
+
},
|
| 260 |
+
"50288": {
|
| 261 |
+
"content": "\t\t\t\t\t\t\t\t",
|
| 262 |
+
"lstrip": false,
|
| 263 |
+
"normalized": true,
|
| 264 |
+
"rstrip": false,
|
| 265 |
+
"single_word": false,
|
| 266 |
+
"special": false
|
| 267 |
+
},
|
| 268 |
+
"50289": {
|
| 269 |
+
"content": "\t\t\t\t\t\t\t",
|
| 270 |
+
"lstrip": false,
|
| 271 |
+
"normalized": true,
|
| 272 |
+
"rstrip": false,
|
| 273 |
+
"single_word": false,
|
| 274 |
+
"special": false
|
| 275 |
+
},
|
| 276 |
+
"50290": {
|
| 277 |
+
"content": "\t\t\t\t\t\t",
|
| 278 |
+
"lstrip": false,
|
| 279 |
+
"normalized": true,
|
| 280 |
+
"rstrip": false,
|
| 281 |
+
"single_word": false,
|
| 282 |
+
"special": false
|
| 283 |
+
},
|
| 284 |
+
"50291": {
|
| 285 |
+
"content": "\t\t\t\t\t",
|
| 286 |
+
"lstrip": false,
|
| 287 |
+
"normalized": true,
|
| 288 |
+
"rstrip": false,
|
| 289 |
+
"single_word": false,
|
| 290 |
+
"special": false
|
| 291 |
+
},
|
| 292 |
+
"50292": {
|
| 293 |
+
"content": "\t\t\t\t",
|
| 294 |
+
"lstrip": false,
|
| 295 |
+
"normalized": true,
|
| 296 |
+
"rstrip": false,
|
| 297 |
+
"single_word": false,
|
| 298 |
+
"special": false
|
| 299 |
+
},
|
| 300 |
+
"50293": {
|
| 301 |
+
"content": "\t\t\t",
|
| 302 |
+
"lstrip": false,
|
| 303 |
+
"normalized": true,
|
| 304 |
+
"rstrip": false,
|
| 305 |
+
"single_word": false,
|
| 306 |
+
"special": false
|
| 307 |
+
},
|
| 308 |
+
"50294": {
|
| 309 |
+
"content": "\t\t",
|
| 310 |
+
"lstrip": false,
|
| 311 |
+
"normalized": true,
|
| 312 |
+
"rstrip": false,
|
| 313 |
+
"single_word": false,
|
| 314 |
+
"special": false
|
| 315 |
+
}
|
| 316 |
+
},
|
| 317 |
+
"bos_token": "<|endoftext|>",
|
| 318 |
+
"clean_up_tokenization_spaces": true,
|
| 319 |
+
"eos_token": "<|endoftext|>",
|
| 320 |
+
"model_max_length": 2048,
|
| 321 |
+
"tokenizer_class": "CodeGenTokenizer",
|
| 322 |
+
"unk_token": "<|endoftext|>"
|
| 323 |
+
}
|
main.js
CHANGED
|
@@ -53,51 +53,45 @@ function createElem(tag, attrs = {}, children = []) {
|
|
| 53 |
|
| 54 |
const pageCategories = [
|
| 55 |
{
|
| 56 |
-
title: `
|
| 57 |
-
description: `
|
| 58 |
demos: {
|
| 59 |
sam: {
|
| 60 |
name: 'Segment Anything',
|
| 61 |
description: `Segment Anything from https://github.com/guschmue/ort-webgpu/tree/master/segment-anything`,
|
| 62 |
-
filename: "sam",
|
| 63 |
},
|
| 64 |
sdturbo: {
|
| 65 |
name: 'Stable Diffusion Turbo',
|
| 66 |
description: `Stable Diffusion Turbo from https://github.com/guschmue/ort-webgpu/tree/master/sd-turbo`,
|
| 67 |
-
filename: "
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 68 |
},
|
| 69 |
yolo: {
|
| 70 |
name: 'Yolo',
|
| 71 |
description: `Yolo V9 from https://github.com/guschmue/ort-webgpu/tree/master/yolov9`,
|
| 72 |
-
filename: "yolo",
|
| 73 |
},
|
| 74 |
},
|
| 75 |
},
|
| 76 |
{
|
| 77 |
-
title: `
|
| 78 |
-
description: `
|
| 79 |
demos: {
|
| 80 |
gemma: {
|
| 81 |
name: 'Gemma',
|
| 82 |
-
description: `Gemma with
|
| 83 |
-
filename: "
|
| 84 |
-
},
|
| 85 |
-
},
|
| 86 |
-
},
|
| 87 |
-
{
|
| 88 |
-
title: `Natural Language Processing`,
|
| 89 |
-
description: `Natural Language Processing`,
|
| 90 |
-
demos: {
|
| 91 |
-
tinyllama: {
|
| 92 |
-
name: 'Tiny Llama',
|
| 93 |
-
description: `Tiny Llama from https://github.com/guschmue/ort-webgpu/tree/master/chat`,
|
| 94 |
-
filename: "tinyllama",
|
| 95 |
},
|
| 96 |
},
|
| 97 |
},
|
| 98 |
{
|
| 99 |
-
title: 'Transformers.js
|
| 100 |
-
description: 'Transformers.js
|
| 101 |
demos: {
|
| 102 |
benchmark: {
|
| 103 |
name: 'Benchmark',
|
|
@@ -126,8 +120,8 @@ const pageCategories = [
|
|
| 126 |
},
|
| 127 |
},
|
| 128 |
{
|
| 129 |
-
title: 'TVM
|
| 130 |
-
description: 'TVM
|
| 131 |
demos: {
|
| 132 |
sd: {
|
| 133 |
name: 'Web Stable Diffusion',
|
|
|
|
| 53 |
|
| 54 |
const pageCategories = [
|
| 55 |
{
|
| 56 |
+
title: `ONNX Runtime`,
|
| 57 |
+
description: `ONNX Runtime`,
|
| 58 |
demos: {
|
| 59 |
sam: {
|
| 60 |
name: 'Segment Anything',
|
| 61 |
description: `Segment Anything from https://github.com/guschmue/ort-webgpu/tree/master/segment-anything`,
|
| 62 |
+
filename: "ort-sam",
|
| 63 |
},
|
| 64 |
sdturbo: {
|
| 65 |
name: 'Stable Diffusion Turbo',
|
| 66 |
description: `Stable Diffusion Turbo from https://github.com/guschmue/ort-webgpu/tree/master/sd-turbo`,
|
| 67 |
+
filename: "ort-sd-turbo",
|
| 68 |
+
},
|
| 69 |
+
tinyllama: {
|
| 70 |
+
name: 'Tiny Llama',
|
| 71 |
+
description: `Tiny Llama from https://github.com/guschmue/ort-webgpu/tree/master/chat`,
|
| 72 |
+
filename: "ort-tiny-llama",
|
| 73 |
},
|
| 74 |
yolo: {
|
| 75 |
name: 'Yolo',
|
| 76 |
description: `Yolo V9 from https://github.com/guschmue/ort-webgpu/tree/master/yolov9`,
|
| 77 |
+
filename: "ort-yolo",
|
| 78 |
},
|
| 79 |
},
|
| 80 |
},
|
| 81 |
{
|
| 82 |
+
title: `TFLite`,
|
| 83 |
+
description: `TFLite`,
|
| 84 |
demos: {
|
| 85 |
gemma: {
|
| 86 |
name: 'Gemma',
|
| 87 |
+
description: `Gemma with TFLite and MediaPipe from https://github.com/googlesamples/mediapipe/tree/main/examples/llm_inference/js, <a href=https://developers.googleblog.com/2024/03/running-large-language-models-on-device-with-mediapipe-andtensorflow-lite.html>more info.</a>`,
|
| 88 |
+
filename: "tflite-gemma",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 89 |
},
|
| 90 |
},
|
| 91 |
},
|
| 92 |
{
|
| 93 |
+
title: 'Transformers.js',
|
| 94 |
+
description: 'Transformers.js',
|
| 95 |
demos: {
|
| 96 |
benchmark: {
|
| 97 |
name: 'Benchmark',
|
|
|
|
| 120 |
},
|
| 121 |
},
|
| 122 |
{
|
| 123 |
+
title: 'TVM',
|
| 124 |
+
description: 'TVM',
|
| 125 |
demos: {
|
| 126 |
sd: {
|
| 127 |
name: 'Web Stable Diffusion',
|
transformers/transformers.js
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|