diff --git a/web-llm-models/v0_2_80/Llama-2-13b-chat-hf-q4f16_1-ctx4k_cs1k-webgpu.wasm b/web-llm-models/v0_2_80/Llama-2-13b-chat-hf-q4f16_1-ctx4k_cs1k-webgpu.wasm new file mode 100644 index 0000000000000000000000000000000000000000..095f7585905d89c4c6849f878860ad3b8198ba0f --- /dev/null +++ b/web-llm-models/v0_2_80/Llama-2-13b-chat-hf-q4f16_1-ctx4k_cs1k-webgpu.wasm @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52a52a6a8d8904cb56cacdbdc0ff78a90cd195fda92c66a2640ef1f2742451e6 +size 6685344 diff --git a/web-llm-models/v0_2_80/Llama-2-7b-chat-hf-q4f16_1-ctx4k_cs1k-webgpu.wasm b/web-llm-models/v0_2_80/Llama-2-7b-chat-hf-q4f16_1-ctx4k_cs1k-webgpu.wasm new file mode 100644 index 0000000000000000000000000000000000000000..75b4e2b89d05aaa40effa2721440508395ab04e5 --- /dev/null +++ b/web-llm-models/v0_2_80/Llama-2-7b-chat-hf-q4f16_1-ctx4k_cs1k-webgpu.wasm @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5988d0a28cbf992cf462a7ef0d1ce5dc7586f086418f3cd350f98ab88c4b880f +size 6257536 diff --git a/web-llm-models/v0_2_80/Llama-2-7b-chat-hf-q4f32_1-ctx4k_cs1k-webgpu.wasm b/web-llm-models/v0_2_80/Llama-2-7b-chat-hf-q4f32_1-ctx4k_cs1k-webgpu.wasm new file mode 100644 index 0000000000000000000000000000000000000000..6255e6780a7c64836d9df4344273ee25eb3192b5 --- /dev/null +++ b/web-llm-models/v0_2_80/Llama-2-7b-chat-hf-q4f32_1-ctx4k_cs1k-webgpu.wasm @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:993a3996fff1139069e8265bc784d7f2768d3d562a8fe871b437b0633a47614c +size 6041086 diff --git a/web-llm-models/v0_2_80/Llama-3-70B-Instruct-q3f16_1-ctx4k_cs1k-webgpu.wasm b/web-llm-models/v0_2_80/Llama-3-70B-Instruct-q3f16_1-ctx4k_cs1k-webgpu.wasm new file mode 100644 index 0000000000000000000000000000000000000000..62c5ba77b1568e8d29166f00e9dd65109ea55371 --- /dev/null +++ b/web-llm-models/v0_2_80/Llama-3-70B-Instruct-q3f16_1-ctx4k_cs1k-webgpu.wasm @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ace8e299187b5f33a9b70d21f5d7fa5a3e57f2385297588d61e729cd52c1954a +size 8675576 diff --git a/web-llm-models/v0_2_80/Llama-3-8B-Instruct-q4f16_1-ctx4k_cs1k-webgpu.wasm b/web-llm-models/v0_2_80/Llama-3-8B-Instruct-q4f16_1-ctx4k_cs1k-webgpu.wasm new file mode 100644 index 0000000000000000000000000000000000000000..c9baa18b7d00f46aa12ca25c882f01754548fabc --- /dev/null +++ b/web-llm-models/v0_2_80/Llama-3-8B-Instruct-q4f16_1-ctx4k_cs1k-webgpu.wasm @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:addfad8cc99535a23184b965427baf58ac0125f1aa572d8a080a8bce9f7bd594 +size 6287983 diff --git a/web-llm-models/v0_2_80/Llama-3-8B-Instruct-q4f32_1-ctx4k_cs1k-webgpu.wasm b/web-llm-models/v0_2_80/Llama-3-8B-Instruct-q4f32_1-ctx4k_cs1k-webgpu.wasm new file mode 100644 index 0000000000000000000000000000000000000000..861b67a6c20d8c299b50538c31ff5dd2e82aefa7 --- /dev/null +++ b/web-llm-models/v0_2_80/Llama-3-8B-Instruct-q4f32_1-ctx4k_cs1k-webgpu.wasm @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:356464745dff82129d4c1e2ed46d6601d533f583ab2c7ce5ebea41d83e2c206e +size 6071871 diff --git a/web-llm-models/v0_2_80/Llama-3.2-1B-Instruct-q0f16-ctx4k_cs1k-webgpu.wasm b/web-llm-models/v0_2_80/Llama-3.2-1B-Instruct-q0f16-ctx4k_cs1k-webgpu.wasm new file mode 100644 index 0000000000000000000000000000000000000000..c32aa1bec249b6eaa4c3ba45c73037bc9a4dcf6a --- /dev/null +++ b/web-llm-models/v0_2_80/Llama-3.2-1B-Instruct-q0f16-ctx4k_cs1k-webgpu.wasm @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1b5c56181fcbbea0bb6059fcccb5df29d1ec7c24dc2e8eb7a4a2f3da2afc652 +size 5259462 diff --git a/web-llm-models/v0_2_80/Llama-3.2-1B-Instruct-q0f32-ctx4k_cs1k-webgpu.wasm b/web-llm-models/v0_2_80/Llama-3.2-1B-Instruct-q0f32-ctx4k_cs1k-webgpu.wasm new file mode 100644 index 0000000000000000000000000000000000000000..788900112bdf8a29ffc2e425f785153b240935b5 --- /dev/null +++ b/web-llm-models/v0_2_80/Llama-3.2-1B-Instruct-q0f32-ctx4k_cs1k-webgpu.wasm @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f21496566a0dc6c76cb7d7442a325de8d6ecced5b3b75e5b33279a7031a5672b +size 5168895 diff --git a/web-llm-models/v0_2_80/Llama-3.2-1B-Instruct-q4f16_1-ctx4k_cs1k-webgpu.wasm b/web-llm-models/v0_2_80/Llama-3.2-1B-Instruct-q4f16_1-ctx4k_cs1k-webgpu.wasm new file mode 100644 index 0000000000000000000000000000000000000000..b844931949a90a31e1bbe12396c1a3544c3f70ce --- /dev/null +++ b/web-llm-models/v0_2_80/Llama-3.2-1B-Instruct-q4f16_1-ctx4k_cs1k-webgpu.wasm @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7b8dad5e6e136819f726235ddd8f99da6086f1f4d4b9b2b035249d764a31cc1 +size 5498539 diff --git a/web-llm-models/v0_2_80/Llama-3.2-1B-Instruct-q4f32_1-ctx4k_cs1k-webgpu.wasm b/web-llm-models/v0_2_80/Llama-3.2-1B-Instruct-q4f32_1-ctx4k_cs1k-webgpu.wasm new file mode 100644 index 0000000000000000000000000000000000000000..2a68a13a465e0015dc811a3cd11229c41464621a --- /dev/null +++ b/web-llm-models/v0_2_80/Llama-3.2-1B-Instruct-q4f32_1-ctx4k_cs1k-webgpu.wasm @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd4c5654098a833b6367a97b977f355de0a8c8b4cbec3322d2d91f7ef838c5a9 +size 5383379 diff --git a/web-llm-models/v0_2_80/Llama-3.2-3B-Instruct-q4f16_1-ctx4k_cs1k-webgpu.wasm b/web-llm-models/v0_2_80/Llama-3.2-3B-Instruct-q4f16_1-ctx4k_cs1k-webgpu.wasm new file mode 100644 index 0000000000000000000000000000000000000000..86571c2802835b751c1c8dbf9e769a5d963065c5 --- /dev/null +++ b/web-llm-models/v0_2_80/Llama-3.2-3B-Instruct-q4f16_1-ctx4k_cs1k-webgpu.wasm @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:34de0d60ab598c6a85ae882b48474f250193076f902057a21070bb2daae96d5b +size 6131270 diff --git a/web-llm-models/v0_2_80/Llama-3.2-3B-Instruct-q4f32_1-ctx4k_cs1k-webgpu.wasm b/web-llm-models/v0_2_80/Llama-3.2-3B-Instruct-q4f32_1-ctx4k_cs1k-webgpu.wasm new file mode 100644 index 0000000000000000000000000000000000000000..43680fec72f0333e6ba5df6ec83389c068016f27 --- /dev/null +++ b/web-llm-models/v0_2_80/Llama-3.2-3B-Instruct-q4f32_1-ctx4k_cs1k-webgpu.wasm @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:224e4cd5c235621684257aad3ebfdaf6e337d50f783614d1e7ed63e78d693ad6 +size 5935137 diff --git a/web-llm-models/v0_2_80/Llama-3_1-70B-Instruct-q3f16_1-ctx4k_cs1k-webgpu.wasm b/web-llm-models/v0_2_80/Llama-3_1-70B-Instruct-q3f16_1-ctx4k_cs1k-webgpu.wasm new file mode 100644 index 0000000000000000000000000000000000000000..a6859e5306797817fe1405f0e91cc0f954c3887c --- /dev/null +++ b/web-llm-models/v0_2_80/Llama-3_1-70B-Instruct-q3f16_1-ctx4k_cs1k-webgpu.wasm @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0479882a8d99f1a24cb4638169b502de6c004fc9ad2b758f2a47aaa7ac241208 +size 8684357 diff --git a/web-llm-models/v0_2_80/Llama-3_1-8B-Instruct-q4f16_1-ctx4k_cs1k-webgpu.wasm b/web-llm-models/v0_2_80/Llama-3_1-8B-Instruct-q4f16_1-ctx4k_cs1k-webgpu.wasm new file mode 100644 index 0000000000000000000000000000000000000000..5a859befc721288ac748a02fde3704f78402a985 --- /dev/null +++ b/web-llm-models/v0_2_80/Llama-3_1-8B-Instruct-q4f16_1-ctx4k_cs1k-webgpu.wasm @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ebfaaeb06478b598990176e57564ef7e36f7a47e8cb1b8bbf888e27305489679 +size 6296764 diff --git a/web-llm-models/v0_2_80/Llama-3_1-8B-Instruct-q4f32_1-ctx4k_cs1k-webgpu.wasm b/web-llm-models/v0_2_80/Llama-3_1-8B-Instruct-q4f32_1-ctx4k_cs1k-webgpu.wasm new file mode 100644 index 0000000000000000000000000000000000000000..0a18f6910b9936dfcbf5439946543933516a35a0 --- /dev/null +++ b/web-llm-models/v0_2_80/Llama-3_1-8B-Instruct-q4f32_1-ctx4k_cs1k-webgpu.wasm @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40c9e1d68be51144d0f21b42d9e863d6662b59e339af3d46a73da368b814e992 +size 6078595 diff --git a/web-llm-models/v0_2_80/Ministral-3-3B-Base-2512-q4f16_1-ctx4k_cs1k-webgpu.wasm b/web-llm-models/v0_2_80/Ministral-3-3B-Base-2512-q4f16_1-ctx4k_cs1k-webgpu.wasm new file mode 100644 index 0000000000000000000000000000000000000000..9b202ce8a4549993ed0879e5b36c2caaa2142915 --- /dev/null +++ b/web-llm-models/v0_2_80/Ministral-3-3B-Base-2512-q4f16_1-ctx4k_cs1k-webgpu.wasm @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e9cfbdc5df6d8c4f194201896e1d4514056bbc045c07f073efa9c60d905a03f +size 5427883 diff --git a/web-llm-models/v0_2_80/Ministral-3-3B-Instruct-2512-BF16-q4f16_1-ctx4k_cs1k-webgpu.wasm b/web-llm-models/v0_2_80/Ministral-3-3B-Instruct-2512-BF16-q4f16_1-ctx4k_cs1k-webgpu.wasm new file mode 100644 index 0000000000000000000000000000000000000000..9b202ce8a4549993ed0879e5b36c2caaa2142915 --- /dev/null +++ b/web-llm-models/v0_2_80/Ministral-3-3B-Instruct-2512-BF16-q4f16_1-ctx4k_cs1k-webgpu.wasm @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e9cfbdc5df6d8c4f194201896e1d4514056bbc045c07f073efa9c60d905a03f +size 5427883 diff --git a/web-llm-models/v0_2_80/Ministral-3-3B-Reasoning-2512-q4f16_1-ctx4k_cs1k-webgpu.wasm b/web-llm-models/v0_2_80/Ministral-3-3B-Reasoning-2512-q4f16_1-ctx4k_cs1k-webgpu.wasm new file mode 100644 index 0000000000000000000000000000000000000000..9b202ce8a4549993ed0879e5b36c2caaa2142915 --- /dev/null +++ b/web-llm-models/v0_2_80/Ministral-3-3B-Reasoning-2512-q4f16_1-ctx4k_cs1k-webgpu.wasm @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e9cfbdc5df6d8c4f194201896e1d4514056bbc045c07f073efa9c60d905a03f +size 5427883 diff --git a/web-llm-models/v0_2_80/Mistral-7B-Instruct-v0.3-q4f16_1-ctx4k_cs1k-webgpu.wasm b/web-llm-models/v0_2_80/Mistral-7B-Instruct-v0.3-q4f16_1-ctx4k_cs1k-webgpu.wasm new file mode 100644 index 0000000000000000000000000000000000000000..0ede375cd35c63eea065365e97fc5130ccb821f4 --- /dev/null +++ b/web-llm-models/v0_2_80/Mistral-7B-Instruct-v0.3-q4f16_1-ctx4k_cs1k-webgpu.wasm @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5bbdc492f08c21911868c55f23fc4a6b8932adf2179bcf26ccf2ad7fbd0561f7 +size 5456419 diff --git a/web-llm-models/v0_2_80/Mistral-7B-Instruct-v0.3-q4f32_1-ctx4k_cs1k-webgpu.wasm b/web-llm-models/v0_2_80/Mistral-7B-Instruct-v0.3-q4f32_1-ctx4k_cs1k-webgpu.wasm new file mode 100644 index 0000000000000000000000000000000000000000..591ca530c150a97efa1ab6bb2a0e845b7df37eb6 --- /dev/null +++ b/web-llm-models/v0_2_80/Mistral-7B-Instruct-v0.3-q4f32_1-ctx4k_cs1k-webgpu.wasm @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:900df8f5036b014cfd15639ebde77c3ef998bdbab412e98f4f616b64462ddeda +size 5320391 diff --git a/web-llm-models/v0_2_80/Phi-3-mini-4k-instruct-old-q4f16_1-ctx4k_cs1k-webgpu.wasm b/web-llm-models/v0_2_80/Phi-3-mini-4k-instruct-old-q4f16_1-ctx4k_cs1k-webgpu.wasm new file mode 100644 index 0000000000000000000000000000000000000000..f0eea34ad0c89c1a9bbf710b2b750b66e70c1b54 --- /dev/null +++ b/web-llm-models/v0_2_80/Phi-3-mini-4k-instruct-old-q4f16_1-ctx4k_cs1k-webgpu.wasm @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db9c6c6043d7049da13c9cffce0d59aae91e1824729d84ad41920e98e627164d +size 5461675 diff --git a/web-llm-models/v0_2_80/Phi-3-mini-4k-instruct-q4f16_1-ctx4k_cs1k-webgpu.wasm b/web-llm-models/v0_2_80/Phi-3-mini-4k-instruct-q4f16_1-ctx4k_cs1k-webgpu.wasm new file mode 100644 index 0000000000000000000000000000000000000000..f0eea34ad0c89c1a9bbf710b2b750b66e70c1b54 --- /dev/null +++ b/web-llm-models/v0_2_80/Phi-3-mini-4k-instruct-q4f16_1-ctx4k_cs1k-webgpu.wasm @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db9c6c6043d7049da13c9cffce0d59aae91e1824729d84ad41920e98e627164d +size 5461675 diff --git a/web-llm-models/v0_2_80/Phi-3-mini-4k-instruct-q4f32_1-ctx4k_cs1k-webgpu.wasm b/web-llm-models/v0_2_80/Phi-3-mini-4k-instruct-q4f32_1-ctx4k_cs1k-webgpu.wasm new file mode 100644 index 0000000000000000000000000000000000000000..09d6afd70ffc8bcce187e36211a3bdfb1da3d124 --- /dev/null +++ b/web-llm-models/v0_2_80/Phi-3-mini-4k-instruct-q4f32_1-ctx4k_cs1k-webgpu.wasm @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ad8cf53263e1db187baa9835e0bfe6ac1c1adfc5c79ace44378cb579bc0c648 +size 5340809 diff --git a/web-llm-models/v0_2_80/Phi-3.5-mini-instruct-q4f16_1-ctx4k_cs1k-webgpu.wasm b/web-llm-models/v0_2_80/Phi-3.5-mini-instruct-q4f16_1-ctx4k_cs1k-webgpu.wasm new file mode 100644 index 0000000000000000000000000000000000000000..4ad4482dbc5b09c000628061a429ff171a495621 --- /dev/null +++ b/web-llm-models/v0_2_80/Phi-3.5-mini-instruct-q4f16_1-ctx4k_cs1k-webgpu.wasm @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f58ffc1ed1dc812e436e73f3507a9ffdf9c564dd3c55e0ae3be789d4a2baa6f +size 5471501 diff --git a/web-llm-models/v0_2_80/Phi-3.5-mini-instruct-q4f32_1-ctx4k_cs1k-webgpu.wasm b/web-llm-models/v0_2_80/Phi-3.5-mini-instruct-q4f32_1-ctx4k_cs1k-webgpu.wasm new file mode 100644 index 0000000000000000000000000000000000000000..52e1be5de55aca4eba9d6b5d5db93d07ea21e307 --- /dev/null +++ b/web-llm-models/v0_2_80/Phi-3.5-mini-instruct-q4f32_1-ctx4k_cs1k-webgpu.wasm @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ca419625e62d6b2a5980725523b077465f810fcf03080adf03366f6b28ddb1f +size 5350232 diff --git a/web-llm-models/v0_2_80/Phi-3.5-vision-instruct-q4f16_1-ctx4k_cs2k-webgpu.wasm b/web-llm-models/v0_2_80/Phi-3.5-vision-instruct-q4f16_1-ctx4k_cs2k-webgpu.wasm new file mode 100644 index 0000000000000000000000000000000000000000..a9fcb29ee090ece9a17dedfaebd7e094037c0275 --- /dev/null +++ b/web-llm-models/v0_2_80/Phi-3.5-vision-instruct-q4f16_1-ctx4k_cs2k-webgpu.wasm @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:279661c096bcabdcaaa4652b6586ab1aa9b31acf546364af7bfb087b0862b205 +size 7017449 diff --git a/web-llm-models/v0_2_80/Phi-3.5-vision-instruct-q4f32_1-ctx4k_cs2k-webgpu.wasm b/web-llm-models/v0_2_80/Phi-3.5-vision-instruct-q4f32_1-ctx4k_cs2k-webgpu.wasm new file mode 100644 index 0000000000000000000000000000000000000000..2890793d90f2dc798f7da5d2c4c383ee76b4d874 --- /dev/null +++ b/web-llm-models/v0_2_80/Phi-3.5-vision-instruct-q4f32_1-ctx4k_cs2k-webgpu.wasm @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7aa07be8fb7d24b4f730131a250539f347d694ed42e6bd8d141ce71849745190 +size 6882723 diff --git a/web-llm-models/v0_2_80/Qwen2-0.5B-Instruct-q0f16-ctx4k_cs1k-webgpu.wasm b/web-llm-models/v0_2_80/Qwen2-0.5B-Instruct-q0f16-ctx4k_cs1k-webgpu.wasm new file mode 100644 index 0000000000000000000000000000000000000000..06157fb0b2a904e3ee71fb222aa7a8832535e053 --- /dev/null +++ b/web-llm-models/v0_2_80/Qwen2-0.5B-Instruct-q0f16-ctx4k_cs1k-webgpu.wasm @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab3b3d81d8c6a15c1bd166a93a1e726fdd898736b855c7aa71f8379df56bd102 +size 4934604 diff --git a/web-llm-models/v0_2_80/Qwen2-0.5B-Instruct-q0f32-ctx4k_cs1k-webgpu.wasm b/web-llm-models/v0_2_80/Qwen2-0.5B-Instruct-q0f32-ctx4k_cs1k-webgpu.wasm new file mode 100644 index 0000000000000000000000000000000000000000..ac6d7e8ee3f641443500746e4683fd4411613ccb --- /dev/null +++ b/web-llm-models/v0_2_80/Qwen2-0.5B-Instruct-q0f32-ctx4k_cs1k-webgpu.wasm @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22ff63b9ac16e775e4ad3ecc3416931424b9b969281ed5963cb9006a35399e88 +size 4905353 diff --git a/web-llm-models/v0_2_80/Qwen2-0.5B-Instruct-q4f16_1-ctx4k_cs1k-webgpu.wasm b/web-llm-models/v0_2_80/Qwen2-0.5B-Instruct-q4f16_1-ctx4k_cs1k-webgpu.wasm new file mode 100644 index 0000000000000000000000000000000000000000..16bbd3c6b4ae7b464245e2219bfdaae1997a508b --- /dev/null +++ b/web-llm-models/v0_2_80/Qwen2-0.5B-Instruct-q4f16_1-ctx4k_cs1k-webgpu.wasm @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25495a03bf7ac80a1925bc91e236c74c3ad52be89a9ed0d0eeb18e4a9d135234 +size 5008576 diff --git a/web-llm-models/v0_2_80/Qwen2-0.5B-Instruct-q4f32_1-ctx4k_cs1k-webgpu.wasm b/web-llm-models/v0_2_80/Qwen2-0.5B-Instruct-q4f32_1-ctx4k_cs1k-webgpu.wasm new file mode 100644 index 0000000000000000000000000000000000000000..1d605e81953f19ea582eb06025476a7e7ad672e9 --- /dev/null +++ b/web-llm-models/v0_2_80/Qwen2-0.5B-Instruct-q4f32_1-ctx4k_cs1k-webgpu.wasm @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9e8485cdb34adba3afa3b01ea31d323a96fc95e2cf04535f5f5841336efbe94 +size 4980266 diff --git a/web-llm-models/v0_2_80/Qwen2-1.5B-Instruct-q4f16_1-ctx4k_cs1k-webgpu.wasm b/web-llm-models/v0_2_80/Qwen2-1.5B-Instruct-q4f16_1-ctx4k_cs1k-webgpu.wasm new file mode 100644 index 0000000000000000000000000000000000000000..4a25d63601b48ec6ca0dc88a959aae4270df14ad --- /dev/null +++ b/web-llm-models/v0_2_80/Qwen2-1.5B-Instruct-q4f16_1-ctx4k_cs1k-webgpu.wasm @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6cd9f132ad258d2017291bb62e955e337f8ae4af74650f4b1ea5a803a7cec538 +size 5383844 diff --git a/web-llm-models/v0_2_80/Qwen2-1.5B-Instruct-q4f32_1-ctx4k_cs1k-webgpu.wasm b/web-llm-models/v0_2_80/Qwen2-1.5B-Instruct-q4f32_1-ctx4k_cs1k-webgpu.wasm new file mode 100644 index 0000000000000000000000000000000000000000..88f73f72b3dd85b913480b1bcf5cc2bc6ee0ab19 --- /dev/null +++ b/web-llm-models/v0_2_80/Qwen2-1.5B-Instruct-q4f32_1-ctx4k_cs1k-webgpu.wasm @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3a7c95f4dd67ae11c91e7ebe8e849e9701be34ecf9b83963bb45a6ba1aaa402 +size 5265971 diff --git a/web-llm-models/v0_2_80/Qwen2-7B-Instruct-q4f16_1-ctx4k_cs1k-webgpu.wasm b/web-llm-models/v0_2_80/Qwen2-7B-Instruct-q4f16_1-ctx4k_cs1k-webgpu.wasm new file mode 100644 index 0000000000000000000000000000000000000000..5dc71ab079ac06829dfac870bf79c7f7455684d7 --- /dev/null +++ b/web-llm-models/v0_2_80/Qwen2-7B-Instruct-q4f16_1-ctx4k_cs1k-webgpu.wasm @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b1189537b68f7c3a9a1535cf9fb74d9b550b9b86fe7f61d895bec9645af31f2c +size 5498734 diff --git a/web-llm-models/v0_2_80/Qwen2-7B-Instruct-q4f32_1-ctx4k_cs1k-webgpu.wasm b/web-llm-models/v0_2_80/Qwen2-7B-Instruct-q4f32_1-ctx4k_cs1k-webgpu.wasm new file mode 100644 index 0000000000000000000000000000000000000000..3dd2cdef91abcd193039ee93c1b758bb93ef598f --- /dev/null +++ b/web-llm-models/v0_2_80/Qwen2-7B-Instruct-q4f32_1-ctx4k_cs1k-webgpu.wasm @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f1bb3c1c6148a07fe0c3bc8903d432c9ef213c1e8455d318e5b7fa8f90bfc57 +size 5372989 diff --git a/web-llm-models/v0_2_80/Qwen2.5-3B-Instruct-q4f16_1-ctx4k_cs1k-webgpu.wasm b/web-llm-models/v0_2_80/Qwen2.5-3B-Instruct-q4f16_1-ctx4k_cs1k-webgpu.wasm new file mode 100644 index 0000000000000000000000000000000000000000..9c9fd38fbb66563aca305c253d71915e4a569df6 --- /dev/null +++ b/web-llm-models/v0_2_80/Qwen2.5-3B-Instruct-q4f16_1-ctx4k_cs1k-webgpu.wasm @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba07594162783740a4426007351cbc0ee1c30d6d0da9617561b6a3a27fe3b63f +size 5599880 diff --git a/web-llm-models/v0_2_80/Qwen2.5-3B-Instruct-q4f32_1-ctx4k_cs1k-webgpu.wasm b/web-llm-models/v0_2_80/Qwen2.5-3B-Instruct-q4f32_1-ctx4k_cs1k-webgpu.wasm new file mode 100644 index 0000000000000000000000000000000000000000..50d89a82d5812a1283a0c4b65ec647fba6547744 --- /dev/null +++ b/web-llm-models/v0_2_80/Qwen2.5-3B-Instruct-q4f32_1-ctx4k_cs1k-webgpu.wasm @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec5719a9167313fb4fa3d379be8eed8b6920e47d1f5467609003b85197b1a74b +size 5461029 diff --git a/web-llm-models/v0_2_80/Qwen3-0.6B-q0f16-ctx4k_cs1k-webgpu.wasm b/web-llm-models/v0_2_80/Qwen3-0.6B-q0f16-ctx4k_cs1k-webgpu.wasm new file mode 100644 index 0000000000000000000000000000000000000000..deaca9a8a1b5405ab020bd1813f3e359d9287974 --- /dev/null +++ b/web-llm-models/v0_2_80/Qwen3-0.6B-q0f16-ctx4k_cs1k-webgpu.wasm @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f72edc58397cee4f593dba539c4b944ac25cf691eb9f614b8c8b3ae7765e94c5 +size 5564970 diff --git a/web-llm-models/v0_2_80/Qwen3-0.6B-q0f32-ctx4k_cs1k-webgpu.wasm b/web-llm-models/v0_2_80/Qwen3-0.6B-q0f32-ctx4k_cs1k-webgpu.wasm new file mode 100644 index 0000000000000000000000000000000000000000..ac8fb5a6681da1c4d2e2a076921565cbb35d7c26 --- /dev/null +++ b/web-llm-models/v0_2_80/Qwen3-0.6B-q0f32-ctx4k_cs1k-webgpu.wasm @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78bacc57e62992b78c4270fb648ac5a48512224888a74ba259f68d22bfa74f86 +size 5474695 diff --git a/web-llm-models/v0_2_80/Qwen3-0.6B-q4f16_1-ctx4k_cs1k-webgpu.wasm b/web-llm-models/v0_2_80/Qwen3-0.6B-q4f16_1-ctx4k_cs1k-webgpu.wasm new file mode 100644 index 0000000000000000000000000000000000000000..ceafec5e049fc9abb5a820323e3720d34c411bff --- /dev/null +++ b/web-llm-models/v0_2_80/Qwen3-0.6B-q4f16_1-ctx4k_cs1k-webgpu.wasm @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a51cff80ad5fe1539e033a83b9a7d240957d37e3d8e620c6b43648aba90a0cc +size 5760161 diff --git a/web-llm-models/v0_2_80/Qwen3-0.6B-q4f32_1-ctx4k_cs1k-webgpu.wasm b/web-llm-models/v0_2_80/Qwen3-0.6B-q4f32_1-ctx4k_cs1k-webgpu.wasm new file mode 100644 index 0000000000000000000000000000000000000000..b55f061bba90f19721a24e10b29e00a75db83029 --- /dev/null +++ b/web-llm-models/v0_2_80/Qwen3-0.6B-q4f32_1-ctx4k_cs1k-webgpu.wasm @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6608536eb7173c62652e2186f53af5b908fd66542c12eaa7860a9b7f4c7db298 +size 5646002 diff --git a/web-llm-models/v0_2_80/Qwen3-1.7B-q4f16_1-ctx4k_cs1k-webgpu.wasm b/web-llm-models/v0_2_80/Qwen3-1.7B-q4f16_1-ctx4k_cs1k-webgpu.wasm new file mode 100644 index 0000000000000000000000000000000000000000..04aebede42f02d12e4de4ed2ed5f59af2bbc7bf4 --- /dev/null +++ b/web-llm-models/v0_2_80/Qwen3-1.7B-q4f16_1-ctx4k_cs1k-webgpu.wasm @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b0acc85b34ee45019d8df373afdba0da889caa32970169d34710b4fe43d3b06 +size 5790731 diff --git a/web-llm-models/v0_2_80/Qwen3-1.7B-q4f32_1-ctx4k_cs1k-webgpu.wasm b/web-llm-models/v0_2_80/Qwen3-1.7B-q4f32_1-ctx4k_cs1k-webgpu.wasm new file mode 100644 index 0000000000000000000000000000000000000000..6f37c98ee54ec188ddc12af5b669d39bfe47054c --- /dev/null +++ b/web-llm-models/v0_2_80/Qwen3-1.7B-q4f32_1-ctx4k_cs1k-webgpu.wasm @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:555e177216386434e8de9a7ca1f01706de41b33762e40b1f8b5d54a0732dbdf0 +size 5671614 diff --git a/web-llm-models/v0_2_80/Qwen3-4B-Instruct-2507-q4f16_1-ctx4k_cs1k-webgpu.wasm b/web-llm-models/v0_2_80/Qwen3-4B-Instruct-2507-q4f16_1-ctx4k_cs1k-webgpu.wasm new file mode 100644 index 0000000000000000000000000000000000000000..3d3e60410ff4f47d14be6e2cb00df3113b32c26e --- /dev/null +++ b/web-llm-models/v0_2_80/Qwen3-4B-Instruct-2507-q4f16_1-ctx4k_cs1k-webgpu.wasm @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a739267ccbb44057a7886797ed0855a5cdba206ab25bcc73732ec03e1738d32 +size 6070240 diff --git a/web-llm-models/v0_2_80/Qwen3-4B-Instruct-2507-q4f32_1-ctx4k_cs1k-webgpu.wasm b/web-llm-models/v0_2_80/Qwen3-4B-Instruct-2507-q4f32_1-ctx4k_cs1k-webgpu.wasm new file mode 100644 index 0000000000000000000000000000000000000000..7cf68e11f99b34152386d5282005b66413250c32 --- /dev/null +++ b/web-llm-models/v0_2_80/Qwen3-4B-Instruct-2507-q4f32_1-ctx4k_cs1k-webgpu.wasm @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8ea680da66e6a4cd51ba067c5d3d9be8bf365c0477803bccec3f261f9b303a6 +size 5929497 diff --git a/web-llm-models/v0_2_80/Qwen3-4B-Thinking-2507-q4f16_1-ctx4k_cs1k-webgpu.wasm b/web-llm-models/v0_2_80/Qwen3-4B-Thinking-2507-q4f16_1-ctx4k_cs1k-webgpu.wasm new file mode 100644 index 0000000000000000000000000000000000000000..3d3e60410ff4f47d14be6e2cb00df3113b32c26e --- /dev/null +++ b/web-llm-models/v0_2_80/Qwen3-4B-Thinking-2507-q4f16_1-ctx4k_cs1k-webgpu.wasm @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a739267ccbb44057a7886797ed0855a5cdba206ab25bcc73732ec03e1738d32 +size 6070240 diff --git a/web-llm-models/v0_2_80/Qwen3-4B-Thinking-2507-q4f32_1-ctx4k_cs1k-webgpu.wasm b/web-llm-models/v0_2_80/Qwen3-4B-Thinking-2507-q4f32_1-ctx4k_cs1k-webgpu.wasm new file mode 100644 index 0000000000000000000000000000000000000000..7cf68e11f99b34152386d5282005b66413250c32 --- /dev/null +++ b/web-llm-models/v0_2_80/Qwen3-4B-Thinking-2507-q4f32_1-ctx4k_cs1k-webgpu.wasm @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8ea680da66e6a4cd51ba067c5d3d9be8bf365c0477803bccec3f261f9b303a6 +size 5929497 diff --git a/web-llm-models/v0_2_80/Qwen3-4B-q4f16_1-ctx4k_cs1k-webgpu.wasm b/web-llm-models/v0_2_80/Qwen3-4B-q4f16_1-ctx4k_cs1k-webgpu.wasm new file mode 100644 index 0000000000000000000000000000000000000000..623ef079066fd0703fb8d63a0c94ef29316bcb20 --- /dev/null +++ b/web-llm-models/v0_2_80/Qwen3-4B-q4f16_1-ctx4k_cs1k-webgpu.wasm @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ddf44e49b03e53e24fd29a45591850924346140452f60c29280190388571340 +size 6070240 diff --git a/web-llm-models/v0_2_80/Qwen3-4B-q4f32_1-ctx4k_cs1k-webgpu.wasm b/web-llm-models/v0_2_80/Qwen3-4B-q4f32_1-ctx4k_cs1k-webgpu.wasm new file mode 100644 index 0000000000000000000000000000000000000000..fbf5c294175dd9e176bfcd2d6db09d9eccd20ab9 --- /dev/null +++ b/web-llm-models/v0_2_80/Qwen3-4B-q4f32_1-ctx4k_cs1k-webgpu.wasm @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6484148ff499c15433ab85e4d2c376aef453a6d797f48047e6bd6fd495a59e39 +size 5929497 diff --git a/web-llm-models/v0_2_80/Qwen3-8B-q4f16_1-ctx4k_cs1k-webgpu.wasm b/web-llm-models/v0_2_80/Qwen3-8B-q4f16_1-ctx4k_cs1k-webgpu.wasm new file mode 100644 index 0000000000000000000000000000000000000000..d5e3fc9bd42904110562c164ff0fd6867ac46c19 --- /dev/null +++ b/web-llm-models/v0_2_80/Qwen3-8B-q4f16_1-ctx4k_cs1k-webgpu.wasm @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd0b332c81212e484ba56c15aac5af2548f14f849d3352039ad68d3c4663b6dd +size 6079574 diff --git a/web-llm-models/v0_2_80/Qwen3-8B-q4f32_1-ctx4k_cs1k-webgpu.wasm b/web-llm-models/v0_2_80/Qwen3-8B-q4f32_1-ctx4k_cs1k-webgpu.wasm new file mode 100644 index 0000000000000000000000000000000000000000..3af769a033366744f3994452d617c284acbc2485 --- /dev/null +++ b/web-llm-models/v0_2_80/Qwen3-8B-q4f32_1-ctx4k_cs1k-webgpu.wasm @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4705cca63950fca8a8bb50279ccc44a1233b41a75318ec87fd9395f46e77358f +size 5933178 diff --git a/web-llm-models/v0_2_80/RedPajama-INCITE-Chat-3B-v1-q4f16_1-ctx2k_cs1k-webgpu.wasm b/web-llm-models/v0_2_80/RedPajama-INCITE-Chat-3B-v1-q4f16_1-ctx2k_cs1k-webgpu.wasm new file mode 100644 index 0000000000000000000000000000000000000000..722c4a49a34c23cdca07e9b2f8d895599aa66611 --- /dev/null +++ b/web-llm-models/v0_2_80/RedPajama-INCITE-Chat-3B-v1-q4f16_1-ctx2k_cs1k-webgpu.wasm @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:808cf1d1dc3b055a5182d3d553b97617d3eed74da9deb951abc4efa9aa1df5d7 +size 5758025 diff --git a/web-llm-models/v0_2_80/RedPajama-INCITE-Chat-3B-v1-q4f32_1-ctx2k_cs1k-webgpu.wasm b/web-llm-models/v0_2_80/RedPajama-INCITE-Chat-3B-v1-q4f32_1-ctx2k_cs1k-webgpu.wasm new file mode 100644 index 0000000000000000000000000000000000000000..f994c74395a221740b38ad0bb589e1d6301497e0 --- /dev/null +++ b/web-llm-models/v0_2_80/RedPajama-INCITE-Chat-3B-v1-q4f32_1-ctx2k_cs1k-webgpu.wasm @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a362f16c7b4f27d13b2c9859d06a64b21633f55c1b15c5cc1b86dc6c1fb24b6 +size 5711125 diff --git a/web-llm-models/v0_2_80/SmolLM2-1.7B-Instruct-q0f16-ctx4k_cs1k-webgpu.wasm b/web-llm-models/v0_2_80/SmolLM2-1.7B-Instruct-q0f16-ctx4k_cs1k-webgpu.wasm new file mode 100644 index 0000000000000000000000000000000000000000..752a06d8852b32d01a0452e33532a9cb28b79d21 --- /dev/null +++ b/web-llm-models/v0_2_80/SmolLM2-1.7B-Instruct-q0f16-ctx4k_cs1k-webgpu.wasm @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27cd02b91e46eedc5fff1d965ab204443e7b1028b9102bfaf719d4fe23785556 +size 5571268 diff --git a/web-llm-models/v0_2_80/SmolLM2-1.7B-Instruct-q4f16_1-ctx4k_cs1k-webgpu.wasm b/web-llm-models/v0_2_80/SmolLM2-1.7B-Instruct-q4f16_1-ctx4k_cs1k-webgpu.wasm new file mode 100644 index 0000000000000000000000000000000000000000..979f6f3a7666ef6d0eddd751f4b61da60f30f03a --- /dev/null +++ b/web-llm-models/v0_2_80/SmolLM2-1.7B-Instruct-q4f16_1-ctx4k_cs1k-webgpu.wasm @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a7d1f62079c4104420bbb2d427ef86ffa5cb6e40a3602ffffc2cae72c454039 +size 5860242 diff --git a/web-llm-models/v0_2_80/SmolLM2-1.7B-Instruct-q4f32_1-ctx4k_cs1k-webgpu.wasm b/web-llm-models/v0_2_80/SmolLM2-1.7B-Instruct-q4f32_1-ctx4k_cs1k-webgpu.wasm new file mode 100644 index 0000000000000000000000000000000000000000..896af405a45177f0bd779cb746c4ef4b5ac1380a --- /dev/null +++ b/web-llm-models/v0_2_80/SmolLM2-1.7B-Instruct-q4f32_1-ctx4k_cs1k-webgpu.wasm @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:609d8d4a3f4b8d9bf2f35ee9c66c990663f8695bfd2d31c0724b5f57de069225 +size 5707270 diff --git a/web-llm-models/v0_2_80/SmolLM2-135M-Instruct-q0f16-ctx4k_cs1k-webgpu.wasm b/web-llm-models/v0_2_80/SmolLM2-135M-Instruct-q0f16-ctx4k_cs1k-webgpu.wasm new file mode 100644 index 0000000000000000000000000000000000000000..924153599e500fe5f5119840bb9f337217f213cd --- /dev/null +++ b/web-llm-models/v0_2_80/SmolLM2-135M-Instruct-q0f16-ctx4k_cs1k-webgpu.wasm @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67b452272cdd31beb699c7bc40b457bccbaa969d1a0e64e2ee9aed75e81cbf7e +size 5536144 diff --git a/web-llm-models/v0_2_80/SmolLM2-135M-Instruct-q0f32-ctx4k_cs1k-webgpu.wasm b/web-llm-models/v0_2_80/SmolLM2-135M-Instruct-q0f32-ctx4k_cs1k-webgpu.wasm new file mode 100644 index 0000000000000000000000000000000000000000..df07b10f839e26ebb5ce7b2c28353e65b8b29055 --- /dev/null +++ b/web-llm-models/v0_2_80/SmolLM2-135M-Instruct-q0f32-ctx4k_cs1k-webgpu.wasm @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d31657fd7014d15694e5c62b821c632d247b164456387eed36268333d5912b5 +size 5507329 diff --git a/web-llm-models/v0_2_80/SmolLM2-135M-Instruct-q4f16_1-ctx4k_cs1k-webgpu.wasm b/web-llm-models/v0_2_80/SmolLM2-135M-Instruct-q4f16_1-ctx4k_cs1k-webgpu.wasm new file mode 100644 index 0000000000000000000000000000000000000000..46e8c2cc4b44c698366485f9f3584ea3ff5e0716 --- /dev/null +++ b/web-llm-models/v0_2_80/SmolLM2-135M-Instruct-q4f16_1-ctx4k_cs1k-webgpu.wasm @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8f0ad5f7260bf234622df57e8b028c67fb664a3549cd32b0041a49b390f5b89 +size 5855768 diff --git a/web-llm-models/v0_2_80/SmolLM2-135M-Instruct-q4f32_1-ctx4k_cs1k-webgpu.wasm b/web-llm-models/v0_2_80/SmolLM2-135M-Instruct-q4f32_1-ctx4k_cs1k-webgpu.wasm new file mode 100644 index 0000000000000000000000000000000000000000..364f8e512d95a629a98cd1c1defe02d501a6afe0 --- /dev/null +++ b/web-llm-models/v0_2_80/SmolLM2-135M-Instruct-q4f32_1-ctx4k_cs1k-webgpu.wasm @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0804ef4811dceb7c0bfb81e64570e14e2198fe3ab44d10430f204a879b517777 +size 5826871 diff --git a/web-llm-models/v0_2_80/SmolLM2-360M-Instruct-q0f16-ctx4k_cs1k-webgpu.wasm b/web-llm-models/v0_2_80/SmolLM2-360M-Instruct-q0f16-ctx4k_cs1k-webgpu.wasm new file mode 100644 index 0000000000000000000000000000000000000000..2ad3a92b9bac8c003aca03a7e86516364af87208 --- /dev/null +++ b/web-llm-models/v0_2_80/SmolLM2-360M-Instruct-q0f16-ctx4k_cs1k-webgpu.wasm @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5adb137fcf74603fe253d128499b03112567d0f819e4a00f981589b1970748da +size 5656535 diff --git a/web-llm-models/v0_2_80/SmolLM2-360M-Instruct-q0f32-ctx4k_cs1k-webgpu.wasm b/web-llm-models/v0_2_80/SmolLM2-360M-Instruct-q0f32-ctx4k_cs1k-webgpu.wasm new file mode 100644 index 0000000000000000000000000000000000000000..d41a8d6b3855b8f42ff9c284fe7fc104e51288ca --- /dev/null +++ b/web-llm-models/v0_2_80/SmolLM2-360M-Instruct-q0f32-ctx4k_cs1k-webgpu.wasm @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea2e9a2a997a8185fd665b49e199bed96a2b5062b4b5a3fee32679481097c5e7 +size 5627194 diff --git a/web-llm-models/v0_2_80/SmolLM2-360M-Instruct-q4f16_1-ctx4k_cs1k-webgpu.wasm b/web-llm-models/v0_2_80/SmolLM2-360M-Instruct-q4f16_1-ctx4k_cs1k-webgpu.wasm new file mode 100644 index 0000000000000000000000000000000000000000..cf343fde4ca93650147cc23011d87ef4b681a725 --- /dev/null +++ b/web-llm-models/v0_2_80/SmolLM2-360M-Instruct-q4f16_1-ctx4k_cs1k-webgpu.wasm @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3db2492c6b6c2ad5fa8b3969deaa6d6a345c1b05b3bd5f3d8a82b52de9d14ceb +size 5887782 diff --git a/web-llm-models/v0_2_80/SmolLM2-360M-Instruct-q4f32_1-ctx4k_cs1k-webgpu.wasm b/web-llm-models/v0_2_80/SmolLM2-360M-Instruct-q4f32_1-ctx4k_cs1k-webgpu.wasm new file mode 100644 index 0000000000000000000000000000000000000000..1505b32bbdd3fcb5bf49dcc564966fa82005a371 --- /dev/null +++ b/web-llm-models/v0_2_80/SmolLM2-360M-Instruct-q4f32_1-ctx4k_cs1k-webgpu.wasm @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5353f770c358be3c42db947c229ecd20ca604b174a3491aaf4297a7457b9d21 +size 5858417 diff --git a/web-llm-models/v0_2_80/TinyLlama-1.1B-Chat-v0.4-q0f16-ctx2k_cs1k-webgpu.wasm b/web-llm-models/v0_2_80/TinyLlama-1.1B-Chat-v0.4-q0f16-ctx2k_cs1k-webgpu.wasm new file mode 100644 index 0000000000000000000000000000000000000000..c72107c9005abcadda80bb178da5ec2dbb58910b --- /dev/null +++ b/web-llm-models/v0_2_80/TinyLlama-1.1B-Chat-v0.4-q0f16-ctx2k_cs1k-webgpu.wasm @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76e7f784ffadc53f1a4a555e53c6f82b7bdc8bfbda104b43bf7bc9b863fd6b20 +size 5500012 diff --git a/web-llm-models/v0_2_80/TinyLlama-1.1B-Chat-v0.4-q0f32-ctx2k_cs1k-webgpu.wasm b/web-llm-models/v0_2_80/TinyLlama-1.1B-Chat-v0.4-q0f32-ctx2k_cs1k-webgpu.wasm new file mode 100644 index 0000000000000000000000000000000000000000..37b4a448a9603d2314d2dc9a3823c5f492748417 --- /dev/null +++ b/web-llm-models/v0_2_80/TinyLlama-1.1B-Chat-v0.4-q0f32-ctx2k_cs1k-webgpu.wasm @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff7af4287d4d586258e609f55f232a02d76c89850d43fd60ec43ef460457a712 +size 5371960 diff --git a/web-llm-models/v0_2_80/TinyLlama-1.1B-Chat-v0.4-q4f16_1-ctx2k_cs1k-webgpu.wasm b/web-llm-models/v0_2_80/TinyLlama-1.1B-Chat-v0.4-q4f16_1-ctx2k_cs1k-webgpu.wasm new file mode 100644 index 0000000000000000000000000000000000000000..a06c1371e1c58d34f7b55bf674ba673322783814 --- /dev/null +++ b/web-llm-models/v0_2_80/TinyLlama-1.1B-Chat-v0.4-q4f16_1-ctx2k_cs1k-webgpu.wasm @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b1f55502998da25c50bd7a8350dbfb281b2f2acd7a30b56d8284a1dc3c98682a +size 5773206 diff --git a/web-llm-models/v0_2_80/TinyLlama-1.1B-Chat-v0.4-q4f32_1-ctx2k_cs1k-webgpu.wasm b/web-llm-models/v0_2_80/TinyLlama-1.1B-Chat-v0.4-q4f32_1-ctx2k_cs1k-webgpu.wasm new file mode 100644 index 0000000000000000000000000000000000000000..463af5ebe5805e337bf28fbc55d5b0b75a77934d --- /dev/null +++ b/web-llm-models/v0_2_80/TinyLlama-1.1B-Chat-v0.4-q4f32_1-ctx2k_cs1k-webgpu.wasm @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d27a56cb0b69f175f16108be46220ce078fb057e292d3cce63fa190e59694626 +size 5630291 diff --git a/web-llm-models/v0_2_80/TinyLlama-1.1B-Chat-v1.0-q4f16_1-ctx2k_cs1k-webgpu.wasm b/web-llm-models/v0_2_80/TinyLlama-1.1B-Chat-v1.0-q4f16_1-ctx2k_cs1k-webgpu.wasm new file mode 100644 index 0000000000000000000000000000000000000000..a06c1371e1c58d34f7b55bf674ba673322783814 --- /dev/null +++ b/web-llm-models/v0_2_80/TinyLlama-1.1B-Chat-v1.0-q4f16_1-ctx2k_cs1k-webgpu.wasm @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b1f55502998da25c50bd7a8350dbfb281b2f2acd7a30b56d8284a1dc3c98682a +size 5773206 diff --git a/web-llm-models/v0_2_80/TinyLlama-1.1B-Chat-v1.0-q4f32_1-ctx2k_cs1k-webgpu.wasm b/web-llm-models/v0_2_80/TinyLlama-1.1B-Chat-v1.0-q4f32_1-ctx2k_cs1k-webgpu.wasm new file mode 100644 index 0000000000000000000000000000000000000000..463af5ebe5805e337bf28fbc55d5b0b75a77934d --- /dev/null +++ b/web-llm-models/v0_2_80/TinyLlama-1.1B-Chat-v1.0-q4f32_1-ctx2k_cs1k-webgpu.wasm @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d27a56cb0b69f175f16108be46220ce078fb057e292d3cce63fa190e59694626 +size 5630291 diff --git a/web-llm-models/v0_2_80/compile_wasm.py b/web-llm-models/v0_2_80/compile_wasm.py new file mode 100644 index 0000000000000000000000000000000000000000..967b3fa0916260d695a2abf4704c625d33f788da --- /dev/null +++ b/web-llm-models/v0_2_80/compile_wasm.py @@ -0,0 +1,310 @@ +import os +import subprocess +import sys +from pathlib import Path +from huggingface_hub import hf_hub_download +import json + +LOG_PATH = Path("./") / "compile_wasm_log.txt" +# NOTE(Harry): Set this to your binary-mlc-llm-libs repo. +BINARY_DIR = "/path/to/binary-mlc-llm-libs/web-llm-models/v0_2_80" +CONFIG_PATH = "/path/to/hf-configs/" + +# -1. Clean log file +cmd = [ + "rm", + "-rf", + "./compile_wasm_log.txt", +] +print(" ".join(cmd), flush=True) +subprocess.run(cmd, check=True, stderr=subprocess.STDOUT, env=os.environ) + + +def compile( + model, + quantization, + context_window_size, + prefill_chunk_size, + model_id, + repo_id=None, + use_sliding_window=False, + max_batch_size=None, +): + with LOG_PATH.open("a", encoding="utf-8") as log_file: + # 0. Clean temp folder + cmd = [ + "rm", + "-rf", + "dist/temp/", + ] + print(" ".join(cmd), flush=True) + subprocess.run(cmd, check=True, stdout=log_file, stderr=subprocess.STDOUT, env=os.environ) + + # 1. Gen config + if repo_id: + HF_TOKEN = os.getenv("HF_TOKEN") + try: + cfg_path = hf_hub_download( + repo_id=repo_id, + filename="config.json", + token=HF_TOKEN, + ) + except Exception as err: + print(err) + + dst = f"{CONFIG_PATH}{repo_id.split('/')[-1]}.config.json" + with open(cfg_path, "r", encoding="utf-8") as src, open(dst, "w", encoding="utf-8") as out: + json.dump(json.load(src), out, indent=2, ensure_ascii=False) + + cmd = [ + sys.executable, + "-m", + "mlc_llm", + "gen_config", + dst, + "--output", + "dist/temp", + "--conv-template", + "LM", + "--quantization", + quantization, + "--prefill-chunk-size", + str(prefill_chunk_size), + ] + else: + cmd = [ + sys.executable, + "-m", + "mlc_llm", + "gen_config", + model, + "--output", + "dist/temp", + "--conv-template", + "LM", + "--quantization", + quantization, + "--prefill-chunk-size", + str(prefill_chunk_size), + ] + + if use_sliding_window: + cmd += [ + "--sliding-window-size", + str(context_window_size), + ] + else: + cmd += [ + "--context-window-size", + str(context_window_size), + ] + if max_batch_size: + cmd += [ + "--max-batch-size", + str(max_batch_size), + ] + print(" ".join(cmd), flush=True) + subprocess.run(cmd, check=True, stdout=log_file, stderr=subprocess.STDOUT, env=os.environ) + + # 2. compile + + # 2.1. Get output wasm name + ctx = "" + if context_window_size == 4096: + ctx = "4k" + elif context_window_size == 2048: + ctx = "2k" + elif context_window_size == 1024: + ctx = "1k" + elif context_window_size == 512: + ctx = "512" + else: + raise RuntimeError(f"Unrecognized ctx: {ctx}") + + cs = "" + if prefill_chunk_size == 4096: + cs = "4k" + elif prefill_chunk_size == 2048: + cs = "2k" + elif prefill_chunk_size == 1024: + cs = "1k" + elif prefill_chunk_size == 512: + cs = "512" + else: + raise RuntimeError(f"Unrecognized cs: {cs}") + + # e.g. Llama-3-8B-Instruct-q4f16_1-ctx4k_cs1k-webgpu.wasm + if use_sliding_window: + output_file_name = f"{model_id}-{quantization}-sw{ctx}_cs{cs}" + else: + output_file_name = f"{model_id}-{quantization}-ctx{ctx}_cs{cs}" + if max_batch_size: + output_file_name += f"_batch{max_batch_size}" + output_file_name += "-webgpu.wasm" + output_path = os.path.join(BINARY_DIR, output_file_name) + + # 2.2. Compile + cmd = [ + sys.executable, + "-m", + "mlc_llm", + "compile", + "dist/temp/mlc-chat-config.json", + "--device", + "webgpu", + "--output", + output_path, + ] + print(" ".join(cmd), flush=True) + subprocess.run(cmd, check=True, stdout=log_file, stderr=subprocess.STDOUT, env=os.environ) + + # 3. Clean temp mlc-chat-config.json + cmd = [ + "rm", + "-rf", + "dist/temp/mlc-chat-config.json", + ] + print(" ".join(cmd), flush=True) + subprocess.run(cmd, check=True, stdout=log_file, stderr=subprocess.STDOUT, env=os.environ) + + +# NOTE(Charlie): As of 03/31/2025, the context window size does not do anything because +# it has become a runtime thing in both MLC-LLM and WebLLM. + +# NOTE(Harry): To compile a wasm, uncomment the corresponding line below. + +# compile("phi-3", "q4f16_1", 4096, 1024, "Phi-3-mini-4k-instruct") +# compile("phi-3", "q4f16_1", 4096, 1024, "Phi-3-mini-4k-instruct-old") +# compile("phi-3", "q4f32_1", 4096, 1024, "Phi-3-mini-4k-instruct") + +# compile("llama3_8b", "q4f16_1", 4096, 1024, "Llama-3-8B-Instruct") +# compile("llama3_8b", "q4f32_1", 4096, 1024, "Llama-3-8B-Instruct") + +# compile("llama2_7b", "q4f16_1", 4096, 1024, "Llama-2-7b-chat-hf") +# compile("llama2_7b", "q4f32_1", 4096, 1024, "Llama-2-7b-chat-hf") + +# compile("llama2_13b", "q4f16_1", 4096, 1024, "Llama-2-13b-chat-hf") + +# compile("mistral_7b_v03", "q4f16_1", 4096, 1024, "Mistral-7B-Instruct-v0.3") +# compile("mistral_7b_v03", "q4f32_1", 4096, 1024, "Mistral-7B-Instruct-v0.3") + +# compile("redpajama_3b_v1", "q4f16_1", 2048, 1024, "RedPajama-INCITE-Chat-3B-v1") +# compile("redpajama_3b_v1", "q4f32_1", 2048, 1024, "RedPajama-INCITE-Chat-3B-v1") + +# compile("tinyllama_1b_chat_v0.4", "q0f16", 2048, 1024, "TinyLlama-1.1B-Chat-v0.4") +# compile("tinyllama_1b_chat_v0.4", "q0f32", 2048, 1024, "TinyLlama-1.1B-Chat-v0.4") +# compile("tinyllama_1b_chat_v0.4", "q4f16_1", 2048, 1024, "TinyLlama-1.1B-Chat-v0.4") +# compile("tinyllama_1b_chat_v0.4", "q4f32_1", 2048, 1024, "TinyLlama-1.1B-Chat-v0.4") + +# compile("tinyllama_1b_chat_v1.0", "q4f16_1", 2048, 1024, "TinyLlama-1.1B-Chat-v1.0") +# compile("tinyllama_1b_chat_v1.0", "q4f32_1", 2048, 1024, "TinyLlama-1.1B-Chat-v1.0") + +# compile("gemma_2b", "q4f16_1", 4096, 1024, "gemma-2b-it") +# compile("gemma_2b", "q4f32_1", 4096, 1024, "gemma-2b-it") + +# compile("gpt2_medium", "q0f16", 1024, 1024, "gpt2-medium") +# compile("gpt2", "q0f16", 1024, 1024, "gpt2") + +# compile("phi-1_5", "q4f16_1", 2048, 1024, "phi-1_5") +# compile("phi-1_5", "q4f32_1", 2048, 1024, "phi-1_5") + +# compile("phi-2", "q4f16_1", 2048, 1024, "phi-2") +# compile("phi-2", "q4f32_1", 2048, 1024, "phi-2") + +# compile("stablelm-2-zephyr-1_6b", "q4f16_1", 4096, 1024, "stablelm-2-zephyr-1_6b") +# compile("stablelm-2-zephyr-1_6b", "q4f32_1", 4096, 1024, "stablelm-2-zephyr-1_6b") + +# compile("qwen2_0_5b", "q4f16_1", 4096, 1024, "Qwen2-0.5B-Instruct") +# compile("qwen2_0_5b", "q4f32_1", 4096, 1024, "Qwen2-0.5B-Instruct") +# compile("qwen2_0_5b", "q0f16", 4096, 1024, "Qwen2-0.5B-Instruct") +# compile("qwen2_0_5b", "q0f32", 4096, 1024, "Qwen2-0.5B-Instruct") + +# compile("qwen2_1_5b", "q4f16_1", 4096, 1024, "Qwen2-1.5B-Instruct") +# compile("qwen2_1_5b", "q4f32_1", 4096, 1024, "Qwen2-1.5B-Instruct") + +# compile("qwen2.5_3b", "q4f16_1", 4096, 1024, "Qwen2.5-3B-Instruct") +# compile("qwen2.5_3b", "q4f32_1", 4096, 1024, "Qwen2.5-3B-Instruct") + +# compile("qwen2_7b", "q4f16_1", 4096, 1024, "Qwen2-7B-Instruct") +# compile("qwen2_7b", "q4f32_1", 4096, 1024, "Qwen2-7B-Instruct") + +# compile("llama3_70b", "q3f16_1", 4096, 1024, "Llama-3-70B-Instruct") + +# compile("llama3_1_8b", "q4f16_1", 4096, 1024, "Llama-3_1-8B-Instruct") +# compile("llama3_1_8b", "q4f32_1", 4096, 1024, "Llama-3_1-8B-Instruct") + +# compile("llama3_1_70b", "q3f16_1", 4096, 1024, "Llama-3_1-70B-Instruct") + +# compile("gemma2_2b", "q4f16_1", 4096, 1024, "gemma-2-2b-it") +# compile("gemma2_2b", "q4f32_1", 4096, 1024, "gemma-2-2b-it") + +# compile("gemma2_9b", "q4f16_1", 4096, 1024, "gemma-2-9b-it") +# compile("gemma2_9b", "q4f32_1", 4096, 1024, "gemma-2-9b-it") + +# # max_position_embeddings is set to be 512 here for embedding model; no rope is used +# compile( +# "snowflake-arctic-embed-m", "q0f32", 512, 512, "snowflake-arctic-embed-m", max_batch_size=32 +# ) +# compile("snowflake-arctic-embed-m", "q0f32", 512, 512, "snowflake-arctic-embed-m", max_batch_size=4) + +# compile( +# "snowflake-arctic-embed-s", "q0f32", 512, 512, "snowflake-arctic-embed-s", max_batch_size=32 +# ) +# compile("snowflake-arctic-embed-s", "q0f32", 512, 512, "snowflake-arctic-embed-s", max_batch_size=4) + +# # Cannot be shared with phi-3 because phi3.5 has rope scaling +# compile("phi-3_5", "q4f16_1", 4096, 1024, "Phi-3.5-mini-instruct") +# compile("phi-3_5", "q4f32_1", 4096, 1024, "Phi-3.5-mini-instruct") + +# compile("phi-3_5-vision", "q4f16_1", 4096, 2048, "Phi-3.5-vision-instruct") +# compile("phi-3_5-vision", "q4f32_1", 4096, 2048, "Phi-3.5-vision-instruct") + +# # Llama3.2 1B/3B +# compile("llama3_2_1b", "q0f16", 4096, 1024, "Llama-3.2-1B-Instruct") +# compile("llama3_2_1b", "q0f32", 4096, 1024, "Llama-3.2-1B-Instruct") +# compile("llama3_2_1b", "q4f16_1", 4096, 1024, "Llama-3.2-1B-Instruct") +# compile("llama3_2_1b", "q4f32_1", 4096, 1024, "Llama-3.2-1B-Instruct") + +# compile("llama3_2_3b", "q4f16_1", 4096, 1024, "Llama-3.2-3B-Instruct") +# compile("llama3_2_3b", "q4f32_1", 4096, 1024, "Llama-3.2-3B-Instruct") + +# compile("gemma2_2b-jpn", "q4f16_1", 4096, 1024, "gemma-2-2b-jpn-it") +# compile("gemma2_2b-jpn", "q4f32_1", 4096, 1024, "gemma-2-2b-jpn-it") + +# compile("smollm2_1_7b", "q0f16", 4096, 1024, "SmolLM2-1.7B-Instruct") +# compile("smollm2_1_7b", "q4f16_1", 4096, 1024, "SmolLM2-1.7B-Instruct") +# compile("smollm2_1_7b", "q4f32_1", 4096, 1024, "SmolLM2-1.7B-Instruct") + +# compile("smollm2_360m", "q0f16", 4096, 1024, "SmolLM2-360M-Instruct") +# compile("smollm2_360m", "q0f32", 4096, 1024, "SmolLM2-360M-Instruct") +# compile("smollm2_360m", "q4f16_1", 4096, 1024, "SmolLM2-360M-Instruct") +# compile("smollm2_360m", "q4f32_1", 4096, 1024, "SmolLM2-360M-Instruct") + +# compile("smollm2_135m", "q0f16", 4096, 1024, "SmolLM2-135M-Instruct") +# compile("smollm2_135m", "q0f32", 4096, 1024, "SmolLM2-135M-Instruct") +# compile("smollm2_135m", "q4f16_1", 4096, 1024, "SmolLM2-135M-Instruct") +# compile("smollm2_135m", "q4f32_1", 4096, 1024, "SmolLM2-135M-Instruct") + +# compile("gemma3_1b_it", "q4f16_1", 4096, 1024, "gemma3-1b-it") + + +# compile("qwen3_0.6b", "q4f16_1", 4096, 1024, "Qwen3-0.6B") +# compile("qwen3_0.6b", "q4f32_1", 4096, 1024, "Qwen3-0.6B") +# compile("qwen3_0.6b", "q0f16", 4096, 1024, "Qwen3-0.6B") +# compile("qwen3_0.6b", "q0f32", 4096, 1024, "Qwen3-0.6B") + +# compile("qwen3_1.7b", "q4f16_1", 4096, 1024, "Qwen3-1.7B") +# compile("qwen3_1.7b", "q4f32_1", 4096, 1024, "Qwen3-1.7B") + +# compile("qwen3_4b", "q4f16_1", 4096, 1024, "Qwen3-4B") +# compile("qwen3_4b", "q4f32_1", 4096, 1024, "Qwen3-4B") + +# compile("qwen3_8b", "q4f16_1", 4096, 1024, "Qwen3-8B") +# compile("qwen3_8b", "q4f32_1", 4096, 1024, "Qwen3-8B") + +# compile("qwen3_4b_instruct_2507", "q4f16_1", 4096, 1024, "Qwen3-4B-Instruct-2507") +# compile("qwen3_4b_instruct_2507", "q4f32_1", 4096, 1024, "Qwen3-4B-Instruct-2507") + +# compile("qwen3_4b_thinking_2507", "q4f16_1", 4096, 1024, "Qwen3-4B-Thinking-2507") +compile("qwen3_4b_thinking_2507", "q4f32_1", 4096, 1024, "Qwen3-4B-Thinking-2507") + diff --git a/web-llm-models/v0_2_80/gemma-2-2b-it-q4f16_1-ctx4k_cs1k-webgpu.wasm b/web-llm-models/v0_2_80/gemma-2-2b-it-q4f16_1-ctx4k_cs1k-webgpu.wasm new file mode 100644 index 0000000000000000000000000000000000000000..738b23cfb049f9120238917ef522682bedd22c6a --- /dev/null +++ b/web-llm-models/v0_2_80/gemma-2-2b-it-q4f16_1-ctx4k_cs1k-webgpu.wasm @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d96b93a758092a63a599c477084f82580549aaf3262a47944e5d075de71ff6df +size 5372475 diff --git a/web-llm-models/v0_2_80/gemma-2-2b-it-q4f32_1-ctx4k_cs1k-webgpu.wasm b/web-llm-models/v0_2_80/gemma-2-2b-it-q4f32_1-ctx4k_cs1k-webgpu.wasm new file mode 100644 index 0000000000000000000000000000000000000000..c14f8198cc9c5d7978c8dae6c481418319d0a8f8 --- /dev/null +++ b/web-llm-models/v0_2_80/gemma-2-2b-it-q4f32_1-ctx4k_cs1k-webgpu.wasm @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:600e43ae8216afbd658951bc6da811908820f7a06f018e8aedef2e10347908af +size 5347067 diff --git a/web-llm-models/v0_2_80/gemma-2-2b-jpn-it-q4f16_1-ctx4k_cs1k-webgpu.wasm b/web-llm-models/v0_2_80/gemma-2-2b-jpn-it-q4f16_1-ctx4k_cs1k-webgpu.wasm new file mode 100644 index 0000000000000000000000000000000000000000..738b23cfb049f9120238917ef522682bedd22c6a --- /dev/null +++ b/web-llm-models/v0_2_80/gemma-2-2b-jpn-it-q4f16_1-ctx4k_cs1k-webgpu.wasm @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d96b93a758092a63a599c477084f82580549aaf3262a47944e5d075de71ff6df +size 5372475 diff --git a/web-llm-models/v0_2_80/gemma-2-2b-jpn-it-q4f32_1-ctx4k_cs1k-webgpu.wasm b/web-llm-models/v0_2_80/gemma-2-2b-jpn-it-q4f32_1-ctx4k_cs1k-webgpu.wasm new file mode 100644 index 0000000000000000000000000000000000000000..c14f8198cc9c5d7978c8dae6c481418319d0a8f8 --- /dev/null +++ b/web-llm-models/v0_2_80/gemma-2-2b-jpn-it-q4f32_1-ctx4k_cs1k-webgpu.wasm @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:600e43ae8216afbd658951bc6da811908820f7a06f018e8aedef2e10347908af +size 5347067 diff --git a/web-llm-models/v0_2_80/gemma-2-9b-it-q4f16_1-ctx4k_cs1k-webgpu.wasm b/web-llm-models/v0_2_80/gemma-2-9b-it-q4f16_1-ctx4k_cs1k-webgpu.wasm new file mode 100644 index 0000000000000000000000000000000000000000..938c17606dda226c5c634b294316796e863a56ab --- /dev/null +++ b/web-llm-models/v0_2_80/gemma-2-9b-it-q4f16_1-ctx4k_cs1k-webgpu.wasm @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec6e3ec0138c9318b042716e616b46a6e4868e25134a26e7765740aae9376671 +size 5920586 diff --git a/web-llm-models/v0_2_80/gemma-2-9b-it-q4f32_1-ctx4k_cs1k-webgpu.wasm b/web-llm-models/v0_2_80/gemma-2-9b-it-q4f32_1-ctx4k_cs1k-webgpu.wasm new file mode 100644 index 0000000000000000000000000000000000000000..e9f36926d700f5671d79c0e292b37be5c248891d --- /dev/null +++ b/web-llm-models/v0_2_80/gemma-2-9b-it-q4f32_1-ctx4k_cs1k-webgpu.wasm @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cba8d2ef308e5740a7d4046e41ba1803877ae0fad46c357c9d4d5f556ec451e7 +size 5878271 diff --git a/web-llm-models/v0_2_80/gemma-2b-it-q4f16_1-ctx4k_cs1k-webgpu.wasm b/web-llm-models/v0_2_80/gemma-2b-it-q4f16_1-ctx4k_cs1k-webgpu.wasm new file mode 100644 index 0000000000000000000000000000000000000000..496edaa857e2ba7e095940804d7d252fcf403860 --- /dev/null +++ b/web-llm-models/v0_2_80/gemma-2b-it-q4f16_1-ctx4k_cs1k-webgpu.wasm @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:156218c667563dba15e77118c92aac937f51e5a073549b34bbc4fd0cc14c03d3 +size 5083185 diff --git a/web-llm-models/v0_2_80/gemma-2b-it-q4f32_1-ctx4k_cs1k-webgpu.wasm b/web-llm-models/v0_2_80/gemma-2b-it-q4f32_1-ctx4k_cs1k-webgpu.wasm new file mode 100644 index 0000000000000000000000000000000000000000..89bec4e2d1bc291f461a77c1efd079c53df6ad34 --- /dev/null +++ b/web-llm-models/v0_2_80/gemma-2b-it-q4f32_1-ctx4k_cs1k-webgpu.wasm @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:881f2fdabb4c35d2065ca047b4016da5f125b4fc1ed0f4896f5e61c3bdeacb4a +size 5018042 diff --git a/web-llm-models/v0_2_80/gemma-3-1b-it-q4f16_1-ctx4k_cs1k-webgpu.wasm b/web-llm-models/v0_2_80/gemma-3-1b-it-q4f16_1-ctx4k_cs1k-webgpu.wasm new file mode 100644 index 0000000000000000000000000000000000000000..2e79f7fb52a8dce913093cea9fa9b6468be580ad --- /dev/null +++ b/web-llm-models/v0_2_80/gemma-3-1b-it-q4f16_1-ctx4k_cs1k-webgpu.wasm @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b17f0caa24ea7ed0782a507c1b573b68044eabc3ff3c67463ea60dc40f2af238 +size 5892106 diff --git a/web-llm-models/v0_2_80/gpt2-medium-q0f16-ctx1k_cs1k-webgpu.wasm b/web-llm-models/v0_2_80/gpt2-medium-q0f16-ctx1k_cs1k-webgpu.wasm new file mode 100644 index 0000000000000000000000000000000000000000..9c387b4286654afdaaf32a50dc615cd3b50eb637 --- /dev/null +++ b/web-llm-models/v0_2_80/gpt2-medium-q0f16-ctx1k_cs1k-webgpu.wasm @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a22ddb9fb732d09956181caf0a5b8224f9165c74071d5e221c84e65387c55c16 +size 5295677 diff --git a/web-llm-models/v0_2_80/gpt2-q0f16-ctx1k_cs1k-webgpu.wasm b/web-llm-models/v0_2_80/gpt2-q0f16-ctx1k_cs1k-webgpu.wasm new file mode 100644 index 0000000000000000000000000000000000000000..bfeac2eacc4243d86f0ae2a390b9e0858b06ee04 --- /dev/null +++ b/web-llm-models/v0_2_80/gpt2-q0f16-ctx1k_cs1k-webgpu.wasm @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:598f2c959a448627584d9929d00eab4e3b9699fae5666d33926c321a28bcd9df +size 4930123 diff --git a/web-llm-models/v0_2_80/phi-1_5-q4f16_1-ctx2k_cs1k-webgpu.wasm b/web-llm-models/v0_2_80/phi-1_5-q4f16_1-ctx2k_cs1k-webgpu.wasm new file mode 100644 index 0000000000000000000000000000000000000000..0b6737e834755597f06c54269ce5d0513d69d187 --- /dev/null +++ b/web-llm-models/v0_2_80/phi-1_5-q4f16_1-ctx2k_cs1k-webgpu.wasm @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d283238859a7f02466b3cbcef3c0196e7f4c90150e51b3a59faaf37905097dc3 +size 5384158 diff --git a/web-llm-models/v0_2_80/phi-1_5-q4f32_1-ctx2k_cs1k-webgpu.wasm b/web-llm-models/v0_2_80/phi-1_5-q4f32_1-ctx2k_cs1k-webgpu.wasm new file mode 100644 index 0000000000000000000000000000000000000000..d5d26405a99a8b7fe8cfcc9d20ae2cf9368ec19a --- /dev/null +++ b/web-llm-models/v0_2_80/phi-1_5-q4f32_1-ctx2k_cs1k-webgpu.wasm @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eebe815456fd023c8c43f8536f45185e41f2bc9df4508a8dd033f8377bc1391d +size 5356349 diff --git a/web-llm-models/v0_2_80/phi-2-q4f16_1-ctx2k_cs1k-webgpu.wasm b/web-llm-models/v0_2_80/phi-2-q4f16_1-ctx2k_cs1k-webgpu.wasm new file mode 100644 index 0000000000000000000000000000000000000000..05e859781ce98c5b7a7b7b9a8660655ffedf7620 --- /dev/null +++ b/web-llm-models/v0_2_80/phi-2-q4f16_1-ctx2k_cs1k-webgpu.wasm @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a665c72244dd0b9882a9eb057b87d785040f55cb038d6f53091353753db9d27c +size 5556895 diff --git a/web-llm-models/v0_2_80/phi-2-q4f32_1-ctx2k_cs1k-webgpu.wasm b/web-llm-models/v0_2_80/phi-2-q4f32_1-ctx2k_cs1k-webgpu.wasm new file mode 100644 index 0000000000000000000000000000000000000000..a89533c5c79a63f745ed7409e260738b70d96dcb --- /dev/null +++ b/web-llm-models/v0_2_80/phi-2-q4f32_1-ctx2k_cs1k-webgpu.wasm @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4eff2d4a3658a8fe92c33a856d6f909aa6edcc2ccb1792190ec0e53d2cb16144 +size 5524905 diff --git a/web-llm-models/v0_2_80/snowflake-arctic-embed-m-q0f32-ctx512_cs512_batch32-webgpu.wasm b/web-llm-models/v0_2_80/snowflake-arctic-embed-m-q0f32-ctx512_cs512_batch32-webgpu.wasm new file mode 100644 index 0000000000000000000000000000000000000000..13030d105ce13de836ab4e206ef8b73e6dc3365d --- /dev/null +++ b/web-llm-models/v0_2_80/snowflake-arctic-embed-m-q0f32-ctx512_cs512_batch32-webgpu.wasm @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba0a131ea0e9314e564d26e22a543f1877515ef51d2bbec8aad65617f9bc42e2 +size 3578227 diff --git a/web-llm-models/v0_2_80/snowflake-arctic-embed-m-q0f32-ctx512_cs512_batch4-webgpu.wasm b/web-llm-models/v0_2_80/snowflake-arctic-embed-m-q0f32-ctx512_cs512_batch4-webgpu.wasm new file mode 100644 index 0000000000000000000000000000000000000000..3c117195f9b34e3bb7a650baace0ab7c11a96667 --- /dev/null +++ b/web-llm-models/v0_2_80/snowflake-arctic-embed-m-q0f32-ctx512_cs512_batch4-webgpu.wasm @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e0e1dc486f9478c4b6aa5ed6bdb7a2fdd9ffe177d0b7fb0ddaa415f5cb20a74 +size 3578228 diff --git a/web-llm-models/v0_2_80/snowflake-arctic-embed-s-q0f32-ctx512_cs512_batch32-webgpu.wasm b/web-llm-models/v0_2_80/snowflake-arctic-embed-s-q0f32-ctx512_cs512_batch32-webgpu.wasm new file mode 100644 index 0000000000000000000000000000000000000000..c178d23e388627e8e5753828347504cd0f4579ea --- /dev/null +++ b/web-llm-models/v0_2_80/snowflake-arctic-embed-s-q0f32-ctx512_cs512_batch32-webgpu.wasm @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d95ff5f162092f5081d330f9a377431b0f1013e3fa7878dfe70773c8bc17423 +size 3561248 diff --git a/web-llm-models/v0_2_80/snowflake-arctic-embed-s-q0f32-ctx512_cs512_batch4-webgpu.wasm b/web-llm-models/v0_2_80/snowflake-arctic-embed-s-q0f32-ctx512_cs512_batch4-webgpu.wasm new file mode 100644 index 0000000000000000000000000000000000000000..7258ed342115acc6ce4d2e2acd343301df971c94 --- /dev/null +++ b/web-llm-models/v0_2_80/snowflake-arctic-embed-s-q0f32-ctx512_cs512_batch4-webgpu.wasm @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7bc8e46a7abe947ccd3cdd0df8c9f473eb638975e2122ec3ace9796d0a6d5db +size 3561250 diff --git a/web-llm-models/v0_2_80/stablelm-2-zephyr-1_6b-q4f16_1-ctx4k_cs1k-webgpu.wasm b/web-llm-models/v0_2_80/stablelm-2-zephyr-1_6b-q4f16_1-ctx4k_cs1k-webgpu.wasm new file mode 100644 index 0000000000000000000000000000000000000000..8940d9032f888db180c184e8de5c69f69cfde3e7 --- /dev/null +++ b/web-llm-models/v0_2_80/stablelm-2-zephyr-1_6b-q4f16_1-ctx4k_cs1k-webgpu.wasm @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d714daefc5d13b99d9cf244ad60780eeb4ce064be3e23e3609dd90f7ac53af6 +size 5356520 diff --git a/web-llm-models/v0_2_80/stablelm-2-zephyr-1_6b-q4f32_1-ctx4k_cs1k-webgpu.wasm b/web-llm-models/v0_2_80/stablelm-2-zephyr-1_6b-q4f32_1-ctx4k_cs1k-webgpu.wasm new file mode 100644 index 0000000000000000000000000000000000000000..f7516a8231c2eea318e29a2c0a928e672748f4e2 --- /dev/null +++ b/web-llm-models/v0_2_80/stablelm-2-zephyr-1_6b-q4f32_1-ctx4k_cs1k-webgpu.wasm @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5e5ae435ea71732fe5b56ac809bc95b5e8d98255deb0f522fbeed4281b3a4cf +size 5327203