ErazerControl committed on
Commit
baa8a17
·
verified ·
1 Parent(s): 7ad2943

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -55,3 +55,4 @@ Nemotron-Cascade-8B-Thinking/onnx-webgpu/tokenizer.json filter=lfs diff=lfs merg
55
  Nemotron-Mini-4B-Instruct/onnx-webgpu/tokenizer.json filter=lfs diff=lfs merge=lfs -text
56
  Phi-4-mini-instruct/onnx-webgpu/tokenizer.json filter=lfs diff=lfs merge=lfs -text
57
  Phi-4-mini-reasoning/onnx-webgpu/tokenizer.json filter=lfs diff=lfs merge=lfs -text
 
 
55
  Nemotron-Mini-4B-Instruct/onnx-webgpu/tokenizer.json filter=lfs diff=lfs merge=lfs -text
56
  Phi-4-mini-instruct/onnx-webgpu/tokenizer.json filter=lfs diff=lfs merge=lfs -text
57
  Phi-4-mini-reasoning/onnx-webgpu/tokenizer.json filter=lfs diff=lfs merge=lfs -text
58
+ Qwen2-0.5B-Instruct/onnx-webgpu/tokenizer.json filter=lfs diff=lfs merge=lfs -text
Qwen2-0.5B-Instruct/README.md ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Qwen2-0.5B-Instruct
2
+
3
+ ## GenAI WebGPU Model
4
+
5
+ Generated using `onnxruntime_genai.models.builder`
6
+
7
+ - **onnxruntime-genai commit**: `41c4ce18fec1240c2f848725e31fbe8854010188`
8
+ - **transformers version**: `4.57.0`
9
+ - **Precision**: int4
10
+ - **Execution Provider**: webgpu
11
+
12
+ ```
13
+ python -m onnxruntime_genai.models.builder -p int4 -e webgpu -m Qwen/Qwen2-0.5B-Instruct -o E:\ai-models\Qwen2-0.5B-Instruct\onnx-webgpu\ --extra_options int4_algo_config=rtn_last int4_is_symmetric=true prune_lm_head=true enable_webgpu_graph=true
14
+ ```
15
+
16
+ ## GGUF Model
17
+
18
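+ Downloaded from: [bartowski/Qwen2.5-0.5B-Instruct-GGUF](https://huggingface.co/bartowski/Qwen2.5-0.5B-Instruct-GGUF) (note: this GGUF is the Qwen2.5-0.5B-Instruct variant, whereas the WebGPU ONNX model above was built from `Qwen/Qwen2-0.5B-Instruct`)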
Qwen2-0.5B-Instruct/gguf/Qwen2.5-0.5B-Instruct-Q4_K_M.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6eb923e7d26e9cea28811e1a8e852009b21242fb157b26149d3b188f3a8c8653
3
+ size 397808192
Qwen2-0.5B-Instruct/onnx-webgpu/added_tokens.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "<|endoftext|>": 151643,
3
+ "<|im_end|>": 151645,
4
+ "<|im_start|>": 151644
5
+ }
Qwen2-0.5B-Instruct/onnx-webgpu/chat_template.jinja ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system
2
+ You are a helpful assistant.<|im_end|>
3
+ ' }}{% endif %}{{'<|im_start|>' + message['role'] + '
4
+ ' + message['content'] + '<|im_end|>' + '
5
+ '}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant
6
+ ' }}{% endif %}
Qwen2-0.5B-Instruct/onnx-webgpu/genai_config.json ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "bos_token_id": 151643,
4
+ "context_length": 32768,
5
+ "decoder": {
6
+ "session_options": {
7
+ "log_id": "onnxruntime-genai",
8
+ "provider_options": [
9
+ {
10
+ "webgpu": {
11
+ "enableGraphCapture": "0",
12
+ "validationMode": "disabled"
13
+ }
14
+ }
15
+ ]
16
+ },
17
+ "filename": "model.onnx",
18
+ "head_size": 64,
19
+ "hidden_size": 896,
20
+ "inputs": {
21
+ "input_ids": "input_ids",
22
+ "attention_mask": "attention_mask",
23
+ "past_key_names": "past_key_values.%d.key",
24
+ "past_value_names": "past_key_values.%d.value"
25
+ },
26
+ "outputs": {
27
+ "logits": "logits",
28
+ "present_key_names": "present.%d.key",
29
+ "present_value_names": "present.%d.value"
30
+ },
31
+ "num_attention_heads": 14,
32
+ "num_hidden_layers": 24,
33
+ "num_key_value_heads": 2
34
+ },
35
+ "eos_token_id": [
36
+ 151645,
37
+ 151643
38
+ ],
39
+ "pad_token_id": 151643,
40
+ "type": "qwen2",
41
+ "vocab_size": 151936
42
+ },
43
+ "search": {
44
+ "diversity_penalty": 0,
45
+ "do_sample": true,
46
+ "early_stopping": true,
47
+ "length_penalty": 1,
48
+ "max_length": 32768,
49
+ "min_length": 0,
50
+ "no_repeat_ngram_size": 0,
51
+ "num_beams": 1,
52
+ "num_return_sequences": 1,
53
+ "past_present_share_buffer": true,
54
+ "repetition_penalty": 1,
55
+ "temperature": 0.7,
56
+ "top_k": 20,
57
+ "top_p": 0.8
58
+ }
59
+ }
Qwen2-0.5B-Instruct/onnx-webgpu/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
Qwen2-0.5B-Instruct/onnx-webgpu/model.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:33e4b5626d0400d2b0960ab742f2a7c76e5b04f53cfa76a5697c46845a1481e1
3
+ size 190215
Qwen2-0.5B-Instruct/onnx-webgpu/model.onnx.data ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7bb5072b94ce99885b6e7112db146c5c6910e31a4f725d7983bb2d42c57bed38
3
+ size 353845248
Qwen2-0.5B-Instruct/onnx-webgpu/special_tokens_map.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<|im_start|>",
4
+ "<|im_end|>"
5
+ ],
6
+ "eos_token": {
7
+ "content": "<|im_end|>",
8
+ "lstrip": false,
9
+ "normalized": false,
10
+ "rstrip": false,
11
+ "single_word": false
12
+ },
13
+ "pad_token": {
14
+ "content": "<|endoftext|>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false
19
+ }
20
+ }
Qwen2-0.5B-Instruct/onnx-webgpu/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bcfe42da0a4497e8b2b172c1f9f4ec423a46dc12907f4349c55025f670422ba9
3
+ size 11418266
Qwen2-0.5B-Instruct/onnx-webgpu/tokenizer_config.json ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "added_tokens_decoder": {
4
+ "151643": {
5
+ "content": "<|endoftext|>",
6
+ "lstrip": false,
7
+ "normalized": false,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ },
12
+ "151644": {
13
+ "content": "<|im_start|>",
14
+ "lstrip": false,
15
+ "normalized": false,
16
+ "rstrip": false,
17
+ "single_word": false,
18
+ "special": true
19
+ },
20
+ "151645": {
21
+ "content": "<|im_end|>",
22
+ "lstrip": false,
23
+ "normalized": false,
24
+ "rstrip": false,
25
+ "single_word": false,
26
+ "special": true
27
+ }
28
+ },
29
+ "additional_special_tokens": [
30
+ "<|im_start|>",
31
+ "<|im_end|>"
32
+ ],
33
+ "bos_token": null,
34
+ "clean_up_tokenization_spaces": false,
35
+ "eos_token": "<|im_end|>",
36
+ "errors": "replace",
37
+ "extra_special_tokens": {},
38
+ "model_max_length": 32768,
39
+ "pad_token": "<|endoftext|>",
40
+ "split_special_tokens": false,
41
+ "tokenizer_class": "Qwen2Tokenizer",
42
+ "unk_token": null
43
+ }
Qwen2-0.5B-Instruct/onnx-webgpu/vocab.json ADDED
The diff for this file is too large to render. See raw diff