qjia7 committed on
Commit
a615462
·
verified ·
1 Parent(s): a118411

Add muffin ort-webgpu model

Browse files
muffin/onnx-webgpu/adapter_cache.bin ADDED
File without changes
muffin/onnx-webgpu/chat_template.jinja ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {#- Chat template: renders optional tool definitions and the message list as <|system|>, <|user|> and <|assistant|> turns, each normally closed by <|end|>. -#}{%- if tools %}
2
+ {{- '<|system|>' }}
3
+ {%- if messages and messages[0].role == 'system' %}
4
+ {{- messages[0].content + '\n\n' }}
5
+ {%- else %}
6
+ {{- 'You are Muffin, a large language model trained by Microsoft.\n\n' }}
7
+ {%- endif %}
8
+ {{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nThe available tools are provided as JSON between <|tool|> and <|/tool|> tokens:\n" }}
9
+ {{- '<|tool|>' }}
10
+ {{- tools | tojson }}
11
+ {{- '<|/tool|>\n\nFor each function call, respond with a single <|tool_call|>{\"name\": \"<function-name>\", \"arguments\": <json-args>}<|/tool_call|> block.\n' }}
12
+ {#- The end marker is emitted as an expression so that no template newline or indentation is rendered in front of it. -#}{{- '<|end|>' }}
13
+ {#- start_index skips a leading system message, which has already been rendered above. -#}{%- set start_index = 1 if messages and messages[0].role == 'system' else 0 %}
14
+ {%- else %}
15
+ {%- if messages and messages[0].role == 'system' %}
16
+ {{- '<|system|>' + messages[0].content + '<|end|>' }}
17
+ {%- set start_index = 1 %}
18
+ {%- else %}
19
+ {{- '<|system|>You are Muffin, a large language model trained by Microsoft.<|end|>' }}
20
+ {%- set start_index = 0 %}
21
+ {%- endif %}
22
+ {%- endif %}
23
+ {%- for message in messages[start_index:] %}
24
+ {#- Only string content is rendered; any other content value is replaced by an empty string. -#}{%- if message.content is string %}
25
+ {%- set content = message.content %}
26
+ {%- else %}
27
+ {%- set content = '' %}
28
+ {%- endif %}
29
+ {%- if message.role == 'user' %}
30
+ {{- '<|user|>' + content + '<|end|>' }}
31
+ {%- elif message.role == 'assistant' %}
32
+ {{- '<|assistant|>' }}
33
+ {{- content }}
34
+ {%- if message.tool_calls %}
35
+ {%- for tool_call in message.tool_calls %}
36
+ {#- Accept both the nested form (tool_call.function) and a flat tool call object. -#}{%- if tool_call.function %}
37
+ {%- set tc = tool_call.function %}
38
+ {%- else %}
39
+ {%- set tc = tool_call %}
40
+ {%- endif %}
41
+ {{- '<|tool_call|>' }}
42
+ {{- '{\"name\": \"' ~ tc.name ~ '\", \"arguments\": ' }}
43
+ {#- String arguments are written verbatim; any other value is serialised with tojson. -#}{%- if tc.arguments is string %}
44
+ {{- tc.arguments }}
45
+ {%- else %}
46
+ {{- tc.arguments | tojson }}
47
+ {%- endif %}
48
+ {{- '}' }}
49
+ {{- '<|/tool_call|>' }}
50
+ {%- endfor %}
51
+ {%- endif %}
52
+ {#- A final assistant turn rendered without a generation prompt is closed with <|endofprompt|> instead of <|end|>. -#}{%- if loop.last and not add_generation_prompt %}
53
+ {{- '<|endofprompt|>' }}
54
+ {%- else %}
55
+ {{- '<|end|>' }}
56
+ {%- endif %}
57
+ {%- elif message.role == 'tool' %}
58
+ {#- Consecutive tool messages share one <|user|> turn: it is opened before the first of the run and closed after the last. -#}{%- if loop.first or (messages[loop.index0 + start_index - 1].role != 'tool') %}
59
+ {{- '<|user|>' }}
60
+ {%- endif %}
61
+ {{- '<|tool_response|>' }}
62
+ {{- content }}
63
+ {%- if loop.last or (messages[loop.index0 + start_index + 1].role != 'tool') %}
64
+ {#- Emitted as an expression so the result does not depend on the trim_blocks setting or on source indentation. -#}{{- '<|end|>' }}
65
+ {%- endif %}
66
+ {#- System messages that are not the first message of the conversation are rendered in place. -#}{%- elif message.role == 'system' and (start_index + loop.index0) != 0 %}
67
+ {{- '<|system|>' + content + '<|end|>' }}
68
+ {%- endif %}
69
+ {%- endfor %}
70
+ {#- With add_generation_prompt set, the output ends with an open assistant turn for the model to continue. -#}{%- if add_generation_prompt %}
71
+ {{- '<|assistant|>' }}
72
+ {%- else %}
73
+ {%- endif %}
muffin/onnx-webgpu/edge_on_device_model_execution_config.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d6174e6bddd62fc999e8578ef48a449b30976076088613f118091ce303ecf0b5
3
+ size 8812
muffin/onnx-webgpu/encoder_cache.bin ADDED
File without changes
muffin/onnx-webgpu/genai_config.json ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "bos_token_id": 1,
4
+ "context_length": 8192,
5
+ "decoder": {
6
+ "session_options": {
7
+ "log_id": "onnxruntime-genai",
8
+ "provider_options": [
9
+ {
10
+ "webgpu": {
11
+ "enableGraphCapture": "1",
12
+ "validationMode": "disabled"
13
+ }
14
+ }
15
+ ]
16
+ },
17
+ "filename": "model.onnx",
18
+ "head_size": 128,
19
+ "hidden_size": 2048,
20
+ "inputs": {
21
+ "input_ids": "input_ids",
22
+ "attention_mask": "attention_mask",
23
+ "past_key_names": "past_key_values.%d.key",
24
+ "past_value_names": "past_key_values.%d.value"
25
+ },
26
+ "outputs": {
27
+ "logits": "logits",
28
+ "present_key_names": "present.%d.key",
29
+ "present_value_names": "present.%d.value"
30
+ },
31
+ "num_attention_heads": 24,
32
+ "num_hidden_layers": 28,
33
+ "num_key_value_heads": 8
34
+ },
35
+ "eos_token_id": [
36
+ 200018,
37
+ 200018,
38
+ 200020,
39
+ 200019
40
+ ],
41
+ "pad_token_id": 199999,
42
+ "type": "qwen3",
43
+ "vocab_size": 200029
44
+ },
45
+ "search": {
46
+ "diversity_penalty": 0.0,
47
+ "do_sample": true,
48
+ "early_stopping": true,
49
+ "length_penalty": 1.0,
50
+ "max_length": 8192,
51
+ "min_length": 0,
52
+ "no_repeat_ngram_size": 0,
53
+ "num_beams": 1,
54
+ "num_return_sequences": 1,
55
+ "past_present_share_buffer": true,
56
+ "repetition_penalty": 1.2,
57
+ "temperature": 0.6,
58
+ "top_k": 20,
59
+ "top_p": 0.95
60
+ }
61
+ }
muffin/onnx-webgpu/manifest.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "manifest_version": 2,
3
+ "name": "Edge On Device Model",
4
+ "version": "2026.5.8.1",
5
+ "BaseModelSpec": {
6
+ "supported_performance_hints": [
7
+ 2,
8
+ 1
9
+ ],
10
+ "name": "Muffin",
11
+ "version": "2026.5.8.1",
12
+ "type": "webgpu"
13
+ }
14
+ }
muffin/onnx-webgpu/model.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9adf24cf528a984dd5d50d8496ad90054c6422da87f119135321c5c88502bfdf
3
+ size 353943
muffin/onnx-webgpu/model.onnx.data ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dca047f823e188347b2bedf11e5b6b1f13eef88c8a8f3c80f900df06f5ab8a88
3
+ size 1494558336
muffin/onnx-webgpu/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ea8bdf68c3e7549a3fb4342523288ce628f6ab56a618f9a4dfb234a0b4d46a8
3
+ size 15524476
muffin/onnx-webgpu/tokenizer_config.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "backend": "tokenizers",
4
+ "bos_token": null,
5
+ "clean_up_tokenization_spaces": false,
6
+ "eos_token": "<|endofprompt|>",
7
+ "is_local": true,
8
+ "model_max_length": 8192,
9
+ "pad_token": "<|endoftext|>",
10
+ "tokenizer_class": "TokenizersBackend",
11
+ "unk_token": null
12
+ }