willopcbeta committed on
Commit
b1de0b3
·
verified ·
1 Parent(s): a256402

Upload 11 files

Browse files
.gitattributes CHANGED
@@ -40,3 +40,8 @@ onnx/model_q4f16.onnx_data filter=lfs diff=lfs merge=lfs -text
40
  onnx/model_q4f16.onnx_data_1 filter=lfs diff=lfs merge=lfs -text
41
  onnx/model_q4f16.onnx_data_2 filter=lfs diff=lfs merge=lfs -text
42
  tokenizer.json filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
40
  onnx/model_q4f16.onnx_data_1 filter=lfs diff=lfs merge=lfs -text
41
  onnx/model_q4f16.onnx_data_2 filter=lfs diff=lfs merge=lfs -text
42
  tokenizer.json filter=lfs diff=lfs merge=lfs -text
43
+ int4_cpu_block_32_accuracy_level_4/model_q4.onnx_data filter=lfs diff=lfs merge=lfs -text
44
+ int4_cpu_block_32_accuracy_level_4/model_q4.onnx_data_1 filter=lfs diff=lfs merge=lfs -text
45
+ int4_cpu_block_32_accuracy_level_4/model_q4.onnx_data_2 filter=lfs diff=lfs merge=lfs -text
46
+ int4_cpu_block_32_accuracy_level_4/model.onnx.data filter=lfs diff=lfs merge=lfs -text
47
+ int4_cpu_block_32_accuracy_level_4/tokenizer.json filter=lfs diff=lfs merge=lfs -text
int4_cpu_block_32_accuracy_level_4/chat_template.jinja ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {{ bos_token }}
2
+ {%- if messages[0]['role'] == 'system' -%}
3
+ {%- if messages[0]['content'] is string -%}
4
+ {%- set first_user_prefix = messages[0]['content'] + '
5
+
6
+ ' -%}
7
+ {%- else -%}
8
+ {%- set first_user_prefix = messages[0]['content'][0]['text'] + '
9
+
10
+ ' -%}
11
+ {%- endif -%}
12
+ {%- set loop_messages = messages[1:] -%}
13
+ {%- else -%}
14
+ {%- set first_user_prefix = "" -%}
15
+ {%- set loop_messages = messages -%}
16
+ {%- endif -%}
17
+ {%- for message in loop_messages -%}
18
+ {%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) -%}
19
+ {{ raise_exception("Conversation roles must alternate user/assistant/user/assistant/...") }}
20
+ {%- endif -%}
21
+ {%- if (message['role'] == 'assistant') -%}
22
+ {%- set role = "model" -%}
23
+ {%- else -%}
24
+ {%- set role = message['role'] -%}
25
+ {%- endif -%}
26
+ {{ '<start_of_turn>' + role + '
27
+ ' + (first_user_prefix if loop.first else "") }}
28
+ {%- if message['content'] is string -%}
29
+ {{ message['content'] | trim }}
30
+ {%- elif message['content'] is iterable -%}
31
+ {%- for item in message['content'] -%}
32
+ {%- if item['type'] == 'image' -%}
33
+ {{ '<start_of_image>' }}
34
+ {%- elif item['type'] == 'text' -%}
35
+ {{ item['text'] | trim }}
36
+ {%- endif -%}
37
+ {%- endfor -%}
38
+ {%- else -%}
39
+ {{ raise_exception("Invalid content type") }}
40
+ {%- endif -%}
41
+ {{ '<end_of_turn>
42
+ ' }}
43
+ {%- endfor -%}
44
+ {%- if add_generation_prompt -%}
45
+ {{'<start_of_turn>model
46
+ '}}
47
+ {%- endif -%}
int4_cpu_block_32_accuracy_level_4/genai_config.json ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "bos_token_id": 2,
4
+ "context_length": 131072,
5
+ "decoder": {
6
+ "session_options": {
7
+ "log_id": "onnxruntime-genai",
8
+ "provider_options": []
9
+ },
10
+ "filename": "model.onnx",
11
+ "head_size": 256,
12
+ "hidden_size": 2560,
13
+ "inputs": {
14
+ "input_ids": "input_ids",
15
+ "attention_mask": "attention_mask",
16
+ "past_key_names": "past_key_values.%d.key",
17
+ "past_value_names": "past_key_values.%d.value"
18
+ },
19
+ "outputs": {
20
+ "logits": "logits",
21
+ "present_key_names": "present.%d.key",
22
+ "present_value_names": "present.%d.value"
23
+ },
24
+ "num_attention_heads": 8,
25
+ "num_hidden_layers": 34,
26
+ "num_key_value_heads": 4
27
+ },
28
+ "eos_token_id": [
29
+ 1,
30
+ 106
31
+ ],
32
+ "pad_token_id": 0,
33
+ "type": "gemma3_text",
34
+ "vocab_size": 262208
35
+ },
36
+ "search": {
37
+ "diversity_penalty": 0.0,
38
+ "do_sample": true,
39
+ "early_stopping": true,
40
+ "length_penalty": 1.0,
41
+ "max_length": 131072,
42
+ "min_length": 0,
43
+ "no_repeat_ngram_size": 0,
44
+ "num_beams": 1,
45
+ "num_return_sequences": 1,
46
+ "past_present_share_buffer": true,
47
+ "repetition_penalty": 1.0,
48
+ "temperature": 1.0,
49
+ "top_k": 64,
50
+ "top_p": 0.95
51
+ }
52
+ }
int4_cpu_block_32_accuracy_level_4/model.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e99f17762ecab7be245ba67505935fa3cee3f1ff7e88202de070eb162041e404
3
+ size 432401
int4_cpu_block_32_accuracy_level_4/model.onnx.data ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d001b8457daf2db19c52bc86c1375066a0735dfb04e45e013320910807668bde
3
+ size 2694922240
int4_cpu_block_32_accuracy_level_4/model_q4.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a3b6f9ff5ee540f78391cf00fa322c385f3346b71bbf86288a23360f7a516fb8
3
+ size 435957
int4_cpu_block_32_accuracy_level_4/model_q4.onnx_data ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf447bf9bfcb77356a24c386c08624f89bd629a4f1fc701083ed63099ef1d044
3
+ size 1066170368
int4_cpu_block_32_accuracy_level_4/model_q4.onnx_data_1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a7036ee05471275273146477e94b99beb758d80fe9744c1a11e183b6ee842490
3
+ size 1061683200
int4_cpu_block_32_accuracy_level_4/model_q4.onnx_data_2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d3f06d912841e702aeb97070b62c0d394eb0492675fab4c2d57063ce3c0eb6b7
3
+ size 566988800
int4_cpu_block_32_accuracy_level_4/special_tokens_map.json ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "boi_token": "<start_of_image>",
3
+ "bos_token": {
4
+ "content": "<bos>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false
9
+ },
10
+ "eoi_token": "<end_of_image>",
11
+ "eos_token": {
12
+ "content": "<eos>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false
17
+ },
18
+ "image_token": "<image_soft_token>",
19
+ "pad_token": {
20
+ "content": "<pad>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false
25
+ },
26
+ "unk_token": {
27
+ "content": "<unk>",
28
+ "lstrip": false,
29
+ "normalized": false,
30
+ "rstrip": false,
31
+ "single_word": false
32
+ }
33
+ }
int4_cpu_block_32_accuracy_level_4/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4667f2089529e8e7657cfb6d1c19910ae71ff5f28aa7ab2ff2763330affad795
3
+ size 33384568
int4_cpu_block_32_accuracy_level_4/tokenizer_config.json ADDED
The diff for this file is too large to render. See raw diff