Upload optimized ONNX model

#2
by Xenova HF Staff - opened
config.json CHANGED
@@ -7,7 +7,7 @@
7
  "attention_dropout": 0.0,
8
  "attn_logit_softcapping": null,
9
  "bos_token_id": 2,
10
- "torch_dtype": "float16",
11
  "eos_token_id": 106,
12
  "final_logit_softcapping": null,
13
  "head_dim": 256,
@@ -43,14 +43,33 @@
43
  "pad_token_id": 0,
44
  "query_pre_attn_scalar": 256,
45
  "rms_norm_eps": 1e-06,
46
- "rope_local_base_freq": 10000.0,
47
- "rope_scaling": null,
48
- "rope_theta": 1000000.0,
 
 
 
 
 
 
 
49
  "sliding_window": 512,
50
- "transformers_version": "4.57.3",
51
  "unsloth_fixed": true,
52
  "unsloth_version": "2026.1.2",
53
  "use_bidirectional_attention": false,
54
  "use_cache": true,
55
- "vocab_size": 262144
56
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  "attention_dropout": 0.0,
8
  "attn_logit_softcapping": null,
9
  "bos_token_id": 2,
10
+ "dtype": "float16",
11
  "eos_token_id": 106,
12
  "final_logit_softcapping": null,
13
  "head_dim": 256,
 
43
  "pad_token_id": 0,
44
  "query_pre_attn_scalar": 256,
45
  "rms_norm_eps": 1e-06,
46
+ "rope_parameters": {
47
+ "full_attention": {
48
+ "rope_theta": 1000000.0,
49
+ "rope_type": "default"
50
+ },
51
+ "sliding_attention": {
52
+ "rope_theta": 10000.0,
53
+ "rope_type": "default"
54
+ }
55
+ },
56
  "sliding_window": 512,
57
+ "transformers_version": "5.0.0.dev0",
58
  "unsloth_fixed": true,
59
  "unsloth_version": "2026.1.2",
60
  "use_bidirectional_attention": false,
61
  "use_cache": true,
62
+ "vocab_size": 262144,
63
+ "transformers.js_config": {
64
+ "use_external_data_format": {
65
+ "model.onnx": 1,
66
+ "model_fp16.onnx": 1,
67
+ "model_q4.onnx": 1,
68
+ "model_q4f16.onnx": 1
69
+ },
70
+ "kv_cache_dtype": {
71
+ "q4f16": "float16",
72
+ "fp16": "float16"
73
+ }
74
+ }
75
+ }
onnx/model.onnx_data CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:69a55aebb86cfa1387698a09c5b1a75ae7ed75aa11e45f786ece3239641d1ac4
3
  size 1139501568
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b7d3a412f0da96eaa98edee828ce6c0dfebbbb9974485615ad14bab736526e08
3
  size 1139501568
onnx/model_fp16.onnx_data CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:26e9c33ae1b6dd9faaadb272c0adfbd60fd945552a4e790f5a04f0f632952354
3
  size 569862656
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e20f41ae1f64b827396fe58365d542d20f6d7f36e370701426446dfb59c153ee
3
  size 569862656
onnx/model_q4.onnx_data CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:df04662e4eec6cceca86d2655c63d8d39a0faef245f9f43252f1c9a7ebbac878
3
  size 801090048
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:abddf978d152ad7d57bc5f1a98504bdbd76ed72db808542b8149c369bb092faa
3
  size 801090048
onnx/model_q4f16.onnx_data CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6f09ff1b5edb7fa34dc9dd3056fe24caa97a01ad9e38301fc826cc0778fc50ec
3
  size 425724416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6fb1a1c91d28dac4c426e17df4b98127f04462a5e6ba23d639eb06a8c1bc3b6c
3
  size 425724416
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b6b09a0b4a803ad453063ca4bb49a784540e8120004e2450e025df2b27d41fb2
3
- size 33384899
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1fa00479b9f9d41a876e5cd1b369eea7af974fa0bdcc994c987972056d8ed5c2
3
+ size 20323384
tokenizer_config.json CHANGED
The diff for this file is too large to render. See raw diff