Upload optimized ONNX model

#1
by Xenova HF Staff - opened
.gitattributes CHANGED
@@ -34,3 +34,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  tokenizer.json filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  tokenizer.json filter=lfs diff=lfs merge=lfs -text
37
+ onnx/model.onnx_data filter=lfs diff=lfs merge=lfs -text
38
+ onnx/model_fp16.onnx_data filter=lfs diff=lfs merge=lfs -text
39
+ onnx/model_q4.onnx_data filter=lfs diff=lfs merge=lfs -text
40
+ onnx/model_q4f16.onnx_data filter=lfs diff=lfs merge=lfs -text
config.json CHANGED
@@ -7,7 +7,7 @@
7
  "attention_dropout": 0.0,
8
  "attn_logit_softcapping": null,
9
  "bos_token_id": 2,
10
- "torch_dtype": "float16",
11
  "eos_token_id": 106,
12
  "final_logit_softcapping": null,
13
  "head_dim": 256,
@@ -43,14 +43,33 @@
43
  "pad_token_id": 0,
44
  "query_pre_attn_scalar": 256,
45
  "rms_norm_eps": 1e-06,
46
- "rope_local_base_freq": 10000.0,
47
- "rope_scaling": null,
48
- "rope_theta": 1000000.0,
 
 
 
 
 
 
 
49
  "sliding_window": 512,
50
- "transformers_version": "4.57.3",
51
  "unsloth_fixed": true,
52
  "unsloth_version": "2026.1.2",
53
  "use_bidirectional_attention": false,
54
  "use_cache": true,
55
- "vocab_size": 262144
56
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  "attention_dropout": 0.0,
8
  "attn_logit_softcapping": null,
9
  "bos_token_id": 2,
10
+ "dtype": "float16",
11
  "eos_token_id": 106,
12
  "final_logit_softcapping": null,
13
  "head_dim": 256,
 
43
  "pad_token_id": 0,
44
  "query_pre_attn_scalar": 256,
45
  "rms_norm_eps": 1e-06,
46
+ "rope_parameters": {
47
+ "full_attention": {
48
+ "rope_theta": 1000000.0,
49
+ "rope_type": "default"
50
+ },
51
+ "sliding_attention": {
52
+ "rope_theta": 10000.0,
53
+ "rope_type": "default"
54
+ }
55
+ },
56
  "sliding_window": 512,
57
+ "transformers_version": "5.0.0.dev0",
58
  "unsloth_fixed": true,
59
  "unsloth_version": "2026.1.2",
60
  "use_bidirectional_attention": false,
61
  "use_cache": true,
62
+ "vocab_size": 262144,
63
+ "transformers.js_config": {
64
+ "use_external_data_format": {
65
+ "model.onnx": 1,
66
+ "model_fp16.onnx": 1,
67
+ "model_q4.onnx": 1,
68
+ "model_q4f16.onnx": 1
69
+ },
70
+ "kv_cache_dtype": {
71
+ "q4f16": "float16",
72
+ "fp16": "float16"
73
+ }
74
+ }
75
+ }
onnx/model.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e338be338d58e6ea71c7df51a659182eb77760adb83247bcdec139fe3bce81cc
3
+ size 184463
onnx/model.onnx_data ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:69a55aebb86cfa1387698a09c5b1a75ae7ed75aa11e45f786ece3239641d1ac4
3
+ size 1139501568
onnx/model_fp16.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb22ebcfb3cea0e3d7eac4225030087f7c5b9998256915e83fbddb6678bc93f4
3
+ size 272337
onnx/model_fp16.onnx_data ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:26e9c33ae1b6dd9faaadb272c0adfbd60fd945552a4e790f5a04f0f632952354
3
+ size 569862656
onnx/model_q4.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:895ed01f3891d473d10596f5d60e463e5184e321429a2fa883dfe80a87b1f538
3
+ size 230960
onnx/model_q4.onnx_data ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:df04662e4eec6cceca86d2655c63d8d39a0faef245f9f43252f1c9a7ebbac878
3
+ size 801090048
onnx/model_q4f16.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8af12c7a8d0c6a0d719c3b6a4f5e08f76eb2526085ea293a7a3bc8972784ea7d
3
+ size 318807
onnx/model_q4f16.onnx_data ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6f09ff1b5edb7fa34dc9dd3056fe24caa97a01ad9e38301fc826cc0778fc50ec
3
+ size 425724416
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b6b09a0b4a803ad453063ca4bb49a784540e8120004e2450e025df2b27d41fb2
3
- size 33384899
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1fa00479b9f9d41a876e5cd1b369eea7af974fa0bdcc994c987972056d8ed5c2
3
+ size 20323384
tokenizer_config.json CHANGED
The diff for this file is too large to render. See raw diff