Optimize ONNX exports

#1
by Xenova HF Staff - opened
.gitattributes CHANGED
@@ -40,3 +40,5 @@ onnx/model_q8.onnx_data filter=lfs diff=lfs merge=lfs -text
40
  onnx/model.onnx_data_1 filter=lfs diff=lfs merge=lfs -text
41
  onnx/model.onnx_data_2 filter=lfs diff=lfs merge=lfs -text
42
  onnx/model_fp16.onnx_data_1 filter=lfs diff=lfs merge=lfs -text
 
 
 
40
  onnx/model.onnx_data_1 filter=lfs diff=lfs merge=lfs -text
41
  onnx/model.onnx_data_2 filter=lfs diff=lfs merge=lfs -text
42
  onnx/model_fp16.onnx_data_1 filter=lfs diff=lfs merge=lfs -text
43
+ onnx/model_q4f16.onnx_data filter=lfs diff=lfs merge=lfs -text
44
+ onnx/model_quantized.onnx_data filter=lfs diff=lfs merge=lfs -text
config.json CHANGED
@@ -53,14 +53,22 @@
53
  "rope_type": "default"
54
  },
55
  "tie_embedding": true,
56
- "transformers_version": "5.0.0.dev0",
 
57
  "use_cache": true,
58
  "use_pos_enc": true,
59
  "vocab_size": 65536,
60
  "transformers.js_config": {
61
- "kv_cache_dtype": {
62
- "fp32": "float32"
 
 
 
 
63
  },
64
- "use_external_data_format": true
 
 
 
65
  }
66
  }
 
53
  "rope_type": "default"
54
  },
55
  "tie_embedding": true,
56
+ "tie_word_embeddings": true,
57
+ "transformers_version": "5.1.0",
58
  "use_cache": true,
59
  "use_pos_enc": true,
60
  "vocab_size": 65536,
61
  "transformers.js_config": {
62
+ "use_external_data_format": {
63
+ "model.onnx": 3,
64
+ "model_fp16.onnx": 2,
65
+ "model_quantized.onnx": 1,
66
+ "model_q4.onnx": 1,
67
+ "model_q4f16.onnx": 1
68
  },
69
+ "kv_cache_dtype": {
70
+ "q4f16": "float16",
71
+ "fp16": "float16"
72
+ }
73
  }
74
  }
generation_config.json CHANGED
@@ -3,5 +3,5 @@
3
  "bos_token_id": 1,
4
  "eos_token_id": 7,
5
  "pad_token_id": 0,
6
- "transformers_version": "4.54.0"
7
- }
 
3
  "bos_token_id": 1,
4
  "eos_token_id": 7,
5
  "pad_token_id": 0,
6
+ "transformers_version": "5.1.0"
7
+ }
onnx/model.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1bb2baa6debd7dfbb6c852fb9dfb776d25d09c8ec99d4c96ebcbaa72dd01416a
3
- size 145492
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:240b836266733940519db3893df325b4cfac8bb87222acbfa0f05dd8c87a6639
3
+ size 140810
onnx/model.onnx_data CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5a0856d79deadd021aab372b778006498193e003e2425bce4e6a84b6c8763a15
3
- size 2130132992
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:04339fb19070d30e0b5523dfecf21b5a469dd3056a43ccd050ce6e756f039ebe
3
+ size 2063007744
onnx/model.onnx_data_1 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e5ddad41854b97da6c0468136e1b44dbd161c780c3355928a5416d4fdb0f1462
3
- size 2139348992
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf2d2083fb09ebc8dfcdc46709ed1b9bc978dd7a08a83a6993f30c078b3e6fc5
3
+ size 2072240128
onnx/model.onnx_data_2 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:abc4e6c63035cc22207c20446cff3807e0865160718676d5366bc6956ccc07f0
3
- size 444645376
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5817c9c46604164ef96aa9524e34422edc3ffa86e480bafbbcc80de38c0a7759
3
+ size 578879488
onnx/model_fp16.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:03cbd9f5ef71c72c669795b1717659bc03b4fc14f50ef1bce835c463176bf5d1
3
- size 151225
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc743b4b9ba1bbb4b2549d09045b2b78717712a72ec68ea33bee69c166610e25
3
+ size 140027
onnx/model_fp16.onnx_data CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2341f7cf799c64d6adb142867a901a962665081f83d1d456890ecde8424f2998
3
- size 2134740992
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dbc6b62ebc6547f80f5849d52f2e4511c0c1648984e46d71a7972534486bdeee
3
+ size 2067623936
onnx/model_fp16.onnx_data_1 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6028e1d515192ed99b101e9345900fa62572d708f24bf09c86b80e22932a958f
3
- size 222322688
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3e320c36cdd1ed24e4af7f256151787128ff67a8fa36b1ff6b2c3f57edb5e6e4
3
+ size 289439744
onnx/model_q4.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:65d45073c4b9ce3281397d226df7cc95c2f7a7fa5328dcb1109d480f9fddbc7c
3
- size 172944
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c760a2d8be1d19c71c470ef294c1220ef85b5b3a0e31f12a363655ea7fab1d58
3
+ size 183173
onnx/model_q4.onnx_data CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4b88bd4d65635398300edc8a82857dc1d08b6fb01e797cd4b323eb5bd61bb507
3
- size 1217650688
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d9666c44e2acc32f06c9351f9e7c4fd66bc060d88ca8e7f954e836c6845f7488
3
+ size 850059264
onnx/model_q4f16.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a9986ad188200507342ac32f727aa4691edb5428b0aa8c4a9fbdf0c85a6fe667
3
+ size 182795
onnx/model_q4f16.onnx_data ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:46cfacc12941150620a3f644a5269e9baebd75d681cfa09cadeef71b8ed64ac2
3
+ size 760279040
onnx/model_quantized.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:41f10dd256378c87406abda307aac9147d1a3e4f2df8d7cc15bdeac560382428
3
+ size 185703
onnx/model_quantized.onnx_data ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fe9d6c2ff3f8cf3383ea18e90c32e5bf1aedf68a0dfc2a0afb32ff24b4903d97
3
+ size 1520558080
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
@@ -1,17 +1,15 @@
1
  {
2
- "additional_special_tokens": null,
3
  "backend": "tokenizers",
4
  "bos_token": "<|startoftext|>",
5
  "clean_up_tokenization_spaces": false,
6
  "eos_token": "<|im_end|>",
7
- "is_local": true,
8
  "legacy": false,
9
  "model_input_names": [
10
  "input_ids",
11
  "attention_mask"
12
  ],
13
  "model_max_length": 1000000000000000019884624838656,
14
- "model_specific_special_tokens": {},
15
  "pad_token": "<|pad|>",
16
  "sp_model_kwargs": {},
17
  "spaces_between_special_tokens": false,
 
1
  {
 
2
  "backend": "tokenizers",
3
  "bos_token": "<|startoftext|>",
4
  "clean_up_tokenization_spaces": false,
5
  "eos_token": "<|im_end|>",
6
+ "is_local": false,
7
  "legacy": false,
8
  "model_input_names": [
9
  "input_ids",
10
  "attention_mask"
11
  ],
12
  "model_max_length": 1000000000000000019884624838656,
 
13
  "pad_token": "<|pad|>",
14
  "sp_model_kwargs": {},
15
  "spaces_between_special_tokens": false,