Upload optimized ONNX model (#4)
Browse files
- Upload optimized ONNX model (615841e7a38681c6b76b1c653d180e116bbf06db)
- Upload 2 files (6ca4715a4a488838c413a337a0d384054c7defcc)
- .gitattributes +5 -0
- config.json +16 -4
- generation_config.json +1 -1
- onnx/model.onnx +3 -0
- onnx/model.onnx_data +3 -0
- onnx/model.onnx_data_1 +3 -0
- onnx/model_fp16.onnx +3 -0
- onnx/model_fp16.onnx_data +3 -0
- onnx/model_q4.onnx +3 -0
- onnx/model_q4.onnx_data +3 -0
- onnx/model_q4f16.onnx +3 -0
- onnx/model_q4f16.onnx_data +3 -0
.gitattributes
CHANGED
|
@@ -38,3 +38,8 @@ Constant_171_attr__value filter=lfs diff=lfs merge=lfs -text
|
|
| 38 |
onnx/decoder_model.onnx_data filter=lfs diff=lfs merge=lfs -text
|
| 39 |
onnx/decoder_model_merged.onnx_data filter=lfs diff=lfs merge=lfs -text
|
| 40 |
onnx/decoder_with_past_model.onnx_data filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
onnx/decoder_model.onnx_data filter=lfs diff=lfs merge=lfs -text
|
| 39 |
onnx/decoder_model_merged.onnx_data filter=lfs diff=lfs merge=lfs -text
|
| 40 |
onnx/decoder_with_past_model.onnx_data filter=lfs diff=lfs merge=lfs -text
|
| 41 |
+
onnx/model.onnx_data filter=lfs diff=lfs merge=lfs -text
|
| 42 |
+
onnx/model.onnx_data_1 filter=lfs diff=lfs merge=lfs -text
|
| 43 |
+
onnx/model_fp16.onnx_data filter=lfs diff=lfs merge=lfs -text
|
| 44 |
+
onnx/model_q4.onnx_data filter=lfs diff=lfs merge=lfs -text
|
| 45 |
+
onnx/model_q4f16.onnx_data filter=lfs diff=lfs merge=lfs -text
|
config.json
CHANGED
|
@@ -1,11 +1,11 @@
|
|
| 1 |
{
|
| 2 |
-
"_name_or_path": "MBZUAI/LaMini-GPT-774M",
|
| 3 |
"activation_function": "gelu_new",
|
| 4 |
"architectures": [
|
| 5 |
"GPT2LMHeadModel"
|
| 6 |
],
|
| 7 |
"attn_pdrop": 0.1,
|
| 8 |
"bos_token_id": 50256,
|
|
|
|
| 9 |
"embd_pdrop": 0.1,
|
| 10 |
"eos_token_id": 50256,
|
| 11 |
"initializer_range": 0.02,
|
|
@@ -32,7 +32,19 @@
|
|
| 32 |
"max_length": 50
|
| 33 |
}
|
| 34 |
},
|
| 35 |
-
"transformers_version": "
|
| 36 |
"use_cache": false,
|
| 37 |
-
"vocab_size": 50258
|
| 38 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
{
|
|
|
|
| 2 |
"activation_function": "gelu_new",
|
| 3 |
"architectures": [
|
| 4 |
"GPT2LMHeadModel"
|
| 5 |
],
|
| 6 |
"attn_pdrop": 0.1,
|
| 7 |
"bos_token_id": 50256,
|
| 8 |
+
"dtype": "float32",
|
| 9 |
"embd_pdrop": 0.1,
|
| 10 |
"eos_token_id": 50256,
|
| 11 |
"initializer_range": 0.02,
|
|
|
|
| 32 |
"max_length": 50
|
| 33 |
}
|
| 34 |
},
|
| 35 |
+
"transformers_version": "5.0.0.dev0",
|
| 36 |
"use_cache": false,
|
| 37 |
+
"vocab_size": 50258,
|
| 38 |
+
"transformers.js_config": {
|
| 39 |
+
"use_external_data_format": {
|
| 40 |
+
"model.onnx": 2,
|
| 41 |
+
"model_fp16.onnx": 1,
|
| 42 |
+
"model_q4.onnx": 1,
|
| 43 |
+
"model_q4f16.onnx": 1
|
| 44 |
+
},
|
| 45 |
+
"kv_cache_dtype": {
|
| 46 |
+
"q4f16": "float16",
|
| 47 |
+
"fp16": "float16"
|
| 48 |
+
}
|
| 49 |
+
}
|
| 50 |
+
}
|
generation_config.json
CHANGED
|
@@ -2,5 +2,5 @@
|
|
| 2 |
"_from_model_config": true,
|
| 3 |
"bos_token_id": 50256,
|
| 4 |
"eos_token_id": 50256,
|
| 5 |
-
"transformers_version": "
|
| 6 |
}
|
|
|
|
| 2 |
"_from_model_config": true,
|
| 3 |
"bos_token_id": 50256,
|
| 4 |
"eos_token_id": 50256,
|
| 5 |
+
"transformers_version": "5.0.0.dev0"
|
| 6 |
}
|
onnx/model.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5fdfc02439c49b27d7d73c2a598e50d7b13805e1d1ae99d07606fc7997b3b5e9
|
| 3 |
+
size 304626
|
onnx/model.onnx_data
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5917dc0a1bdcb21f343a57affeb59d70bd071c5d79f5043dd78c5ec765475f8c
|
| 3 |
+
size 2092574720
|
onnx/model.onnx_data_1
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:dc3c928ff6659bb295bbe2a30fa1359c6a42caa59bd035948c2847da2e8df3cb
|
| 3 |
+
size 1003550720
|
onnx/model_fp16.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5ab1251cfa51c7a89cdd7d4971a710172a96fe206150b42cc178047da531bef1
|
| 3 |
+
size 307363
|
onnx/model_fp16.onnx_data
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5ea921610fe0f965da4ab3f57157c80134c54200e9ba6509a6d4b460e801a5b5
|
| 3 |
+
size 1548062720
|
onnx/model_q4.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b52541d6fe9a006d7d7609ebffeb70eb3904bc0ad4f2f222cad5765d321f8b6d
|
| 3 |
+
size 383528
|
onnx/model_q4.onnx_data
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5d18c869b68d386ed5fe74bf177cde19fc4629d5c59a02bc96122c31765cc1dc
|
| 3 |
+
size 707338240
|
onnx/model_q4f16.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:59c707206744628df5cdc329f43f5b4a32e67dc45a7e089b359309510a8fa6d5
|
| 3 |
+
size 386243
|
onnx/model_q4f16.onnx_data
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:40868290369e98fb5f35d91a73e66ca0552cba5ed1fedfc8c0036a88b86f6b48
|
| 3 |
+
size 530616320
|