MapleJc committed on
Commit
783c540
·
verified ·
1 Parent(s): 4ad62dd

Upload folder using huggingface_hub

Browse files
Files changed (2) hide show
  1. onnx/model_quantized.onnx +3 -0
  2. onnx/ort_config.json +27 -31
onnx/model_quantized.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ee0de17cffc413323b61d8a8e602af1cbb37b77e6bf517d568c2054cc89f8b53
3
+ size 11414361
onnx/ort_config.json CHANGED
@@ -1,37 +1,33 @@
1
  {
2
  "one_external_file": true,
3
  "opset": null,
4
- "optimization": {
5
- "disable_attention": null,
6
- "disable_attention_fusion": false,
7
- "disable_bias_gelu": null,
8
- "disable_bias_gelu_fusion": false,
9
- "disable_bias_skip_layer_norm": null,
10
- "disable_bias_skip_layer_norm_fusion": false,
11
- "disable_embed_layer_norm": true,
12
- "disable_embed_layer_norm_fusion": true,
13
- "disable_gelu": null,
14
- "disable_gelu_fusion": false,
15
- "disable_group_norm_fusion": true,
16
- "disable_layer_norm": null,
17
- "disable_layer_norm_fusion": false,
18
- "disable_packed_kv": true,
19
- "disable_rotary_embeddings": false,
20
- "disable_shape_inference": true,
21
- "disable_skip_layer_norm": null,
22
- "disable_skip_layer_norm_fusion": false,
23
- "enable_gelu_approximation": false,
24
- "enable_gemm_fast_gelu_fusion": false,
25
- "enable_transformers_specific_optimizations": true,
26
- "fp16": false,
27
- "no_attention_mask": false,
28
- "optimization_level": 2,
29
- "optimize_for_gpu": false,
30
- "optimize_with_onnxruntime_only": null,
31
- "use_mask_index": false,
32
- "use_multi_head_attention": false,
33
- "use_raw_attention_mask": false
34
  },
35
- "quantization": {},
36
  "use_external_data_format": false
37
  }
 
1
  {
2
  "one_external_file": true,
3
  "opset": null,
4
+ "optimization": {},
5
+ "quantization": {
6
+ "activations_dtype": "QUInt8",
7
+ "activations_symmetric": false,
8
+ "format": "QOperator",
9
+ "is_static": false,
10
+ "mode": "IntegerOps",
11
+ "nodes_to_exclude": [],
12
+ "nodes_to_quantize": [],
13
+ "operators_to_quantize": [
14
+ "Conv",
15
+ "MatMul",
16
+ "Attention",
17
+ "LSTM",
18
+ "Gather",
19
+ "Transpose",
20
+ "EmbedLayerNormalization"
21
+ ],
22
+ "per_channel": false,
23
+ "qdq_add_pair_to_weight": false,
24
+ "qdq_dedicated_pair": false,
25
+ "qdq_op_type_per_channel_support_to_axis": {
26
+ "MatMul": 1
27
+ },
28
+ "reduce_range": false,
29
+ "weights_dtype": "QInt8",
30
+ "weights_symmetric": true
 
 
 
31
  },
 
32
  "use_external_data_format": false
33
  }