File size: 538 Bytes
ba43904
 
 
 
85d9e0e
 
 
 
 
 
 
 
 
f65b51a
d839cd5
ba43904
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
{
  "exported_from": "pittsburghese-merged-prompt-completion",
  "output_dir": "pittsburghese-web",
  "onnx_task": "text-generation-with-past",
  "quantization": "dynamic-int8",
  "preprocess": {
    "symbolic_shape_inference": true,
    "optimization": false,
    "onnx_shape_inference": true
  },
  "per_channel": true,
  "op_types_to_quantize": [
    "MatMul"
  ],
  "note": "Accuracy-first q8 export using ORT preprocess + dynamic INT8 quantization. Quantized models are re-saved with external data location forced to *.onnx_data."
}