{
"model_type": "ONNX",
"npu_mode": "NPU3",
"quant": {
"input_configs": [
{
"tensor_name": "tokens",
"calibration_dataset": "./calibrations_tiny/decoder_loop/tokens.tar.gz",
"calibration_size": -1,
"calibration_format": "Numpy"
},
{
"tensor_name": "n_layer_cross_k",
"calibration_dataset": "./calibrations_tiny/decoder_loop/n_layer_cross_k.tar.gz",
"calibration_size": -1,
"calibration_format": "Numpy"
},
{
"tensor_name": "in_n_layer_self_k_cache",
"calibration_dataset": "./calibrations_tiny/decoder_loop/n_layer_self_k_cache.tar.gz",
"calibration_size": -1,
"calibration_format": "Numpy"
},
{
"tensor_name": "in_n_layer_self_v_cache",
"calibration_dataset": "./calibrations_tiny/decoder_loop/n_layer_self_v_cache.tar.gz",
"calibration_size": -1,
"calibration_format": "Numpy"
},
{
"tensor_name": "n_layer_cross_v",
"calibration_dataset": "./calibrations_tiny/decoder_loop/n_layer_cross_v.tar.gz",
"calibration_size": -1,
"calibration_format": "Numpy"
},
{
"tensor_name": "positional_embedding",
"calibration_dataset": "./calibrations_tiny/decoder_loop/positional_embedding.tar.gz",
"calibration_size": -1,
"calibration_format": "Numpy"
},
{
"tensor_name": "mask",
"calibration_dataset": "./calibrations_tiny/decoder_loop/mask.tar.gz",
"calibration_size": -1,
"calibration_format": "Numpy"
}
],
"layer_configs": [
{
"start_tensor_names": ["DEFAULT"],
"end_tensor_names": ["DEFAULT"],
"data_type": "U16"
},
{
"op_types": ["Gather"],
"data_type": "FP32"
},
{
"layer_names": ["/Add_1", "/Add_5", "/Add_9", "/Add_13"],
"data_type": "FP32"
},
{
"layer_names": ["/Softmax", "/Softmax_2", "/Softmax_4", "/Softmax_6"],
"data_type": "FP32"
}
],
"calibration_method": "MinMax"
},
"input_processors": [
{
"tensor_name": "DEFAULT"
}
],
"compiler": {
"check": 2
}
}