Whisper / config_whisper_decoder_loop_u16.json
qqc1989's picture
Upload 29 files
0d2b474 verified
{
"model_type": "ONNX",
"npu_mode": "NPU3",
"quant": {
"input_configs": [
{
"tensor_name": "tokens",
"calibration_dataset": "./calibrations_tiny/decoder_loop/tokens.tar.gz",
"calibration_size": -1,
"calibration_format": "Numpy"
},
{
"tensor_name": "n_layer_cross_k",
"calibration_dataset": "./calibrations_tiny/decoder_loop/n_layer_cross_k.tar.gz",
"calibration_size": -1,
"calibration_format": "Numpy"
},
{
"tensor_name": "in_n_layer_self_k_cache",
"calibration_dataset": "./calibrations_tiny/decoder_loop/n_layer_self_k_cache.tar.gz",
"calibration_size": -1,
"calibration_format": "Numpy"
},
{
"tensor_name": "in_n_layer_self_v_cache",
"calibration_dataset": "./calibrations_tiny/decoder_loop/n_layer_self_v_cache.tar.gz",
"calibration_size": -1,
"calibration_format": "Numpy"
},
{
"tensor_name": "n_layer_cross_v",
"calibration_dataset": "./calibrations_tiny/decoder_loop/n_layer_cross_v.tar.gz",
"calibration_size": -1,
"calibration_format": "Numpy"
},
{
"tensor_name": "positional_embedding",
"calibration_dataset": "./calibrations_tiny/decoder_loop/positional_embedding.tar.gz",
"calibration_size": -1,
"calibration_format": "Numpy"
},
{
"tensor_name": "mask",
"calibration_dataset": "./calibrations_tiny/decoder_loop/mask.tar.gz",
"calibration_size": -1,
"calibration_format": "Numpy"
}
],
"layer_configs": [
{
"start_tensor_names": ["DEFAULT"],
"end_tensor_names": ["DEFAULT"],
"data_type": "U16"
},
{
"op_types": ["Gather"],
"data_type": "FP32"
},
{
"layer_names": ["/Add_1", "/Add_5", "/Add_9", "/Add_13"],
"data_type": "FP32"
},
{
"layer_names": ["/Softmax", "/Softmax_2", "/Softmax_4", "/Softmax_6"],
"data_type": "FP32"
}
],
"calibration_method": "MinMax"
},
"input_processors": [
{
"tensor_name": "DEFAULT"
}
],
"compiler": {
"check": 2
}
}