{
  "model_type": "ONNX",
  "npu_mode": "NPU3",
  "quant": {
    "input_configs": [
      {
        "tensor_name": "chunk_xs",
        "calibration_dataset": "./calibration_dataset/chunk_xs.tar.gz",
        "calibration_size": -1,
        "calibration_format": "Numpy"
      },
      {
        "tensor_name": "chunk_lens",
        "calibration_dataset": "./calibration_dataset/chunk_lens.tar.gz",
        "calibration_size": -1,
        "calibration_format": "Numpy"
      },
      {
        "tensor_name": "offset",
        "calibration_dataset": "./calibration_dataset/offset.tar.gz",
        "calibration_size": -1,
        "calibration_format": "Numpy"
      },
      {
        "tensor_name": "att_cache",
        "calibration_dataset": "./calibration_dataset/att_cache.tar.gz",
        "calibration_size": -1,
        "calibration_format": "Numpy"
      },
      {
        "tensor_name": "cnn_cache",
        "calibration_dataset": "./calibration_dataset/cnn_cache.tar.gz",
        "calibration_size": -1,
        "calibration_format": "Numpy"
      },
      {
        "tensor_name": "cache_mask",
        "calibration_dataset": "./calibration_dataset/cache_mask.tar.gz",
        "calibration_size": -1,
        "calibration_format": "Numpy"
      }
    ],
    "layer_configs": [
      {
        "start_tensor_names": ["DEFAULT"],
        "end_tensor_names": ["DEFAULT"],
        "data_type": "U16"
      }
    ],
    "calibration_method": "MinMax",
    "conv_bias_data_type": "FP32",
    "disable_auto_refine_scale": true,
    "enable_smooth_quant": true,
    "precision_analysis": true,
    "precision_analysis_method": "EndToEnd",
    "transformer_opt_level": 1
  },
  "input_processors": [
    {
      "tensor_name": "chunk_xs",
      "src_dtype": "FP32"
    },
    {
      "tensor_name": "chunk_lens",
      "src_dtype": "S32"
    },
    {
      "tensor_name": "offset",
      "src_dtype": "S32"
    },
    {
      "tensor_name": "att_cache",
      "src_dtype": "FP32"
    },
    {
      "tensor_name": "cnn_cache",
      "src_dtype": "FP32"
    },
    {
      "tensor_name": "cache_mask",
      "src_dtype": "FP32"
    }
  ],
  "compiler": {
    "check": 0
  }
}