{
  "model_type": "ONNX",
  "npu_mode": "NPU3",
  "quant": {
    "input_configs": [
      {
        "tensor_name": "chunk_xs",
        "calibration_dataset": "./calibration_dataset/chunk_xs.tar.gz",
        "calibration_size": -1,
        "calibration_format": "Numpy"
      },
      {
        "tensor_name": "chunk_lens",
        "calibration_dataset": "./calibration_dataset/chunk_lens.tar.gz",
        "calibration_size": -1,
        "calibration_format": "Numpy"
      },
      {
        "tensor_name": "offset",
        "calibration_dataset": "./calibration_dataset/offset.tar.gz",
        "calibration_size": -1,
        "calibration_format": "Numpy"
      },
      {
        "tensor_name": "att_cache",
        "calibration_dataset": "./calibration_dataset/att_cache.tar.gz",
        "calibration_size": -1,
        "calibration_format": "Numpy"
      },
      {
        "tensor_name": "cnn_cache",
        "calibration_dataset": "./calibration_dataset/cnn_cache.tar.gz",
        "calibration_size": -1,
        "calibration_format": "Numpy"
      },
      {
        "tensor_name": "cache_mask",
        "calibration_dataset": "./calibration_dataset/cache_mask.tar.gz",
        "calibration_size": -1,
        "calibration_format": "Numpy"
      }
    ],
    "layer_configs": [
      {
        "start_tensor_names": ["DEFAULT"],
        "end_tensor_names": ["DEFAULT"],
        "data_type": "U16"
      }
    ],
    "calibration_method": "MinMax",
    "conv_bias_data_type": "FP32",
    "disable_auto_refine_scale": true,
    "enable_smooth_quant": true,
    "precision_analysis": true,
    "precision_analysis_method": "EndToEnd",
    "transformer_opt_level": 1
  },
  "input_processors": [
    {
      "tensor_name": "chunk_xs",
      "src_dtype": "FP32"
    },
    {
      "tensor_name": "chunk_lens",
      "src_dtype": "S32"
    },
    {
      "tensor_name": "offset",
      "src_dtype": "S32"
    },
    {
      "tensor_name": "att_cache",
      "src_dtype": "FP32"
    },
    {
      "tensor_name": "cnn_cache",
      "src_dtype": "FP32"
    },
    {
      "tensor_name": "cache_mask",
      "src_dtype": "FP32"
    }
  ],
  "compiler": {
    "check": 0
  }
}