{
  "model_type": "ONNX",
  "npu_mode": "NPU3",
  "quant": {
    "input_configs": [
      {
        "tensor_name": "speech",
        "calibration_dataset": "./calibration_dataset/speech.tar.gz",
        "calibration_size": -1,
        "calibration_format": "Numpy"
      },
      {
        "tensor_name": "speech_lengths",
        "calibration_dataset": "./calibration_dataset/speech_lengths.tar.gz",
        "calibration_size": -1,
        "calibration_format": "Numpy"
      }
    ],
    "layer_configs": [
      {
        "start_tensor_names": ["DEFAULT"],
        "end_tensor_names": ["DEFAULT"],
        "data_type": "U16"
      }
    ],
    "calibration_method": "MinMax",
    "conv_bias_data_type": "FP32",
    "disable_auto_refine_scale": true,
    "enable_smooth_quant": true,
    "precision_analysis": true,
    "precision_analysis_method": "EndToEnd",
    "transformer_opt_level": 1
  },
  "input_processors": [
    {
      "tensor_name": "speech",
      "src_dtype": "FP32"
    },
    {
      "tensor_name": "speech_lengths",
      "src_dtype": "S32"
    }
  ],
  "compiler": {
    "check": 0
  }
}