{
  "model_type": "ONNX",
  "npu_mode": "NPU3",
  "quant": {
    "input_configs": [
      {
        "tensor_name": "tokens",
        "calibration_dataset": "./calibrations_tiny/decoder_loop/tokens.tar.gz",
        "calibration_size": -1,
        "calibration_format": "Numpy"
      },
      {
        "tensor_name": "n_layer_cross_k",
        "calibration_dataset": "./calibrations_tiny/decoder_loop/n_layer_cross_k.tar.gz",
        "calibration_size": -1,
        "calibration_format": "Numpy"
      },
      {
        "tensor_name": "in_n_layer_self_k_cache",
        "calibration_dataset": "./calibrations_tiny/decoder_loop/n_layer_self_k_cache.tar.gz",
        "calibration_size": -1,
        "calibration_format": "Numpy"
      },
      {
        "tensor_name": "in_n_layer_self_v_cache",
        "calibration_dataset": "./calibrations_tiny/decoder_loop/n_layer_self_v_cache.tar.gz",
        "calibration_size": -1,
        "calibration_format": "Numpy"
      },
      {
        "tensor_name": "n_layer_cross_v",
        "calibration_dataset": "./calibrations_tiny/decoder_loop/n_layer_cross_v.tar.gz",
        "calibration_size": -1,
        "calibration_format": "Numpy"
      },
      {
        "tensor_name": "positional_embedding",
        "calibration_dataset": "./calibrations_tiny/decoder_loop/positional_embedding.tar.gz",
        "calibration_size": -1,
        "calibration_format": "Numpy"
      },
      {
        "tensor_name": "mask",
        "calibration_dataset": "./calibrations_tiny/decoder_loop/mask.tar.gz",
        "calibration_size": -1,
        "calibration_format": "Numpy"
      }
    ],
    "layer_configs": [
      {
        "start_tensor_names": ["DEFAULT"],
        "end_tensor_names": ["DEFAULT"],
        "data_type": "U16"
      },
      {
        "op_types": ["Gather"],
        "data_type": "FP32"
      },
      {
        "layer_names": ["/Add_1", "/Add_5", "/Add_9", "/Add_13"],
        "data_type": "FP32"
      },
      {
        "layer_names": ["/Softmax", "/Softmax_2", "/Softmax_4", "/Softmax_6"],
        "data_type": "FP32"
      }
    ],
    "calibration_method": "MinMax"
  },
  "input_processors": [
    {
      "tensor_name": "DEFAULT"
    }
  ],
  "compiler": {
    "check": 2
  }
}