{
    "model_type": "ONNX",
    "npu_mode": "NPU3",
    "quant": {
      "input_configs": [
        {
          "tensor_name": "chunk_xs",
          "calibration_dataset": "./calibration_dataset/chunk_xs.tar.gz",
          "calibration_size": -1,
          "calibration_format": "Numpy"
        },
        {
          "tensor_name": "chunk_lens",
          "calibration_dataset": "./calibration_dataset/chunk_lens.tar.gz",
          "calibration_size": -1,
          "calibration_format": "Numpy"
        },
        {
          "tensor_name": "offset",
          "calibration_dataset": "./calibration_dataset/offset.tar.gz",
          "calibration_size": -1,
          "calibration_format": "Numpy"
        },
        {
          "tensor_name": "att_cache",
          "calibration_dataset": "./calibration_dataset/att_cache.tar.gz",
          "calibration_size": -1,
          "calibration_format": "Numpy"
        },
        {
          "tensor_name": "cnn_cache",
          "calibration_dataset": "./calibration_dataset/cnn_cache.tar.gz",
          "calibration_size": -1,
          "calibration_format": "Numpy"
        },
        {
          "tensor_name": "cache_mask",
          "calibration_dataset": "./calibration_dataset/cache_mask.tar.gz",
          "calibration_size": -1,
          "calibration_format": "Numpy"
        }
      ],
      "layer_configs": [
        {
          "start_tensor_names": ["DEFAULT"],
          "end_tensor_names": ["DEFAULT"],
          "data_type": "U16"
        }
      ],
      "calibration_method": "MinMax",
      "conv_bias_data_type": "FP32",
      "disable_auto_refine_scale": true,
      "enable_smooth_quant": true,
      "precision_analysis": true,
      "precision_analysis_method": "EndToEnd",
      "transformer_opt_level": 1
    },
    "input_processors": [
      {
        "tensor_name": "chunk_xs",
        "src_dtype": "FP32"
      },
      {
        "tensor_name": "chunk_lens",
        "src_dtype": "S32"
      },
      {
        "tensor_name": "offset",
        "src_dtype": "S32"
      },
      {
        "tensor_name": "att_cache",
        "src_dtype": "FP32"
      },
      {
        "tensor_name": "cnn_cache",
        "src_dtype": "FP32"
      },
      {
        "tensor_name": "cache_mask",
        "src_dtype": "FP32"
      }
    ],
    "compiler": {
      "check": 0
    }
  }