phi-3.5-onnx-qnn / weight_sharing_model_1_of_4.serialized.json
doberst's picture
Upload 20 files
7f81323 verified
{
"version": "QNN_SYSTEM_CONTEXT_BINARY_INFO_VERSION_3",
"info": {
"backendId": 6,
"buildId": "v2.28.0.241029232508_102474",
"coreApiVersion": "2.21.0",
"backendApiVersion": "5.28.0",
"socVersion": "",
"contextBlobVersion": "3.2.0",
"contextBlobSize": 664148112,
"numContextTensors": 0,
"contextTensors": [],
"numGraphs": 2,
"graphs": [
{
"version": "QNN_SYSTEM_CONTEXT_GRAPH_INFO_VERSION_3",
"info": {
"graphName": "ar128_cl4096_1_of_4",
"numGraphInputs": 20,
"graphInputs": [
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 2,
"name": "input_ids",
"type": "QNN_TENSOR_TYPE_APP_WRITE",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_INT_32",
"rank": 2,
"dimensions": [
1,
128
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_UNDEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_UNDEFINED"
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 4,
"name": "past_key_0_in",
"type": "QNN_TENSOR_TYPE_APP_WRITE",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
96,
3968
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.07073105126619339,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 9,
"name": "past_key_5_in",
"type": "QNN_TENSOR_TYPE_APP_WRITE",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
96,
3968
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.13209088146686555,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 12,
"name": "past_value_5_in",
"type": "QNN_TENSOR_TYPE_APP_WRITE",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
3968,
96
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.03382043167948723,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 19,
"name": "past_value_0_in",
"type": "QNN_TENSOR_TYPE_APP_WRITE",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
3968,
96
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.005486138164997101,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 128,
"name": "past_key_6_in",
"type": "QNN_TENSOR_TYPE_APP_WRITE",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
96,
3968
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.1345406174659729,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 131,
"name": "past_value_6_in",
"type": "QNN_TENSOR_TYPE_APP_WRITE",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
3968,
96
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.03435523062944412,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 270,
"name": "past_key_7_in",
"type": "QNN_TENSOR_TYPE_APP_WRITE",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
96,
3968
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.14882269501686097,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 273,
"name": "past_value_7_in",
"type": "QNN_TENSOR_TYPE_APP_WRITE",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
3968,
96
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.06318466365337372,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 444,
"name": "past_key_1_in",
"type": "QNN_TENSOR_TYPE_APP_WRITE",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
96,
3968
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.09567122906446457,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 447,
"name": "past_value_1_in",
"type": "QNN_TENSOR_TYPE_APP_WRITE",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
3968,
96
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.011280891485512257,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 586,
"name": "past_key_2_in",
"type": "QNN_TENSOR_TYPE_APP_WRITE",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
96,
3968
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.12122304737567902,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 589,
"name": "past_value_2_in",
"type": "QNN_TENSOR_TYPE_APP_WRITE",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
3968,
96
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.02321721613407135,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 734,
"name": "past_key_3_in",
"type": "QNN_TENSOR_TYPE_APP_WRITE",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
96,
3968
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.1403033435344696,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 737,
"name": "past_value_3_in",
"type": "QNN_TENSOR_TYPE_APP_WRITE",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
3968,
96
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.021280551329255105,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 878,
"name": "past_key_4_in",
"type": "QNN_TENSOR_TYPE_APP_WRITE",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
96,
3968
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.11964445561170578,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 881,
"name": "past_value_4_in",
"type": "QNN_TENSOR_TYPE_APP_WRITE",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
3968,
96
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.03495820611715317,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 1950,
"name": "position_ids_cos",
"type": "QNN_TENSOR_TYPE_APP_WRITE",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_16",
"rank": 4,
"dimensions": [
1,
1,
128,
48
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.00003632373773143627,
"offset": -32768
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 1952,
"name": "position_ids_sin",
"type": "QNN_TENSOR_TYPE_APP_WRITE",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_16",
"rank": 4,
"dimensions": [
1,
1,
128,
48
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.00003632373773143627,
"offset": -32768
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 2657,
"name": "attention_mask",
"type": "QNN_TENSOR_TYPE_APP_WRITE",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_16",
"rank": 4,
"dimensions": [
1,
1,
128,
4096
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.0007629510946571827,
"offset": -65535
}
}
}
}
],
"numGraphOutputs": 17,
"graphOutputs": [
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 1917,
"name": "past_value_0_out",
"type": "QNN_TENSOR_TYPE_APP_READ",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
128,
96
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.005486138164997101,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 2560,
"name": "past_key_0_out",
"type": "QNN_TENSOR_TYPE_APP_READ",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
96,
128
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.07073105126619339,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 3682,
"name": "past_value_1_out",
"type": "QNN_TENSOR_TYPE_APP_READ",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
128,
96
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.011280891485512257,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 4323,
"name": "past_key_1_out",
"type": "QNN_TENSOR_TYPE_APP_READ",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
96,
128
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.09567122906446457,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 5446,
"name": "past_value_2_out",
"type": "QNN_TENSOR_TYPE_APP_READ",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
128,
96
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.02321721613407135,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 6087,
"name": "past_key_2_out",
"type": "QNN_TENSOR_TYPE_APP_READ",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
96,
128
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.12122304737567902,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 7206,
"name": "past_value_3_out",
"type": "QNN_TENSOR_TYPE_APP_READ",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
128,
96
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.021280551329255105,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 7847,
"name": "past_key_3_out",
"type": "QNN_TENSOR_TYPE_APP_READ",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
96,
128
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.1403033435344696,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 8968,
"name": "past_value_4_out",
"type": "QNN_TENSOR_TYPE_APP_READ",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
128,
96
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.03495820611715317,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 9609,
"name": "past_key_4_out",
"type": "QNN_TENSOR_TYPE_APP_READ",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
96,
128
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.11964445561170578,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 10728,
"name": "past_value_5_out",
"type": "QNN_TENSOR_TYPE_APP_READ",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
128,
96
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.03382043167948723,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 11369,
"name": "past_key_5_out",
"type": "QNN_TENSOR_TYPE_APP_READ",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
96,
128
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.13209088146686555,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 12488,
"name": "past_value_6_out",
"type": "QNN_TENSOR_TYPE_APP_READ",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
128,
96
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.03435523062944412,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 13129,
"name": "past_key_6_out",
"type": "QNN_TENSOR_TYPE_APP_READ",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
96,
128
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.1345406174659729,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 14248,
"name": "past_value_7_out",
"type": "QNN_TENSOR_TYPE_APP_READ",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
128,
96
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.06318466365337372,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 14889,
"name": "past_key_7_out",
"type": "QNN_TENSOR_TYPE_APP_READ",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
96,
128
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.14882269501686097,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 15136,
"name": "_model_layers_7_Add_1_Add_output_0",
"type": "QNN_TENSOR_TYPE_APP_READ",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_16",
"rank": 3,
"dimensions": [
1,
128,
3072
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.10904459655284882,
"offset": -40904
}
}
}
}
],
"numUpdateableTensors": 0,
"updateableTensors": [],
"graphBlobInfoSize": 40,
"graphBlobInfo": [
{
"version": "QNN_SYSTEM_CONTEXT_HTP_GRAPH_INFO_BLOB_VERSION_V1",
"info": {
"spillFillBufferSize": 0,
"optimizationLevel": 3,
"vtcmSize": 8,
"htpDlbc": 0,
"numHvxThreads": 0
}
}
]
}
},
{
"version": "QNN_SYSTEM_CONTEXT_GRAPH_INFO_VERSION_3",
"info": {
"graphName": "ar1_cl4096_1_of_4",
"numGraphInputs": 20,
"graphInputs": [
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 15138,
"name": "input_ids",
"type": "QNN_TENSOR_TYPE_APP_WRITE",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_INT_32",
"rank": 2,
"dimensions": [
1,
1
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_UNDEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_UNDEFINED"
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 15140,
"name": "past_key_0_in",
"type": "QNN_TENSOR_TYPE_APP_WRITE",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
96,
4095
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.07073105126619339,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 15145,
"name": "past_key_5_in",
"type": "QNN_TENSOR_TYPE_APP_WRITE",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
96,
4095
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.13209088146686555,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 15148,
"name": "past_value_5_in",
"type": "QNN_TENSOR_TYPE_APP_WRITE",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
4095,
96
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.03382043167948723,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 15155,
"name": "past_value_0_in",
"type": "QNN_TENSOR_TYPE_APP_WRITE",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
4095,
96
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.005486138164997101,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 15264,
"name": "past_key_6_in",
"type": "QNN_TENSOR_TYPE_APP_WRITE",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
96,
4095
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.1345406174659729,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 15267,
"name": "past_value_6_in",
"type": "QNN_TENSOR_TYPE_APP_WRITE",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
4095,
96
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.03435523062944412,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 15406,
"name": "past_key_7_in",
"type": "QNN_TENSOR_TYPE_APP_WRITE",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
96,
4095
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.14882269501686097,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 15409,
"name": "past_value_7_in",
"type": "QNN_TENSOR_TYPE_APP_WRITE",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
4095,
96
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.06318466365337372,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 15580,
"name": "past_key_1_in",
"type": "QNN_TENSOR_TYPE_APP_WRITE",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
96,
4095
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.09567122906446457,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 15583,
"name": "past_value_1_in",
"type": "QNN_TENSOR_TYPE_APP_WRITE",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
4095,
96
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.011280891485512257,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 15722,
"name": "past_key_2_in",
"type": "QNN_TENSOR_TYPE_APP_WRITE",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
96,
4095
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.12122304737567902,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 15725,
"name": "past_value_2_in",
"type": "QNN_TENSOR_TYPE_APP_WRITE",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
4095,
96
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.02321721613407135,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 15870,
"name": "past_key_3_in",
"type": "QNN_TENSOR_TYPE_APP_WRITE",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
96,
4095
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.1403033435344696,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 15873,
"name": "past_value_3_in",
"type": "QNN_TENSOR_TYPE_APP_WRITE",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
4095,
96
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.021280551329255105,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 16014,
"name": "past_key_4_in",
"type": "QNN_TENSOR_TYPE_APP_WRITE",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
96,
4095
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.11964445561170578,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 16017,
"name": "past_value_4_in",
"type": "QNN_TENSOR_TYPE_APP_WRITE",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
4095,
96
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.03495820611715317,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 17086,
"name": "position_ids_cos",
"type": "QNN_TENSOR_TYPE_APP_WRITE",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_16",
"rank": 4,
"dimensions": [
1,
1,
1,
48
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.00003632373773143627,
"offset": -32768
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 17088,
"name": "position_ids_sin",
"type": "QNN_TENSOR_TYPE_APP_WRITE",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_16",
"rank": 4,
"dimensions": [
1,
1,
1,
48
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.00003632373773143627,
"offset": -32768
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 17793,
"name": "attention_mask",
"type": "QNN_TENSOR_TYPE_APP_WRITE",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_16",
"rank": 4,
"dimensions": [
1,
1,
1,
4096
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.0007629510946571827,
"offset": -65535
}
}
}
}
],
"numGraphOutputs": 17,
"graphOutputs": [
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 17053,
"name": "past_value_0_out",
"type": "QNN_TENSOR_TYPE_APP_READ",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
1,
96
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.005486138164997101,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 17696,
"name": "past_key_0_out",
"type": "QNN_TENSOR_TYPE_APP_READ",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
96,
1
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.07073105126619339,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 18818,
"name": "past_value_1_out",
"type": "QNN_TENSOR_TYPE_APP_READ",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
1,
96
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.011280891485512257,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 19459,
"name": "past_key_1_out",
"type": "QNN_TENSOR_TYPE_APP_READ",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
96,
1
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.09567122906446457,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 20582,
"name": "past_value_2_out",
"type": "QNN_TENSOR_TYPE_APP_READ",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
1,
96
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.02321721613407135,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 21223,
"name": "past_key_2_out",
"type": "QNN_TENSOR_TYPE_APP_READ",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
96,
1
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.12122304737567902,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 22342,
"name": "past_value_3_out",
"type": "QNN_TENSOR_TYPE_APP_READ",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
1,
96
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.021280551329255105,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 22983,
"name": "past_key_3_out",
"type": "QNN_TENSOR_TYPE_APP_READ",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
96,
1
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.1403033435344696,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 24104,
"name": "past_value_4_out",
"type": "QNN_TENSOR_TYPE_APP_READ",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
1,
96
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.03495820611715317,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 24745,
"name": "past_key_4_out",
"type": "QNN_TENSOR_TYPE_APP_READ",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
96,
1
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.11964445561170578,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 25864,
"name": "past_value_5_out",
"type": "QNN_TENSOR_TYPE_APP_READ",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
1,
96
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.03382043167948723,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 26505,
"name": "past_key_5_out",
"type": "QNN_TENSOR_TYPE_APP_READ",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
96,
1
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.13209088146686555,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 27624,
"name": "past_value_6_out",
"type": "QNN_TENSOR_TYPE_APP_READ",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
1,
96
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.03435523062944412,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 28265,
"name": "past_key_6_out",
"type": "QNN_TENSOR_TYPE_APP_READ",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
96,
1
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.1345406174659729,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 29384,
"name": "past_value_7_out",
"type": "QNN_TENSOR_TYPE_APP_READ",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
1,
96
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.06318466365337372,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 30025,
"name": "past_key_7_out",
"type": "QNN_TENSOR_TYPE_APP_READ",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
96,
1
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.14882269501686097,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 30272,
"name": "_model_layers_7_Add_1_Add_output_0",
"type": "QNN_TENSOR_TYPE_APP_READ",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_16",
"rank": 3,
"dimensions": [
1,
1,
3072
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.10904459655284882,
"offset": -40904
}
}
}
}
],
"numUpdateableTensors": 0,
"updateableTensors": [],
"graphBlobInfoSize": 40,
"graphBlobInfo": [
{
"version": "QNN_SYSTEM_CONTEXT_HTP_GRAPH_INFO_BLOB_VERSION_V1",
"info": {
"spillFillBufferSize": 0,
"optimizationLevel": 3,
"vtcmSize": 8,
"htpDlbc": 0,
"numHvxThreads": 0
}
}
]
}
}
],
"contextMetadataSize": 8,
"contextMetadata": {
"version": "QNN_SYSTEM_CONTEXT_HTP_CONTEXT_INFO_BLOB_VERSION_V1",
"info": {
"dsp arch": 73
}
},
"soc model": 43
}
}