| { |
| "version": "QNN_SYSTEM_CONTEXT_BINARY_INFO_VERSION_3", |
| "info": { |
| "backendId": 6, |
| "buildId": "v2.28.0.241029232508_102474", |
| "coreApiVersion": "2.21.0", |
| "backendApiVersion": "5.28.0", |
| "socVersion": "", |
| "contextBlobVersion": "3.2.0", |
| "contextBlobSize": 664148112, |
| "numContextTensors": 0, |
| "contextTensors": [], |
| "numGraphs": 2, |
| "graphs": [ |
| { |
| "version": "QNN_SYSTEM_CONTEXT_GRAPH_INFO_VERSION_3", |
| "info": { |
| "graphName": "ar128_cl4096_1_of_4", |
| "numGraphInputs": 20, |
| "graphInputs": [ |
| { |
| "version": "QNN_TENSOR_VERSION_1", |
| "info": { |
| "id": 2, |
| "name": "input_ids", |
| "type": "QNN_TENSOR_TYPE_APP_WRITE", |
| "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", |
| "dataType": "QNN_DATATYPE_INT_32", |
| "rank": 2, |
| "dimensions": [ |
| 1, |
| 128 |
| ], |
| "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", |
| "quantizeParams": { |
| "definition": "QNN_DEFINITION_UNDEFINED", |
| "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_UNDEFINED" |
| } |
| } |
| }, |
| { |
| "version": "QNN_TENSOR_VERSION_1", |
| "info": { |
| "id": 4, |
| "name": "past_key_0_in", |
| "type": "QNN_TENSOR_TYPE_APP_WRITE", |
| "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", |
| "dataType": "QNN_DATATYPE_UFIXED_POINT_8", |
| "rank": 4, |
| "dimensions": [ |
| 32, |
| 1, |
| 96, |
| 3968 |
| ], |
| "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", |
| "quantizeParams": { |
| "definition": "QNN_DEFINITION_DEFINED", |
| "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", |
| "scaleOffset": { |
| "scale": 0.07073105126619339, |
| "offset": -128 |
| } |
| } |
| } |
| }, |
| { |
| "version": "QNN_TENSOR_VERSION_1", |
| "info": { |
| "id": 9, |
| "name": "past_key_5_in", |
| "type": "QNN_TENSOR_TYPE_APP_WRITE", |
| "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", |
| "dataType": "QNN_DATATYPE_UFIXED_POINT_8", |
| "rank": 4, |
| "dimensions": [ |
| 32, |
| 1, |
| 96, |
| 3968 |
| ], |
| "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", |
| "quantizeParams": { |
| "definition": "QNN_DEFINITION_DEFINED", |
| "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", |
| "scaleOffset": { |
| "scale": 0.13209088146686555, |
| "offset": -128 |
| } |
| } |
| } |
| }, |
| { |
| "version": "QNN_TENSOR_VERSION_1", |
| "info": { |
| "id": 12, |
| "name": "past_value_5_in", |
| "type": "QNN_TENSOR_TYPE_APP_WRITE", |
| "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", |
| "dataType": "QNN_DATATYPE_UFIXED_POINT_8", |
| "rank": 4, |
| "dimensions": [ |
| 32, |
| 1, |
| 3968, |
| 96 |
| ], |
| "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", |
| "quantizeParams": { |
| "definition": "QNN_DEFINITION_DEFINED", |
| "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", |
| "scaleOffset": { |
| "scale": 0.03382043167948723, |
| "offset": -128 |
| } |
| } |
| } |
| }, |
| { |
| "version": "QNN_TENSOR_VERSION_1", |
| "info": { |
| "id": 19, |
| "name": "past_value_0_in", |
| "type": "QNN_TENSOR_TYPE_APP_WRITE", |
| "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", |
| "dataType": "QNN_DATATYPE_UFIXED_POINT_8", |
| "rank": 4, |
| "dimensions": [ |
| 32, |
| 1, |
| 3968, |
| 96 |
| ], |
| "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", |
| "quantizeParams": { |
| "definition": "QNN_DEFINITION_DEFINED", |
| "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", |
| "scaleOffset": { |
| "scale": 0.005486138164997101, |
| "offset": -128 |
| } |
| } |
| } |
| }, |
| { |
| "version": "QNN_TENSOR_VERSION_1", |
| "info": { |
| "id": 128, |
| "name": "past_key_6_in", |
| "type": "QNN_TENSOR_TYPE_APP_WRITE", |
| "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", |
| "dataType": "QNN_DATATYPE_UFIXED_POINT_8", |
| "rank": 4, |
| "dimensions": [ |
| 32, |
| 1, |
| 96, |
| 3968 |
| ], |
| "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", |
| "quantizeParams": { |
| "definition": "QNN_DEFINITION_DEFINED", |
| "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", |
| "scaleOffset": { |
| "scale": 0.1345406174659729, |
| "offset": -128 |
| } |
| } |
| } |
| }, |
| { |
| "version": "QNN_TENSOR_VERSION_1", |
| "info": { |
| "id": 131, |
| "name": "past_value_6_in", |
| "type": "QNN_TENSOR_TYPE_APP_WRITE", |
| "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", |
| "dataType": "QNN_DATATYPE_UFIXED_POINT_8", |
| "rank": 4, |
| "dimensions": [ |
| 32, |
| 1, |
| 3968, |
| 96 |
| ], |
| "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", |
| "quantizeParams": { |
| "definition": "QNN_DEFINITION_DEFINED", |
| "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", |
| "scaleOffset": { |
| "scale": 0.03435523062944412, |
| "offset": -128 |
| } |
| } |
| } |
| }, |
| { |
| "version": "QNN_TENSOR_VERSION_1", |
| "info": { |
| "id": 270, |
| "name": "past_key_7_in", |
| "type": "QNN_TENSOR_TYPE_APP_WRITE", |
| "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", |
| "dataType": "QNN_DATATYPE_UFIXED_POINT_8", |
| "rank": 4, |
| "dimensions": [ |
| 32, |
| 1, |
| 96, |
| 3968 |
| ], |
| "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", |
| "quantizeParams": { |
| "definition": "QNN_DEFINITION_DEFINED", |
| "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", |
| "scaleOffset": { |
| "scale": 0.14882269501686097, |
| "offset": -128 |
| } |
| } |
| } |
| }, |
| { |
| "version": "QNN_TENSOR_VERSION_1", |
| "info": { |
| "id": 273, |
| "name": "past_value_7_in", |
| "type": "QNN_TENSOR_TYPE_APP_WRITE", |
| "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", |
| "dataType": "QNN_DATATYPE_UFIXED_POINT_8", |
| "rank": 4, |
| "dimensions": [ |
| 32, |
| 1, |
| 3968, |
| 96 |
| ], |
| "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", |
| "quantizeParams": { |
| "definition": "QNN_DEFINITION_DEFINED", |
| "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", |
| "scaleOffset": { |
| "scale": 0.06318466365337372, |
| "offset": -128 |
| } |
| } |
| } |
| }, |
| { |
| "version": "QNN_TENSOR_VERSION_1", |
| "info": { |
| "id": 444, |
| "name": "past_key_1_in", |
| "type": "QNN_TENSOR_TYPE_APP_WRITE", |
| "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", |
| "dataType": "QNN_DATATYPE_UFIXED_POINT_8", |
| "rank": 4, |
| "dimensions": [ |
| 32, |
| 1, |
| 96, |
| 3968 |
| ], |
| "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", |
| "quantizeParams": { |
| "definition": "QNN_DEFINITION_DEFINED", |
| "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", |
| "scaleOffset": { |
| "scale": 0.09567122906446457, |
| "offset": -128 |
| } |
| } |
| } |
| }, |
| { |
| "version": "QNN_TENSOR_VERSION_1", |
| "info": { |
| "id": 447, |
| "name": "past_value_1_in", |
| "type": "QNN_TENSOR_TYPE_APP_WRITE", |
| "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", |
| "dataType": "QNN_DATATYPE_UFIXED_POINT_8", |
| "rank": 4, |
| "dimensions": [ |
| 32, |
| 1, |
| 3968, |
| 96 |
| ], |
| "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", |
| "quantizeParams": { |
| "definition": "QNN_DEFINITION_DEFINED", |
| "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", |
| "scaleOffset": { |
| "scale": 0.011280891485512257, |
| "offset": -128 |
| } |
| } |
| } |
| }, |
| { |
| "version": "QNN_TENSOR_VERSION_1", |
| "info": { |
| "id": 586, |
| "name": "past_key_2_in", |
| "type": "QNN_TENSOR_TYPE_APP_WRITE", |
| "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", |
| "dataType": "QNN_DATATYPE_UFIXED_POINT_8", |
| "rank": 4, |
| "dimensions": [ |
| 32, |
| 1, |
| 96, |
| 3968 |
| ], |
| "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", |
| "quantizeParams": { |
| "definition": "QNN_DEFINITION_DEFINED", |
| "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", |
| "scaleOffset": { |
| "scale": 0.12122304737567902, |
| "offset": -128 |
| } |
| } |
| } |
| }, |
| { |
| "version": "QNN_TENSOR_VERSION_1", |
| "info": { |
| "id": 589, |
| "name": "past_value_2_in", |
| "type": "QNN_TENSOR_TYPE_APP_WRITE", |
| "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", |
| "dataType": "QNN_DATATYPE_UFIXED_POINT_8", |
| "rank": 4, |
| "dimensions": [ |
| 32, |
| 1, |
| 3968, |
| 96 |
| ], |
| "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", |
| "quantizeParams": { |
| "definition": "QNN_DEFINITION_DEFINED", |
| "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", |
| "scaleOffset": { |
| "scale": 0.02321721613407135, |
| "offset": -128 |
| } |
| } |
| } |
| }, |
| { |
| "version": "QNN_TENSOR_VERSION_1", |
| "info": { |
| "id": 734, |
| "name": "past_key_3_in", |
| "type": "QNN_TENSOR_TYPE_APP_WRITE", |
| "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", |
| "dataType": "QNN_DATATYPE_UFIXED_POINT_8", |
| "rank": 4, |
| "dimensions": [ |
| 32, |
| 1, |
| 96, |
| 3968 |
| ], |
| "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", |
| "quantizeParams": { |
| "definition": "QNN_DEFINITION_DEFINED", |
| "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", |
| "scaleOffset": { |
| "scale": 0.1403033435344696, |
| "offset": -128 |
| } |
| } |
| } |
| }, |
| { |
| "version": "QNN_TENSOR_VERSION_1", |
| "info": { |
| "id": 737, |
| "name": "past_value_3_in", |
| "type": "QNN_TENSOR_TYPE_APP_WRITE", |
| "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", |
| "dataType": "QNN_DATATYPE_UFIXED_POINT_8", |
| "rank": 4, |
| "dimensions": [ |
| 32, |
| 1, |
| 3968, |
| 96 |
| ], |
| "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", |
| "quantizeParams": { |
| "definition": "QNN_DEFINITION_DEFINED", |
| "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", |
| "scaleOffset": { |
| "scale": 0.021280551329255105, |
| "offset": -128 |
| } |
| } |
| } |
| }, |
| { |
| "version": "QNN_TENSOR_VERSION_1", |
| "info": { |
| "id": 878, |
| "name": "past_key_4_in", |
| "type": "QNN_TENSOR_TYPE_APP_WRITE", |
| "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", |
| "dataType": "QNN_DATATYPE_UFIXED_POINT_8", |
| "rank": 4, |
| "dimensions": [ |
| 32, |
| 1, |
| 96, |
| 3968 |
| ], |
| "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", |
| "quantizeParams": { |
| "definition": "QNN_DEFINITION_DEFINED", |
| "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", |
| "scaleOffset": { |
| "scale": 0.11964445561170578, |
| "offset": -128 |
| } |
| } |
| } |
| }, |
| { |
| "version": "QNN_TENSOR_VERSION_1", |
| "info": { |
| "id": 881, |
| "name": "past_value_4_in", |
| "type": "QNN_TENSOR_TYPE_APP_WRITE", |
| "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", |
| "dataType": "QNN_DATATYPE_UFIXED_POINT_8", |
| "rank": 4, |
| "dimensions": [ |
| 32, |
| 1, |
| 3968, |
| 96 |
| ], |
| "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", |
| "quantizeParams": { |
| "definition": "QNN_DEFINITION_DEFINED", |
| "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", |
| "scaleOffset": { |
| "scale": 0.03495820611715317, |
| "offset": -128 |
| } |
| } |
| } |
| }, |
| { |
| "version": "QNN_TENSOR_VERSION_1", |
| "info": { |
| "id": 1950, |
| "name": "position_ids_cos", |
| "type": "QNN_TENSOR_TYPE_APP_WRITE", |
| "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", |
| "dataType": "QNN_DATATYPE_UFIXED_POINT_16", |
| "rank": 4, |
| "dimensions": [ |
| 1, |
| 1, |
| 128, |
| 48 |
| ], |
| "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", |
| "quantizeParams": { |
| "definition": "QNN_DEFINITION_DEFINED", |
| "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", |
| "scaleOffset": { |
| "scale": 0.00003632373773143627, |
| "offset": -32768 |
| } |
| } |
| } |
| }, |
| { |
| "version": "QNN_TENSOR_VERSION_1", |
| "info": { |
| "id": 1952, |
| "name": "position_ids_sin", |
| "type": "QNN_TENSOR_TYPE_APP_WRITE", |
| "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", |
| "dataType": "QNN_DATATYPE_UFIXED_POINT_16", |
| "rank": 4, |
| "dimensions": [ |
| 1, |
| 1, |
| 128, |
| 48 |
| ], |
| "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", |
| "quantizeParams": { |
| "definition": "QNN_DEFINITION_DEFINED", |
| "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", |
| "scaleOffset": { |
| "scale": 0.00003632373773143627, |
| "offset": -32768 |
| } |
| } |
| } |
| }, |
| { |
| "version": "QNN_TENSOR_VERSION_1", |
| "info": { |
| "id": 2657, |
| "name": "attention_mask", |
| "type": "QNN_TENSOR_TYPE_APP_WRITE", |
| "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", |
| "dataType": "QNN_DATATYPE_UFIXED_POINT_16", |
| "rank": 4, |
| "dimensions": [ |
| 1, |
| 1, |
| 128, |
| 4096 |
| ], |
| "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", |
| "quantizeParams": { |
| "definition": "QNN_DEFINITION_DEFINED", |
| "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", |
| "scaleOffset": { |
| "scale": 0.0007629510946571827, |
| "offset": -65535 |
| } |
| } |
| } |
| } |
| ], |
| "numGraphOutputs": 17, |
| "graphOutputs": [ |
| { |
| "version": "QNN_TENSOR_VERSION_1", |
| "info": { |
| "id": 1917, |
| "name": "past_value_0_out", |
| "type": "QNN_TENSOR_TYPE_APP_READ", |
| "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", |
| "dataType": "QNN_DATATYPE_UFIXED_POINT_8", |
| "rank": 4, |
| "dimensions": [ |
| 32, |
| 1, |
| 128, |
| 96 |
| ], |
| "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", |
| "quantizeParams": { |
| "definition": "QNN_DEFINITION_DEFINED", |
| "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", |
| "scaleOffset": { |
| "scale": 0.005486138164997101, |
| "offset": -128 |
| } |
| } |
| } |
| }, |
| { |
| "version": "QNN_TENSOR_VERSION_1", |
| "info": { |
| "id": 2560, |
| "name": "past_key_0_out", |
| "type": "QNN_TENSOR_TYPE_APP_READ", |
| "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", |
| "dataType": "QNN_DATATYPE_UFIXED_POINT_8", |
| "rank": 4, |
| "dimensions": [ |
| 32, |
| 1, |
| 96, |
| 128 |
| ], |
| "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", |
| "quantizeParams": { |
| "definition": "QNN_DEFINITION_DEFINED", |
| "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", |
| "scaleOffset": { |
| "scale": 0.07073105126619339, |
| "offset": -128 |
| } |
| } |
| } |
| }, |
| { |
| "version": "QNN_TENSOR_VERSION_1", |
| "info": { |
| "id": 3682, |
| "name": "past_value_1_out", |
| "type": "QNN_TENSOR_TYPE_APP_READ", |
| "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", |
| "dataType": "QNN_DATATYPE_UFIXED_POINT_8", |
| "rank": 4, |
| "dimensions": [ |
| 32, |
| 1, |
| 128, |
| 96 |
| ], |
| "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", |
| "quantizeParams": { |
| "definition": "QNN_DEFINITION_DEFINED", |
| "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", |
| "scaleOffset": { |
| "scale": 0.011280891485512257, |
| "offset": -128 |
| } |
| } |
| } |
| }, |
| { |
| "version": "QNN_TENSOR_VERSION_1", |
| "info": { |
| "id": 4323, |
| "name": "past_key_1_out", |
| "type": "QNN_TENSOR_TYPE_APP_READ", |
| "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", |
| "dataType": "QNN_DATATYPE_UFIXED_POINT_8", |
| "rank": 4, |
| "dimensions": [ |
| 32, |
| 1, |
| 96, |
| 128 |
| ], |
| "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", |
| "quantizeParams": { |
| "definition": "QNN_DEFINITION_DEFINED", |
| "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", |
| "scaleOffset": { |
| "scale": 0.09567122906446457, |
| "offset": -128 |
| } |
| } |
| } |
| }, |
| { |
| "version": "QNN_TENSOR_VERSION_1", |
| "info": { |
| "id": 5446, |
| "name": "past_value_2_out", |
| "type": "QNN_TENSOR_TYPE_APP_READ", |
| "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", |
| "dataType": "QNN_DATATYPE_UFIXED_POINT_8", |
| "rank": 4, |
| "dimensions": [ |
| 32, |
| 1, |
| 128, |
| 96 |
| ], |
| "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", |
| "quantizeParams": { |
| "definition": "QNN_DEFINITION_DEFINED", |
| "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", |
| "scaleOffset": { |
| "scale": 0.02321721613407135, |
| "offset": -128 |
| } |
| } |
| } |
| }, |
| { |
| "version": "QNN_TENSOR_VERSION_1", |
| "info": { |
| "id": 6087, |
| "name": "past_key_2_out", |
| "type": "QNN_TENSOR_TYPE_APP_READ", |
| "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", |
| "dataType": "QNN_DATATYPE_UFIXED_POINT_8", |
| "rank": 4, |
| "dimensions": [ |
| 32, |
| 1, |
| 96, |
| 128 |
| ], |
| "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", |
| "quantizeParams": { |
| "definition": "QNN_DEFINITION_DEFINED", |
| "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", |
| "scaleOffset": { |
| "scale": 0.12122304737567902, |
| "offset": -128 |
| } |
| } |
| } |
| }, |
| { |
| "version": "QNN_TENSOR_VERSION_1", |
| "info": { |
| "id": 7206, |
| "name": "past_value_3_out", |
| "type": "QNN_TENSOR_TYPE_APP_READ", |
| "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", |
| "dataType": "QNN_DATATYPE_UFIXED_POINT_8", |
| "rank": 4, |
| "dimensions": [ |
| 32, |
| 1, |
| 128, |
| 96 |
| ], |
| "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", |
| "quantizeParams": { |
| "definition": "QNN_DEFINITION_DEFINED", |
| "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", |
| "scaleOffset": { |
| "scale": 0.021280551329255105, |
| "offset": -128 |
| } |
| } |
| } |
| }, |
| { |
| "version": "QNN_TENSOR_VERSION_1", |
| "info": { |
| "id": 7847, |
| "name": "past_key_3_out", |
| "type": "QNN_TENSOR_TYPE_APP_READ", |
| "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", |
| "dataType": "QNN_DATATYPE_UFIXED_POINT_8", |
| "rank": 4, |
| "dimensions": [ |
| 32, |
| 1, |
| 96, |
| 128 |
| ], |
| "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", |
| "quantizeParams": { |
| "definition": "QNN_DEFINITION_DEFINED", |
| "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", |
| "scaleOffset": { |
| "scale": 0.1403033435344696, |
| "offset": -128 |
| } |
| } |
| } |
| }, |
| { |
| "version": "QNN_TENSOR_VERSION_1", |
| "info": { |
| "id": 8968, |
| "name": "past_value_4_out", |
| "type": "QNN_TENSOR_TYPE_APP_READ", |
| "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", |
| "dataType": "QNN_DATATYPE_UFIXED_POINT_8", |
| "rank": 4, |
| "dimensions": [ |
| 32, |
| 1, |
| 128, |
| 96 |
| ], |
| "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", |
| "quantizeParams": { |
| "definition": "QNN_DEFINITION_DEFINED", |
| "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", |
| "scaleOffset": { |
| "scale": 0.03495820611715317, |
| "offset": -128 |
| } |
| } |
| } |
| }, |
| { |
| "version": "QNN_TENSOR_VERSION_1", |
| "info": { |
| "id": 9609, |
| "name": "past_key_4_out", |
| "type": "QNN_TENSOR_TYPE_APP_READ", |
| "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", |
| "dataType": "QNN_DATATYPE_UFIXED_POINT_8", |
| "rank": 4, |
| "dimensions": [ |
| 32, |
| 1, |
| 96, |
| 128 |
| ], |
| "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", |
| "quantizeParams": { |
| "definition": "QNN_DEFINITION_DEFINED", |
| "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", |
| "scaleOffset": { |
| "scale": 0.11964445561170578, |
| "offset": -128 |
| } |
| } |
| } |
| }, |
| { |
| "version": "QNN_TENSOR_VERSION_1", |
| "info": { |
| "id": 10728, |
| "name": "past_value_5_out", |
| "type": "QNN_TENSOR_TYPE_APP_READ", |
| "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", |
| "dataType": "QNN_DATATYPE_UFIXED_POINT_8", |
| "rank": 4, |
| "dimensions": [ |
| 32, |
| 1, |
| 128, |
| 96 |
| ], |
| "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", |
| "quantizeParams": { |
| "definition": "QNN_DEFINITION_DEFINED", |
| "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", |
| "scaleOffset": { |
| "scale": 0.03382043167948723, |
| "offset": -128 |
| } |
| } |
| } |
| }, |
| { |
| "version": "QNN_TENSOR_VERSION_1", |
| "info": { |
| "id": 11369, |
| "name": "past_key_5_out", |
| "type": "QNN_TENSOR_TYPE_APP_READ", |
| "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", |
| "dataType": "QNN_DATATYPE_UFIXED_POINT_8", |
| "rank": 4, |
| "dimensions": [ |
| 32, |
| 1, |
| 96, |
| 128 |
| ], |
| "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", |
| "quantizeParams": { |
| "definition": "QNN_DEFINITION_DEFINED", |
| "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", |
| "scaleOffset": { |
| "scale": 0.13209088146686555, |
| "offset": -128 |
| } |
| } |
| } |
| }, |
| { |
| "version": "QNN_TENSOR_VERSION_1", |
| "info": { |
| "id": 12488, |
| "name": "past_value_6_out", |
| "type": "QNN_TENSOR_TYPE_APP_READ", |
| "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", |
| "dataType": "QNN_DATATYPE_UFIXED_POINT_8", |
| "rank": 4, |
| "dimensions": [ |
| 32, |
| 1, |
| 128, |
| 96 |
| ], |
| "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", |
| "quantizeParams": { |
| "definition": "QNN_DEFINITION_DEFINED", |
| "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", |
| "scaleOffset": { |
| "scale": 0.03435523062944412, |
| "offset": -128 |
| } |
| } |
| } |
| }, |
| { |
| "version": "QNN_TENSOR_VERSION_1", |
| "info": { |
| "id": 13129, |
| "name": "past_key_6_out", |
| "type": "QNN_TENSOR_TYPE_APP_READ", |
| "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", |
| "dataType": "QNN_DATATYPE_UFIXED_POINT_8", |
| "rank": 4, |
| "dimensions": [ |
| 32, |
| 1, |
| 96, |
| 128 |
| ], |
| "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", |
| "quantizeParams": { |
| "definition": "QNN_DEFINITION_DEFINED", |
| "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", |
| "scaleOffset": { |
| "scale": 0.1345406174659729, |
| "offset": -128 |
| } |
| } |
| } |
| }, |
| { |
| "version": "QNN_TENSOR_VERSION_1", |
| "info": { |
| "id": 14248, |
| "name": "past_value_7_out", |
| "type": "QNN_TENSOR_TYPE_APP_READ", |
| "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", |
| "dataType": "QNN_DATATYPE_UFIXED_POINT_8", |
| "rank": 4, |
| "dimensions": [ |
| 32, |
| 1, |
| 128, |
| 96 |
| ], |
| "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", |
| "quantizeParams": { |
| "definition": "QNN_DEFINITION_DEFINED", |
| "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", |
| "scaleOffset": { |
| "scale": 0.06318466365337372, |
| "offset": -128 |
| } |
| } |
| } |
| }, |
| { |
| "version": "QNN_TENSOR_VERSION_1", |
| "info": { |
| "id": 14889, |
| "name": "past_key_7_out", |
| "type": "QNN_TENSOR_TYPE_APP_READ", |
| "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", |
| "dataType": "QNN_DATATYPE_UFIXED_POINT_8", |
| "rank": 4, |
| "dimensions": [ |
| 32, |
| 1, |
| 96, |
| 128 |
| ], |
| "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", |
| "quantizeParams": { |
| "definition": "QNN_DEFINITION_DEFINED", |
| "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", |
| "scaleOffset": { |
| "scale": 0.14882269501686097, |
| "offset": -128 |
| } |
| } |
| } |
| }, |
| { |
| "version": "QNN_TENSOR_VERSION_1", |
| "info": { |
| "id": 15136, |
| "name": "_model_layers_7_Add_1_Add_output_0", |
| "type": "QNN_TENSOR_TYPE_APP_READ", |
| "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", |
| "dataType": "QNN_DATATYPE_UFIXED_POINT_16", |
| "rank": 3, |
| "dimensions": [ |
| 1, |
| 128, |
| 3072 |
| ], |
| "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", |
| "quantizeParams": { |
| "definition": "QNN_DEFINITION_DEFINED", |
| "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", |
| "scaleOffset": { |
| "scale": 0.10904459655284882, |
| "offset": -40904 |
| } |
| } |
| } |
| } |
| ], |
| "numUpdateableTensors": 0, |
| "updateableTensors": [], |
| "graphBlobInfoSize": 40, |
| "graphBlobInfo": [ |
| { |
| "version": "QNN_SYSTEM_CONTEXT_HTP_GRAPH_INFO_BLOB_VERSION_V1", |
| "info": { |
| "spillFillBufferSize": 0, |
| "optimizationLevel": 3, |
| "vtcmSize": 8, |
| "htpDlbc": 0, |
| "numHvxThreads": 0 |
| } |
| } |
| ] |
| } |
| }, |
| { |
| "version": "QNN_SYSTEM_CONTEXT_GRAPH_INFO_VERSION_3", |
| "info": { |
| "graphName": "ar1_cl4096_1_of_4", |
| "numGraphInputs": 20, |
| "graphInputs": [ |
| { |
| "version": "QNN_TENSOR_VERSION_1", |
| "info": { |
| "id": 15138, |
| "name": "input_ids", |
| "type": "QNN_TENSOR_TYPE_APP_WRITE", |
| "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", |
| "dataType": "QNN_DATATYPE_INT_32", |
| "rank": 2, |
| "dimensions": [ |
| 1, |
| 1 |
| ], |
| "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", |
| "quantizeParams": { |
| "definition": "QNN_DEFINITION_UNDEFINED", |
| "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_UNDEFINED" |
| } |
| } |
| }, |
| { |
| "version": "QNN_TENSOR_VERSION_1", |
| "info": { |
| "id": 15140, |
| "name": "past_key_0_in", |
| "type": "QNN_TENSOR_TYPE_APP_WRITE", |
| "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", |
| "dataType": "QNN_DATATYPE_UFIXED_POINT_8", |
| "rank": 4, |
| "dimensions": [ |
| 32, |
| 1, |
| 96, |
| 4095 |
| ], |
| "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", |
| "quantizeParams": { |
| "definition": "QNN_DEFINITION_DEFINED", |
| "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", |
| "scaleOffset": { |
| "scale": 0.07073105126619339, |
| "offset": -128 |
| } |
| } |
| } |
| }, |
| { |
| "version": "QNN_TENSOR_VERSION_1", |
| "info": { |
| "id": 15145, |
| "name": "past_key_5_in", |
| "type": "QNN_TENSOR_TYPE_APP_WRITE", |
| "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", |
| "dataType": "QNN_DATATYPE_UFIXED_POINT_8", |
| "rank": 4, |
| "dimensions": [ |
| 32, |
| 1, |
| 96, |
| 4095 |
| ], |
| "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", |
| "quantizeParams": { |
| "definition": "QNN_DEFINITION_DEFINED", |
| "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", |
| "scaleOffset": { |
| "scale": 0.13209088146686555, |
| "offset": -128 |
| } |
| } |
| } |
| }, |
| { |
| "version": "QNN_TENSOR_VERSION_1", |
| "info": { |
| "id": 15148, |
| "name": "past_value_5_in", |
| "type": "QNN_TENSOR_TYPE_APP_WRITE", |
| "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", |
| "dataType": "QNN_DATATYPE_UFIXED_POINT_8", |
| "rank": 4, |
| "dimensions": [ |
| 32, |
| 1, |
| 4095, |
| 96 |
| ], |
| "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", |
| "quantizeParams": { |
| "definition": "QNN_DEFINITION_DEFINED", |
| "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", |
| "scaleOffset": { |
| "scale": 0.03382043167948723, |
| "offset": -128 |
| } |
| } |
| } |
| }, |
| { |
| "version": "QNN_TENSOR_VERSION_1", |
| "info": { |
| "id": 15155, |
| "name": "past_value_0_in", |
| "type": "QNN_TENSOR_TYPE_APP_WRITE", |
| "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", |
| "dataType": "QNN_DATATYPE_UFIXED_POINT_8", |
| "rank": 4, |
| "dimensions": [ |
| 32, |
| 1, |
| 4095, |
| 96 |
| ], |
| "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", |
| "quantizeParams": { |
| "definition": "QNN_DEFINITION_DEFINED", |
| "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", |
| "scaleOffset": { |
| "scale": 0.005486138164997101, |
| "offset": -128 |
| } |
| } |
| } |
| }, |
| { |
| "version": "QNN_TENSOR_VERSION_1", |
| "info": { |
| "id": 15264, |
| "name": "past_key_6_in", |
| "type": "QNN_TENSOR_TYPE_APP_WRITE", |
| "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", |
| "dataType": "QNN_DATATYPE_UFIXED_POINT_8", |
| "rank": 4, |
| "dimensions": [ |
| 32, |
| 1, |
| 96, |
| 4095 |
| ], |
| "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", |
| "quantizeParams": { |
| "definition": "QNN_DEFINITION_DEFINED", |
| "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", |
| "scaleOffset": { |
| "scale": 0.1345406174659729, |
| "offset": -128 |
| } |
| } |
| } |
| }, |
| { |
| "version": "QNN_TENSOR_VERSION_1", |
| "info": { |
| "id": 15267, |
| "name": "past_value_6_in", |
| "type": "QNN_TENSOR_TYPE_APP_WRITE", |
| "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", |
| "dataType": "QNN_DATATYPE_UFIXED_POINT_8", |
| "rank": 4, |
| "dimensions": [ |
| 32, |
| 1, |
| 4095, |
| 96 |
| ], |
| "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", |
| "quantizeParams": { |
| "definition": "QNN_DEFINITION_DEFINED", |
| "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", |
| "scaleOffset": { |
| "scale": 0.03435523062944412, |
| "offset": -128 |
| } |
| } |
| } |
| }, |
| { |
| "version": "QNN_TENSOR_VERSION_1", |
| "info": { |
| "id": 15406, |
| "name": "past_key_7_in", |
| "type": "QNN_TENSOR_TYPE_APP_WRITE", |
| "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", |
| "dataType": "QNN_DATATYPE_UFIXED_POINT_8", |
| "rank": 4, |
| "dimensions": [ |
| 32, |
| 1, |
| 96, |
| 4095 |
| ], |
| "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", |
| "quantizeParams": { |
| "definition": "QNN_DEFINITION_DEFINED", |
| "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", |
| "scaleOffset": { |
| "scale": 0.14882269501686097, |
| "offset": -128 |
| } |
| } |
| } |
| }, |
| { |
| "version": "QNN_TENSOR_VERSION_1", |
| "info": { |
| "id": 15409, |
| "name": "past_value_7_in", |
| "type": "QNN_TENSOR_TYPE_APP_WRITE", |
| "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", |
| "dataType": "QNN_DATATYPE_UFIXED_POINT_8", |
| "rank": 4, |
| "dimensions": [ |
| 32, |
| 1, |
| 4095, |
| 96 |
| ], |
| "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", |
| "quantizeParams": { |
| "definition": "QNN_DEFINITION_DEFINED", |
| "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", |
| "scaleOffset": { |
| "scale": 0.06318466365337372, |
| "offset": -128 |
| } |
| } |
| } |
| }, |
| { |
| "version": "QNN_TENSOR_VERSION_1", |
| "info": { |
| "id": 15580, |
| "name": "past_key_1_in", |
| "type": "QNN_TENSOR_TYPE_APP_WRITE", |
| "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", |
| "dataType": "QNN_DATATYPE_UFIXED_POINT_8", |
| "rank": 4, |
| "dimensions": [ |
| 32, |
| 1, |
| 96, |
| 4095 |
| ], |
| "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", |
| "quantizeParams": { |
| "definition": "QNN_DEFINITION_DEFINED", |
| "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", |
| "scaleOffset": { |
| "scale": 0.09567122906446457, |
| "offset": -128 |
| } |
| } |
| } |
| }, |
| { |
| "version": "QNN_TENSOR_VERSION_1", |
| "info": { |
| "id": 15583, |
| "name": "past_value_1_in", |
| "type": "QNN_TENSOR_TYPE_APP_WRITE", |
| "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", |
| "dataType": "QNN_DATATYPE_UFIXED_POINT_8", |
| "rank": 4, |
| "dimensions": [ |
| 32, |
| 1, |
| 4095, |
| 96 |
| ], |
| "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", |
| "quantizeParams": { |
| "definition": "QNN_DEFINITION_DEFINED", |
| "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", |
| "scaleOffset": { |
| "scale": 0.011280891485512257, |
| "offset": -128 |
| } |
| } |
| } |
| }, |
| { |
| "version": "QNN_TENSOR_VERSION_1", |
| "info": { |
| "id": 15722, |
| "name": "past_key_2_in", |
| "type": "QNN_TENSOR_TYPE_APP_WRITE", |
| "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", |
| "dataType": "QNN_DATATYPE_UFIXED_POINT_8", |
| "rank": 4, |
| "dimensions": [ |
| 32, |
| 1, |
| 96, |
| 4095 |
| ], |
| "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", |
| "quantizeParams": { |
| "definition": "QNN_DEFINITION_DEFINED", |
| "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", |
| "scaleOffset": { |
| "scale": 0.12122304737567902, |
| "offset": -128 |
| } |
| } |
| } |
| }, |
| { |
| "version": "QNN_TENSOR_VERSION_1", |
| "info": { |
| "id": 15725, |
| "name": "past_value_2_in", |
| "type": "QNN_TENSOR_TYPE_APP_WRITE", |
| "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", |
| "dataType": "QNN_DATATYPE_UFIXED_POINT_8", |
| "rank": 4, |
| "dimensions": [ |
| 32, |
| 1, |
| 4095, |
| 96 |
| ], |
| "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", |
| "quantizeParams": { |
| "definition": "QNN_DEFINITION_DEFINED", |
| "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", |
| "scaleOffset": { |
| "scale": 0.02321721613407135, |
| "offset": -128 |
| } |
| } |
| } |
| }, |
| { |
| "version": "QNN_TENSOR_VERSION_1", |
| "info": { |
| "id": 15870, |
| "name": "past_key_3_in", |
| "type": "QNN_TENSOR_TYPE_APP_WRITE", |
| "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", |
| "dataType": "QNN_DATATYPE_UFIXED_POINT_8", |
| "rank": 4, |
| "dimensions": [ |
| 32, |
| 1, |
| 96, |
| 4095 |
| ], |
| "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", |
| "quantizeParams": { |
| "definition": "QNN_DEFINITION_DEFINED", |
| "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", |
| "scaleOffset": { |
| "scale": 0.1403033435344696, |
| "offset": -128 |
| } |
| } |
| } |
| }, |
| { |
| "version": "QNN_TENSOR_VERSION_1", |
| "info": { |
| "id": 15873, |
| "name": "past_value_3_in", |
| "type": "QNN_TENSOR_TYPE_APP_WRITE", |
| "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", |
| "dataType": "QNN_DATATYPE_UFIXED_POINT_8", |
| "rank": 4, |
| "dimensions": [ |
| 32, |
| 1, |
| 4095, |
| 96 |
| ], |
| "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", |
| "quantizeParams": { |
| "definition": "QNN_DEFINITION_DEFINED", |
| "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", |
| "scaleOffset": { |
| "scale": 0.021280551329255105, |
| "offset": -128 |
| } |
| } |
| } |
| }, |
| { |
| "version": "QNN_TENSOR_VERSION_1", |
| "info": { |
| "id": 16014, |
| "name": "past_key_4_in", |
| "type": "QNN_TENSOR_TYPE_APP_WRITE", |
| "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", |
| "dataType": "QNN_DATATYPE_UFIXED_POINT_8", |
| "rank": 4, |
| "dimensions": [ |
| 32, |
| 1, |
| 96, |
| 4095 |
| ], |
| "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", |
| "quantizeParams": { |
| "definition": "QNN_DEFINITION_DEFINED", |
| "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", |
| "scaleOffset": { |
| "scale": 0.11964445561170578, |
| "offset": -128 |
| } |
| } |
| } |
| }, |
| { |
| "version": "QNN_TENSOR_VERSION_1", |
| "info": { |
| "id": 16017, |
| "name": "past_value_4_in", |
| "type": "QNN_TENSOR_TYPE_APP_WRITE", |
| "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", |
| "dataType": "QNN_DATATYPE_UFIXED_POINT_8", |
| "rank": 4, |
| "dimensions": [ |
| 32, |
| 1, |
| 4095, |
| 96 |
| ], |
| "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", |
| "quantizeParams": { |
| "definition": "QNN_DEFINITION_DEFINED", |
| "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", |
| "scaleOffset": { |
| "scale": 0.03495820611715317, |
| "offset": -128 |
| } |
| } |
| } |
| }, |
| { |
| "version": "QNN_TENSOR_VERSION_1", |
| "info": { |
| "id": 17086, |
| "name": "position_ids_cos", |
| "type": "QNN_TENSOR_TYPE_APP_WRITE", |
| "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", |
| "dataType": "QNN_DATATYPE_UFIXED_POINT_16", |
| "rank": 4, |
| "dimensions": [ |
| 1, |
| 1, |
| 1, |
| 48 |
| ], |
| "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", |
| "quantizeParams": { |
| "definition": "QNN_DEFINITION_DEFINED", |
| "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", |
| "scaleOffset": { |
| "scale": 0.00003632373773143627, |
| "offset": -32768 |
| } |
| } |
| } |
| }, |
| { |
| "version": "QNN_TENSOR_VERSION_1", |
| "info": { |
| "id": 17088, |
| "name": "position_ids_sin", |
| "type": "QNN_TENSOR_TYPE_APP_WRITE", |
| "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", |
| "dataType": "QNN_DATATYPE_UFIXED_POINT_16", |
| "rank": 4, |
| "dimensions": [ |
| 1, |
| 1, |
| 1, |
| 48 |
| ], |
| "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", |
| "quantizeParams": { |
| "definition": "QNN_DEFINITION_DEFINED", |
| "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", |
| "scaleOffset": { |
| "scale": 0.00003632373773143627, |
| "offset": -32768 |
| } |
| } |
| } |
| }, |
| { |
| "version": "QNN_TENSOR_VERSION_1", |
| "info": { |
| "id": 17793, |
| "name": "attention_mask", |
| "type": "QNN_TENSOR_TYPE_APP_WRITE", |
| "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", |
| "dataType": "QNN_DATATYPE_UFIXED_POINT_16", |
| "rank": 4, |
| "dimensions": [ |
| 1, |
| 1, |
| 1, |
| 4096 |
| ], |
| "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", |
| "quantizeParams": { |
| "definition": "QNN_DEFINITION_DEFINED", |
| "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", |
| "scaleOffset": { |
| "scale": 0.0007629510946571827, |
| "offset": -65535 |
| } |
| } |
| } |
| } |
| ], |
| "numGraphOutputs": 17, |
| "graphOutputs": [ |
| { |
| "version": "QNN_TENSOR_VERSION_1", |
| "info": { |
| "id": 17053, |
| "name": "past_value_0_out", |
| "type": "QNN_TENSOR_TYPE_APP_READ", |
| "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", |
| "dataType": "QNN_DATATYPE_UFIXED_POINT_8", |
| "rank": 4, |
| "dimensions": [ |
| 32, |
| 1, |
| 1, |
| 96 |
| ], |
| "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", |
| "quantizeParams": { |
| "definition": "QNN_DEFINITION_DEFINED", |
| "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", |
| "scaleOffset": { |
| "scale": 0.005486138164997101, |
| "offset": -128 |
| } |
| } |
| } |
| }, |
| { |
| "version": "QNN_TENSOR_VERSION_1", |
| "info": { |
| "id": 17696, |
| "name": "past_key_0_out", |
| "type": "QNN_TENSOR_TYPE_APP_READ", |
| "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", |
| "dataType": "QNN_DATATYPE_UFIXED_POINT_8", |
| "rank": 4, |
| "dimensions": [ |
| 32, |
| 1, |
| 96, |
| 1 |
| ], |
| "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", |
| "quantizeParams": { |
| "definition": "QNN_DEFINITION_DEFINED", |
| "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", |
| "scaleOffset": { |
| "scale": 0.07073105126619339, |
| "offset": -128 |
| } |
| } |
| } |
| }, |
| { |
| "version": "QNN_TENSOR_VERSION_1", |
| "info": { |
| "id": 18818, |
| "name": "past_value_1_out", |
| "type": "QNN_TENSOR_TYPE_APP_READ", |
| "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", |
| "dataType": "QNN_DATATYPE_UFIXED_POINT_8", |
| "rank": 4, |
| "dimensions": [ |
| 32, |
| 1, |
| 1, |
| 96 |
| ], |
| "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", |
| "quantizeParams": { |
| "definition": "QNN_DEFINITION_DEFINED", |
| "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", |
| "scaleOffset": { |
| "scale": 0.011280891485512257, |
| "offset": -128 |
| } |
| } |
| } |
| }, |
| { |
| "version": "QNN_TENSOR_VERSION_1", |
| "info": { |
| "id": 19459, |
| "name": "past_key_1_out", |
| "type": "QNN_TENSOR_TYPE_APP_READ", |
| "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", |
| "dataType": "QNN_DATATYPE_UFIXED_POINT_8", |
| "rank": 4, |
| "dimensions": [ |
| 32, |
| 1, |
| 96, |
| 1 |
| ], |
| "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", |
| "quantizeParams": { |
| "definition": "QNN_DEFINITION_DEFINED", |
| "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", |
| "scaleOffset": { |
| "scale": 0.09567122906446457, |
| "offset": -128 |
| } |
| } |
| } |
| }, |
| { |
| "version": "QNN_TENSOR_VERSION_1", |
| "info": { |
| "id": 20582, |
| "name": "past_value_2_out", |
| "type": "QNN_TENSOR_TYPE_APP_READ", |
| "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", |
| "dataType": "QNN_DATATYPE_UFIXED_POINT_8", |
| "rank": 4, |
| "dimensions": [ |
| 32, |
| 1, |
| 1, |
| 96 |
| ], |
| "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", |
| "quantizeParams": { |
| "definition": "QNN_DEFINITION_DEFINED", |
| "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", |
| "scaleOffset": { |
| "scale": 0.02321721613407135, |
| "offset": -128 |
| } |
| } |
| } |
| }, |
| { |
| "version": "QNN_TENSOR_VERSION_1", |
| "info": { |
| "id": 21223, |
| "name": "past_key_2_out", |
| "type": "QNN_TENSOR_TYPE_APP_READ", |
| "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", |
| "dataType": "QNN_DATATYPE_UFIXED_POINT_8", |
| "rank": 4, |
| "dimensions": [ |
| 32, |
| 1, |
| 96, |
| 1 |
| ], |
| "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", |
| "quantizeParams": { |
| "definition": "QNN_DEFINITION_DEFINED", |
| "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", |
| "scaleOffset": { |
| "scale": 0.12122304737567902, |
| "offset": -128 |
| } |
| } |
| } |
| }, |
| { |
| "version": "QNN_TENSOR_VERSION_1", |
| "info": { |
| "id": 22342, |
| "name": "past_value_3_out", |
| "type": "QNN_TENSOR_TYPE_APP_READ", |
| "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", |
| "dataType": "QNN_DATATYPE_UFIXED_POINT_8", |
| "rank": 4, |
| "dimensions": [ |
| 32, |
| 1, |
| 1, |
| 96 |
| ], |
| "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", |
| "quantizeParams": { |
| "definition": "QNN_DEFINITION_DEFINED", |
| "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", |
| "scaleOffset": { |
| "scale": 0.021280551329255105, |
| "offset": -128 |
| } |
| } |
| } |
| }, |
| { |
| "version": "QNN_TENSOR_VERSION_1", |
| "info": { |
| "id": 22983, |
| "name": "past_key_3_out", |
| "type": "QNN_TENSOR_TYPE_APP_READ", |
| "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", |
| "dataType": "QNN_DATATYPE_UFIXED_POINT_8", |
| "rank": 4, |
| "dimensions": [ |
| 32, |
| 1, |
| 96, |
| 1 |
| ], |
| "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", |
| "quantizeParams": { |
| "definition": "QNN_DEFINITION_DEFINED", |
| "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", |
| "scaleOffset": { |
| "scale": 0.1403033435344696, |
| "offset": -128 |
| } |
| } |
| } |
| }, |
| { |
| "version": "QNN_TENSOR_VERSION_1", |
| "info": { |
| "id": 24104, |
| "name": "past_value_4_out", |
| "type": "QNN_TENSOR_TYPE_APP_READ", |
| "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", |
| "dataType": "QNN_DATATYPE_UFIXED_POINT_8", |
| "rank": 4, |
| "dimensions": [ |
| 32, |
| 1, |
| 1, |
| 96 |
| ], |
| "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", |
| "quantizeParams": { |
| "definition": "QNN_DEFINITION_DEFINED", |
| "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", |
| "scaleOffset": { |
| "scale": 0.03495820611715317, |
| "offset": -128 |
| } |
| } |
| } |
| }, |
| { |
| "version": "QNN_TENSOR_VERSION_1", |
| "info": { |
| "id": 24745, |
| "name": "past_key_4_out", |
| "type": "QNN_TENSOR_TYPE_APP_READ", |
| "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", |
| "dataType": "QNN_DATATYPE_UFIXED_POINT_8", |
| "rank": 4, |
| "dimensions": [ |
| 32, |
| 1, |
| 96, |
| 1 |
| ], |
| "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", |
| "quantizeParams": { |
| "definition": "QNN_DEFINITION_DEFINED", |
| "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", |
| "scaleOffset": { |
| "scale": 0.11964445561170578, |
| "offset": -128 |
| } |
| } |
| } |
| }, |
| { |
| "version": "QNN_TENSOR_VERSION_1", |
| "info": { |
| "id": 25864, |
| "name": "past_value_5_out", |
| "type": "QNN_TENSOR_TYPE_APP_READ", |
| "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", |
| "dataType": "QNN_DATATYPE_UFIXED_POINT_8", |
| "rank": 4, |
| "dimensions": [ |
| 32, |
| 1, |
| 1, |
| 96 |
| ], |
| "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", |
| "quantizeParams": { |
| "definition": "QNN_DEFINITION_DEFINED", |
| "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", |
| "scaleOffset": { |
| "scale": 0.03382043167948723, |
| "offset": -128 |
| } |
| } |
| } |
| }, |
| { |
| "version": "QNN_TENSOR_VERSION_1", |
| "info": { |
| "id": 26505, |
| "name": "past_key_5_out", |
| "type": "QNN_TENSOR_TYPE_APP_READ", |
| "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", |
| "dataType": "QNN_DATATYPE_UFIXED_POINT_8", |
| "rank": 4, |
| "dimensions": [ |
| 32, |
| 1, |
| 96, |
| 1 |
| ], |
| "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", |
| "quantizeParams": { |
| "definition": "QNN_DEFINITION_DEFINED", |
| "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", |
| "scaleOffset": { |
| "scale": 0.13209088146686555, |
| "offset": -128 |
| } |
| } |
| } |
| }, |
| { |
| "version": "QNN_TENSOR_VERSION_1", |
| "info": { |
| "id": 27624, |
| "name": "past_value_6_out", |
| "type": "QNN_TENSOR_TYPE_APP_READ", |
| "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", |
| "dataType": "QNN_DATATYPE_UFIXED_POINT_8", |
| "rank": 4, |
| "dimensions": [ |
| 32, |
| 1, |
| 1, |
| 96 |
| ], |
| "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", |
| "quantizeParams": { |
| "definition": "QNN_DEFINITION_DEFINED", |
| "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", |
| "scaleOffset": { |
| "scale": 0.03435523062944412, |
| "offset": -128 |
| } |
| } |
| } |
| }, |
| { |
| "version": "QNN_TENSOR_VERSION_1", |
| "info": { |
| "id": 28265, |
| "name": "past_key_6_out", |
| "type": "QNN_TENSOR_TYPE_APP_READ", |
| "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", |
| "dataType": "QNN_DATATYPE_UFIXED_POINT_8", |
| "rank": 4, |
| "dimensions": [ |
| 32, |
| 1, |
| 96, |
| 1 |
| ], |
| "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", |
| "quantizeParams": { |
| "definition": "QNN_DEFINITION_DEFINED", |
| "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", |
| "scaleOffset": { |
| "scale": 0.1345406174659729, |
| "offset": -128 |
| } |
| } |
| } |
| }, |
| { |
| "version": "QNN_TENSOR_VERSION_1", |
| "info": { |
| "id": 29384, |
| "name": "past_value_7_out", |
| "type": "QNN_TENSOR_TYPE_APP_READ", |
| "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", |
| "dataType": "QNN_DATATYPE_UFIXED_POINT_8", |
| "rank": 4, |
| "dimensions": [ |
| 32, |
| 1, |
| 1, |
| 96 |
| ], |
| "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", |
| "quantizeParams": { |
| "definition": "QNN_DEFINITION_DEFINED", |
| "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", |
| "scaleOffset": { |
| "scale": 0.06318466365337372, |
| "offset": -128 |
| } |
| } |
| } |
| }, |
| { |
| "version": "QNN_TENSOR_VERSION_1", |
| "info": { |
| "id": 30025, |
| "name": "past_key_7_out", |
| "type": "QNN_TENSOR_TYPE_APP_READ", |
| "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", |
| "dataType": "QNN_DATATYPE_UFIXED_POINT_8", |
| "rank": 4, |
| "dimensions": [ |
| 32, |
| 1, |
| 96, |
| 1 |
| ], |
| "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", |
| "quantizeParams": { |
| "definition": "QNN_DEFINITION_DEFINED", |
| "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", |
| "scaleOffset": { |
| "scale": 0.14882269501686097, |
| "offset": -128 |
| } |
| } |
| } |
| }, |
| { |
| "version": "QNN_TENSOR_VERSION_1", |
| "info": { |
| "id": 30272, |
| "name": "_model_layers_7_Add_1_Add_output_0", |
| "type": "QNN_TENSOR_TYPE_APP_READ", |
| "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", |
| "dataType": "QNN_DATATYPE_UFIXED_POINT_16", |
| "rank": 3, |
| "dimensions": [ |
| 1, |
| 1, |
| 3072 |
| ], |
| "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", |
| "quantizeParams": { |
| "definition": "QNN_DEFINITION_DEFINED", |
| "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", |
| "scaleOffset": { |
| "scale": 0.10904459655284882, |
| "offset": -40904 |
| } |
| } |
| } |
| } |
| ], |
| "numUpdateableTensors": 0, |
| "updateableTensors": [], |
| "graphBlobInfoSize": 40, |
| "graphBlobInfo": [ |
| { |
| "version": "QNN_SYSTEM_CONTEXT_HTP_GRAPH_INFO_BLOB_VERSION_V1", |
| "info": { |
| "spillFillBufferSize": 0, |
| "optimizationLevel": 3, |
| "vtcmSize": 8, |
| "htpDlbc": 0, |
| "numHvxThreads": 0 |
| } |
| } |
| ] |
| } |
| } |
| ], |
| "contextMetadataSize": 8, |
| "contextMetadata": { |
| "version": "QNN_SYSTEM_CONTEXT_HTP_CONTEXT_INFO_BLOB_VERSION_V1", |
| "info": { |
| "dsp arch": 73 |
| } |
| }, |
| "soc model": 43 |
| } |
| } |
|
|