|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#include "QnnModel.hpp" |
|
|
#include "QnnOpDef.h" |
|
|
|
|
|
|
|
|
// Flag value handed to QnnModel::initialize() when the model is created below. |
// NOTE(review): presumably enables per-node validation while the graph is built - confirm against QnnModel. |
#define DO_GRAPH_NODE_VALIDATIONS 1 |
|
|
|
|
|
// MODEL_LIB_EXPORT decorates the entry points defined in this file: |
// it expands to __declspec(dllexport) when compiling with MSVC and to |
// __attribute__((visibility("default"))) for every other compiler. |
#ifdef _MSC_VER |
|
|
#define MODEL_LIB_EXPORT __declspec(dllexport) |
|
|
#else |
|
|
#define MODEL_LIB_EXPORT __attribute__((visibility("default"))) |
|
|
#endif |
|
|
|
|
|
// Makes the names in namespace qnn_wrapper_api usable without qualification in the rest of this file. |
using namespace qnn_wrapper_api; |
|
|
extern "C" { |
|
|
/**
 * @brief Composes a one-node "LLM" graph and hands back its graph info.
 *
 * The function creates a QnnModel named "qnn_model", registers seven
 * application-written input tensors ("x0" .. "x6"), adds a single node of type
 * "LLM" from the "llm_engine.oppackage" op package with two application-read
 * output tensors ("output_genAI" and "output_npast"), and finally exports the
 * model through getGraphInfoFromModels().
 *
 * @param backendHandle        Forwarded to QnnModel::initialize().
 * @param interface            Forwarded to QnnModel::initialize().
 * @param contextHandle        Forwarded to QnnModel::initialize().
 * @param graphsConfigInfo     Forwarded to getQnnGraphConfigFromInfo().
 * @param numGraphsConfigInfo  Number of entries in @p graphsConfigInfo.
 * @param inputDim             Dimension array used for tensor "x0".
 * @param inputRank            Rank used for tensor "x0".
 * @param outputDim            Dimension array used for tensor "output_genAI".
 * @param outputRank           Rank used for tensor "output_genAI".
 * @param kvDim                Dimension array shared by tensors "x3" and "x4".
 * @param kvRank               Rank shared by tensors "x3" and "x4".
 * @param params               Node parameters forwarded to QnnModel::addNode().
 * @param numParams            Number of entries in @p params.
 * @param graphsInfo           Out: filled in by getGraphInfoFromModels().
 * @param numGraphsInfo        Out: set to the number of models (1). It is
 *                             dereferenced unconditionally, so it must not be null.
 * @param debug                Forwarded to QnnModel::initialize().
 * @param logCallback          Unused.
 * @param maxLogLevel          Unused.
 *
 * @return The value of @c err after the last step; it starts as MODEL_NO_ERROR
 *         and is updated by each VALIDATE(). NOTE(review): VALIDATE is defined
 *         outside this file; it presumably returns early with the failing
 *         step's error code - confirm.
 */
MODEL_LIB_EXPORT ModelError_t QnnModel_GenAI_composeGraphs(Qnn_BackendHandle_t backendHandle,
                                                           QNN_INTERFACE_VER_TYPE interface,
                                                           Qnn_ContextHandle_t contextHandle,
                                                           const GraphConfigInfo_t** graphsConfigInfo,
                                                           const uint32_t numGraphsConfigInfo,
                                                           uint32_t* inputDim,
                                                           uint32_t inputRank,
                                                           uint32_t* outputDim,
                                                           uint32_t outputRank,
                                                           uint32_t* kvDim,
                                                           uint32_t kvRank,
                                                           Qnn_Param_t* params,
                                                           uint32_t numParams,
                                                           GraphInfoPtr_t** graphsInfo,
                                                           uint32_t* numGraphsInfo,
                                                           bool debug,
                                                           QnnLog_Callback_t logCallback,
                                                           QnnLog_Level_t maxLogLevel) {
  (void)logCallback;
  (void)maxLogLevel;
  ModelError_t err = MODEL_NO_ERROR;

  // --- Model setup ---------------------------------------------------------
  QnnModel qnn_model;
  const QnnGraph_Config_t** graphConfigs = nullptr;
  VALIDATE(
      getQnnGraphConfigFromInfo("qnn_model", graphsConfigInfo, numGraphsConfigInfo, graphConfigs),
      err);
  VALIDATE(qnn_model.initialize(backendHandle,
                                interface,
                                contextHandle,
                                "qnn_model",
                                debug,
                                DO_GRAPH_NODE_VALIDATIONS,
                                graphConfigs),
           err);

  // --- Tensor prototypes ---------------------------------------------------
  // Every tensor of this graph shares the same version, id, data format,
  // (undefined) quantization and raw client buffer; only name, shape, data
  // type and direction differ. Value-initialising the prototype keeps all
  // bytes that are not set explicitly at zero, and plain member assignment
  // avoids designated initializers, which are only standard from C++20 on.
  Qnn_Tensor_t uintInputProto{};
  uintInputProto.version                              = QNN_TENSOR_VERSION_1;
  uintInputProto.v1.id                                = 0;
  uintInputProto.v1.type                              = QNN_TENSOR_TYPE_APP_WRITE;
  uintInputProto.v1.dataFormat                        = QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER;
  uintInputProto.v1.dataType                          = QNN_DATATYPE_UINT_32;
  uintInputProto.v1.quantizeParams.encodingDefinition = QNN_DEFINITION_UNDEFINED;
  uintInputProto.v1.quantizeParams.quantizationEncoding = QNN_QUANTIZATION_ENCODING_UNDEFINED;
  uintInputProto.v1.quantizeParams.scaleOffsetEncoding.scale  = 0.0f;
  uintInputProto.v1.quantizeParams.scaleOffsetEncoding.offset = 0;
  uintInputProto.v1.memType                           = QNN_TENSORMEMTYPE_RAW;
  uintInputProto.v1.clientBuf.data                    = nullptr;
  uintInputProto.v1.clientBuf.dataSize                = 0;

  Qnn_Tensor_t floatInputProto = uintInputProto;
  floatInputProto.v1.dataType  = QNN_DATATYPE_FLOAT_32;

  Qnn_Tensor_t uintOutputProto = uintInputProto;
  uintOutputProto.v1.type      = QNN_TENSOR_TYPE_APP_READ;

  Qnn_Tensor_t floatOutputProto = uintOutputProto;
  floatOutputProto.v1.dataType  = QNN_DATATYPE_FLOAT_32;

  // Stamps a name and a shape onto a copy of the given prototype.
  const auto withShape = [](Qnn_Tensor_t tensor,
                            const char* name,
                            uint32_t rank,
                            uint32_t* dims) -> Qnn_Tensor_t {
    tensor.v1.name       = name;
    tensor.v1.rank       = rank;
    tensor.v1.dimensions = dims;
    return tensor;
  };

  // Rank-1, single-element shapes. Each tensor keeps its own array, and the
  // arrays only live for the duration of this call.
  // NOTE(review): assumes QnnModel / getGraphInfoFromModels copy the dimension
  // data they need before this function returns - confirm.
  uint32_t x1Dim[1]    = {1};
  uint32_t x2Dim[1]    = {1};
  uint32_t x5Dim[1]    = {1};
  uint32_t x6Dim[1]    = {1};
  uint32_t npastDim[1] = {1};

  // --- Graph inputs --------------------------------------------------------
  const char* inputs_genAI[] = {"x0", "x1", "x2", "x3", "x4", "x5", "x6"};
  Qnn_Tensor_t inputTensors[] = {
      withShape(uintInputProto, inputs_genAI[0], inputRank, inputDim),
      withShape(uintInputProto, inputs_genAI[1], 1, x1Dim),
      withShape(uintInputProto, inputs_genAI[2], 1, x2Dim),
      withShape(uintInputProto, inputs_genAI[3], kvRank, kvDim),
      withShape(uintInputProto, inputs_genAI[4], kvRank, kvDim),
      withShape(uintInputProto, inputs_genAI[5], 1, x5Dim),
      withShape(floatInputProto, inputs_genAI[6], 1, x6Dim),
  };
  static_assert(sizeof(inputTensors) / sizeof(inputTensors[0]) ==
                    sizeof(inputs_genAI) / sizeof(inputs_genAI[0]),
                "every input name needs exactly one tensor description");
  const uint32_t numInputs =
      static_cast<uint32_t>(sizeof(inputs_genAI) / sizeof(inputs_genAI[0]));

  // Register the inputs in declaration order, each under its own tensor name.
  for (uint32_t i = 0; i < numInputs; ++i) {
    VALIDATE(qnn_model.addTensor(inputs_genAI[i], inputTensors[i]), err);
  }

  // --- Graph outputs -------------------------------------------------------
  Qnn_Tensor_t outputs_genAI[] = {
      withShape(floatOutputProto, "output_genAI", outputRank, outputDim),
      withShape(uintOutputProto, "output_npast", 1, npastDim),
  };
  const uint32_t numOutputs =
      static_cast<uint32_t>(sizeof(outputs_genAI) / sizeof(outputs_genAI[0]));

  // --- The single "LLM" node -----------------------------------------------
  VALIDATE(qnn_model.addNode(QNN_OPCONFIG_VERSION_1,
                             "LLM",
                             "llm_engine.oppackage",
                             "LLM",
                             params,
                             numParams,
                             inputs_genAI,
                             numInputs,
                             outputs_genAI,
                             numOutputs),
           err);

  // --- Export --------------------------------------------------------------
  QnnModel* models[] = {&qnn_model};
  uint32_t numModels = 1;

  VALIDATE(getGraphInfoFromModels(*models, numModels, graphsInfo), err);
  *numGraphsInfo = numModels;

  return err;
}
|
|
|
|
|
/**
 * @brief Exported entry point that releases graph info.
 *
 * Forwards both arguments unchanged to qnn_wrapper_api::freeGraphsInfo() and
 * returns whatever that call reports.
 *
 * @param graphsInfo     Graph info array to release.
 * @param numGraphsInfo  Number of entries in @p graphsInfo.
 * @return The status returned by qnn_wrapper_api::freeGraphsInfo().
 */
MODEL_LIB_EXPORT ModelError_t QnnModel_freeGraphsInfo(GraphInfoPtr_t** graphsInfo,
                                                      uint32_t numGraphsInfo) {
  const ModelError_t status = qnn_wrapper_api::freeGraphsInfo(graphsInfo, numGraphsInfo);
  return status;
}
|
|
} |
|
|
|