//=============================================================================
//
//  Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
//  All Rights Reserved.
//  Confidential and Proprietary - Qualcomm Technologies, Inc.
//
//=============================================================================

#include "QnnModel.hpp"
#include "QnnOpDef.h"

// Flag to determine if Backend should do node validation for each opNode added
#define DO_GRAPH_NODE_VALIDATIONS 1

#ifdef _MSC_VER
#define MODEL_LIB_EXPORT __declspec(dllexport)
#else
#define MODEL_LIB_EXPORT __attribute__((visibility("default")))
#endif

using namespace qnn_wrapper_api;

extern "C" {

MODEL_LIB_EXPORT ModelError_t QnnModel_GenAI_composeGraphs(Qnn_BackendHandle_t backendHandle,
                                                           QNN_INTERFACE_VER_TYPE interface,
                                                           Qnn_ContextHandle_t contextHandle,
                                                           const GraphConfigInfo_t** graphsConfigInfo,
                                                           const uint32_t numGraphsConfigInfo,
                                                           uint32_t* inputDim,
                                                           uint32_t inputRank,
                                                           uint32_t* outputDim,
                                                           uint32_t outputRank,
                                                           uint32_t* kvDim,
                                                           uint32_t kvRank,
                                                           Qnn_Param_t* params,
                                                           uint32_t numParams,
                                                           GraphInfoPtr_t** graphsInfo,
                                                           uint32_t* numGraphsInfo,
                                                           bool debug,
                                                           QnnLog_Callback_t logCallback,
                                                           QnnLog_Level_t maxLogLevel) {
  (void)logCallback;
  (void)maxLogLevel;
  ModelError_t err = MODEL_NO_ERROR;

  /* model/graph for qnn_model */
  QnnModel qnn_model;
  const QnnGraph_Config_t** graphConfigs = nullptr;
  VALIDATE(getQnnGraphConfigFromInfo("qnn_model", graphsConfigInfo, numGraphsConfigInfo, graphConfigs),
           err);
  VALIDATE(qnn_model.initialize(backendHandle,
                                interface,
                                contextHandle,
                                "qnn_model",
                                debug,
                                DO_GRAPH_NODE_VALIDATIONS,
                                graphConfigs),
           err);

  Qnn_Tensor_t tin;
  tin.version = QNN_TENSOR_VERSION_1;
  tin.v1.id = 0;
  tin.v1.name = "x0";
  tin.v1.type = QNN_TENSOR_TYPE_APP_WRITE;
  tin.v1.dataFormat = QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER;
  tin.v1.dataType = QNN_DATATYPE_UINT_32;
  tin.v1.quantizeParams.encodingDefinition = QNN_DEFINITION_UNDEFINED;
  tin.v1.quantizeParams.quantizationEncoding = QNN_QUANTIZATION_ENCODING_UNDEFINED;
  tin.v1.quantizeParams.scaleOffsetEncoding = {.scale = 0.0000000000000000f, .offset = 0};
  tin.v1.rank = inputRank;
  tin.v1.dimensions = inputDim;
  tin.v1.memType = QNN_TENSORMEMTYPE_RAW;
  tin.v1.clientBuf = {.data = nullptr, .dataSize = 0};
  VALIDATE(qnn_model.addTensor("x0",  // Node Name
                               (Qnn_Tensor_t)tin),
           err);

  uint32_t input1Dim[1] = {1};
  Qnn_Tensor_t tin2;
  tin2.version = QNN_TENSOR_VERSION_1;
  tin2.v1.id = 0;
  tin2.v1.name = "x1";
  tin2.v1.type = QNN_TENSOR_TYPE_APP_WRITE;
  tin2.v1.dataFormat = QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER;
  tin2.v1.dataType = QNN_DATATYPE_UINT_32;
  tin2.v1.quantizeParams.encodingDefinition = QNN_DEFINITION_UNDEFINED;
  tin2.v1.quantizeParams.quantizationEncoding = QNN_QUANTIZATION_ENCODING_UNDEFINED;
  tin2.v1.quantizeParams.scaleOffsetEncoding = {.scale = 0.0000000000000000f, .offset = 0};
  tin2.v1.rank = 1;
  tin2.v1.dimensions = input1Dim;
  tin2.v1.memType = QNN_TENSORMEMTYPE_RAW;
  tin2.v1.clientBuf = {.data = nullptr, .dataSize = 0};
  VALIDATE(qnn_model.addTensor("x1",  // Node Name
                               (Qnn_Tensor_t)tin2),
           err);

  uint32_t input2Dim[1] = {1};
  Qnn_Tensor_t tin3;
  tin3.version = QNN_TENSOR_VERSION_1;
  tin3.v1.id = 0;
  tin3.v1.name = "x2";
  tin3.v1.type = QNN_TENSOR_TYPE_APP_WRITE;
  tin3.v1.dataFormat = QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER;
  tin3.v1.dataType = QNN_DATATYPE_UINT_32;
  tin3.v1.quantizeParams.encodingDefinition = QNN_DEFINITION_UNDEFINED;
  tin3.v1.quantizeParams.quantizationEncoding = QNN_QUANTIZATION_ENCODING_UNDEFINED;
  tin3.v1.quantizeParams.scaleOffsetEncoding = {.scale = 0.0000000000000000f, .offset = 0};
  tin3.v1.rank = 1;
  tin3.v1.dimensions = input2Dim;
  tin3.v1.memType = QNN_TENSORMEMTYPE_RAW;
  tin3.v1.clientBuf = {.data = nullptr, .dataSize = 0};
  VALIDATE(qnn_model.addTensor("x2",  // Node Name
                               (Qnn_Tensor_t)tin3),
           err);

  Qnn_Tensor_t tin4;
  tin4.version = QNN_TENSOR_VERSION_1;
  tin4.v1.id = 0;
  tin4.v1.name = "x3";
  tin4.v1.type = QNN_TENSOR_TYPE_APP_WRITE;
  tin4.v1.dataFormat = QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER;
  tin4.v1.dataType = QNN_DATATYPE_UINT_32;
  tin4.v1.quantizeParams.encodingDefinition = QNN_DEFINITION_UNDEFINED;
  tin4.v1.quantizeParams.quantizationEncoding = QNN_QUANTIZATION_ENCODING_UNDEFINED;
  tin4.v1.quantizeParams.scaleOffsetEncoding = {.scale = 0.0000000000000000f, .offset = 0};
  tin4.v1.rank = kvRank;
  tin4.v1.dimensions = kvDim;
  tin4.v1.memType = QNN_TENSORMEMTYPE_RAW;
  tin4.v1.clientBuf = {.data = nullptr, .dataSize = 0};
  VALIDATE(qnn_model.addTensor("x3",  // Node Name
                               (Qnn_Tensor_t)tin4),
           err);

  Qnn_Tensor_t tin5;
  tin5.version = QNN_TENSOR_VERSION_1;
  tin5.v1.id = 0;
  tin5.v1.name = "x4";
  tin5.v1.type = QNN_TENSOR_TYPE_APP_WRITE;
  tin5.v1.dataFormat = QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER;
  tin5.v1.dataType = QNN_DATATYPE_UINT_32;
  tin5.v1.quantizeParams.encodingDefinition = QNN_DEFINITION_UNDEFINED;
  tin5.v1.quantizeParams.quantizationEncoding = QNN_QUANTIZATION_ENCODING_UNDEFINED;
  tin5.v1.quantizeParams.scaleOffsetEncoding = {.scale = 0.0000000000000000f, .offset = 0};
  tin5.v1.rank = kvRank;
  tin5.v1.dimensions = kvDim;
  tin5.v1.memType = QNN_TENSORMEMTYPE_RAW;
  tin5.v1.clientBuf = {.data = nullptr, .dataSize = 0};
  VALIDATE(qnn_model.addTensor("x4",  // Node Name
                               (Qnn_Tensor_t)tin5),
           err);

  uint32_t input5Dim[1] = {1};
  Qnn_Tensor_t tin6;
  tin6.version = QNN_TENSOR_VERSION_1;
  tin6.v1.id = 0;
  tin6.v1.name = "x5";
  tin6.v1.type = QNN_TENSOR_TYPE_APP_WRITE;
  tin6.v1.dataFormat = QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER;
  tin6.v1.dataType = QNN_DATATYPE_UINT_32;
  tin6.v1.quantizeParams.encodingDefinition = QNN_DEFINITION_UNDEFINED;
  tin6.v1.quantizeParams.quantizationEncoding = QNN_QUANTIZATION_ENCODING_UNDEFINED;
  tin6.v1.quantizeParams.scaleOffsetEncoding = {.scale = 0.0000000000000000f, .offset = 0};
  tin6.v1.rank = 1;
  tin6.v1.dimensions = input5Dim;
  tin6.v1.memType = QNN_TENSORMEMTYPE_RAW;
  tin6.v1.clientBuf = {.data = nullptr, .dataSize = 0};
  VALIDATE(qnn_model.addTensor("x5",  // Node Name
                               (Qnn_Tensor_t)tin6),
           err);

  uint32_t input6Dim[1] = {1};
  Qnn_Tensor_t tin7;
  tin7.version = QNN_TENSOR_VERSION_1;
  tin7.v1.id = 0;
  tin7.v1.name = "x6";
  tin7.v1.type = QNN_TENSOR_TYPE_APP_WRITE;
  tin7.v1.dataFormat = QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER;
  tin7.v1.dataType = QNN_DATATYPE_FLOAT_32;
  tin7.v1.quantizeParams.encodingDefinition = QNN_DEFINITION_UNDEFINED;
  tin7.v1.quantizeParams.quantizationEncoding = QNN_QUANTIZATION_ENCODING_UNDEFINED;
  tin7.v1.quantizeParams.scaleOffsetEncoding = {.scale = 0.0000000000000000f, .offset = 0};
  tin7.v1.rank = 1;
  tin7.v1.dimensions = input6Dim;
  tin7.v1.memType = QNN_TENSORMEMTYPE_RAW;
  tin7.v1.clientBuf = {.data = nullptr, .dataSize = 0};
  VALIDATE(qnn_model.addTensor("x6",  // Node Name
                               (Qnn_Tensor_t)tin7),
           err);

  /* ADDING NODE FOR genAI */
  const char* inputs_genAI[] = {"x0", "x1", "x2", "x3", "x4", "x5", "x6"};

  Qnn_Tensor_t tout;
  tout.version = QNN_TENSOR_VERSION_1;
  tout.v1.id = 0;
  tout.v1.name = "output_genAI";
  tout.v1.type = QNN_TENSOR_TYPE_APP_READ;
  tout.v1.dataFormat = QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER;
  tout.v1.dataType = QNN_DATATYPE_FLOAT_32;
  tout.v1.quantizeParams.encodingDefinition = QNN_DEFINITION_UNDEFINED;
  tout.v1.quantizeParams.quantizationEncoding = QNN_QUANTIZATION_ENCODING_UNDEFINED;
  tout.v1.quantizeParams.scaleOffsetEncoding = {.scale = 0.0000000000000000f, .offset = 0};
  tout.v1.rank = outputRank;
  tout.v1.dimensions = outputDim;
  tout.v1.memType = QNN_TENSORMEMTYPE_RAW;
  tout.v1.clientBuf = {.data = nullptr, .dataSize = 0};

  uint32_t output1Dim[1] = {1};
  Qnn_Tensor_t tout1;
  tout1.version = QNN_TENSOR_VERSION_1;
  tout1.v1.id = 0;
  tout1.v1.name = "output_npast";
  tout1.v1.type = QNN_TENSOR_TYPE_APP_READ;
  tout1.v1.dataFormat = QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER;
  tout1.v1.dataType = QNN_DATATYPE_UINT_32;
  tout1.v1.quantizeParams.encodingDefinition = QNN_DEFINITION_UNDEFINED;
  tout1.v1.quantizeParams.quantizationEncoding = QNN_QUANTIZATION_ENCODING_UNDEFINED;
  tout1.v1.quantizeParams.scaleOffsetEncoding = {.scale = 0.0000000000000000f, .offset = 0};
  tout1.v1.rank = 1;
  tout1.v1.dimensions = output1Dim;
  tout1.v1.memType = QNN_TENSORMEMTYPE_RAW;
  tout1.v1.clientBuf = {.data = nullptr, .dataSize = 0};

  Qnn_Tensor_t outputs_genAI[] = {(Qnn_Tensor_t)tout, (Qnn_Tensor_t)tout1};
  VALIDATE(qnn_model.addNode(QNN_OPCONFIG_VERSION_1,   // Op_Config_t Version
                             "LLM",                    // Node Name
                             "llm_engine.oppackage",   // Package Name
                             "LLM",                    // Qnn Node Type
                             params,                   // Node Params
                             numParams,                // Num Node Params
                             inputs_genAI,             // Input Tensor Names
                             7,                        // Num Input Tensor Names
                             outputs_genAI,            // Output Tensors
                             2                         // Num Output Tensors
                             ),
           err);

  // Add all models to array to get graphsInfo
  QnnModel* models[] = {&qnn_model};
  uint32_t numModels = 1;

  // Populate the constructed graphs in provided output variables
  VALIDATE(getGraphInfoFromModels(*models, numModels, graphsInfo), err);
  *numGraphsInfo = numModels;

  return err;
}  // PREPARE_GRAPHS

MODEL_LIB_EXPORT ModelError_t QnnModel_freeGraphsInfo(GraphInfoPtr_t** graphsInfo,
                                                      uint32_t numGraphsInfo) {
  return qnn_wrapper_api::freeGraphsInfo(graphsInfo, numGraphsInfo);
}  // FREEGRAPHINFO

}
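
// -----------------------------------------------------------------------------
// Usage sketch (illustrative only, not part of the generated model): a minimal
// outline of how an application might drive the two exported entry points
// above, assuming a QNN backend, interface table, and context have already
// been created elsewhere. The handle names, shapes, and node params below are
// placeholder assumptions, not values required by this library.
//
//   GraphInfoPtr_t* graphsInfo    = nullptr;
//   uint32_t        numGraphsInfo = 0;
//   uint32_t        inputDim[2]   = {1, 128};    // assumed token-id shape
//   uint32_t        outputDim[2]  = {1, 32000};  // assumed logits shape
//   uint32_t        kvDim[1]      = {1};         // assumed KV-cache descriptor shape
//
//   ModelError_t status = QnnModel_GenAI_composeGraphs(
//       backendHandle, qnnInterface, contextHandle,
//       nullptr, 0,                        // no per-graph configs
//       inputDim, 2, outputDim, 2, kvDim, 1,
//       nodeParams, numNodeParams,         // params forwarded to the "LLM" node
//       &graphsInfo, &numGraphsInfo,
//       false,                             // debug
//       nullptr, QNN_LOG_LEVEL_ERROR);     // log callback / max level (unused here)
//
//   // ... finalize and execute the composed graph(s) through the QNN API ...
//
//   QnnModel_freeGraphsInfo(&graphsInfo, numGraphsInfo);
// -----------------------------------------------------------------------------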