| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
| #ifndef EI_CLASSIFIER_INFERENCING_ENGINE_MEMRYX_H |
| #define EI_CLASSIFIER_INFERENCING_ENGINE_MEMRYX_H |
|
|
| #if (EI_CLASSIFIER_INFERENCING_ENGINE == EI_CLASSIFIER_MEMRYX) |
|
|
| |
| |
| |
| |
| |
// Backend selection: unless the build explicitly asks for the software
// simulator (EI_CLASSIFIER_USE_MEMRYX_SOFTWARE), target real MemryX hardware.
#if !defined(EI_CLASSIFIER_USE_MEMRYX_SOFTWARE)
#define EI_CLASSIFIER_USE_MEMRYX_HARDWARE 1
#endif

// Falls back to 1 when the build does not provide a value.
// NOTE(review): presumably the number of MemryX chips to use; it is not
// referenced anywhere else in this part of the file - confirm its consumers.
#if !defined(EI_CLASSIFIER_USE_MEMRYX_CHIPS_COUNT)
#define EI_CLASSIFIER_USE_MEMRYX_CHIPS_COUNT 1
#endif
|
|
| #include "model-parameters/model_metadata.h" |
| #if EI_CLASSIFIER_HAS_MODEL_VARIABLES == 1 |
| #include "model-parameters/model_variables.h" |
| #endif |
|
|
| #include "edge-impulse-sdk/porting/ei_classifier_porting.h" |
| #include "edge-impulse-sdk/classifier/ei_fill_result_struct.h" |
| #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/softmax.h" |
#include <math.h>
#include <exception>
#include <fstream>
#include <iomanip>
#include <limits>
#include <sstream>
#include <string>
#include <vector>
| #ifdef EI_CLASSIFIER_USE_MEMRYX_SOFTWARE |
| #include "pybind11/embed.h" |
| #include "pybind11/numpy.h" |
| #include "pybind11/stl.h" |
| #else |
| #include "memx/memx.h" |
| #endif |
| |
| #include "memryx-model/memryx-model.h" |
| #include "utils/model_header_utils.h" |
|
|
| |
| #define MX_SIM_RES_OUTPUTS 0 |
| #define MX_SIM_RES_LATENCY 1 |
| #define MX_SIM_RES_FPS 2 |
|
|
| std::stringstream engine_info; |
|
|
| static bool memryx_initialized = false; |
|
|
| #ifdef EI_CLASSIFIER_USE_MEMRYX_SOFTWARE |
| |
| using namespace pybind11::literals; |
| namespace py = pybind11; |
| |
| static py::module_ memryx; |
| static py::module_ np; |
| static py::object zeroes; |
| static py::object Simulator; |
| static py::object model; |
| static py::object device; |
| static std::vector<size_t> vec; |
| #endif |
|
|
| #ifdef EI_CLASSIFIER_USE_MEMRYX_HARDWARE |
| |
| const uint8_t flow_id = 0; |
| const uint8_t model_id = 0; |
| const uint8_t group_id = 0; |
| const int timeout = 0; |
| int argmax = 0; |
| #endif |
|
|
| |
| |
| |
| |
| static tflite::RuntimeShape softmax_shape; |
| static tflite::SoftmaxParams dummy_params; |
|
|
| static bool verbose_debug = 0; |
|
|
| bool init_memryx(bool debug, const ei_impulse_t *impulse) |
| { |
| |
| std::string project_file_path = "/tmp/" + std::string(impulse->project_name) + "-" + std::to_string(impulse->project_id) + "-" + std::to_string(impulse->deploy_version); |
| create_project_if_not_exists(project_file_path, model_h_files, model_h_files_len); |
|
|
| std::string proj_model_path = project_file_path + "/memryx_trained.dfp"; |
| const char * model_file_path = proj_model_path.c_str(); |
| #if (defined(EI_CLASSIFIER_USE_MEMRYX_HARDWARE) && (EI_CLASSIFIER_USE_MEMRYX_HARDWARE == 1)) |
| #warning "Building EIM for use with MemryX Hardware" |
| memx_status status = MEMX_STATUS_OK; |
| |
| status = memx_open(model_id, group_id, MEMX_DEVICE_CASCADE); |
| if(memx_status_error(status)) { |
| return false; |
| } |
| ei_printf("Memryx device opened.\n"); |
|
|
| |
| |
| status = memx_download_model(model_id, model_file_path, 0, |
| MEMX_DOWNLOAD_TYPE_WTMEM_AND_MODEL); |
| if(memx_status_error(status)) { |
| return false; |
| } |
| ei_printf("Memryx model downloaded.\n"); |
|
|
| |
| |
| status = memx_set_stream_enable(model_id, 0); |
| if(memx_status_error(status)) { |
| return false; |
| } |
| ei_printf("Data streaming to and from the MX3 board is enabled\n"); |
| #elif (defined(EI_CLASSIFIER_USE_MEMRYX_SOFTWARE) && (EI_CLASSIFIER_USE_MEMRYX_SOFTWARE == 1)) |
| #warning "MEMRYX model will be run in SIMULATION mode (not on real hardware)!" |
| py::list path; |
| |
| try { |
| memryx = py::module_::import("memryx"); |
| if(debug) printf("Memryx PyModule init\n"); |
| } |
| catch (py::error_already_set &e) { |
| ei_printf("ERR: Importing 'memryx' library failed:\n%s\n", e.what()); |
| return false; |
| } |
|
|
| Simulator = memryx.attr("Simulator"); |
| if(debug) printf("Simulator API init\n"); |
|
|
| |
| try { |
| model = Simulator("dfp"_a = model_file_path); |
| if(debug) printf("Model API init\n"); |
| } |
| catch (py::error_already_set &e) { |
| ei_printf("ERR: Can't load model file from %s\n", model_file_path); |
| return false; |
| } |
| #else |
| #error "Neither EI_CLASSIFIER_USE_MEMRYX_HARDWARE or EI_CLASSIFIER_USE_MEMRYX_SOFTWARE are defined or set to 1" |
| #endif |
|
|
| |
| engine_info.str(""); |
|
|
| return true; |
| } |
|
|
|
|
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| #if (defined(EI_CLASSIFIER_USE_MEMRYX_HARDWARE) && (EI_CLASSIFIER_USE_MEMRYX_HARDWARE == 1)) |
| EI_IMPULSE_ERROR run_nn_inference( |
| const ei_impulse_t *impulse, |
| ei::matrix_t *fmatrix, |
| ei_impulse_result_t *result, |
| void *config_ptr, |
| bool debug = false) |
| { |
| memx_status status = MEMX_STATUS_OK; |
| int32_t ifmap_height, ifmap_width, ifmap_channel_number, ifmap_format; |
| int32_t ofmap_height, ofmap_width, ofmap_channel_number, ofmap_format; |
| uint64_t ctx_start_us = 0; |
| uint64_t ctx_end_us = 0; |
|
|
| |
| if (memryx_initialized == false) { |
| if(init_memryx(debug, impulse) == false) { |
| return EI_IMPULSE_MEMRYX_ERROR; |
| } |
| memryx_initialized = true; |
| } |
|
|
| |
| if(verbose_debug) { |
| status = memx_get_ifmap_size(model_id, flow_id, &ifmap_height, &ifmap_width, &ifmap_channel_number, &ifmap_format); |
| ei_printf("status = %d, ifmap shape = (%d, %d, %d), format = %d\n", |
| status, ifmap_height, ifmap_width, ifmap_channel_number, ifmap_format); |
| } |
|
|
| |
| status = memx_get_ofmap_size(model_id, flow_id, &ofmap_height, &ofmap_width, &ofmap_channel_number, &ofmap_format); |
| if(debug) { |
| ei_printf("status = %d, ofmap shape = (%d, %d, %d), format = %d\n", |
| status, ofmap_height, ofmap_width, ofmap_channel_number, ofmap_format); |
| } |
| if(memx_status_error(status)) { |
| return EI_IMPULSE_MEMRYX_ERROR; |
| } |
|
|
| |
| float* ofmap = new float [ofmap_width * ofmap_height * ofmap_channel_number]; |
| float* ifmap = (float*)fmatrix->buffer; |
|
|
| if(verbose_debug) { |
| for(int fidx = 0; fidx < (ofmap_width*ofmap_height); fidx++) { |
| ei_printf("%f\t", fmatrix->buffer[fidx]); |
| if(!(fidx % ofmap_width)) ei_printf("\n"); |
| } |
| } |
|
|
| |
| |
| ctx_start_us = ei_read_timer_us(); |
| |
| status = memx_stream_ifmap(model_id, 0, ifmap, timeout); |
| ctx_end_us = ei_read_timer_us(); |
| if(memx_status_error(status)) { |
| return EI_IMPULSE_MEMRYX_ERROR; |
| } |
|
|
| result->timing.classification_us = ctx_end_us - ctx_start_us; |
| result->timing.classification = (int)(result->timing.classification_us / 1000); |
|
|
| engine_info.str(""); |
| engine_info << "Inferences per second: " << (1000000 / result->timing.classification_us); |
|
|
| |
| status = memx_stream_ofmap(model_id, 0, ofmap, timeout); |
| if(debug) { |
| ei_printf(" memx_stream_ofmap (status=%d)\n", status); |
| } |
| if(memx_status_error(status)) { |
| return EI_IMPULSE_MEMRYX_ERROR; |
| } |
|
|
| |
| std::vector<size_t> output_shape = {static_cast<size_t>(ofmap_height),static_cast<size_t>(ofmap_width), |
| static_cast<size_t>(ofmap_channel_number)}; |
| softmax_shape.BuildFrom(output_shape); |
| |
| dummy_params.beta = 1; |
|
|
| |
| tflite::reference_ops::Softmax(dummy_params, softmax_shape, ofmap, softmax_shape, ofmap); |
|
|
| |
| if (impulse->object_detection) { |
| switch (impulse->object_detection_last_layer) { |
| case EI_CLASSIFIER_LAST_LAYER_FOMO: { |
| ei_printf("FOMO executed on Memryx\n"); |
| fill_result_struct_f32_fomo( |
| impulse, |
| result, |
| ofmap, |
| impulse->input_width / 8, |
| impulse->input_height / 8); |
| break; |
| } |
| case EI_CLASSIFIER_LAST_LAYER_SSD: { |
| ei_printf("Mobilenet SSD is not implemented for Edge Impulse MemryX engine, please contact Edge Impulse Support\n"); |
| break; |
| } |
| default: { |
| ei_printf("ERR: Unsupported object detection last layer (%d)\n", |
| impulse->object_detection_last_layer); |
| return EI_IMPULSE_UNSUPPORTED_INFERENCING_ENGINE; |
| } |
| } |
| } |
| else { |
| fill_result_struct_f32(impulse, result, ofmap, debug); |
| } |
|
|
| delete ofmap; |
| |
| return EI_IMPULSE_OK; |
| } |
| #elif (defined(EI_CLASSIFIER_USE_MEMRYX_SOFTWARE) && (EI_CLASSIFIER_USE_MEMRYX_SOFTWARE == 1)) |
| EI_IMPULSE_ERROR run_nn_inference( |
| const ei_impulse_t *impulse, |
| ei::matrix_t *fmatrix, |
| ei_impulse_result_t *result, |
| void *config_ptr, |
| bool debug = false) |
| { |
| |
| static py::scoped_interpreter guard{}; |
|
|
| |
| if (memryx_initialized == false) { |
| if(init_memryx(debug, impulse) == false) { |
| return EI_IMPULSE_MEMRYX_ERROR; |
| } |
| memryx_initialized = true; |
| } |
|
|
| std::vector<size_t> input_shape = {1,EI_CLASSIFIER_INPUT_WIDTH,EI_CLASSIFIER_INPUT_HEIGHT,3}; |
| py::array_t<float> input_data(input_shape); |
|
|
| printf("impulse->w=%d h=%d\n", impulse->input_width, impulse->input_height); |
|
|
| |
| |
| |
| |
| |
| |
| auto r = input_data.mutable_unchecked<4>(); |
| for (py::ssize_t x = 0; x < r.shape(1); x++) { |
| for (py::ssize_t y = 0; y < r.shape(2); y++) { |
| for(py::ssize_t z = 0; z < r.shape(3); z++) { |
| r(0, x, y, z) = (float)(fmatrix->buffer[x * r.shape(2) * r.shape(3) + y * r.shape(3) + z]); |
| } |
| } |
| } |
|
|
| py::object runmodel = model.attr("run"); |
| |
| py::tuple args = py::make_tuple(py::none(), 0.00, 0.00); |
| |
| printf("start inference\n"); |
| uint64_t ctx_start_us = ei_read_timer_us(); |
| args = runmodel("inputs"_a=input_data,"frames"_a=1); |
| uint64_t ctx_end_us = ei_read_timer_us(); |
| printf("end of inference\n"); |
|
|
| result->timing.classification_us = ctx_end_us - ctx_start_us; |
| result->timing.classification = (int)(result->timing.classification_us / 1000); |
|
|
| engine_info.str(""); |
| engine_info << "Inferences per second: " << (1000000 / result->timing.classification_us); |
|
|
| py::array outputs = py::list(args[0]); |
| py::array_t<float> potentials; |
| std::vector<float> potentials_v; |
|
|
| potentials = outputs.squeeze().cast<py::array_t<float>>(); |
|
|
| if (impulse->object_detection == false) { |
| potentials_v = outputs.squeeze().cast<std::vector<float>>(); |
| } |
| else { |
| auto q = potentials.unchecked<>(); |
| for (py::ssize_t x = 0; x < q.shape(0); x++) { |
| for (py::ssize_t y = 0; y < q.shape(1); y++) { |
| for(py::ssize_t z = 0; z < q.shape(2); z++) { |
| potentials_v.push_back(q(x, y, z)); |
| } |
| } |
| } |
| } |
|
|
| if(debug) { |
| std::string ret_str = py::str(potentials).cast<std::string>(); |
| ei_printf("Memryx raw output:\n%s\n", ret_str.c_str()); |
| } |
|
|
| if (impulse->object_detection) { |
| switch (impulse->object_detection_last_layer) { |
| case EI_CLASSIFIER_LAST_LAYER_FOMO: { |
| ei_printf("FOMO executed on Memryx\n"); |
| fill_result_struct_f32_fomo( |
| impulse, |
| result, |
| potentials_v.data(), |
| impulse->input_width / 8, |
| impulse->input_height / 8); |
| break; |
| } |
| case EI_CLASSIFIER_LAST_LAYER_SSD: { |
| ei_printf("Mobilenet SSD executed on Memryx\n"); |
| break; |
| } |
| default: { |
| ei_printf("ERR: Unsupported object detection last layer (%d)\n", |
| impulse->object_detection_last_layer); |
| return EI_IMPULSE_UNSUPPORTED_INFERENCING_ENGINE; |
| } |
| } |
| } |
| else { |
| fill_result_struct_f32(impulse, result, potentials_v.data(), debug); |
| } |
|
|
| return EI_IMPULSE_OK; |
| } |
| #else |
| #error "Neither EI_CLASSIFIER_USE_MEMRYX_HARDWARE or EI_CLASSIFIER_USE_MEMRYX_SOFTWARE are defined or set to 1" |
| #endif |
|
|
| #endif |
|
|
| #endif |
|
|