| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| |
|
| | #include "onnx.pb.h" |
| |
|
| | #include <algorithm> |
| | #include <float.h> |
| | #include <fstream> |
| | #include <google/protobuf/io/coded_stream.h> |
| | #include <google/protobuf/io/zero_copy_stream_impl.h> |
| | #include <google/protobuf/message.h> |
| | #include <google/protobuf/text_format.h> |
| | #include <iostream> |
| | #include <limits.h> |
| | #include <limits> |
| | #include <set> |
| | #include <stdio.h> |
| |
|
| | static bool read_proto_from_binary(const char* filepath, onnx::ModelProto* message) |
| | { |
| | std::ifstream fs(filepath, std::ifstream::in | std::ifstream::binary); |
| | if (!fs.is_open()) |
| | { |
| | fprintf(stderr, "open failed %s\n", filepath); |
| | return false; |
| | } |
| |
|
| | google::protobuf::io::IstreamInputStream input(&fs); |
| | google::protobuf::io::CodedInputStream codedstr(&input); |
| |
|
| | #if GOOGLE_PROTOBUF_VERSION >= 3011000 |
| | codedstr.SetTotalBytesLimit(INT_MAX); |
| | #else |
| | codedstr.SetTotalBytesLimit(INT_MAX, INT_MAX / 2); |
| | #endif |
| |
|
| | bool success = message->ParseFromCodedStream(&codedstr); |
| |
|
| | fs.close(); |
| |
|
| | return success; |
| | } |
| |
|
| | static std::vector<int> get_node_attr_ai(const onnx::NodeProto& node, const char* key) |
| | { |
| | std::vector<int> v; |
| |
|
| | for (int i = 0; i < node.attribute_size(); i++) |
| | { |
| | const onnx::AttributeProto& attr = node.attribute(i); |
| | if (attr.name() == key) |
| | { |
| | v.resize(attr.ints_size()); |
| | for (int j = 0; j < attr.ints_size(); j++) |
| | { |
| | v[j] = std::max(std::min(attr.ints(j), (::google::protobuf::int64)INT_MAX), (::google::protobuf::int64)INT_MIN); |
| | } |
| |
|
| | break; |
| | } |
| | } |
| |
|
| | return v; |
| | } |
| |
|
| | static void set_node_attr_ai(onnx::NodeProto& node, const char* key, |
| | const std::vector<int>& value) |
| | { |
| | onnx::AttributeProto* attr_group = node.add_attribute(); |
| | attr_group->set_name(key); |
| | for (auto v : value) |
| | { |
| | attr_group->add_ints(v); |
| | } |
| |
|
| | return; |
| | } |
| |
|
| | static std::vector<float> get_node_attr_af(const onnx::NodeProto& node, const char* key) |
| | { |
| | std::vector<float> v; |
| |
|
| | for (int i = 0; i < node.attribute_size(); i++) |
| | { |
| | const onnx::AttributeProto& attr = node.attribute(i); |
| | if (attr.name() == key) |
| | { |
| | v.resize(attr.floats_size()); |
| | for (int j = 0; j < attr.floats_size(); j++) |
| | { |
| | v[j] = attr.floats(j); |
| | } |
| |
|
| | break; |
| | } |
| | } |
| |
|
| | return v; |
| | } |
| |
|
| | static int get_node_attr_i(const onnx::NodeProto& node, const char* key, int def = 0) |
| | { |
| | for (int i = 0; i < node.attribute_size(); i++) |
| | { |
| | const onnx::AttributeProto& attr = node.attribute(i); |
| | if (attr.name() == key) |
| | { |
| | return std::max(std::min(attr.i(), (::google::protobuf::int64)INT_MAX), (::google::protobuf::int64)INT_MIN); |
| | } |
| | } |
| |
|
| | return def; |
| | } |
| |
|
| | static float get_node_attr_f(const onnx::NodeProto& node, const char* key, float def = 0.f) |
| | { |
| | for (int i = 0; i < node.attribute_size(); i++) |
| | { |
| | const onnx::AttributeProto& attr = node.attribute(i); |
| | if (attr.name() == key) |
| | { |
| | return attr.f(); |
| | } |
| | } |
| |
|
| | return def; |
| | } |
| |
|
| | static std::string get_node_attr_s(const onnx::NodeProto& node, const char* key, const std::string& def = std::string()) |
| | { |
| | for (int i = 0; i < node.attribute_size(); i++) |
| | { |
| | const onnx::AttributeProto& attr = node.attribute(i); |
| | if (attr.name() == key) |
| | { |
| | return attr.s(); |
| | } |
| | } |
| |
|
| | return def; |
| | } |
| |
|
| | static onnx::TensorProto get_node_attr_tensor(const onnx::NodeProto& node, const char* key) |
| | { |
| | for (int i = 0; i < node.attribute_size(); i++) |
| | { |
| | const onnx::AttributeProto& attr = node.attribute(i); |
| | if (attr.name() == key) |
| | { |
| | return attr.t(); |
| | } |
| | } |
| |
|
| | return onnx::TensorProto(); |
| | } |
| |
|
| | static float get_node_attr_from_input_f(const onnx::TensorProto& tp) |
| | { |
| | float v = 0.f; |
| |
|
| | |
| | if (tp.data_type() == 1) |
| | { |
| | const float* shape_data = 0; |
| | if (tp.has_raw_data()) |
| | { |
| | shape_data = (const float*)tp.raw_data().data(); |
| | } |
| | else |
| | { |
| | shape_data = tp.float_data().data(); |
| | } |
| | v = shape_data[0]; |
| | } |
| | |
| | else if (tp.data_type() == 11) |
| | { |
| | const double* shape_data = 0; |
| | if (tp.has_raw_data()) |
| | { |
| | shape_data = (const double*)tp.raw_data().data(); |
| | } |
| | else |
| | { |
| | shape_data = tp.double_data().data(); |
| | } |
| | v = shape_data[0]; |
| | } |
| | |
| | else if (tp.data_type() == 7) |
| | { |
| | const int64_t* shape_data = 0; |
| | if (tp.has_raw_data()) |
| | { |
| | shape_data = (const int64_t*)tp.raw_data().data(); |
| | } |
| | else |
| | { |
| | shape_data = tp.int64_data().data(); |
| | } |
| | v = std::max(std::min(shape_data[0], (::google::protobuf::int64)INT_MAX), (::google::protobuf::int64)INT_MIN); |
| | } |
| | |
| | else if (tp.data_type() == 6) |
| | { |
| | const int32_t* shape_data = 0; |
| | if (tp.has_raw_data()) |
| | { |
| | shape_data = (const int32_t*)tp.raw_data().data(); |
| | } |
| | else |
| | { |
| | shape_data = tp.int32_data().data(); |
| | } |
| | v = shape_data[0]; |
| | } |
| | else |
| | { |
| | fprintf(stderr, "Unknown data type %d\n", tp.data_type()); |
| | abort(); |
| | } |
| |
|
| | return v; |
| | } |
| |
|
| | static std::vector<int> get_node_attr_from_input_ai(const onnx::TensorProto& tp) |
| | { |
| | int size = 0; |
| |
|
| | std::vector<int> v; |
| |
|
| | |
| | if (tp.data_type() == 7) |
| | { |
| | const int64_t* shape_data = 0; |
| | if (tp.has_raw_data()) |
| | { |
| | shape_data = (const int64_t*)tp.raw_data().data(); |
| | size = (int)(tp.raw_data().size() / 8); |
| | } |
| | else |
| | { |
| | shape_data = tp.int64_data().data(); |
| | size = tp.int64_data_size(); |
| | } |
| | for (int j = 0; j < size; j++) |
| | { |
| | int vi = std::max(std::min(shape_data[j], (::google::protobuf::int64)INT_MAX), (::google::protobuf::int64)INT_MIN); |
| | v.push_back(vi); |
| | } |
| | } |
| | |
| | else if (tp.data_type() == 6) |
| | { |
| | const int32_t* shape_data = 0; |
| | if (tp.has_raw_data()) |
| | { |
| | shape_data = (const int32_t*)tp.raw_data().data(); |
| | size = (int)(tp.raw_data().size() / 4); |
| | } |
| | else |
| | { |
| | shape_data = tp.int32_data().data(); |
| | size = tp.int32_data_size(); |
| | } |
| | for (int j = 0; j < size; j++) |
| | { |
| | v.push_back(shape_data[j]); |
| | } |
| | } |
| | else |
| | { |
| | fprintf(stderr, "Unknown data type %d\n", tp.data_type()); |
| | } |
| |
|
| | return v; |
| | } |
| |
|
| | static std::vector<float> get_node_attr_from_input_af(const onnx::TensorProto& tp) |
| | { |
| | int size = 0; |
| |
|
| | std::vector<float> v; |
| |
|
| | |
| | if (tp.data_type() == 1) |
| | { |
| | const float* shape_data = 0; |
| | if (tp.has_raw_data()) |
| | { |
| | shape_data = (const float*)tp.raw_data().data(); |
| | size = (int)(tp.raw_data().size() / 4); |
| | } |
| | else |
| | { |
| | shape_data = tp.float_data().data(); |
| | size = tp.float_data_size(); |
| | } |
| | for (int j = 0; j < size; j++) |
| | { |
| | v.push_back(shape_data[j]); |
| | } |
| | } |
| | |
| | else if (tp.data_type() == 11) |
| | { |
| | const double* shape_data = 0; |
| | if (tp.has_raw_data()) |
| | { |
| | shape_data = (const double*)tp.raw_data().data(); |
| | size = (int)(tp.raw_data().size() / 8); |
| | } |
| | else |
| | { |
| | shape_data = tp.double_data().data(); |
| | size = tp.double_data_size(); |
| | } |
| | for (int j = 0; j < size; j++) |
| | { |
| | v.push_back((float)shape_data[j]); |
| | } |
| | } |
| | else |
| | { |
| | fprintf(stderr, "Unknown data type %d\n", tp.data_type()); |
| | } |
| |
|
| | return v; |
| | } |
| |
|
| | static int get_tensor_proto_data_size(const onnx::TensorProto& tp) |
| | { |
| | if (tp.has_raw_data()) |
| | { |
| | const std::string& raw_data = tp.raw_data(); |
| | int size = (int)raw_data.size() / 4; |
| | return size; |
| | } |
| | else if (tp.data_type() == 1) |
| | { |
| | return tp.float_data_size(); |
| | } |
| |
|
| | return 0; |
| | } |
| |
|
| | static void fwrite_tensor_proto_data(const onnx::TensorProto& tp, FILE* bp) |
| | { |
| | int size = get_tensor_proto_data_size(tp); |
| |
|
| | if (tp.has_raw_data()) |
| | { |
| | const std::string& raw_data = tp.raw_data(); |
| | fwrite(raw_data.data(), sizeof(float), size, bp); |
| | } |
| | else if (tp.data_type() == 1) |
| | { |
| | fwrite(tp.float_data().data(), sizeof(float), size, bp); |
| | } |
| | } |
| |
|
| | static void fuse_rewrite_gather(onnx::GraphProto* mutable_graph, |
| | std::map<std::string, onnx::TensorProto>& weights, |
| | std::map<std::string, int>& node_reference, |
| | std::set<std::string>& blob_names, int& reduced_node_count) |
| | { |
| | const int node_count = mutable_graph->node_size(); |
| | for (int i = 0; i < node_count; ++i) |
| | { |
| | onnx::NodeProto* gather = mutable_graph->mutable_node(i); |
| | if (gather->op_type() != "Gather") |
| | { |
| | continue; |
| | } |
| | auto indices = get_node_attr_from_input_ai(weights[gather->input(1)]); |
| | if (indices.size() != 1) |
| | { |
| | continue; |
| | } |
| |
|
| | { |
| | |
| | node_reference[gather->input(1)] -= 1; |
| | std::string origin_inp = gather->input(0); |
| | gather->clear_input(); |
| | gather->add_input(origin_inp); |
| | } |
| |
|
| | { |
| | |
| | int axis = get_node_attr_i(*gather, "axis", 1) - 1; |
| |
|
| | gather->set_op_type("Crop"); |
| | gather->clear_attribute(); |
| |
|
| | int indice = indices[0]; |
| | set_node_attr_ai(*gather, "starts", std::vector<int> {indice}); |
| | set_node_attr_ai(*gather, "ends", std::vector<int> {indice + 1}); |
| | set_node_attr_ai(*gather, "axis", std::vector<int> {axis}); |
| | } |
| | } |
| | } |
| |
|
| | static void fuse_weight_reshape(onnx::GraphProto* mutable_graph, std::map<std::string, onnx::TensorProto>& weights, std::map<std::string, int>& node_reference, std::set<std::string>& blob_names, int& reduced_node_count) |
| | { |
| | int node_count = mutable_graph->node_size(); |
| | for (int i = 0; i < node_count; i++) |
| | { |
| | onnx::NodeProto* node = mutable_graph->mutable_node(i); |
| |
|
| | |
| | if (node->op_type() == "Reshape") |
| | { |
| | |
| | if (weights.find(node->input(0)) == weights.end()) |
| | continue; |
| |
|
| | weights[node->output(0)] = weights[node->input(0)]; |
| |
|
| | |
| | std::vector<int> shape; |
| | if (node->input_size() == 1) |
| | { |
| | shape = get_node_attr_ai(*node, "shape"); |
| | } |
| | else if (node->input_size() == 2) |
| | { |
| | |
| | shape = get_node_attr_from_input_ai(weights[node->input(1)]); |
| | } |
| |
|
| | weights[node->output(0)].clear_dims(); |
| | for (int j = 0; j < shape.size(); j++) |
| | { |
| | weights[node->output(0)].add_dims(shape[j]); |
| | } |
| |
|
| | |
| | node->set_op_type("noop_reducedncnn"); |
| |
|
| | node_reference[node->input(0)] -= 1; |
| | if (node->input_size() == 2) |
| | { |
| | node_reference[node->input(1)] -= 1; |
| | } |
| |
|
| | reduced_node_count += 1; |
| | i += 1; |
| | } |
| | } |
| | } |
| |
|
| | static void fuse_weight_transpose(onnx::GraphProto* mutable_graph, std::map<std::string, onnx::TensorProto>& weights, std::map<std::string, int>& node_reference, std::set<std::string>& blob_names, int& reduced_node_count) |
| | { |
| | int node_count = mutable_graph->node_size(); |
| | for (int i = 0; i < node_count; i++) |
| | { |
| | onnx::NodeProto* node = mutable_graph->mutable_node(i); |
| |
|
| | |
| | if (node->op_type() == "Transpose") |
| | { |
| | |
| | if (weights.find(node->input(0)) == weights.end()) |
| | continue; |
| |
|
| | if (weights[node->input(0)].dims_size() != 2) |
| | continue; |
| |
|
| | |
| | std::vector<int> perm = get_node_attr_ai(*node, "perm"); |
| | if (perm.size() != 2) |
| | continue; |
| | if (perm[0] != 1 || perm[1] != 0) |
| | continue; |
| |
|
| | weights[node->output(0)] = weights[node->input(0)]; |
| |
|
| | |
| | { |
| | onnx::TensorProto& B = weights[node->output(0)]; |
| |
|
| | const int h = B.dims(0); |
| | const int w = B.dims(1); |
| |
|
| | std::vector<float> permuted_data; |
| | permuted_data.reserve((size_t)h * w); |
| | const float* bptr = B.has_raw_data() ? (const float*)B.raw_data().data() : B.float_data().data(); |
| |
|
| | for (int j = 0; j < w; j++) |
| | { |
| | for (int k = 0; k < h; k++) |
| | { |
| | float vb = bptr[k * w + j]; |
| | permuted_data.push_back(vb); |
| | } |
| | } |
| |
|
| | B.set_dims(0, w); |
| | B.set_dims(1, h); |
| |
|
| | if (B.has_raw_data()) |
| | { |
| | B.set_raw_data(permuted_data.data(), permuted_data.size() * sizeof(float)); |
| | } |
| | else |
| | { |
| | for (int j = 0; j < (int)permuted_data.size(); j++) |
| | B.set_float_data(j, permuted_data[j]); |
| | } |
| | } |
| |
|
| | |
| | node->set_op_type("noop_reducedncnn"); |
| |
|
| | node_reference[node->input(0)] -= 1; |
| |
|
| | reduced_node_count += 1; |
| | i += 1; |
| | } |
| | } |
| | } |
| |
|
| | static void fuse_shufflechannel(onnx::GraphProto* mutable_graph, std::map<std::string, onnx::TensorProto>& weights, std::map<std::string, int>& node_reference, std::set<std::string>& blob_names, int& reduced_node_count) |
| | { |
| | int node_count = mutable_graph->node_size(); |
| | for (int i = 0; i < node_count; i++) |
| | { |
| | onnx::NodeProto* node = mutable_graph->mutable_node(i); |
| |
|
| | |
| | |
| | if (node->op_type() == "Reshape") |
| | { |
| | if (node_reference[node->output(0)] != 1) |
| | continue; |
| |
|
| | std::vector<int> shape; |
| | if (node->input_size() == 1) |
| | { |
| | shape = get_node_attr_ai(*node, "shape"); |
| | } |
| | else |
| | { |
| | |
| | if (weights.find(node->input(1)) == weights.end()) |
| | continue; |
| |
|
| | shape = get_node_attr_from_input_ai(weights[node->input(1)]); |
| | } |
| |
|
| | |
| | |
| | if (shape.size() != 5 && shape.size() != 3) |
| | continue; |
| |
|
| | if (shape.size() == 5 && shape[0] != 1) |
| | continue; |
| |
|
| | if (i + 2 >= node_count) |
| | continue; |
| |
|
| | onnx::NodeProto* node2 = mutable_graph->mutable_node(i + 1); |
| | onnx::NodeProto* node3 = mutable_graph->mutable_node(i + 2); |
| |
|
| | if (node3->op_type() == "Constant") |
| | { |
| | if (i + 3 >= node_count) |
| | continue; |
| |
|
| | node3 = mutable_graph->mutable_node(i + 3); |
| | } |
| |
|
| | if (node2->op_type() != "Transpose" || node3->op_type() != "Reshape") |
| | continue; |
| |
|
| | if (node_reference[node2->output(0)] != 1) |
| | continue; |
| |
|
| | |
| | |
| | std::vector<int> perm = get_node_attr_ai(*node2, "perm"); |
| | if (perm.size() != 5 && perm.size() != 3) |
| | continue; |
| |
|
| | if (perm.size() == 5 && (perm[0] != 0 || perm[1] != 2 || perm[2] != 1 || perm[3] != 3 || perm[4] != 4)) |
| | continue; |
| |
|
| | if (perm.size() == 3 && (perm[0] != 1 || perm[1] != 0 || perm[2] != 2)) |
| | continue; |
| |
|
| | std::vector<int> shape3; |
| | if (node3->input_size() == 1) |
| | { |
| | shape3 = get_node_attr_ai(*node3, "shape"); |
| | } |
| | else |
| | { |
| | |
| | if (weights.find(node3->input(1)) == weights.end()) |
| | continue; |
| |
|
| | shape3 = get_node_attr_from_input_ai(weights[node3->input(1)]); |
| | } |
| |
|
| | |
| | |
| | if (shape3.size() != 4 && shape3.size() != 5) |
| | continue; |
| |
|
| | if (shape3.size() == 4 && (shape3[0] != 1 || (shape3[1] != -1 && shape3[1] != shape[1] * shape[2]))) |
| | continue; |
| |
|
| | if (shape3.size() == 5 && (shape3[0] != shape[1] || shape3[2] != shape[0] || shape3[3] * shape3[4] != shape[2])) |
| | continue; |
| |
|
| | |
| | node->set_op_type("noop_reducedncnn"); |
| | node2->set_op_type("noop_reducedncnn"); |
| |
|
| | if (node->input_size() == 2) |
| | { |
| | node_reference[node->input(1)] -= 1; |
| | } |
| | node_reference[node->output(0)] -= 1; |
| | node_reference[node2->output(0)] -= 1; |
| | if (node3->input_size() == 2) |
| | { |
| | node_reference[node3->input(1)] -= 1; |
| | } |
| |
|
| | blob_names.erase(node->output(0)); |
| | blob_names.erase(node2->output(0)); |
| |
|
| | node3->set_op_type("ShuffleChannel"); |
| | node3->set_input(0, node->input(0)); |
| |
|
| | onnx::AttributeProto* attr_group = node3->add_attribute(); |
| | attr_group->set_name("group"); |
| | attr_group->set_i(shape[1]); |
| |
|
| | onnx::AttributeProto* attr_reverse = node3->add_attribute(); |
| | attr_reverse->set_name("reverse"); |
| | attr_reverse->set_i(shape.size() == 3); |
| |
|
| | reduced_node_count += 2; |
| | i += 2; |
| | } |
| | } |
| | } |
| |
|
| | static void fuse_shufflechannel_split(onnx::GraphProto* mutable_graph, std::map<std::string, onnx::TensorProto>& weights, std::map<std::string, int>& node_reference, std::set<std::string>& blob_names, int& reduced_node_count) |
| | { |
| | int node_count = mutable_graph->node_size(); |
| | for (int i = 0; i < node_count; i++) |
| | { |
| | onnx::NodeProto* node = mutable_graph->mutable_node(i); |
| |
|
| | |
| | if (node->op_type() == "ShuffleChannel") |
| | { |
| | |
| | int reverse = get_node_attr_i(*node, "reverse"); |
| | if (reverse != 1) |
| | continue; |
| |
|
| | if (i + 2 >= node_count) |
| | continue; |
| |
|
| | onnx::NodeProto* node2 = mutable_graph->mutable_node(i + 1); |
| | onnx::NodeProto* node3 = mutable_graph->mutable_node(i + 2); |
| |
|
| | if (node2->op_type() != "Gather" || node3->op_type() != "Gather") |
| | continue; |
| |
|
| | if (node2->input(0) != node->output(0) || node3->input(0) != node->output(0)) |
| | continue; |
| |
|
| | |
| | int gather2_axis = get_node_attr_i(*node2, "axis"); |
| | if (gather2_axis != 0) |
| | continue; |
| |
|
| | |
| | if (weights.find(node2->input(1)) == weights.end()) |
| | continue; |
| |
|
| | std::vector<int> gather2_indices = get_node_attr_from_input_ai(weights[node2->input(1)]); |
| | if (gather2_indices.size() != 1 || gather2_indices[0] != 0) |
| | continue; |
| |
|
| | |
| | int gather3_axis = get_node_attr_i(*node3, "axis"); |
| | if (gather3_axis != 0) |
| | continue; |
| |
|
| | |
| | if (weights.find(node3->input(1)) == weights.end()) |
| | continue; |
| |
|
| | std::vector<int> gather3_indices = get_node_attr_from_input_ai(weights[node3->input(1)]); |
| | if (gather3_indices.size() != 1 || gather3_indices[0] != 1) |
| | continue; |
| |
|
| | |
| | node2->set_op_type("noop_reducedncnn"); |
| |
|
| | node_reference[node->output(0)] -= 2; |
| | node_reference[node2->input(1)] -= 1; |
| | node_reference[node3->input(1)] -= 1; |
| |
|
| | node3->set_op_type("Split"); |
| | node3->clear_input(); |
| | node3->add_input(node->output(0)); |
| | node3->add_output(node3->output(0)); |
| | node3->set_output(0, node2->output(0)); |
| |
|
| | node3->clear_attribute(); |
| | onnx::AttributeProto* attr_axis = node3->add_attribute(); |
| | attr_axis->set_name("axis"); |
| | attr_axis->set_i(1); |
| |
|
| | reduced_node_count += 1; |
| | i += 1; |
| | } |
| | } |
| | } |
| |
|
| | static void fuse_hardswish(onnx::GraphProto* mutable_graph, std::map<std::string, onnx::TensorProto>& weights, std::map<std::string, int>& node_reference, std::set<std::string>& blob_names, int& reduced_node_count) |
| | { |
| | int node_count = mutable_graph->node_size(); |
| | for (int i = 0; i < node_count; i++) |
| | { |
| | onnx::NodeProto* node = mutable_graph->mutable_node(i); |
| |
|
| | |
| | |
| | |
| | |
| | |
| | if (node->op_type() == "Add") |
| | { |
| | if (node_reference[node->output(0)] != 1) |
| | continue; |
| |
|
| | if (i + 3 >= node_count) |
| | continue; |
| |
|
| | if (weights.find(node->input(1)) == weights.end()) |
| | continue; |
| |
|
| | const onnx::TensorProto& add_three = weights[node->input(1)]; |
| | if (add_three.dims_size() != 0 || get_tensor_proto_data_size(add_three) != 1) |
| | continue; |
| |
|
| | float constant_add_three = get_node_attr_from_input_f(add_three); |
| | if (constant_add_three != 3.f) |
| | continue; |
| |
|
| | onnx::NodeProto* node2 = mutable_graph->mutable_node(i + 1); |
| | onnx::NodeProto* node3 = mutable_graph->mutable_node(i + 2); |
| | onnx::NodeProto* node4 = mutable_graph->mutable_node(i + 3); |
| |
|
| | if (node4->op_type() == "Constant") |
| | { |
| | if (i + 4 >= node_count) |
| | continue; |
| |
|
| | node4 = mutable_graph->mutable_node(i + 4); |
| | } |
| |
|
| | if (node2->op_type() != "Clip" || node3->op_type() != "Mul" || (node4->op_type() != "Div" && node4->op_type() != "Mul")) |
| | continue; |
| |
|
| | if (node_reference[node2->output(0)] != 1) |
| | continue; |
| |
|
| | float relu6_min; |
| | float relu6_max; |
| | if (node2->input_size() == 1) |
| | { |
| | relu6_min = get_node_attr_f(*node2, "min", -FLT_MAX); |
| | relu6_max = get_node_attr_f(*node2, "max", FLT_MAX); |
| | } |
| | else |
| | { |
| | const onnx::TensorProto& min_tp = weights[node2->input(1)]; |
| | const onnx::TensorProto& max_tp = weights[node2->input(2)]; |
| |
|
| | relu6_min = get_node_attr_from_input_f(min_tp); |
| | relu6_max = get_node_attr_from_input_f(max_tp); |
| | } |
| | if (relu6_min != 0.f || relu6_max != 6.f) |
| | continue; |
| |
|
| | if (node_reference[node3->output(0)] != 1) |
| | continue; |
| |
|
| | if (node3->input(0) != node->input(0) || node3->input(1) != node2->output(0)) |
| | continue; |
| |
|
| | if (weights.find(node4->input(1)) == weights.end()) |
| | continue; |
| |
|
| | const onnx::TensorProto& div_six = weights[node4->input(1)]; |
| | if (div_six.dims_size() != 0 || get_tensor_proto_data_size(div_six) != 1) |
| | continue; |
| |
|
| | float constant_div_six = get_node_attr_from_input_f(div_six); |
| | if (node4->op_type() == "Div" && constant_div_six != 6.f) |
| | continue; |
| | if (node4->op_type() == "Mul" && constant_div_six != 1 / 6.f) |
| | continue; |
| |
|
| | |
| | node->set_op_type("noop_reducedncnn"); |
| | node2->set_op_type("noop_reducedncnn"); |
| | node3->set_op_type("noop_reducedncnn"); |
| |
|
| | node_reference[node->input(0)] -= 1; |
| | node_reference[node->input(1)] -= 1; |
| | node_reference[node->output(0)] -= 1; |
| | if (node2->input_size() == 3) |
| | { |
| | node_reference[node2->input(1)] -= 1; |
| | node_reference[node2->input(2)] -= 1; |
| | } |
| | node_reference[node2->output(0)] -= 1; |
| | node_reference[node3->output(0)] -= 1; |
| | node_reference[node4->input(1)] -= 1; |
| |
|
| | blob_names.erase(node->output(0)); |
| | blob_names.erase(node2->output(0)); |
| | blob_names.erase(node3->output(0)); |
| |
|
| | node4->set_op_type("HardSwish"); |
| | node4->clear_input(); |
| | node4->add_input(node->input(0)); |
| |
|
| | onnx::AttributeProto* attr_alpha = node4->add_attribute(); |
| | attr_alpha->set_name("alpha"); |
| | attr_alpha->set_f(1.f / 6.f); |
| |
|
| | onnx::AttributeProto* attr_beta = node4->add_attribute(); |
| | attr_beta->set_name("beta"); |
| | attr_beta->set_f(3.f / 6.f); |
| |
|
| | reduced_node_count += 3; |
| | i += 3; |
| | } |
| | } |
| |
|
| | for (int i = 0; i < node_count; i++) |
| | { |
| | onnx::NodeProto* node = mutable_graph->mutable_node(i); |
| |
|
| | |
| | |
| | if (node->op_type() == "HardSigmoid") |
| | { |
| | if (node_reference[node->output(0)] != 1) |
| | continue; |
| |
|
| | float alpha = get_node_attr_f(*node, "alpha", 0.2f); |
| | float beta = get_node_attr_f(*node, "beta", 0.5f); |
| |
|
| | if (i + 1 >= node_count) |
| | continue; |
| |
|
| | onnx::NodeProto* node2 = mutable_graph->mutable_node(i + 1); |
| |
|
| | if (node2->op_type() != "Mul") |
| | continue; |
| |
|
| | if (node2->input(0) != node->input(0) || node2->input(1) != node->output(0)) |
| | continue; |
| |
|
| | |
| | node->set_op_type("noop_reducedncnn"); |
| |
|
| | node_reference[node->input(0)] -= 1; |
| | node_reference[node->output(0)] -= 1; |
| |
|
| | blob_names.erase(node->output(0)); |
| |
|
| | node2->set_op_type("HardSwish"); |
| | node2->clear_input(); |
| | node2->add_input(node->input(0)); |
| |
|
| | onnx::AttributeProto* attr_alpha = node2->add_attribute(); |
| | attr_alpha->set_name("alpha"); |
| | attr_alpha->set_f(alpha); |
| |
|
| | onnx::AttributeProto* attr_beta = node2->add_attribute(); |
| | attr_beta->set_name("beta"); |
| | attr_beta->set_f(beta); |
| |
|
| | reduced_node_count += 1; |
| | i += 1; |
| | } |
| | } |
| | } |
| |
|
| | static void fuse_hardsigmoid(onnx::GraphProto* mutable_graph, std::map<std::string, onnx::TensorProto>& weights, std::map<std::string, int>& node_reference, std::set<std::string>& blob_names, int& reduced_node_count) |
| | { |
| | int node_count = mutable_graph->node_size(); |
| | for (int i = 0; i < node_count; i++) |
| | { |
| | onnx::NodeProto* node = mutable_graph->mutable_node(i); |
| |
|
| | |
| | |
| | |
| | |
| | |
| | if (node->op_type() == "Add") |
| | { |
| | if (node_reference[node->output(0)] != 1) |
| | continue; |
| |
|
| | if (i + 2 >= node_count) |
| | continue; |
| |
|
| | if (weights.find(node->input(1)) == weights.end()) |
| | continue; |
| |
|
| | const onnx::TensorProto& add_three = weights[node->input(1)]; |
| | if (add_three.dims_size() != 0 || get_tensor_proto_data_size(add_three) != 1) |
| | continue; |
| |
|
| | float constant_add_three = get_node_attr_from_input_f(add_three); |
| | if (constant_add_three != 3.f) |
| | continue; |
| |
|
| | onnx::NodeProto* node2 = mutable_graph->mutable_node(i + 1); |
| | onnx::NodeProto* node3 = mutable_graph->mutable_node(i + 2); |
| |
|
| | if (node3->op_type() == "Constant") |
| | { |
| | if (i + 3 >= node_count) |
| | continue; |
| |
|
| | node3 = mutable_graph->mutable_node(i + 3); |
| | } |
| |
|
| | if (node2->op_type() != "Clip" || (node3->op_type() != "Div" && node3->op_type() != "Mul")) |
| | continue; |
| |
|
| | if (node_reference[node2->output(0)] != 1) |
| | continue; |
| |
|
| | float relu6_min; |
| | float relu6_max; |
| | if (node2->input_size() == 1) |
| | { |
| | relu6_min = get_node_attr_f(*node2, "min", -FLT_MAX); |
| | relu6_max = get_node_attr_f(*node2, "max", FLT_MAX); |
| | } |
| | else |
| | { |
| | const onnx::TensorProto& min_tp = weights[node2->input(1)]; |
| | const onnx::TensorProto& max_tp = weights[node2->input(2)]; |
| |
|
| | relu6_min = get_node_attr_from_input_f(min_tp); |
| | relu6_max = get_node_attr_from_input_f(max_tp); |
| | } |
| | if (relu6_min != 0.f || relu6_max != 6.f) |
| | continue; |
| |
|
| | if (weights.find(node3->input(1)) == weights.end()) |
| | continue; |
| |
|
| | const onnx::TensorProto& div_six = weights[node3->input(1)]; |
| | if (div_six.dims_size() != 0 || get_tensor_proto_data_size(div_six) != 1) |
| | continue; |
| |
|
| | float constant_div_six = get_node_attr_from_input_f(div_six); |
| | if (node3->op_type() == "Div" && constant_div_six != 6.f) |
| | continue; |
| | if (node3->op_type() == "Mul" && constant_div_six != 1 / 6.f) |
| | continue; |
| |
|
| | |
| | node->set_op_type("noop_reducedncnn"); |
| | node2->set_op_type("noop_reducedncnn"); |
| |
|
| | node_reference[node->input(1)] -= 1; |
| | node_reference[node->output(0)] -= 1; |
| | if (node2->input_size() == 3) |
| | { |
| | node_reference[node2->input(1)] -= 1; |
| | node_reference[node2->input(2)] -= 1; |
| | } |
| | node_reference[node2->output(0)] -= 1; |
| | node_reference[node3->input(1)] -= 1; |
| |
|
| | blob_names.erase(node->output(0)); |
| | blob_names.erase(node2->output(0)); |
| |
|
| | node3->set_op_type("HardSigmoid"); |
| | node3->clear_input(); |
| | node3->add_input(node->input(0)); |
| |
|
| | onnx::AttributeProto* attr_alpha = node3->add_attribute(); |
| | attr_alpha->set_name("alpha"); |
| | attr_alpha->set_f(1.f / 6.f); |
| |
|
| | onnx::AttributeProto* attr_beta = node3->add_attribute(); |
| | attr_beta->set_name("beta"); |
| | attr_beta->set_f(3.f / 6.f); |
| |
|
| | reduced_node_count += 2; |
| | i += 2; |
| | } |
| | } |
| | } |
| |
|
| | static void fuse_swish(onnx::GraphProto* mutable_graph, std::map<std::string, onnx::TensorProto>& weights, std::map<std::string, int>& node_reference, std::set<std::string>& blob_names, int& reduced_node_count) |
| | { |
| | int node_count = mutable_graph->node_size(); |
| | for (int i = 0; i < node_count; i++) |
| | { |
| | onnx::NodeProto* node = mutable_graph->mutable_node(i); |
| |
|
| | |
| | |
| | if (node->op_type() == "Sigmoid") |
| | { |
| | if (node_reference[node->output(0)] != 1) |
| | continue; |
| |
|
| | if (i + 1 >= node_count) |
| | continue; |
| |
|
| | onnx::NodeProto* node2 = mutable_graph->mutable_node(i + 1); |
| |
|
| | if (node2->op_type() != "Mul") |
| | continue; |
| |
|
| | if (node2->input(0) != node->input(0) || node2->input(1) != node->output(0)) |
| | continue; |
| |
|
| | |
| | node->set_op_type("noop_reducedncnn"); |
| |
|
| | node_reference[node->input(0)] -= 1; |
| | node_reference[node->output(0)] -= 1; |
| |
|
| | blob_names.erase(node->output(0)); |
| |
|
| | node2->set_op_type("Swish"); |
| | node2->clear_input(); |
| | node2->add_input(node->input(0)); |
| |
|
| | reduced_node_count += 1; |
| | i += 1; |
| | } |
| | } |
| | } |
| |
|
| | static void fuse_batchnorm1d_squeeze_unsqueeze(onnx::GraphProto* mutable_graph, std::map<std::string, onnx::TensorProto>& weights, std::map<std::string, int>& node_reference, std::set<std::string>& blob_names, int& reduced_node_count) |
| | { |
| | int node_count = mutable_graph->node_size(); |
| | for (int i = 0; i < node_count; i++) |
| | { |
| | onnx::NodeProto* node = mutable_graph->mutable_node(i); |
| |
|
| | |
| | if (node->op_type() == "Unsqueeze") |
| | { |
| | if (node_reference[node->output(0)] != 1) |
| | continue; |
| |
|
| | if (i + 2 >= node_count) |
| | continue; |
| |
|
| | onnx::NodeProto* node2 = mutable_graph->mutable_node(i + 1); |
| | onnx::NodeProto* node3 = mutable_graph->mutable_node(i + 2); |
| |
|
| | if (node2->op_type() != "BatchNormalization" || node3->op_type() != "Squeeze") |
| | continue; |
| |
|
| | if (node_reference[node2->output(0)] != 1) |
| | continue; |
| |
|
| | if (node2->input(0) != node->output(0) || node3->input(0) != node2->output(0)) |
| | continue; |
| |
|
| | |
| | node->set_op_type("noop_reducedncnn"); |
| | node3->set_op_type("noop_reducedncnn"); |
| |
|
| | node_reference[node->output(0)] -= 1; |
| | node_reference[node2->output(0)] -= 1; |
| |
|
| | blob_names.erase(node->output(0)); |
| | blob_names.erase(node2->output(0)); |
| |
|
| | node2->set_input(0, node->input(0)); |
| | node2->set_output(0, node3->output(0)); |
| |
|
| | reduced_node_count += 2; |
| | i += 2; |
| | } |
| | } |
| | } |
| |
|
| | static void fuse_unsqueeze_prelu(onnx::GraphProto* mutable_graph, std::map<std::string, onnx::TensorProto>& weights, std::map<std::string, int>& node_reference, std::set<std::string>& blob_names, int& reduced_node_count) |
| | { |
| | int node_count = mutable_graph->node_size(); |
| | for (int i = 0; i < node_count; i++) |
| | { |
| | onnx::NodeProto* node = mutable_graph->mutable_node(i); |
| |
|
| | |
| | if (node->op_type() == "Unsqueeze") |
| | { |
| | |
| | if (weights.find(node->input(0)) == weights.end()) |
| | continue; |
| |
|
| | onnx::TensorProto& B = weights[node->input(0)]; |
| | if (B.dims_size() != 1) |
| | continue; |
| |
|
| | if (node_reference[node->output(0)] != 1) |
| | continue; |
| |
|
| | |
| | std::vector<int> axes = get_node_attr_ai(*node, "axes"); |
| | if (axes.size() != 2) |
| | continue; |
| | if (axes[0] != 1 || axes[1] != 2) |
| | continue; |
| |
|
| | if (i + 1 >= node_count) |
| | continue; |
| |
|
| | onnx::NodeProto* node2 = mutable_graph->mutable_node(i + 1); |
| |
|
| | if (node2->op_type() != "PRelu") |
| | continue; |
| |
|
| | if (node2->input(1) != node->output(0)) |
| | continue; |
| |
|
| | |
| | node->set_op_type("noop_reducedncnn"); |
| |
|
| | node_reference[node->output(0)] -= 1; |
| |
|
| | blob_names.erase(node->output(0)); |
| |
|
| | node2->set_input(1, node->input(0)); |
| |
|
| | reduced_node_count += 1; |
| | i += 1; |
| | } |
| | } |
| | } |
| |
|
| | static void fuse_normalize(onnx::GraphProto* mutable_graph, std::map<std::string, onnx::TensorProto>& weights, std::map<std::string, int>& node_reference, std::set<std::string>& blob_names, int& reduced_node_count) |
| | { |
| | int node_count = mutable_graph->node_size(); |
| | for (int i = 0; i < node_count; i++) |
| | { |
| | onnx::NodeProto* node = mutable_graph->mutable_node(i); |
| |
|
| | |
| | |
| | if (node->op_type() == "ReduceL2") |
| | { |
| | if (node_reference[node->output(0)] != 1) |
| | continue; |
| |
|
| | |
| | std::vector<int> axes = get_node_attr_ai(*node, "axes"); |
| | if (axes.size() != 1) |
| | continue; |
| | if (axes[0] != 1) |
| | continue; |
| |
|
| | if (i + 3 >= node_count) |
| | continue; |
| |
|
| | onnx::NodeProto* node2 = mutable_graph->mutable_node(i + 1); |
| | onnx::NodeProto* node3 = mutable_graph->mutable_node(i + 2); |
| | onnx::NodeProto* node4 = mutable_graph->mutable_node(i + 3); |
| |
|
| | bool has_shape_node = node3->op_type() == "Shape"; |
| | onnx::NodeProto* node_shape = 0; |
| | if (has_shape_node) |
| | { |
| | if (i + 4 >= node_count) |
| | continue; |
| |
|
| | node_shape = node3; |
| | node3 = mutable_graph->mutable_node(i + 3); |
| | node4 = mutable_graph->mutable_node(i + 4); |
| | } |
| |
|
| | if (node2->op_type() != "Clip" || node3->op_type() != "Expand" || node4->op_type() != "Div") |
| | continue; |
| |
|
| | if (node_reference[node2->output(0)] != 1) |
| | continue; |
| |
|
| | if (node_reference[node3->output(0)] != 1) |
| | continue; |
| |
|
| | if (node2->input(0) != node->output(0) || node3->input(0) != node2->output(0) |
| | || node4->input(0) != node->input(0) || node4->input(1) != node3->output(0)) |
| | continue; |
| |
|
| | if (has_shape_node) |
| | { |
| | if (node_shape->input(0) != node->input(0) || node3->input(1) != node_shape->output(0)) |
| | continue; |
| | } |
| |
|
| | |
| | float clip_min; |
| | if (node2->input_size() == 1) |
| | { |
| | clip_min = get_node_attr_f(*node2, "min", -FLT_MAX); |
| | } |
| | else |
| | { |
| | const onnx::TensorProto& min_tp = weights[node2->input(1)]; |
| |
|
| | clip_min = get_node_attr_from_input_f(min_tp); |
| | } |
| |
|
| | |
| | node->set_op_type("noop_reducedncnn"); |
| | node2->set_op_type("noop_reducedncnn"); |
| | if (has_shape_node) |
| | { |
| | node_shape->set_op_type("noop_reducedncnn"); |
| | } |
| | node3->set_op_type("noop_reducedncnn"); |
| |
|
| | node_reference[node->input(0)] -= has_shape_node ? 2 : 1; |
| | node_reference[node->output(0)] -= 1; |
| | node_reference[node2->output(0)] -= 1; |
| | if (has_shape_node) |
| | { |
| | node_reference[node_shape->output(0)] -= 1; |
| | } |
| | node_reference[node3->output(0)] -= 1; |
| | if (node3->input_size() == 2) |
| | { |
| | node_reference[node3->input(1)] -= 1; |
| | } |
| |
|
| | blob_names.erase(node->output(0)); |
| | blob_names.erase(node2->output(0)); |
| | if (has_shape_node) |
| | { |
| | blob_names.erase(node_shape->output(0)); |
| | } |
| | blob_names.erase(node3->output(0)); |
| |
|
| | node4->set_op_type("Normalize"); |
| | node4->clear_input(); |
| | node4->add_input(node->input(0)); |
| |
|
| | onnx::AttributeProto* attr_alpha = node4->add_attribute(); |
| | attr_alpha->set_name("eps"); |
| | attr_alpha->set_f(clip_min); |
| |
|
| | reduced_node_count += has_shape_node ? 4 : 3; |
| | i += has_shape_node ? 4 : 3; |
| | } |
| | } |
| | } |
| |
|
| | static void fuse_groupnorm(onnx::GraphProto* mutable_graph, std::map<std::string, onnx::TensorProto>& weights, std::map<std::string, int>& node_reference, std::set<std::string>& blob_names, int& reduced_node_count) |
| | { |
| | int node_count = mutable_graph->node_size(); |
| | for (int i = 0; i < node_count; i++) |
| | { |
| | onnx::NodeProto* node = mutable_graph->mutable_node(i); |
| |
|
| | |
| | if (node->op_type() == "Reshape") |
| | { |
| | if (node_reference[node->output(0)] != 1) |
| | continue; |
| |
|
| | std::vector<int> shape; |
| | if (node->input_size() == 1) |
| | { |
| | shape = get_node_attr_ai(*node, "shape"); |
| | } |
| | else |
| | { |
| | |
| | if (weights.find(node->input(1)) == weights.end()) |
| | continue; |
| |
|
| | shape = get_node_attr_from_input_ai(weights[node->input(1)]); |
| | } |
| |
|
| | |
| | if (shape.size() != 3) |
| | continue; |
| |
|
| | if (shape[0] != 0 || shape[2] != -1) |
| | continue; |
| |
|
| | int groups = shape[1]; |
| |
|
| | if (i + 4 >= node_count) |
| | continue; |
| |
|
| | onnx::NodeProto* node2 = mutable_graph->mutable_node(i + 1); |
| | onnx::NodeProto* node3 = mutable_graph->mutable_node(i + 2); |
| | onnx::NodeProto* node4 = mutable_graph->mutable_node(i + 3); |
| | onnx::NodeProto* node5 = mutable_graph->mutable_node(i + 4); |
| |
|
| | if (node2->op_type() != "InstanceNormalization" || node3->op_type() != "Reshape" || node4->op_type() != "Mul" || node5->op_type() != "Add") |
| | continue; |
| |
|
| | if (node_reference[node2->output(0)] != 1) |
| | continue; |
| |
|
| | if (node_reference[node3->output(0)] != 1) |
| | continue; |
| |
|
| | if (node_reference[node4->output(0)] != 1) |
| | continue; |
| |
|
| | if (node2->input(0) != node->output(0) || node3->input(0) != node2->output(0) |
| | || node4->input(0) != node3->output(0) || node5->input(0) != node4->output(0)) |
| | continue; |
| |
|
| | |
| | float eps = get_node_attr_f(*node2, "epsilon", 1e-05f); |
| |
|
| | |
| | std::vector<float> S = get_node_attr_from_input_af(weights[node2->input(1)]); |
| | std::vector<float> B = get_node_attr_from_input_af(weights[node2->input(2)]); |
| | if ((int)S.size() != groups || (int)B.size() != groups) |
| | continue; |
| |
|
| | bool instancenorm_affine = false; |
| | for (int j = 0; j < groups; j++) |
| | { |
| | if (S[j] != 1.f || B[j] != 0.f) |
| | { |
| | instancenorm_affine = true; |
| | break; |
| | } |
| | } |
| |
|
| | if (instancenorm_affine) |
| | continue; |
| |
|
| | std::vector<int> shape2; |
| | if (node3->input_size() == 1) |
| | { |
| | shape2 = get_node_attr_ai(*node3, "shape"); |
| | } |
| | else |
| | { |
| | |
| | if (weights.find(node3->input(1)) == weights.end()) |
| | continue; |
| |
|
| | shape2 = get_node_attr_from_input_ai(weights[node3->input(1)]); |
| | } |
| |
|
| | |
| | if (shape2.size() != 4) |
| | continue; |
| |
|
| | if (shape2[0] != 1) |
| | continue; |
| |
|
| | int channels = shape2[1]; |
| |
|
| | |
| | std::vector<float> affine_S = get_node_attr_from_input_af(weights[node4->input(1)]); |
| | std::vector<float> affine_B = get_node_attr_from_input_af(weights[node5->input(1)]); |
| | if (affine_S.size() == 1 && affine_S[0] == 1.f && affine_B.size() == 1 && affine_B[0] == 0.f) |
| | { |
| | |
| | } |
| | else if ((int)affine_S.size() != channels && (int)affine_B.size() != channels) |
| | { |
| | |
| | continue; |
| | } |
| |
|
| | |
| | node->set_op_type("noop_reducedncnn"); |
| | node2->set_op_type("noop_reducedncnn"); |
| | node3->set_op_type("noop_reducedncnn"); |
| | node4->set_op_type("noop_reducedncnn"); |
| |
|
| | if (node->input_size() == 2) |
| | { |
| | node_reference[node->input(1)] -= 1; |
| | } |
| | node_reference[node->output(0)] -= 1; |
| | node_reference[node2->input(1)] -= 1; |
| | node_reference[node2->input(2)] -= 1; |
| | node_reference[node2->output(0)] -= 1; |
| | if (node3->input_size() == 2) |
| | { |
| | node_reference[node3->input(1)] -= 1; |
| | } |
| | node_reference[node3->output(0)] -= 1; |
| | node_reference[node4->output(0)] -= 1; |
| |
|
| | blob_names.erase(node->output(0)); |
| | blob_names.erase(node2->output(0)); |
| | blob_names.erase(node3->output(0)); |
| | blob_names.erase(node4->output(0)); |
| |
|
| | std::string affine_scale = node4->input(1); |
| | std::string affine_bias = node5->input(1); |
| |
|
| | node5->set_op_type("GroupNorm"); |
| | node5->clear_input(); |
| | node5->add_input(node->input(0)); |
| | node5->add_input(affine_scale); |
| | node5->add_input(affine_bias); |
| |
|
| | onnx::AttributeProto* attr_groups = node5->add_attribute(); |
| | attr_groups->set_name("groups"); |
| | attr_groups->set_i(groups); |
| |
|
| | onnx::AttributeProto* attr_channels = node5->add_attribute(); |
| | attr_channels->set_name("channels"); |
| | attr_channels->set_i(channels); |
| |
|
| | onnx::AttributeProto* attr_eps = node5->add_attribute(); |
| | attr_eps->set_name("epsilon"); |
| | attr_eps->set_f(eps); |
| |
|
| | onnx::AttributeProto* attr_affine = node5->add_attribute(); |
| | attr_affine->set_name("affine"); |
| | attr_affine->set_i(1); |
| |
|
| | reduced_node_count += 4; |
| | i += 4; |
| | } |
| | } |
| | } |
| |
|
| | static void fuse_layernorm(onnx::GraphProto* mutable_graph, std::map<std::string, onnx::TensorProto>& weights, std::map<std::string, int>& node_reference, std::set<std::string>& blob_names, int& reduced_node_count) |
| | { |
| | int node_count = mutable_graph->node_size(); |
| | for (int i = 0; i < node_count; i++) |
| | { |
| | onnx::NodeProto* node = mutable_graph->mutable_node(i); |
| |
|
| | |
| | |
| | if (node->op_type() == "ReduceMean") |
| | { |
| | if (node_reference[node->output(0)] != 1) |
| | continue; |
| |
|
| | std::vector<int> axes = get_node_attr_ai(*node, "axes"); |
| |
|
| | |
| | |
| | if (axes.size() != 1 && axes.size() != 2) |
| | continue; |
| |
|
| | int normed_axes = (int)axes.size(); |
| | if (normed_axes == 1 && axes[0] != -1) |
| | continue; |
| | if (normed_axes == 2 && (axes[0] != -2 || axes[1] != -1)) |
| | continue; |
| |
|
| | if (i + 6 >= node_count) |
| | continue; |
| |
|
| | onnx::NodeProto* node2 = mutable_graph->mutable_node(i + 1); |
| | onnx::NodeProto* node3 = mutable_graph->mutable_node(i + 2); |
| | onnx::NodeProto* node4 = mutable_graph->mutable_node(i + 3); |
| | onnx::NodeProto* node5 = mutable_graph->mutable_node(i + 4); |
| | onnx::NodeProto* node6 = mutable_graph->mutable_node(i + 5); |
| | onnx::NodeProto* node7 = mutable_graph->mutable_node(i + 6); |
| |
|
| | if (node2->op_type() != "Sub" || node3->op_type() != "Pow" || node4->op_type() != "ReduceMean" || node5->op_type() != "Add" || node6->op_type() != "Sqrt" || node7->op_type() != "Div") |
| | continue; |
| |
|
| | if (node_reference[node2->output(0)] != 2) |
| | continue; |
| |
|
| | if (node_reference[node3->output(0)] != 1) |
| | continue; |
| |
|
| | if (node_reference[node4->output(0)] != 1) |
| | continue; |
| |
|
| | if (node_reference[node5->output(0)] != 1) |
| | continue; |
| |
|
| | if (node_reference[node6->output(0)] != 1) |
| | continue; |
| |
|
| | if (node2->input(0) != node->input(0) || node2->input(1) != node->output(0) |
| | || node3->input(0) != node2->output(0) || node4->input(0) != node3->output(0) |
| | || node5->input(0) != node4->output(0) || node6->input(0) != node5->output(0) |
| | || node7->input(0) != node2->output(0) || node7->input(1) != node6->output(0)) |
| | continue; |
| |
|
| | if (weights.find(node3->input(1)) == weights.end()) |
| | continue; |
| |
|
| | const onnx::TensorProto& pow_two = weights[node3->input(1)]; |
| | if (pow_two.dims_size() != 0 || get_tensor_proto_data_size(pow_two) != 1) |
| | continue; |
| |
|
| | float constant_pow_two = get_node_attr_from_input_f(pow_two); |
| | if (constant_pow_two != 2.f) |
| | continue; |
| |
|
| | std::vector<int> axes4 = get_node_attr_ai(*node4, "axes"); |
| |
|
| | |
| | |
| | if ((int)axes4.size() != normed_axes) |
| | continue; |
| |
|
| | if (normed_axes == 1 && axes4[0] != -1) |
| | continue; |
| | if (normed_axes == 2 && (axes4[0] != -2 || axes4[1] != -1)) |
| | continue; |
| |
|
| | if (weights.find(node5->input(1)) == weights.end()) |
| | continue; |
| |
|
| | const onnx::TensorProto& add_eps = weights[node5->input(1)]; |
| | if (add_eps.dims_size() != 0 || get_tensor_proto_data_size(add_eps) != 1) |
| | continue; |
| |
|
| | float eps = get_node_attr_from_input_f(add_eps); |
| |
|
| | int affine = 0; |
| | while (i + 8 < node_count) |
| | { |
| | onnx::NodeProto* node8 = mutable_graph->mutable_node(i + 7); |
| | onnx::NodeProto* node9 = mutable_graph->mutable_node(i + 8); |
| |
|
| | if (node8->op_type() != "Mul" || node9->op_type() != "Add") |
| | break; |
| |
|
| | if (node_reference[node7->output(0)] != 1) |
| | break; |
| |
|
| | if (node_reference[node8->output(0)] != 1) |
| | break; |
| |
|
| | if (node8->input(0) != node7->output(0) || node9->input(0) != node8->output(0)) |
| | break; |
| |
|
| | |
| | std::vector<float> affine_S = get_node_attr_from_input_af(weights[node8->input(1)]); |
| | std::vector<float> affine_B = get_node_attr_from_input_af(weights[node9->input(1)]); |
| | if (affine_S.size() != affine_B.size()) |
| | break; |
| |
|
| | affine = 1; |
| | break; |
| | } |
| |
|
| | |
| | node->set_op_type("noop_reducedncnn"); |
| | node2->set_op_type("noop_reducedncnn"); |
| | node3->set_op_type("noop_reducedncnn"); |
| | node4->set_op_type("noop_reducedncnn"); |
| | node5->set_op_type("noop_reducedncnn"); |
| | node6->set_op_type("noop_reducedncnn"); |
| |
|
| | node_reference[node->input(0)] -= 1; |
| | node_reference[node2->input(0)] -= 1; |
| | node_reference[node2->input(1)] -= 1; |
| | node_reference[node3->input(0)] -= 1; |
| | node_reference[node3->input(1)] -= 1; |
| | node_reference[node4->input(0)] -= 1; |
| | node_reference[node5->input(0)] -= 1; |
| | node_reference[node5->input(1)] -= 1; |
| | node_reference[node6->input(0)] -= 1; |
| | node_reference[node7->input(0)] -= 1; |
| | node_reference[node7->input(1)] -= 1; |
| |
|
| | blob_names.erase(node->output(0)); |
| | blob_names.erase(node2->output(0)); |
| | blob_names.erase(node3->output(0)); |
| | blob_names.erase(node4->output(0)); |
| | blob_names.erase(node5->output(0)); |
| | blob_names.erase(node6->output(0)); |
| |
|
| | node_reference[node->input(0)] += 1; |
| |
|
| | if (affine == 0) |
| | { |
| | node7->set_op_type("LayerNorm"); |
| | node7->clear_input(); |
| | node7->add_input(node->input(0)); |
| |
|
| | onnx::AttributeProto* attr_eps = node7->add_attribute(); |
| | attr_eps->set_name("epsilon"); |
| | attr_eps->set_f(eps); |
| |
|
| | onnx::AttributeProto* attr_affine = node7->add_attribute(); |
| | attr_affine->set_name("affine"); |
| | attr_affine->set_i(affine); |
| |
|
| | reduced_node_count += 6; |
| | i += 6; |
| | } |
| | else |
| | { |
| | onnx::NodeProto* node8 = mutable_graph->mutable_node(i + 7); |
| | onnx::NodeProto* node9 = mutable_graph->mutable_node(i + 8); |
| |
|
| | node7->set_op_type("noop_reducedncnn"); |
| | node8->set_op_type("noop_reducedncnn"); |
| |
|
| | node_reference[node8->input(0)] -= 1; |
| | node_reference[node9->input(0)] -= 1; |
| |
|
| | blob_names.erase(node7->output(0)); |
| | blob_names.erase(node8->output(0)); |
| |
|
| | std::string affine_scale = node8->input(1); |
| | std::string affine_bias = node9->input(1); |
| |
|
| | node9->set_op_type("LayerNorm"); |
| | node9->clear_input(); |
| | node9->add_input(node->input(0)); |
| | node9->add_input(affine_scale); |
| | node9->add_input(affine_bias); |
| |
|
| | onnx::AttributeProto* attr_eps = node9->add_attribute(); |
| | attr_eps->set_name("epsilon"); |
| | attr_eps->set_f(eps); |
| |
|
| | onnx::AttributeProto* attr_affine = node9->add_attribute(); |
| | attr_affine->set_name("affine"); |
| | attr_affine->set_i(affine); |
| |
|
| | reduced_node_count += 8; |
| | i += 8; |
| | } |
| | } |
| | } |
| | } |
| |
|
| | static void fuse_flatten(onnx::GraphProto* mutable_graph, std::map<std::string, onnx::TensorProto>& weights, std::map<std::string, int>& node_reference, std::set<std::string>& blob_names, int& reduced_node_count) |
| | { |
| | int node_count = mutable_graph->node_size(); |
| | for (int i = 0; i < node_count; i++) |
| | { |
| | onnx::NodeProto* node = mutable_graph->mutable_node(i); |
| |
|
| | |
| | if (node->op_type() == "Shape") |
| | { |
| | if (node_reference[node->output(0)] != 1) |
| | continue; |
| |
|
| | if (i + 6 >= node_count) |
| | continue; |
| |
|
| | onnx::NodeProto* node2 = mutable_graph->mutable_node(i + 1); |
| | onnx::NodeProto* node3 = mutable_graph->mutable_node(i + 2); |
| | onnx::NodeProto* node4 = mutable_graph->mutable_node(i + 3); |
| | onnx::NodeProto* node5 = mutable_graph->mutable_node(i + 4); |
| | onnx::NodeProto* node6 = mutable_graph->mutable_node(i + 5); |
| | onnx::NodeProto* node7 = mutable_graph->mutable_node(i + 6); |
| |
|
| | if (node2->op_type() != "Gather" || node3->op_type() != "Constant" || node4->op_type() != "Unsqueeze" || node5->op_type() != "Unsqueeze" |
| | || node6->op_type() != "Concat" || node7->op_type() != "Reshape") |
| | continue; |
| |
|
| | if (node_reference[node2->output(0)] != 1) |
| | continue; |
| |
|
| | |
| | |
| |
|
| | if (node_reference[node4->output(0)] != 1) |
| | continue; |
| |
|
| | if (node_reference[node5->output(0)] != 1) |
| | continue; |
| |
|
| | if (node_reference[node6->output(0)] != 1) |
| | continue; |
| |
|
| | if (node2->input(0) != node->output(0) || node4->input(0) != node2->output(0) || node5->input(0) != node3->output(0) |
| | || node6->input(0) != node4->output(0) || node6->input(1) != node5->output(0) |
| | || node7->input(0) != node->input(0) || node7->input(1) != node6->output(0)) |
| | continue; |
| |
|
| | |
| | int gather_axis = get_node_attr_i(*node2, "axis"); |
| | if (gather_axis != 0) |
| | continue; |
| |
|
| | |
| | if (weights.find(node2->input(1)) == weights.end()) |
| | continue; |
| |
|
| | std::vector<int> gather_indices = get_node_attr_from_input_ai(weights[node2->input(1)]); |
| | if (gather_indices.size() != 1 || gather_indices[0] != 0) |
| | continue; |
| |
|
| | |
| | std::vector<int> unsqueeze_axes = get_node_attr_ai(*node4, "axes"); |
| | if (unsqueeze_axes.size() != 1) |
| | continue; |
| | if (unsqueeze_axes[0] != 0) |
| | continue; |
| |
|
| | |
| | std::vector<int> unsqueeze2_axes = get_node_attr_ai(*node5, "axes"); |
| | if (unsqueeze2_axes.size() != 1) |
| | continue; |
| | if (unsqueeze2_axes[0] != 0) |
| | continue; |
| |
|
| | |
| | if (weights.find(node5->input(0)) == weights.end()) |
| | continue; |
| |
|
| | std::vector<int> unsqueeze2_data = get_node_attr_from_input_ai(weights[node5->input(0)]); |
| | if (unsqueeze2_data.size() != 1 || unsqueeze2_data[0] != -1) |
| | continue; |
| |
|
| | |
| | int concat_axis = get_node_attr_i(*node6, "axis"); |
| | if (concat_axis != 0) |
| | continue; |
| |
|
| | |
| | node->set_op_type("noop_reducedncnn"); |
| | node2->set_op_type("noop_reducedncnn"); |
| | |
| | node4->set_op_type("noop_reducedncnn"); |
| | node5->set_op_type("noop_reducedncnn"); |
| | node6->set_op_type("noop_reducedncnn"); |
| |
|
| | node_reference[node->input(0)] -= 1; |
| | node_reference[node->output(0)] -= 1; |
| | node_reference[node2->input(1)] -= 1; |
| | node_reference[node2->output(0)] -= 1; |
| | |
| | node_reference[node4->output(0)] -= 1; |
| | node_reference[node5->input(0)] -= 1; |
| | node_reference[node5->output(0)] -= 1; |
| | node_reference[node6->output(0)] -= 1; |
| |
|
| | blob_names.erase(node->output(0)); |
| | blob_names.erase(node2->output(0)); |
| | |
| | blob_names.erase(node4->output(0)); |
| | blob_names.erase(node5->output(0)); |
| | blob_names.erase(node6->output(0)); |
| |
|
| | node7->set_op_type("Flatten"); |
| | node7->clear_input(); |
| | node7->add_input(node->input(0)); |
| |
|
| | reduced_node_count += 5; |
| | i += 5; |
| | } |
| | } |
| | } |
| |
|
| | static void fuse_pixelshuffle(onnx::GraphProto* mutable_graph, std::map<std::string, onnx::TensorProto>& weights, std::map<std::string, int>& node_reference, std::set<std::string>& blob_names, int& reduced_node_count) |
| | { |
| | int node_count = mutable_graph->node_size(); |
| | for (int i = 0; i < node_count; i++) |
| | { |
| | onnx::NodeProto* node = mutable_graph->mutable_node(i); |
| |
|
| | |
| | |
| | if (node->op_type() == "Reshape") |
| | { |
| | if (node_reference[node->output(0)] != 1) |
| | continue; |
| |
|
| | std::vector<int> shape; |
| | if (node->input_size() == 1) |
| | { |
| | shape = get_node_attr_ai(*node, "shape"); |
| | } |
| | else |
| | { |
| | |
| | if (weights.find(node->input(1)) == weights.end()) |
| | continue; |
| |
|
| | shape = get_node_attr_from_input_ai(weights[node->input(1)]); |
| | } |
| |
|
| | |
| | if (shape.size() != 6) |
| | continue; |
| |
|
| | if (shape[0] != 1 && shape[0] != -1) |
| | continue; |
| |
|
| | if (shape[2] != shape[3]) |
| | continue; |
| |
|
| | if (i + 2 >= node_count) |
| | continue; |
| |
|
| | onnx::NodeProto* node2 = mutable_graph->mutable_node(i + 1); |
| | onnx::NodeProto* node3 = mutable_graph->mutable_node(i + 2); |
| |
|
| | if (node3->op_type() == "Constant") |
| | { |
| | if (i + 3 >= node_count) |
| | continue; |
| |
|
| | node3 = mutable_graph->mutable_node(i + 3); |
| | } |
| |
|
| | if (node2->op_type() != "Transpose" || node3->op_type() != "Reshape") |
| | continue; |
| |
|
| | if (node_reference[node2->output(0)] != 1) |
| | continue; |
| |
|
| | |
| | std::vector<int> perm = get_node_attr_ai(*node2, "perm"); |
| | if (perm.size() != 6) |
| | continue; |
| |
|
| | if (perm[0] != 0 || perm[1] != 1 || perm[2] != 4 || perm[3] != 2 || perm[4] != 5 || perm[5] != 3) |
| | continue; |
| |
|
| | std::vector<int> shape3; |
| | if (node3->input_size() == 1) |
| | { |
| | shape3 = get_node_attr_ai(*node3, "shape"); |
| | } |
| | else |
| | { |
| | |
| | if (weights.find(node3->input(1)) == weights.end()) |
| | continue; |
| |
|
| | shape3 = get_node_attr_from_input_ai(weights[node3->input(1)]); |
| | } |
| |
|
| | |
| | if (shape3.size() != 4) |
| | continue; |
| |
|
| | if (shape3[0] != 1 && shape3[0] != -1) |
| | continue; |
| |
|
| | if (shape3[1] != shape[1] || shape3[2] != shape[2] * shape[4] || shape3[3] != shape[3] * shape[5]) |
| | continue; |
| |
|
| | |
| | node->set_op_type("noop_reducedncnn"); |
| | node2->set_op_type("noop_reducedncnn"); |
| |
|
| | if (node->input_size() == 2) |
| | { |
| | node_reference[node->input(1)] -= 1; |
| | } |
| | node_reference[node->output(0)] -= 1; |
| | node_reference[node2->output(0)] -= 1; |
| | if (node3->input_size() == 2) |
| | { |
| | node_reference[node3->input(1)] -= 1; |
| | } |
| |
|
| | blob_names.erase(node->output(0)); |
| | blob_names.erase(node2->output(0)); |
| |
|
| | node3->set_op_type("PixelShuffle"); |
| | node3->set_input(0, node->input(0)); |
| |
|
| | onnx::AttributeProto* attr_group = node3->add_attribute(); |
| | attr_group->set_name("scale_factor"); |
| | attr_group->set_i(shape[2]); |
| |
|
| | reduced_node_count += 2; |
| | i += 2; |
| | } |
| | } |
| | } |
| |
|
| | static void fuse_reorg(onnx::GraphProto* mutable_graph, std::map<std::string, onnx::TensorProto>& weights, std::map<std::string, int>& node_reference, std::set<std::string>& blob_names, int& reduced_node_count) |
| | { |
| | int node_count = mutable_graph->node_size(); |
| | for (int i = 0; i < node_count; i++) |
| | { |
| | onnx::NodeProto* node = mutable_graph->mutable_node(i); |
| |
|
| | |
| | |
| | if (node->op_type() == "Reshape") |
| | { |
| | if (node_reference[node->output(0)] != 1) |
| | continue; |
| |
|
| | std::vector<int> shape; |
| | if (node->input_size() == 1) |
| | { |
| | shape = get_node_attr_ai(*node, "shape"); |
| | } |
| | else |
| | { |
| | |
| | if (weights.find(node->input(1)) == weights.end()) |
| | continue; |
| |
|
| | shape = get_node_attr_from_input_ai(weights[node->input(1)]); |
| | } |
| |
|
| | |
| | if (shape.size() != 6) |
| | continue; |
| |
|
| | if (shape[0] != 1 && shape[0] != -1) |
| | continue; |
| |
|
| | if (shape[3] != shape[5]) |
| | continue; |
| |
|
| | if (i + 2 >= node_count) |
| | continue; |
| |
|
| | onnx::NodeProto* node2 = mutable_graph->mutable_node(i + 1); |
| | onnx::NodeProto* node3 = mutable_graph->mutable_node(i + 2); |
| |
|
| | if (node3->op_type() == "Constant") |
| | { |
| | if (i + 3 >= node_count) |
| | continue; |
| |
|
| | node3 = mutable_graph->mutable_node(i + 3); |
| | } |
| |
|
| | if (node2->op_type() != "Transpose" || node3->op_type() != "Reshape") |
| | continue; |
| |
|
| | if (node_reference[node2->output(0)] != 1) |
| | continue; |
| |
|
| | |
| | std::vector<int> perm = get_node_attr_ai(*node2, "perm"); |
| | if (perm.size() != 6) |
| | continue; |
| |
|
| | if (perm[0] != 0 || perm[1] != 1 || perm[2] != 3 || perm[3] != 5 || perm[4] != 2 || perm[5] != 4) |
| | continue; |
| |
|
| | std::vector<int> shape3; |
| | if (node3->input_size() == 1) |
| | { |
| | shape3 = get_node_attr_ai(*node3, "shape"); |
| | } |
| | else |
| | { |
| | |
| | if (weights.find(node3->input(1)) == weights.end()) |
| | continue; |
| |
|
| | shape3 = get_node_attr_from_input_ai(weights[node3->input(1)]); |
| | } |
| |
|
| | |
| | if (shape3.size() != 4) |
| | continue; |
| |
|
| | if (shape3[0] != 1 && shape3[0] != -1) |
| | continue; |
| |
|
| | if (shape3[1] != shape[1] * shape[3] * shape[5] || shape3[2] != shape[2] || shape3[3] != shape[4]) |
| | continue; |
| |
|
| | |
| | node->set_op_type("noop_reducedncnn"); |
| | node2->set_op_type("noop_reducedncnn"); |
| |
|
| | if (node->input_size() == 2) |
| | { |
| | node_reference[node->input(1)] -= 1; |
| | } |
| | node_reference[node->output(0)] -= 1; |
| | node_reference[node2->output(0)] -= 1; |
| | if (node3->input_size() == 2) |
| | { |
| | node_reference[node3->input(1)] -= 1; |
| | } |
| |
|
| | blob_names.erase(node->output(0)); |
| | blob_names.erase(node2->output(0)); |
| |
|
| | node3->set_op_type("Reorg"); |
| | node3->set_input(0, node->input(0)); |
| |
|
| | onnx::AttributeProto* attr_group = node3->add_attribute(); |
| | attr_group->set_name("stride"); |
| | attr_group->set_i(shape[3]); |
| |
|
| | reduced_node_count += 2; |
| | i += 2; |
| | } |
| | } |
| | } |
| |
|
| | static void fuse_expand_broadcast(onnx::GraphProto* mutable_graph, std::map<std::string, onnx::TensorProto>& weights, std::map<std::string, int>& node_reference, std::set<std::string>& blob_names, int& reduced_node_count) |
| | { |
| | int node_count = mutable_graph->node_size(); |
| | for (int i = 0; i < node_count; i++) |
| | { |
| | onnx::NodeProto* node = mutable_graph->mutable_node(i); |
| |
|
| | |
| | if (node->op_type() == "Expand") |
| | { |
| | if (node_reference[node->output(0)] != 1) |
| | continue; |
| |
|
| | if (i + 1 >= node_count) |
| | continue; |
| |
|
| | onnx::NodeProto* node2 = mutable_graph->mutable_node(i + 1); |
| |
|
| | if (node2->op_type() != "Add" && node2->op_type() != "Sub" && node2->op_type() != "Mul" && node2->op_type() != "Div" && node2->op_type() != "Min" && node2->op_type() != "Max") |
| | continue; |
| |
|
| | if (node2->input(1) != node->output(0) && node2->input(0) != node->output(0)) |
| | continue; |
| |
|
| | |
| | node->set_op_type("noop_reducedncnn"); |
| |
|
| | node_reference[node->output(0)] -= 1; |
| | if (node->input_size() == 2) |
| | { |
| | node_reference[node->input(1)] -= 1; |
| | } |
| |
|
| | blob_names.erase(node->output(0)); |
| |
|
| | if (node2->input(0) == node->output(0)) |
| | { |
| | node2->set_input(0, node->input(0)); |
| | } |
| | else |
| | { |
| | node2->set_input(1, node->input(0)); |
| | } |
| |
|
| | reduced_node_count += 1; |
| | i += 1; |
| | } |
| | } |
| | } |
| |
|
| | static void fuse_lstm_gru_rnn(onnx::GraphProto* mutable_graph, std::map<std::string, onnx::TensorProto>& weights, std::map<std::string, int>& node_reference, std::set<std::string>& blob_names, int& reduced_node_count) |
| | { |
| | int node_count = mutable_graph->node_size(); |
| | for (int i = 0; i < node_count; i++) |
| | { |
| | onnx::NodeProto* node = mutable_graph->mutable_node(i); |
| |
|
| | |
| | if (node->op_type() == "LSTM" || node->op_type() == "GRU" || node->op_type() == "RNN") |
| | { |
| | if (node_reference[node->output(0)] != 1) |
| | continue; |
| |
|
| | if (i + 2 >= node_count) |
| | continue; |
| |
|
| | onnx::NodeProto* node2 = mutable_graph->mutable_node(i + 1); |
| | onnx::NodeProto* node3 = mutable_graph->mutable_node(i + 2); |
| |
|
| | if (node2->op_type() != "Transpose" || node3->op_type() != "Reshape") |
| | continue; |
| |
|
| | if (node_reference[node2->output(0)] != 1) |
| | continue; |
| |
|
| | if (node2->input(0) != node->output(0) || node3->input(0) != node2->output(0)) |
| | continue; |
| |
|
| | std::string direction = get_node_attr_s(*node, "direction"); |
| | if (direction != "bidirectional") |
| | continue; |
| |
|
| | |
| | std::vector<int> perm = get_node_attr_ai(*node2, "perm"); |
| | if (perm.size() != 4) |
| | continue; |
| |
|
| | if (perm[0] != 0 || perm[1] != 2 || perm[2] != 1 || perm[3] != 3) |
| | continue; |
| |
|
| | std::vector<int> shape; |
| | if (node3->input_size() == 1) |
| | { |
| | shape = get_node_attr_ai(*node3, "shape"); |
| | } |
| | else |
| | { |
| | |
| | if (weights.find(node3->input(1)) == weights.end()) |
| | continue; |
| |
|
| | shape = get_node_attr_from_input_ai(weights[node3->input(1)]); |
| | } |
| |
|
| | |
| | if (shape.size() != 3) |
| | continue; |
| |
|
| | if (shape[0] != 0 || shape[1] != 0 || shape[2] != -1) |
| | continue; |
| |
|
| | |
| | node2->set_op_type("noop_reducedncnn"); |
| | node3->set_op_type("noop_reducedncnn"); |
| |
|
| | node_reference[node->output(0)] -= 1; |
| | node_reference[node2->output(0)] -= 1; |
| | if (node3->input_size() == 2) |
| | { |
| | node_reference[node3->input(1)] -= 1; |
| | } |
| |
|
| | blob_names.erase(node->output(0)); |
| | blob_names.erase(node2->output(0)); |
| |
|
| | node->set_output(0, node3->output(0)); |
| |
|
| | reduced_node_count += 2; |
| | i += 2; |
| |
|
| | if (i + 1 < node_count) |
| | { |
| | if (node_reference[node3->output(0)] != 1) |
| | continue; |
| |
|
| | onnx::NodeProto* node4 = mutable_graph->mutable_node(i + 1); |
| |
|
| | if (node4->op_type() != "Transpose") |
| | continue; |
| |
|
| | if (node4->input(0) != node->output(0)) |
| | continue; |
| |
|
| | |
| | std::vector<int> perm4 = get_node_attr_ai(*node4, "perm"); |
| | if (perm4.size() != 3) |
| | continue; |
| |
|
| | if (perm4[0] != 1 || perm4[1] != 0 || perm4[2] != 2) |
| | continue; |
| |
|
| | |
| | node4->set_op_type("noop_reducedncnn"); |
| |
|
| | node_reference[node->output(0)] -= 1; |
| |
|
| | blob_names.erase(node->output(0)); |
| |
|
| | node->set_output(0, node4->output(0)); |
| |
|
| | reduced_node_count += 1; |
| | i += 1; |
| | } |
| | } |
| | } |
| |
|
| | for (int i = 0; i < node_count; i++) |
| | { |
| | onnx::NodeProto* node = mutable_graph->mutable_node(i); |
| |
|
| | |
| | if (node->op_type() == "LSTM" || node->op_type() == "GRU" || node->op_type() == "RNN") |
| | { |
| | if (node_reference[node->output(0)] != 1) |
| | continue; |
| |
|
| | if (i + 1 >= node_count) |
| | continue; |
| |
|
| | onnx::NodeProto* node2 = mutable_graph->mutable_node(i + 1); |
| |
|
| | if (node2->op_type() != "Squeeze") |
| | continue; |
| |
|
| | if (node2->input(0) != node->output(0)) |
| | continue; |
| |
|
| | std::string direction = get_node_attr_s(*node, "direction"); |
| | if (direction == "bidirectional") |
| | continue; |
| |
|
| | |
| | std::vector<int> axes = get_node_attr_ai(*node2, "axes"); |
| | if (axes.size() != 1) |
| | continue; |
| |
|
| | if (axes[0] != 1) |
| | continue; |
| |
|
| | |
| | node2->set_op_type("noop_reducedncnn"); |
| |
|
| | node_reference[node->output(0)] -= 1; |
| |
|
| | blob_names.erase(node->output(0)); |
| |
|
| | node->set_output(0, node2->output(0)); |
| |
|
| | reduced_node_count += 1; |
| | i += 1; |
| |
|
| | if (i + 1 < node_count) |
| | { |
| | if (node_reference[node2->output(0)] != 1) |
| | continue; |
| |
|
| | onnx::NodeProto* node3 = mutable_graph->mutable_node(i + 1); |
| |
|
| | if (node3->op_type() != "Transpose") |
| | continue; |
| |
|
| | if (node3->input(0) != node->output(0)) |
| | continue; |
| |
|
| | |
| | std::vector<int> perm4 = get_node_attr_ai(*node3, "perm"); |
| | if (perm4.size() != 3) |
| | continue; |
| |
|
| | if (perm4[0] != 1 || perm4[1] != 0 || perm4[2] != 2) |
| | continue; |
| |
|
| | |
| | node3->set_op_type("noop_reducedncnn"); |
| |
|
| | node_reference[node->output(0)] -= 1; |
| |
|
| | blob_names.erase(node->output(0)); |
| |
|
| | node->set_output(0, node3->output(0)); |
| |
|
| | reduced_node_count += 1; |
| | i += 1; |
| | } |
| | } |
| | } |
| |
|
| | for (int i = 0; i < node_count; i++) |
| | { |
| | onnx::NodeProto* node = mutable_graph->mutable_node(i); |
| |
|
| | |
| | if (node->op_type() == "Transpose") |
| | { |
| | if (node_reference[node->output(0)] != 1) |
| | continue; |
| |
|
| | |
| | std::vector<int> perm = get_node_attr_ai(*node, "perm"); |
| | if (perm.size() != 3) |
| | continue; |
| |
|
| | if (perm[0] != 1 || perm[1] != 0 || perm[2] != 2) |
| | continue; |
| |
|
| | if (i + 1 >= node_count) |
| | continue; |
| |
|
| | onnx::NodeProto* node2 = mutable_graph->mutable_node(i + 1); |
| |
|
| | if (node2->op_type() != "LSTM" && node->op_type() != "GRU" && node->op_type() != "RNN") |
| | continue; |
| |
|
| | if (node2->input(0) != node->output(0)) |
| | continue; |
| |
|
| | |
| | node->set_op_type("noop_reducedncnn"); |
| |
|
| | node_reference[node->output(0)] -= 1; |
| |
|
| | blob_names.erase(node->output(0)); |
| |
|
| | node2->set_input(0, node->input(0)); |
| |
|
| | reduced_node_count += 1; |
| | i += 1; |
| | } |
| | } |
| | } |
| |
|
| | static void fuse_multiheadattention(onnx::GraphProto* mutable_graph, std::map<std::string, onnx::TensorProto>& weights, std::map<std::string, int>& node_reference, std::set<std::string>& blob_names, int& reduced_node_count) |
| | { |
| | int node_count = mutable_graph->node_size(); |
| | for (int i = 0; i < node_count; i++) |
| | { |
| | onnx::NodeProto* node = mutable_graph->mutable_node(i); |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | if (node->op_type() == "MatMul") |
| | { |
| | if (i + 19 >= node_count) |
| | continue; |
| |
|
| | if (node_reference[node->output(0)] != 1) |
| | continue; |
| |
|
| | onnx::NodeProto* node2 = mutable_graph->mutable_node(i + 1); |
| | onnx::NodeProto* node3 = mutable_graph->mutable_node(i + 2); |
| | onnx::NodeProto* node4 = mutable_graph->mutable_node(i + 3); |
| | onnx::NodeProto* node5 = mutable_graph->mutable_node(i + 4); |
| | onnx::NodeProto* node6 = mutable_graph->mutable_node(i + 5); |
| | onnx::NodeProto* node7 = mutable_graph->mutable_node(i + 6); |
| | onnx::NodeProto* node8 = mutable_graph->mutable_node(i + 7); |
| | onnx::NodeProto* node9 = mutable_graph->mutable_node(i + 8); |
| | onnx::NodeProto* node10 = mutable_graph->mutable_node(i + 9); |
| | onnx::NodeProto* node11 = mutable_graph->mutable_node(i + 10); |
| | onnx::NodeProto* node12 = mutable_graph->mutable_node(i + 11); |
| | onnx::NodeProto* node13 = mutable_graph->mutable_node(i + 12); |
| | onnx::NodeProto* node14 = mutable_graph->mutable_node(i + 13); |
| | onnx::NodeProto* node15 = mutable_graph->mutable_node(i + 14); |
| | onnx::NodeProto* node16 = mutable_graph->mutable_node(i + 15); |
| | onnx::NodeProto* node17 = mutable_graph->mutable_node(i + 16); |
| | onnx::NodeProto* node18 = mutable_graph->mutable_node(i + 17); |
| | onnx::NodeProto* node19 = mutable_graph->mutable_node(i + 18); |
| | onnx::NodeProto* node20 = mutable_graph->mutable_node(i + 19); |
| |
|
| | if (node2->op_type() != "Add" || node3->op_type() != "MatMul" || node4->op_type() != "Add" || node5->op_type() != "MatMul" || node6->op_type() != "Add" || node7->op_type() != "Mul" || node8->op_type() != "Reshape" || node9->op_type() != "Transpose" || node10->op_type() != "Reshape" || node11->op_type() != "Reshape" || node12->op_type() != "Transpose" || node13->op_type() != "Transpose" || node14->op_type() != "MatMul" || node15->op_type() != "Softmax" || node16->op_type() != "MatMul" || node17->op_type() != "Transpose" || node18->op_type() != "Reshape" || node19->op_type() != "MatMul" || node20->op_type() != "Add") |
| | continue; |
| |
|
| | if (node_reference[node2->output(0)] != 1 || node_reference[node3->output(0)] != 1 || node_reference[node4->output(0)] != 1 || node_reference[node5->output(0)] != 1 || node_reference[node6->output(0)] != 1 || node_reference[node7->output(0)] != 1 || node_reference[node8->output(0)] != 1 || node_reference[node9->output(0)] != 1 || node_reference[node10->output(0)] != 1 || node_reference[node11->output(0)] != 1 || node_reference[node12->output(0)] != 1 || node_reference[node13->output(0)] != 1 || node_reference[node14->output(0)] != 1 || node_reference[node15->output(0)] != 1 || node_reference[node16->output(0)] != 1 || node_reference[node17->output(0)] != 1 || node_reference[node18->output(0)] != 1 || node_reference[node19->output(0)] != 1) |
| | continue; |
| |
|
| | if (node2->input(0) != node->output(0) || node4->input(0) != node3->output(0) || node6->input(0) != node5->output(0) || node7->input(0) != node2->output(0) || node8->input(0) != node7->output(0) || node9->input(0) != node8->output(0) || node10->input(0) != node4->output(0) || node11->input(0) != node6->output(0) || node12->input(0) != node11->output(0) || node13->input(0) != node10->output(0) || node14->input(0) != node9->output(0) || node14->input(1) != node13->output(0) || node15->input(0) != node14->output(0) || node16->input(0) != node15->output(0) || node16->input(1) != node12->output(0) || node17->input(0) != node16->output(0) || node18->input(0) != node17->output(0) || node19->input(0) != node18->output(0) || node20->input(0) != node19->output(0)) |
| | continue; |
| |
|
| | std::vector<float> q_B = get_node_attr_from_input_af(weights[node2->input(1)]); |
| | std::vector<float> k_B = get_node_attr_from_input_af(weights[node4->input(1)]); |
| | std::vector<float> v_B = get_node_attr_from_input_af(weights[node6->input(1)]); |
| | std::vector<float> o_B = get_node_attr_from_input_af(weights[node20->input(1)]); |
| |
|
| | if (q_B.size() != k_B.size() || q_B.size() != v_B.size() || q_B.size() != o_B.size()) |
| | continue; |
| |
|
| | int embed_dim = q_B.size(); |
| |
|
| | |
| | std::vector<int> perm9 = get_node_attr_ai(*node9, "perm"); |
| | std::vector<int> perm12 = get_node_attr_ai(*node12, "perm"); |
| | if (perm9.size() != 3 || perm12.size() != 3) |
| | continue; |
| |
|
| | if (perm9[0] != 1 || perm9[1] != 0 || perm9[2] != 2 || perm12[0] != 1 || perm12[1] != 0 || perm12[2] != 2) |
| | continue; |
| |
|
| | |
| | std::vector<int> perm13 = get_node_attr_ai(*node13, "perm"); |
| | if (perm13.size() != 3) |
| | continue; |
| |
|
| | if (perm13[0] != 1 || perm13[1] != 2 || perm13[2] != 0) |
| | continue; |
| |
|
| | |
| | std::vector<int> perm17 = get_node_attr_ai(*node17, "perm"); |
| | if (perm17.size() != 3) |
| | continue; |
| |
|
| | if (perm17[0] != 1 || perm17[1] != 0 || perm17[2] != 2) |
| | continue; |
| |
|
| | int softmax_axis = get_node_attr_i(*node15, "axis"); |
| | if (softmax_axis != 2) |
| | continue; |
| |
|
| | |
| | std::vector<int> shape8; |
| | std::vector<int> shape10; |
| | std::vector<int> shape11; |
| | if (node8->input_size() == 1) |
| | { |
| | shape8 = get_node_attr_ai(*node8, "shape"); |
| | } |
| | else |
| | { |
| | |
| | if (weights.find(node8->input(1)) == weights.end()) |
| | continue; |
| |
|
| | shape8 = get_node_attr_from_input_ai(weights[node8->input(1)]); |
| | } |
| | if (node10->input_size() == 1) |
| | { |
| | shape10 = get_node_attr_ai(*node10, "shape"); |
| | } |
| | else |
| | { |
| | |
| | if (weights.find(node10->input(1)) == weights.end()) |
| | continue; |
| |
|
| | shape10 = get_node_attr_from_input_ai(weights[node10->input(1)]); |
| | } |
| | if (node11->input_size() == 1) |
| | { |
| | shape11 = get_node_attr_ai(*node11, "shape"); |
| | } |
| | else |
| | { |
| | |
| | if (weights.find(node11->input(1)) == weights.end()) |
| | continue; |
| |
|
| | shape11 = get_node_attr_from_input_ai(weights[node11->input(1)]); |
| | } |
| |
|
| | if (shape8.size() != 3 || shape10.size() != 3 || shape11.size() != 3) |
| | continue; |
| |
|
| | if (shape8[1] != shape10[1] || shape8[1] != shape11[1] || shape8[2] != shape10[2] || shape8[2] != shape11[2]) |
| | continue; |
| |
|
| | int num_heads = embed_dim / shape8[2]; |
| |
|
| | |
| | std::vector<int> shape18; |
| | if (node18->input_size() == 1) |
| | { |
| | shape18 = get_node_attr_ai(*node18, "shape"); |
| | } |
| | else |
| | { |
| | |
| | if (weights.find(node18->input(1)) == weights.end()) |
| | continue; |
| |
|
| | shape18 = get_node_attr_from_input_ai(weights[node18->input(1)]); |
| | } |
| |
|
| | if (shape18.size() != 3) |
| | continue; |
| |
|
| | if (shape18[2] != embed_dim || shape18[1] * num_heads != shape8[1]) |
| | continue; |
| |
|
| | |
| | node->set_op_type("noop_reducedncnn"); |
| | node2->set_op_type("noop_reducedncnn"); |
| | node3->set_op_type("noop_reducedncnn"); |
| | node4->set_op_type("noop_reducedncnn"); |
| | node5->set_op_type("noop_reducedncnn"); |
| | node6->set_op_type("noop_reducedncnn"); |
| | node7->set_op_type("noop_reducedncnn"); |
| | node8->set_op_type("noop_reducedncnn"); |
| | node9->set_op_type("noop_reducedncnn"); |
| | node10->set_op_type("noop_reducedncnn"); |
| | node11->set_op_type("noop_reducedncnn"); |
| | node12->set_op_type("noop_reducedncnn"); |
| | node13->set_op_type("noop_reducedncnn"); |
| | node14->set_op_type("noop_reducedncnn"); |
| | node15->set_op_type("noop_reducedncnn"); |
| | node16->set_op_type("noop_reducedncnn"); |
| | node17->set_op_type("noop_reducedncnn"); |
| | node18->set_op_type("noop_reducedncnn"); |
| | node19->set_op_type("noop_reducedncnn"); |
| |
|
| | node_reference[node2->input(0)] -= 1; |
| | node_reference[node4->input(0)] -= 1; |
| | node_reference[node6->input(0)] -= 1; |
| | node_reference[node7->input(0)] -= 1; |
| | node_reference[node7->input(1)] -= 1; |
| | node_reference[node8->input(0)] -= 1; |
| | if (node8->input_size() == 2) |
| | { |
| | node_reference[node8->input(1)] -= 1; |
| | } |
| | node_reference[node9->input(0)] -= 1; |
| | node_reference[node10->input(0)] -= 1; |
| | if (node10->input_size() == 2) |
| | { |
| | node_reference[node10->input(1)] -= 1; |
| | } |
| | node_reference[node11->input(0)] -= 1; |
| | if (node11->input_size() == 2) |
| | { |
| | node_reference[node11->input(1)] -= 1; |
| | } |
| | node_reference[node12->input(0)] -= 1; |
| | node_reference[node13->input(0)] -= 1; |
| | node_reference[node14->input(0)] -= 1; |
| | node_reference[node14->input(1)] -= 1; |
| | node_reference[node15->input(0)] -= 1; |
| | node_reference[node16->input(0)] -= 1; |
| | node_reference[node16->input(1)] -= 1; |
| | node_reference[node17->input(0)] -= 1; |
| | node_reference[node18->input(0)] -= 1; |
| | if (node18->input_size() == 2) |
| | { |
| | node_reference[node18->input(1)] -= 1; |
| | } |
| | node_reference[node19->input(0)] -= 1; |
| | node_reference[node20->input(0)] -= 1; |
| |
|
| | blob_names.erase(node->output(0)); |
| | blob_names.erase(node2->output(0)); |
| | blob_names.erase(node3->output(0)); |
| | blob_names.erase(node4->output(0)); |
| | blob_names.erase(node5->output(0)); |
| | blob_names.erase(node6->output(0)); |
| | blob_names.erase(node7->output(0)); |
| | blob_names.erase(node8->output(0)); |
| | blob_names.erase(node9->output(0)); |
| | blob_names.erase(node10->output(0)); |
| | blob_names.erase(node11->output(0)); |
| | blob_names.erase(node12->output(0)); |
| | blob_names.erase(node13->output(0)); |
| | blob_names.erase(node14->output(0)); |
| | blob_names.erase(node15->output(0)); |
| | blob_names.erase(node16->output(0)); |
| | blob_names.erase(node17->output(0)); |
| | blob_names.erase(node18->output(0)); |
| | blob_names.erase(node19->output(0)); |
| |
|
| | std::string qw = node->input(1); |
| | std::string qb = node2->input(1); |
| | std::string kw = node3->input(1); |
| | std::string kb = node4->input(1); |
| | std::string vw = node5->input(1); |
| | std::string vb = node6->input(1); |
| | std::string ow = node19->input(1); |
| | std::string ob = node20->input(1); |
| |
|
| | node20->set_op_type("MultiHeadAttention"); |
| | node20->clear_input(); |
| | node20->add_input(node->input(0)); |
| | node20->add_input(node3->input(0)); |
| | node20->add_input(node5->input(0)); |
| | |
| | node20->add_input(qw); |
| | node20->add_input(qb); |
| | |
| | node20->add_input(kw); |
| | node20->add_input(kb); |
| | |
| | node20->add_input(vw); |
| | node20->add_input(vb); |
| | |
| | node20->add_input(ow); |
| | node20->add_input(ob); |
| |
|
| | onnx::AttributeProto* attr_embed_dim = node20->add_attribute(); |
| | attr_embed_dim->set_name("embed_dim"); |
| | attr_embed_dim->set_i(embed_dim); |
| |
|
| | onnx::AttributeProto* attr_num_heads = node20->add_attribute(); |
| | attr_num_heads->set_name("num_heads"); |
| | attr_num_heads->set_i(num_heads); |
| |
|
| | reduced_node_count += 19; |
| | i += 19; |
| | } |
| | } |
| |
|
| | for (int i = 0; i < node_count; i++) |
| | { |
| | onnx::NodeProto* node = mutable_graph->mutable_node(i); |
| |
|
| | |
| | |
| | |
| | |
| | |
| | if (node->op_type() == "MatMul") |
| | { |
| | if (i + 16 >= node_count) |
| | continue; |
| |
|
| | if (node_reference[node->output(0)] != 1) |
| | continue; |
| |
|
| | onnx::NodeProto* node2 = mutable_graph->mutable_node(i + 1); |
| | onnx::NodeProto* node3 = mutable_graph->mutable_node(i + 2); |
| | onnx::NodeProto* node4 = mutable_graph->mutable_node(i + 3); |
| | onnx::NodeProto* node5 = mutable_graph->mutable_node(i + 4); |
| | onnx::NodeProto* node6 = mutable_graph->mutable_node(i + 5); |
| | onnx::NodeProto* node7 = mutable_graph->mutable_node(i + 6); |
| | onnx::NodeProto* node8 = mutable_graph->mutable_node(i + 7); |
| | onnx::NodeProto* node9 = mutable_graph->mutable_node(i + 8); |
| | onnx::NodeProto* node10 = mutable_graph->mutable_node(i + 9); |
| | onnx::NodeProto* node11 = mutable_graph->mutable_node(i + 10); |
| | onnx::NodeProto* node12 = mutable_graph->mutable_node(i + 11); |
| | onnx::NodeProto* node13 = mutable_graph->mutable_node(i + 12); |
| | onnx::NodeProto* node14 = mutable_graph->mutable_node(i + 13); |
| | onnx::NodeProto* node15 = mutable_graph->mutable_node(i + 14); |
| | onnx::NodeProto* node16 = mutable_graph->mutable_node(i + 15); |
| | onnx::NodeProto* node17 = mutable_graph->mutable_node(i + 16); |
| |
|
| | if (node2->op_type() != "Add" || node3->op_type() != "Split" || node4->op_type() != "Mul" || node5->op_type() != "Reshape" || node6->op_type() != "Transpose" || node7->op_type() != "Reshape" || node8->op_type() != "Reshape" || node9->op_type() != "Transpose" || node10->op_type() != "Transpose" || node11->op_type() != "MatMul" || node12->op_type() != "Softmax" || node13->op_type() != "MatMul" || node14->op_type() != "Transpose" || node15->op_type() != "Reshape" || node16->op_type() != "MatMul" || node17->op_type() != "Add") |
| | continue; |
| |
|
| | if (node_reference[node2->output(0)] != 1 || node_reference[node3->output(0)] != 1 || node_reference[node3->output(1)] != 1 || node_reference[node3->output(2)] != 1 || node_reference[node4->output(0)] != 1 || node_reference[node5->output(0)] != 1 || node_reference[node6->output(0)] != 1 || node_reference[node7->output(0)] != 1 || node_reference[node8->output(0)] != 1 || node_reference[node9->output(0)] != 1 || node_reference[node10->output(0)] != 1 || node_reference[node11->output(0)] != 1 || node_reference[node12->output(0)] != 1 || node_reference[node13->output(0)] != 1 || node_reference[node14->output(0)] != 1 || node_reference[node15->output(0)] != 1 || node_reference[node16->output(0)] != 1) |
| | continue; |
| |
|
| | if (node2->input(0) != node->output(0) || node3->input(0) != node2->output(0) || node4->input(0) != node3->output(0) || node5->input(0) != node4->output(0) || node6->input(0) != node5->output(0) || node7->input(0) != node3->output(1) || node8->input(0) != node3->output(2) || node9->input(0) != node8->output(0) || node10->input(0) != node7->output(0) || node11->input(0) != node6->output(0) || node11->input(1) != node10->output(0) || node12->input(0) != node11->output(0) || node13->input(0) != node12->output(0) || node13->input(1) != node9->output(0) || node14->input(0) != node13->output(0) || node15->input(0) != node14->output(0) || node16->input(0) != node15->output(0) || node17->input(0) != node16->output(0)) |
| | continue; |
| |
|
| | std::vector<float> qkv_B = get_node_attr_from_input_af(weights[node2->input(1)]); |
| | std::vector<float> o_B = get_node_attr_from_input_af(weights[node17->input(1)]); |
| |
|
| | if (qkv_B.size() != o_B.size() * 3) |
| | continue; |
| |
|
| | int embed_dim = o_B.size(); |
| |
|
| | |
| | std::vector<int> perm6 = get_node_attr_ai(*node6, "perm"); |
| | std::vector<int> perm9 = get_node_attr_ai(*node9, "perm"); |
| | if (perm6.size() != 3 || perm9.size() != 3) |
| | continue; |
| |
|
| | if (perm6[0] != 1 || perm6[1] != 0 || perm6[2] != 2 || perm9[0] != 1 || perm9[1] != 0 || perm9[2] != 2) |
| | continue; |
| |
|
| | |
| | std::vector<int> perm10 = get_node_attr_ai(*node10, "perm"); |
| | if (perm10.size() != 3) |
| | continue; |
| |
|
| | if (perm10[0] != 1 || perm10[1] != 2 || perm10[2] != 0) |
| | continue; |
| |
|
| | |
| | std::vector<int> perm14 = get_node_attr_ai(*node14, "perm"); |
| | if (perm14.size() != 3) |
| | continue; |
| |
|
| | if (perm14[0] != 1 || perm14[1] != 0 || perm14[2] != 2) |
| | continue; |
| |
|
| | int softmax_axis = get_node_attr_i(*node12, "axis"); |
| | if (softmax_axis != 2) |
| | continue; |
| |
|
| | |
| | std::vector<int> shape5; |
| | std::vector<int> shape7; |
| | std::vector<int> shape8; |
| | if (node5->input_size() == 1) |
| | { |
| | shape5 = get_node_attr_ai(*node5, "shape"); |
| | } |
| | else |
| | { |
| | |
| | if (weights.find(node5->input(1)) == weights.end()) |
| | continue; |
| |
|
| | shape5 = get_node_attr_from_input_ai(weights[node5->input(1)]); |
| | } |
| | if (node7->input_size() == 1) |
| | { |
| | shape7 = get_node_attr_ai(*node7, "shape"); |
| | } |
| | else |
| | { |
| | |
| | if (weights.find(node7->input(1)) == weights.end()) |
| | continue; |
| |
|
| | shape7 = get_node_attr_from_input_ai(weights[node7->input(1)]); |
| | } |
| | if (node8->input_size() == 1) |
| | { |
| | shape8 = get_node_attr_ai(*node8, "shape"); |
| | } |
| | else |
| | { |
| | |
| | if (weights.find(node8->input(1)) == weights.end()) |
| | continue; |
| |
|
| | shape8 = get_node_attr_from_input_ai(weights[node8->input(1)]); |
| | } |
| |
|
| | if (shape5.size() != 3 || shape7.size() != 3 || shape8.size() != 3) |
| | continue; |
| |
|
| | if (shape5[1] != shape7[1] || shape5[1] != shape8[1] || shape5[2] != shape7[2] || shape5[2] != shape8[2]) |
| | continue; |
| |
|
| | int num_heads = embed_dim / shape5[2]; |
| |
|
| | |
| | std::vector<int> shape15; |
| | if (node15->input_size() == 1) |
| | { |
| | shape15 = get_node_attr_ai(*node15, "shape"); |
| | } |
| | else |
| | { |
| | |
| | if (weights.find(node15->input(1)) == weights.end()) |
| | continue; |
| |
|
| | shape15 = get_node_attr_from_input_ai(weights[node15->input(1)]); |
| | } |
| |
|
| | if (shape15.size() != 3) |
| | continue; |
| |
|
| | if (shape15[2] != embed_dim || shape15[1] * num_heads != shape8[1]) |
| | continue; |
| |
|
| | |
| | node->set_op_type("noop_reducedncnn"); |
| | node2->set_op_type("noop_reducedncnn"); |
| | node3->set_op_type("noop_reducedncnn"); |
| | node4->set_op_type("noop_reducedncnn"); |
| | node5->set_op_type("noop_reducedncnn"); |
| | node6->set_op_type("noop_reducedncnn"); |
| | node7->set_op_type("noop_reducedncnn"); |
| | node8->set_op_type("noop_reducedncnn"); |
| | node9->set_op_type("noop_reducedncnn"); |
| | node10->set_op_type("noop_reducedncnn"); |
| | node11->set_op_type("noop_reducedncnn"); |
| | node12->set_op_type("noop_reducedncnn"); |
| | node13->set_op_type("noop_reducedncnn"); |
| | node14->set_op_type("noop_reducedncnn"); |
| | node15->set_op_type("noop_reducedncnn"); |
| | node16->set_op_type("noop_reducedncnn"); |
| |
|
| | node_reference[node2->input(0)] -= 1; |
| | node_reference[node3->input(0)] -= 1; |
| | node_reference[node4->input(0)] -= 1; |
| | node_reference[node4->input(1)] -= 1; |
| | node_reference[node5->input(0)] -= 1; |
| | if (node5->input_size() == 2) |
| | { |
| | node_reference[node5->input(1)] -= 1; |
| | } |
| | node_reference[node6->input(0)] -= 1; |
| | node_reference[node7->input(0)] -= 1; |
| | if (node7->input_size() == 2) |
| | { |
| | node_reference[node7->input(1)] -= 1; |
| | } |
| | node_reference[node8->input(0)] -= 1; |
| | if (node8->input_size() == 2) |
| | { |
| | node_reference[node8->input(1)] -= 1; |
| | } |
| | node_reference[node9->input(0)] -= 1; |
| | node_reference[node10->input(0)] -= 1; |
| | node_reference[node11->input(0)] -= 1; |
| | node_reference[node11->input(1)] -= 1; |
| | node_reference[node12->input(0)] -= 1; |
| | node_reference[node13->input(0)] -= 1; |
| | node_reference[node13->input(1)] -= 1; |
| | node_reference[node14->input(0)] -= 1; |
| | node_reference[node15->input(0)] -= 1; |
| | if (node15->input_size() == 2) |
| | { |
| | node_reference[node15->input(1)] -= 1; |
| | } |
| | node_reference[node16->input(0)] -= 1; |
| | node_reference[node17->input(0)] -= 1; |
| |
|
| | blob_names.erase(node->output(0)); |
| | blob_names.erase(node2->output(0)); |
| | blob_names.erase(node3->output(0)); |
| | blob_names.erase(node3->output(1)); |
| | blob_names.erase(node3->output(2)); |
| | blob_names.erase(node4->output(0)); |
| | blob_names.erase(node5->output(0)); |
| | blob_names.erase(node6->output(0)); |
| | blob_names.erase(node7->output(0)); |
| | blob_names.erase(node8->output(0)); |
| | blob_names.erase(node9->output(0)); |
| | blob_names.erase(node10->output(0)); |
| | blob_names.erase(node11->output(0)); |
| | blob_names.erase(node12->output(0)); |
| | blob_names.erase(node13->output(0)); |
| | blob_names.erase(node14->output(0)); |
| | blob_names.erase(node15->output(0)); |
| | blob_names.erase(node16->output(0)); |
| |
|
| | std::string qkvw = node->input(1); |
| | std::string qkvb = node2->input(1); |
| | std::string ow = node16->input(1); |
| | std::string ob = node17->input(1); |
| |
|
| | node17->set_op_type("MultiHeadAttention"); |
| | node17->clear_input(); |
| | node17->add_input(node->input(0)); |
| | |
| | node17->add_input(qkvw); |
| | node17->add_input(qkvb); |
| | |
| | node17->add_input(ow); |
| | node17->add_input(ob); |
| |
|
| | onnx::AttributeProto* attr_embed_dim = node17->add_attribute(); |
| | attr_embed_dim->set_name("embed_dim"); |
| | attr_embed_dim->set_i(embed_dim); |
| |
|
| | onnx::AttributeProto* attr_num_heads = node17->add_attribute(); |
| | attr_num_heads->set_name("num_heads"); |
| | attr_num_heads->set_i(num_heads); |
| |
|
| | reduced_node_count += 16; |
| | i += 16; |
| | } |
| | } |
| | } |
| |
|
| | static void fuse_binaryop_with_scalar(onnx::GraphProto* mutable_graph, std::map<std::string, onnx::TensorProto>& weights, std::map<std::string, int>& node_reference, std::set<std::string>& blob_names, int& reduced_node_count) |
| | { |
| | int node_count = mutable_graph->node_size(); |
| | for (int i = 0; i < node_count; i++) |
| | { |
| | onnx::NodeProto* node = mutable_graph->mutable_node(i); |
| |
|
| | |
| | if (node->op_type() == "Add" || node->op_type() == "Sub" || node->op_type() == "Mul" || node->op_type() == "Div" || node->op_type() == "Max" || node->op_type() == "Min" || node->op_type() == "Pow") |
| | { |
| | if (weights.find(node->input(0)) == weights.end()) |
| | continue; |
| |
|
| | const onnx::TensorProto& scalar_b = weights[node->input(0)]; |
| | if (scalar_b.dims_size() != 0 || get_tensor_proto_data_size(scalar_b) != 1) |
| | continue; |
| |
|
| | if (node->op_type() == "Sub") |
| | { |
| | node->set_op_type("RSub"); |
| | } |
| | else if (node->op_type() == "Div") |
| | { |
| | node->set_op_type("RDiv"); |
| | } |
| |
|
| | float b = get_node_attr_from_input_f(scalar_b); |
| |
|
| | node_reference[node->input(0)] -= 1; |
| |
|
| | std::string input = node->input(1); |
| |
|
| | node->clear_input(); |
| | node->add_input(input); |
| |
|
| | onnx::AttributeProto* attr_with_scalar = node->add_attribute(); |
| | attr_with_scalar->set_name("with_scalar"); |
| | attr_with_scalar->set_i(1); |
| |
|
| | onnx::AttributeProto* attr_b = node->add_attribute(); |
| | attr_b->set_name("b"); |
| | attr_b->set_f(b); |
| | } |
| | } |
| |
|
| | for (int i = 0; i < node_count; i++) |
| | { |
| | onnx::NodeProto* node = mutable_graph->mutable_node(i); |
| |
|
| | |
| | if (node->op_type() == "Add" || node->op_type() == "Sub" || node->op_type() == "Mul" || node->op_type() == "Div" || node->op_type() == "Max" || node->op_type() == "Min" || node->op_type() == "Pow") |
| | { |
| | if (weights.find(node->input(1)) == weights.end()) |
| | continue; |
| |
|
| | const onnx::TensorProto& scalar_b = weights[node->input(1)]; |
| | if (scalar_b.dims_size() != 0 || get_tensor_proto_data_size(scalar_b) != 1) |
| | continue; |
| |
|
| | float b = get_node_attr_from_input_f(scalar_b); |
| |
|
| | node_reference[node->input(1)] -= 1; |
| |
|
| | std::string input = node->input(0); |
| |
|
| | node->clear_input(); |
| | node->add_input(input); |
| |
|
| | onnx::AttributeProto* attr_with_scalar = node->add_attribute(); |
| | attr_with_scalar->set_name("with_scalar"); |
| | attr_with_scalar->set_i(1); |
| |
|
| | onnx::AttributeProto* attr_b = node->add_attribute(); |
| | attr_b->set_name("b"); |
| | attr_b->set_f(b); |
| | } |
| | } |
| | } |
| |
|
| | |
// Returns `name` unchanged when it is at most 255 characters long. Longer names are
// shortened to exactly 255 characters: a "_t<counter>" tag is appended and the trailing
// 255 characters of the result are kept, so every shortened name ends in a distinct tag.
//
// The counter and the name -> shortened-name cache live in function-local statics, so
// the same long name always maps to the same result within one run, and the tag a name
// receives depends on the order in which long names are first seen.
static std::string trunc_name(std::string name)
{
    static int trunc_idx = 0;
    static std::map<std::string, std::string> name_trunc_map;

    const std::string::size_type max_len = 255;
    if (name.size() <= max_len)
    {
        return name;
    }

    // single lookup instead of count() followed by operator[]
    const std::map<std::string, std::string>::const_iterator cached = name_trunc_map.find(name);
    if (cached != name_trunc_map.end())
    {
        return cached->second;
    }

    const std::string tagged_name = name + "_t" + std::to_string(trunc_idx);
    const std::string shortened = tagged_name.substr(tagged_name.size() - max_len);
    trunc_idx += 1;
    name_trunc_map[name] = shortened;

    return shortened;
}
| |
|
| | int main(int argc, char** argv) |
| | { |
| | if (!(argc == 2 || argc == 4)) |
| | { |
| | fprintf(stderr, "Usage: %s [onnxpb] [ncnnparam] [ncnnbin]\n", argv[0]); |
| | return -1; |
| | } |
| |
|
| | const char* onnxpb = argv[1]; |
| | const char* ncnn_prototxt = argc == 4 ? argv[2] : "ncnn.param"; |
| | const char* ncnn_modelbin = argc == 4 ? argv[3] : "ncnn.bin"; |
| |
|
| | onnx::ModelProto model; |
| |
|
| | |
| | bool s1 = read_proto_from_binary(onnxpb, &model); |
| | if (!s1) |
| | { |
| | fprintf(stderr, "read_proto_from_binary failed\n"); |
| | return -1; |
| | } |
| |
|
| | FILE* pp = fopen(ncnn_prototxt, "wb"); |
| | FILE* bp = fopen(ncnn_modelbin, "wb"); |
| |
|
| | |
| | fprintf(pp, "7767517\n"); |
| |
|
| | const onnx::GraphProto& graph = model.graph(); |
| | onnx::GraphProto* mutable_graph = model.mutable_graph(); |
| |
|
| | int node_count = graph.node_size(); |
| |
|
| | |
| | std::map<std::string, int> node_reference; |
| |
|
| | |
| | std::map<std::string, onnx::TensorProto> weights; |
| |
|
| | for (int j = 0; j < graph.initializer_size(); j++) |
| | { |
| | const onnx::TensorProto& initializer = graph.initializer(j); |
| |
|
| | |
| |
|
| | weights[initializer.name()] = initializer; |
| | } |
| |
|
| | |
| | { |
| | |
| | std::set<std::string> producers; |
| | for (int j = 0; j < graph.input_size(); j++) |
| | { |
| | const std::string& input_name = graph.input(j).name(); |
| | producers.insert(input_name); |
| | } |
| |
|
| | for (int i = 0; i < node_count;) |
| | { |
| | onnx::NodeProto* node = mutable_graph->mutable_node(i); |
| |
|
| | bool swapnode = false; |
| | std::string missing_input_name; |
| | for (int j = 0; j < (int)node->input_size(); j++) |
| | { |
| | const std::string& input_name = node->input(j); |
| | if (input_name.empty()) |
| | continue; |
| |
|
| | if (producers.find(input_name) == producers.end() && weights.find(input_name) == weights.end()) |
| | { |
| | swapnode = true; |
| | missing_input_name = input_name; |
| | break; |
| | } |
| | } |
| |
|
| | if (!swapnode) |
| | { |
| | for (int j = 0; j < (int)node->output_size(); j++) |
| | { |
| | const std::string& output_name = node->output(j); |
| | if (output_name.empty()) |
| | continue; |
| |
|
| | producers.insert(output_name); |
| | } |
| |
|
| | i++; |
| | continue; |
| | } |
| |
|
| | |
| | int q = i + 1; |
| | for (; q < node_count; q++) |
| | { |
| | onnx::NodeProto* nodeq = mutable_graph->mutable_node(q); |
| | bool found = false; |
| | for (int j = 0; j < (int)nodeq->output_size(); j++) |
| | { |
| | const std::string& output_name = nodeq->output(j); |
| | if (output_name == missing_input_name) |
| | { |
| | found = true; |
| | break; |
| | } |
| | } |
| |
|
| | if (found) |
| | break; |
| | } |
| |
|
| | if (q == node_count) |
| | { |
| | fprintf(stderr, "cannot find node produces %s but node %d requires it\n", missing_input_name.c_str(), i); |
| | return -1; |
| | } |
| |
|
| | |
| | |
| | onnx::NodeProto* nodeq = mutable_graph->mutable_node(q); |
| | onnx::NodeProto tmp = *node; |
| | *node = *nodeq; |
| | *nodeq = tmp; |
| | } |
| | } |
| |
|
| | |
| | |
| | std::set<std::string> blob_names; |
| | for (int i = 0; i < node_count; i++) |
| | { |
| | const onnx::NodeProto& node = graph.node(i); |
| |
|
| | const std::string& op = node.op_type(); |
| |
|
| | std::string name = node.name(); |
| | if (name.empty()) |
| | { |
| | name = node.output(0); |
| | } |
| |
|
| | if (op == "Constant") |
| | { |
| | onnx::TensorProto tensor = get_node_attr_tensor(node, "value"); |
| | weights[node.output(0)] = tensor; |
| | } |
| |
|
| | for (int j = 0; j < (int)node.input_size(); j++) |
| | { |
| | const std::string& input_name = node.input(j); |
| |
|
| | blob_names.insert(input_name); |
| |
|
| | if (node_reference.find(input_name) == node_reference.end()) |
| | { |
| | node_reference[input_name] = 1; |
| | } |
| | else |
| | { |
| | node_reference[input_name] = node_reference[input_name] + 1; |
| | } |
| | } |
| |
|
| | if (op == "Dropout") |
| | { |
| | const std::string& output_name = node.output(0); |
| | blob_names.insert(output_name); |
| | node_reference[output_name] = 0; |
| | continue; |
| | } |
| |
|
| | for (int j = 0; j < (int)node.output_size(); j++) |
| | { |
| | const std::string& output_name = node.output(j); |
| |
|
| | blob_names.insert(output_name); |
| |
|
| | node_reference[output_name] = 0; |
| | } |
| | } |
| |
|
| | |
| | int input_node_count = 0; |
| | for (int j = 0; j < graph.input_size(); j++) |
| | { |
| | const std::string& input_name = graph.input(j).name(); |
| |
|
| | |
| | if (weights.find(input_name) != weights.end()) |
| | continue; |
| |
|
| | blob_names.insert(input_name); |
| |
|
| | input_node_count++; |
| | } |
| |
|
| | |
| | |
| | |
| | |
| |
|
| | |
| | int reduced_node_count = 0; |
| | fuse_weight_reshape(mutable_graph, weights, node_reference, blob_names, reduced_node_count); |
| | fuse_weight_transpose(mutable_graph, weights, node_reference, blob_names, reduced_node_count); |
| | fuse_shufflechannel(mutable_graph, weights, node_reference, blob_names, reduced_node_count); |
| | fuse_shufflechannel_split(mutable_graph, weights, node_reference, blob_names, reduced_node_count); |
| | fuse_hardsigmoid(mutable_graph, weights, node_reference, blob_names, reduced_node_count); |
| | fuse_hardswish(mutable_graph, weights, node_reference, blob_names, reduced_node_count); |
| | fuse_swish(mutable_graph, weights, node_reference, blob_names, reduced_node_count); |
| | fuse_batchnorm1d_squeeze_unsqueeze(mutable_graph, weights, node_reference, blob_names, reduced_node_count); |
| | fuse_unsqueeze_prelu(mutable_graph, weights, node_reference, blob_names, reduced_node_count); |
| | fuse_normalize(mutable_graph, weights, node_reference, blob_names, reduced_node_count); |
| | fuse_groupnorm(mutable_graph, weights, node_reference, blob_names, reduced_node_count); |
| | fuse_layernorm(mutable_graph, weights, node_reference, blob_names, reduced_node_count); |
| | fuse_flatten(mutable_graph, weights, node_reference, blob_names, reduced_node_count); |
| | fuse_pixelshuffle(mutable_graph, weights, node_reference, blob_names, reduced_node_count); |
| | fuse_reorg(mutable_graph, weights, node_reference, blob_names, reduced_node_count); |
| | fuse_expand_broadcast(mutable_graph, weights, node_reference, blob_names, reduced_node_count); |
| | fuse_lstm_gru_rnn(mutable_graph, weights, node_reference, blob_names, reduced_node_count); |
| | fuse_multiheadattention(mutable_graph, weights, node_reference, blob_names, reduced_node_count); |
| | fuse_binaryop_with_scalar(mutable_graph, weights, node_reference, blob_names, reduced_node_count); |
| | fuse_rewrite_gather(mutable_graph, weights, node_reference, blob_names, reduced_node_count); |
| |
|
| | |
| | for (int i = 0; i < node_count; i++) |
| | { |
| | const onnx::NodeProto& node = graph.node(i); |
| |
|
| | const std::string& op = node.op_type(); |
| |
|
| | if (op == "BatchNormalization") |
| | { |
| | node_reference[node.input(1)] -= 1; |
| | node_reference[node.input(2)] -= 1; |
| | node_reference[node.input(3)] -= 1; |
| | node_reference[node.input(4)] -= 1; |
| | } |
| | else if (op == "BiasGelu") |
| | { |
| | node_reference[node.input(1)] -= 1; |
| | } |
| | else if (op == "Clip") |
| | { |
| | if (node.input_size() == 3) |
| | { |
| | node_reference[node.input(1)] -= 1; |
| | node_reference[node.input(2)] -= 1; |
| | } |
| | } |
| | else if (op == "Conv") |
| | { |
| | node_reference[node.input(1)] -= 1; |
| | if (node.input_size() == 3) |
| | { |
| | node_reference[node.input(2)] -= 1; |
| | } |
| | } |
| | else if (op == "ConvTranspose") |
| | { |
| | node_reference[node.input(1)] -= 1; |
| | if (node.input_size() == 3) |
| | { |
| | node_reference[node.input(2)] -= 1; |
| | } |
| | } |
| | else if (op == "EmbedLayerNormalization") |
| | { |
| | node_reference[node.input(1)] -= 1; |
| | node_reference[node.input(2)] -= 1; |
| | node_reference[node.input(3)] -= 1; |
| | node_reference[node.input(4)] -= 1; |
| | node_reference[node.input(5)] -= 1; |
| | node_reference[node.input(6)] -= 1; |
| | } |
| | else if (op == "Gemm") |
| | { |
| | float alpha = get_node_attr_f(node, "alpha", 1.f); |
| | float beta = get_node_attr_f(node, "beta", 1.f); |
| | int transA = get_node_attr_i(node, "transA", 0); |
| | int transB = get_node_attr_i(node, "transB", 0); |
| |
|
| | if (alpha == 1.f && beta == 1.f && transA == 0 && transB == 1) |
| | { |
| | |
| | node_reference[node.input(1)] -= 1; |
| | node_reference[node.input(2)] -= 1; |
| | } |
| | } |
| | else if (op == "GroupNorm") |
| | { |
| | int affine = get_node_attr_i(node, "affine", 1); |
| | if (affine) |
| | { |
| | node_reference[node.input(1)] -= 1; |
| | node_reference[node.input(2)] -= 1; |
| | } |
| | } |
| | else if (op == "GRU") |
| | { |
| | for (int j = 1; j < node.input_size(); j++) |
| | { |
| | node_reference[node.input(j)] -= 1; |
| | } |
| | } |
| | else if (op == "InstanceNormalization") |
| | { |
| | node_reference[node.input(1)] -= 1; |
| | node_reference[node.input(2)] -= 1; |
| | } |
| | else if (op == "LayerNorm") |
| | { |
| | int affine = get_node_attr_i(node, "affine", 1); |
| | if (affine) |
| | { |
| | node_reference[node.input(1)] -= 1; |
| | node_reference[node.input(2)] -= 1; |
| | } |
| | } |
| | else if (op == "LSTM") |
| | { |
| | for (int j = 1; j < node.input_size(); j++) |
| | { |
| | node_reference[node.input(j)] -= 1; |
| | } |
| | } |
| | else if (op == "MatMul") |
| | { |
| | if (weights.find(node.input(1)) != weights.end() && weights[node.input(1)].dims_size() == 2) |
| | { |
| | |
| | node_reference[node.input(1)] -= 1; |
| | } |
| | } |
| | else if (op == "MultiHeadAttention") |
| | { |
| | if (node.input_size() == 5) |
| | { |
| | node_reference[node.input(1)] -= 1; |
| | node_reference[node.input(2)] -= 1; |
| | node_reference[node.input(3)] -= 1; |
| | node_reference[node.input(4)] -= 1; |
| | } |
| | else |
| | { |
| | node_reference[node.input(3)] -= 1; |
| | node_reference[node.input(4)] -= 1; |
| | node_reference[node.input(5)] -= 1; |
| | node_reference[node.input(6)] -= 1; |
| | node_reference[node.input(7)] -= 1; |
| | node_reference[node.input(8)] -= 1; |
| | node_reference[node.input(9)] -= 1; |
| | node_reference[node.input(10)] -= 1; |
| | } |
| | } |
| | else if (op == "Pad") |
| | { |
| | if (node.input_size() >= 2) |
| | { |
| | node_reference[node.input(1)] -= 1; |
| | } |
| | } |
| | else if (op == "PRelu") |
| | { |
| | node_reference[node.input(1)] -= 1; |
| | } |
| | else if (op == "Reshape") |
| | { |
| | if (node.input_size() >= 2) |
| | { |
| | node_reference[node.input(1)] -= 1; |
| | } |
| | } |
| | else if (op == "Resize") |
| | { |
| | if (node.input_size() == 2) |
| | { |
| | |
| | node_reference[node.input(1)] -= 1; |
| | } |
| | else |
| | { |
| | |
| | node_reference[node.input(1)] -= 1; |
| | node_reference[node.input(2)] -= 1; |
| | if (node.input_size() >= 4) |
| | { |
| | node_reference[node.input(3)] -= 1; |
| | } |
| | } |
| | } |
| | else if (op == "RNN") |
| | { |
| | for (int j = 1; j < node.input_size(); j++) |
| | { |
| | node_reference[node.input(j)] -= 1; |
| | } |
| | } |
| | else if (op == "SkipLayerNormalization") |
| | { |
| | node_reference[node.input(2)] -= 1; |
| | node_reference[node.input(3)] -= 1; |
| | node_reference[node.input(4)] -= 1; |
| | } |
| | else if (op == "Slice") |
| | { |
| | if (node.input_size() >= 2) |
| | { |
| | node_reference[node.input(1)] -= 1; |
| | node_reference[node.input(2)] -= 1; |
| | if (node.input_size() >= 4) |
| | node_reference[node.input(3)] -= 1; |
| | if (node.input_size() >= 5) |
| | node_reference[node.input(4)] -= 1; |
| | } |
| | } |
| | else if (op == "Upsample") |
| | { |
| | if (node.input_size() >= 2) |
| | { |
| | node_reference[node.input(1)] -= 1; |
| | } |
| | } |
| | else if (op == "adaptive_avg_pool2d" || op == "adaptive_max_pool2d") |
| | { |
| | if (node.input_size() >= 2) |
| | { |
| | node_reference[node.input(1)] -= 1; |
| | } |
| | } |
| | } |
| |
|
| | |
| | |
| | |
| | |
| |
|
| | |
| | int zero_reference_weight_node_count = 0; |
| | for (std::map<std::string, onnx::TensorProto>::iterator it = weights.begin(); it != weights.end(); it++) |
| | { |
| | const std::string& input_name = it->first; |
| |
|
| | |
| | |
| | blob_names.insert(input_name); |
| |
|
| | int refcount = node_reference[input_name]; |
| | if (refcount == 0) |
| | zero_reference_weight_node_count++; |
| | } |
| |
|
| | |
| | |
| | int constant_node_count_moved_to_weight = 0; |
| | for (int i = 0; i < node_count; i++) |
| | { |
| | const onnx::NodeProto& node = graph.node(i); |
| |
|
| | const std::string& op = node.op_type(); |
| |
|
| | if (op == "Constant") |
| | { |
| | constant_node_count_moved_to_weight++; |
| | } |
| | } |
| |
|
| | |
| | |
| | blob_names.erase(""); |
| | node_reference.erase(""); |
| |
|
| | |
| | int split_layer_count = 0; |
| | int splitncnn_blob_count = 0; |
| | |
| | std::map<std::string, int> split_node_reference; |
| | for (std::map<std::string, int>::iterator it = node_reference.begin(); it != node_reference.end(); it++) |
| | { |
| | if (it->second > 1) |
| | { |
| | split_layer_count++; |
| | splitncnn_blob_count += it->second; |
| |
|
| | split_node_reference[it->first] = it->second; |
| | } |
| | } |
| |
|
| | fprintf(pp, "%zu %zu\n", node_count - constant_node_count_moved_to_weight + weights.size() - zero_reference_weight_node_count - reduced_node_count + input_node_count + split_layer_count, blob_names.size() - zero_reference_weight_node_count + splitncnn_blob_count); |
| |
|
| | int internal_split = 0; |
| |
|
| | |
| | for (int j = 0; j < graph.input_size(); j++) |
| | { |
| | const std::string& input_name = graph.input(j).name(); |
| |
|
| | |
| | if (weights.find(input_name) != weights.end()) |
| | continue; |
| |
|
| | fprintf(pp, "%-16s %-24s 0 1 %s\n", "Input", trunc_name(input_name).c_str(), trunc_name(input_name).c_str()); |
| |
|
| | int refcount = node_reference[input_name]; |
| | if (refcount <= 1) |
| | { |
| | continue; |
| | } |
| |
|
| | char splitname[256]; |
| | sprintf(splitname, "splitncnn_input%d", j); |
| | fprintf(pp, "%-16s %-24s %d %d", "Split", splitname, 1, refcount); |
| | fprintf(pp, " %s", trunc_name(input_name).c_str()); |
| |
|
| | for (int k = 0; k < refcount; k++) |
| | { |
| | std::string split_name = input_name + "_splitncnn_" + std::to_string(k); |
| | fprintf(pp, " %s", trunc_name(split_name).c_str()); |
| | } |
| | fprintf(pp, "\n"); |
| | } |
| |
|
| | |
| | for (std::map<std::string, onnx::TensorProto>::iterator weight_it = weights.begin(); weight_it != weights.end(); weight_it++) |
| | { |
| | const std::string& input_name = weight_it->first; |
| |
|
| | int refcount = node_reference[input_name]; |
| | if (refcount == 0) |
| | { |
| | continue; |
| | } |
| |
|
| | fprintf(pp, "%-16s %-24s 0 1 %s", "MemoryData", trunc_name(input_name).c_str(), trunc_name(input_name).c_str()); |
| |
|
| | const onnx::TensorProto& M = weights[input_name]; |
| |
|
| | if (M.dims_size() == 0) |
| | { |
| | fprintf(pp, " 0=%d", get_tensor_proto_data_size(M)); |
| | } |
| | else if (M.dims_size() == 1) |
| | { |
| | fprintf(pp, " 0=%d", (int)M.dims(0)); |
| | } |
| | else if (M.dims_size() == 2) |
| | { |
| | fprintf(pp, " 0=%d", (int)M.dims(1)); |
| | if (M.dims(0) != 1) |
| | { |
| | fprintf(pp, " 1=%d", (int)M.dims(0)); |
| | } |
| | } |
| | else if (M.dims_size() == 3) |
| | { |
| | fprintf(pp, " 0=%d", (int)M.dims(2)); |
| | fprintf(pp, " 1=%d", (int)M.dims(1)); |
| | if (M.dims(0) != 1) |
| | { |
| | fprintf(pp, " 2=%d", (int)M.dims(0)); |
| | } |
| | } |
| | else if (M.dims_size() == 4) |
| | { |
| | fprintf(pp, " 0=%d", (int)M.dims(3)); |
| | fprintf(pp, " 1=%d", (int)M.dims(2)); |
| | fprintf(pp, " 2=%d", (int)M.dims(1)); |
| | } |
| |
|
| | fprintf(pp, "\n"); |
| |
|
| | fwrite_tensor_proto_data(M, bp); |
| |
|
| | if (refcount <= 1) |
| | { |
| | continue; |
| | } |
| |
|
| | char splitname[256]; |
| | sprintf(splitname, "splitncnn_%d", internal_split); |
| | fprintf(pp, "%-16s %-24s %d %d", "Split", splitname, 1, refcount); |
| |
|
| | fprintf(pp, " %s", trunc_name(input_name).c_str()); |
| |
|
| | for (int k = 0; k < refcount; k++) |
| | { |
| | std::string split_name = input_name + "_splitncnn_" + std::to_string(k); |
| | fprintf(pp, " %s", trunc_name(split_name).c_str()); |
| | } |
| | fprintf(pp, "\n"); |
| |
|
| | internal_split++; |
| | } |
| |
|
| | for (int i = 0; i < node_count; i++) |
| | { |
| | const onnx::NodeProto& node = graph.node(i); |
| |
|
| | const std::string& op = node.op_type(); |
| |
|
| | |
| |
|
| | if (op == "noop_reducedncnn") |
| | { |
| | continue; |
| | } |
| |
|
| | std::string name = node.name(); |
| | if (name.empty()) |
| | { |
| | name = node.output(0); |
| | } |
| |
|
| | int input_size = node.input_size(); |
| | int output_size = node.output_size(); |
| |
|
| | for (int j = 0; j < (int)node.input_size(); j++) |
| | { |
| | const std::string& input_name = node.input(j); |
| |
|
| | |
| | if (weights.find(input_name) != weights.end() && node_reference[input_name] == 0) |
| | { |
| | input_size--; |
| | } |
| |
|
| | if (input_name.empty()) |
| | { |
| | input_size--; |
| | } |
| |
|
| | |
| | } |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| |
|
| | if (op == "Abs") |
| | { |
| | fprintf(pp, "%-16s", "UnaryOp"); |
| | } |
| | else if (op == "Acos") |
| | { |
| | fprintf(pp, "%-16s", "UnaryOp"); |
| | } |
| | else if (op == "Add") |
| | { |
| | fprintf(pp, "%-16s", "BinaryOp"); |
| | } |
| | else if (op == "Asin") |
| | { |
| | fprintf(pp, "%-16s", "UnaryOp"); |
| | } |
| | else if (op == "Atan") |
| | { |
| | fprintf(pp, "%-16s", "UnaryOp"); |
| | } |
| | else if (op == "AveragePool" || op == "MaxPool") |
| | { |
| | std::vector<int> kernel_shape = get_node_attr_ai(node, "kernel_shape"); |
| | if (kernel_shape.size() == 1) |
| | { |
| | fprintf(pp, "%-16s", "Pooling1D"); |
| | } |
| | else |
| | { |
| | fprintf(pp, "%-16s", "Pooling"); |
| | } |
| | } |
| | else if (op == "BatchNormalization") |
| | { |
| | fprintf(pp, "%-16s", "BatchNorm"); |
| | } |
| | else if (op == "BiasGelu") |
| | { |
| | fprintf(pp, "%-16s", "BiasGelu"); |
| | } |
| | else if (op == "Ceil") |
| | { |
| | fprintf(pp, "%-16s", "UnaryOp"); |
| | } |
| | else if (op == "Celu") |
| | { |
| | fprintf(pp, "%-16s", "CELU"); |
| | } |
| | else if (op == "Clip") |
| | { |
| | fprintf(pp, "%-16s", "Clip"); |
| | } |
| | else if (op == "Concat") |
| | { |
| | fprintf(pp, "%-16s", "Concat"); |
| | } |
| | else if (op == "Constant") |
| | { |
| | continue; |
| | } |
| | else if (op == "Conv") |
| | { |
| | std::vector<int> kernel_shape = get_node_attr_ai(node, "kernel_shape"); |
| | if (kernel_shape.size() == 1) |
| | { |
| | fprintf(pp, "%-16s", "Convolution1D"); |
| | } |
| | else |
| | { |
| | int group = get_node_attr_i(node, "group", 1); |
| | if (group > 1) |
| | { |
| | fprintf(pp, "%-16s", "ConvolutionDepthWise"); |
| | } |
| | else |
| | { |
| | fprintf(pp, "%-16s", "Convolution"); |
| | } |
| | } |
| | } |
| | else if (op == "ConvTranspose") |
| | { |
| | int group = get_node_attr_i(node, "group", 1); |
| | if (group > 1) |
| | { |
| | fprintf(pp, "%-16s", "DeconvolutionDepthWise"); |
| | } |
| | else |
| | { |
| | fprintf(pp, "%-16s", "Deconvolution"); |
| | } |
| | } |
| | else if (op == "Cos") |
| | { |
| | fprintf(pp, "%-16s", "UnaryOp"); |
| | } |
| | else if (op == "Crop") |
| | { |
| | fprintf(pp, "%-16s", "Crop"); |
| | } |
| | else if (op == "DepthToSpace") |
| | { |
| | fprintf(pp, "%-16s", "PixelShuffle"); |
| | } |
| | else if (op == "Div") |
| | { |
| | fprintf(pp, "%-16s", "BinaryOp"); |
| | } |
| | else if (op == "Dropout") |
| | { |
| | fprintf(pp, "%-16s", "Dropout"); |
| | output_size = 1; |
| | } |
| | else if (op == "Elu") |
| | { |
| | fprintf(pp, "%-16s", "ELU"); |
| | } |
| | else if (op == "EmbedLayerNormalization") |
| | { |
| | fprintf(pp, "%-16s", "EmbedLayerNormalization"); |
| | } |
| | else if (op == "Erf") |
| | { |
| | fprintf(pp, "%-16s", "Erf"); |
| | } |
| | else if (op == "Exp") |
| | { |
| | fprintf(pp, "%-16s", "UnaryOp"); |
| | } |
| | else if (op == "Flatten") |
| | { |
| | fprintf(pp, "%-16s", "Flatten"); |
| | } |
| | else if (op == "Floor") |
| | { |
| | fprintf(pp, "%-16s", "UnaryOp"); |
| | } |
| | else if (op == "Gelu") |
| | { |
| | fprintf(pp, "%-16s", "GELU"); |
| | } |
| | else if (op == "Gemm") |
| | { |
| | float alpha = get_node_attr_f(node, "alpha", 1.f); |
| | float beta = get_node_attr_f(node, "beta", 1.f); |
| | int transA = get_node_attr_i(node, "transA", 0); |
| | int transB = get_node_attr_i(node, "transB", 0); |
| |
|
| | if (alpha == 1.f && beta == 1.f && transA == 0 && transB == 1) |
| | { |
| | |
| | fprintf(pp, "%-16s", "InnerProduct"); |
| | } |
| | else |
| | { |
| | fprintf(pp, "%-16s", "Gemm"); |
| | } |
| | } |
| | else if (op == "GlobalAveragePool") |
| | { |
| | fprintf(pp, "%-16s", "Pooling"); |
| | } |
| | else if (op == "GlobalMaxPool") |
| | { |
| | fprintf(pp, "%-16s", "Pooling"); |
| | } |
| | else if (op == "adaptive_avg_pool2d" || op == "adaptive_max_pool2d") |
| | { |
| | fprintf(pp, "%-16s", "Pooling"); |
| | } |
| | else if (op == "GroupNorm") |
| | { |
| | fprintf(pp, "%-16s", "GroupNorm"); |
| | } |
| | else if (op == "GRU") |
| | { |
| | fprintf(pp, "%-16s", "GRU"); |
| | } |
| | else if (op == "HardSigmoid") |
| | { |
| | fprintf(pp, "%-16s", "HardSigmoid"); |
| | } |
| | else if (op == "HardSwish") |
| | { |
| | fprintf(pp, "%-16s", "HardSwish"); |
| | } |
| | else if (op == "ImageScaler") |
| | { |
| | fprintf(pp, "%-16s", "Scale"); |
| | } |
| | else if (op == "InstanceNormalization") |
| | { |
| | fprintf(pp, "%-16s", "InstanceNorm"); |
| | } |
| | else if (op == "LayerNorm") |
| | { |
| | fprintf(pp, "%-16s", "LayerNorm"); |
| | } |
| | else if (op == "LeakyRelu") |
| | { |
| | fprintf(pp, "%-16s", "ReLU"); |
| | } |
| | else if (op == "Log") |
| | { |
| | fprintf(pp, "%-16s", "UnaryOp"); |
| | } |
| | else if (op == "LRN") |
| | { |
| | fprintf(pp, "%-16s", "LRN"); |
| | } |
| | else if (op == "LSTM") |
| | { |
| | fprintf(pp, "%-16s", "LSTM"); |
| | } |
| | else if (op == "MatMul") |
| | { |
| | if (weights.find(node.input(1)) != weights.end() && weights[node.input(1)].dims_size() == 2) |
| | { |
| | fprintf(pp, "%-16s", "InnerProduct"); |
| | } |
| | else |
| | { |
| | fprintf(pp, "%-16s", "Gemm"); |
| | } |
| | } |
| | else if (op == "Max") |
| | { |
| | fprintf(pp, "%-16s", "BinaryOp"); |
| | } |
| | else if (op == "Min") |
| | { |
| | fprintf(pp, "%-16s", "BinaryOp"); |
| | } |
| | else if (op == "Mul") |
| | { |
| | fprintf(pp, "%-16s", "BinaryOp"); |
| | } |
| | else if (op == "MultiHeadAttention") |
| | { |
| | fprintf(pp, "%-16s", "MultiHeadAttention"); |
| | } |
| | else if (op == "Neg") |
| | { |
| | fprintf(pp, "%-16s", "UnaryOp"); |
| | } |
| | else if (op == "Normalize") |
| | { |
| | fprintf(pp, "%-16s", "Normalize"); |
| | } |
| | else if (op == "Pad") |
| | { |
| | fprintf(pp, "%-16s", "Padding"); |
| | } |
| | else if (op == "PixelShuffle") |
| | { |
| | fprintf(pp, "%-16s", "PixelShuffle"); |
| | } |
| | else if (op == "Pow") |
| | { |
| | fprintf(pp, "%-16s", "BinaryOp"); |
| | } |
| | else if (op == "PRelu") |
| | { |
| | fprintf(pp, "%-16s", "PReLU"); |
| | } |
| | else if (op == "Reciprocal") |
| | { |
| | fprintf(pp, "%-16s", "UnaryOp"); |
| | } |
| | else if (op == "ReduceMax" || op == "ReduceMin" || op == "ReduceMean" || op == "ReduceProd" || op == "ReduceSum" || op == "ReduceSumSquare" || op == "ReduceL1" || op == "ReduceL2" || op == "ReduceLogSum" || op == "ReduceLogSumExp") |
| | { |
| | fprintf(pp, "%-16s", "Reduction"); |
| | } |
| | else if (op == "Relu") |
| | { |
| | fprintf(pp, "%-16s", "ReLU"); |
| | } |
| | else if (op == "Reorg") |
| | { |
| | fprintf(pp, "%-16s", "Reorg"); |
| | } |
| | else if (op == "Reshape") |
| | { |
| | fprintf(pp, "%-16s", "Reshape"); |
| | } |
| | else if (op == "RNN") |
| | { |
| | fprintf(pp, "%-16s", "RNN"); |
| | } |
| | else if (op == "RDiv") |
| | { |
| | fprintf(pp, "%-16s", "BinaryOp"); |
| | } |
| | else if (op == "RSub") |
| | { |
| | fprintf(pp, "%-16s", "BinaryOp"); |
| | } |
| | else if (op == "ShuffleChannel") |
| | { |
| | fprintf(pp, "%-16s", "ShuffleChannel"); |
| | } |
| | else if (op == "Sigmoid") |
| | { |
| | fprintf(pp, "%-16s", "Sigmoid"); |
| | } |
| | else if (op == "Sin") |
| | { |
| | fprintf(pp, "%-16s", "UnaryOp"); |
| | } |
| | else if (op == "SkipLayerNormalization") |
| | { |
| | fprintf(pp, "%-16s", "SkipLayerNormalization"); |
| | } |
| | else if (op == "Slice") |
| | { |
| | fprintf(pp, "%-16s", "Crop"); |
| | } |
| | else if (op == "Softmax") |
| | { |
| | fprintf(pp, "%-16s", "Softmax"); |
| | } |
| | else if (op == "Softplus") |
| | { |
| | fprintf(pp, "%-16s", "Softplus"); |
| | } |
| | else if (op == "Split") |
| | { |
| | fprintf(pp, "%-16s", "Slice"); |
| | } |
| | else if (op == "Sqrt") |
| | { |
| | fprintf(pp, "%-16s", "UnaryOp"); |
| | } |
| | else if (op == "Squeeze") |
| | { |
| | fprintf(pp, "%-16s", "Squeeze"); |
| | } |
| | else if (op == "Sub") |
| | { |
| | fprintf(pp, "%-16s", "BinaryOp"); |
| | } |
| | else if (op == "Sum") |
| | { |
| | fprintf(pp, "%-16s", "Eltwise"); |
| | } |
| | else if (op == "Swish") |
| | { |
| | fprintf(pp, "%-16s", "Swish"); |
| | } |
| | else if (op == "Tan") |
| | { |
| | fprintf(pp, "%-16s", "UnaryOp"); |
| | } |
| | else if (op == "Tanh") |
| | { |
| | fprintf(pp, "%-16s", "UnaryOp"); |
| | } |
| | else if (op == "Transpose") |
| | { |
| | fprintf(pp, "%-16s", "Permute"); |
| | } |
| | else if (op == "Upsample" || op == "Resize") |
| | { |
| | fprintf(pp, "%-16s", "Interp"); |
| | } |
| | else if (op == "Unsqueeze") |
| | { |
| | fprintf(pp, "%-16s", "ExpandDims"); |
| | } |
| | else |
| | { |
| | |
| | fprintf(stderr, "%s not supported yet!\n", op.c_str()); |
| | fprintf(pp, "%-16s", op.c_str()); |
| | } |
| |
|
| | fprintf(pp, " %-24s %d %d", trunc_name(name).c_str(), input_size, output_size); |
| |
|
| | for (int j = 0; j < (int)node.input_size(); j++) |
| | { |
| | std::string input_name = node.input(j); |
| |
|
| | |
| | if (weights.find(input_name) != weights.end() && node_reference[input_name] == 0) |
| | { |
| | continue; |
| | } |
| |
|
| | if (input_name.empty()) |
| | { |
| | continue; |
| | } |
| |
|
| | if (split_node_reference.find(input_name) != split_node_reference.end()) |
| | { |
| | int refidx = split_node_reference[input_name] - 1; |
| | split_node_reference[input_name] = refidx; |
| |
|
| | char splitsuffix[256]; |
| | sprintf(splitsuffix, "_splitncnn_%d", refidx); |
| | input_name = input_name + splitsuffix; |
| | } |
| |
|
| | fprintf(pp, " %s", trunc_name(input_name).c_str()); |
| | } |
| |
|
| | for (int j = 0; j < output_size; j++) |
| | { |
| | const std::string& output_name = node.output(j); |
| |
|
| | fprintf(pp, " %s", trunc_name(output_name).c_str()); |
| | } |
| |
|
| | if (op == "Abs") |
| | { |
| | int op_type = 0; |
| | fprintf(pp, " 0=%d", op_type); |
| | } |
| | else if (op == "Acos") |
| | { |
| | int op_type = 13; |
| | fprintf(pp, " 0=%d", op_type); |
| | } |
| | else if (op == "Add") |
| | { |
| | int op_type = 0; |
| | fprintf(pp, " 0=%d", op_type); |
| |
|
| | int with_scalar = get_node_attr_i(node, "with_scalar", 0); |
| | float b = get_node_attr_f(node, "b", 0.f); |
| | if (with_scalar) |
| | { |
| | fprintf(pp, " 1=%d", with_scalar); |
| | fprintf(pp, " 2=%e", b); |
| | } |
| | } |
| | else if (op == "Asin") |
| | { |
| | int op_type = 12; |
| | fprintf(pp, " 0=%d", op_type); |
| | } |
| | else if (op == "Atan") |
| | { |
| | int op_type = 14; |
| | fprintf(pp, " 0=%d", op_type); |
| | } |
| | else if (op == "AveragePool" || op == "MaxPool") |
| | { |
| | std::string auto_pad = get_node_attr_s(node, "auto_pad"); |
| | int ceil_mode = get_node_attr_i(node, "ceil_mode", 0); |
| | std::vector<int> kernel_shape = get_node_attr_ai(node, "kernel_shape"); |
| | std::vector<int> strides = get_node_attr_ai(node, "strides"); |
| | std::vector<int> pads = get_node_attr_ai(node, "pads"); |
| |
|
| | int pool = op == "AveragePool" ? 1 : 0; |
| | int pad_mode = 1; |
| |
|
| | if (auto_pad == "SAME_UPPER") |
| | { |
| | pad_mode = 2; |
| | } |
| | else if (auto_pad == "SAME_LOWER") |
| | { |
| | pad_mode = 3; |
| | } |
| |
|
| | if (ceil_mode == 1) |
| | { |
| | pad_mode = 0; |
| | } |
| |
|
| | fprintf(pp, " 0=%d", pool); |
| |
|
| | if (kernel_shape.size() == 1) |
| | { |
| | fprintf(pp, " 1=%d", kernel_shape[0]); |
| | } |
| | else if (kernel_shape.size() == 2) |
| | { |
| | fprintf(pp, " 1=%d", kernel_shape[1]); |
| | fprintf(pp, " 11=%d", kernel_shape[0]); |
| | } |
| |
|
| | if (strides.size() == 1) |
| | { |
| | fprintf(pp, " 2=%d", strides[0]); |
| | } |
| | else if (strides.size() == 2) |
| | { |
| | fprintf(pp, " 2=%d", strides[1]); |
| | fprintf(pp, " 12=%d", strides[0]); |
| | } |
| |
|
| | if (pads.size() == 1) |
| | { |
| | fprintf(pp, " 3=%d", pads[0]); |
| | } |
| | else if (pads.size() == 2) |
| | { |
| | fprintf(pp, " 3=%d", pads[1]); |
| | fprintf(pp, " 13=%d", pads[0]); |
| | } |
| | else if (pads.size() == 4) |
| | { |
| | fprintf(pp, " 3=%d", pads[1]); |
| | fprintf(pp, " 13=%d", pads[0]); |
| | fprintf(pp, " 14=%d", pads[3]); |
| | fprintf(pp, " 15=%d", pads[2]); |
| | } |
| |
|
| | fprintf(pp, " 5=%d", pad_mode); |
| |
|
| | if (op == "AveragePool") |
| | { |
| | int avgpool_count_include_pad = get_node_attr_i(node, "count_include_pad", 0); |
| | fprintf(pp, " 6=%d", avgpool_count_include_pad); |
| | } |
| | } |
| | else if (op == "BatchNormalization") |
| | { |
| | float epsilon = get_node_attr_f(node, "epsilon", 1e-5f); |
| |
|
| | const onnx::TensorProto& scale = weights[node.input(1)]; |
| | const onnx::TensorProto& B = weights[node.input(2)]; |
| | const onnx::TensorProto& mean = weights[node.input(3)]; |
| | const onnx::TensorProto& var = weights[node.input(4)]; |
| |
|
| | int channels = get_tensor_proto_data_size(scale); |
| |
|
| | fprintf(pp, " 0=%d", channels); |
| |
|
| | fwrite_tensor_proto_data(scale, bp); |
| | fwrite_tensor_proto_data(mean, bp); |
| | |
| | { |
| | const float* v = var.has_raw_data() ? (const float*)var.raw_data().data() : var.float_data().data(); |
| |
|
| | for (int j = 0; j < channels; j++) |
| | { |
| | float ve = v[j] + epsilon; |
| | fwrite(&ve, sizeof(float), 1, bp); |
| | } |
| | } |
| | fwrite_tensor_proto_data(B, bp); |
| | } |
| | else if (op == "BiasGelu") |
| | { |
| | const onnx::TensorProto& B = weights[node.input(1)]; |
| |
|
| | fprintf(pp, " 0=%d", get_tensor_proto_data_size(B)); |
| |
|
| | int quantize_tag = 0; |
| | fwrite(&quantize_tag, sizeof(int), 1, bp); |
| |
|
| | fwrite_tensor_proto_data(B, bp); |
| | } |
| | else if (op == "Ceil") |
| | { |
| | int op_type = 3; |
| | fprintf(pp, " 0=%d", op_type); |
| | } |
| | else if (op == "CeLU") |
| | { |
| | float alpha = get_node_attr_f(node, "alpha", 1.0f); |
| |
|
| | fprintf(pp, " 0=%e", alpha); |
| | } |
| | else if (op == "Clip") |
| | { |
| | float min; |
| | float max; |
| | if (node.input_size() == 1) |
| | { |
| | min = get_node_attr_f(node, "min", -FLT_MAX); |
| | max = get_node_attr_f(node, "max", FLT_MAX); |
| | } |
| | else |
| | { |
| | min = weights.find(node.input(1)) != weights.end() ? get_node_attr_from_input_f(weights[node.input(1)]) : -FLT_MAX; |
| | max = weights.find(node.input(2)) != weights.end() ? get_node_attr_from_input_f(weights[node.input(2)]) : FLT_MAX; |
| | } |
| |
|
| | fprintf(pp, " 0=%e", min); |
| | fprintf(pp, " 1=%e", max); |
| | } |
| | else if (op == "Concat") |
| | { |
| | int axis = get_node_attr_i(node, "axis", 1); |
| | fprintf(pp, " 0=%d", axis > 0 ? axis - 1 : axis); |
| | } |
| | else if (op == "Constant") |
| | { |
| | |
| | } |
| | else if (op == "Conv") |
| | { |
| | const onnx::TensorProto& W = weights[node.input(1)]; |
| |
|
| | int num_filter = W.dims(0); |
| | int has_bias = node.input_size() == 3 ? 1 : 0; |
| |
|
| | std::string auto_pad = get_node_attr_s(node, "auto_pad"); |
| | std::vector<int> kernel_shape = get_node_attr_ai(node, "kernel_shape"); |
| | std::vector<int> dilations = get_node_attr_ai(node, "dilations"); |
| | std::vector<int> strides = get_node_attr_ai(node, "strides"); |
| | std::vector<int> pads = get_node_attr_ai(node, "pads"); |
| | int group = get_node_attr_i(node, "group", 1); |
| |
|
| | fprintf(pp, " 0=%d", num_filter); |
| |
|
| | if (kernel_shape.size() == 1) |
| | { |
| | fprintf(pp, " 1=%d", kernel_shape[0]); |
| | } |
| | else if (kernel_shape.size() == 2) |
| | { |
| | fprintf(pp, " 1=%d", kernel_shape[1]); |
| | fprintf(pp, " 11=%d", kernel_shape[0]); |
| | } |
| |
|
| | if (dilations.size() == 1) |
| | { |
| | fprintf(pp, " 2=%d", dilations[0]); |
| | } |
| | else if (dilations.size() == 2) |
| | { |
| | fprintf(pp, " 2=%d", dilations[1]); |
| | fprintf(pp, " 12=%d", dilations[0]); |
| | } |
| |
|
| | if (strides.size() == 1) |
| | { |
| | fprintf(pp, " 3=%d", strides[0]); |
| | } |
| | else if (strides.size() == 2) |
| | { |
| | fprintf(pp, " 3=%d", strides[1]); |
| | fprintf(pp, " 13=%d", strides[0]); |
| | } |
| |
|
| | if (auto_pad == "SAME_UPPER") |
| | { |
| | fprintf(pp, " 4=-233"); |
| | } |
| | else if (auto_pad == "SAME_LOWER") |
| | { |
| | fprintf(pp, " 4=-234"); |
| | } |
| | else |
| | { |
| | if (pads.size() == 1) |
| | { |
| | fprintf(pp, " 4=%d", pads[0]); |
| | } |
| | else if (pads.size() == 2) |
| | { |
| | fprintf(pp, " 4=%d", pads[1]); |
| | fprintf(pp, " 14=%d", pads[0]); |
| | } |
| | else if (pads.size() == 4) |
| | { |
| | fprintf(pp, " 4=%d", pads[1]); |
| | fprintf(pp, " 14=%d", pads[0]); |
| | fprintf(pp, " 15=%d", pads[3]); |
| | fprintf(pp, " 16=%d", pads[2]); |
| | } |
| | } |
| |
|
| | fprintf(pp, " 5=%d", has_bias); |
| |
|
| | fprintf(pp, " 6=%d", get_tensor_proto_data_size(W)); |
| |
|
| | if (group > 1) |
| | { |
| | fprintf(pp, " 7=%d", group); |
| | } |
| |
|
| | int quantize_tag = 0; |
| | fwrite(&quantize_tag, sizeof(int), 1, bp); |
| |
|
| | fwrite_tensor_proto_data(W, bp); |
| |
|
| | if (has_bias) |
| | { |
| | const onnx::TensorProto& B = weights[node.input(2)]; |
| | fwrite_tensor_proto_data(B, bp); |
| | } |
| | } |
| | else if (op == "ConvTranspose") |
| | { |
| | const onnx::TensorProto& W = weights[node.input(1)]; |
| |
|
| | int has_bias = node.input_size() == 3 ? 1 : 0; |
| |
|
| | std::string auto_pad = get_node_attr_s(node, "auto_pad"); |
| | std::vector<int> kernel_shape = get_node_attr_ai(node, "kernel_shape"); |
| | std::vector<int> dilations = get_node_attr_ai(node, "dilations"); |
| | std::vector<int> strides = get_node_attr_ai(node, "strides"); |
| | std::vector<int> output_padding = get_node_attr_ai(node, "output_padding"); |
| | std::vector<int> output_shape = get_node_attr_ai(node, "output_shape"); |
| | std::vector<int> pads = get_node_attr_ai(node, "pads"); |
| | int group = get_node_attr_i(node, "group", 1); |
| | int num_filter = W.dims(1) * group; |
| |
|
| | fprintf(pp, " 0=%d", num_filter); |
| |
|
| | if (kernel_shape.size() == 1) |
| | { |
| | fprintf(pp, " 1=%d", kernel_shape[0]); |
| | } |
| | else if (kernel_shape.size() == 2) |
| | { |
| | fprintf(pp, " 1=%d", kernel_shape[1]); |
| | fprintf(pp, " 11=%d", kernel_shape[0]); |
| | } |
| |
|
| | if (dilations.size() == 1) |
| | { |
| | fprintf(pp, " 2=%d", dilations[0]); |
| | } |
| | else if (dilations.size() == 2) |
| | { |
| | fprintf(pp, " 2=%d", dilations[1]); |
| | fprintf(pp, " 12=%d", dilations[0]); |
| | } |
| |
|
| | if (strides.size() == 1) |
| | { |
| | fprintf(pp, " 3=%d", strides[0]); |
| | } |
| | else if (strides.size() == 2) |
| | { |
| | fprintf(pp, " 3=%d", strides[1]); |
| | fprintf(pp, " 13=%d", strides[0]); |
| | } |
| |
|
| | if (auto_pad == "SAME_UPPER") |
| | { |
| | fprintf(pp, " 4=-233"); |
| | } |
| | else if (auto_pad == "SAME_LOWER") |
| | { |
| | fprintf(pp, " 4=-234"); |
| | } |
| | else |
| | { |
| | if (pads.size() == 1) |
| | { |
| | fprintf(pp, " 4=%d", pads[0]); |
| | } |
| | else if (pads.size() == 2) |
| | { |
| | fprintf(pp, " 4=%d", pads[1]); |
| | fprintf(pp, " 14=%d", pads[0]); |
| | } |
| | else if (pads.size() == 4) |
| | { |
| | fprintf(pp, " 4=%d", pads[1]); |
| | fprintf(pp, " 14=%d", pads[0]); |
| | fprintf(pp, " 15=%d", pads[3]); |
| | fprintf(pp, " 16=%d", pads[2]); |
| | } |
| | } |
| |
|
| | if (output_padding.size() == 1) |
| | { |
| | fprintf(pp, " 18=%d", output_padding[0]); |
| | } |
| | else if (output_padding.size() == 2) |
| | { |
| | fprintf(pp, " 18=%d", output_padding[1]); |
| | fprintf(pp, " 19=%d", output_padding[0]); |
| | } |
| |
|
| | if (output_shape.size() == 1) |
| | { |
| | fprintf(pp, " 20=%d", output_shape[0]); |
| | } |
| | else if (output_shape.size() == 2) |
| | { |
| | fprintf(pp, " 20=%d", output_shape[1]); |
| | fprintf(pp, " 21=%d", output_shape[0]); |
| | } |
| |
|
| | fprintf(pp, " 5=%d", has_bias); |
| |
|
| | fprintf(pp, " 6=%d", get_tensor_proto_data_size(W)); |
| |
|
| | if (group > 1) |
| | { |
| | fprintf(pp, " 7=%d", group); |
| | } |
| |
|
| | int quantize_tag = 0; |
| | fwrite(&quantize_tag, sizeof(int), 1, bp); |
| |
|
| | int maxk = 0; |
| | if (kernel_shape.size() == 2) |
| | { |
| | maxk = kernel_shape[1] * kernel_shape[0]; |
| | } |
| | else |
| | { |
| | maxk = kernel_shape[0] * kernel_shape[0]; |
| | } |
| | int weight_data_size = get_tensor_proto_data_size(W); |
| | const float* weight_data = 0; |
| | if (W.has_raw_data()) |
| | { |
| | weight_data = (const float*)W.raw_data().data(); |
| | } |
| | else if (W.data_type() == 1) |
| | { |
| | weight_data = W.float_data().data(); |
| | } |
| | for (int g = 0; g < group; g++) |
| | { |
| | |
| | int num_filter_g = num_filter / group; |
| | int num_input = weight_data_size / maxk / num_filter_g / group; |
| | const float* weight_data_ptr = weight_data + g * maxk * num_filter_g * num_input; |
| | for (int k = 0; k < num_filter_g; k++) |
| | { |
| | for (int j = 0; j < num_input; j++) |
| | { |
| | fwrite(weight_data_ptr + (j * num_filter_g + k) * maxk, sizeof(float), maxk, bp); |
| | } |
| | } |
| | } |
| |
|
| | if (has_bias) |
| | { |
| | const onnx::TensorProto& B = weights[node.input(2)]; |
| | fwrite_tensor_proto_data(B, bp); |
| | } |
| | } |
| | else if (op == "Cos") |
| | { |
| | int op_type = 10; |
| | fprintf(pp, " 0=%d", op_type); |
| | } |
| | else if (op == "Crop") |
| | { |
| | auto starts = get_node_attr_ai(node, "starts"); |
| | fprintf(pp, " -23309=%zu", starts.size()); |
| | for (size_t j = 0; j < starts.size(); ++j) |
| | { |
| | fprintf(pp, ",%i", starts[j]); |
| | } |
| | auto ends = get_node_attr_ai(node, "ends"); |
| | fprintf(pp, " -23310=%zu", ends.size()); |
| | for (size_t j = 0; j < ends.size(); ++j) |
| | { |
| | fprintf(pp, ",%i", ends[j]); |
| | } |
| | auto axis = get_node_attr_ai(node, "axis"); |
| | fprintf(pp, " -23311=%zu", axis.size()); |
| | for (size_t j = 0; j < axis.size(); ++j) |
| | { |
| | fprintf(pp, ",%i", axis[j]); |
| | } |
| | } |
| | else if (op == "DepthToSpace") |
| | { |
| | |
| | int scale_factor = get_node_attr_i(node, "blocksize", 1); |
| | std::string mode = get_node_attr_s(node, "mode"); |
| | fprintf(pp, " 0=%d", scale_factor); |
| | if (mode == "CRD") |
| | { |
| | fprintf(pp, " 1=0"); |
| | } |
| | else if (mode == "DCR") |
| | { |
| | fprintf(pp, " 1=1"); |
| | } |
| | } |
| | else if (op == "Div") |
| | { |
| | int op_type = 3; |
| | fprintf(pp, " 0=%d", op_type); |
| |
|
| | int with_scalar = get_node_attr_i(node, "with_scalar", 0); |
| | float b = get_node_attr_f(node, "b", 0.f); |
| | if (with_scalar) |
| | { |
| | fprintf(pp, " 1=%d", with_scalar); |
| | fprintf(pp, " 2=%e", b); |
| | } |
| | } |
| | else if (op == "Dropout") |
| | { |
| | |
| | } |
| | else if (op == "Elu") |
| | { |
| | float alpha = get_node_attr_f(node, "alpha", 1.f); |
| | fprintf(pp, " 0=%e", alpha); |
| | } |
| | else if (op == "EmbedLayerNormalization") |
| | { |
| | const onnx::TensorProto& words = weights[node.input(2)]; |
| | const onnx::TensorProto& positions = weights[node.input(3)]; |
| | const onnx::TensorProto& W = weights[node.input(5)]; |
| | const onnx::TensorProto& B = weights[node.input(6)]; |
| |
|
| | fprintf(pp, " 0=%d", get_tensor_proto_data_size(B)); |
| | fprintf(pp, " 1=%d", get_tensor_proto_data_size(words)); |
| | fprintf(pp, " 2=%d", get_tensor_proto_data_size(positions)); |
| |
|
| | int quantize_tag = 0; |
| | fwrite(&quantize_tag, sizeof(int), 1, bp); |
| |
|
| | fwrite_tensor_proto_data(words, bp); |
| |
|
| | fwrite(&quantize_tag, sizeof(int), 1, bp); |
| |
|
| | fwrite_tensor_proto_data(positions, bp); |
| |
|
| | fwrite(&quantize_tag, sizeof(int), 1, bp); |
| |
|
| | fwrite_tensor_proto_data(W, bp); |
| |
|
| | fwrite(&quantize_tag, sizeof(int), 1, bp); |
| |
|
| | fwrite_tensor_proto_data(B, bp); |
| | } |
| | else if (op == "Erf") |
| | { |
| | |
| | } |
| | else if (op == "Exp") |
| | { |
| | int op_type = 7; |
| | fprintf(pp, " 0=%d", op_type); |
| | } |
| | else if (op == "Flatten") |
| | { |
| | int axis = get_node_attr_i(node, "axis", 1); |
| | if (axis != 1) |
| | { |
| | fprintf(stderr, "Unsupported Flatten axis %d!\n", axis); |
| | } |
| | } |
| | else if (op == "Floor") |
| | { |
| | int op_type = 2; |
| | fprintf(pp, " 0=%d", op_type); |
| | } |
| | else if (op == "Gelu") |
| | { |
| | fprintf(pp, " 0=1"); |
| | } |
| | else if (op == "Gemm") |
| | { |
| | float alpha = get_node_attr_f(node, "alpha", 1.f); |
| | float beta = get_node_attr_f(node, "beta", 1.f); |
| | int transA = get_node_attr_i(node, "transA", 0); |
| | int transB = get_node_attr_i(node, "transB", 0); |
| |
|
| | if (alpha == 1.f && beta == 1.f && transA == 0 && transB == 1) |
| | { |
| | |
| | const onnx::TensorProto& B = weights[node.input(1)]; |
| | const onnx::TensorProto& C = weights[node.input(2)]; |
| |
|
| | fprintf(pp, " 0=%d", get_tensor_proto_data_size(C)); |
| | fprintf(pp, " 1=1"); |
| | fprintf(pp, " 2=%d", get_tensor_proto_data_size(B)); |
| |
|
| | int quantize_tag = 0; |
| | fwrite(&quantize_tag, sizeof(int), 1, bp); |
| |
|
| | fwrite_tensor_proto_data(B, bp); |
| | fwrite_tensor_proto_data(C, bp); |
| | } |
| | else |
| | { |
| | |
| | fprintf(pp, " 0=%e", alpha); |
| | fprintf(pp, " 1=%e", beta); |
| | fprintf(pp, " 2=%d", transA); |
| | fprintf(pp, " 3=%d", transB); |
| | } |
| | } |
| | else if (op == "GlobalAveragePool") |
| | { |
| | int pool = 1; |
| | int global_pool = 1; |
| |
|
| | fprintf(pp, " 0=%d", pool); |
| | fprintf(pp, " 4=%d", global_pool); |
| | } |
| | else if (op == "GlobalMaxPool") |
| | { |
| | int pool = 0; |
| | int global_pool = 1; |
| |
|
| | fprintf(pp, " 0=%d", pool); |
| | fprintf(pp, " 4=%d", global_pool); |
| | } |
| | else if (op == "adaptive_avg_pool2d" || op == "adaptive_max_pool2d") |
| | { |
| | int pool = 0; |
| | if (op == "adaptive_avg_pool2d") |
| | { |
| | pool = 1; |
| | } |
| | int adaptive_pooling = 1; |
| | const onnx::TensorProto& out_shape_tp = weights[node.input(1)]; |
| | std::vector<int> out_shape = get_node_attr_from_input_ai(out_shape_tp); |
| |
|
| | fprintf(pp, " 0=%d", pool); |
| | fprintf(pp, " 7=%d", adaptive_pooling); |
| | if (out_shape.size() == 1) |
| | { |
| | fprintf(pp, " 8=%d", out_shape[0]); |
| | } |
| | else if (out_shape.size() == 2) |
| | { |
| | |
| | fprintf(pp, " 8=%d", out_shape[1]); |
| | |
| | fprintf(pp, " 18=%d", out_shape[0]); |
| | } |
| | } |
| | else if (op == "GroupNorm") |
| | { |
| | int groups = get_node_attr_i(node, "groups", 1); |
| | int channels = get_node_attr_i(node, "channels", 1); |
| | float eps = get_node_attr_f(node, "epsilon", 1e-5f); |
| | int affine = get_node_attr_i(node, "affine", 1); |
| |
|
| | if (affine) |
| | { |
| | |
| | std::vector<float> affine_S = get_node_attr_from_input_af(weights[node.input(1)]); |
| | std::vector<float> affine_B = get_node_attr_from_input_af(weights[node.input(2)]); |
| | if (affine_S.size() == 1 && affine_S[0] == 1.f && affine_B.size() == 1 && affine_B[0] == 0.f) |
| | { |
| | affine = 0; |
| | } |
| | else |
| | { |
| | affine = 0; |
| | { |
| | for (int j = 0; j < channels; j++) |
| | { |
| | if (affine_S[j] != 1.f || affine_B[j] != 0.f) |
| | { |
| | affine = 1; |
| | break; |
| | } |
| | } |
| | } |
| | } |
| | } |
| |
|
| | fprintf(pp, " 0=%d", groups); |
| | fprintf(pp, " 1=%d", channels); |
| | fprintf(pp, " 2=%e", eps); |
| | fprintf(pp, " 3=%d", affine); |
| | if (affine) |
| | { |
| | const onnx::TensorProto& scale = weights[node.input(1)]; |
| | const onnx::TensorProto& B = weights[node.input(2)]; |
| |
|
| | fwrite_tensor_proto_data(scale, bp); |
| | fwrite_tensor_proto_data(B, bp); |
| | } |
| | } |
| | else if (op == "GRU") |
| | { |
| | const onnx::TensorProto& W = weights[node.input(1)]; |
| | const onnx::TensorProto& R = weights[node.input(2)]; |
| | const onnx::TensorProto& B = weights[node.input(3)]; |
| |
|
| | int hidden_size = get_node_attr_i(node, "hidden_size", 0); |
| | std::string direction = get_node_attr_s(node, "direction"); |
| |
|
| | int direction_type = 0; |
| | if (direction == "forward") |
| | { |
| | direction_type = 0; |
| | } |
| | else if (direction == "reverse") |
| | { |
| | direction_type = 1; |
| | } |
| | else if (direction == "bidirectional") |
| | { |
| | direction_type = 2; |
| | } |
| |
|
| | int weight_data_size = get_tensor_proto_data_size(W); |
| |
|
| | fprintf(pp, " 0=%d", hidden_size); |
| | fprintf(pp, " 1=%d", weight_data_size); |
| | fprintf(pp, " 2=%d", direction_type); |
| |
|
| | int num_directions = direction_type == 2 ? 2 : 1; |
| |
|
| | int quantize_tag = 0; |
| |
|
| | |
| | { |
| | fwrite(&quantize_tag, sizeof(int), 1, bp); |
| |
|
| | int weight_data_size_g = get_tensor_proto_data_size(W) / 3 / num_directions; |
| | const float* wptr = W.has_raw_data() ? (const float*)W.raw_data().data() : W.float_data().data(); |
| |
|
| | const float* uptr = wptr; |
| | const float* rptr = wptr + weight_data_size_g; |
| | const float* nptr = wptr + weight_data_size_g * 2; |
| | fwrite(rptr, sizeof(float), weight_data_size_g, bp); |
| | fwrite(uptr, sizeof(float), weight_data_size_g, bp); |
| | fwrite(nptr, sizeof(float), weight_data_size_g, bp); |
| |
|
| | if (direction_type == 2) |
| | { |
| | uptr += weight_data_size_g * 3; |
| | rptr += weight_data_size_g * 3; |
| | nptr += weight_data_size_g * 3; |
| | fwrite(rptr, sizeof(float), weight_data_size_g, bp); |
| | fwrite(uptr, sizeof(float), weight_data_size_g, bp); |
| | fwrite(nptr, sizeof(float), weight_data_size_g, bp); |
| | } |
| | } |
| |
|
| | |
| | |
| | { |
| | fwrite(&quantize_tag, sizeof(int), 1, bp); |
| |
|
| | int bias_data_size_g = get_tensor_proto_data_size(B) / 2 / 3 / num_directions; |
| | const float* bptr = B.has_raw_data() ? (const float*)B.raw_data().data() : B.float_data().data(); |
| | const float* wuptr = bptr; |
| | const float* wrptr = bptr + bias_data_size_g; |
| | const float* wnptr = bptr + bias_data_size_g * 2; |
| | const float* buptr = bptr + bias_data_size_g * 3; |
| | const float* brptr = bptr + bias_data_size_g * 4; |
| | const float* bnptr = bptr + bias_data_size_g * 5; |
| |
|
| | for (int j = 0; j < bias_data_size_g; j++) |
| | { |
| | float vb = wrptr[j] + brptr[j]; |
| | fwrite(&vb, sizeof(float), 1, bp); |
| | } |
| | for (int j = 0; j < bias_data_size_g; j++) |
| | { |
| | float vb = wuptr[j] + buptr[j]; |
| | fwrite(&vb, sizeof(float), 1, bp); |
| | } |
| | fwrite(wnptr, sizeof(float), bias_data_size_g, bp); |
| | fwrite(bnptr, sizeof(float), bias_data_size_g, bp); |
| |
|
| | if (direction_type == 2) |
| | { |
| | wuptr += bias_data_size_g * 6; |
| | wrptr += bias_data_size_g * 6; |
| | wnptr += bias_data_size_g * 6; |
| | buptr += bias_data_size_g * 6; |
| | brptr += bias_data_size_g * 6; |
| | bnptr += bias_data_size_g * 6; |
| |
|
| | for (int j = 0; j < bias_data_size_g; j++) |
| | { |
| | float vb = wrptr[j] + brptr[j]; |
| | fwrite(&vb, sizeof(float), 1, bp); |
| | } |
| | for (int j = 0; j < bias_data_size_g; j++) |
| | { |
| | float vb = wuptr[j] + buptr[j]; |
| | fwrite(&vb, sizeof(float), 1, bp); |
| | } |
| | fwrite(wnptr, sizeof(float), bias_data_size_g, bp); |
| | fwrite(bnptr, sizeof(float), bias_data_size_g, bp); |
| | } |
| | } |
| |
|
| | |
| | { |
| | fwrite(&quantize_tag, sizeof(int), 1, bp); |
| |
|
| | int weight_data_size_g = get_tensor_proto_data_size(R) / 3 / num_directions; |
| | const float* Rptr = R.has_raw_data() ? (const float*)R.raw_data().data() : R.float_data().data(); |
| |
|
| | const float* uptr = Rptr; |
| | const float* rptr = Rptr + weight_data_size_g; |
| | const float* nptr = Rptr + weight_data_size_g * 2; |
| | fwrite(rptr, sizeof(float), weight_data_size_g, bp); |
| | fwrite(uptr, sizeof(float), weight_data_size_g, bp); |
| | fwrite(nptr, sizeof(float), weight_data_size_g, bp); |
| |
|
| | if (direction_type == 2) |
| | { |
| | uptr += weight_data_size_g * 3; |
| | rptr += weight_data_size_g * 3; |
| | nptr += weight_data_size_g * 3; |
| | fwrite(rptr, sizeof(float), weight_data_size_g, bp); |
| | fwrite(uptr, sizeof(float), weight_data_size_g, bp); |
| | fwrite(nptr, sizeof(float), weight_data_size_g, bp); |
| | } |
| | } |
| | } |
| | else if (op == "HardSigmoid") |
| | { |
| | float alpha = get_node_attr_f(node, "alpha", 0.2f); |
| | float beta = get_node_attr_f(node, "beta", 0.5f); |
| |
|
| | fprintf(pp, " 0=%e", alpha); |
| | fprintf(pp, " 1=%e", beta); |
| | } |
| | else if (op == "HardSwish") |
| | { |
| | float alpha = get_node_attr_f(node, "alpha", 0.2f); |
| | float beta = get_node_attr_f(node, "beta", 0.5f); |
| |
|
| | fprintf(pp, " 0=%e", alpha); |
| | fprintf(pp, " 1=%e", beta); |
| | } |
| | else if (op == "ImageScaler") |
| | { |
| | std::vector<float> bias = get_node_attr_af(node, "bias"); |
| | float scale = get_node_attr_f(node, "scale", 1.f); |
| |
|
| | int channels = (int)bias.size(); |
| |
|
| | fprintf(pp, " 0=%d", channels); |
| | fprintf(pp, " 1=1"); |
| |
|
| | for (int j = 0; j < channels; j++) |
| | { |
| | fwrite(&scale, sizeof(float), 1, bp); |
| | } |
| | fwrite(&bias[0], sizeof(float), channels, bp); |
| | } |
| | else if (op == "InstanceNormalization") |
| | { |
| | float eps = get_node_attr_f(node, "epsilon", 1e-5f); |
| |
|
| | |
| | std::vector<float> affine_S = get_node_attr_from_input_af(weights[node.input(1)]); |
| | std::vector<float> affine_B = get_node_attr_from_input_af(weights[node.input(2)]); |
| | int channels = (int)affine_S.size(); |
| | int affine = 0; |
| | { |
| | for (int j = 0; j < channels; j++) |
| | { |
| | if (affine_S[j] != 1.f || affine_B[j] != 0.f) |
| | { |
| | affine = 1; |
| | break; |
| | } |
| | } |
| | } |
| |
|
| | fprintf(pp, " 0=%d", channels); |
| | fprintf(pp, " 1=%e", eps); |
| | fprintf(pp, " 2=%d", affine); |
| | if (affine) |
| | { |
| | const onnx::TensorProto& scale = weights[node.input(1)]; |
| | const onnx::TensorProto& B = weights[node.input(2)]; |
| |
|
| | fwrite_tensor_proto_data(scale, bp); |
| | fwrite_tensor_proto_data(B, bp); |
| | } |
| | } |
| | else if (op == "LayerNorm") |
| | { |
| | float eps = get_node_attr_f(node, "epsilon", 1e-5f); |
| | int affine = get_node_attr_i(node, "affine", 1); |
| |
|
| | if (affine) |
| | { |
| | |
| | std::vector<float> affine_S = get_node_attr_from_input_af(weights[node.input(1)]); |
| | std::vector<float> affine_B = get_node_attr_from_input_af(weights[node.input(2)]); |
| | int affine_size = (int)affine_S.size(); |
| | affine = 0; |
| | { |
| | for (int j = 0; j < affine_size; j++) |
| | { |
| | if (affine_S[j] != 1.f || affine_B[j] != 0.f) |
| | { |
| | affine = 1; |
| | break; |
| | } |
| | } |
| | } |
| |
|
| | if (affine) |
| | { |
| | fprintf(pp, " 0=%d", affine_size); |
| | } |
| | } |
| |
|
| | fprintf(pp, " 1=%e", eps); |
| | fprintf(pp, " 2=%d", affine); |
| |
|
| | if (affine) |
| | { |
| | const onnx::TensorProto& scale = weights[node.input(1)]; |
| | const onnx::TensorProto& B = weights[node.input(2)]; |
| |
|
| | fwrite_tensor_proto_data(scale, bp); |
| | fwrite_tensor_proto_data(B, bp); |
| | } |
| | } |
| | else if (op == "LeakyRelu") |
| | { |
| | float alpha = get_node_attr_f(node, "alpha", 0.01f); |
| |
|
| | fprintf(pp, " 0=%e", alpha); |
| | } |
| | else if (op == "Log") |
| | { |
| | int op_type = 8; |
| | fprintf(pp, " 0=%d", op_type); |
| | } |
| | else if (op == "LRN") |
| | { |
| | float alpha = get_node_attr_f(node, "alpha", 1.f); |
| | float beta = get_node_attr_f(node, "beta", 0.5f); |
| | float bias = get_node_attr_f(node, "bias", 1.f); |
| | int size = get_node_attr_i(node, "size", 1); |
| |
|
| | int norm_region = 0; |
| |
|
| | fprintf(pp, " 0=%d", norm_region); |
| | fprintf(pp, " 1=%d", size); |
| | fprintf(pp, " 2=%e", alpha); |
| | fprintf(pp, " 3=%e", beta); |
| | fprintf(pp, " 4=%e", bias); |
| | } |
| | else if (op == "LSTM") |
| | { |
| | const onnx::TensorProto& W = weights[node.input(1)]; |
| | const onnx::TensorProto& R = weights[node.input(2)]; |
| | const onnx::TensorProto& B = weights[node.input(3)]; |
| |
|
| | int hidden_size = get_node_attr_i(node, "hidden_size", 0); |
| | std::string direction = get_node_attr_s(node, "direction"); |
| |
|
| | int direction_type = 0; |
| | if (direction == "forward") |
| | { |
| | direction_type = 0; |
| | } |
| | else if (direction == "reverse") |
| | { |
| | direction_type = 1; |
| | } |
| | else if (direction == "bidirectional") |
| | { |
| | direction_type = 2; |
| | } |
| |
|
| | int weight_data_size = get_tensor_proto_data_size(W); |
| |
|
| | fprintf(pp, " 0=%d", hidden_size); |
| | fprintf(pp, " 1=%d", weight_data_size); |
| | fprintf(pp, " 2=%d", direction_type); |
| |
|
| | int num_directions = direction_type == 2 ? 2 : 1; |
| |
|
| | int quantize_tag = 0; |
| |
|
| | |
| | { |
| | fwrite(&quantize_tag, sizeof(int), 1, bp); |
| |
|
| | int weight_data_size_g = get_tensor_proto_data_size(W) / 4 / num_directions; |
| | const float* wptr = W.has_raw_data() ? (const float*)W.raw_data().data() : W.float_data().data(); |
| |
|
| | const float* iptr = wptr; |
| | const float* optr = wptr + weight_data_size_g; |
| | const float* fptr = wptr + weight_data_size_g * 2; |
| | const float* gptr = wptr + weight_data_size_g * 3; |
| | fwrite(iptr, sizeof(float), weight_data_size_g, bp); |
| | fwrite(fptr, sizeof(float), weight_data_size_g, bp); |
| | fwrite(optr, sizeof(float), weight_data_size_g, bp); |
| | fwrite(gptr, sizeof(float), weight_data_size_g, bp); |
| |
|
| | if (direction_type == 2) |
| | { |
| | iptr += weight_data_size_g * 4; |
| | optr += weight_data_size_g * 4; |
| | fptr += weight_data_size_g * 4; |
| | gptr += weight_data_size_g * 4; |
| | fwrite(iptr, sizeof(float), weight_data_size_g, bp); |
| | fwrite(fptr, sizeof(float), weight_data_size_g, bp); |
| | fwrite(optr, sizeof(float), weight_data_size_g, bp); |
| | fwrite(gptr, sizeof(float), weight_data_size_g, bp); |
| | } |
| | } |
| |
|
| | |
| | |
| | { |
| | fwrite(&quantize_tag, sizeof(int), 1, bp); |
| |
|
| | int bias_data_size_g = get_tensor_proto_data_size(B) / 2 / 4 / num_directions; |
| | const float* xcbptr = B.has_raw_data() ? (const float*)B.raw_data().data() : B.float_data().data(); |
| | const float* xiptr = xcbptr; |
| | const float* xoptr = xcbptr + bias_data_size_g; |
| | const float* xfptr = xcbptr + bias_data_size_g * 2; |
| | const float* xgptr = xcbptr + bias_data_size_g * 3; |
| | const float* hiptr = xcbptr + bias_data_size_g * 4; |
| | const float* hoptr = xcbptr + bias_data_size_g * 5; |
| | const float* hfptr = xcbptr + bias_data_size_g * 6; |
| | const float* hgptr = xcbptr + bias_data_size_g * 7; |
| |
|
| | for (int j = 0; j < bias_data_size_g; j++) |
| | { |
| | float vb = xiptr[j] + hiptr[j]; |
| | fwrite(&vb, sizeof(float), 1, bp); |
| | } |
| | for (int j = 0; j < bias_data_size_g; j++) |
| | { |
| | float vb = xfptr[j] + hfptr[j]; |
| | fwrite(&vb, sizeof(float), 1, bp); |
| | } |
| | for (int j = 0; j < bias_data_size_g; j++) |
| | { |
| | float vb = xoptr[j] + hoptr[j]; |
| | fwrite(&vb, sizeof(float), 1, bp); |
| | } |
| | for (int j = 0; j < bias_data_size_g; j++) |
| | { |
| | float vb = xgptr[j] + hgptr[j]; |
| | fwrite(&vb, sizeof(float), 1, bp); |
| | } |
| |
|
| | if (direction_type == 2) |
| | { |
| | xiptr += bias_data_size_g * 8; |
| | xoptr += bias_data_size_g * 8; |
| | xfptr += bias_data_size_g * 8; |
| | xgptr += bias_data_size_g * 8; |
| | hiptr += bias_data_size_g * 8; |
| | hoptr += bias_data_size_g * 8; |
| | hfptr += bias_data_size_g * 8; |
| | hgptr += bias_data_size_g * 8; |
| |
|
| | for (int j = 0; j < bias_data_size_g; j++) |
| | { |
| | float vb = xiptr[j] + hiptr[j]; |
| | fwrite(&vb, sizeof(float), 1, bp); |
| | } |
| | for (int j = 0; j < bias_data_size_g; j++) |
| | { |
| | float vb = xfptr[j] + hfptr[j]; |
| | fwrite(&vb, sizeof(float), 1, bp); |
| | } |
| | for (int j = 0; j < bias_data_size_g; j++) |
| | { |
| | float vb = xoptr[j] + hoptr[j]; |
| | fwrite(&vb, sizeof(float), 1, bp); |
| | } |
| | for (int j = 0; j < bias_data_size_g; j++) |
| | { |
| | float vb = xgptr[j] + hgptr[j]; |
| | fwrite(&vb, sizeof(float), 1, bp); |
| | } |
| | } |
| | } |
| |
|
| | |
| | { |
| | fwrite(&quantize_tag, sizeof(int), 1, bp); |
| |
|
| | int weight_data_size_g = get_tensor_proto_data_size(R) / 4 / num_directions; |
| | const float* rptr = R.has_raw_data() ? (const float*)R.raw_data().data() : R.float_data().data(); |
| |
|
| | const float* iptr = rptr; |
| | const float* optr = rptr + weight_data_size_g; |
| | const float* fptr = rptr + weight_data_size_g * 2; |
| | const float* gptr = rptr + weight_data_size_g * 3; |
| | fwrite(iptr, sizeof(float), weight_data_size_g, bp); |
| | fwrite(fptr, sizeof(float), weight_data_size_g, bp); |
| | fwrite(optr, sizeof(float), weight_data_size_g, bp); |
| | fwrite(gptr, sizeof(float), weight_data_size_g, bp); |
| |
|
| | if (direction_type == 2) |
| | { |
| | iptr += weight_data_size_g * 4; |
| | optr += weight_data_size_g * 4; |
| | fptr += weight_data_size_g * 4; |
| | gptr += weight_data_size_g * 4; |
| | fwrite(iptr, sizeof(float), weight_data_size_g, bp); |
| | fwrite(fptr, sizeof(float), weight_data_size_g, bp); |
| | fwrite(optr, sizeof(float), weight_data_size_g, bp); |
| | fwrite(gptr, sizeof(float), weight_data_size_g, bp); |
| | } |
| | } |
| | } |
| | else if (op == "MatMul") |
| | { |
| | if (weights.find(node.input(1)) != weights.end() && weights[node.input(1)].dims_size() == 2) |
| | { |
| | |
| | const onnx::TensorProto& B = weights[node.input(1)]; |
| |
|
| | int weight_data_size = get_tensor_proto_data_size(B); |
| |
|
| | int num_output = B.dims(B.dims_size() - 1); |
| | int num_input = weight_data_size / num_output; |
| |
|
| | fprintf(pp, " 0=%d", num_output); |
| | fprintf(pp, " 1=0"); |
| | fprintf(pp, " 2=%d", weight_data_size); |
| |
|
| | int quantize_tag = 0; |
| | fwrite(&quantize_tag, sizeof(int), 1, bp); |
| |
|
| | |
| | { |
| | const float* bptr = B.has_raw_data() ? (const float*)B.raw_data().data() : B.float_data().data(); |
| |
|
| | for (int j = 0; j < num_output; j++) |
| | { |
| | for (int k = 0; k < num_input; k++) |
| | { |
| | float vb = bptr[k * num_output + j]; |
| | fwrite(&vb, sizeof(float), 1, bp); |
| | } |
| | } |
| | } |
| |
|
| | |
| | } |
| | else |
| | { |
| | |
| | } |
| | } |
| | else if (op == "Max") |
| | { |
| | int op_type = 4; |
| | fprintf(pp, " 0=%d", op_type); |
| |
|
| | int with_scalar = get_node_attr_i(node, "with_scalar", 0); |
| | float b = get_node_attr_f(node, "b", 0.f); |
| | if (with_scalar) |
| | { |
| | fprintf(pp, " 1=%d", with_scalar); |
| | fprintf(pp, " 2=%e", b); |
| | } |
| | } |
| | else if (op == "Min") |
| | { |
| | int op_type = 5; |
| | fprintf(pp, " 0=%d", op_type); |
| |
|
| | int with_scalar = get_node_attr_i(node, "with_scalar", 0); |
| | float b = get_node_attr_f(node, "b", 0.f); |
| | if (with_scalar) |
| | { |
| | fprintf(pp, " 1=%d", with_scalar); |
| | fprintf(pp, " 2=%e", b); |
| | } |
| | } |
| | else if (op == "Mul") |
| | { |
| | int op_type = 2; |
| | fprintf(pp, " 0=%d", op_type); |
| |
|
| | int with_scalar = get_node_attr_i(node, "with_scalar", 0); |
| | float b = get_node_attr_f(node, "b", 0.f); |
| | if (with_scalar) |
| | { |
| | fprintf(pp, " 1=%d", with_scalar); |
| | fprintf(pp, " 2=%e", b); |
| | } |
| | } |
| | else if (op == "MultiHeadAttention") |
| | { |
| | int embed_dim = get_node_attr_i(node, "embed_dim", 0); |
| | int num_heads = get_node_attr_i(node, "num_heads", 0); |
| |
|
| | fprintf(pp, " 0=%d", embed_dim); |
| | fprintf(pp, " 1=%d", num_heads); |
| |
|
| | if (node.input_size() == 5) |
| | { |
| | const onnx::TensorProto& qkvw = weights[node.input(1)]; |
| | const onnx::TensorProto& qkvb = weights[node.input(2)]; |
| | const onnx::TensorProto& ow = weights[node.input(3)]; |
| | const onnx::TensorProto& ob = weights[node.input(4)]; |
| |
|
| | int weight_data_size = get_tensor_proto_data_size(ow); |
| |
|
| | fprintf(pp, " 2=%d", weight_data_size); |
| |
|
| | int quantize_tag = 0; |
| |
|
| | fwrite(&quantize_tag, sizeof(int), 1, bp); |
| | |
| | { |
| | const float* wptr = qkvw.has_raw_data() ? (const float*)qkvw.raw_data().data() : qkvw.float_data().data(); |
| | const float* bptr = qkvb.has_raw_data() ? (const float*)qkvb.raw_data().data() : qkvb.float_data().data(); |
| |
|
| | for (int j = 0; j < embed_dim; j++) |
| | { |
| | for (int k = 0; k < embed_dim; k++) |
| | { |
| | float vb = wptr[k * embed_dim * 3 + j]; |
| | fwrite(&vb, sizeof(float), 1, bp); |
| | } |
| | } |
| |
|
| | fwrite(bptr, sizeof(float), embed_dim, bp); |
| | } |
| |
|
| | fwrite(&quantize_tag, sizeof(int), 1, bp); |
| | |
| | { |
| | const float* wptr = qkvw.has_raw_data() ? (const float*)qkvw.raw_data().data() : qkvw.float_data().data(); |
| | const float* bptr = qkvb.has_raw_data() ? (const float*)qkvb.raw_data().data() : qkvb.float_data().data(); |
| | bptr += embed_dim; |
| |
|
| | for (int j = 0; j < embed_dim; j++) |
| | { |
| | for (int k = 0; k < embed_dim; k++) |
| | { |
| | float vb = wptr[k * embed_dim * 3 + j + embed_dim]; |
| | fwrite(&vb, sizeof(float), 1, bp); |
| | } |
| | } |
| |
|
| | fwrite(bptr, sizeof(float), embed_dim, bp); |
| | } |
| |
|
| | fwrite(&quantize_tag, sizeof(int), 1, bp); |
| | |
| | { |
| | const float* wptr = qkvw.has_raw_data() ? (const float*)qkvw.raw_data().data() : qkvw.float_data().data(); |
| | const float* bptr = qkvb.has_raw_data() ? (const float*)qkvb.raw_data().data() : qkvb.float_data().data(); |
| | bptr += embed_dim * 2; |
| |
|
| | for (int j = 0; j < embed_dim; j++) |
| | { |
| | for (int k = 0; k < embed_dim; k++) |
| | { |
| | float vb = wptr[k * embed_dim * 3 + j + embed_dim * 2]; |
| | fwrite(&vb, sizeof(float), 1, bp); |
| | } |
| | } |
| |
|
| | fwrite(bptr, sizeof(float), embed_dim, bp); |
| | } |
| |
|
| | fwrite(&quantize_tag, sizeof(int), 1, bp); |
| | |
| | { |
| | const float* wptr = ow.has_raw_data() ? (const float*)ow.raw_data().data() : ow.float_data().data(); |
| |
|
| | for (int j = 0; j < embed_dim; j++) |
| | { |
| | for (int k = 0; k < embed_dim; k++) |
| | { |
| | float vb = wptr[k * embed_dim + j]; |
| | fwrite(&vb, sizeof(float), 1, bp); |
| | } |
| | } |
| | } |
| | fwrite_tensor_proto_data(ob, bp); |
| | } |
| | else |
| | { |
| | const onnx::TensorProto& qw = weights[node.input(3)]; |
| | const onnx::TensorProto& qb = weights[node.input(4)]; |
| | const onnx::TensorProto& kw = weights[node.input(5)]; |
| | const onnx::TensorProto& kb = weights[node.input(6)]; |
| | const onnx::TensorProto& vw = weights[node.input(7)]; |
| | const onnx::TensorProto& vb = weights[node.input(8)]; |
| | const onnx::TensorProto& ow = weights[node.input(9)]; |
| | const onnx::TensorProto& ob = weights[node.input(10)]; |
| |
|
| | int weight_data_size = get_tensor_proto_data_size(qw); |
| |
|
| | fprintf(pp, " 2=%d", weight_data_size); |
| |
|
| | int quantize_tag = 0; |
| |
|
| | fwrite(&quantize_tag, sizeof(int), 1, bp); |
| | |
| | { |
| | const float* wptr = qw.has_raw_data() ? (const float*)qw.raw_data().data() : qw.float_data().data(); |
| |
|
| | for (int j = 0; j < embed_dim; j++) |
| | { |
| | for (int k = 0; k < embed_dim; k++) |
| | { |
| | float vb = wptr[k * embed_dim + j]; |
| | fwrite(&vb, sizeof(float), 1, bp); |
| | } |
| | } |
| | } |
| | fwrite_tensor_proto_data(qb, bp); |
| |
|
| | fwrite(&quantize_tag, sizeof(int), 1, bp); |
| | |
| | { |
| | const float* wptr = kw.has_raw_data() ? (const float*)kw.raw_data().data() : kw.float_data().data(); |
| |
|
| | for (int j = 0; j < embed_dim; j++) |
| | { |
| | for (int k = 0; k < embed_dim; k++) |
| | { |
| | float vb = wptr[k * embed_dim + j]; |
| | fwrite(&vb, sizeof(float), 1, bp); |
| | } |
| | } |
| | } |
| | fwrite_tensor_proto_data(kb, bp); |
| |
|
| | fwrite(&quantize_tag, sizeof(int), 1, bp); |
| | |
| | { |
| | const float* wptr = vw.has_raw_data() ? (const float*)vw.raw_data().data() : vw.float_data().data(); |
| |
|
| | for (int j = 0; j < embed_dim; j++) |
| | { |
| | for (int k = 0; k < embed_dim; k++) |
| | { |
| | float vb = wptr[k * embed_dim + j]; |
| | fwrite(&vb, sizeof(float), 1, bp); |
| | } |
| | } |
| | } |
| | fwrite_tensor_proto_data(vb, bp); |
| |
|
| | fwrite(&quantize_tag, sizeof(int), 1, bp); |
| | |
| | { |
| | const float* wptr = ow.has_raw_data() ? (const float*)ow.raw_data().data() : ow.float_data().data(); |
| |
|
| | for (int j = 0; j < embed_dim; j++) |
| | { |
| | for (int k = 0; k < embed_dim; k++) |
| | { |
| | float vb = wptr[k * embed_dim + j]; |
| | fwrite(&vb, sizeof(float), 1, bp); |
| | } |
| | } |
| | } |
| | fwrite_tensor_proto_data(ob, bp); |
| | } |
| | } |
| | else if (op == "Neg") |
| | { |
| | int op_type = 1; |
| | fprintf(pp, " 0=%d", op_type); |
| | } |
| | else if (op == "Normalize") |
| | { |
| | float eps = get_node_attr_f(node, "eps", 0.f); |
| | int scale_data_size = 1; |
| |
|
| | fprintf(pp, " 1=1"); |
| | fprintf(pp, " 2=%e", eps); |
| | fprintf(pp, " 3=%d", scale_data_size); |
| | fprintf(pp, " 9=1"); |
| |
|
| | const float scale_data[1] = {1.f}; |
| | fwrite(scale_data, sizeof(float), 1, bp); |
| | } |
| | else if (op == "Pad") |
| | { |
| | std::string mode = get_node_attr_s(node, "mode"); |
| | float value = get_node_attr_f(node, "value", 0.f); |
| |
|
| | std::vector<int> pads; |
| | if (node.input_size() == 1) |
| | { |
| | pads = get_node_attr_ai(node, "pads"); |
| | } |
| | else |
| | { |
| | pads = get_node_attr_from_input_ai(weights[node.input(1)]); |
| | } |
| |
|
| | int type = 0; |
| | if (mode == "constant") |
| | { |
| | type = 0; |
| | } |
| | else if (mode == "edge") |
| | { |
| | type = 1; |
| | } |
| | else if (mode == "reflect") |
| | { |
| | type = 2; |
| | } |
| |
|
| | int pad_size = (int)pads.size(); |
| | int top = 0; |
| | int bottom = 0; |
| | int left = 0; |
| | int right = 0; |
| | int front = 0; |
| | int behind = 0; |
| | if (pad_size == 8) |
| | { |
| | |
| | top = pads[2]; |
| | bottom = pads[6]; |
| | left = pads[3]; |
| | right = pads[7]; |
| | front = pads[1]; |
| | behind = pads[5]; |
| | } |
| | else if (pad_size == 6) |
| | { |
| | |
| | top = pads[1]; |
| | bottom = pads[4]; |
| | left = pads[2]; |
| | right = pads[5]; |
| | } |
| | else |
| | { |
| | |
| | left = pads[1]; |
| | right = pads[3]; |
| | } |
| |
|
| | fprintf(pp, " 0=%d", top); |
| | fprintf(pp, " 1=%d", bottom); |
| | fprintf(pp, " 2=%d", left); |
| | fprintf(pp, " 3=%d", right); |
| | fprintf(pp, " 4=%d", type); |
| | fprintf(pp, " 5=%e", value); |
| | fprintf(pp, " 7=%d", front); |
| | fprintf(pp, " 8=%d", behind); |
| | } |
| | else if (op == "Pow") |
| | { |
| | int op_type = 6; |
| | fprintf(pp, " 0=%d", op_type); |
| |
|
| | int with_scalar = get_node_attr_i(node, "with_scalar", 0); |
| | float b = get_node_attr_f(node, "b", 0.f); |
| | if (with_scalar) |
| | { |
| | fprintf(pp, " 1=%d", with_scalar); |
| | fprintf(pp, " 2=%e", b); |
| | } |
| | } |
| | else if (op == "PixelShuffle") |
| | { |
| | int scale_factor = get_node_attr_i(node, "scale_factor", 1); |
| | fprintf(pp, " 0=%d", scale_factor); |
| | } |
| | else if (op == "PRelu") |
| | { |
| | const onnx::TensorProto& slope = weights[node.input(1)]; |
| |
|
| | int num_slope = get_tensor_proto_data_size(slope); |
| |
|
| | fprintf(pp, " 0=%d", num_slope); |
| |
|
| | fwrite_tensor_proto_data(slope, bp); |
| | } |
| | else if (op == "Reciprocal") |
| | { |
| | int op_type = 15; |
| | fprintf(pp, " 0=%d", op_type); |
| | } |
| | else if (op == "ReduceMax" || op == "ReduceMin" || op == "ReduceMean" || op == "ReduceProd" || op == "ReduceSum" || op == "ReduceSumSquare" || op == "ReduceL1" || op == "ReduceL2" || op == "ReduceLogSum" || op == "ReduceLogSumExp") |
| | { |
| | int op_type = -233; |
| | if (op == "ReduceSum") |
| | op_type = 0; |
| | else if (op == "ReduceSumSquare") |
| | op_type = 2; |
| | else if (op == "ReduceMean") |
| | op_type = 3; |
| | else if (op == "ReduceMax") |
| | op_type = 4; |
| | else if (op == "ReduceMin") |
| | op_type = 5; |
| | else if (op == "ReduceProd") |
| | op_type = 6; |
| | else if (op == "ReduceL1") |
| | op_type = 7; |
| | else if (op == "ReduceL2") |
| | op_type = 8; |
| | else if (op == "ReduceLogSum") |
| | op_type = 9; |
| | else if (op == "ReduceLogSumExp") |
| | op_type = 10; |
| | fprintf(pp, " 0=%d", op_type); |
| |
|
| | std::vector<int> axes = get_node_attr_ai(node, "axes"); |
| | int keepdims = get_node_attr_i(node, "keepdims", 1); |
| |
|
| | if (axes.size() > 0) |
| | { |
| | |
| | fprintf(pp, " 1=%d", 0); |
| | fprintf(pp, " -23303=%zu", axes.size()); |
| | for (size_t j = 0; j < axes.size(); j++) |
| | { |
| | if (axes[j] == 0 || axes[j] > 4 || axes[j] < -3) |
| | fprintf(stderr, "Unsupported reduction axes !\n"); |
| | fprintf(pp, ",%d", axes[j] > 0 ? axes[j] - 1 : axes[j]); |
| | } |
| | } |
| | else |
| | { |
| | |
| | fprintf(pp, " 1=%d", 1); |
| | } |
| | fprintf(pp, " 4=%d", keepdims); |
| | fprintf(pp, " 5=1"); |
| | } |
| | else if (op == "Reorg") |
| | { |
| | int stride = get_node_attr_i(node, "stride", 1); |
| | fprintf(pp, " 0=%d", stride); |
| | } |
| | else if (op == "Reshape") |
| | { |
| | std::vector<int> shape; |
| |
|
| | if (node.input_size() == 1) |
| | { |
| | shape = get_node_attr_ai(node, "shape"); |
| | } |
| | else |
| | { |
| | shape = get_node_attr_from_input_ai(weights[node.input(1)]); |
| | } |
| |
|
| | if (shape.size() == 1) |
| | { |
| | fprintf(pp, " 0=%d", shape[0]); |
| | } |
| | else if (shape.size() == 2) |
| | { |
| | fprintf(pp, " 0=%d", shape[1]); |
| | } |
| | else if (shape.size() == 3) |
| | { |
| | fprintf(pp, " 0=%d", shape[2]); |
| | fprintf(pp, " 1=%d", shape[1]); |
| | } |
| | else if (shape.size() == 4) |
| | { |
| | fprintf(pp, " 0=%d", shape[3]); |
| | fprintf(pp, " 1=%d", shape[2]); |
| | fprintf(pp, " 2=%d", shape[1]); |
| | } |
| | else if (shape.size() == 5) |
| | { |
| | fprintf(pp, " 0=%d", shape[4] * shape[3]); |
| | fprintf(pp, " 1=%d", shape[2]); |
| | fprintf(pp, " 2=%d", shape[1]); |
| | } |
| | } |
| | else if (op == "Resize") |
| | { |
| | std::string mode = get_node_attr_s(node, "mode"); |
| | std::string align = get_node_attr_s(node, "coordinate_transformation_mode"); |
| |
|
| | std::vector<float> scales; |
| | std::vector<int> sizes; |
| | if (node.input_size() == 2) |
| | { |
| | |
| | scales = get_node_attr_from_input_af(weights[node.input(1)]); |
| | } |
| | else |
| | { |
| | |
| | scales = get_node_attr_from_input_af(weights[node.input(2)]); |
| | if (node.input_size() >= 4) |
| | { |
| | sizes = get_node_attr_from_input_ai(weights[node.input(3)]); |
| | } |
| | } |
| |
|
| | int resize_type = 1; |
| | if (mode == "nearest") |
| | { |
| | resize_type = 1; |
| | } |
| | else if (mode == "linear") |
| | { |
| | resize_type = 2; |
| | } |
| | else if (mode == "cubic") |
| | { |
| | resize_type = 3; |
| | } |
| |
|
| | if (scales.empty() && sizes.empty()) |
| | { |
| | fprintf(stderr, "Unsupported Resize scales and sizes are all empty!\n"); |
| | } |
| |
|
| | float h_scale = 1.f; |
| | float w_scale = 1.f; |
| | if (scales.size() == 2) |
| | { |
| | w_scale = scales[1]; |
| | } |
| | else if (scales.size() == 3) |
| | { |
| | h_scale = scales[1]; |
| | w_scale = scales[2]; |
| | } |
| | else if (scales.size() == 4) |
| | { |
| | h_scale = scales[2]; |
| | w_scale = scales[3]; |
| |
|
| | if (scales[1] != 1.f) |
| | fprintf(stderr, "Unsupported Resize scales !\n"); |
| | } |
| |
|
| | int output_height = 0; |
| | int output_width = 0; |
| | if (sizes.size() == 2) |
| | { |
| | output_width = sizes[1]; |
| | } |
| | else if (sizes.size() == 3) |
| | { |
| | output_height = sizes[1]; |
| | output_width = sizes[2]; |
| | } |
| | else if (sizes.size() == 4) |
| | { |
| | output_height = sizes[2]; |
| | output_width = sizes[3]; |
| | } |
| |
|
| | int align_corner = 0; |
| | if (align == "align_corners") |
| | { |
| | align_corner = 1; |
| | } |
| |
|
| | fprintf(pp, " 0=%d", resize_type); |
| | fprintf(pp, " 1=%e", h_scale); |
| | fprintf(pp, " 2=%e", w_scale); |
| | fprintf(pp, " 3=%d", output_height); |
| | fprintf(pp, " 4=%d", output_width); |
| | fprintf(pp, " 6=%d", align_corner); |
| | } |
| | else if (op == "RNN") |
| | { |
| | const onnx::TensorProto& W = weights[node.input(1)]; |
| | const onnx::TensorProto& R = weights[node.input(2)]; |
| | const onnx::TensorProto& B = weights[node.input(3)]; |
| |
|
| | int hidden_size = get_node_attr_i(node, "hidden_size", 0); |
| | std::string direction = get_node_attr_s(node, "direction"); |
| |
|
| | int direction_type = 0; |
| | if (direction == "forward") |
| | { |
| | direction_type = 0; |
| | } |
| | else if (direction == "reverse") |
| | { |
| | direction_type = 1; |
| | } |
| | else if (direction == "bidirectional") |
| | { |
| | direction_type = 2; |
| | } |
| |
|
| | int weight_data_size = get_tensor_proto_data_size(W); |
| |
|
| | fprintf(pp, " 0=%d", hidden_size); |
| | fprintf(pp, " 1=%d", weight_data_size); |
| | fprintf(pp, " 2=%d", direction_type); |
| |
|
| | int num_directions = direction_type == 2 ? 2 : 1; |
| |
|
| | int quantize_tag = 0; |
| |
|
| | fwrite(&quantize_tag, sizeof(int), 1, bp); |
| | fwrite_tensor_proto_data(W, bp); |
| |
|
| | |
| | { |
| | fwrite(&quantize_tag, sizeof(int), 1, bp); |
| |
|
| | int bias_data_size_g = get_tensor_proto_data_size(B) / 2 / num_directions; |
| | const float* bptr = B.has_raw_data() ? (const float*)B.raw_data().data() : B.float_data().data(); |
| | const float* xiptr = bptr; |
| | const float* hiptr = bptr + bias_data_size_g; |
| |
|
| | for (int j = 0; j < bias_data_size_g; j++) |
| | { |
| | float vb = xiptr[j] + hiptr[j]; |
| | fwrite(&vb, sizeof(float), 1, bp); |
| | } |
| |
|
| | if (direction_type == 2) |
| | { |
| | xiptr += bias_data_size_g * 2; |
| | hiptr += bias_data_size_g * 2; |
| |
|
| | for (int j = 0; j < bias_data_size_g; j++) |
| | { |
| | float vb = xiptr[j] + hiptr[j]; |
| | fwrite(&vb, sizeof(float), 1, bp); |
| | } |
| | } |
| | } |
| |
|
| | fwrite(&quantize_tag, sizeof(int), 1, bp); |
| | fwrite_tensor_proto_data(R, bp); |
| | } |
| | else if (op == "RDiv") |
| | { |
| | int op_type = 8; |
| | fprintf(pp, " 0=%d", op_type); |
| |
|
| | int with_scalar = get_node_attr_i(node, "with_scalar", 0); |
| | float b = get_node_attr_f(node, "b", 0.f); |
| | if (with_scalar) |
| | { |
| | fprintf(pp, " 1=%d", with_scalar); |
| | fprintf(pp, " 2=%e", b); |
| | } |
| | } |
| | else if (op == "RSub") |
| | { |
| | int op_type = 7; |
| | fprintf(pp, " 0=%d", op_type); |
| |
|
| | int with_scalar = get_node_attr_i(node, "with_scalar", 0); |
| | float b = get_node_attr_f(node, "b", 0.f); |
| | if (with_scalar) |
| | { |
| | fprintf(pp, " 1=%d", with_scalar); |
| | fprintf(pp, " 2=%e", b); |
| | } |
| | } |
| | else if (op == "ShuffleChannel") |
| | { |
| | int group = get_node_attr_i(node, "group", 1); |
| | int reverse = get_node_attr_i(node, "reverse", 0); |
| | fprintf(pp, " 0=%d", group); |
| | fprintf(pp, " 1=%d", reverse); |
| | } |
| | else if (op == "Sigmoid") |
| | { |
| | |
| | } |
| | else if (op == "Sin") |
| | { |
| | int op_type = 9; |
| | fprintf(pp, " 0=%d", op_type); |
| | } |
| | else if (op == "SkipLayerNormalization") |
| | { |
| | const onnx::TensorProto& W = weights[node.input(2)]; |
| | const onnx::TensorProto& B = weights[node.input(3)]; |
| | const onnx::TensorProto& B2 = weights[node.input(4)]; |
| |
|
| | fprintf(pp, " 0=%d", get_tensor_proto_data_size(B)); |
| |
|
| | int quantize_tag = 0; |
| | fwrite(&quantize_tag, sizeof(int), 1, bp); |
| |
|
| | fwrite_tensor_proto_data(W, bp); |
| |
|
| | fwrite(&quantize_tag, sizeof(int), 1, bp); |
| |
|
| | fwrite_tensor_proto_data(B, bp); |
| |
|
| | fwrite(&quantize_tag, sizeof(int), 1, bp); |
| |
|
| | fwrite_tensor_proto_data(B2, bp); |
| | } |
| | else if (op == "Slice") |
| | { |
| | std::vector<int> starts; |
| | std::vector<int> ends; |
| | std::vector<int> axes; |
| | std::vector<int> steps; |
| | if (node.input_size() == 1) |
| | { |
| | starts = get_node_attr_ai(node, "starts"); |
| | ends = get_node_attr_ai(node, "ends"); |
| | axes = get_node_attr_ai(node, "axes"); |
| | steps = get_node_attr_ai(node, "steps"); |
| | } |
| | else |
| | { |
| | starts = get_node_attr_from_input_ai(weights[node.input(1)]); |
| | ends = get_node_attr_from_input_ai(weights[node.input(2)]); |
| | if (node.input_size() >= 4) |
| | axes = get_node_attr_from_input_ai(weights[node.input(3)]); |
| | if (node.input_size() >= 5) |
| | steps = get_node_attr_from_input_ai(weights[node.input(4)]); |
| | } |
| |
|
| | |
| | for (int i = 0; i < (int)steps.size(); i++) |
| | { |
| | if (steps[i] != 1) |
| | fprintf(stderr, "Unsupported slice step !\n"); |
| | } |
| |
|
| | |
| | if (!axes.empty()) |
| | { |
| | for (int i = 0; i < (int)axes.size(); i++) |
| | { |
| | int axis = axes[i]; |
| | if (axis == 0) |
| | { |
| | starts.erase(starts.begin() + i); |
| | ends.erase(ends.begin() + i); |
| | axes.erase(axes.begin() + i); |
| | break; |
| | } |
| | } |
| | } |
| |
|
| | fprintf(pp, " -23309=%d", (int)starts.size()); |
| | for (int i = 0; i < (int)starts.size(); i++) |
| | { |
| | fprintf(pp, ",%d", starts[i]); |
| | } |
| | fprintf(pp, " -23310=%d", (int)ends.size()); |
| | for (int i = 0; i < (int)ends.size(); i++) |
| | { |
| | fprintf(pp, ",%d", ends[i]); |
| | } |
| | if (!axes.empty()) |
| | { |
| | fprintf(pp, " -23311=%d", (int)axes.size()); |
| | for (int i = 0; i < (int)axes.size(); i++) |
| | { |
| | int axis = axes[i]; |
| | if (axis == 0 || axis > 3 || axis < -3) |
| | fprintf(stderr, "Unsupported slice axes !\n"); |
| |
|
| | if (axis > 0) |
| | axis = axis - 1; |
| |
|
| | fprintf(pp, ",%d", axis); |
| | } |
| | } |
| | } |
| | else if (op == "Softmax") |
| | { |
| | int axis = get_node_attr_i(node, "axis", 1); |
| | fprintf(pp, " 0=%d", axis - 1); |
| | fprintf(pp, " 1=1"); |
| | } |
| | else if (op == "Split") |
| | { |
| | int axis = get_node_attr_i(node, "axis", 0); |
| | std::vector<int> split = get_node_attr_ai(node, "split"); |
| | if (axis < 1) |
| | fprintf(stderr, "Unsupported split axis !\n"); |
| |
|
| | fprintf(pp, " -23300=%d", output_size); |
| | if (split.empty()) |
| | { |
| | for (int i = 0; i < output_size; i++) |
| | { |
| | fprintf(pp, ",-233"); |
| | } |
| | } |
| | else |
| | { |
| | for (size_t i = 0; i < split.size() - 1; i++) |
| | { |
| | fprintf(pp, ",%d", split[i]); |
| | } |
| | fprintf(pp, ",-233"); |
| | } |
| | fprintf(pp, " 1=%d", axis - 1); |
| | } |
| | else if (op == "Sqrt") |
| | { |
| | int op_type = 5; |
| | fprintf(pp, " 0=%d", op_type); |
| | } |
| | else if (op == "Squeeze") |
| | { |
| | std::vector<int> axes = get_node_attr_ai(node, "axes"); |
| |
|
| | if (axes.empty()) |
| | { |
| | fprintf(pp, " 0=1"); |
| | fprintf(pp, " 1=1"); |
| | fprintf(pp, " 2=1"); |
| | } |
| | else |
| | { |
| | fprintf(pp, " -23303=%zu", axes.size()); |
| | for (int i = 0; i < (int)axes.size(); i++) |
| | { |
| | if (axes[i] == 0 || axes[i] > 4 || axes[i] < -3) |
| | fprintf(stderr, "Unsupported squeeze axes !\n"); |
| | fprintf(pp, ",%d", axes[i] > 0 ? axes[i] - 1 : axes[i]); |
| | } |
| | } |
| | } |
| | else if (op == "Sub") |
| | { |
| | int op_type = 1; |
| | fprintf(pp, " 0=%d", op_type); |
| |
|
| | int with_scalar = get_node_attr_i(node, "with_scalar", 0); |
| | float b = get_node_attr_f(node, "b", 0.f); |
| | if (with_scalar) |
| | { |
| | fprintf(pp, " 1=%d", with_scalar); |
| | fprintf(pp, " 2=%e", b); |
| | } |
| | } |
| | else if (op == "Sum") |
| | { |
| | int op_type = 1; |
| | fprintf(pp, " 0=%d", op_type); |
| | } |
| | else if (op == "Swish") |
| | { |
| | |
| | } |
| | else if (op == "Tan") |
| | { |
| | int op_type = 11; |
| | fprintf(pp, " 0=%d", op_type); |
| | } |
| | else if (op == "Tanh") |
| | { |
| | int op_type = 16; |
| | fprintf(pp, " 0=%d", op_type); |
| | } |
| | else if (op == "Transpose") |
| | { |
| | std::vector<int> perm = get_node_attr_ai(node, "perm"); |
| |
|
| | if (perm.size() == 3) |
| | { |
| | if (perm[1] == 1 && perm[2] == 2) |
| | fprintf(pp, " 0=0"); |
| | else if (perm[1] == 2 && perm[2] == 1) |
| | fprintf(pp, " 0=1"); |
| | else if (perm[0] == 1 && perm[1] == 0 && perm[2] == 2) |
| | fprintf(pp, " 0=0"); |
| | else if (perm[0] == 2 && perm[1] == 0 && perm[2] == 1) |
| | fprintf(pp, " 0=1"); |
| | } |
| | else if (perm.size() == 4) |
| | { |
| | if (perm[1] == 1 && perm[2] == 2 && perm[3] == 3) |
| | fprintf(pp, " 0=0"); |
| | else if (perm[1] == 1 && perm[2] == 3 && perm[3] == 2) |
| | fprintf(pp, " 0=1"); |
| | else if (perm[1] == 2 && perm[2] == 1 && perm[3] == 3) |
| | fprintf(pp, " 0=2"); |
| | else if (perm[1] == 2 && perm[2] == 3 && perm[3] == 1) |
| | fprintf(pp, " 0=3"); |
| | else if (perm[1] == 3 && perm[2] == 1 && perm[3] == 2) |
| | fprintf(pp, " 0=4"); |
| | else if (perm[1] == 3 && perm[2] == 2 && perm[3] == 1) |
| | fprintf(pp, " 0=5"); |
| | else |
| | fprintf(stderr, "Unsupported transpose type !\n"); |
| | } |
| | else if (perm.size() == 5) |
| | { |
| | if (perm[1] == 1 && perm[2] == 2 && perm[3] == 3 && perm[4] == 4) |
| | fprintf(pp, " 0=0"); |
| | else if (perm[1] == 1 && perm[2] == 3 && perm[3] == 4 && perm[4] == 2) |
| | fprintf(pp, " 0=1"); |
| | else if (perm[1] == 2 && perm[2] == 1 && perm[3] == 3 && perm[4] == 4) |
| | fprintf(pp, " 0=2"); |
| | else if (perm[1] == 2 && perm[2] == 3 && perm[3] == 4 && perm[4] == 1) |
| | fprintf(pp, " 0=3"); |
| | else if (perm[1] == 3 && perm[2] == 4 && perm[3] == 1 && perm[4] == 2) |
| | fprintf(pp, " 0=4"); |
| | else if (perm[1] == 3 && perm[2] == 4 && perm[3] == 2 && perm[4] == 1) |
| | fprintf(pp, " 0=5"); |
| | else |
| | fprintf(stderr, "Unsupported transpose type !\n"); |
| | } |
| | } |
| | else if (op == "Upsample") |
| | { |
| | std::string mode = get_node_attr_s(node, "mode"); |
| | std::string align = get_node_attr_s(node, "coordinate_transformation_mode"); |
| |
|
| | std::vector<float> scales; |
| |
|
| | if (node.input_size() == 1) |
| | { |
| | scales = get_node_attr_af(node, "scales"); |
| | } |
| | else |
| | { |
| | scales = get_node_attr_from_input_af(weights[node.input(1)]); |
| | } |
| |
|
| | int resize_type = 1; |
| | if (mode == "nearest") |
| | { |
| | resize_type = 1; |
| | } |
| | else if (mode == "bilinear" || mode == "linear") |
| | { |
| | resize_type = 2; |
| | } |
| | else if (mode == "trilinear") |
| | { |
| | fprintf(stderr, "Unsupported Upsample mode !\n"); |
| | } |
| |
|
| | float h_scale = 1.f; |
| | float w_scale = 1.f; |
| | if (scales.size() == 2) |
| | { |
| | w_scale = scales[1]; |
| | } |
| | else if (scales.size() == 3) |
| | { |
| | h_scale = scales[1]; |
| | w_scale = scales[2]; |
| | } |
| | else if (scales.size() == 4) |
| | { |
| | h_scale = scales[2]; |
| | w_scale = scales[3]; |
| |
|
| | if (scales[1] != 1.f) |
| | fprintf(stderr, "Unsupported Upsample scales !\n"); |
| | } |
| | else |
| | { |
| | fprintf(stderr, "Unsupported Upsample scales !\n"); |
| | } |
| |
|
| | int align_corner = 0; |
| | if (align == "align_corners") |
| | { |
| | align_corner = 1; |
| | } |
| |
|
| | fprintf(pp, " 0=%d", resize_type); |
| | fprintf(pp, " 1=%e", h_scale); |
| | fprintf(pp, " 2=%e", w_scale); |
| | fprintf(pp, " 6=%d", align_corner); |
| | } |
| | else if (op == "Unsqueeze") |
| | { |
| | std::vector<int> axes = get_node_attr_ai(node, "axes"); |
| |
|
| | fprintf(pp, " -23303=%zu", axes.size()); |
| | for (int i = 0; i < (int)axes.size(); i++) |
| | { |
| | if (axes[i] == 0 || axes[i] > 4 || axes[i] < -4) |
| | fprintf(stderr, "Unsupported unsqueeze axes !\n"); |
| | fprintf(pp, ",%d", axes[i] > 0 ? axes[i] - 1 : axes[i]); |
| | } |
| | } |
| | else |
| | { |
| | |
| | for (int j = 0; j < node.attribute_size(); j++) |
| | { |
| | const onnx::AttributeProto& attr = node.attribute(j); |
| | if (attr.type() == 1) |
| | { |
| | fprintf(stderr, " # %s=%g\n", attr.name().c_str(), attr.f()); |
| | } |
| | else if (attr.type() == 2) |
| | { |
| | fprintf(stderr, " # %s=%lld\n", attr.name().c_str(), (long long)attr.i()); |
| | } |
| | else if (attr.type() == 3) |
| | { |
| | fprintf(stderr, " # %s=%s\n", attr.name().c_str(), attr.s().c_str()); |
| | } |
| | else |
| | { |
| | fprintf(stderr, " # %s %d\n", attr.name().c_str(), attr.type()); |
| | } |
| | } |
| | } |
| |
|
| | fprintf(pp, "\n"); |
| |
|
| | for (int j = 0; j < output_size; j++) |
| | { |
| | const std::string& output_name = node.output(j); |
| | if (node_reference.find(output_name) != node_reference.end()) |
| | { |
| | int refcount = node_reference[output_name]; |
| | if (refcount > 1) |
| | { |
| | char splitname[256]; |
| | sprintf(splitname, "splitncnn_%d", internal_split); |
| | fprintf(pp, "%-16s %-24s %d %d", "Split", splitname, 1, refcount); |
| |
|
| | fprintf(pp, " %s", trunc_name(output_name).c_str()); |
| |
|
| | for (int k = 0; k < refcount; k++) |
| | { |
| | std::string split_name = output_name + "_splitncnn_" + std::to_string(k); |
| | fprintf(pp, " %s", trunc_name(split_name).c_str()); |
| | } |
| | fprintf(pp, "\n"); |
| |
|
| | internal_split++; |
| | } |
| | } |
| | } |
| | } |
| |
|
| | fclose(pp); |
| | fclose(bp); |
| |
|
| | return 0; |
| | } |
| |
|