diff --git "a/az-speechto-text-HF/WhisperEncoder.mlmodelc/model.espresso.net" "b/az-speechto-text-HF/WhisperEncoder.mlmodelc/model.espresso.net" deleted file mode 100644--- "a/az-speechto-text-HF/WhisperEncoder.mlmodelc/model.espresso.net" +++ /dev/null @@ -1,6883 +0,0 @@ -{ - "storage" : "model.espresso.weights", - "analyses" : { - - }, - "properties" : { - - }, - "format_version" : 200, - "metadata_in_weights" : [ - - ], - "layers" : [ - { - "size_of_axes" : 1, - "bottom" : "input_features", - "axes_0" : -2, - "weights" : { - - }, - "nd_axis" : 0, - "debug_info" : "expand_dims_0", - "top" : "expand_dims_0", - "type" : "expand_dims", - "name" : "expand_dims_0" - }, - { - "pad_r" : 1, - "fused_relu" : 0, - "fused_tanh" : 0, - "debug_info" : "conv_0", - "pad_fill_mode" : 0, - "pad_b" : 0, - "pad_l" : 1, - "top" : "conv_0", - "blob_weights" : 3, - "K" : 80, - "blob_biases" : 1, - "name" : "conv_0", - "has_batch_norm" : 0, - "type" : "convolution", - "n_groups" : 1, - "pad_t" : 0, - "has_biases" : 1, - "C" : 768, - "bottom" : "expand_dims_0", - "weights" : { - - }, - "Nx" : 3, - "pad_mode" : 0, - "pad_value" : 0, - "Ny" : 1, - "n_parallel" : 1 - }, - { - "size_of_axes" : 1, - "bottom" : "conv_0", - "axes_0" : -2, - "weights" : { - - }, - "nd_axis" : 0, - "debug_info" : "52", - "version" : 1, - "top" : "52", - "type" : "squeeze", - "name" : "52" - }, - { - "bottom" : "52", - "weights" : { - - }, - "mode" : 19, - "debug_info" : "input.1", - "top" : "input.1", - "type" : "activation", - "name" : "input.1" - }, - { - "size_of_axes" : 1, - "bottom" : "input.1", - "axes_0" : -2, - "weights" : { - - }, - "nd_axis" : 0, - "debug_info" : "expand_dims_2", - "top" : "expand_dims_2", - "type" : "expand_dims", - "name" : "expand_dims_2" - }, - { - "pad_r" : 1, - "fused_relu" : 0, - "fused_tanh" : 0, - "debug_info" : "conv_1", - "pad_fill_mode" : 0, - "pad_b" : 0, - "pad_l" : 1, - "top" : "conv_1", - "blob_weights" : 7, - "K" : 768, - "blob_biases" : 5, - "stride_x" : 2, - "name" : "conv_1", - "has_batch_norm" : 0, - "type" : "convolution", - "n_groups" : 1, - "pad_t" : 0, - "has_biases" : 1, - "C" : 768, - "bottom" : "expand_dims_2", - "weights" : { - - }, - "Nx" : 3, - "pad_mode" : 0, - "pad_value" : 0, - "Ny" : 1, - "n_parallel" : 1 - }, - { - "size_of_axes" : 1, - "bottom" : "conv_1", - "axes_0" : -2, - "weights" : { - - }, - "nd_axis" : 0, - "debug_info" : "60", - "version" : 1, - "top" : "60", - "type" : "squeeze", - "name" : "60" - }, - { - "bottom" : "60", - "weights" : { - - }, - "mode" : 19, - "debug_info" : "inputs_embeds.1", - "top" : "inputs_embeds.1", - "type" : "activation", - "name" : "inputs_embeds.1" - }, - { - "axis_h" : 0, - "axis_w" : 1, - "bottom" : "inputs_embeds.1", - "axis_k" : 2, - "axis_n" : 3, - "axis_seq" : 4, - "weights" : { - - }, - "debug_info" : "transpose_108", - "top" : "transpose_108", - "type" : "transpose", - "name" : "transpose_108" - }, - { - "top" : "encoder.embed_positions.weight", - "w" : 768, - "h" : 1500, - "name" : "encoder.embed_positions.weight", - "nd_rank" : 2, - "type" : "load_constant", - "k" : 1, - "bottom" : "", - "debug_info" : "encoder.embed_positions.weight", - "n" : 1, - "weights" : { - - }, - "constant_blob" : 9 - }, - { - "bottom" : "encoder.embed_positions.weight,transpose_108", - "alpha" : 1, - "operation" : 0, - "weights" : { - - }, - "fused_relu" : 0, - "debug_info" : "input.3", - "top" : "input.3", - "type" : "elementwise", - "name" : "input.3", - "beta" : 0 - }, - { - "name" : "hidden_states.1_reshape", - "weights" : { - - }, - "dst_w" : 768, - "version" : 1, - "dst_n" : 1, - "dst_nd_rank" : 4, - "type" : "reshape", - "dst_h" : 1, - "mode" : 0, - "dynamic_shape" : false, - "bottom" : "input.3", - "debug_info" : "hidden_states.1_reshape", - "dst_seq" : 1, - "dst_k" : 1500, - "top" : "hidden_states.1_reshape" - }, - { - "bottom" : "hidden_states.1_reshape", - "weights" : { - - }, - "eps" : 9.9999997473787516e-06, - "debug_info" : "hidden_states.1_mvn", - "axis" : 1, - "normalization_mode" : 1, - "top" : "hidden_states.1_mvn", - "type" : "l2_normalize", - "name" : "hidden_states.1_mvn" - }, - { - "bottom" : "", - "k" : 1, - "n" : 1, - "weights" : { - - }, - "h" : 1, - "debug_info" : "hidden_states.1_scale", - "constant_blob" : 11, - "top" : "hidden_states.1_scale_constant_in_scale", - "w" : 768, - "type" : "load_constant", - "name" : "hidden_states.1_scale_constant_in_scale" - }, - { - "bottom" : "", - "k" : 1, - "n" : 1, - "weights" : { - - }, - "h" : 1, - "debug_info" : "hidden_states.1_scale", - "constant_blob" : 13, - "top" : "hidden_states.1_scale_constant_in_bias", - "w" : 768, - "type" : "load_constant", - "name" : "hidden_states.1_scale_constant_in_bias" - }, - { - "bottom" : "hidden_states.1_mvn,hidden_states.1_scale_constant_in_scale", - "alpha" : 1, - "operation" : 1, - "weights" : { - - }, - "fused_relu" : 0, - "debug_info" : "hidden_states.1_scale", - "top" : "hidden_states.1_scale_mul_out", - "type" : "elementwise", - "name" : "hidden_states.1_scale_mul_out", - "beta" : 0 - }, - { - "bottom" : "hidden_states.1_scale_mul_out,hidden_states.1_scale_constant_in_bias", - "alpha" : 1, - "operation" : 0, - "weights" : { - - }, - "fused_relu" : 0, - "debug_info" : "hidden_states.1_scale", - "top" : "hidden_states.1_scale", - "type" : "elementwise", - "name" : "hidden_states.1_scale", - "beta" : 0 - }, - { - "name" : "hidden_states.1", - "weights" : { - - }, - "dst_w" : 768, - "version" : 1, - "dst_n" : 1, - "dst_nd_rank" : 3, - "type" : "reshape", - "dst_h" : 1500, - "mode" : 0, - "dynamic_shape" : false, - "bottom" : "hidden_states.1_scale", - "debug_info" : "hidden_states.1", - "dst_seq" : 1, - "dst_k" : 1, - "top" : "hidden_states.1" - }, - { - "nB" : 768, - "top" : "83", - "has_biases" : 1, - "weights" : { - - }, - "nC" : 768, - "blob_weights" : 17, - "type" : "inner_product", - "has_relu" : 0, - "bottom" : "hidden_states.1", - "blob_biases" : 15, - "has_tanh" : 0, - "debug_info" : "83", - "name" : "83", - "has_prelu" : 0 - }, - { - "bottom" : "83", - "alpha" : 0.125, - "operation" : 1, - "weights" : { - - }, - "fused_relu" : 0, - "debug_info" : "tensor.1", - "top" : "tensor.1", - "type" : "elementwise", - "name" : "tensor.1", - "beta" : 0 - }, - { - "name" : "87", - "weights" : { - - }, - "dst_w" : 64, - "version" : 1, - "dst_n" : 1, - "dst_nd_rank" : 4, - "type" : "reshape", - "dst_h" : 12, - "mode" : 0, - "dynamic_shape" : false, - "bottom" : "tensor.1", - "debug_info" : "87", - "dst_seq" : 1, - "dst_k" : 1500, - "top" : "87" - }, - { - "nB" : 768, - "top" : "tensor.3", - "has_biases" : 1, - "weights" : { - - }, - "nC" : 768, - "blob_weights" : 21, - "type" : "inner_product", - "has_relu" : 0, - "bottom" : "hidden_states.1", - "blob_biases" : 19, - "has_tanh" : 0, - "debug_info" : "tensor.3", - "name" : "tensor.3", - "has_prelu" : 0 - }, - { - "name" : "93", - "weights" : { - - }, - "dst_w" : 64, - "version" : 1, - "dst_n" : 1, - "dst_nd_rank" : 4, - "type" : "reshape", - "dst_h" : 12, - "mode" : 0, - "dynamic_shape" : false, - "bottom" : "tensor.3", - "debug_info" : "93", - "dst_seq" : 1, - "dst_k" : -1, - "top" : "93" - }, - { - "nB" : 768, - "top" : "tensor.5", - "has_biases" : 1, - "weights" : { - - }, - "nC" : 768, - "blob_weights" : 25, - "type" : "inner_product", - "has_relu" : 0, - "bottom" : "hidden_states.1", - "blob_biases" : 23, - "has_tanh" : 0, - "debug_info" : "tensor.5", - "name" : "tensor.5", - "has_prelu" : 0 - }, - { - "name" : "100", - "weights" : { - - }, - "dst_w" : 64, - "version" : 1, - "dst_n" : 1, - "dst_nd_rank" : 4, - "type" : "reshape", - "dst_h" : 12, - "mode" : 0, - "dynamic_shape" : false, - "bottom" : "tensor.5", - "debug_info" : "100", - "dst_seq" : 1, - "dst_k" : -1, - "top" : "100" - }, - { - "axis_h" : 0, - "axis_w" : 2, - "bottom" : "93", - "axis_k" : 1, - "axis_n" : 3, - "axis_seq" : 4, - "weights" : { - - }, - "debug_info" : "transpose_105", - "top" : "transpose_105", - "type" : "transpose", - "name" : "transpose_105" - }, - { - "axis_h" : 2, - "axis_w" : 0, - "bottom" : "87", - "axis_k" : 1, - "axis_n" : 3, - "axis_seq" : 4, - "weights" : { - - }, - "debug_info" : "transpose_106", - "top" : "transpose_106", - "type" : "transpose", - "name" : "transpose_106" - }, - { - "bottom" : "transpose_106,transpose_105", - "weights" : { - - }, - "debug_info" : "input.7", - "top" : "input.7", - "type" : "batch_matmul", - "name" : "input.7", - "channel_mode" : false - }, - { - "bottom" : "input.7", - "weights" : { - - }, - "nd_axis" : -1, - "debug_info" : "input.9", - "top" : "input.9", - "type" : "softmax_nd", - "name" : "input.9" - }, - { - "axis_h" : 2, - "axis_w" : 0, - "bottom" : "100", - "axis_k" : 1, - "axis_n" : 3, - "axis_seq" : 4, - "weights" : { - - }, - "debug_info" : "transpose_107", - "top" : "transpose_107", - "type" : "transpose", - "name" : "transpose_107" - }, - { - "bottom" : "input.9,transpose_107", - "weights" : { - - }, - "debug_info" : "attn_output.1", - "top" : "attn_output.1", - "type" : "batch_matmul", - "name" : "attn_output.1", - "channel_mode" : false - }, - { - "axis_h" : 2, - "axis_w" : 0, - "bottom" : "attn_output.1", - "axis_k" : 1, - "axis_n" : 3, - "axis_seq" : 4, - "weights" : { - - }, - "debug_info" : "transpose_104", - "top" : "transpose_104", - "type" : "transpose", - "name" : "transpose_104" - }, - { - "name" : "input.11", - "weights" : { - - }, - "dst_w" : 768, - "version" : 1, - "dst_n" : 1, - "dst_nd_rank" : 3, - "type" : "reshape", - "dst_h" : 1500, - "mode" : 0, - "dynamic_shape" : false, - "bottom" : "transpose_104", - "debug_info" : "input.11", - "dst_seq" : 1, - "dst_k" : 1, - "top" : "input.11" - }, - { - "nB" : 768, - "top" : "input.13", - "has_biases" : 1, - "weights" : { - - }, - "nC" : 768, - "blob_weights" : 29, - "type" : "inner_product", - "has_relu" : 0, - "bottom" : "input.11", - "blob_biases" : 27, - "has_tanh" : 0, - "debug_info" : "input.13", - "name" : "input.13", - "has_prelu" : 0 - }, - { - "bottom" : "input.3,input.13", - "alpha" : 1, - "operation" : 0, - "weights" : { - - }, - "fused_relu" : 0, - "debug_info" : "input.15", - "top" : "input.15", - "type" : "elementwise", - "name" : "input.15", - "beta" : 0 - }, - { - "name" : "input.17_reshape", - "weights" : { - - }, - "dst_w" : 768, - "version" : 1, - "dst_n" : 1, - "dst_nd_rank" : 4, - "type" : "reshape", - "dst_h" : 1, - "mode" : 0, - "dynamic_shape" : false, - "bottom" : "input.15", - "debug_info" : "input.17_reshape", - "dst_seq" : 1, - "dst_k" : 1500, - "top" : "input.17_reshape" - }, - { - "bottom" : "input.17_reshape", - "weights" : { - - }, - "eps" : 9.9999997473787516e-06, - "debug_info" : "input.17_mvn", - "axis" : 1, - "normalization_mode" : 1, - "top" : "input.17_mvn", - "type" : "l2_normalize", - "name" : "input.17_mvn" - }, - { - "bottom" : "", - "k" : 1, - "n" : 1, - "weights" : { - - }, - "h" : 1, - "debug_info" : "input.17_scale", - "constant_blob" : 31, - "top" : "input.17_scale_constant_in_scale", - "w" : 768, - "type" : "load_constant", - "name" : "input.17_scale_constant_in_scale" - }, - { - "bottom" : "", - "k" : 1, - "n" : 1, - "weights" : { - - }, - "h" : 1, - "debug_info" : "input.17_scale", - "constant_blob" : 33, - "top" : "input.17_scale_constant_in_bias", - "w" : 768, - "type" : "load_constant", - "name" : "input.17_scale_constant_in_bias" - }, - { - "bottom" : "input.17_mvn,input.17_scale_constant_in_scale", - "alpha" : 1, - "operation" : 1, - "weights" : { - - }, - "fused_relu" : 0, - "debug_info" : "input.17_scale", - "top" : "input.17_scale_mul_out", - "type" : "elementwise", - "name" : "input.17_scale_mul_out", - "beta" : 0 - }, - { - "bottom" : "input.17_scale_mul_out,input.17_scale_constant_in_bias", - "alpha" : 1, - "operation" : 0, - "weights" : { - - }, - "fused_relu" : 0, - "debug_info" : "input.17_scale", - "top" : "input.17_scale", - "type" : "elementwise", - "name" : "input.17_scale", - "beta" : 0 - }, - { - "name" : "input.17", - "weights" : { - - }, - "dst_w" : 768, - "version" : 1, - "dst_n" : 1, - "dst_nd_rank" : 3, - "type" : "reshape", - "dst_h" : 1500, - "mode" : 0, - "dynamic_shape" : false, - "bottom" : "input.17_scale", - "debug_info" : "input.17", - "dst_seq" : 1, - "dst_k" : 1, - "top" : "input.17" - }, - { - "nB" : 768, - "top" : "input.19", - "has_biases" : 1, - "weights" : { - - }, - "nC" : 3072, - "blob_weights" : 37, - "type" : "inner_product", - "has_relu" : 0, - "bottom" : "input.17", - "blob_biases" : 35, - "has_tanh" : 0, - "debug_info" : "input.19", - "name" : "input.19", - "has_prelu" : 0 - }, - { - "bottom" : "input.19", - "weights" : { - - }, - "mode" : 19, - "debug_info" : "input.21", - "top" : "input.21", - "type" : "activation", - "name" : "input.21" - }, - { - "nB" : 3072, - "top" : "input.25", - "has_biases" : 1, - "weights" : { - - }, - "nC" : 768, - "blob_weights" : 41, - "type" : "inner_product", - "has_relu" : 0, - "bottom" : "input.21", - "blob_biases" : 39, - "has_tanh" : 0, - "debug_info" : "input.25", - "name" : "input.25", - "has_prelu" : 0 - }, - { - "bottom" : "input.15,input.25", - "alpha" : 1, - "operation" : 0, - "weights" : { - - }, - "fused_relu" : 0, - "debug_info" : "input.27", - "top" : "input.27", - "type" : "elementwise", - "name" : "input.27", - "beta" : 0 - }, - { - "name" : "hidden_states.7_reshape", - "weights" : { - - }, - "dst_w" : 768, - "version" : 1, - "dst_n" : 1, - "dst_nd_rank" : 4, - "type" : "reshape", - "dst_h" : 1, - "mode" : 0, - "dynamic_shape" : false, - "bottom" : "input.27", - "debug_info" : "hidden_states.7_reshape", - "dst_seq" : 1, - "dst_k" : 1500, - "top" : "hidden_states.7_reshape" - }, - { - "bottom" : "hidden_states.7_reshape", - "weights" : { - - }, - "eps" : 9.9999997473787516e-06, - "debug_info" : "hidden_states.7_mvn", - "axis" : 1, - "normalization_mode" : 1, - "top" : "hidden_states.7_mvn", - "type" : "l2_normalize", - "name" : "hidden_states.7_mvn" - }, - { - "bottom" : "", - "k" : 1, - "n" : 1, - "weights" : { - - }, - "h" : 1, - "debug_info" : "hidden_states.7_scale", - "constant_blob" : 43, - "top" : "hidden_states.7_scale_constant_in_scale", - "w" : 768, - "type" : "load_constant", - "name" : "hidden_states.7_scale_constant_in_scale" - }, - { - "bottom" : "", - "k" : 1, - "n" : 1, - "weights" : { - - }, - "h" : 1, - "debug_info" : "hidden_states.7_scale", - "constant_blob" : 45, - "top" : "hidden_states.7_scale_constant_in_bias", - "w" : 768, - "type" : "load_constant", - "name" : "hidden_states.7_scale_constant_in_bias" - }, - { - "bottom" : "hidden_states.7_mvn,hidden_states.7_scale_constant_in_scale", - "alpha" : 1, - "operation" : 1, - "weights" : { - - }, - "fused_relu" : 0, - "debug_info" : "hidden_states.7_scale", - "top" : "hidden_states.7_scale_mul_out", - "type" : "elementwise", - "name" : "hidden_states.7_scale_mul_out", - "beta" : 0 - }, - { - "bottom" : "hidden_states.7_scale_mul_out,hidden_states.7_scale_constant_in_bias", - "alpha" : 1, - "operation" : 0, - "weights" : { - - }, - "fused_relu" : 0, - "debug_info" : "hidden_states.7_scale", - "top" : "hidden_states.7_scale", - "type" : "elementwise", - "name" : "hidden_states.7_scale", - "beta" : 0 - }, - { - "name" : "hidden_states.7", - "weights" : { - - }, - "dst_w" : 768, - "version" : 1, - "dst_n" : 1, - "dst_nd_rank" : 3, - "type" : "reshape", - "dst_h" : 1500, - "mode" : 0, - "dynamic_shape" : false, - "bottom" : "hidden_states.7_scale", - "debug_info" : "hidden_states.7", - "dst_seq" : 1, - "dst_k" : 1, - "top" : "hidden_states.7" - }, - { - "nB" : 768, - "top" : "147", - "has_biases" : 1, - "weights" : { - - }, - "nC" : 768, - "blob_weights" : 49, - "type" : "inner_product", - "has_relu" : 0, - "bottom" : "hidden_states.7", - "blob_biases" : 47, - "has_tanh" : 0, - "debug_info" : "147", - "name" : "147", - "has_prelu" : 0 - }, - { - "bottom" : "147", - "alpha" : 0.125, - "operation" : 1, - "weights" : { - - }, - "fused_relu" : 0, - "debug_info" : "tensor.7", - "top" : "tensor.7", - "type" : "elementwise", - "name" : "tensor.7", - "beta" : 0 - }, - { - "name" : "151", - "weights" : { - - }, - "dst_w" : 64, - "version" : 1, - "dst_n" : 1, - "dst_nd_rank" : 4, - "type" : "reshape", - "dst_h" : 12, - "mode" : 0, - "dynamic_shape" : false, - "bottom" : "tensor.7", - "debug_info" : "151", - "dst_seq" : 1, - "dst_k" : 1500, - "top" : "151" - }, - { - "nB" : 768, - "top" : "tensor.9", - "has_biases" : 1, - "weights" : { - - }, - "nC" : 768, - "blob_weights" : 53, - "type" : "inner_product", - "has_relu" : 0, - "bottom" : "hidden_states.7", - "blob_biases" : 51, - "has_tanh" : 0, - "debug_info" : "tensor.9", - "name" : "tensor.9", - "has_prelu" : 0 - }, - { - "name" : "157", - "weights" : { - - }, - "dst_w" : 64, - "version" : 1, - "dst_n" : 1, - "dst_nd_rank" : 4, - "type" : "reshape", - "dst_h" : 12, - "mode" : 0, - "dynamic_shape" : false, - "bottom" : "tensor.9", - "debug_info" : "157", - "dst_seq" : 1, - "dst_k" : -1, - "top" : "157" - }, - { - "nB" : 768, - "top" : "tensor.11", - "has_biases" : 1, - "weights" : { - - }, - "nC" : 768, - "blob_weights" : 57, - "type" : "inner_product", - "has_relu" : 0, - "bottom" : "hidden_states.7", - "blob_biases" : 55, - "has_tanh" : 0, - "debug_info" : "tensor.11", - "name" : "tensor.11", - "has_prelu" : 0 - }, - { - "name" : "164", - "weights" : { - - }, - "dst_w" : 64, - "version" : 1, - "dst_n" : 1, - "dst_nd_rank" : 4, - "type" : "reshape", - "dst_h" : 12, - "mode" : 0, - "dynamic_shape" : false, - "bottom" : "tensor.11", - "debug_info" : "164", - "dst_seq" : 1, - "dst_k" : -1, - "top" : "164" - }, - { - "axis_h" : 0, - "axis_w" : 2, - "bottom" : "157", - "axis_k" : 1, - "axis_n" : 3, - "axis_seq" : 4, - "weights" : { - - }, - "debug_info" : "transpose_101", - "top" : "transpose_101", - "type" : "transpose", - "name" : "transpose_101" - }, - { - "axis_h" : 2, - "axis_w" : 0, - "bottom" : "151", - "axis_k" : 1, - "axis_n" : 3, - "axis_seq" : 4, - "weights" : { - - }, - "debug_info" : "transpose_102", - "top" : "transpose_102", - "type" : "transpose", - "name" : "transpose_102" - }, - { - "bottom" : "transpose_102,transpose_101", - "weights" : { - - }, - "debug_info" : "input.29", - "top" : "input.29", - "type" : "batch_matmul", - "name" : "input.29", - "channel_mode" : false - }, - { - "bottom" : "input.29", - "weights" : { - - }, - "nd_axis" : -1, - "debug_info" : "input.31", - "top" : "input.31", - "type" : "softmax_nd", - "name" : "input.31" - }, - { - "axis_h" : 2, - "axis_w" : 0, - "bottom" : "164", - "axis_k" : 1, - "axis_n" : 3, - "axis_seq" : 4, - "weights" : { - - }, - "debug_info" : "transpose_103", - "top" : "transpose_103", - "type" : "transpose", - "name" : "transpose_103" - }, - { - "bottom" : "input.31,transpose_103", - "weights" : { - - }, - "debug_info" : "attn_output.5", - "top" : "attn_output.5", - "type" : "batch_matmul", - "name" : "attn_output.5", - "channel_mode" : false - }, - { - "axis_h" : 2, - "axis_w" : 0, - "bottom" : "attn_output.5", - "axis_k" : 1, - "axis_n" : 3, - "axis_seq" : 4, - "weights" : { - - }, - "debug_info" : "transpose_100", - "top" : "transpose_100", - "type" : "transpose", - "name" : "transpose_100" - }, - { - "name" : "input.33", - "weights" : { - - }, - "dst_w" : 768, - "version" : 1, - "dst_n" : 1, - "dst_nd_rank" : 3, - "type" : "reshape", - "dst_h" : 1500, - "mode" : 0, - "dynamic_shape" : false, - "bottom" : "transpose_100", - "debug_info" : "input.33", - "dst_seq" : 1, - "dst_k" : 1, - "top" : "input.33" - }, - { - "nB" : 768, - "top" : "input.35", - "has_biases" : 1, - "weights" : { - - }, - "nC" : 768, - "blob_weights" : 61, - "type" : "inner_product", - "has_relu" : 0, - "bottom" : "input.33", - "blob_biases" : 59, - "has_tanh" : 0, - "debug_info" : "input.35", - "name" : "input.35", - "has_prelu" : 0 - }, - { - "bottom" : "input.27,input.35", - "alpha" : 1, - "operation" : 0, - "weights" : { - - }, - "fused_relu" : 0, - "debug_info" : "input.37", - "top" : "input.37", - "type" : "elementwise", - "name" : "input.37", - "beta" : 0 - }, - { - "name" : "input.39_reshape", - "weights" : { - - }, - "dst_w" : 768, - "version" : 1, - "dst_n" : 1, - "dst_nd_rank" : 4, - "type" : "reshape", - "dst_h" : 1, - "mode" : 0, - "dynamic_shape" : false, - "bottom" : "input.37", - "debug_info" : "input.39_reshape", - "dst_seq" : 1, - "dst_k" : 1500, - "top" : "input.39_reshape" - }, - { - "bottom" : "input.39_reshape", - "weights" : { - - }, - "eps" : 9.9999997473787516e-06, - "debug_info" : "input.39_mvn", - "axis" : 1, - "normalization_mode" : 1, - "top" : "input.39_mvn", - "type" : "l2_normalize", - "name" : "input.39_mvn" - }, - { - "bottom" : "", - "k" : 1, - "n" : 1, - "weights" : { - - }, - "h" : 1, - "debug_info" : "input.39_scale", - "constant_blob" : 63, - "top" : "input.39_scale_constant_in_scale", - "w" : 768, - "type" : "load_constant", - "name" : "input.39_scale_constant_in_scale" - }, - { - "bottom" : "", - "k" : 1, - "n" : 1, - "weights" : { - - }, - "h" : 1, - "debug_info" : "input.39_scale", - "constant_blob" : 65, - "top" : "input.39_scale_constant_in_bias", - "w" : 768, - "type" : "load_constant", - "name" : "input.39_scale_constant_in_bias" - }, - { - "bottom" : "input.39_mvn,input.39_scale_constant_in_scale", - "alpha" : 1, - "operation" : 1, - "weights" : { - - }, - "fused_relu" : 0, - "debug_info" : "input.39_scale", - "top" : "input.39_scale_mul_out", - "type" : "elementwise", - "name" : "input.39_scale_mul_out", - "beta" : 0 - }, - { - "bottom" : "input.39_scale_mul_out,input.39_scale_constant_in_bias", - "alpha" : 1, - "operation" : 0, - "weights" : { - - }, - "fused_relu" : 0, - "debug_info" : "input.39_scale", - "top" : "input.39_scale", - "type" : "elementwise", - "name" : "input.39_scale", - "beta" : 0 - }, - { - "name" : "input.39", - "weights" : { - - }, - "dst_w" : 768, - "version" : 1, - "dst_n" : 1, - "dst_nd_rank" : 3, - "type" : "reshape", - "dst_h" : 1500, - "mode" : 0, - "dynamic_shape" : false, - "bottom" : "input.39_scale", - "debug_info" : "input.39", - "dst_seq" : 1, - "dst_k" : 1, - "top" : "input.39" - }, - { - "nB" : 768, - "top" : "input.41", - "has_biases" : 1, - "weights" : { - - }, - "nC" : 3072, - "blob_weights" : 69, - "type" : "inner_product", - "has_relu" : 0, - "bottom" : "input.39", - "blob_biases" : 67, - "has_tanh" : 0, - "debug_info" : "input.41", - "name" : "input.41", - "has_prelu" : 0 - }, - { - "bottom" : "input.41", - "weights" : { - - }, - "mode" : 19, - "debug_info" : "input.43", - "top" : "input.43", - "type" : "activation", - "name" : "input.43" - }, - { - "nB" : 3072, - "top" : "input.47", - "has_biases" : 1, - "weights" : { - - }, - "nC" : 768, - "blob_weights" : 73, - "type" : "inner_product", - "has_relu" : 0, - "bottom" : "input.43", - "blob_biases" : 71, - "has_tanh" : 0, - "debug_info" : "input.47", - "name" : "input.47", - "has_prelu" : 0 - }, - { - "bottom" : "input.37,input.47", - "alpha" : 1, - "operation" : 0, - "weights" : { - - }, - "fused_relu" : 0, - "debug_info" : "input.49", - "top" : "input.49", - "type" : "elementwise", - "name" : "input.49", - "beta" : 0 - }, - { - "name" : "hidden_states.13_reshape", - "weights" : { - - }, - "dst_w" : 768, - "version" : 1, - "dst_n" : 1, - "dst_nd_rank" : 4, - "type" : "reshape", - "dst_h" : 1, - "mode" : 0, - "dynamic_shape" : false, - "bottom" : "input.49", - "debug_info" : "hidden_states.13_reshape", - "dst_seq" : 1, - "dst_k" : 1500, - "top" : "hidden_states.13_reshape" - }, - { - "bottom" : "hidden_states.13_reshape", - "weights" : { - - }, - "eps" : 9.9999997473787516e-06, - "debug_info" : "hidden_states.13_mvn", - "axis" : 1, - "normalization_mode" : 1, - "top" : "hidden_states.13_mvn", - "type" : "l2_normalize", - "name" : "hidden_states.13_mvn" - }, - { - "bottom" : "", - "k" : 1, - "n" : 1, - "weights" : { - - }, - "h" : 1, - "debug_info" : "hidden_states.13_scale", - "constant_blob" : 75, - "top" : "hidden_states.13_scale_constant_in_scale", - "w" : 768, - "type" : "load_constant", - "name" : "hidden_states.13_scale_constant_in_scale" - }, - { - "bottom" : "", - "k" : 1, - "n" : 1, - "weights" : { - - }, - "h" : 1, - "debug_info" : "hidden_states.13_scale", - "constant_blob" : 77, - "top" : "hidden_states.13_scale_constant_in_bias", - "w" : 768, - "type" : "load_constant", - "name" : "hidden_states.13_scale_constant_in_bias" - }, - { - "bottom" : "hidden_states.13_mvn,hidden_states.13_scale_constant_in_scale", - "alpha" : 1, - "operation" : 1, - "weights" : { - - }, - "fused_relu" : 0, - "debug_info" : "hidden_states.13_scale", - "top" : "hidden_states.13_scale_mul_out", - "type" : "elementwise", - "name" : "hidden_states.13_scale_mul_out", - "beta" : 0 - }, - { - "bottom" : "hidden_states.13_scale_mul_out,hidden_states.13_scale_constant_in_bias", - "alpha" : 1, - "operation" : 0, - "weights" : { - - }, - "fused_relu" : 0, - "debug_info" : "hidden_states.13_scale", - "top" : "hidden_states.13_scale", - "type" : "elementwise", - "name" : "hidden_states.13_scale", - "beta" : 0 - }, - { - "name" : "hidden_states.13", - "weights" : { - - }, - "dst_w" : 768, - "version" : 1, - "dst_n" : 1, - "dst_nd_rank" : 3, - "type" : "reshape", - "dst_h" : 1500, - "mode" : 0, - "dynamic_shape" : false, - "bottom" : "hidden_states.13_scale", - "debug_info" : "hidden_states.13", - "dst_seq" : 1, - "dst_k" : 1, - "top" : "hidden_states.13" - }, - { - "nB" : 768, - "top" : "211", - "has_biases" : 1, - "weights" : { - - }, - "nC" : 768, - "blob_weights" : 81, - "type" : "inner_product", - "has_relu" : 0, - "bottom" : "hidden_states.13", - "blob_biases" : 79, - "has_tanh" : 0, - "debug_info" : "211", - "name" : "211", - "has_prelu" : 0 - }, - { - "bottom" : "211", - "alpha" : 0.125, - "operation" : 1, - "weights" : { - - }, - "fused_relu" : 0, - "debug_info" : "tensor.13", - "top" : "tensor.13", - "type" : "elementwise", - "name" : "tensor.13", - "beta" : 0 - }, - { - "name" : "215", - "weights" : { - - }, - "dst_w" : 64, - "version" : 1, - "dst_n" : 1, - "dst_nd_rank" : 4, - "type" : "reshape", - "dst_h" : 12, - "mode" : 0, - "dynamic_shape" : false, - "bottom" : "tensor.13", - "debug_info" : "215", - "dst_seq" : 1, - "dst_k" : 1500, - "top" : "215" - }, - { - "nB" : 768, - "top" : "tensor.15", - "has_biases" : 1, - "weights" : { - - }, - "nC" : 768, - "blob_weights" : 85, - "type" : "inner_product", - "has_relu" : 0, - "bottom" : "hidden_states.13", - "blob_biases" : 83, - "has_tanh" : 0, - "debug_info" : "tensor.15", - "name" : "tensor.15", - "has_prelu" : 0 - }, - { - "name" : "221", - "weights" : { - - }, - "dst_w" : 64, - "version" : 1, - "dst_n" : 1, - "dst_nd_rank" : 4, - "type" : "reshape", - "dst_h" : 12, - "mode" : 0, - "dynamic_shape" : false, - "bottom" : "tensor.15", - "debug_info" : "221", - "dst_seq" : 1, - "dst_k" : -1, - "top" : "221" - }, - { - "nB" : 768, - "top" : "tensor.17", - "has_biases" : 1, - "weights" : { - - }, - "nC" : 768, - "blob_weights" : 89, - "type" : "inner_product", - "has_relu" : 0, - "bottom" : "hidden_states.13", - "blob_biases" : 87, - "has_tanh" : 0, - "debug_info" : "tensor.17", - "name" : "tensor.17", - "has_prelu" : 0 - }, - { - "name" : "228", - "weights" : { - - }, - "dst_w" : 64, - "version" : 1, - "dst_n" : 1, - "dst_nd_rank" : 4, - "type" : "reshape", - "dst_h" : 12, - "mode" : 0, - "dynamic_shape" : false, - "bottom" : "tensor.17", - "debug_info" : "228", - "dst_seq" : 1, - "dst_k" : -1, - "top" : "228" - }, - { - "axis_h" : 0, - "axis_w" : 2, - "bottom" : "221", - "axis_k" : 1, - "axis_n" : 3, - "axis_seq" : 4, - "weights" : { - - }, - "debug_info" : "transpose_97", - "top" : "transpose_97", - "type" : "transpose", - "name" : "transpose_97" - }, - { - "axis_h" : 2, - "axis_w" : 0, - "bottom" : "215", - "axis_k" : 1, - "axis_n" : 3, - "axis_seq" : 4, - "weights" : { - - }, - "debug_info" : "transpose_98", - "top" : "transpose_98", - "type" : "transpose", - "name" : "transpose_98" - }, - { - "bottom" : "transpose_98,transpose_97", - "weights" : { - - }, - "debug_info" : "input.51", - "top" : "input.51", - "type" : "batch_matmul", - "name" : "input.51", - "channel_mode" : false - }, - { - "bottom" : "input.51", - "weights" : { - - }, - "nd_axis" : -1, - "debug_info" : "input.53", - "top" : "input.53", - "type" : "softmax_nd", - "name" : "input.53" - }, - { - "axis_h" : 2, - "axis_w" : 0, - "bottom" : "228", - "axis_k" : 1, - "axis_n" : 3, - "axis_seq" : 4, - "weights" : { - - }, - "debug_info" : "transpose_99", - "top" : "transpose_99", - "type" : "transpose", - "name" : "transpose_99" - }, - { - "bottom" : "input.53,transpose_99", - "weights" : { - - }, - "debug_info" : "attn_output.9", - "top" : "attn_output.9", - "type" : "batch_matmul", - "name" : "attn_output.9", - "channel_mode" : false - }, - { - "axis_h" : 2, - "axis_w" : 0, - "bottom" : "attn_output.9", - "axis_k" : 1, - "axis_n" : 3, - "axis_seq" : 4, - "weights" : { - - }, - "debug_info" : "transpose_96", - "top" : "transpose_96", - "type" : "transpose", - "name" : "transpose_96" - }, - { - "name" : "input.55", - "weights" : { - - }, - "dst_w" : 768, - "version" : 1, - "dst_n" : 1, - "dst_nd_rank" : 3, - "type" : "reshape", - "dst_h" : 1500, - "mode" : 0, - "dynamic_shape" : false, - "bottom" : "transpose_96", - "debug_info" : "input.55", - "dst_seq" : 1, - "dst_k" : 1, - "top" : "input.55" - }, - { - "nB" : 768, - "top" : "input.57", - "has_biases" : 1, - "weights" : { - - }, - "nC" : 768, - "blob_weights" : 93, - "type" : "inner_product", - "has_relu" : 0, - "bottom" : "input.55", - "blob_biases" : 91, - "has_tanh" : 0, - "debug_info" : "input.57", - "name" : "input.57", - "has_prelu" : 0 - }, - { - "bottom" : "input.49,input.57", - "alpha" : 1, - "operation" : 0, - "weights" : { - - }, - "fused_relu" : 0, - "debug_info" : "input.59", - "top" : "input.59", - "type" : "elementwise", - "name" : "input.59", - "beta" : 0 - }, - { - "name" : "input.61_reshape", - "weights" : { - - }, - "dst_w" : 768, - "version" : 1, - "dst_n" : 1, - "dst_nd_rank" : 4, - "type" : "reshape", - "dst_h" : 1, - "mode" : 0, - "dynamic_shape" : false, - "bottom" : "input.59", - "debug_info" : "input.61_reshape", - "dst_seq" : 1, - "dst_k" : 1500, - "top" : "input.61_reshape" - }, - { - "bottom" : "input.61_reshape", - "weights" : { - - }, - "eps" : 9.9999997473787516e-06, - "debug_info" : "input.61_mvn", - "axis" : 1, - "normalization_mode" : 1, - "top" : "input.61_mvn", - "type" : "l2_normalize", - "name" : "input.61_mvn" - }, - { - "bottom" : "", - "k" : 1, - "n" : 1, - "weights" : { - - }, - "h" : 1, - "debug_info" : "input.61_scale", - "constant_blob" : 95, - "top" : "input.61_scale_constant_in_scale", - "w" : 768, - "type" : "load_constant", - "name" : "input.61_scale_constant_in_scale" - }, - { - "bottom" : "", - "k" : 1, - "n" : 1, - "weights" : { - - }, - "h" : 1, - "debug_info" : "input.61_scale", - "constant_blob" : 97, - "top" : "input.61_scale_constant_in_bias", - "w" : 768, - "type" : "load_constant", - "name" : "input.61_scale_constant_in_bias" - }, - { - "bottom" : "input.61_mvn,input.61_scale_constant_in_scale", - "alpha" : 1, - "operation" : 1, - "weights" : { - - }, - "fused_relu" : 0, - "debug_info" : "input.61_scale", - "top" : "input.61_scale_mul_out", - "type" : "elementwise", - "name" : "input.61_scale_mul_out", - "beta" : 0 - }, - { - "bottom" : "input.61_scale_mul_out,input.61_scale_constant_in_bias", - "alpha" : 1, - "operation" : 0, - "weights" : { - - }, - "fused_relu" : 0, - "debug_info" : "input.61_scale", - "top" : "input.61_scale", - "type" : "elementwise", - "name" : "input.61_scale", - "beta" : 0 - }, - { - "name" : "input.61", - "weights" : { - - }, - "dst_w" : 768, - "version" : 1, - "dst_n" : 1, - "dst_nd_rank" : 3, - "type" : "reshape", - "dst_h" : 1500, - "mode" : 0, - "dynamic_shape" : false, - "bottom" : "input.61_scale", - "debug_info" : "input.61", - "dst_seq" : 1, - "dst_k" : 1, - "top" : "input.61" - }, - { - "nB" : 768, - "top" : "input.63", - "has_biases" : 1, - "weights" : { - - }, - "nC" : 3072, - "blob_weights" : 101, - "type" : "inner_product", - "has_relu" : 0, - "bottom" : "input.61", - "blob_biases" : 99, - "has_tanh" : 0, - "debug_info" : "input.63", - "name" : "input.63", - "has_prelu" : 0 - }, - { - "bottom" : "input.63", - "weights" : { - - }, - "mode" : 19, - "debug_info" : "input.65", - "top" : "input.65", - "type" : "activation", - "name" : "input.65" - }, - { - "nB" : 3072, - "top" : "input.69", - "has_biases" : 1, - "weights" : { - - }, - "nC" : 768, - "blob_weights" : 105, - "type" : "inner_product", - "has_relu" : 0, - "bottom" : "input.65", - "blob_biases" : 103, - "has_tanh" : 0, - "debug_info" : "input.69", - "name" : "input.69", - "has_prelu" : 0 - }, - { - "bottom" : "input.59,input.69", - "alpha" : 1, - "operation" : 0, - "weights" : { - - }, - "fused_relu" : 0, - "debug_info" : "input.71", - "top" : "input.71", - "type" : "elementwise", - "name" : "input.71", - "beta" : 0 - }, - { - "name" : "hidden_states.19_reshape", - "weights" : { - - }, - "dst_w" : 768, - "version" : 1, - "dst_n" : 1, - "dst_nd_rank" : 4, - "type" : "reshape", - "dst_h" : 1, - "mode" : 0, - "dynamic_shape" : false, - "bottom" : "input.71", - "debug_info" : "hidden_states.19_reshape", - "dst_seq" : 1, - "dst_k" : 1500, - "top" : "hidden_states.19_reshape" - }, - { - "bottom" : "hidden_states.19_reshape", - "weights" : { - - }, - "eps" : 9.9999997473787516e-06, - "debug_info" : "hidden_states.19_mvn", - "axis" : 1, - "normalization_mode" : 1, - "top" : "hidden_states.19_mvn", - "type" : "l2_normalize", - "name" : "hidden_states.19_mvn" - }, - { - "bottom" : "", - "k" : 1, - "n" : 1, - "weights" : { - - }, - "h" : 1, - "debug_info" : "hidden_states.19_scale", - "constant_blob" : 107, - "top" : "hidden_states.19_scale_constant_in_scale", - "w" : 768, - "type" : "load_constant", - "name" : "hidden_states.19_scale_constant_in_scale" - }, - { - "bottom" : "", - "k" : 1, - "n" : 1, - "weights" : { - - }, - "h" : 1, - "debug_info" : "hidden_states.19_scale", - "constant_blob" : 109, - "top" : "hidden_states.19_scale_constant_in_bias", - "w" : 768, - "type" : "load_constant", - "name" : "hidden_states.19_scale_constant_in_bias" - }, - { - "bottom" : "hidden_states.19_mvn,hidden_states.19_scale_constant_in_scale", - "alpha" : 1, - "operation" : 1, - "weights" : { - - }, - "fused_relu" : 0, - "debug_info" : "hidden_states.19_scale", - "top" : "hidden_states.19_scale_mul_out", - "type" : "elementwise", - "name" : "hidden_states.19_scale_mul_out", - "beta" : 0 - }, - { - "bottom" : "hidden_states.19_scale_mul_out,hidden_states.19_scale_constant_in_bias", - "alpha" : 1, - "operation" : 0, - "weights" : { - - }, - "fused_relu" : 0, - "debug_info" : "hidden_states.19_scale", - "top" : "hidden_states.19_scale", - "type" : "elementwise", - "name" : "hidden_states.19_scale", - "beta" : 0 - }, - { - "name" : "hidden_states.19", - "weights" : { - - }, - "dst_w" : 768, - "version" : 1, - "dst_n" : 1, - "dst_nd_rank" : 3, - "type" : "reshape", - "dst_h" : 1500, - "mode" : 0, - "dynamic_shape" : false, - "bottom" : "hidden_states.19_scale", - "debug_info" : "hidden_states.19", - "dst_seq" : 1, - "dst_k" : 1, - "top" : "hidden_states.19" - }, - { - "nB" : 768, - "top" : "275", - "has_biases" : 1, - "weights" : { - - }, - "nC" : 768, - "blob_weights" : 113, - "type" : "inner_product", - "has_relu" : 0, - "bottom" : "hidden_states.19", - "blob_biases" : 111, - "has_tanh" : 0, - "debug_info" : "275", - "name" : "275", - "has_prelu" : 0 - }, - { - "bottom" : "275", - "alpha" : 0.125, - "operation" : 1, - "weights" : { - - }, - "fused_relu" : 0, - "debug_info" : "tensor.19", - "top" : "tensor.19", - "type" : "elementwise", - "name" : "tensor.19", - "beta" : 0 - }, - { - "name" : "279", - "weights" : { - - }, - "dst_w" : 64, - "version" : 1, - "dst_n" : 1, - "dst_nd_rank" : 4, - "type" : "reshape", - "dst_h" : 12, - "mode" : 0, - "dynamic_shape" : false, - "bottom" : "tensor.19", - "debug_info" : "279", - "dst_seq" : 1, - "dst_k" : 1500, - "top" : "279" - }, - { - "nB" : 768, - "top" : "tensor.21", - "has_biases" : 1, - "weights" : { - - }, - "nC" : 768, - "blob_weights" : 117, - "type" : "inner_product", - "has_relu" : 0, - "bottom" : "hidden_states.19", - "blob_biases" : 115, - "has_tanh" : 0, - "debug_info" : "tensor.21", - "name" : "tensor.21", - "has_prelu" : 0 - }, - { - "name" : "285", - "weights" : { - - }, - "dst_w" : 64, - "version" : 1, - "dst_n" : 1, - "dst_nd_rank" : 4, - "type" : "reshape", - "dst_h" : 12, - "mode" : 0, - "dynamic_shape" : false, - "bottom" : "tensor.21", - "debug_info" : "285", - "dst_seq" : 1, - "dst_k" : -1, - "top" : "285" - }, - { - "nB" : 768, - "top" : "tensor.23", - "has_biases" : 1, - "weights" : { - - }, - "nC" : 768, - "blob_weights" : 121, - "type" : "inner_product", - "has_relu" : 0, - "bottom" : "hidden_states.19", - "blob_biases" : 119, - "has_tanh" : 0, - "debug_info" : "tensor.23", - "name" : "tensor.23", - "has_prelu" : 0 - }, - { - "name" : "292", - "weights" : { - - }, - "dst_w" : 64, - "version" : 1, - "dst_n" : 1, - "dst_nd_rank" : 4, - "type" : "reshape", - "dst_h" : 12, - "mode" : 0, - "dynamic_shape" : false, - "bottom" : "tensor.23", - "debug_info" : "292", - "dst_seq" : 1, - "dst_k" : -1, - "top" : "292" - }, - { - "axis_h" : 0, - "axis_w" : 2, - "bottom" : "285", - "axis_k" : 1, - "axis_n" : 3, - "axis_seq" : 4, - "weights" : { - - }, - "debug_info" : "transpose_93", - "top" : "transpose_93", - "type" : "transpose", - "name" : "transpose_93" - }, - { - "axis_h" : 2, - "axis_w" : 0, - "bottom" : "279", - "axis_k" : 1, - "axis_n" : 3, - "axis_seq" : 4, - "weights" : { - - }, - "debug_info" : "transpose_94", - "top" : "transpose_94", - "type" : "transpose", - "name" : "transpose_94" - }, - { - "bottom" : "transpose_94,transpose_93", - "weights" : { - - }, - "debug_info" : "input.73", - "top" : "input.73", - "type" : "batch_matmul", - "name" : "input.73", - "channel_mode" : false - }, - { - "bottom" : "input.73", - "weights" : { - - }, - "nd_axis" : -1, - "debug_info" : "input.75", - "top" : "input.75", - "type" : "softmax_nd", - "name" : "input.75" - }, - { - "axis_h" : 2, - "axis_w" : 0, - "bottom" : "292", - "axis_k" : 1, - "axis_n" : 3, - "axis_seq" : 4, - "weights" : { - - }, - "debug_info" : "transpose_95", - "top" : "transpose_95", - "type" : "transpose", - "name" : "transpose_95" - }, - { - "bottom" : "input.75,transpose_95", - "weights" : { - - }, - "debug_info" : "attn_output.13", - "top" : "attn_output.13", - "type" : "batch_matmul", - "name" : "attn_output.13", - "channel_mode" : false - }, - { - "axis_h" : 2, - "axis_w" : 0, - "bottom" : "attn_output.13", - "axis_k" : 1, - "axis_n" : 3, - "axis_seq" : 4, - "weights" : { - - }, - "debug_info" : "transpose_92", - "top" : "transpose_92", - "type" : "transpose", - "name" : "transpose_92" - }, - { - "name" : "input.77", - "weights" : { - - }, - "dst_w" : 768, - "version" : 1, - "dst_n" : 1, - "dst_nd_rank" : 3, - "type" : "reshape", - "dst_h" : 1500, - "mode" : 0, - "dynamic_shape" : false, - "bottom" : "transpose_92", - "debug_info" : "input.77", - "dst_seq" : 1, - "dst_k" : 1, - "top" : "input.77" - }, - { - "nB" : 768, - "top" : "input.79", - "has_biases" : 1, - "weights" : { - - }, - "nC" : 768, - "blob_weights" : 125, - "type" : "inner_product", - "has_relu" : 0, - "bottom" : "input.77", - "blob_biases" : 123, - "has_tanh" : 0, - "debug_info" : "input.79", - "name" : "input.79", - "has_prelu" : 0 - }, - { - "bottom" : "input.71,input.79", - "alpha" : 1, - "operation" : 0, - "weights" : { - - }, - "fused_relu" : 0, - "debug_info" : "input.81", - "top" : "input.81", - "type" : "elementwise", - "name" : "input.81", - "beta" : 0 - }, - { - "name" : "input.83_reshape", - "weights" : { - - }, - "dst_w" : 768, - "version" : 1, - "dst_n" : 1, - "dst_nd_rank" : 4, - "type" : "reshape", - "dst_h" : 1, - "mode" : 0, - "dynamic_shape" : false, - "bottom" : "input.81", - "debug_info" : "input.83_reshape", - "dst_seq" : 1, - "dst_k" : 1500, - "top" : "input.83_reshape" - }, - { - "bottom" : "input.83_reshape", - "weights" : { - - }, - "eps" : 9.9999997473787516e-06, - "debug_info" : "input.83_mvn", - "axis" : 1, - "normalization_mode" : 1, - "top" : "input.83_mvn", - "type" : "l2_normalize", - "name" : "input.83_mvn" - }, - { - "bottom" : "", - "k" : 1, - "n" : 1, - "weights" : { - - }, - "h" : 1, - "debug_info" : "input.83_scale", - "constant_blob" : 127, - "top" : "input.83_scale_constant_in_scale", - "w" : 768, - "type" : "load_constant", - "name" : "input.83_scale_constant_in_scale" - }, - { - "bottom" : "", - "k" : 1, - "n" : 1, - "weights" : { - - }, - "h" : 1, - "debug_info" : "input.83_scale", - "constant_blob" : 129, - "top" : "input.83_scale_constant_in_bias", - "w" : 768, - "type" : "load_constant", - "name" : "input.83_scale_constant_in_bias" - }, - { - "bottom" : "input.83_mvn,input.83_scale_constant_in_scale", - "alpha" : 1, - "operation" : 1, - "weights" : { - - }, - "fused_relu" : 0, - "debug_info" : "input.83_scale", - "top" : "input.83_scale_mul_out", - "type" : "elementwise", - "name" : "input.83_scale_mul_out", - "beta" : 0 - }, - { - "bottom" : "input.83_scale_mul_out,input.83_scale_constant_in_bias", - "alpha" : 1, - "operation" : 0, - "weights" : { - - }, - "fused_relu" : 0, - "debug_info" : "input.83_scale", - "top" : "input.83_scale", - "type" : "elementwise", - "name" : "input.83_scale", - "beta" : 0 - }, - { - "name" : "input.83", - "weights" : { - - }, - "dst_w" : 768, - "version" : 1, - "dst_n" : 1, - "dst_nd_rank" : 3, - "type" : "reshape", - "dst_h" : 1500, - "mode" : 0, - "dynamic_shape" : false, - "bottom" : "input.83_scale", - "debug_info" : "input.83", - "dst_seq" : 1, - "dst_k" : 1, - "top" : "input.83" - }, - { - "nB" : 768, - "top" : "input.85", - "has_biases" : 1, - "weights" : { - - }, - "nC" : 3072, - "blob_weights" : 133, - "type" : "inner_product", - "has_relu" : 0, - "bottom" : "input.83", - "blob_biases" : 131, - "has_tanh" : 0, - "debug_info" : "input.85", - "name" : "input.85", - "has_prelu" : 0 - }, - { - "bottom" : "input.85", - "weights" : { - - }, - "mode" : 19, - "debug_info" : "input.87", - "top" : "input.87", - "type" : "activation", - "name" : "input.87" - }, - { - "nB" : 3072, - "top" : "input.91", - "has_biases" : 1, - "weights" : { - - }, - "nC" : 768, - "blob_weights" : 137, - "type" : "inner_product", - "has_relu" : 0, - "bottom" : "input.87", - "blob_biases" : 135, - "has_tanh" : 0, - "debug_info" : "input.91", - "name" : "input.91", - "has_prelu" : 0 - }, - { - "bottom" : "input.81,input.91", - "alpha" : 1, - "operation" : 0, - "weights" : { - - }, - "fused_relu" : 0, - "debug_info" : "input.93", - "top" : "input.93", - "type" : "elementwise", - "name" : "input.93", - "beta" : 0 - }, - { - "name" : "hidden_states.25_reshape", - "weights" : { - - }, - "dst_w" : 768, - "version" : 1, - "dst_n" : 1, - "dst_nd_rank" : 4, - "type" : "reshape", - "dst_h" : 1, - "mode" : 0, - "dynamic_shape" : false, - "bottom" : "input.93", - "debug_info" : "hidden_states.25_reshape", - "dst_seq" : 1, - "dst_k" : 1500, - "top" : "hidden_states.25_reshape" - }, - { - "bottom" : "hidden_states.25_reshape", - "weights" : { - - }, - "eps" : 9.9999997473787516e-06, - "debug_info" : "hidden_states.25_mvn", - "axis" : 1, - "normalization_mode" : 1, - "top" : "hidden_states.25_mvn", - "type" : "l2_normalize", - "name" : "hidden_states.25_mvn" - }, - { - "bottom" : "", - "k" : 1, - "n" : 1, - "weights" : { - - }, - "h" : 1, - "debug_info" : "hidden_states.25_scale", - "constant_blob" : 139, - "top" : "hidden_states.25_scale_constant_in_scale", - "w" : 768, - "type" : "load_constant", - "name" : "hidden_states.25_scale_constant_in_scale" - }, - { - "bottom" : "", - "k" : 1, - "n" : 1, - "weights" : { - - }, - "h" : 1, - "debug_info" : "hidden_states.25_scale", - "constant_blob" : 141, - "top" : "hidden_states.25_scale_constant_in_bias", - "w" : 768, - "type" : "load_constant", - "name" : "hidden_states.25_scale_constant_in_bias" - }, - { - "bottom" : "hidden_states.25_mvn,hidden_states.25_scale_constant_in_scale", - "alpha" : 1, - "operation" : 1, - "weights" : { - - }, - "fused_relu" : 0, - "debug_info" : "hidden_states.25_scale", - "top" : "hidden_states.25_scale_mul_out", - "type" : "elementwise", - "name" : "hidden_states.25_scale_mul_out", - "beta" : 0 - }, - { - "bottom" : "hidden_states.25_scale_mul_out,hidden_states.25_scale_constant_in_bias", - "alpha" : 1, - "operation" : 0, - "weights" : { - - }, - "fused_relu" : 0, - "debug_info" : "hidden_states.25_scale", - "top" : "hidden_states.25_scale", - "type" : "elementwise", - "name" : "hidden_states.25_scale", - "beta" : 0 - }, - { - "name" : "hidden_states.25", - "weights" : { - - }, - "dst_w" : 768, - "version" : 1, - "dst_n" : 1, - "dst_nd_rank" : 3, - "type" : "reshape", - "dst_h" : 1500, - "mode" : 0, - "dynamic_shape" : false, - "bottom" : "hidden_states.25_scale", - "debug_info" : "hidden_states.25", - "dst_seq" : 1, - "dst_k" : 1, - "top" : "hidden_states.25" - }, - { - "nB" : 768, - "top" : "339", - "has_biases" : 1, - "weights" : { - - }, - "nC" : 768, - "blob_weights" : 145, - "type" : "inner_product", - "has_relu" : 0, - "bottom" : "hidden_states.25", - "blob_biases" : 143, - "has_tanh" : 0, - "debug_info" : "339", - "name" : "339", - "has_prelu" : 0 - }, - { - "bottom" : "339", - "alpha" : 0.125, - "operation" : 1, - "weights" : { - - }, - "fused_relu" : 0, - "debug_info" : "tensor.25", - "top" : "tensor.25", - "type" : "elementwise", - "name" : "tensor.25", - "beta" : 0 - }, - { - "name" : "343", - "weights" : { - - }, - "dst_w" : 64, - "version" : 1, - "dst_n" : 1, - "dst_nd_rank" : 4, - "type" : "reshape", - "dst_h" : 12, - "mode" : 0, - "dynamic_shape" : false, - "bottom" : "tensor.25", - "debug_info" : "343", - "dst_seq" : 1, - "dst_k" : 1500, - "top" : "343" - }, - { - "nB" : 768, - "top" : "tensor.27", - "has_biases" : 1, - "weights" : { - - }, - "nC" : 768, - "blob_weights" : 149, - "type" : "inner_product", - "has_relu" : 0, - "bottom" : "hidden_states.25", - "blob_biases" : 147, - "has_tanh" : 0, - "debug_info" : "tensor.27", - "name" : "tensor.27", - "has_prelu" : 0 - }, - { - "name" : "349", - "weights" : { - - }, - "dst_w" : 64, - "version" : 1, - "dst_n" : 1, - "dst_nd_rank" : 4, - "type" : "reshape", - "dst_h" : 12, - "mode" : 0, - "dynamic_shape" : false, - "bottom" : "tensor.27", - "debug_info" : "349", - "dst_seq" : 1, - "dst_k" : -1, - "top" : "349" - }, - { - "nB" : 768, - "top" : "tensor.29", - "has_biases" : 1, - "weights" : { - - }, - "nC" : 768, - "blob_weights" : 153, - "type" : "inner_product", - "has_relu" : 0, - "bottom" : "hidden_states.25", - "blob_biases" : 151, - "has_tanh" : 0, - "debug_info" : "tensor.29", - "name" : "tensor.29", - "has_prelu" : 0 - }, - { - "name" : "356", - "weights" : { - - }, - "dst_w" : 64, - "version" : 1, - "dst_n" : 1, - "dst_nd_rank" : 4, - "type" : "reshape", - "dst_h" : 12, - "mode" : 0, - "dynamic_shape" : false, - "bottom" : "tensor.29", - "debug_info" : "356", - "dst_seq" : 1, - "dst_k" : -1, - "top" : "356" - }, - { - "axis_h" : 0, - "axis_w" : 2, - "bottom" : "349", - "axis_k" : 1, - "axis_n" : 3, - "axis_seq" : 4, - "weights" : { - - }, - "debug_info" : "transpose_89", - "top" : "transpose_89", - "type" : "transpose", - "name" : "transpose_89" - }, - { - "axis_h" : 2, - "axis_w" : 0, - "bottom" : "343", - "axis_k" : 1, - "axis_n" : 3, - "axis_seq" : 4, - "weights" : { - - }, - "debug_info" : "transpose_90", - "top" : "transpose_90", - "type" : "transpose", - "name" : "transpose_90" - }, - { - "bottom" : "transpose_90,transpose_89", - "weights" : { - - }, - "debug_info" : "input.95", - "top" : "input.95", - "type" : "batch_matmul", - "name" : "input.95", - "channel_mode" : false - }, - { - "bottom" : "input.95", - "weights" : { - - }, - "nd_axis" : -1, - "debug_info" : "input.97", - "top" : "input.97", - "type" : "softmax_nd", - "name" : "input.97" - }, - { - "axis_h" : 2, - "axis_w" : 0, - "bottom" : "356", - "axis_k" : 1, - "axis_n" : 3, - "axis_seq" : 4, - "weights" : { - - }, - "debug_info" : "transpose_91", - "top" : "transpose_91", - "type" : "transpose", - "name" : "transpose_91" - }, - { - "bottom" : "input.97,transpose_91", - "weights" : { - - }, - "debug_info" : "attn_output.17", - "top" : "attn_output.17", - "type" : "batch_matmul", - "name" : "attn_output.17", - "channel_mode" : false - }, - { - "axis_h" : 2, - "axis_w" : 0, - "bottom" : "attn_output.17", - "axis_k" : 1, - "axis_n" : 3, - "axis_seq" : 4, - "weights" : { - - }, - "debug_info" : "transpose_88", - "top" : "transpose_88", - "type" : "transpose", - "name" : "transpose_88" - }, - { - "name" : "input.99", - "weights" : { - - }, - "dst_w" : 768, - "version" : 1, - "dst_n" : 1, - "dst_nd_rank" : 3, - "type" : "reshape", - "dst_h" : 1500, - "mode" : 0, - "dynamic_shape" : false, - "bottom" : "transpose_88", - "debug_info" : "input.99", - "dst_seq" : 1, - "dst_k" : 1, - "top" : "input.99" - }, - { - "nB" : 768, - "top" : "input.101", - "has_biases" : 1, - "weights" : { - - }, - "nC" : 768, - "blob_weights" : 157, - "type" : "inner_product", - "has_relu" : 0, - "bottom" : "input.99", - "blob_biases" : 155, - "has_tanh" : 0, - "debug_info" : "input.101", - "name" : "input.101", - "has_prelu" : 0 - }, - { - "bottom" : "input.93,input.101", - "alpha" : 1, - "operation" : 0, - "weights" : { - - }, - "fused_relu" : 0, - "debug_info" : "input.103", - "top" : "input.103", - "type" : "elementwise", - "name" : "input.103", - "beta" : 0 - }, - { - "name" : "input.105_reshape", - "weights" : { - - }, - "dst_w" : 768, - "version" : 1, - "dst_n" : 1, - "dst_nd_rank" : 4, - "type" : "reshape", - "dst_h" : 1, - "mode" : 0, - "dynamic_shape" : false, - "bottom" : "input.103", - "debug_info" : "input.105_reshape", - "dst_seq" : 1, - "dst_k" : 1500, - "top" : "input.105_reshape" - }, - { - "bottom" : "input.105_reshape", - "weights" : { - - }, - "eps" : 9.9999997473787516e-06, - "debug_info" : "input.105_mvn", - "axis" : 1, - "normalization_mode" : 1, - "top" : "input.105_mvn", - "type" : "l2_normalize", - "name" : "input.105_mvn" - }, - { - "bottom" : "", - "k" : 1, - "n" : 1, - "weights" : { - - }, - "h" : 1, - "debug_info" : "input.105_scale", - "constant_blob" : 159, - "top" : "input.105_scale_constant_in_scale", - "w" : 768, - "type" : "load_constant", - "name" : "input.105_scale_constant_in_scale" - }, - { - "bottom" : "", - "k" : 1, - "n" : 1, - "weights" : { - - }, - "h" : 1, - "debug_info" : "input.105_scale", - "constant_blob" : 161, - "top" : "input.105_scale_constant_in_bias", - "w" : 768, - "type" : "load_constant", - "name" : "input.105_scale_constant_in_bias" - }, - { - "bottom" : "input.105_mvn,input.105_scale_constant_in_scale", - "alpha" : 1, - "operation" : 1, - "weights" : { - - }, - "fused_relu" : 0, - "debug_info" : "input.105_scale", - "top" : "input.105_scale_mul_out", - "type" : "elementwise", - "name" : "input.105_scale_mul_out", - "beta" : 0 - }, - { - "bottom" : "input.105_scale_mul_out,input.105_scale_constant_in_bias", - "alpha" : 1, - "operation" : 0, - "weights" : { - - }, - "fused_relu" : 0, - "debug_info" : "input.105_scale", - "top" : "input.105_scale", - "type" : "elementwise", - "name" : "input.105_scale", - "beta" : 0 - }, - { - "name" : "input.105", - "weights" : { - - }, - "dst_w" : 768, - "version" : 1, - "dst_n" : 1, - "dst_nd_rank" : 3, - "type" : "reshape", - "dst_h" : 1500, - "mode" : 0, - "dynamic_shape" : false, - "bottom" : "input.105_scale", - "debug_info" : "input.105", - "dst_seq" : 1, - "dst_k" : 1, - "top" : "input.105" - }, - { - "nB" : 768, - "top" : "input.107", - "has_biases" : 1, - "weights" : { - - }, - "nC" : 3072, - "blob_weights" : 165, - "type" : "inner_product", - "has_relu" : 0, - "bottom" : "input.105", - "blob_biases" : 163, - "has_tanh" : 0, - "debug_info" : "input.107", - "name" : "input.107", - "has_prelu" : 0 - }, - { - "bottom" : "input.107", - "weights" : { - - }, - "mode" : 19, - "debug_info" : "input.109", - "top" : "input.109", - "type" : "activation", - "name" : "input.109" - }, - { - "nB" : 3072, - "top" : "input.113", - "has_biases" : 1, - "weights" : { - - }, - "nC" : 768, - "blob_weights" : 169, - "type" : "inner_product", - "has_relu" : 0, - "bottom" : "input.109", - "blob_biases" : 167, - "has_tanh" : 0, - "debug_info" : "input.113", - "name" : "input.113", - "has_prelu" : 0 - }, - { - "bottom" : "input.103,input.113", - "alpha" : 1, - "operation" : 0, - "weights" : { - - }, - "fused_relu" : 0, - "debug_info" : "input.115", - "top" : "input.115", - "type" : "elementwise", - "name" : "input.115", - "beta" : 0 - }, - { - "name" : "hidden_states.31_reshape", - "weights" : { - - }, - "dst_w" : 768, - "version" : 1, - "dst_n" : 1, - "dst_nd_rank" : 4, - "type" : "reshape", - "dst_h" : 1, - "mode" : 0, - "dynamic_shape" : false, - "bottom" : "input.115", - "debug_info" : "hidden_states.31_reshape", - "dst_seq" : 1, - "dst_k" : 1500, - "top" : "hidden_states.31_reshape" - }, - { - "bottom" : "hidden_states.31_reshape", - "weights" : { - - }, - "eps" : 9.9999997473787516e-06, - "debug_info" : "hidden_states.31_mvn", - "axis" : 1, - "normalization_mode" : 1, - "top" : "hidden_states.31_mvn", - "type" : "l2_normalize", - "name" : "hidden_states.31_mvn" - }, - { - "bottom" : "", - "k" : 1, - "n" : 1, - "weights" : { - - }, - "h" : 1, - "debug_info" : "hidden_states.31_scale", - "constant_blob" : 171, - "top" : "hidden_states.31_scale_constant_in_scale", - "w" : 768, - "type" : "load_constant", - "name" : "hidden_states.31_scale_constant_in_scale" - }, - { - "bottom" : "", - "k" : 1, - "n" : 1, - "weights" : { - - }, - "h" : 1, - "debug_info" : "hidden_states.31_scale", - "constant_blob" : 173, - "top" : "hidden_states.31_scale_constant_in_bias", - "w" : 768, - "type" : "load_constant", - "name" : "hidden_states.31_scale_constant_in_bias" - }, - { - "bottom" : "hidden_states.31_mvn,hidden_states.31_scale_constant_in_scale", - "alpha" : 1, - "operation" : 1, - "weights" : { - - }, - "fused_relu" : 0, - "debug_info" : "hidden_states.31_scale", - "top" : "hidden_states.31_scale_mul_out", - "type" : "elementwise", - "name" : "hidden_states.31_scale_mul_out", - "beta" : 0 - }, - { - "bottom" : "hidden_states.31_scale_mul_out,hidden_states.31_scale_constant_in_bias", - "alpha" : 1, - "operation" : 0, - "weights" : { - - }, - "fused_relu" : 0, - "debug_info" : "hidden_states.31_scale", - "top" : "hidden_states.31_scale", - "type" : "elementwise", - "name" : "hidden_states.31_scale", - "beta" : 0 - }, - { - "name" : "hidden_states.31", - "weights" : { - - }, - "dst_w" : 768, - "version" : 1, - "dst_n" : 1, - "dst_nd_rank" : 3, - "type" : "reshape", - "dst_h" : 1500, - "mode" : 0, - "dynamic_shape" : false, - "bottom" : "hidden_states.31_scale", - "debug_info" : "hidden_states.31", - "dst_seq" : 1, - "dst_k" : 1, - "top" : "hidden_states.31" - }, - { - "nB" : 768, - "top" : "403", - "has_biases" : 1, - "weights" : { - - }, - "nC" : 768, - "blob_weights" : 177, - "type" : "inner_product", - "has_relu" : 0, - "bottom" : "hidden_states.31", - "blob_biases" : 175, - "has_tanh" : 0, - "debug_info" : "403", - "name" : "403", - "has_prelu" : 0 - }, - { - "bottom" : "403", - "alpha" : 0.125, - "operation" : 1, - "weights" : { - - }, - "fused_relu" : 0, - "debug_info" : "tensor.31", - "top" : "tensor.31", - "type" : "elementwise", - "name" : "tensor.31", - "beta" : 0 - }, - { - "name" : "407", - "weights" : { - - }, - "dst_w" : 64, - "version" : 1, - "dst_n" : 1, - "dst_nd_rank" : 4, - "type" : "reshape", - "dst_h" : 12, - "mode" : 0, - "dynamic_shape" : false, - "bottom" : "tensor.31", - "debug_info" : "407", - "dst_seq" : 1, - "dst_k" : 1500, - "top" : "407" - }, - { - "nB" : 768, - "top" : "tensor.33", - "has_biases" : 1, - "weights" : { - - }, - "nC" : 768, - "blob_weights" : 181, - "type" : "inner_product", - "has_relu" : 0, - "bottom" : "hidden_states.31", - "blob_biases" : 179, - "has_tanh" : 0, - "debug_info" : "tensor.33", - "name" : "tensor.33", - "has_prelu" : 0 - }, - { - "name" : "413", - "weights" : { - - }, - "dst_w" : 64, - "version" : 1, - "dst_n" : 1, - "dst_nd_rank" : 4, - "type" : "reshape", - "dst_h" : 12, - "mode" : 0, - "dynamic_shape" : false, - "bottom" : "tensor.33", - "debug_info" : "413", - "dst_seq" : 1, - "dst_k" : -1, - "top" : "413" - }, - { - "nB" : 768, - "top" : "tensor.35", - "has_biases" : 1, - "weights" : { - - }, - "nC" : 768, - "blob_weights" : 185, - "type" : "inner_product", - "has_relu" : 0, - "bottom" : "hidden_states.31", - "blob_biases" : 183, - "has_tanh" : 0, - "debug_info" : "tensor.35", - "name" : "tensor.35", - "has_prelu" : 0 - }, - { - "name" : "420", - "weights" : { - - }, - "dst_w" : 64, - "version" : 1, - "dst_n" : 1, - "dst_nd_rank" : 4, - "type" : "reshape", - "dst_h" : 12, - "mode" : 0, - "dynamic_shape" : false, - "bottom" : "tensor.35", - "debug_info" : "420", - "dst_seq" : 1, - "dst_k" : -1, - "top" : "420" - }, - { - "axis_h" : 0, - "axis_w" : 2, - "bottom" : "413", - "axis_k" : 1, - "axis_n" : 3, - "axis_seq" : 4, - "weights" : { - - }, - "debug_info" : "transpose_85", - "top" : "transpose_85", - "type" : "transpose", - "name" : "transpose_85" - }, - { - "axis_h" : 2, - "axis_w" : 0, - "bottom" : "407", - "axis_k" : 1, - "axis_n" : 3, - "axis_seq" : 4, - "weights" : { - - }, - "debug_info" : "transpose_86", - "top" : "transpose_86", - "type" : "transpose", - "name" : "transpose_86" - }, - { - "bottom" : "transpose_86,transpose_85", - "weights" : { - - }, - "debug_info" : "input.117", - "top" : "input.117", - "type" : "batch_matmul", - "name" : "input.117", - "channel_mode" : false - }, - { - "bottom" : "input.117", - "weights" : { - - }, - "nd_axis" : -1, - "debug_info" : "input.119", - "top" : "input.119", - "type" : "softmax_nd", - "name" : "input.119" - }, - { - "axis_h" : 2, - "axis_w" : 0, - "bottom" : "420", - "axis_k" : 1, - "axis_n" : 3, - "axis_seq" : 4, - "weights" : { - - }, - "debug_info" : "transpose_87", - "top" : "transpose_87", - "type" : "transpose", - "name" : "transpose_87" - }, - { - "bottom" : "input.119,transpose_87", - "weights" : { - - }, - "debug_info" : "attn_output.21", - "top" : "attn_output.21", - "type" : "batch_matmul", - "name" : "attn_output.21", - "channel_mode" : false - }, - { - "axis_h" : 2, - "axis_w" : 0, - "bottom" : "attn_output.21", - "axis_k" : 1, - "axis_n" : 3, - "axis_seq" : 4, - "weights" : { - - }, - "debug_info" : "transpose_84", - "top" : "transpose_84", - "type" : "transpose", - "name" : "transpose_84" - }, - { - "name" : "input.121", - "weights" : { - - }, - "dst_w" : 768, - "version" : 1, - "dst_n" : 1, - "dst_nd_rank" : 3, - "type" : "reshape", - "dst_h" : 1500, - "mode" : 0, - "dynamic_shape" : false, - "bottom" : "transpose_84", - "debug_info" : "input.121", - "dst_seq" : 1, - "dst_k" : 1, - "top" : "input.121" - }, - { - "nB" : 768, - "top" : "input.123", - "has_biases" : 1, - "weights" : { - - }, - "nC" : 768, - "blob_weights" : 189, - "type" : "inner_product", - "has_relu" : 0, - "bottom" : "input.121", - "blob_biases" : 187, - "has_tanh" : 0, - "debug_info" : "input.123", - "name" : "input.123", - "has_prelu" : 0 - }, - { - "bottom" : "input.115,input.123", - "alpha" : 1, - "operation" : 0, - "weights" : { - - }, - "fused_relu" : 0, - "debug_info" : "input.125", - "top" : "input.125", - "type" : "elementwise", - "name" : "input.125", - "beta" : 0 - }, - { - "name" : "input.127_reshape", - "weights" : { - - }, - "dst_w" : 768, - "version" : 1, - "dst_n" : 1, - "dst_nd_rank" : 4, - "type" : "reshape", - "dst_h" : 1, - "mode" : 0, - "dynamic_shape" : false, - "bottom" : "input.125", - "debug_info" : "input.127_reshape", - "dst_seq" : 1, - "dst_k" : 1500, - "top" : "input.127_reshape" - }, - { - "bottom" : "input.127_reshape", - "weights" : { - - }, - "eps" : 9.9999997473787516e-06, - "debug_info" : "input.127_mvn", - "axis" : 1, - "normalization_mode" : 1, - "top" : "input.127_mvn", - "type" : "l2_normalize", - "name" : "input.127_mvn" - }, - { - "bottom" : "", - "k" : 1, - "n" : 1, - "weights" : { - - }, - "h" : 1, - "debug_info" : "input.127_scale", - "constant_blob" : 191, - "top" : "input.127_scale_constant_in_scale", - "w" : 768, - "type" : "load_constant", - "name" : "input.127_scale_constant_in_scale" - }, - { - "bottom" : "", - "k" : 1, - "n" : 1, - "weights" : { - - }, - "h" : 1, - "debug_info" : "input.127_scale", - "constant_blob" : 193, - "top" : "input.127_scale_constant_in_bias", - "w" : 768, - "type" : "load_constant", - "name" : "input.127_scale_constant_in_bias" - }, - { - "bottom" : "input.127_mvn,input.127_scale_constant_in_scale", - "alpha" : 1, - "operation" : 1, - "weights" : { - - }, - "fused_relu" : 0, - "debug_info" : "input.127_scale", - "top" : "input.127_scale_mul_out", - "type" : "elementwise", - "name" : "input.127_scale_mul_out", - "beta" : 0 - }, - { - "bottom" : "input.127_scale_mul_out,input.127_scale_constant_in_bias", - "alpha" : 1, - "operation" : 0, - "weights" : { - - }, - "fused_relu" : 0, - "debug_info" : "input.127_scale", - "top" : "input.127_scale", - "type" : "elementwise", - "name" : "input.127_scale", - "beta" : 0 - }, - { - "name" : "input.127", - "weights" : { - - }, - "dst_w" : 768, - "version" : 1, - "dst_n" : 1, - "dst_nd_rank" : 3, - "type" : "reshape", - "dst_h" : 1500, - "mode" : 0, - "dynamic_shape" : false, - "bottom" : "input.127_scale", - "debug_info" : "input.127", - "dst_seq" : 1, - "dst_k" : 1, - "top" : "input.127" - }, - { - "nB" : 768, - "top" : "input.129", - "has_biases" : 1, - "weights" : { - - }, - "nC" : 3072, - "blob_weights" : 197, - "type" : "inner_product", - "has_relu" : 0, - "bottom" : "input.127", - "blob_biases" : 195, - "has_tanh" : 0, - "debug_info" : "input.129", - "name" : "input.129", - "has_prelu" : 0 - }, - { - "bottom" : "input.129", - "weights" : { - - }, - "mode" : 19, - "debug_info" : "input.131", - "top" : "input.131", - "type" : "activation", - "name" : "input.131" - }, - { - "nB" : 3072, - "top" : "input.135", - "has_biases" : 1, - "weights" : { - - }, - "nC" : 768, - "blob_weights" : 201, - "type" : "inner_product", - "has_relu" : 0, - "bottom" : "input.131", - "blob_biases" : 199, - "has_tanh" : 0, - "debug_info" : "input.135", - "name" : "input.135", - "has_prelu" : 0 - }, - { - "bottom" : "input.125,input.135", - "alpha" : 1, - "operation" : 0, - "weights" : { - - }, - "fused_relu" : 0, - "debug_info" : "input.137", - "top" : "input.137", - "type" : "elementwise", - "name" : "input.137", - "beta" : 0 - }, - { - "name" : "hidden_states.37_reshape", - "weights" : { - - }, - "dst_w" : 768, - "version" : 1, - "dst_n" : 1, - "dst_nd_rank" : 4, - "type" : "reshape", - "dst_h" : 1, - "mode" : 0, - "dynamic_shape" : false, - "bottom" : "input.137", - "debug_info" : "hidden_states.37_reshape", - "dst_seq" : 1, - "dst_k" : 1500, - "top" : "hidden_states.37_reshape" - }, - { - "bottom" : "hidden_states.37_reshape", - "weights" : { - - }, - "eps" : 9.9999997473787516e-06, - "debug_info" : "hidden_states.37_mvn", - "axis" : 1, - "normalization_mode" : 1, - "top" : "hidden_states.37_mvn", - "type" : "l2_normalize", - "name" : "hidden_states.37_mvn" - }, - { - "bottom" : "", - "k" : 1, - "n" : 1, - "weights" : { - - }, - "h" : 1, - "debug_info" : "hidden_states.37_scale", - "constant_blob" : 203, - "top" : "hidden_states.37_scale_constant_in_scale", - "w" : 768, - "type" : "load_constant", - "name" : "hidden_states.37_scale_constant_in_scale" - }, - { - "bottom" : "", - "k" : 1, - "n" : 1, - "weights" : { - - }, - "h" : 1, - "debug_info" : "hidden_states.37_scale", - "constant_blob" : 205, - "top" : "hidden_states.37_scale_constant_in_bias", - "w" : 768, - "type" : "load_constant", - "name" : "hidden_states.37_scale_constant_in_bias" - }, - { - "bottom" : "hidden_states.37_mvn,hidden_states.37_scale_constant_in_scale", - "alpha" : 1, - "operation" : 1, - "weights" : { - - }, - "fused_relu" : 0, - "debug_info" : "hidden_states.37_scale", - "top" : "hidden_states.37_scale_mul_out", - "type" : "elementwise", - "name" : "hidden_states.37_scale_mul_out", - "beta" : 0 - }, - { - "bottom" : "hidden_states.37_scale_mul_out,hidden_states.37_scale_constant_in_bias", - "alpha" : 1, - "operation" : 0, - "weights" : { - - }, - "fused_relu" : 0, - "debug_info" : "hidden_states.37_scale", - "top" : "hidden_states.37_scale", - "type" : "elementwise", - "name" : "hidden_states.37_scale", - "beta" : 0 - }, - { - "name" : "hidden_states.37", - "weights" : { - - }, - "dst_w" : 768, - "version" : 1, - "dst_n" : 1, - "dst_nd_rank" : 3, - "type" : "reshape", - "dst_h" : 1500, - "mode" : 0, - "dynamic_shape" : false, - "bottom" : "hidden_states.37_scale", - "debug_info" : "hidden_states.37", - "dst_seq" : 1, - "dst_k" : 1, - "top" : "hidden_states.37" - }, - { - "nB" : 768, - "top" : "467", - "has_biases" : 1, - "weights" : { - - }, - "nC" : 768, - "blob_weights" : 209, - "type" : "inner_product", - "has_relu" : 0, - "bottom" : "hidden_states.37", - "blob_biases" : 207, - "has_tanh" : 0, - "debug_info" : "467", - "name" : "467", - "has_prelu" : 0 - }, - { - "bottom" : "467", - "alpha" : 0.125, - "operation" : 1, - "weights" : { - - }, - "fused_relu" : 0, - "debug_info" : "tensor.37", - "top" : "tensor.37", - "type" : "elementwise", - "name" : "tensor.37", - "beta" : 0 - }, - { - "name" : "471", - "weights" : { - - }, - "dst_w" : 64, - "version" : 1, - "dst_n" : 1, - "dst_nd_rank" : 4, - "type" : "reshape", - "dst_h" : 12, - "mode" : 0, - "dynamic_shape" : false, - "bottom" : "tensor.37", - "debug_info" : "471", - "dst_seq" : 1, - "dst_k" : 1500, - "top" : "471" - }, - { - "nB" : 768, - "top" : "tensor.39", - "has_biases" : 1, - "weights" : { - - }, - "nC" : 768, - "blob_weights" : 213, - "type" : "inner_product", - "has_relu" : 0, - "bottom" : "hidden_states.37", - "blob_biases" : 211, - "has_tanh" : 0, - "debug_info" : "tensor.39", - "name" : "tensor.39", - "has_prelu" : 0 - }, - { - "name" : "477", - "weights" : { - - }, - "dst_w" : 64, - "version" : 1, - "dst_n" : 1, - "dst_nd_rank" : 4, - "type" : "reshape", - "dst_h" : 12, - "mode" : 0, - "dynamic_shape" : false, - "bottom" : "tensor.39", - "debug_info" : "477", - "dst_seq" : 1, - "dst_k" : -1, - "top" : "477" - }, - { - "nB" : 768, - "top" : "tensor.41", - "has_biases" : 1, - "weights" : { - - }, - "nC" : 768, - "blob_weights" : 217, - "type" : "inner_product", - "has_relu" : 0, - "bottom" : "hidden_states.37", - "blob_biases" : 215, - "has_tanh" : 0, - "debug_info" : "tensor.41", - "name" : "tensor.41", - "has_prelu" : 0 - }, - { - "name" : "484", - "weights" : { - - }, - "dst_w" : 64, - "version" : 1, - "dst_n" : 1, - "dst_nd_rank" : 4, - "type" : "reshape", - "dst_h" : 12, - "mode" : 0, - "dynamic_shape" : false, - "bottom" : "tensor.41", - "debug_info" : "484", - "dst_seq" : 1, - "dst_k" : -1, - "top" : "484" - }, - { - "axis_h" : 0, - "axis_w" : 2, - "bottom" : "477", - "axis_k" : 1, - "axis_n" : 3, - "axis_seq" : 4, - "weights" : { - - }, - "debug_info" : "transpose_81", - "top" : "transpose_81", - "type" : "transpose", - "name" : "transpose_81" - }, - { - "axis_h" : 2, - "axis_w" : 0, - "bottom" : "471", - "axis_k" : 1, - "axis_n" : 3, - "axis_seq" : 4, - "weights" : { - - }, - "debug_info" : "transpose_82", - "top" : "transpose_82", - "type" : "transpose", - "name" : "transpose_82" - }, - { - "bottom" : "transpose_82,transpose_81", - "weights" : { - - }, - "debug_info" : "input.139", - "top" : "input.139", - "type" : "batch_matmul", - "name" : "input.139", - "channel_mode" : false - }, - { - "bottom" : "input.139", - "weights" : { - - }, - "nd_axis" : -1, - "debug_info" : "input.141", - "top" : "input.141", - "type" : "softmax_nd", - "name" : "input.141" - }, - { - "axis_h" : 2, - "axis_w" : 0, - "bottom" : "484", - "axis_k" : 1, - "axis_n" : 3, - "axis_seq" : 4, - "weights" : { - - }, - "debug_info" : "transpose_83", - "top" : "transpose_83", - "type" : "transpose", - "name" : "transpose_83" - }, - { - "bottom" : "input.141,transpose_83", - "weights" : { - - }, - "debug_info" : "attn_output.25", - "top" : "attn_output.25", - "type" : "batch_matmul", - "name" : "attn_output.25", - "channel_mode" : false - }, - { - "axis_h" : 2, - "axis_w" : 0, - "bottom" : "attn_output.25", - "axis_k" : 1, - "axis_n" : 3, - "axis_seq" : 4, - "weights" : { - - }, - "debug_info" : "transpose_80", - "top" : "transpose_80", - "type" : "transpose", - "name" : "transpose_80" - }, - { - "name" : "input.143", - "weights" : { - - }, - "dst_w" : 768, - "version" : 1, - "dst_n" : 1, - "dst_nd_rank" : 3, - "type" : "reshape", - "dst_h" : 1500, - "mode" : 0, - "dynamic_shape" : false, - "bottom" : "transpose_80", - "debug_info" : "input.143", - "dst_seq" : 1, - "dst_k" : 1, - "top" : "input.143" - }, - { - "nB" : 768, - "top" : "input.145", - "has_biases" : 1, - "weights" : { - - }, - "nC" : 768, - "blob_weights" : 221, - "type" : "inner_product", - "has_relu" : 0, - "bottom" : "input.143", - "blob_biases" : 219, - "has_tanh" : 0, - "debug_info" : "input.145", - "name" : "input.145", - "has_prelu" : 0 - }, - { - "bottom" : "input.137,input.145", - "alpha" : 1, - "operation" : 0, - "weights" : { - - }, - "fused_relu" : 0, - "debug_info" : "input.147", - "top" : "input.147", - "type" : "elementwise", - "name" : "input.147", - "beta" : 0 - }, - { - "name" : "input.149_reshape", - "weights" : { - - }, - "dst_w" : 768, - "version" : 1, - "dst_n" : 1, - "dst_nd_rank" : 4, - "type" : "reshape", - "dst_h" : 1, - "mode" : 0, - "dynamic_shape" : false, - "bottom" : "input.147", - "debug_info" : "input.149_reshape", - "dst_seq" : 1, - "dst_k" : 1500, - "top" : "input.149_reshape" - }, - { - "bottom" : "input.149_reshape", - "weights" : { - - }, - "eps" : 9.9999997473787516e-06, - "debug_info" : "input.149_mvn", - "axis" : 1, - "normalization_mode" : 1, - "top" : "input.149_mvn", - "type" : "l2_normalize", - "name" : "input.149_mvn" - }, - { - "bottom" : "", - "k" : 1, - "n" : 1, - "weights" : { - - }, - "h" : 1, - "debug_info" : "input.149_scale", - "constant_blob" : 223, - "top" : "input.149_scale_constant_in_scale", - "w" : 768, - "type" : "load_constant", - "name" : "input.149_scale_constant_in_scale" - }, - { - "bottom" : "", - "k" : 1, - "n" : 1, - "weights" : { - - }, - "h" : 1, - "debug_info" : "input.149_scale", - "constant_blob" : 225, - "top" : "input.149_scale_constant_in_bias", - "w" : 768, - "type" : "load_constant", - "name" : "input.149_scale_constant_in_bias" - }, - { - "bottom" : "input.149_mvn,input.149_scale_constant_in_scale", - "alpha" : 1, - "operation" : 1, - "weights" : { - - }, - "fused_relu" : 0, - "debug_info" : "input.149_scale", - "top" : "input.149_scale_mul_out", - "type" : "elementwise", - "name" : "input.149_scale_mul_out", - "beta" : 0 - }, - { - "bottom" : "input.149_scale_mul_out,input.149_scale_constant_in_bias", - "alpha" : 1, - "operation" : 0, - "weights" : { - - }, - "fused_relu" : 0, - "debug_info" : "input.149_scale", - "top" : "input.149_scale", - "type" : "elementwise", - "name" : "input.149_scale", - "beta" : 0 - }, - { - "name" : "input.149", - "weights" : { - - }, - "dst_w" : 768, - "version" : 1, - "dst_n" : 1, - "dst_nd_rank" : 3, - "type" : "reshape", - "dst_h" : 1500, - "mode" : 0, - "dynamic_shape" : false, - "bottom" : "input.149_scale", - "debug_info" : "input.149", - "dst_seq" : 1, - "dst_k" : 1, - "top" : "input.149" - }, - { - "nB" : 768, - "top" : "input.151", - "has_biases" : 1, - "weights" : { - - }, - "nC" : 3072, - "blob_weights" : 229, - "type" : "inner_product", - "has_relu" : 0, - "bottom" : "input.149", - "blob_biases" : 227, - "has_tanh" : 0, - "debug_info" : "input.151", - "name" : "input.151", - "has_prelu" : 0 - }, - { - "bottom" : "input.151", - "weights" : { - - }, - "mode" : 19, - "debug_info" : "input.153", - "top" : "input.153", - "type" : "activation", - "name" : "input.153" - }, - { - "nB" : 3072, - "top" : "input.157", - "has_biases" : 1, - "weights" : { - - }, - "nC" : 768, - "blob_weights" : 233, - "type" : "inner_product", - "has_relu" : 0, - "bottom" : "input.153", - "blob_biases" : 231, - "has_tanh" : 0, - "debug_info" : "input.157", - "name" : "input.157", - "has_prelu" : 0 - }, - { - "bottom" : "input.147,input.157", - "alpha" : 1, - "operation" : 0, - "weights" : { - - }, - "fused_relu" : 0, - "debug_info" : "input.159", - "top" : "input.159", - "type" : "elementwise", - "name" : "input.159", - "beta" : 0 - }, - { - "name" : "hidden_states.43_reshape", - "weights" : { - - }, - "dst_w" : 768, - "version" : 1, - "dst_n" : 1, - "dst_nd_rank" : 4, - "type" : "reshape", - "dst_h" : 1, - "mode" : 0, - "dynamic_shape" : false, - "bottom" : "input.159", - "debug_info" : "hidden_states.43_reshape", - "dst_seq" : 1, - "dst_k" : 1500, - "top" : "hidden_states.43_reshape" - }, - { - "bottom" : "hidden_states.43_reshape", - "weights" : { - - }, - "eps" : 9.9999997473787516e-06, - "debug_info" : "hidden_states.43_mvn", - "axis" : 1, - "normalization_mode" : 1, - "top" : "hidden_states.43_mvn", - "type" : "l2_normalize", - "name" : "hidden_states.43_mvn" - }, - { - "bottom" : "", - "k" : 1, - "n" : 1, - "weights" : { - - }, - "h" : 1, - "debug_info" : "hidden_states.43_scale", - "constant_blob" : 235, - "top" : "hidden_states.43_scale_constant_in_scale", - "w" : 768, - "type" : "load_constant", - "name" : "hidden_states.43_scale_constant_in_scale" - }, - { - "bottom" : "", - "k" : 1, - "n" : 1, - "weights" : { - - }, - "h" : 1, - "debug_info" : "hidden_states.43_scale", - "constant_blob" : 237, - "top" : "hidden_states.43_scale_constant_in_bias", - "w" : 768, - "type" : "load_constant", - "name" : "hidden_states.43_scale_constant_in_bias" - }, - { - "bottom" : "hidden_states.43_mvn,hidden_states.43_scale_constant_in_scale", - "alpha" : 1, - "operation" : 1, - "weights" : { - - }, - "fused_relu" : 0, - "debug_info" : "hidden_states.43_scale", - "top" : "hidden_states.43_scale_mul_out", - "type" : "elementwise", - "name" : "hidden_states.43_scale_mul_out", - "beta" : 0 - }, - { - "bottom" : "hidden_states.43_scale_mul_out,hidden_states.43_scale_constant_in_bias", - "alpha" : 1, - "operation" : 0, - "weights" : { - - }, - "fused_relu" : 0, - "debug_info" : "hidden_states.43_scale", - "top" : "hidden_states.43_scale", - "type" : "elementwise", - "name" : "hidden_states.43_scale", - "beta" : 0 - }, - { - "name" : "hidden_states.43", - "weights" : { - - }, - "dst_w" : 768, - "version" : 1, - "dst_n" : 1, - "dst_nd_rank" : 3, - "type" : "reshape", - "dst_h" : 1500, - "mode" : 0, - "dynamic_shape" : false, - "bottom" : "hidden_states.43_scale", - "debug_info" : "hidden_states.43", - "dst_seq" : 1, - "dst_k" : 1, - "top" : "hidden_states.43" - }, - { - "nB" : 768, - "top" : "531", - "has_biases" : 1, - "weights" : { - - }, - "nC" : 768, - "blob_weights" : 241, - "type" : "inner_product", - "has_relu" : 0, - "bottom" : "hidden_states.43", - "blob_biases" : 239, - "has_tanh" : 0, - "debug_info" : "531", - "name" : "531", - "has_prelu" : 0 - }, - { - "bottom" : "531", - "alpha" : 0.125, - "operation" : 1, - "weights" : { - - }, - "fused_relu" : 0, - "debug_info" : "tensor.43", - "top" : "tensor.43", - "type" : "elementwise", - "name" : "tensor.43", - "beta" : 0 - }, - { - "name" : "535", - "weights" : { - - }, - "dst_w" : 64, - "version" : 1, - "dst_n" : 1, - "dst_nd_rank" : 4, - "type" : "reshape", - "dst_h" : 12, - "mode" : 0, - "dynamic_shape" : false, - "bottom" : "tensor.43", - "debug_info" : "535", - "dst_seq" : 1, - "dst_k" : 1500, - "top" : "535" - }, - { - "nB" : 768, - "top" : "tensor.45", - "has_biases" : 1, - "weights" : { - - }, - "nC" : 768, - "blob_weights" : 245, - "type" : "inner_product", - "has_relu" : 0, - "bottom" : "hidden_states.43", - "blob_biases" : 243, - "has_tanh" : 0, - "debug_info" : "tensor.45", - "name" : "tensor.45", - "has_prelu" : 0 - }, - { - "name" : "541", - "weights" : { - - }, - "dst_w" : 64, - "version" : 1, - "dst_n" : 1, - "dst_nd_rank" : 4, - "type" : "reshape", - "dst_h" : 12, - "mode" : 0, - "dynamic_shape" : false, - "bottom" : "tensor.45", - "debug_info" : "541", - "dst_seq" : 1, - "dst_k" : -1, - "top" : "541" - }, - { - "nB" : 768, - "top" : "tensor.47", - "has_biases" : 1, - "weights" : { - - }, - "nC" : 768, - "blob_weights" : 249, - "type" : "inner_product", - "has_relu" : 0, - "bottom" : "hidden_states.43", - "blob_biases" : 247, - "has_tanh" : 0, - "debug_info" : "tensor.47", - "name" : "tensor.47", - "has_prelu" : 0 - }, - { - "name" : "548", - "weights" : { - - }, - "dst_w" : 64, - "version" : 1, - "dst_n" : 1, - "dst_nd_rank" : 4, - "type" : "reshape", - "dst_h" : 12, - "mode" : 0, - "dynamic_shape" : false, - "bottom" : "tensor.47", - "debug_info" : "548", - "dst_seq" : 1, - "dst_k" : -1, - "top" : "548" - }, - { - "axis_h" : 0, - "axis_w" : 2, - "bottom" : "541", - "axis_k" : 1, - "axis_n" : 3, - "axis_seq" : 4, - "weights" : { - - }, - "debug_info" : "transpose_77", - "top" : "transpose_77", - "type" : "transpose", - "name" : "transpose_77" - }, - { - "axis_h" : 2, - "axis_w" : 0, - "bottom" : "535", - "axis_k" : 1, - "axis_n" : 3, - "axis_seq" : 4, - "weights" : { - - }, - "debug_info" : "transpose_78", - "top" : "transpose_78", - "type" : "transpose", - "name" : "transpose_78" - }, - { - "bottom" : "transpose_78,transpose_77", - "weights" : { - - }, - "debug_info" : "input.161", - "top" : "input.161", - "type" : "batch_matmul", - "name" : "input.161", - "channel_mode" : false - }, - { - "bottom" : "input.161", - "weights" : { - - }, - "nd_axis" : -1, - "debug_info" : "input.163", - "top" : "input.163", - "type" : "softmax_nd", - "name" : "input.163" - }, - { - "axis_h" : 2, - "axis_w" : 0, - "bottom" : "548", - "axis_k" : 1, - "axis_n" : 3, - "axis_seq" : 4, - "weights" : { - - }, - "debug_info" : "transpose_79", - "top" : "transpose_79", - "type" : "transpose", - "name" : "transpose_79" - }, - { - "bottom" : "input.163,transpose_79", - "weights" : { - - }, - "debug_info" : "attn_output.29", - "top" : "attn_output.29", - "type" : "batch_matmul", - "name" : "attn_output.29", - "channel_mode" : false - }, - { - "axis_h" : 2, - "axis_w" : 0, - "bottom" : "attn_output.29", - "axis_k" : 1, - "axis_n" : 3, - "axis_seq" : 4, - "weights" : { - - }, - "debug_info" : "transpose_76", - "top" : "transpose_76", - "type" : "transpose", - "name" : "transpose_76" - }, - { - "name" : "input.165", - "weights" : { - - }, - "dst_w" : 768, - "version" : 1, - "dst_n" : 1, - "dst_nd_rank" : 3, - "type" : "reshape", - "dst_h" : 1500, - "mode" : 0, - "dynamic_shape" : false, - "bottom" : "transpose_76", - "debug_info" : "input.165", - "dst_seq" : 1, - "dst_k" : 1, - "top" : "input.165" - }, - { - "nB" : 768, - "top" : "input.167", - "has_biases" : 1, - "weights" : { - - }, - "nC" : 768, - "blob_weights" : 253, - "type" : "inner_product", - "has_relu" : 0, - "bottom" : "input.165", - "blob_biases" : 251, - "has_tanh" : 0, - "debug_info" : "input.167", - "name" : "input.167", - "has_prelu" : 0 - }, - { - "bottom" : "input.159,input.167", - "alpha" : 1, - "operation" : 0, - "weights" : { - - }, - "fused_relu" : 0, - "debug_info" : "input.169", - "top" : "input.169", - "type" : "elementwise", - "name" : "input.169", - "beta" : 0 - }, - { - "name" : "input.171_reshape", - "weights" : { - - }, - "dst_w" : 768, - "version" : 1, - "dst_n" : 1, - "dst_nd_rank" : 4, - "type" : "reshape", - "dst_h" : 1, - "mode" : 0, - "dynamic_shape" : false, - "bottom" : "input.169", - "debug_info" : "input.171_reshape", - "dst_seq" : 1, - "dst_k" : 1500, - "top" : "input.171_reshape" - }, - { - "bottom" : "input.171_reshape", - "weights" : { - - }, - "eps" : 9.9999997473787516e-06, - "debug_info" : "input.171_mvn", - "axis" : 1, - "normalization_mode" : 1, - "top" : "input.171_mvn", - "type" : "l2_normalize", - "name" : "input.171_mvn" - }, - { - "bottom" : "", - "k" : 1, - "n" : 1, - "weights" : { - - }, - "h" : 1, - "debug_info" : "input.171_scale", - "constant_blob" : 255, - "top" : "input.171_scale_constant_in_scale", - "w" : 768, - "type" : "load_constant", - "name" : "input.171_scale_constant_in_scale" - }, - { - "bottom" : "", - "k" : 1, - "n" : 1, - "weights" : { - - }, - "h" : 1, - "debug_info" : "input.171_scale", - "constant_blob" : 257, - "top" : "input.171_scale_constant_in_bias", - "w" : 768, - "type" : "load_constant", - "name" : "input.171_scale_constant_in_bias" - }, - { - "bottom" : "input.171_mvn,input.171_scale_constant_in_scale", - "alpha" : 1, - "operation" : 1, - "weights" : { - - }, - "fused_relu" : 0, - "debug_info" : "input.171_scale", - "top" : "input.171_scale_mul_out", - "type" : "elementwise", - "name" : "input.171_scale_mul_out", - "beta" : 0 - }, - { - "bottom" : "input.171_scale_mul_out,input.171_scale_constant_in_bias", - "alpha" : 1, - "operation" : 0, - "weights" : { - - }, - "fused_relu" : 0, - "debug_info" : "input.171_scale", - "top" : "input.171_scale", - "type" : "elementwise", - "name" : "input.171_scale", - "beta" : 0 - }, - { - "name" : "input.171", - "weights" : { - - }, - "dst_w" : 768, - "version" : 1, - "dst_n" : 1, - "dst_nd_rank" : 3, - "type" : "reshape", - "dst_h" : 1500, - "mode" : 0, - "dynamic_shape" : false, - "bottom" : "input.171_scale", - "debug_info" : "input.171", - "dst_seq" : 1, - "dst_k" : 1, - "top" : "input.171" - }, - { - "nB" : 768, - "top" : "input.173", - "has_biases" : 1, - "weights" : { - - }, - "nC" : 3072, - "blob_weights" : 261, - "type" : "inner_product", - "has_relu" : 0, - "bottom" : "input.171", - "blob_biases" : 259, - "has_tanh" : 0, - "debug_info" : "input.173", - "name" : "input.173", - "has_prelu" : 0 - }, - { - "bottom" : "input.173", - "weights" : { - - }, - "mode" : 19, - "debug_info" : "input.175", - "top" : "input.175", - "type" : "activation", - "name" : "input.175" - }, - { - "nB" : 3072, - "top" : "input.179", - "has_biases" : 1, - "weights" : { - - }, - "nC" : 768, - "blob_weights" : 265, - "type" : "inner_product", - "has_relu" : 0, - "bottom" : "input.175", - "blob_biases" : 263, - "has_tanh" : 0, - "debug_info" : "input.179", - "name" : "input.179", - "has_prelu" : 0 - }, - { - "bottom" : "input.169,input.179", - "alpha" : 1, - "operation" : 0, - "weights" : { - - }, - "fused_relu" : 0, - "debug_info" : "input.181", - "top" : "input.181", - "type" : "elementwise", - "name" : "input.181", - "beta" : 0 - }, - { - "name" : "hidden_states.49_reshape", - "weights" : { - - }, - "dst_w" : 768, - "version" : 1, - "dst_n" : 1, - "dst_nd_rank" : 4, - "type" : "reshape", - "dst_h" : 1, - "mode" : 0, - "dynamic_shape" : false, - "bottom" : "input.181", - "debug_info" : "hidden_states.49_reshape", - "dst_seq" : 1, - "dst_k" : 1500, - "top" : "hidden_states.49_reshape" - }, - { - "bottom" : "hidden_states.49_reshape", - "weights" : { - - }, - "eps" : 9.9999997473787516e-06, - "debug_info" : "hidden_states.49_mvn", - "axis" : 1, - "normalization_mode" : 1, - "top" : "hidden_states.49_mvn", - "type" : "l2_normalize", - "name" : "hidden_states.49_mvn" - }, - { - "bottom" : "", - "k" : 1, - "n" : 1, - "weights" : { - - }, - "h" : 1, - "debug_info" : "hidden_states.49_scale", - "constant_blob" : 267, - "top" : "hidden_states.49_scale_constant_in_scale", - "w" : 768, - "type" : "load_constant", - "name" : "hidden_states.49_scale_constant_in_scale" - }, - { - "bottom" : "", - "k" : 1, - "n" : 1, - "weights" : { - - }, - "h" : 1, - "debug_info" : "hidden_states.49_scale", - "constant_blob" : 269, - "top" : "hidden_states.49_scale_constant_in_bias", - "w" : 768, - "type" : "load_constant", - "name" : "hidden_states.49_scale_constant_in_bias" - }, - { - "bottom" : "hidden_states.49_mvn,hidden_states.49_scale_constant_in_scale", - "alpha" : 1, - "operation" : 1, - "weights" : { - - }, - "fused_relu" : 0, - "debug_info" : "hidden_states.49_scale", - "top" : "hidden_states.49_scale_mul_out", - "type" : "elementwise", - "name" : "hidden_states.49_scale_mul_out", - "beta" : 0 - }, - { - "bottom" : "hidden_states.49_scale_mul_out,hidden_states.49_scale_constant_in_bias", - "alpha" : 1, - "operation" : 0, - "weights" : { - - }, - "fused_relu" : 0, - "debug_info" : "hidden_states.49_scale", - "top" : "hidden_states.49_scale", - "type" : "elementwise", - "name" : "hidden_states.49_scale", - "beta" : 0 - }, - { - "name" : "hidden_states.49", - "weights" : { - - }, - "dst_w" : 768, - "version" : 1, - "dst_n" : 1, - "dst_nd_rank" : 3, - "type" : "reshape", - "dst_h" : 1500, - "mode" : 0, - "dynamic_shape" : false, - "bottom" : "hidden_states.49_scale", - "debug_info" : "hidden_states.49", - "dst_seq" : 1, - "dst_k" : 1, - "top" : "hidden_states.49" - }, - { - "nB" : 768, - "top" : "595", - "has_biases" : 1, - "weights" : { - - }, - "nC" : 768, - "blob_weights" : 273, - "type" : "inner_product", - "has_relu" : 0, - "bottom" : "hidden_states.49", - "blob_biases" : 271, - "has_tanh" : 0, - "debug_info" : "595", - "name" : "595", - "has_prelu" : 0 - }, - { - "bottom" : "595", - "alpha" : 0.125, - "operation" : 1, - "weights" : { - - }, - "fused_relu" : 0, - "debug_info" : "tensor.49", - "top" : "tensor.49", - "type" : "elementwise", - "name" : "tensor.49", - "beta" : 0 - }, - { - "name" : "599", - "weights" : { - - }, - "dst_w" : 64, - "version" : 1, - "dst_n" : 1, - "dst_nd_rank" : 4, - "type" : "reshape", - "dst_h" : 12, - "mode" : 0, - "dynamic_shape" : false, - "bottom" : "tensor.49", - "debug_info" : "599", - "dst_seq" : 1, - "dst_k" : 1500, - "top" : "599" - }, - { - "nB" : 768, - "top" : "tensor.51", - "has_biases" : 1, - "weights" : { - - }, - "nC" : 768, - "blob_weights" : 277, - "type" : "inner_product", - "has_relu" : 0, - "bottom" : "hidden_states.49", - "blob_biases" : 275, - "has_tanh" : 0, - "debug_info" : "tensor.51", - "name" : "tensor.51", - "has_prelu" : 0 - }, - { - "name" : "605", - "weights" : { - - }, - "dst_w" : 64, - "version" : 1, - "dst_n" : 1, - "dst_nd_rank" : 4, - "type" : "reshape", - "dst_h" : 12, - "mode" : 0, - "dynamic_shape" : false, - "bottom" : "tensor.51", - "debug_info" : "605", - "dst_seq" : 1, - "dst_k" : -1, - "top" : "605" - }, - { - "nB" : 768, - "top" : "tensor.53", - "has_biases" : 1, - "weights" : { - - }, - "nC" : 768, - "blob_weights" : 281, - "type" : "inner_product", - "has_relu" : 0, - "bottom" : "hidden_states.49", - "blob_biases" : 279, - "has_tanh" : 0, - "debug_info" : "tensor.53", - "name" : "tensor.53", - "has_prelu" : 0 - }, - { - "name" : "612", - "weights" : { - - }, - "dst_w" : 64, - "version" : 1, - "dst_n" : 1, - "dst_nd_rank" : 4, - "type" : "reshape", - "dst_h" : 12, - "mode" : 0, - "dynamic_shape" : false, - "bottom" : "tensor.53", - "debug_info" : "612", - "dst_seq" : 1, - "dst_k" : -1, - "top" : "612" - }, - { - "axis_h" : 0, - "axis_w" : 2, - "bottom" : "605", - "axis_k" : 1, - "axis_n" : 3, - "axis_seq" : 4, - "weights" : { - - }, - "debug_info" : "transpose_73", - "top" : "transpose_73", - "type" : "transpose", - "name" : "transpose_73" - }, - { - "axis_h" : 2, - "axis_w" : 0, - "bottom" : "599", - "axis_k" : 1, - "axis_n" : 3, - "axis_seq" : 4, - "weights" : { - - }, - "debug_info" : "transpose_74", - "top" : "transpose_74", - "type" : "transpose", - "name" : "transpose_74" - }, - { - "bottom" : "transpose_74,transpose_73", - "weights" : { - - }, - "debug_info" : "input.183", - "top" : "input.183", - "type" : "batch_matmul", - "name" : "input.183", - "channel_mode" : false - }, - { - "bottom" : "input.183", - "weights" : { - - }, - "nd_axis" : -1, - "debug_info" : "input.185", - "top" : "input.185", - "type" : "softmax_nd", - "name" : "input.185" - }, - { - "axis_h" : 2, - "axis_w" : 0, - "bottom" : "612", - "axis_k" : 1, - "axis_n" : 3, - "axis_seq" : 4, - "weights" : { - - }, - "debug_info" : "transpose_75", - "top" : "transpose_75", - "type" : "transpose", - "name" : "transpose_75" - }, - { - "bottom" : "input.185,transpose_75", - "weights" : { - - }, - "debug_info" : "attn_output.33", - "top" : "attn_output.33", - "type" : "batch_matmul", - "name" : "attn_output.33", - "channel_mode" : false - }, - { - "axis_h" : 2, - "axis_w" : 0, - "bottom" : "attn_output.33", - "axis_k" : 1, - "axis_n" : 3, - "axis_seq" : 4, - "weights" : { - - }, - "debug_info" : "transpose_72", - "top" : "transpose_72", - "type" : "transpose", - "name" : "transpose_72" - }, - { - "name" : "input.187", - "weights" : { - - }, - "dst_w" : 768, - "version" : 1, - "dst_n" : 1, - "dst_nd_rank" : 3, - "type" : "reshape", - "dst_h" : 1500, - "mode" : 0, - "dynamic_shape" : false, - "bottom" : "transpose_72", - "debug_info" : "input.187", - "dst_seq" : 1, - "dst_k" : 1, - "top" : "input.187" - }, - { - "nB" : 768, - "top" : "input.189", - "has_biases" : 1, - "weights" : { - - }, - "nC" : 768, - "blob_weights" : 285, - "type" : "inner_product", - "has_relu" : 0, - "bottom" : "input.187", - "blob_biases" : 283, - "has_tanh" : 0, - "debug_info" : "input.189", - "name" : "input.189", - "has_prelu" : 0 - }, - { - "bottom" : "input.181,input.189", - "alpha" : 1, - "operation" : 0, - "weights" : { - - }, - "fused_relu" : 0, - "debug_info" : "input.191", - "top" : "input.191", - "type" : "elementwise", - "name" : "input.191", - "beta" : 0 - }, - { - "name" : "input.193_reshape", - "weights" : { - - }, - "dst_w" : 768, - "version" : 1, - "dst_n" : 1, - "dst_nd_rank" : 4, - "type" : "reshape", - "dst_h" : 1, - "mode" : 0, - "dynamic_shape" : false, - "bottom" : "input.191", - "debug_info" : "input.193_reshape", - "dst_seq" : 1, - "dst_k" : 1500, - "top" : "input.193_reshape" - }, - { - "bottom" : "input.193_reshape", - "weights" : { - - }, - "eps" : 9.9999997473787516e-06, - "debug_info" : "input.193_mvn", - "axis" : 1, - "normalization_mode" : 1, - "top" : "input.193_mvn", - "type" : "l2_normalize", - "name" : "input.193_mvn" - }, - { - "bottom" : "", - "k" : 1, - "n" : 1, - "weights" : { - - }, - "h" : 1, - "debug_info" : "input.193_scale", - "constant_blob" : 287, - "top" : "input.193_scale_constant_in_scale", - "w" : 768, - "type" : "load_constant", - "name" : "input.193_scale_constant_in_scale" - }, - { - "bottom" : "", - "k" : 1, - "n" : 1, - "weights" : { - - }, - "h" : 1, - "debug_info" : "input.193_scale", - "constant_blob" : 289, - "top" : "input.193_scale_constant_in_bias", - "w" : 768, - "type" : "load_constant", - "name" : "input.193_scale_constant_in_bias" - }, - { - "bottom" : "input.193_mvn,input.193_scale_constant_in_scale", - "alpha" : 1, - "operation" : 1, - "weights" : { - - }, - "fused_relu" : 0, - "debug_info" : "input.193_scale", - "top" : "input.193_scale_mul_out", - "type" : "elementwise", - "name" : "input.193_scale_mul_out", - "beta" : 0 - }, - { - "bottom" : "input.193_scale_mul_out,input.193_scale_constant_in_bias", - "alpha" : 1, - "operation" : 0, - "weights" : { - - }, - "fused_relu" : 0, - "debug_info" : "input.193_scale", - "top" : "input.193_scale", - "type" : "elementwise", - "name" : "input.193_scale", - "beta" : 0 - }, - { - "name" : "input.193", - "weights" : { - - }, - "dst_w" : 768, - "version" : 1, - "dst_n" : 1, - "dst_nd_rank" : 3, - "type" : "reshape", - "dst_h" : 1500, - "mode" : 0, - "dynamic_shape" : false, - "bottom" : "input.193_scale", - "debug_info" : "input.193", - "dst_seq" : 1, - "dst_k" : 1, - "top" : "input.193" - }, - { - "nB" : 768, - "top" : "input.195", - "has_biases" : 1, - "weights" : { - - }, - "nC" : 3072, - "blob_weights" : 293, - "type" : "inner_product", - "has_relu" : 0, - "bottom" : "input.193", - "blob_biases" : 291, - "has_tanh" : 0, - "debug_info" : "input.195", - "name" : "input.195", - "has_prelu" : 0 - }, - { - "bottom" : "input.195", - "weights" : { - - }, - "mode" : 19, - "debug_info" : "input.197", - "top" : "input.197", - "type" : "activation", - "name" : "input.197" - }, - { - "nB" : 3072, - "top" : "input.201", - "has_biases" : 1, - "weights" : { - - }, - "nC" : 768, - "blob_weights" : 297, - "type" : "inner_product", - "has_relu" : 0, - "bottom" : "input.197", - "blob_biases" : 295, - "has_tanh" : 0, - "debug_info" : "input.201", - "name" : "input.201", - "has_prelu" : 0 - }, - { - "bottom" : "input.191,input.201", - "alpha" : 1, - "operation" : 0, - "weights" : { - - }, - "fused_relu" : 0, - "debug_info" : "input.203", - "top" : "input.203", - "type" : "elementwise", - "name" : "input.203", - "beta" : 0 - }, - { - "name" : "hidden_states.55_reshape", - "weights" : { - - }, - "dst_w" : 768, - "version" : 1, - "dst_n" : 1, - "dst_nd_rank" : 4, - "type" : "reshape", - "dst_h" : 1, - "mode" : 0, - "dynamic_shape" : false, - "bottom" : "input.203", - "debug_info" : "hidden_states.55_reshape", - "dst_seq" : 1, - "dst_k" : 1500, - "top" : "hidden_states.55_reshape" - }, - { - "bottom" : "hidden_states.55_reshape", - "weights" : { - - }, - "eps" : 9.9999997473787516e-06, - "debug_info" : "hidden_states.55_mvn", - "axis" : 1, - "normalization_mode" : 1, - "top" : "hidden_states.55_mvn", - "type" : "l2_normalize", - "name" : "hidden_states.55_mvn" - }, - { - "bottom" : "", - "k" : 1, - "n" : 1, - "weights" : { - - }, - "h" : 1, - "debug_info" : "hidden_states.55_scale", - "constant_blob" : 299, - "top" : "hidden_states.55_scale_constant_in_scale", - "w" : 768, - "type" : "load_constant", - "name" : "hidden_states.55_scale_constant_in_scale" - }, - { - "bottom" : "", - "k" : 1, - "n" : 1, - "weights" : { - - }, - "h" : 1, - "debug_info" : "hidden_states.55_scale", - "constant_blob" : 301, - "top" : "hidden_states.55_scale_constant_in_bias", - "w" : 768, - "type" : "load_constant", - "name" : "hidden_states.55_scale_constant_in_bias" - }, - { - "bottom" : "hidden_states.55_mvn,hidden_states.55_scale_constant_in_scale", - "alpha" : 1, - "operation" : 1, - "weights" : { - - }, - "fused_relu" : 0, - "debug_info" : "hidden_states.55_scale", - "top" : "hidden_states.55_scale_mul_out", - "type" : "elementwise", - "name" : "hidden_states.55_scale_mul_out", - "beta" : 0 - }, - { - "bottom" : "hidden_states.55_scale_mul_out,hidden_states.55_scale_constant_in_bias", - "alpha" : 1, - "operation" : 0, - "weights" : { - - }, - "fused_relu" : 0, - "debug_info" : "hidden_states.55_scale", - "top" : "hidden_states.55_scale", - "type" : "elementwise", - "name" : "hidden_states.55_scale", - "beta" : 0 - }, - { - "name" : "hidden_states.55", - "weights" : { - - }, - "dst_w" : 768, - "version" : 1, - "dst_n" : 1, - "dst_nd_rank" : 3, - "type" : "reshape", - "dst_h" : 1500, - "mode" : 0, - "dynamic_shape" : false, - "bottom" : "hidden_states.55_scale", - "debug_info" : "hidden_states.55", - "dst_seq" : 1, - "dst_k" : 1, - "top" : "hidden_states.55" - }, - { - "nB" : 768, - "top" : "659", - "has_biases" : 1, - "weights" : { - - }, - "nC" : 768, - "blob_weights" : 305, - "type" : "inner_product", - "has_relu" : 0, - "bottom" : "hidden_states.55", - "blob_biases" : 303, - "has_tanh" : 0, - "debug_info" : "659", - "name" : "659", - "has_prelu" : 0 - }, - { - "bottom" : "659", - "alpha" : 0.125, - "operation" : 1, - "weights" : { - - }, - "fused_relu" : 0, - "debug_info" : "tensor.55", - "top" : "tensor.55", - "type" : "elementwise", - "name" : "tensor.55", - "beta" : 0 - }, - { - "name" : "663", - "weights" : { - - }, - "dst_w" : 64, - "version" : 1, - "dst_n" : 1, - "dst_nd_rank" : 4, - "type" : "reshape", - "dst_h" : 12, - "mode" : 0, - "dynamic_shape" : false, - "bottom" : "tensor.55", - "debug_info" : "663", - "dst_seq" : 1, - "dst_k" : 1500, - "top" : "663" - }, - { - "nB" : 768, - "top" : "tensor.57", - "has_biases" : 1, - "weights" : { - - }, - "nC" : 768, - "blob_weights" : 309, - "type" : "inner_product", - "has_relu" : 0, - "bottom" : "hidden_states.55", - "blob_biases" : 307, - "has_tanh" : 0, - "debug_info" : "tensor.57", - "name" : "tensor.57", - "has_prelu" : 0 - }, - { - "name" : "669", - "weights" : { - - }, - "dst_w" : 64, - "version" : 1, - "dst_n" : 1, - "dst_nd_rank" : 4, - "type" : "reshape", - "dst_h" : 12, - "mode" : 0, - "dynamic_shape" : false, - "bottom" : "tensor.57", - "debug_info" : "669", - "dst_seq" : 1, - "dst_k" : -1, - "top" : "669" - }, - { - "nB" : 768, - "top" : "tensor.59", - "has_biases" : 1, - "weights" : { - - }, - "nC" : 768, - "blob_weights" : 313, - "type" : "inner_product", - "has_relu" : 0, - "bottom" : "hidden_states.55", - "blob_biases" : 311, - "has_tanh" : 0, - "debug_info" : "tensor.59", - "name" : "tensor.59", - "has_prelu" : 0 - }, - { - "name" : "676", - "weights" : { - - }, - "dst_w" : 64, - "version" : 1, - "dst_n" : 1, - "dst_nd_rank" : 4, - "type" : "reshape", - "dst_h" : 12, - "mode" : 0, - "dynamic_shape" : false, - "bottom" : "tensor.59", - "debug_info" : "676", - "dst_seq" : 1, - "dst_k" : -1, - "top" : "676" - }, - { - "axis_h" : 0, - "axis_w" : 2, - "bottom" : "669", - "axis_k" : 1, - "axis_n" : 3, - "axis_seq" : 4, - "weights" : { - - }, - "debug_info" : "transpose_69", - "top" : "transpose_69", - "type" : "transpose", - "name" : "transpose_69" - }, - { - "axis_h" : 2, - "axis_w" : 0, - "bottom" : "663", - "axis_k" : 1, - "axis_n" : 3, - "axis_seq" : 4, - "weights" : { - - }, - "debug_info" : "transpose_70", - "top" : "transpose_70", - "type" : "transpose", - "name" : "transpose_70" - }, - { - "bottom" : "transpose_70,transpose_69", - "weights" : { - - }, - "debug_info" : "input.205", - "top" : "input.205", - "type" : "batch_matmul", - "name" : "input.205", - "channel_mode" : false - }, - { - "bottom" : "input.205", - "weights" : { - - }, - "nd_axis" : -1, - "debug_info" : "input.207", - "top" : "input.207", - "type" : "softmax_nd", - "name" : "input.207" - }, - { - "axis_h" : 2, - "axis_w" : 0, - "bottom" : "676", - "axis_k" : 1, - "axis_n" : 3, - "axis_seq" : 4, - "weights" : { - - }, - "debug_info" : "transpose_71", - "top" : "transpose_71", - "type" : "transpose", - "name" : "transpose_71" - }, - { - "bottom" : "input.207,transpose_71", - "weights" : { - - }, - "debug_info" : "attn_output.37", - "top" : "attn_output.37", - "type" : "batch_matmul", - "name" : "attn_output.37", - "channel_mode" : false - }, - { - "axis_h" : 2, - "axis_w" : 0, - "bottom" : "attn_output.37", - "axis_k" : 1, - "axis_n" : 3, - "axis_seq" : 4, - "weights" : { - - }, - "debug_info" : "transpose_68", - "top" : "transpose_68", - "type" : "transpose", - "name" : "transpose_68" - }, - { - "name" : "input.209", - "weights" : { - - }, - "dst_w" : 768, - "version" : 1, - "dst_n" : 1, - "dst_nd_rank" : 3, - "type" : "reshape", - "dst_h" : 1500, - "mode" : 0, - "dynamic_shape" : false, - "bottom" : "transpose_68", - "debug_info" : "input.209", - "dst_seq" : 1, - "dst_k" : 1, - "top" : "input.209" - }, - { - "nB" : 768, - "top" : "input.211", - "has_biases" : 1, - "weights" : { - - }, - "nC" : 768, - "blob_weights" : 317, - "type" : "inner_product", - "has_relu" : 0, - "bottom" : "input.209", - "blob_biases" : 315, - "has_tanh" : 0, - "debug_info" : "input.211", - "name" : "input.211", - "has_prelu" : 0 - }, - { - "bottom" : "input.203,input.211", - "alpha" : 1, - "operation" : 0, - "weights" : { - - }, - "fused_relu" : 0, - "debug_info" : "input.213", - "top" : "input.213", - "type" : "elementwise", - "name" : "input.213", - "beta" : 0 - }, - { - "name" : "input.215_reshape", - "weights" : { - - }, - "dst_w" : 768, - "version" : 1, - "dst_n" : 1, - "dst_nd_rank" : 4, - "type" : "reshape", - "dst_h" : 1, - "mode" : 0, - "dynamic_shape" : false, - "bottom" : "input.213", - "debug_info" : "input.215_reshape", - "dst_seq" : 1, - "dst_k" : 1500, - "top" : "input.215_reshape" - }, - { - "bottom" : "input.215_reshape", - "weights" : { - - }, - "eps" : 9.9999997473787516e-06, - "debug_info" : "input.215_mvn", - "axis" : 1, - "normalization_mode" : 1, - "top" : "input.215_mvn", - "type" : "l2_normalize", - "name" : "input.215_mvn" - }, - { - "bottom" : "", - "k" : 1, - "n" : 1, - "weights" : { - - }, - "h" : 1, - "debug_info" : "input.215_scale", - "constant_blob" : 319, - "top" : "input.215_scale_constant_in_scale", - "w" : 768, - "type" : "load_constant", - "name" : "input.215_scale_constant_in_scale" - }, - { - "bottom" : "", - "k" : 1, - "n" : 1, - "weights" : { - - }, - "h" : 1, - "debug_info" : "input.215_scale", - "constant_blob" : 321, - "top" : "input.215_scale_constant_in_bias", - "w" : 768, - "type" : "load_constant", - "name" : "input.215_scale_constant_in_bias" - }, - { - "bottom" : "input.215_mvn,input.215_scale_constant_in_scale", - "alpha" : 1, - "operation" : 1, - "weights" : { - - }, - "fused_relu" : 0, - "debug_info" : "input.215_scale", - "top" : "input.215_scale_mul_out", - "type" : "elementwise", - "name" : "input.215_scale_mul_out", - "beta" : 0 - }, - { - "bottom" : "input.215_scale_mul_out,input.215_scale_constant_in_bias", - "alpha" : 1, - "operation" : 0, - "weights" : { - - }, - "fused_relu" : 0, - "debug_info" : "input.215_scale", - "top" : "input.215_scale", - "type" : "elementwise", - "name" : "input.215_scale", - "beta" : 0 - }, - { - "name" : "input.215", - "weights" : { - - }, - "dst_w" : 768, - "version" : 1, - "dst_n" : 1, - "dst_nd_rank" : 3, - "type" : "reshape", - "dst_h" : 1500, - "mode" : 0, - "dynamic_shape" : false, - "bottom" : "input.215_scale", - "debug_info" : "input.215", - "dst_seq" : 1, - "dst_k" : 1, - "top" : "input.215" - }, - { - "nB" : 768, - "top" : "input.217", - "has_biases" : 1, - "weights" : { - - }, - "nC" : 3072, - "blob_weights" : 325, - "type" : "inner_product", - "has_relu" : 0, - "bottom" : "input.215", - "blob_biases" : 323, - "has_tanh" : 0, - "debug_info" : "input.217", - "name" : "input.217", - "has_prelu" : 0 - }, - { - "bottom" : "input.217", - "weights" : { - - }, - "mode" : 19, - "debug_info" : "input.219", - "top" : "input.219", - "type" : "activation", - "name" : "input.219" - }, - { - "nB" : 3072, - "top" : "input.223", - "has_biases" : 1, - "weights" : { - - }, - "nC" : 768, - "blob_weights" : 329, - "type" : "inner_product", - "has_relu" : 0, - "bottom" : "input.219", - "blob_biases" : 327, - "has_tanh" : 0, - "debug_info" : "input.223", - "name" : "input.223", - "has_prelu" : 0 - }, - { - "bottom" : "input.213,input.223", - "alpha" : 1, - "operation" : 0, - "weights" : { - - }, - "fused_relu" : 0, - "debug_info" : "input.225", - "top" : "input.225", - "type" : "elementwise", - "name" : "input.225", - "beta" : 0 - }, - { - "name" : "hidden_states.61_reshape", - "weights" : { - - }, - "dst_w" : 768, - "version" : 1, - "dst_n" : 1, - "dst_nd_rank" : 4, - "type" : "reshape", - "dst_h" : 1, - "mode" : 0, - "dynamic_shape" : false, - "bottom" : "input.225", - "debug_info" : "hidden_states.61_reshape", - "dst_seq" : 1, - "dst_k" : 1500, - "top" : "hidden_states.61_reshape" - }, - { - "bottom" : "hidden_states.61_reshape", - "weights" : { - - }, - "eps" : 9.9999997473787516e-06, - "debug_info" : "hidden_states.61_mvn", - "axis" : 1, - "normalization_mode" : 1, - "top" : "hidden_states.61_mvn", - "type" : "l2_normalize", - "name" : "hidden_states.61_mvn" - }, - { - "bottom" : "", - "k" : 1, - "n" : 1, - "weights" : { - - }, - "h" : 1, - "debug_info" : "hidden_states.61_scale", - "constant_blob" : 331, - "top" : "hidden_states.61_scale_constant_in_scale", - "w" : 768, - "type" : "load_constant", - "name" : "hidden_states.61_scale_constant_in_scale" - }, - { - "bottom" : "", - "k" : 1, - "n" : 1, - "weights" : { - - }, - "h" : 1, - "debug_info" : "hidden_states.61_scale", - "constant_blob" : 333, - "top" : "hidden_states.61_scale_constant_in_bias", - "w" : 768, - "type" : "load_constant", - "name" : "hidden_states.61_scale_constant_in_bias" - }, - { - "bottom" : "hidden_states.61_mvn,hidden_states.61_scale_constant_in_scale", - "alpha" : 1, - "operation" : 1, - "weights" : { - - }, - "fused_relu" : 0, - "debug_info" : "hidden_states.61_scale", - "top" : "hidden_states.61_scale_mul_out", - "type" : "elementwise", - "name" : "hidden_states.61_scale_mul_out", - "beta" : 0 - }, - { - "bottom" : "hidden_states.61_scale_mul_out,hidden_states.61_scale_constant_in_bias", - "alpha" : 1, - "operation" : 0, - "weights" : { - - }, - "fused_relu" : 0, - "debug_info" : "hidden_states.61_scale", - "top" : "hidden_states.61_scale", - "type" : "elementwise", - "name" : "hidden_states.61_scale", - "beta" : 0 - }, - { - "name" : "hidden_states.61", - "weights" : { - - }, - "dst_w" : 768, - "version" : 1, - "dst_n" : 1, - "dst_nd_rank" : 3, - "type" : "reshape", - "dst_h" : 1500, - "mode" : 0, - "dynamic_shape" : false, - "bottom" : "hidden_states.61_scale", - "debug_info" : "hidden_states.61", - "dst_seq" : 1, - "dst_k" : 1, - "top" : "hidden_states.61" - }, - { - "nB" : 768, - "top" : "723", - "has_biases" : 1, - "weights" : { - - }, - "nC" : 768, - "blob_weights" : 337, - "type" : "inner_product", - "has_relu" : 0, - "bottom" : "hidden_states.61", - "blob_biases" : 335, - "has_tanh" : 0, - "debug_info" : "723", - "name" : "723", - "has_prelu" : 0 - }, - { - "bottom" : "723", - "alpha" : 0.125, - "operation" : 1, - "weights" : { - - }, - "fused_relu" : 0, - "debug_info" : "tensor.61", - "top" : "tensor.61", - "type" : "elementwise", - "name" : "tensor.61", - "beta" : 0 - }, - { - "name" : "727", - "weights" : { - - }, - "dst_w" : 64, - "version" : 1, - "dst_n" : 1, - "dst_nd_rank" : 4, - "type" : "reshape", - "dst_h" : 12, - "mode" : 0, - "dynamic_shape" : false, - "bottom" : "tensor.61", - "debug_info" : "727", - "dst_seq" : 1, - "dst_k" : 1500, - "top" : "727" - }, - { - "nB" : 768, - "top" : "tensor.63", - "has_biases" : 1, - "weights" : { - - }, - "nC" : 768, - "blob_weights" : 341, - "type" : "inner_product", - "has_relu" : 0, - "bottom" : "hidden_states.61", - "blob_biases" : 339, - "has_tanh" : 0, - "debug_info" : "tensor.63", - "name" : "tensor.63", - "has_prelu" : 0 - }, - { - "name" : "733", - "weights" : { - - }, - "dst_w" : 64, - "version" : 1, - "dst_n" : 1, - "dst_nd_rank" : 4, - "type" : "reshape", - "dst_h" : 12, - "mode" : 0, - "dynamic_shape" : false, - "bottom" : "tensor.63", - "debug_info" : "733", - "dst_seq" : 1, - "dst_k" : -1, - "top" : "733" - }, - { - "nB" : 768, - "top" : "tensor.65", - "has_biases" : 1, - "weights" : { - - }, - "nC" : 768, - "blob_weights" : 345, - "type" : "inner_product", - "has_relu" : 0, - "bottom" : "hidden_states.61", - "blob_biases" : 343, - "has_tanh" : 0, - "debug_info" : "tensor.65", - "name" : "tensor.65", - "has_prelu" : 0 - }, - { - "name" : "740", - "weights" : { - - }, - "dst_w" : 64, - "version" : 1, - "dst_n" : 1, - "dst_nd_rank" : 4, - "type" : "reshape", - "dst_h" : 12, - "mode" : 0, - "dynamic_shape" : false, - "bottom" : "tensor.65", - "debug_info" : "740", - "dst_seq" : 1, - "dst_k" : -1, - "top" : "740" - }, - { - "axis_h" : 0, - "axis_w" : 2, - "bottom" : "733", - "axis_k" : 1, - "axis_n" : 3, - "axis_seq" : 4, - "weights" : { - - }, - "debug_info" : "transpose_65", - "top" : "transpose_65", - "type" : "transpose", - "name" : "transpose_65" - }, - { - "axis_h" : 2, - "axis_w" : 0, - "bottom" : "727", - "axis_k" : 1, - "axis_n" : 3, - "axis_seq" : 4, - "weights" : { - - }, - "debug_info" : "transpose_66", - "top" : "transpose_66", - "type" : "transpose", - "name" : "transpose_66" - }, - { - "bottom" : "transpose_66,transpose_65", - "weights" : { - - }, - "debug_info" : "input.227", - "top" : "input.227", - "type" : "batch_matmul", - "name" : "input.227", - "channel_mode" : false - }, - { - "bottom" : "input.227", - "weights" : { - - }, - "nd_axis" : -1, - "debug_info" : "input.229", - "top" : "input.229", - "type" : "softmax_nd", - "name" : "input.229" - }, - { - "axis_h" : 2, - "axis_w" : 0, - "bottom" : "740", - "axis_k" : 1, - "axis_n" : 3, - "axis_seq" : 4, - "weights" : { - - }, - "debug_info" : "transpose_67", - "top" : "transpose_67", - "type" : "transpose", - "name" : "transpose_67" - }, - { - "bottom" : "input.229,transpose_67", - "weights" : { - - }, - "debug_info" : "attn_output.41", - "top" : "attn_output.41", - "type" : "batch_matmul", - "name" : "attn_output.41", - "channel_mode" : false - }, - { - "axis_h" : 2, - "axis_w" : 0, - "bottom" : "attn_output.41", - "axis_k" : 1, - "axis_n" : 3, - "axis_seq" : 4, - "weights" : { - - }, - "debug_info" : "transpose_64", - "top" : "transpose_64", - "type" : "transpose", - "name" : "transpose_64" - }, - { - "name" : "input.231", - "weights" : { - - }, - "dst_w" : 768, - "version" : 1, - "dst_n" : 1, - "dst_nd_rank" : 3, - "type" : "reshape", - "dst_h" : 1500, - "mode" : 0, - "dynamic_shape" : false, - "bottom" : "transpose_64", - "debug_info" : "input.231", - "dst_seq" : 1, - "dst_k" : 1, - "top" : "input.231" - }, - { - "nB" : 768, - "top" : "input.233", - "has_biases" : 1, - "weights" : { - - }, - "nC" : 768, - "blob_weights" : 349, - "type" : "inner_product", - "has_relu" : 0, - "bottom" : "input.231", - "blob_biases" : 347, - "has_tanh" : 0, - "debug_info" : "input.233", - "name" : "input.233", - "has_prelu" : 0 - }, - { - "bottom" : "input.225,input.233", - "alpha" : 1, - "operation" : 0, - "weights" : { - - }, - "fused_relu" : 0, - "debug_info" : "input.235", - "top" : "input.235", - "type" : "elementwise", - "name" : "input.235", - "beta" : 0 - }, - { - "name" : "input.237_reshape", - "weights" : { - - }, - "dst_w" : 768, - "version" : 1, - "dst_n" : 1, - "dst_nd_rank" : 4, - "type" : "reshape", - "dst_h" : 1, - "mode" : 0, - "dynamic_shape" : false, - "bottom" : "input.235", - "debug_info" : "input.237_reshape", - "dst_seq" : 1, - "dst_k" : 1500, - "top" : "input.237_reshape" - }, - { - "bottom" : "input.237_reshape", - "weights" : { - - }, - "eps" : 9.9999997473787516e-06, - "debug_info" : "input.237_mvn", - "axis" : 1, - "normalization_mode" : 1, - "top" : "input.237_mvn", - "type" : "l2_normalize", - "name" : "input.237_mvn" - }, - { - "bottom" : "", - "k" : 1, - "n" : 1, - "weights" : { - - }, - "h" : 1, - "debug_info" : "input.237_scale", - "constant_blob" : 351, - "top" : "input.237_scale_constant_in_scale", - "w" : 768, - "type" : "load_constant", - "name" : "input.237_scale_constant_in_scale" - }, - { - "bottom" : "", - "k" : 1, - "n" : 1, - "weights" : { - - }, - "h" : 1, - "debug_info" : "input.237_scale", - "constant_blob" : 353, - "top" : "input.237_scale_constant_in_bias", - "w" : 768, - "type" : "load_constant", - "name" : "input.237_scale_constant_in_bias" - }, - { - "bottom" : "input.237_mvn,input.237_scale_constant_in_scale", - "alpha" : 1, - "operation" : 1, - "weights" : { - - }, - "fused_relu" : 0, - "debug_info" : "input.237_scale", - "top" : "input.237_scale_mul_out", - "type" : "elementwise", - "name" : "input.237_scale_mul_out", - "beta" : 0 - }, - { - "bottom" : "input.237_scale_mul_out,input.237_scale_constant_in_bias", - "alpha" : 1, - "operation" : 0, - "weights" : { - - }, - "fused_relu" : 0, - "debug_info" : "input.237_scale", - "top" : "input.237_scale", - "type" : "elementwise", - "name" : "input.237_scale", - "beta" : 0 - }, - { - "name" : "input.237", - "weights" : { - - }, - "dst_w" : 768, - "version" : 1, - "dst_n" : 1, - "dst_nd_rank" : 3, - "type" : "reshape", - "dst_h" : 1500, - "mode" : 0, - "dynamic_shape" : false, - "bottom" : "input.237_scale", - "debug_info" : "input.237", - "dst_seq" : 1, - "dst_k" : 1, - "top" : "input.237" - }, - { - "nB" : 768, - "top" : "input.239", - "has_biases" : 1, - "weights" : { - - }, - "nC" : 3072, - "blob_weights" : 357, - "type" : "inner_product", - "has_relu" : 0, - "bottom" : "input.237", - "blob_biases" : 355, - "has_tanh" : 0, - "debug_info" : "input.239", - "name" : "input.239", - "has_prelu" : 0 - }, - { - "bottom" : "input.239", - "weights" : { - - }, - "mode" : 19, - "debug_info" : "input.241", - "top" : "input.241", - "type" : "activation", - "name" : "input.241" - }, - { - "nB" : 3072, - "top" : "input.245", - "has_biases" : 1, - "weights" : { - - }, - "nC" : 768, - "blob_weights" : 361, - "type" : "inner_product", - "has_relu" : 0, - "bottom" : "input.241", - "blob_biases" : 359, - "has_tanh" : 0, - "debug_info" : "input.245", - "name" : "input.245", - "has_prelu" : 0 - }, - { - "bottom" : "input.235,input.245", - "alpha" : 1, - "operation" : 0, - "weights" : { - - }, - "fused_relu" : 0, - "debug_info" : "input.247", - "top" : "input.247", - "type" : "elementwise", - "name" : "input.247", - "beta" : 0 - }, - { - "name" : "hidden_states.67_reshape", - "weights" : { - - }, - "dst_w" : 768, - "version" : 1, - "dst_n" : 1, - "dst_nd_rank" : 4, - "type" : "reshape", - "dst_h" : 1, - "mode" : 0, - "dynamic_shape" : false, - "bottom" : "input.247", - "debug_info" : "hidden_states.67_reshape", - "dst_seq" : 1, - "dst_k" : 1500, - "top" : "hidden_states.67_reshape" - }, - { - "bottom" : "hidden_states.67_reshape", - "weights" : { - - }, - "eps" : 9.9999997473787516e-06, - "debug_info" : "hidden_states.67_mvn", - "axis" : 1, - "normalization_mode" : 1, - "top" : "hidden_states.67_mvn", - "type" : "l2_normalize", - "name" : "hidden_states.67_mvn" - }, - { - "bottom" : "", - "k" : 1, - "n" : 1, - "weights" : { - - }, - "h" : 1, - "debug_info" : "hidden_states.67_scale", - "constant_blob" : 363, - "top" : "hidden_states.67_scale_constant_in_scale", - "w" : 768, - "type" : "load_constant", - "name" : "hidden_states.67_scale_constant_in_scale" - }, - { - "bottom" : "", - "k" : 1, - "n" : 1, - "weights" : { - - }, - "h" : 1, - "debug_info" : "hidden_states.67_scale", - "constant_blob" : 365, - "top" : "hidden_states.67_scale_constant_in_bias", - "w" : 768, - "type" : "load_constant", - "name" : "hidden_states.67_scale_constant_in_bias" - }, - { - "bottom" : "hidden_states.67_mvn,hidden_states.67_scale_constant_in_scale", - "alpha" : 1, - "operation" : 1, - "weights" : { - - }, - "fused_relu" : 0, - "debug_info" : "hidden_states.67_scale", - "top" : "hidden_states.67_scale_mul_out", - "type" : "elementwise", - "name" : "hidden_states.67_scale_mul_out", - "beta" : 0 - }, - { - "bottom" : "hidden_states.67_scale_mul_out,hidden_states.67_scale_constant_in_bias", - "alpha" : 1, - "operation" : 0, - "weights" : { - - }, - "fused_relu" : 0, - "debug_info" : "hidden_states.67_scale", - "top" : "hidden_states.67_scale", - "type" : "elementwise", - "name" : "hidden_states.67_scale", - "beta" : 0 - }, - { - "name" : "hidden_states.67", - "weights" : { - - }, - "dst_w" : 768, - "version" : 1, - "dst_n" : 1, - "dst_nd_rank" : 3, - "type" : "reshape", - "dst_h" : 1500, - "mode" : 0, - "dynamic_shape" : false, - "bottom" : "hidden_states.67_scale", - "debug_info" : "hidden_states.67", - "dst_seq" : 1, - "dst_k" : 1, - "top" : "hidden_states.67" - }, - { - "nB" : 768, - "top" : "787", - "has_biases" : 1, - "weights" : { - - }, - "nC" : 768, - "blob_weights" : 369, - "type" : "inner_product", - "has_relu" : 0, - "bottom" : "hidden_states.67", - "blob_biases" : 367, - "has_tanh" : 0, - "debug_info" : "787", - "name" : "787", - "has_prelu" : 0 - }, - { - "bottom" : "787", - "alpha" : 0.125, - "operation" : 1, - "weights" : { - - }, - "fused_relu" : 0, - "debug_info" : "tensor.67", - "top" : "tensor.67", - "type" : "elementwise", - "name" : "tensor.67", - "beta" : 0 - }, - { - "name" : "791", - "weights" : { - - }, - "dst_w" : 64, - "version" : 1, - "dst_n" : 1, - "dst_nd_rank" : 4, - "type" : "reshape", - "dst_h" : 12, - "mode" : 0, - "dynamic_shape" : false, - "bottom" : "tensor.67", - "debug_info" : "791", - "dst_seq" : 1, - "dst_k" : 1500, - "top" : "791" - }, - { - "nB" : 768, - "top" : "tensor.69", - "has_biases" : 1, - "weights" : { - - }, - "nC" : 768, - "blob_weights" : 373, - "type" : "inner_product", - "has_relu" : 0, - "bottom" : "hidden_states.67", - "blob_biases" : 371, - "has_tanh" : 0, - "debug_info" : "tensor.69", - "name" : "tensor.69", - "has_prelu" : 0 - }, - { - "name" : "797", - "weights" : { - - }, - "dst_w" : 64, - "version" : 1, - "dst_n" : 1, - "dst_nd_rank" : 4, - "type" : "reshape", - "dst_h" : 12, - "mode" : 0, - "dynamic_shape" : false, - "bottom" : "tensor.69", - "debug_info" : "797", - "dst_seq" : 1, - "dst_k" : -1, - "top" : "797" - }, - { - "nB" : 768, - "top" : "tensor", - "has_biases" : 1, - "weights" : { - - }, - "nC" : 768, - "blob_weights" : 377, - "type" : "inner_product", - "has_relu" : 0, - "bottom" : "hidden_states.67", - "blob_biases" : 375, - "has_tanh" : 0, - "debug_info" : "tensor", - "name" : "tensor", - "has_prelu" : 0 - }, - { - "name" : "804", - "weights" : { - - }, - "dst_w" : 64, - "version" : 1, - "dst_n" : 1, - "dst_nd_rank" : 4, - "type" : "reshape", - "dst_h" : 12, - "mode" : 0, - "dynamic_shape" : false, - "bottom" : "tensor", - "debug_info" : "804", - "dst_seq" : 1, - "dst_k" : -1, - "top" : "804" - }, - { - "axis_h" : 0, - "axis_w" : 2, - "bottom" : "797", - "axis_k" : 1, - "axis_n" : 3, - "axis_seq" : 4, - "weights" : { - - }, - "debug_info" : "transpose_61", - "top" : "transpose_61", - "type" : "transpose", - "name" : "transpose_61" - }, - { - "axis_h" : 2, - "axis_w" : 0, - "bottom" : "791", - "axis_k" : 1, - "axis_n" : 3, - "axis_seq" : 4, - "weights" : { - - }, - "debug_info" : "transpose_62", - "top" : "transpose_62", - "type" : "transpose", - "name" : "transpose_62" - }, - { - "bottom" : "transpose_62,transpose_61", - "weights" : { - - }, - "debug_info" : "input.249", - "top" : "input.249", - "type" : "batch_matmul", - "name" : "input.249", - "channel_mode" : false - }, - { - "bottom" : "input.249", - "weights" : { - - }, - "nd_axis" : -1, - "debug_info" : "input.251", - "top" : "input.251", - "type" : "softmax_nd", - "name" : "input.251" - }, - { - "axis_h" : 2, - "axis_w" : 0, - "bottom" : "804", - "axis_k" : 1, - "axis_n" : 3, - "axis_seq" : 4, - "weights" : { - - }, - "debug_info" : "transpose_63", - "top" : "transpose_63", - "type" : "transpose", - "name" : "transpose_63" - }, - { - "bottom" : "input.251,transpose_63", - "weights" : { - - }, - "debug_info" : "attn_output.45", - "top" : "attn_output.45", - "type" : "batch_matmul", - "name" : "attn_output.45", - "channel_mode" : false - }, - { - "axis_h" : 2, - "axis_w" : 0, - "bottom" : "attn_output.45", - "axis_k" : 1, - "axis_n" : 3, - "axis_seq" : 4, - "weights" : { - - }, - "debug_info" : "transpose_60", - "top" : "transpose_60", - "type" : "transpose", - "name" : "transpose_60" - }, - { - "name" : "input.253", - "weights" : { - - }, - "dst_w" : 768, - "version" : 1, - "dst_n" : 1, - "dst_nd_rank" : 3, - "type" : "reshape", - "dst_h" : 1500, - "mode" : 0, - "dynamic_shape" : false, - "bottom" : "transpose_60", - "debug_info" : "input.253", - "dst_seq" : 1, - "dst_k" : 1, - "top" : "input.253" - }, - { - "nB" : 768, - "top" : "input.255", - "has_biases" : 1, - "weights" : { - - }, - "nC" : 768, - "blob_weights" : 381, - "type" : "inner_product", - "has_relu" : 0, - "bottom" : "input.253", - "blob_biases" : 379, - "has_tanh" : 0, - "debug_info" : "input.255", - "name" : "input.255", - "has_prelu" : 0 - }, - { - "bottom" : "input.247,input.255", - "alpha" : 1, - "operation" : 0, - "weights" : { - - }, - "fused_relu" : 0, - "debug_info" : "input.257", - "top" : "input.257", - "type" : "elementwise", - "name" : "input.257", - "beta" : 0 - }, - { - "name" : "input.259_reshape", - "weights" : { - - }, - "dst_w" : 768, - "version" : 1, - "dst_n" : 1, - "dst_nd_rank" : 4, - "type" : "reshape", - "dst_h" : 1, - "mode" : 0, - "dynamic_shape" : false, - "bottom" : "input.257", - "debug_info" : "input.259_reshape", - "dst_seq" : 1, - "dst_k" : 1500, - "top" : "input.259_reshape" - }, - { - "bottom" : "input.259_reshape", - "weights" : { - - }, - "eps" : 9.9999997473787516e-06, - "debug_info" : "input.259_mvn", - "axis" : 1, - "normalization_mode" : 1, - "top" : "input.259_mvn", - "type" : "l2_normalize", - "name" : "input.259_mvn" - }, - { - "bottom" : "", - "k" : 1, - "n" : 1, - "weights" : { - - }, - "h" : 1, - "debug_info" : "input.259_scale", - "constant_blob" : 383, - "top" : "input.259_scale_constant_in_scale", - "w" : 768, - "type" : "load_constant", - "name" : "input.259_scale_constant_in_scale" - }, - { - "bottom" : "", - "k" : 1, - "n" : 1, - "weights" : { - - }, - "h" : 1, - "debug_info" : "input.259_scale", - "constant_blob" : 385, - "top" : "input.259_scale_constant_in_bias", - "w" : 768, - "type" : "load_constant", - "name" : "input.259_scale_constant_in_bias" - }, - { - "bottom" : "input.259_mvn,input.259_scale_constant_in_scale", - "alpha" : 1, - "operation" : 1, - "weights" : { - - }, - "fused_relu" : 0, - "debug_info" : "input.259_scale", - "top" : "input.259_scale_mul_out", - "type" : "elementwise", - "name" : "input.259_scale_mul_out", - "beta" : 0 - }, - { - "bottom" : "input.259_scale_mul_out,input.259_scale_constant_in_bias", - "alpha" : 1, - "operation" : 0, - "weights" : { - - }, - "fused_relu" : 0, - "debug_info" : "input.259_scale", - "top" : "input.259_scale", - "type" : "elementwise", - "name" : "input.259_scale", - "beta" : 0 - }, - { - "name" : "input.259", - "weights" : { - - }, - "dst_w" : 768, - "version" : 1, - "dst_n" : 1, - "dst_nd_rank" : 3, - "type" : "reshape", - "dst_h" : 1500, - "mode" : 0, - "dynamic_shape" : false, - "bottom" : "input.259_scale", - "debug_info" : "input.259", - "dst_seq" : 1, - "dst_k" : 1, - "top" : "input.259" - }, - { - "nB" : 768, - "top" : "input.261", - "has_biases" : 1, - "weights" : { - - }, - "nC" : 3072, - "blob_weights" : 389, - "type" : "inner_product", - "has_relu" : 0, - "bottom" : "input.259", - "blob_biases" : 387, - "has_tanh" : 0, - "debug_info" : "input.261", - "name" : "input.261", - "has_prelu" : 0 - }, - { - "bottom" : "input.261", - "weights" : { - - }, - "mode" : 19, - "debug_info" : "input.263", - "top" : "input.263", - "type" : "activation", - "name" : "input.263" - }, - { - "nB" : 3072, - "top" : "input.267", - "has_biases" : 1, - "weights" : { - - }, - "nC" : 768, - "blob_weights" : 393, - "type" : "inner_product", - "has_relu" : 0, - "bottom" : "input.263", - "blob_biases" : 391, - "has_tanh" : 0, - "debug_info" : "input.267", - "name" : "input.267", - "has_prelu" : 0 - }, - { - "bottom" : "input.257,input.267", - "alpha" : 1, - "operation" : 0, - "weights" : { - - }, - "fused_relu" : 0, - "debug_info" : "input", - "top" : "input", - "type" : "elementwise", - "name" : "input", - "beta" : 0 - }, - { - "name" : "837_reshape", - "weights" : { - - }, - "dst_w" : 768, - "version" : 1, - "dst_n" : 1, - "dst_nd_rank" : 4, - "type" : "reshape", - "dst_h" : 1, - "mode" : 0, - "dynamic_shape" : false, - "bottom" : "input", - "debug_info" : "837_reshape", - "dst_seq" : 1, - "dst_k" : 1500, - "top" : "837_reshape" - }, - { - "bottom" : "837_reshape", - "weights" : { - - }, - "eps" : 9.9999997473787516e-06, - "debug_info" : "837_mvn", - "axis" : 1, - "normalization_mode" : 1, - "top" : "837_mvn", - "type" : "l2_normalize", - "name" : "837_mvn" - }, - { - "bottom" : "", - "k" : 1, - "n" : 1, - "weights" : { - - }, - "h" : 1, - "debug_info" : "837_scale", - "constant_blob" : 395, - "top" : "837_scale_constant_in_scale", - "w" : 768, - "type" : "load_constant", - "name" : "837_scale_constant_in_scale" - }, - { - "bottom" : "", - "k" : 1, - "n" : 1, - "weights" : { - - }, - "h" : 1, - "debug_info" : "837_scale", - "constant_blob" : 397, - "top" : "837_scale_constant_in_bias", - "w" : 768, - "type" : "load_constant", - "name" : "837_scale_constant_in_bias" - }, - { - "bottom" : "837_mvn,837_scale_constant_in_scale", - "alpha" : 1, - "operation" : 1, - "weights" : { - - }, - "fused_relu" : 0, - "debug_info" : "837_scale", - "top" : "837_scale_mul_out", - "type" : "elementwise", - "name" : "837_scale_mul_out", - "beta" : 0 - }, - { - "bottom" : "837_scale_mul_out,837_scale_constant_in_bias", - "alpha" : 1, - "operation" : 0, - "weights" : { - - }, - "fused_relu" : 0, - "debug_info" : "837_scale", - "top" : "837_scale", - "type" : "elementwise", - "name" : "837_scale", - "beta" : 0 - }, - { - "name" : "837", - "weights" : { - - }, - "dst_w" : 768, - "version" : 1, - "dst_n" : 1, - "dst_nd_rank" : 3, - "type" : "reshape", - "dst_h" : 1500, - "mode" : 0, - "attributes" : { - "is_output" : 1 - }, - "dynamic_shape" : false, - "bottom" : "837_scale", - "debug_info" : "837", - "dst_k" : 1, - "dst_seq" : 1, - "top" : "last_hidden_state" - } - ] -} \ No newline at end of file