diff --git "a/WhisperEncoder.mlmodelc/model.espresso.net" "b/WhisperEncoder.mlmodelc/model.espresso.net" new file mode 100644--- /dev/null +++ "b/WhisperEncoder.mlmodelc/model.espresso.net" @@ -0,0 +1,6883 @@ +{ + "storage" : "model.espresso.weights", + "analyses" : { + + }, + "properties" : { + + }, + "format_version" : 200, + "metadata_in_weights" : [ + + ], + "layers" : [ + { + "size_of_axes" : 1, + "bottom" : "input_features", + "axes_0" : -2, + "weights" : { + + }, + "nd_axis" : 0, + "debug_info" : "expand_dims_0", + "top" : "expand_dims_0", + "type" : "expand_dims", + "name" : "expand_dims_0" + }, + { + "pad_r" : 1, + "fused_relu" : 0, + "fused_tanh" : 0, + "debug_info" : "conv_0", + "pad_fill_mode" : 0, + "pad_b" : 0, + "pad_l" : 1, + "top" : "conv_0", + "blob_weights" : 3, + "K" : 80, + "blob_biases" : 1, + "name" : "conv_0", + "has_batch_norm" : 0, + "type" : "convolution", + "n_groups" : 1, + "pad_t" : 0, + "has_biases" : 1, + "C" : 768, + "bottom" : "expand_dims_0", + "weights" : { + + }, + "Nx" : 3, + "pad_mode" : 0, + "pad_value" : 0, + "Ny" : 1, + "n_parallel" : 1 + }, + { + "size_of_axes" : 1, + "bottom" : "conv_0", + "axes_0" : -2, + "weights" : { + + }, + "nd_axis" : 0, + "debug_info" : "52", + "version" : 1, + "top" : "52", + "type" : "squeeze", + "name" : "52" + }, + { + "bottom" : "52", + "weights" : { + + }, + "mode" : 19, + "debug_info" : "input.1", + "top" : "input.1", + "type" : "activation", + "name" : "input.1" + }, + { + "size_of_axes" : 1, + "bottom" : "input.1", + "axes_0" : -2, + "weights" : { + + }, + "nd_axis" : 0, + "debug_info" : "expand_dims_2", + "top" : "expand_dims_2", + "type" : "expand_dims", + "name" : "expand_dims_2" + }, + { + "pad_r" : 1, + "fused_relu" : 0, + "fused_tanh" : 0, + "debug_info" : "conv_1", + "pad_fill_mode" : 0, + "pad_b" : 0, + "pad_l" : 1, + "top" : "conv_1", + "blob_weights" : 7, + "K" : 768, + "blob_biases" : 5, + "stride_x" : 2, + "name" : "conv_1", + "has_batch_norm" : 0, + "type" : "convolution", + "n_groups" : 1, + "pad_t" : 0, + "has_biases" : 1, + "C" : 768, + "bottom" : "expand_dims_2", + "weights" : { + + }, + "Nx" : 3, + "pad_mode" : 0, + "pad_value" : 0, + "Ny" : 1, + "n_parallel" : 1 + }, + { + "size_of_axes" : 1, + "bottom" : "conv_1", + "axes_0" : -2, + "weights" : { + + }, + "nd_axis" : 0, + "debug_info" : "60", + "version" : 1, + "top" : "60", + "type" : "squeeze", + "name" : "60" + }, + { + "bottom" : "60", + "weights" : { + + }, + "mode" : 19, + "debug_info" : "inputs_embeds.1", + "top" : "inputs_embeds.1", + "type" : "activation", + "name" : "inputs_embeds.1" + }, + { + "axis_h" : 0, + "axis_w" : 1, + "bottom" : "inputs_embeds.1", + "axis_k" : 2, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_108", + "top" : "transpose_108", + "type" : "transpose", + "name" : "transpose_108" + }, + { + "top" : "encoder.embed_positions.weight", + "w" : 768, + "h" : 1500, + "name" : "encoder.embed_positions.weight", + "nd_rank" : 2, + "type" : "load_constant", + "k" : 1, + "bottom" : "", + "debug_info" : "encoder.embed_positions.weight", + "n" : 1, + "weights" : { + + }, + "constant_blob" : 9 + }, + { + "bottom" : "encoder.embed_positions.weight,transpose_108", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "input.3", + "top" : "input.3", + "type" : "elementwise", + "name" : "input.3", + "beta" : 0 + }, + { + "name" : "hidden_states.1_reshape", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 4, + "type" : "reshape", + "dst_h" : 1, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "input.3", + "debug_info" : "hidden_states.1_reshape", + "dst_seq" : 1, + "dst_k" : 1500, + "top" : "hidden_states.1_reshape" + }, + { + "bottom" : "hidden_states.1_reshape", + "weights" : { + + }, + "eps" : 9.9999997473787516e-06, + "debug_info" : "hidden_states.1_mvn", + "axis" : 1, + "normalization_mode" : 1, + "top" : "hidden_states.1_mvn", + "type" : "l2_normalize", + "name" : "hidden_states.1_mvn" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "hidden_states.1_scale", + "constant_blob" : 11, + "top" : "hidden_states.1_scale_constant_in_scale", + "w" : 768, + "type" : "load_constant", + "name" : "hidden_states.1_scale_constant_in_scale" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "hidden_states.1_scale", + "constant_blob" : 13, + "top" : "hidden_states.1_scale_constant_in_bias", + "w" : 768, + "type" : "load_constant", + "name" : "hidden_states.1_scale_constant_in_bias" + }, + { + "bottom" : "hidden_states.1_mvn,hidden_states.1_scale_constant_in_scale", + "alpha" : 1, + "operation" : 1, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "hidden_states.1_scale", + "top" : "hidden_states.1_scale_mul_out", + "type" : "elementwise", + "name" : "hidden_states.1_scale_mul_out", + "beta" : 0 + }, + { + "bottom" : "hidden_states.1_scale_mul_out,hidden_states.1_scale_constant_in_bias", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "hidden_states.1_scale", + "top" : "hidden_states.1_scale", + "type" : "elementwise", + "name" : "hidden_states.1_scale", + "beta" : 0 + }, + { + "name" : "hidden_states.1", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 3, + "type" : "reshape", + "dst_h" : 1500, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "hidden_states.1_scale", + "debug_info" : "hidden_states.1", + "dst_seq" : 1, + "dst_k" : 1, + "top" : "hidden_states.1" + }, + { + "nB" : 768, + "top" : "83", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 17, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "hidden_states.1", + "blob_biases" : 15, + "has_tanh" : 0, + "debug_info" : "83", + "name" : "83", + "has_prelu" : 0 + }, + { + "bottom" : "83", + "alpha" : 0.125, + "operation" : 1, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "tensor.1", + "top" : "tensor.1", + "type" : "elementwise", + "name" : "tensor.1", + "beta" : 0 + }, + { + "name" : "87", + "weights" : { + + }, + "dst_w" : 64, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 4, + "type" : "reshape", + "dst_h" : 12, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "tensor.1", + "debug_info" : "87", + "dst_seq" : 1, + "dst_k" : 1500, + "top" : "87" + }, + { + "nB" : 768, + "top" : "tensor.3", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 21, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "hidden_states.1", + "blob_biases" : 19, + "has_tanh" : 0, + "debug_info" : "tensor.3", + "name" : "tensor.3", + "has_prelu" : 0 + }, + { + "name" : "93", + "weights" : { + + }, + "dst_w" : 64, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 4, + "type" : "reshape", + "dst_h" : 12, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "tensor.3", + "debug_info" : "93", + "dst_seq" : 1, + "dst_k" : -1, + "top" : "93" + }, + { + "nB" : 768, + "top" : "tensor.5", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 25, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "hidden_states.1", + "blob_biases" : 23, + "has_tanh" : 0, + "debug_info" : "tensor.5", + "name" : "tensor.5", + "has_prelu" : 0 + }, + { + "name" : "100", + "weights" : { + + }, + "dst_w" : 64, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 4, + "type" : "reshape", + "dst_h" : 12, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "tensor.5", + "debug_info" : "100", + "dst_seq" : 1, + "dst_k" : -1, + "top" : "100" + }, + { + "axis_h" : 0, + "axis_w" : 2, + "bottom" : "93", + "axis_k" : 1, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_105", + "top" : "transpose_105", + "type" : "transpose", + "name" : "transpose_105" + }, + { + "axis_h" : 2, + "axis_w" : 0, + "bottom" : "87", + "axis_k" : 1, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_106", + "top" : "transpose_106", + "type" : "transpose", + "name" : "transpose_106" + }, + { + "bottom" : "transpose_106,transpose_105", + "weights" : { + + }, + "debug_info" : "input.7", + "top" : "input.7", + "type" : "batch_matmul", + "name" : "input.7", + "channel_mode" : false + }, + { + "bottom" : "input.7", + "weights" : { + + }, + "nd_axis" : -1, + "debug_info" : "input.9", + "top" : "input.9", + "type" : "softmax_nd", + "name" : "input.9" + }, + { + "axis_h" : 2, + "axis_w" : 0, + "bottom" : "100", + "axis_k" : 1, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_107", + "top" : "transpose_107", + "type" : "transpose", + "name" : "transpose_107" + }, + { + "bottom" : "input.9,transpose_107", + "weights" : { + + }, + "debug_info" : "attn_output.1", + "top" : "attn_output.1", + "type" : "batch_matmul", + "name" : "attn_output.1", + "channel_mode" : false + }, + { + "axis_h" : 2, + "axis_w" : 0, + "bottom" : "attn_output.1", + "axis_k" : 1, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_104", + "top" : "transpose_104", + "type" : "transpose", + "name" : "transpose_104" + }, + { + "name" : "input.11", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 3, + "type" : "reshape", + "dst_h" : 1500, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "transpose_104", + "debug_info" : "input.11", + "dst_seq" : 1, + "dst_k" : 1, + "top" : "input.11" + }, + { + "nB" : 768, + "top" : "input.13", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 29, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "input.11", + "blob_biases" : 27, + "has_tanh" : 0, + "debug_info" : "input.13", + "name" : "input.13", + "has_prelu" : 0 + }, + { + "bottom" : "input.3,input.13", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "input.15", + "top" : "input.15", + "type" : "elementwise", + "name" : "input.15", + "beta" : 0 + }, + { + "name" : "input.17_reshape", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 4, + "type" : "reshape", + "dst_h" : 1, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "input.15", + "debug_info" : "input.17_reshape", + "dst_seq" : 1, + "dst_k" : 1500, + "top" : "input.17_reshape" + }, + { + "bottom" : "input.17_reshape", + "weights" : { + + }, + "eps" : 9.9999997473787516e-06, + "debug_info" : "input.17_mvn", + "axis" : 1, + "normalization_mode" : 1, + "top" : "input.17_mvn", + "type" : "l2_normalize", + "name" : "input.17_mvn" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "input.17_scale", + "constant_blob" : 31, + "top" : "input.17_scale_constant_in_scale", + "w" : 768, + "type" : "load_constant", + "name" : "input.17_scale_constant_in_scale" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "input.17_scale", + "constant_blob" : 33, + "top" : "input.17_scale_constant_in_bias", + "w" : 768, + "type" : "load_constant", + "name" : "input.17_scale_constant_in_bias" + }, + { + "bottom" : "input.17_mvn,input.17_scale_constant_in_scale", + "alpha" : 1, + "operation" : 1, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "input.17_scale", + "top" : "input.17_scale_mul_out", + "type" : "elementwise", + "name" : "input.17_scale_mul_out", + "beta" : 0 + }, + { + "bottom" : "input.17_scale_mul_out,input.17_scale_constant_in_bias", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "input.17_scale", + "top" : "input.17_scale", + "type" : "elementwise", + "name" : "input.17_scale", + "beta" : 0 + }, + { + "name" : "input.17", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 3, + "type" : "reshape", + "dst_h" : 1500, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "input.17_scale", + "debug_info" : "input.17", + "dst_seq" : 1, + "dst_k" : 1, + "top" : "input.17" + }, + { + "nB" : 768, + "top" : "input.19", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 3072, + "blob_weights" : 37, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "input.17", + "blob_biases" : 35, + "has_tanh" : 0, + "debug_info" : "input.19", + "name" : "input.19", + "has_prelu" : 0 + }, + { + "bottom" : "input.19", + "weights" : { + + }, + "mode" : 19, + "debug_info" : "input.21", + "top" : "input.21", + "type" : "activation", + "name" : "input.21" + }, + { + "nB" : 3072, + "top" : "input.25", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 41, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "input.21", + "blob_biases" : 39, + "has_tanh" : 0, + "debug_info" : "input.25", + "name" : "input.25", + "has_prelu" : 0 + }, + { + "bottom" : "input.15,input.25", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "input.27", + "top" : "input.27", + "type" : "elementwise", + "name" : "input.27", + "beta" : 0 + }, + { + "name" : "hidden_states.7_reshape", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 4, + "type" : "reshape", + "dst_h" : 1, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "input.27", + "debug_info" : "hidden_states.7_reshape", + "dst_seq" : 1, + "dst_k" : 1500, + "top" : "hidden_states.7_reshape" + }, + { + "bottom" : "hidden_states.7_reshape", + "weights" : { + + }, + "eps" : 9.9999997473787516e-06, + "debug_info" : "hidden_states.7_mvn", + "axis" : 1, + "normalization_mode" : 1, + "top" : "hidden_states.7_mvn", + "type" : "l2_normalize", + "name" : "hidden_states.7_mvn" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "hidden_states.7_scale", + "constant_blob" : 43, + "top" : "hidden_states.7_scale_constant_in_scale", + "w" : 768, + "type" : "load_constant", + "name" : "hidden_states.7_scale_constant_in_scale" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "hidden_states.7_scale", + "constant_blob" : 45, + "top" : "hidden_states.7_scale_constant_in_bias", + "w" : 768, + "type" : "load_constant", + "name" : "hidden_states.7_scale_constant_in_bias" + }, + { + "bottom" : "hidden_states.7_mvn,hidden_states.7_scale_constant_in_scale", + "alpha" : 1, + "operation" : 1, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "hidden_states.7_scale", + "top" : "hidden_states.7_scale_mul_out", + "type" : "elementwise", + "name" : "hidden_states.7_scale_mul_out", + "beta" : 0 + }, + { + "bottom" : "hidden_states.7_scale_mul_out,hidden_states.7_scale_constant_in_bias", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "hidden_states.7_scale", + "top" : "hidden_states.7_scale", + "type" : "elementwise", + "name" : "hidden_states.7_scale", + "beta" : 0 + }, + { + "name" : "hidden_states.7", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 3, + "type" : "reshape", + "dst_h" : 1500, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "hidden_states.7_scale", + "debug_info" : "hidden_states.7", + "dst_seq" : 1, + "dst_k" : 1, + "top" : "hidden_states.7" + }, + { + "nB" : 768, + "top" : "147", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 49, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "hidden_states.7", + "blob_biases" : 47, + "has_tanh" : 0, + "debug_info" : "147", + "name" : "147", + "has_prelu" : 0 + }, + { + "bottom" : "147", + "alpha" : 0.125, + "operation" : 1, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "tensor.7", + "top" : "tensor.7", + "type" : "elementwise", + "name" : "tensor.7", + "beta" : 0 + }, + { + "name" : "151", + "weights" : { + + }, + "dst_w" : 64, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 4, + "type" : "reshape", + "dst_h" : 12, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "tensor.7", + "debug_info" : "151", + "dst_seq" : 1, + "dst_k" : 1500, + "top" : "151" + }, + { + "nB" : 768, + "top" : "tensor.9", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 53, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "hidden_states.7", + "blob_biases" : 51, + "has_tanh" : 0, + "debug_info" : "tensor.9", + "name" : "tensor.9", + "has_prelu" : 0 + }, + { + "name" : "157", + "weights" : { + + }, + "dst_w" : 64, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 4, + "type" : "reshape", + "dst_h" : 12, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "tensor.9", + "debug_info" : "157", + "dst_seq" : 1, + "dst_k" : -1, + "top" : "157" + }, + { + "nB" : 768, + "top" : "tensor.11", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 57, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "hidden_states.7", + "blob_biases" : 55, + "has_tanh" : 0, + "debug_info" : "tensor.11", + "name" : "tensor.11", + "has_prelu" : 0 + }, + { + "name" : "164", + "weights" : { + + }, + "dst_w" : 64, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 4, + "type" : "reshape", + "dst_h" : 12, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "tensor.11", + "debug_info" : "164", + "dst_seq" : 1, + "dst_k" : -1, + "top" : "164" + }, + { + "axis_h" : 0, + "axis_w" : 2, + "bottom" : "157", + "axis_k" : 1, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_101", + "top" : "transpose_101", + "type" : "transpose", + "name" : "transpose_101" + }, + { + "axis_h" : 2, + "axis_w" : 0, + "bottom" : "151", + "axis_k" : 1, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_102", + "top" : "transpose_102", + "type" : "transpose", + "name" : "transpose_102" + }, + { + "bottom" : "transpose_102,transpose_101", + "weights" : { + + }, + "debug_info" : "input.29", + "top" : "input.29", + "type" : "batch_matmul", + "name" : "input.29", + "channel_mode" : false + }, + { + "bottom" : "input.29", + "weights" : { + + }, + "nd_axis" : -1, + "debug_info" : "input.31", + "top" : "input.31", + "type" : "softmax_nd", + "name" : "input.31" + }, + { + "axis_h" : 2, + "axis_w" : 0, + "bottom" : "164", + "axis_k" : 1, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_103", + "top" : "transpose_103", + "type" : "transpose", + "name" : "transpose_103" + }, + { + "bottom" : "input.31,transpose_103", + "weights" : { + + }, + "debug_info" : "attn_output.5", + "top" : "attn_output.5", + "type" : "batch_matmul", + "name" : "attn_output.5", + "channel_mode" : false + }, + { + "axis_h" : 2, + "axis_w" : 0, + "bottom" : "attn_output.5", + "axis_k" : 1, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_100", + "top" : "transpose_100", + "type" : "transpose", + "name" : "transpose_100" + }, + { + "name" : "input.33", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 3, + "type" : "reshape", + "dst_h" : 1500, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "transpose_100", + "debug_info" : "input.33", + "dst_seq" : 1, + "dst_k" : 1, + "top" : "input.33" + }, + { + "nB" : 768, + "top" : "input.35", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 61, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "input.33", + "blob_biases" : 59, + "has_tanh" : 0, + "debug_info" : "input.35", + "name" : "input.35", + "has_prelu" : 0 + }, + { + "bottom" : "input.27,input.35", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "input.37", + "top" : "input.37", + "type" : "elementwise", + "name" : "input.37", + "beta" : 0 + }, + { + "name" : "input.39_reshape", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 4, + "type" : "reshape", + "dst_h" : 1, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "input.37", + "debug_info" : "input.39_reshape", + "dst_seq" : 1, + "dst_k" : 1500, + "top" : "input.39_reshape" + }, + { + "bottom" : "input.39_reshape", + "weights" : { + + }, + "eps" : 9.9999997473787516e-06, + "debug_info" : "input.39_mvn", + "axis" : 1, + "normalization_mode" : 1, + "top" : "input.39_mvn", + "type" : "l2_normalize", + "name" : "input.39_mvn" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "input.39_scale", + "constant_blob" : 63, + "top" : "input.39_scale_constant_in_scale", + "w" : 768, + "type" : "load_constant", + "name" : "input.39_scale_constant_in_scale" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "input.39_scale", + "constant_blob" : 65, + "top" : "input.39_scale_constant_in_bias", + "w" : 768, + "type" : "load_constant", + "name" : "input.39_scale_constant_in_bias" + }, + { + "bottom" : "input.39_mvn,input.39_scale_constant_in_scale", + "alpha" : 1, + "operation" : 1, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "input.39_scale", + "top" : "input.39_scale_mul_out", + "type" : "elementwise", + "name" : "input.39_scale_mul_out", + "beta" : 0 + }, + { + "bottom" : "input.39_scale_mul_out,input.39_scale_constant_in_bias", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "input.39_scale", + "top" : "input.39_scale", + "type" : "elementwise", + "name" : "input.39_scale", + "beta" : 0 + }, + { + "name" : "input.39", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 3, + "type" : "reshape", + "dst_h" : 1500, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "input.39_scale", + "debug_info" : "input.39", + "dst_seq" : 1, + "dst_k" : 1, + "top" : "input.39" + }, + { + "nB" : 768, + "top" : "input.41", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 3072, + "blob_weights" : 69, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "input.39", + "blob_biases" : 67, + "has_tanh" : 0, + "debug_info" : "input.41", + "name" : "input.41", + "has_prelu" : 0 + }, + { + "bottom" : "input.41", + "weights" : { + + }, + "mode" : 19, + "debug_info" : "input.43", + "top" : "input.43", + "type" : "activation", + "name" : "input.43" + }, + { + "nB" : 3072, + "top" : "input.47", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 73, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "input.43", + "blob_biases" : 71, + "has_tanh" : 0, + "debug_info" : "input.47", + "name" : "input.47", + "has_prelu" : 0 + }, + { + "bottom" : "input.37,input.47", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "input.49", + "top" : "input.49", + "type" : "elementwise", + "name" : "input.49", + "beta" : 0 + }, + { + "name" : "hidden_states.13_reshape", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 4, + "type" : "reshape", + "dst_h" : 1, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "input.49", + "debug_info" : "hidden_states.13_reshape", + "dst_seq" : 1, + "dst_k" : 1500, + "top" : "hidden_states.13_reshape" + }, + { + "bottom" : "hidden_states.13_reshape", + "weights" : { + + }, + "eps" : 9.9999997473787516e-06, + "debug_info" : "hidden_states.13_mvn", + "axis" : 1, + "normalization_mode" : 1, + "top" : "hidden_states.13_mvn", + "type" : "l2_normalize", + "name" : "hidden_states.13_mvn" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "hidden_states.13_scale", + "constant_blob" : 75, + "top" : "hidden_states.13_scale_constant_in_scale", + "w" : 768, + "type" : "load_constant", + "name" : "hidden_states.13_scale_constant_in_scale" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "hidden_states.13_scale", + "constant_blob" : 77, + "top" : "hidden_states.13_scale_constant_in_bias", + "w" : 768, + "type" : "load_constant", + "name" : "hidden_states.13_scale_constant_in_bias" + }, + { + "bottom" : "hidden_states.13_mvn,hidden_states.13_scale_constant_in_scale", + "alpha" : 1, + "operation" : 1, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "hidden_states.13_scale", + "top" : "hidden_states.13_scale_mul_out", + "type" : "elementwise", + "name" : "hidden_states.13_scale_mul_out", + "beta" : 0 + }, + { + "bottom" : "hidden_states.13_scale_mul_out,hidden_states.13_scale_constant_in_bias", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "hidden_states.13_scale", + "top" : "hidden_states.13_scale", + "type" : "elementwise", + "name" : "hidden_states.13_scale", + "beta" : 0 + }, + { + "name" : "hidden_states.13", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 3, + "type" : "reshape", + "dst_h" : 1500, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "hidden_states.13_scale", + "debug_info" : "hidden_states.13", + "dst_seq" : 1, + "dst_k" : 1, + "top" : "hidden_states.13" + }, + { + "nB" : 768, + "top" : "211", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 81, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "hidden_states.13", + "blob_biases" : 79, + "has_tanh" : 0, + "debug_info" : "211", + "name" : "211", + "has_prelu" : 0 + }, + { + "bottom" : "211", + "alpha" : 0.125, + "operation" : 1, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "tensor.13", + "top" : "tensor.13", + "type" : "elementwise", + "name" : "tensor.13", + "beta" : 0 + }, + { + "name" : "215", + "weights" : { + + }, + "dst_w" : 64, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 4, + "type" : "reshape", + "dst_h" : 12, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "tensor.13", + "debug_info" : "215", + "dst_seq" : 1, + "dst_k" : 1500, + "top" : "215" + }, + { + "nB" : 768, + "top" : "tensor.15", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 85, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "hidden_states.13", + "blob_biases" : 83, + "has_tanh" : 0, + "debug_info" : "tensor.15", + "name" : "tensor.15", + "has_prelu" : 0 + }, + { + "name" : "221", + "weights" : { + + }, + "dst_w" : 64, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 4, + "type" : "reshape", + "dst_h" : 12, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "tensor.15", + "debug_info" : "221", + "dst_seq" : 1, + "dst_k" : -1, + "top" : "221" + }, + { + "nB" : 768, + "top" : "tensor.17", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 89, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "hidden_states.13", + "blob_biases" : 87, + "has_tanh" : 0, + "debug_info" : "tensor.17", + "name" : "tensor.17", + "has_prelu" : 0 + }, + { + "name" : "228", + "weights" : { + + }, + "dst_w" : 64, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 4, + "type" : "reshape", + "dst_h" : 12, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "tensor.17", + "debug_info" : "228", + "dst_seq" : 1, + "dst_k" : -1, + "top" : "228" + }, + { + "axis_h" : 0, + "axis_w" : 2, + "bottom" : "221", + "axis_k" : 1, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_97", + "top" : "transpose_97", + "type" : "transpose", + "name" : "transpose_97" + }, + { + "axis_h" : 2, + "axis_w" : 0, + "bottom" : "215", + "axis_k" : 1, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_98", + "top" : "transpose_98", + "type" : "transpose", + "name" : "transpose_98" + }, + { + "bottom" : "transpose_98,transpose_97", + "weights" : { + + }, + "debug_info" : "input.51", + "top" : "input.51", + "type" : "batch_matmul", + "name" : "input.51", + "channel_mode" : false + }, + { + "bottom" : "input.51", + "weights" : { + + }, + "nd_axis" : -1, + "debug_info" : "input.53", + "top" : "input.53", + "type" : "softmax_nd", + "name" : "input.53" + }, + { + "axis_h" : 2, + "axis_w" : 0, + "bottom" : "228", + "axis_k" : 1, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_99", + "top" : "transpose_99", + "type" : "transpose", + "name" : "transpose_99" + }, + { + "bottom" : "input.53,transpose_99", + "weights" : { + + }, + "debug_info" : "attn_output.9", + "top" : "attn_output.9", + "type" : "batch_matmul", + "name" : "attn_output.9", + "channel_mode" : false + }, + { + "axis_h" : 2, + "axis_w" : 0, + "bottom" : "attn_output.9", + "axis_k" : 1, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_96", + "top" : "transpose_96", + "type" : "transpose", + "name" : "transpose_96" + }, + { + "name" : "input.55", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 3, + "type" : "reshape", + "dst_h" : 1500, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "transpose_96", + "debug_info" : "input.55", + "dst_seq" : 1, + "dst_k" : 1, + "top" : "input.55" + }, + { + "nB" : 768, + "top" : "input.57", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 93, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "input.55", + "blob_biases" : 91, + "has_tanh" : 0, + "debug_info" : "input.57", + "name" : "input.57", + "has_prelu" : 0 + }, + { + "bottom" : "input.49,input.57", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "input.59", + "top" : "input.59", + "type" : "elementwise", + "name" : "input.59", + "beta" : 0 + }, + { + "name" : "input.61_reshape", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 4, + "type" : "reshape", + "dst_h" : 1, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "input.59", + "debug_info" : "input.61_reshape", + "dst_seq" : 1, + "dst_k" : 1500, + "top" : "input.61_reshape" + }, + { + "bottom" : "input.61_reshape", + "weights" : { + + }, + "eps" : 9.9999997473787516e-06, + "debug_info" : "input.61_mvn", + "axis" : 1, + "normalization_mode" : 1, + "top" : "input.61_mvn", + "type" : "l2_normalize", + "name" : "input.61_mvn" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "input.61_scale", + "constant_blob" : 95, + "top" : "input.61_scale_constant_in_scale", + "w" : 768, + "type" : "load_constant", + "name" : "input.61_scale_constant_in_scale" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "input.61_scale", + "constant_blob" : 97, + "top" : "input.61_scale_constant_in_bias", + "w" : 768, + "type" : "load_constant", + "name" : "input.61_scale_constant_in_bias" + }, + { + "bottom" : "input.61_mvn,input.61_scale_constant_in_scale", + "alpha" : 1, + "operation" : 1, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "input.61_scale", + "top" : "input.61_scale_mul_out", + "type" : "elementwise", + "name" : "input.61_scale_mul_out", + "beta" : 0 + }, + { + "bottom" : "input.61_scale_mul_out,input.61_scale_constant_in_bias", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "input.61_scale", + "top" : "input.61_scale", + "type" : "elementwise", + "name" : "input.61_scale", + "beta" : 0 + }, + { + "name" : "input.61", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 3, + "type" : "reshape", + "dst_h" : 1500, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "input.61_scale", + "debug_info" : "input.61", + "dst_seq" : 1, + "dst_k" : 1, + "top" : "input.61" + }, + { + "nB" : 768, + "top" : "input.63", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 3072, + "blob_weights" : 101, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "input.61", + "blob_biases" : 99, + "has_tanh" : 0, + "debug_info" : "input.63", + "name" : "input.63", + "has_prelu" : 0 + }, + { + "bottom" : "input.63", + "weights" : { + + }, + "mode" : 19, + "debug_info" : "input.65", + "top" : "input.65", + "type" : "activation", + "name" : "input.65" + }, + { + "nB" : 3072, + "top" : "input.69", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 105, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "input.65", + "blob_biases" : 103, + "has_tanh" : 0, + "debug_info" : "input.69", + "name" : "input.69", + "has_prelu" : 0 + }, + { + "bottom" : "input.59,input.69", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "input.71", + "top" : "input.71", + "type" : "elementwise", + "name" : "input.71", + "beta" : 0 + }, + { + "name" : "hidden_states.19_reshape", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 4, + "type" : "reshape", + "dst_h" : 1, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "input.71", + "debug_info" : "hidden_states.19_reshape", + "dst_seq" : 1, + "dst_k" : 1500, + "top" : "hidden_states.19_reshape" + }, + { + "bottom" : "hidden_states.19_reshape", + "weights" : { + + }, + "eps" : 9.9999997473787516e-06, + "debug_info" : "hidden_states.19_mvn", + "axis" : 1, + "normalization_mode" : 1, + "top" : "hidden_states.19_mvn", + "type" : "l2_normalize", + "name" : "hidden_states.19_mvn" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "hidden_states.19_scale", + "constant_blob" : 107, + "top" : "hidden_states.19_scale_constant_in_scale", + "w" : 768, + "type" : "load_constant", + "name" : "hidden_states.19_scale_constant_in_scale" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "hidden_states.19_scale", + "constant_blob" : 109, + "top" : "hidden_states.19_scale_constant_in_bias", + "w" : 768, + "type" : "load_constant", + "name" : "hidden_states.19_scale_constant_in_bias" + }, + { + "bottom" : "hidden_states.19_mvn,hidden_states.19_scale_constant_in_scale", + "alpha" : 1, + "operation" : 1, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "hidden_states.19_scale", + "top" : "hidden_states.19_scale_mul_out", + "type" : "elementwise", + "name" : "hidden_states.19_scale_mul_out", + "beta" : 0 + }, + { + "bottom" : "hidden_states.19_scale_mul_out,hidden_states.19_scale_constant_in_bias", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "hidden_states.19_scale", + "top" : "hidden_states.19_scale", + "type" : "elementwise", + "name" : "hidden_states.19_scale", + "beta" : 0 + }, + { + "name" : "hidden_states.19", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 3, + "type" : "reshape", + "dst_h" : 1500, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "hidden_states.19_scale", + "debug_info" : "hidden_states.19", + "dst_seq" : 1, + "dst_k" : 1, + "top" : "hidden_states.19" + }, + { + "nB" : 768, + "top" : "275", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 113, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "hidden_states.19", + "blob_biases" : 111, + "has_tanh" : 0, + "debug_info" : "275", + "name" : "275", + "has_prelu" : 0 + }, + { + "bottom" : "275", + "alpha" : 0.125, + "operation" : 1, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "tensor.19", + "top" : "tensor.19", + "type" : "elementwise", + "name" : "tensor.19", + "beta" : 0 + }, + { + "name" : "279", + "weights" : { + + }, + "dst_w" : 64, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 4, + "type" : "reshape", + "dst_h" : 12, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "tensor.19", + "debug_info" : "279", + "dst_seq" : 1, + "dst_k" : 1500, + "top" : "279" + }, + { + "nB" : 768, + "top" : "tensor.21", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 117, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "hidden_states.19", + "blob_biases" : 115, + "has_tanh" : 0, + "debug_info" : "tensor.21", + "name" : "tensor.21", + "has_prelu" : 0 + }, + { + "name" : "285", + "weights" : { + + }, + "dst_w" : 64, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 4, + "type" : "reshape", + "dst_h" : 12, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "tensor.21", + "debug_info" : "285", + "dst_seq" : 1, + "dst_k" : -1, + "top" : "285" + }, + { + "nB" : 768, + "top" : "tensor.23", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 121, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "hidden_states.19", + "blob_biases" : 119, + "has_tanh" : 0, + "debug_info" : "tensor.23", + "name" : "tensor.23", + "has_prelu" : 0 + }, + { + "name" : "292", + "weights" : { + + }, + "dst_w" : 64, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 4, + "type" : "reshape", + "dst_h" : 12, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "tensor.23", + "debug_info" : "292", + "dst_seq" : 1, + "dst_k" : -1, + "top" : "292" + }, + { + "axis_h" : 0, + "axis_w" : 2, + "bottom" : "285", + "axis_k" : 1, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_93", + "top" : "transpose_93", + "type" : "transpose", + "name" : "transpose_93" + }, + { + "axis_h" : 2, + "axis_w" : 0, + "bottom" : "279", + "axis_k" : 1, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_94", + "top" : "transpose_94", + "type" : "transpose", + "name" : "transpose_94" + }, + { + "bottom" : "transpose_94,transpose_93", + "weights" : { + + }, + "debug_info" : "input.73", + "top" : "input.73", + "type" : "batch_matmul", + "name" : "input.73", + "channel_mode" : false + }, + { + "bottom" : "input.73", + "weights" : { + + }, + "nd_axis" : -1, + "debug_info" : "input.75", + "top" : "input.75", + "type" : "softmax_nd", + "name" : "input.75" + }, + { + "axis_h" : 2, + "axis_w" : 0, + "bottom" : "292", + "axis_k" : 1, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_95", + "top" : "transpose_95", + "type" : "transpose", + "name" : "transpose_95" + }, + { + "bottom" : "input.75,transpose_95", + "weights" : { + + }, + "debug_info" : "attn_output.13", + "top" : "attn_output.13", + "type" : "batch_matmul", + "name" : "attn_output.13", + "channel_mode" : false + }, + { + "axis_h" : 2, + "axis_w" : 0, + "bottom" : "attn_output.13", + "axis_k" : 1, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_92", + "top" : "transpose_92", + "type" : "transpose", + "name" : "transpose_92" + }, + { + "name" : "input.77", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 3, + "type" : "reshape", + "dst_h" : 1500, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "transpose_92", + "debug_info" : "input.77", + "dst_seq" : 1, + "dst_k" : 1, + "top" : "input.77" + }, + { + "nB" : 768, + "top" : "input.79", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 125, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "input.77", + "blob_biases" : 123, + "has_tanh" : 0, + "debug_info" : "input.79", + "name" : "input.79", + "has_prelu" : 0 + }, + { + "bottom" : "input.71,input.79", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "input.81", + "top" : "input.81", + "type" : "elementwise", + "name" : "input.81", + "beta" : 0 + }, + { + "name" : "input.83_reshape", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 4, + "type" : "reshape", + "dst_h" : 1, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "input.81", + "debug_info" : "input.83_reshape", + "dst_seq" : 1, + "dst_k" : 1500, + "top" : "input.83_reshape" + }, + { + "bottom" : "input.83_reshape", + "weights" : { + + }, + "eps" : 9.9999997473787516e-06, + "debug_info" : "input.83_mvn", + "axis" : 1, + "normalization_mode" : 1, + "top" : "input.83_mvn", + "type" : "l2_normalize", + "name" : "input.83_mvn" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "input.83_scale", + "constant_blob" : 127, + "top" : "input.83_scale_constant_in_scale", + "w" : 768, + "type" : "load_constant", + "name" : "input.83_scale_constant_in_scale" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "input.83_scale", + "constant_blob" : 129, + "top" : "input.83_scale_constant_in_bias", + "w" : 768, + "type" : "load_constant", + "name" : "input.83_scale_constant_in_bias" + }, + { + "bottom" : "input.83_mvn,input.83_scale_constant_in_scale", + "alpha" : 1, + "operation" : 1, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "input.83_scale", + "top" : "input.83_scale_mul_out", + "type" : "elementwise", + "name" : "input.83_scale_mul_out", + "beta" : 0 + }, + { + "bottom" : "input.83_scale_mul_out,input.83_scale_constant_in_bias", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "input.83_scale", + "top" : "input.83_scale", + "type" : "elementwise", + "name" : "input.83_scale", + "beta" : 0 + }, + { + "name" : "input.83", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 3, + "type" : "reshape", + "dst_h" : 1500, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "input.83_scale", + "debug_info" : "input.83", + "dst_seq" : 1, + "dst_k" : 1, + "top" : "input.83" + }, + { + "nB" : 768, + "top" : "input.85", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 3072, + "blob_weights" : 133, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "input.83", + "blob_biases" : 131, + "has_tanh" : 0, + "debug_info" : "input.85", + "name" : "input.85", + "has_prelu" : 0 + }, + { + "bottom" : "input.85", + "weights" : { + + }, + "mode" : 19, + "debug_info" : "input.87", + "top" : "input.87", + "type" : "activation", + "name" : "input.87" + }, + { + "nB" : 3072, + "top" : "input.91", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 137, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "input.87", + "blob_biases" : 135, + "has_tanh" : 0, + "debug_info" : "input.91", + "name" : "input.91", + "has_prelu" : 0 + }, + { + "bottom" : "input.81,input.91", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "input.93", + "top" : "input.93", + "type" : "elementwise", + "name" : "input.93", + "beta" : 0 + }, + { + "name" : "hidden_states.25_reshape", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 4, + "type" : "reshape", + "dst_h" : 1, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "input.93", + "debug_info" : "hidden_states.25_reshape", + "dst_seq" : 1, + "dst_k" : 1500, + "top" : "hidden_states.25_reshape" + }, + { + "bottom" : "hidden_states.25_reshape", + "weights" : { + + }, + "eps" : 9.9999997473787516e-06, + "debug_info" : "hidden_states.25_mvn", + "axis" : 1, + "normalization_mode" : 1, + "top" : "hidden_states.25_mvn", + "type" : "l2_normalize", + "name" : "hidden_states.25_mvn" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "hidden_states.25_scale", + "constant_blob" : 139, + "top" : "hidden_states.25_scale_constant_in_scale", + "w" : 768, + "type" : "load_constant", + "name" : "hidden_states.25_scale_constant_in_scale" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "hidden_states.25_scale", + "constant_blob" : 141, + "top" : "hidden_states.25_scale_constant_in_bias", + "w" : 768, + "type" : "load_constant", + "name" : "hidden_states.25_scale_constant_in_bias" + }, + { + "bottom" : "hidden_states.25_mvn,hidden_states.25_scale_constant_in_scale", + "alpha" : 1, + "operation" : 1, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "hidden_states.25_scale", + "top" : "hidden_states.25_scale_mul_out", + "type" : "elementwise", + "name" : "hidden_states.25_scale_mul_out", + "beta" : 0 + }, + { + "bottom" : "hidden_states.25_scale_mul_out,hidden_states.25_scale_constant_in_bias", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "hidden_states.25_scale", + "top" : "hidden_states.25_scale", + "type" : "elementwise", + "name" : "hidden_states.25_scale", + "beta" : 0 + }, + { + "name" : "hidden_states.25", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 3, + "type" : "reshape", + "dst_h" : 1500, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "hidden_states.25_scale", + "debug_info" : "hidden_states.25", + "dst_seq" : 1, + "dst_k" : 1, + "top" : "hidden_states.25" + }, + { + "nB" : 768, + "top" : "339", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 145, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "hidden_states.25", + "blob_biases" : 143, + "has_tanh" : 0, + "debug_info" : "339", + "name" : "339", + "has_prelu" : 0 + }, + { + "bottom" : "339", + "alpha" : 0.125, + "operation" : 1, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "tensor.25", + "top" : "tensor.25", + "type" : "elementwise", + "name" : "tensor.25", + "beta" : 0 + }, + { + "name" : "343", + "weights" : { + + }, + "dst_w" : 64, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 4, + "type" : "reshape", + "dst_h" : 12, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "tensor.25", + "debug_info" : "343", + "dst_seq" : 1, + "dst_k" : 1500, + "top" : "343" + }, + { + "nB" : 768, + "top" : "tensor.27", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 149, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "hidden_states.25", + "blob_biases" : 147, + "has_tanh" : 0, + "debug_info" : "tensor.27", + "name" : "tensor.27", + "has_prelu" : 0 + }, + { + "name" : "349", + "weights" : { + + }, + "dst_w" : 64, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 4, + "type" : "reshape", + "dst_h" : 12, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "tensor.27", + "debug_info" : "349", + "dst_seq" : 1, + "dst_k" : -1, + "top" : "349" + }, + { + "nB" : 768, + "top" : "tensor.29", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 153, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "hidden_states.25", + "blob_biases" : 151, + "has_tanh" : 0, + "debug_info" : "tensor.29", + "name" : "tensor.29", + "has_prelu" : 0 + }, + { + "name" : "356", + "weights" : { + + }, + "dst_w" : 64, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 4, + "type" : "reshape", + "dst_h" : 12, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "tensor.29", + "debug_info" : "356", + "dst_seq" : 1, + "dst_k" : -1, + "top" : "356" + }, + { + "axis_h" : 0, + "axis_w" : 2, + "bottom" : "349", + "axis_k" : 1, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_89", + "top" : "transpose_89", + "type" : "transpose", + "name" : "transpose_89" + }, + { + "axis_h" : 2, + "axis_w" : 0, + "bottom" : "343", + "axis_k" : 1, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_90", + "top" : "transpose_90", + "type" : "transpose", + "name" : "transpose_90" + }, + { + "bottom" : "transpose_90,transpose_89", + "weights" : { + + }, + "debug_info" : "input.95", + "top" : "input.95", + "type" : "batch_matmul", + "name" : "input.95", + "channel_mode" : false + }, + { + "bottom" : "input.95", + "weights" : { + + }, + "nd_axis" : -1, + "debug_info" : "input.97", + "top" : "input.97", + "type" : "softmax_nd", + "name" : "input.97" + }, + { + "axis_h" : 2, + "axis_w" : 0, + "bottom" : "356", + "axis_k" : 1, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_91", + "top" : "transpose_91", + "type" : "transpose", + "name" : "transpose_91" + }, + { + "bottom" : "input.97,transpose_91", + "weights" : { + + }, + "debug_info" : "attn_output.17", + "top" : "attn_output.17", + "type" : "batch_matmul", + "name" : "attn_output.17", + "channel_mode" : false + }, + { + "axis_h" : 2, + "axis_w" : 0, + "bottom" : "attn_output.17", + "axis_k" : 1, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_88", + "top" : "transpose_88", + "type" : "transpose", + "name" : "transpose_88" + }, + { + "name" : "input.99", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 3, + "type" : "reshape", + "dst_h" : 1500, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "transpose_88", + "debug_info" : "input.99", + "dst_seq" : 1, + "dst_k" : 1, + "top" : "input.99" + }, + { + "nB" : 768, + "top" : "input.101", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 157, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "input.99", + "blob_biases" : 155, + "has_tanh" : 0, + "debug_info" : "input.101", + "name" : "input.101", + "has_prelu" : 0 + }, + { + "bottom" : "input.93,input.101", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "input.103", + "top" : "input.103", + "type" : "elementwise", + "name" : "input.103", + "beta" : 0 + }, + { + "name" : "input.105_reshape", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 4, + "type" : "reshape", + "dst_h" : 1, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "input.103", + "debug_info" : "input.105_reshape", + "dst_seq" : 1, + "dst_k" : 1500, + "top" : "input.105_reshape" + }, + { + "bottom" : "input.105_reshape", + "weights" : { + + }, + "eps" : 9.9999997473787516e-06, + "debug_info" : "input.105_mvn", + "axis" : 1, + "normalization_mode" : 1, + "top" : "input.105_mvn", + "type" : "l2_normalize", + "name" : "input.105_mvn" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "input.105_scale", + "constant_blob" : 159, + "top" : "input.105_scale_constant_in_scale", + "w" : 768, + "type" : "load_constant", + "name" : "input.105_scale_constant_in_scale" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "input.105_scale", + "constant_blob" : 161, + "top" : "input.105_scale_constant_in_bias", + "w" : 768, + "type" : "load_constant", + "name" : "input.105_scale_constant_in_bias" + }, + { + "bottom" : "input.105_mvn,input.105_scale_constant_in_scale", + "alpha" : 1, + "operation" : 1, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "input.105_scale", + "top" : "input.105_scale_mul_out", + "type" : "elementwise", + "name" : "input.105_scale_mul_out", + "beta" : 0 + }, + { + "bottom" : "input.105_scale_mul_out,input.105_scale_constant_in_bias", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "input.105_scale", + "top" : "input.105_scale", + "type" : "elementwise", + "name" : "input.105_scale", + "beta" : 0 + }, + { + "name" : "input.105", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 3, + "type" : "reshape", + "dst_h" : 1500, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "input.105_scale", + "debug_info" : "input.105", + "dst_seq" : 1, + "dst_k" : 1, + "top" : "input.105" + }, + { + "nB" : 768, + "top" : "input.107", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 3072, + "blob_weights" : 165, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "input.105", + "blob_biases" : 163, + "has_tanh" : 0, + "debug_info" : "input.107", + "name" : "input.107", + "has_prelu" : 0 + }, + { + "bottom" : "input.107", + "weights" : { + + }, + "mode" : 19, + "debug_info" : "input.109", + "top" : "input.109", + "type" : "activation", + "name" : "input.109" + }, + { + "nB" : 3072, + "top" : "input.113", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 169, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "input.109", + "blob_biases" : 167, + "has_tanh" : 0, + "debug_info" : "input.113", + "name" : "input.113", + "has_prelu" : 0 + }, + { + "bottom" : "input.103,input.113", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "input.115", + "top" : "input.115", + "type" : "elementwise", + "name" : "input.115", + "beta" : 0 + }, + { + "name" : "hidden_states.31_reshape", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 4, + "type" : "reshape", + "dst_h" : 1, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "input.115", + "debug_info" : "hidden_states.31_reshape", + "dst_seq" : 1, + "dst_k" : 1500, + "top" : "hidden_states.31_reshape" + }, + { + "bottom" : "hidden_states.31_reshape", + "weights" : { + + }, + "eps" : 9.9999997473787516e-06, + "debug_info" : "hidden_states.31_mvn", + "axis" : 1, + "normalization_mode" : 1, + "top" : "hidden_states.31_mvn", + "type" : "l2_normalize", + "name" : "hidden_states.31_mvn" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "hidden_states.31_scale", + "constant_blob" : 171, + "top" : "hidden_states.31_scale_constant_in_scale", + "w" : 768, + "type" : "load_constant", + "name" : "hidden_states.31_scale_constant_in_scale" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "hidden_states.31_scale", + "constant_blob" : 173, + "top" : "hidden_states.31_scale_constant_in_bias", + "w" : 768, + "type" : "load_constant", + "name" : "hidden_states.31_scale_constant_in_bias" + }, + { + "bottom" : "hidden_states.31_mvn,hidden_states.31_scale_constant_in_scale", + "alpha" : 1, + "operation" : 1, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "hidden_states.31_scale", + "top" : "hidden_states.31_scale_mul_out", + "type" : "elementwise", + "name" : "hidden_states.31_scale_mul_out", + "beta" : 0 + }, + { + "bottom" : "hidden_states.31_scale_mul_out,hidden_states.31_scale_constant_in_bias", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "hidden_states.31_scale", + "top" : "hidden_states.31_scale", + "type" : "elementwise", + "name" : "hidden_states.31_scale", + "beta" : 0 + }, + { + "name" : "hidden_states.31", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 3, + "type" : "reshape", + "dst_h" : 1500, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "hidden_states.31_scale", + "debug_info" : "hidden_states.31", + "dst_seq" : 1, + "dst_k" : 1, + "top" : "hidden_states.31" + }, + { + "nB" : 768, + "top" : "403", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 177, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "hidden_states.31", + "blob_biases" : 175, + "has_tanh" : 0, + "debug_info" : "403", + "name" : "403", + "has_prelu" : 0 + }, + { + "bottom" : "403", + "alpha" : 0.125, + "operation" : 1, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "tensor.31", + "top" : "tensor.31", + "type" : "elementwise", + "name" : "tensor.31", + "beta" : 0 + }, + { + "name" : "407", + "weights" : { + + }, + "dst_w" : 64, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 4, + "type" : "reshape", + "dst_h" : 12, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "tensor.31", + "debug_info" : "407", + "dst_seq" : 1, + "dst_k" : 1500, + "top" : "407" + }, + { + "nB" : 768, + "top" : "tensor.33", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 181, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "hidden_states.31", + "blob_biases" : 179, + "has_tanh" : 0, + "debug_info" : "tensor.33", + "name" : "tensor.33", + "has_prelu" : 0 + }, + { + "name" : "413", + "weights" : { + + }, + "dst_w" : 64, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 4, + "type" : "reshape", + "dst_h" : 12, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "tensor.33", + "debug_info" : "413", + "dst_seq" : 1, + "dst_k" : -1, + "top" : "413" + }, + { + "nB" : 768, + "top" : "tensor.35", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 185, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "hidden_states.31", + "blob_biases" : 183, + "has_tanh" : 0, + "debug_info" : "tensor.35", + "name" : "tensor.35", + "has_prelu" : 0 + }, + { + "name" : "420", + "weights" : { + + }, + "dst_w" : 64, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 4, + "type" : "reshape", + "dst_h" : 12, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "tensor.35", + "debug_info" : "420", + "dst_seq" : 1, + "dst_k" : -1, + "top" : "420" + }, + { + "axis_h" : 0, + "axis_w" : 2, + "bottom" : "413", + "axis_k" : 1, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_85", + "top" : "transpose_85", + "type" : "transpose", + "name" : "transpose_85" + }, + { + "axis_h" : 2, + "axis_w" : 0, + "bottom" : "407", + "axis_k" : 1, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_86", + "top" : "transpose_86", + "type" : "transpose", + "name" : "transpose_86" + }, + { + "bottom" : "transpose_86,transpose_85", + "weights" : { + + }, + "debug_info" : "input.117", + "top" : "input.117", + "type" : "batch_matmul", + "name" : "input.117", + "channel_mode" : false + }, + { + "bottom" : "input.117", + "weights" : { + + }, + "nd_axis" : -1, + "debug_info" : "input.119", + "top" : "input.119", + "type" : "softmax_nd", + "name" : "input.119" + }, + { + "axis_h" : 2, + "axis_w" : 0, + "bottom" : "420", + "axis_k" : 1, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_87", + "top" : "transpose_87", + "type" : "transpose", + "name" : "transpose_87" + }, + { + "bottom" : "input.119,transpose_87", + "weights" : { + + }, + "debug_info" : "attn_output.21", + "top" : "attn_output.21", + "type" : "batch_matmul", + "name" : "attn_output.21", + "channel_mode" : false + }, + { + "axis_h" : 2, + "axis_w" : 0, + "bottom" : "attn_output.21", + "axis_k" : 1, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_84", + "top" : "transpose_84", + "type" : "transpose", + "name" : "transpose_84" + }, + { + "name" : "input.121", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 3, + "type" : "reshape", + "dst_h" : 1500, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "transpose_84", + "debug_info" : "input.121", + "dst_seq" : 1, + "dst_k" : 1, + "top" : "input.121" + }, + { + "nB" : 768, + "top" : "input.123", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 189, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "input.121", + "blob_biases" : 187, + "has_tanh" : 0, + "debug_info" : "input.123", + "name" : "input.123", + "has_prelu" : 0 + }, + { + "bottom" : "input.115,input.123", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "input.125", + "top" : "input.125", + "type" : "elementwise", + "name" : "input.125", + "beta" : 0 + }, + { + "name" : "input.127_reshape", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 4, + "type" : "reshape", + "dst_h" : 1, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "input.125", + "debug_info" : "input.127_reshape", + "dst_seq" : 1, + "dst_k" : 1500, + "top" : "input.127_reshape" + }, + { + "bottom" : "input.127_reshape", + "weights" : { + + }, + "eps" : 9.9999997473787516e-06, + "debug_info" : "input.127_mvn", + "axis" : 1, + "normalization_mode" : 1, + "top" : "input.127_mvn", + "type" : "l2_normalize", + "name" : "input.127_mvn" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "input.127_scale", + "constant_blob" : 191, + "top" : "input.127_scale_constant_in_scale", + "w" : 768, + "type" : "load_constant", + "name" : "input.127_scale_constant_in_scale" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "input.127_scale", + "constant_blob" : 193, + "top" : "input.127_scale_constant_in_bias", + "w" : 768, + "type" : "load_constant", + "name" : "input.127_scale_constant_in_bias" + }, + { + "bottom" : "input.127_mvn,input.127_scale_constant_in_scale", + "alpha" : 1, + "operation" : 1, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "input.127_scale", + "top" : "input.127_scale_mul_out", + "type" : "elementwise", + "name" : "input.127_scale_mul_out", + "beta" : 0 + }, + { + "bottom" : "input.127_scale_mul_out,input.127_scale_constant_in_bias", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "input.127_scale", + "top" : "input.127_scale", + "type" : "elementwise", + "name" : "input.127_scale", + "beta" : 0 + }, + { + "name" : "input.127", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 3, + "type" : "reshape", + "dst_h" : 1500, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "input.127_scale", + "debug_info" : "input.127", + "dst_seq" : 1, + "dst_k" : 1, + "top" : "input.127" + }, + { + "nB" : 768, + "top" : "input.129", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 3072, + "blob_weights" : 197, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "input.127", + "blob_biases" : 195, + "has_tanh" : 0, + "debug_info" : "input.129", + "name" : "input.129", + "has_prelu" : 0 + }, + { + "bottom" : "input.129", + "weights" : { + + }, + "mode" : 19, + "debug_info" : "input.131", + "top" : "input.131", + "type" : "activation", + "name" : "input.131" + }, + { + "nB" : 3072, + "top" : "input.135", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 201, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "input.131", + "blob_biases" : 199, + "has_tanh" : 0, + "debug_info" : "input.135", + "name" : "input.135", + "has_prelu" : 0 + }, + { + "bottom" : "input.125,input.135", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "input.137", + "top" : "input.137", + "type" : "elementwise", + "name" : "input.137", + "beta" : 0 + }, + { + "name" : "hidden_states.37_reshape", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 4, + "type" : "reshape", + "dst_h" : 1, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "input.137", + "debug_info" : "hidden_states.37_reshape", + "dst_seq" : 1, + "dst_k" : 1500, + "top" : "hidden_states.37_reshape" + }, + { + "bottom" : "hidden_states.37_reshape", + "weights" : { + + }, + "eps" : 9.9999997473787516e-06, + "debug_info" : "hidden_states.37_mvn", + "axis" : 1, + "normalization_mode" : 1, + "top" : "hidden_states.37_mvn", + "type" : "l2_normalize", + "name" : "hidden_states.37_mvn" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "hidden_states.37_scale", + "constant_blob" : 203, + "top" : "hidden_states.37_scale_constant_in_scale", + "w" : 768, + "type" : "load_constant", + "name" : "hidden_states.37_scale_constant_in_scale" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "hidden_states.37_scale", + "constant_blob" : 205, + "top" : "hidden_states.37_scale_constant_in_bias", + "w" : 768, + "type" : "load_constant", + "name" : "hidden_states.37_scale_constant_in_bias" + }, + { + "bottom" : "hidden_states.37_mvn,hidden_states.37_scale_constant_in_scale", + "alpha" : 1, + "operation" : 1, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "hidden_states.37_scale", + "top" : "hidden_states.37_scale_mul_out", + "type" : "elementwise", + "name" : "hidden_states.37_scale_mul_out", + "beta" : 0 + }, + { + "bottom" : "hidden_states.37_scale_mul_out,hidden_states.37_scale_constant_in_bias", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "hidden_states.37_scale", + "top" : "hidden_states.37_scale", + "type" : "elementwise", + "name" : "hidden_states.37_scale", + "beta" : 0 + }, + { + "name" : "hidden_states.37", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 3, + "type" : "reshape", + "dst_h" : 1500, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "hidden_states.37_scale", + "debug_info" : "hidden_states.37", + "dst_seq" : 1, + "dst_k" : 1, + "top" : "hidden_states.37" + }, + { + "nB" : 768, + "top" : "467", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 209, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "hidden_states.37", + "blob_biases" : 207, + "has_tanh" : 0, + "debug_info" : "467", + "name" : "467", + "has_prelu" : 0 + }, + { + "bottom" : "467", + "alpha" : 0.125, + "operation" : 1, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "tensor.37", + "top" : "tensor.37", + "type" : "elementwise", + "name" : "tensor.37", + "beta" : 0 + }, + { + "name" : "471", + "weights" : { + + }, + "dst_w" : 64, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 4, + "type" : "reshape", + "dst_h" : 12, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "tensor.37", + "debug_info" : "471", + "dst_seq" : 1, + "dst_k" : 1500, + "top" : "471" + }, + { + "nB" : 768, + "top" : "tensor.39", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 213, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "hidden_states.37", + "blob_biases" : 211, + "has_tanh" : 0, + "debug_info" : "tensor.39", + "name" : "tensor.39", + "has_prelu" : 0 + }, + { + "name" : "477", + "weights" : { + + }, + "dst_w" : 64, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 4, + "type" : "reshape", + "dst_h" : 12, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "tensor.39", + "debug_info" : "477", + "dst_seq" : 1, + "dst_k" : -1, + "top" : "477" + }, + { + "nB" : 768, + "top" : "tensor.41", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 217, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "hidden_states.37", + "blob_biases" : 215, + "has_tanh" : 0, + "debug_info" : "tensor.41", + "name" : "tensor.41", + "has_prelu" : 0 + }, + { + "name" : "484", + "weights" : { + + }, + "dst_w" : 64, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 4, + "type" : "reshape", + "dst_h" : 12, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "tensor.41", + "debug_info" : "484", + "dst_seq" : 1, + "dst_k" : -1, + "top" : "484" + }, + { + "axis_h" : 0, + "axis_w" : 2, + "bottom" : "477", + "axis_k" : 1, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_81", + "top" : "transpose_81", + "type" : "transpose", + "name" : "transpose_81" + }, + { + "axis_h" : 2, + "axis_w" : 0, + "bottom" : "471", + "axis_k" : 1, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_82", + "top" : "transpose_82", + "type" : "transpose", + "name" : "transpose_82" + }, + { + "bottom" : "transpose_82,transpose_81", + "weights" : { + + }, + "debug_info" : "input.139", + "top" : "input.139", + "type" : "batch_matmul", + "name" : "input.139", + "channel_mode" : false + }, + { + "bottom" : "input.139", + "weights" : { + + }, + "nd_axis" : -1, + "debug_info" : "input.141", + "top" : "input.141", + "type" : "softmax_nd", + "name" : "input.141" + }, + { + "axis_h" : 2, + "axis_w" : 0, + "bottom" : "484", + "axis_k" : 1, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_83", + "top" : "transpose_83", + "type" : "transpose", + "name" : "transpose_83" + }, + { + "bottom" : "input.141,transpose_83", + "weights" : { + + }, + "debug_info" : "attn_output.25", + "top" : "attn_output.25", + "type" : "batch_matmul", + "name" : "attn_output.25", + "channel_mode" : false + }, + { + "axis_h" : 2, + "axis_w" : 0, + "bottom" : "attn_output.25", + "axis_k" : 1, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_80", + "top" : "transpose_80", + "type" : "transpose", + "name" : "transpose_80" + }, + { + "name" : "input.143", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 3, + "type" : "reshape", + "dst_h" : 1500, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "transpose_80", + "debug_info" : "input.143", + "dst_seq" : 1, + "dst_k" : 1, + "top" : "input.143" + }, + { + "nB" : 768, + "top" : "input.145", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 221, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "input.143", + "blob_biases" : 219, + "has_tanh" : 0, + "debug_info" : "input.145", + "name" : "input.145", + "has_prelu" : 0 + }, + { + "bottom" : "input.137,input.145", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "input.147", + "top" : "input.147", + "type" : "elementwise", + "name" : "input.147", + "beta" : 0 + }, + { + "name" : "input.149_reshape", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 4, + "type" : "reshape", + "dst_h" : 1, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "input.147", + "debug_info" : "input.149_reshape", + "dst_seq" : 1, + "dst_k" : 1500, + "top" : "input.149_reshape" + }, + { + "bottom" : "input.149_reshape", + "weights" : { + + }, + "eps" : 9.9999997473787516e-06, + "debug_info" : "input.149_mvn", + "axis" : 1, + "normalization_mode" : 1, + "top" : "input.149_mvn", + "type" : "l2_normalize", + "name" : "input.149_mvn" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "input.149_scale", + "constant_blob" : 223, + "top" : "input.149_scale_constant_in_scale", + "w" : 768, + "type" : "load_constant", + "name" : "input.149_scale_constant_in_scale" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "input.149_scale", + "constant_blob" : 225, + "top" : "input.149_scale_constant_in_bias", + "w" : 768, + "type" : "load_constant", + "name" : "input.149_scale_constant_in_bias" + }, + { + "bottom" : "input.149_mvn,input.149_scale_constant_in_scale", + "alpha" : 1, + "operation" : 1, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "input.149_scale", + "top" : "input.149_scale_mul_out", + "type" : "elementwise", + "name" : "input.149_scale_mul_out", + "beta" : 0 + }, + { + "bottom" : "input.149_scale_mul_out,input.149_scale_constant_in_bias", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "input.149_scale", + "top" : "input.149_scale", + "type" : "elementwise", + "name" : "input.149_scale", + "beta" : 0 + }, + { + "name" : "input.149", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 3, + "type" : "reshape", + "dst_h" : 1500, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "input.149_scale", + "debug_info" : "input.149", + "dst_seq" : 1, + "dst_k" : 1, + "top" : "input.149" + }, + { + "nB" : 768, + "top" : "input.151", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 3072, + "blob_weights" : 229, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "input.149", + "blob_biases" : 227, + "has_tanh" : 0, + "debug_info" : "input.151", + "name" : "input.151", + "has_prelu" : 0 + }, + { + "bottom" : "input.151", + "weights" : { + + }, + "mode" : 19, + "debug_info" : "input.153", + "top" : "input.153", + "type" : "activation", + "name" : "input.153" + }, + { + "nB" : 3072, + "top" : "input.157", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 233, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "input.153", + "blob_biases" : 231, + "has_tanh" : 0, + "debug_info" : "input.157", + "name" : "input.157", + "has_prelu" : 0 + }, + { + "bottom" : "input.147,input.157", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "input.159", + "top" : "input.159", + "type" : "elementwise", + "name" : "input.159", + "beta" : 0 + }, + { + "name" : "hidden_states.43_reshape", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 4, + "type" : "reshape", + "dst_h" : 1, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "input.159", + "debug_info" : "hidden_states.43_reshape", + "dst_seq" : 1, + "dst_k" : 1500, + "top" : "hidden_states.43_reshape" + }, + { + "bottom" : "hidden_states.43_reshape", + "weights" : { + + }, + "eps" : 9.9999997473787516e-06, + "debug_info" : "hidden_states.43_mvn", + "axis" : 1, + "normalization_mode" : 1, + "top" : "hidden_states.43_mvn", + "type" : "l2_normalize", + "name" : "hidden_states.43_mvn" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "hidden_states.43_scale", + "constant_blob" : 235, + "top" : "hidden_states.43_scale_constant_in_scale", + "w" : 768, + "type" : "load_constant", + "name" : "hidden_states.43_scale_constant_in_scale" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "hidden_states.43_scale", + "constant_blob" : 237, + "top" : "hidden_states.43_scale_constant_in_bias", + "w" : 768, + "type" : "load_constant", + "name" : "hidden_states.43_scale_constant_in_bias" + }, + { + "bottom" : "hidden_states.43_mvn,hidden_states.43_scale_constant_in_scale", + "alpha" : 1, + "operation" : 1, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "hidden_states.43_scale", + "top" : "hidden_states.43_scale_mul_out", + "type" : "elementwise", + "name" : "hidden_states.43_scale_mul_out", + "beta" : 0 + }, + { + "bottom" : "hidden_states.43_scale_mul_out,hidden_states.43_scale_constant_in_bias", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "hidden_states.43_scale", + "top" : "hidden_states.43_scale", + "type" : "elementwise", + "name" : "hidden_states.43_scale", + "beta" : 0 + }, + { + "name" : "hidden_states.43", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 3, + "type" : "reshape", + "dst_h" : 1500, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "hidden_states.43_scale", + "debug_info" : "hidden_states.43", + "dst_seq" : 1, + "dst_k" : 1, + "top" : "hidden_states.43" + }, + { + "nB" : 768, + "top" : "531", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 241, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "hidden_states.43", + "blob_biases" : 239, + "has_tanh" : 0, + "debug_info" : "531", + "name" : "531", + "has_prelu" : 0 + }, + { + "bottom" : "531", + "alpha" : 0.125, + "operation" : 1, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "tensor.43", + "top" : "tensor.43", + "type" : "elementwise", + "name" : "tensor.43", + "beta" : 0 + }, + { + "name" : "535", + "weights" : { + + }, + "dst_w" : 64, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 4, + "type" : "reshape", + "dst_h" : 12, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "tensor.43", + "debug_info" : "535", + "dst_seq" : 1, + "dst_k" : 1500, + "top" : "535" + }, + { + "nB" : 768, + "top" : "tensor.45", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 245, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "hidden_states.43", + "blob_biases" : 243, + "has_tanh" : 0, + "debug_info" : "tensor.45", + "name" : "tensor.45", + "has_prelu" : 0 + }, + { + "name" : "541", + "weights" : { + + }, + "dst_w" : 64, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 4, + "type" : "reshape", + "dst_h" : 12, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "tensor.45", + "debug_info" : "541", + "dst_seq" : 1, + "dst_k" : -1, + "top" : "541" + }, + { + "nB" : 768, + "top" : "tensor.47", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 249, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "hidden_states.43", + "blob_biases" : 247, + "has_tanh" : 0, + "debug_info" : "tensor.47", + "name" : "tensor.47", + "has_prelu" : 0 + }, + { + "name" : "548", + "weights" : { + + }, + "dst_w" : 64, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 4, + "type" : "reshape", + "dst_h" : 12, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "tensor.47", + "debug_info" : "548", + "dst_seq" : 1, + "dst_k" : -1, + "top" : "548" + }, + { + "axis_h" : 0, + "axis_w" : 2, + "bottom" : "541", + "axis_k" : 1, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_77", + "top" : "transpose_77", + "type" : "transpose", + "name" : "transpose_77" + }, + { + "axis_h" : 2, + "axis_w" : 0, + "bottom" : "535", + "axis_k" : 1, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_78", + "top" : "transpose_78", + "type" : "transpose", + "name" : "transpose_78" + }, + { + "bottom" : "transpose_78,transpose_77", + "weights" : { + + }, + "debug_info" : "input.161", + "top" : "input.161", + "type" : "batch_matmul", + "name" : "input.161", + "channel_mode" : false + }, + { + "bottom" : "input.161", + "weights" : { + + }, + "nd_axis" : -1, + "debug_info" : "input.163", + "top" : "input.163", + "type" : "softmax_nd", + "name" : "input.163" + }, + { + "axis_h" : 2, + "axis_w" : 0, + "bottom" : "548", + "axis_k" : 1, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_79", + "top" : "transpose_79", + "type" : "transpose", + "name" : "transpose_79" + }, + { + "bottom" : "input.163,transpose_79", + "weights" : { + + }, + "debug_info" : "attn_output.29", + "top" : "attn_output.29", + "type" : "batch_matmul", + "name" : "attn_output.29", + "channel_mode" : false + }, + { + "axis_h" : 2, + "axis_w" : 0, + "bottom" : "attn_output.29", + "axis_k" : 1, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_76", + "top" : "transpose_76", + "type" : "transpose", + "name" : "transpose_76" + }, + { + "name" : "input.165", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 3, + "type" : "reshape", + "dst_h" : 1500, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "transpose_76", + "debug_info" : "input.165", + "dst_seq" : 1, + "dst_k" : 1, + "top" : "input.165" + }, + { + "nB" : 768, + "top" : "input.167", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 253, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "input.165", + "blob_biases" : 251, + "has_tanh" : 0, + "debug_info" : "input.167", + "name" : "input.167", + "has_prelu" : 0 + }, + { + "bottom" : "input.159,input.167", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "input.169", + "top" : "input.169", + "type" : "elementwise", + "name" : "input.169", + "beta" : 0 + }, + { + "name" : "input.171_reshape", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 4, + "type" : "reshape", + "dst_h" : 1, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "input.169", + "debug_info" : "input.171_reshape", + "dst_seq" : 1, + "dst_k" : 1500, + "top" : "input.171_reshape" + }, + { + "bottom" : "input.171_reshape", + "weights" : { + + }, + "eps" : 9.9999997473787516e-06, + "debug_info" : "input.171_mvn", + "axis" : 1, + "normalization_mode" : 1, + "top" : "input.171_mvn", + "type" : "l2_normalize", + "name" : "input.171_mvn" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "input.171_scale", + "constant_blob" : 255, + "top" : "input.171_scale_constant_in_scale", + "w" : 768, + "type" : "load_constant", + "name" : "input.171_scale_constant_in_scale" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "input.171_scale", + "constant_blob" : 257, + "top" : "input.171_scale_constant_in_bias", + "w" : 768, + "type" : "load_constant", + "name" : "input.171_scale_constant_in_bias" + }, + { + "bottom" : "input.171_mvn,input.171_scale_constant_in_scale", + "alpha" : 1, + "operation" : 1, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "input.171_scale", + "top" : "input.171_scale_mul_out", + "type" : "elementwise", + "name" : "input.171_scale_mul_out", + "beta" : 0 + }, + { + "bottom" : "input.171_scale_mul_out,input.171_scale_constant_in_bias", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "input.171_scale", + "top" : "input.171_scale", + "type" : "elementwise", + "name" : "input.171_scale", + "beta" : 0 + }, + { + "name" : "input.171", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 3, + "type" : "reshape", + "dst_h" : 1500, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "input.171_scale", + "debug_info" : "input.171", + "dst_seq" : 1, + "dst_k" : 1, + "top" : "input.171" + }, + { + "nB" : 768, + "top" : "input.173", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 3072, + "blob_weights" : 261, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "input.171", + "blob_biases" : 259, + "has_tanh" : 0, + "debug_info" : "input.173", + "name" : "input.173", + "has_prelu" : 0 + }, + { + "bottom" : "input.173", + "weights" : { + + }, + "mode" : 19, + "debug_info" : "input.175", + "top" : "input.175", + "type" : "activation", + "name" : "input.175" + }, + { + "nB" : 3072, + "top" : "input.179", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 265, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "input.175", + "blob_biases" : 263, + "has_tanh" : 0, + "debug_info" : "input.179", + "name" : "input.179", + "has_prelu" : 0 + }, + { + "bottom" : "input.169,input.179", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "input.181", + "top" : "input.181", + "type" : "elementwise", + "name" : "input.181", + "beta" : 0 + }, + { + "name" : "hidden_states.49_reshape", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 4, + "type" : "reshape", + "dst_h" : 1, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "input.181", + "debug_info" : "hidden_states.49_reshape", + "dst_seq" : 1, + "dst_k" : 1500, + "top" : "hidden_states.49_reshape" + }, + { + "bottom" : "hidden_states.49_reshape", + "weights" : { + + }, + "eps" : 9.9999997473787516e-06, + "debug_info" : "hidden_states.49_mvn", + "axis" : 1, + "normalization_mode" : 1, + "top" : "hidden_states.49_mvn", + "type" : "l2_normalize", + "name" : "hidden_states.49_mvn" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "hidden_states.49_scale", + "constant_blob" : 267, + "top" : "hidden_states.49_scale_constant_in_scale", + "w" : 768, + "type" : "load_constant", + "name" : "hidden_states.49_scale_constant_in_scale" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "hidden_states.49_scale", + "constant_blob" : 269, + "top" : "hidden_states.49_scale_constant_in_bias", + "w" : 768, + "type" : "load_constant", + "name" : "hidden_states.49_scale_constant_in_bias" + }, + { + "bottom" : "hidden_states.49_mvn,hidden_states.49_scale_constant_in_scale", + "alpha" : 1, + "operation" : 1, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "hidden_states.49_scale", + "top" : "hidden_states.49_scale_mul_out", + "type" : "elementwise", + "name" : "hidden_states.49_scale_mul_out", + "beta" : 0 + }, + { + "bottom" : "hidden_states.49_scale_mul_out,hidden_states.49_scale_constant_in_bias", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "hidden_states.49_scale", + "top" : "hidden_states.49_scale", + "type" : "elementwise", + "name" : "hidden_states.49_scale", + "beta" : 0 + }, + { + "name" : "hidden_states.49", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 3, + "type" : "reshape", + "dst_h" : 1500, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "hidden_states.49_scale", + "debug_info" : "hidden_states.49", + "dst_seq" : 1, + "dst_k" : 1, + "top" : "hidden_states.49" + }, + { + "nB" : 768, + "top" : "595", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 273, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "hidden_states.49", + "blob_biases" : 271, + "has_tanh" : 0, + "debug_info" : "595", + "name" : "595", + "has_prelu" : 0 + }, + { + "bottom" : "595", + "alpha" : 0.125, + "operation" : 1, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "tensor.49", + "top" : "tensor.49", + "type" : "elementwise", + "name" : "tensor.49", + "beta" : 0 + }, + { + "name" : "599", + "weights" : { + + }, + "dst_w" : 64, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 4, + "type" : "reshape", + "dst_h" : 12, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "tensor.49", + "debug_info" : "599", + "dst_seq" : 1, + "dst_k" : 1500, + "top" : "599" + }, + { + "nB" : 768, + "top" : "tensor.51", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 277, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "hidden_states.49", + "blob_biases" : 275, + "has_tanh" : 0, + "debug_info" : "tensor.51", + "name" : "tensor.51", + "has_prelu" : 0 + }, + { + "name" : "605", + "weights" : { + + }, + "dst_w" : 64, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 4, + "type" : "reshape", + "dst_h" : 12, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "tensor.51", + "debug_info" : "605", + "dst_seq" : 1, + "dst_k" : -1, + "top" : "605" + }, + { + "nB" : 768, + "top" : "tensor.53", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 281, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "hidden_states.49", + "blob_biases" : 279, + "has_tanh" : 0, + "debug_info" : "tensor.53", + "name" : "tensor.53", + "has_prelu" : 0 + }, + { + "name" : "612", + "weights" : { + + }, + "dst_w" : 64, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 4, + "type" : "reshape", + "dst_h" : 12, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "tensor.53", + "debug_info" : "612", + "dst_seq" : 1, + "dst_k" : -1, + "top" : "612" + }, + { + "axis_h" : 0, + "axis_w" : 2, + "bottom" : "605", + "axis_k" : 1, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_73", + "top" : "transpose_73", + "type" : "transpose", + "name" : "transpose_73" + }, + { + "axis_h" : 2, + "axis_w" : 0, + "bottom" : "599", + "axis_k" : 1, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_74", + "top" : "transpose_74", + "type" : "transpose", + "name" : "transpose_74" + }, + { + "bottom" : "transpose_74,transpose_73", + "weights" : { + + }, + "debug_info" : "input.183", + "top" : "input.183", + "type" : "batch_matmul", + "name" : "input.183", + "channel_mode" : false + }, + { + "bottom" : "input.183", + "weights" : { + + }, + "nd_axis" : -1, + "debug_info" : "input.185", + "top" : "input.185", + "type" : "softmax_nd", + "name" : "input.185" + }, + { + "axis_h" : 2, + "axis_w" : 0, + "bottom" : "612", + "axis_k" : 1, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_75", + "top" : "transpose_75", + "type" : "transpose", + "name" : "transpose_75" + }, + { + "bottom" : "input.185,transpose_75", + "weights" : { + + }, + "debug_info" : "attn_output.33", + "top" : "attn_output.33", + "type" : "batch_matmul", + "name" : "attn_output.33", + "channel_mode" : false + }, + { + "axis_h" : 2, + "axis_w" : 0, + "bottom" : "attn_output.33", + "axis_k" : 1, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_72", + "top" : "transpose_72", + "type" : "transpose", + "name" : "transpose_72" + }, + { + "name" : "input.187", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 3, + "type" : "reshape", + "dst_h" : 1500, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "transpose_72", + "debug_info" : "input.187", + "dst_seq" : 1, + "dst_k" : 1, + "top" : "input.187" + }, + { + "nB" : 768, + "top" : "input.189", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 285, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "input.187", + "blob_biases" : 283, + "has_tanh" : 0, + "debug_info" : "input.189", + "name" : "input.189", + "has_prelu" : 0 + }, + { + "bottom" : "input.181,input.189", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "input.191", + "top" : "input.191", + "type" : "elementwise", + "name" : "input.191", + "beta" : 0 + }, + { + "name" : "input.193_reshape", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 4, + "type" : "reshape", + "dst_h" : 1, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "input.191", + "debug_info" : "input.193_reshape", + "dst_seq" : 1, + "dst_k" : 1500, + "top" : "input.193_reshape" + }, + { + "bottom" : "input.193_reshape", + "weights" : { + + }, + "eps" : 9.9999997473787516e-06, + "debug_info" : "input.193_mvn", + "axis" : 1, + "normalization_mode" : 1, + "top" : "input.193_mvn", + "type" : "l2_normalize", + "name" : "input.193_mvn" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "input.193_scale", + "constant_blob" : 287, + "top" : "input.193_scale_constant_in_scale", + "w" : 768, + "type" : "load_constant", + "name" : "input.193_scale_constant_in_scale" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "input.193_scale", + "constant_blob" : 289, + "top" : "input.193_scale_constant_in_bias", + "w" : 768, + "type" : "load_constant", + "name" : "input.193_scale_constant_in_bias" + }, + { + "bottom" : "input.193_mvn,input.193_scale_constant_in_scale", + "alpha" : 1, + "operation" : 1, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "input.193_scale", + "top" : "input.193_scale_mul_out", + "type" : "elementwise", + "name" : "input.193_scale_mul_out", + "beta" : 0 + }, + { + "bottom" : "input.193_scale_mul_out,input.193_scale_constant_in_bias", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "input.193_scale", + "top" : "input.193_scale", + "type" : "elementwise", + "name" : "input.193_scale", + "beta" : 0 + }, + { + "name" : "input.193", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 3, + "type" : "reshape", + "dst_h" : 1500, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "input.193_scale", + "debug_info" : "input.193", + "dst_seq" : 1, + "dst_k" : 1, + "top" : "input.193" + }, + { + "nB" : 768, + "top" : "input.195", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 3072, + "blob_weights" : 293, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "input.193", + "blob_biases" : 291, + "has_tanh" : 0, + "debug_info" : "input.195", + "name" : "input.195", + "has_prelu" : 0 + }, + { + "bottom" : "input.195", + "weights" : { + + }, + "mode" : 19, + "debug_info" : "input.197", + "top" : "input.197", + "type" : "activation", + "name" : "input.197" + }, + { + "nB" : 3072, + "top" : "input.201", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 297, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "input.197", + "blob_biases" : 295, + "has_tanh" : 0, + "debug_info" : "input.201", + "name" : "input.201", + "has_prelu" : 0 + }, + { + "bottom" : "input.191,input.201", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "input.203", + "top" : "input.203", + "type" : "elementwise", + "name" : "input.203", + "beta" : 0 + }, + { + "name" : "hidden_states.55_reshape", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 4, + "type" : "reshape", + "dst_h" : 1, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "input.203", + "debug_info" : "hidden_states.55_reshape", + "dst_seq" : 1, + "dst_k" : 1500, + "top" : "hidden_states.55_reshape" + }, + { + "bottom" : "hidden_states.55_reshape", + "weights" : { + + }, + "eps" : 9.9999997473787516e-06, + "debug_info" : "hidden_states.55_mvn", + "axis" : 1, + "normalization_mode" : 1, + "top" : "hidden_states.55_mvn", + "type" : "l2_normalize", + "name" : "hidden_states.55_mvn" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "hidden_states.55_scale", + "constant_blob" : 299, + "top" : "hidden_states.55_scale_constant_in_scale", + "w" : 768, + "type" : "load_constant", + "name" : "hidden_states.55_scale_constant_in_scale" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "hidden_states.55_scale", + "constant_blob" : 301, + "top" : "hidden_states.55_scale_constant_in_bias", + "w" : 768, + "type" : "load_constant", + "name" : "hidden_states.55_scale_constant_in_bias" + }, + { + "bottom" : "hidden_states.55_mvn,hidden_states.55_scale_constant_in_scale", + "alpha" : 1, + "operation" : 1, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "hidden_states.55_scale", + "top" : "hidden_states.55_scale_mul_out", + "type" : "elementwise", + "name" : "hidden_states.55_scale_mul_out", + "beta" : 0 + }, + { + "bottom" : "hidden_states.55_scale_mul_out,hidden_states.55_scale_constant_in_bias", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "hidden_states.55_scale", + "top" : "hidden_states.55_scale", + "type" : "elementwise", + "name" : "hidden_states.55_scale", + "beta" : 0 + }, + { + "name" : "hidden_states.55", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 3, + "type" : "reshape", + "dst_h" : 1500, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "hidden_states.55_scale", + "debug_info" : "hidden_states.55", + "dst_seq" : 1, + "dst_k" : 1, + "top" : "hidden_states.55" + }, + { + "nB" : 768, + "top" : "659", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 305, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "hidden_states.55", + "blob_biases" : 303, + "has_tanh" : 0, + "debug_info" : "659", + "name" : "659", + "has_prelu" : 0 + }, + { + "bottom" : "659", + "alpha" : 0.125, + "operation" : 1, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "tensor.55", + "top" : "tensor.55", + "type" : "elementwise", + "name" : "tensor.55", + "beta" : 0 + }, + { + "name" : "663", + "weights" : { + + }, + "dst_w" : 64, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 4, + "type" : "reshape", + "dst_h" : 12, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "tensor.55", + "debug_info" : "663", + "dst_seq" : 1, + "dst_k" : 1500, + "top" : "663" + }, + { + "nB" : 768, + "top" : "tensor.57", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 309, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "hidden_states.55", + "blob_biases" : 307, + "has_tanh" : 0, + "debug_info" : "tensor.57", + "name" : "tensor.57", + "has_prelu" : 0 + }, + { + "name" : "669", + "weights" : { + + }, + "dst_w" : 64, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 4, + "type" : "reshape", + "dst_h" : 12, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "tensor.57", + "debug_info" : "669", + "dst_seq" : 1, + "dst_k" : -1, + "top" : "669" + }, + { + "nB" : 768, + "top" : "tensor.59", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 313, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "hidden_states.55", + "blob_biases" : 311, + "has_tanh" : 0, + "debug_info" : "tensor.59", + "name" : "tensor.59", + "has_prelu" : 0 + }, + { + "name" : "676", + "weights" : { + + }, + "dst_w" : 64, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 4, + "type" : "reshape", + "dst_h" : 12, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "tensor.59", + "debug_info" : "676", + "dst_seq" : 1, + "dst_k" : -1, + "top" : "676" + }, + { + "axis_h" : 0, + "axis_w" : 2, + "bottom" : "669", + "axis_k" : 1, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_69", + "top" : "transpose_69", + "type" : "transpose", + "name" : "transpose_69" + }, + { + "axis_h" : 2, + "axis_w" : 0, + "bottom" : "663", + "axis_k" : 1, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_70", + "top" : "transpose_70", + "type" : "transpose", + "name" : "transpose_70" + }, + { + "bottom" : "transpose_70,transpose_69", + "weights" : { + + }, + "debug_info" : "input.205", + "top" : "input.205", + "type" : "batch_matmul", + "name" : "input.205", + "channel_mode" : false + }, + { + "bottom" : "input.205", + "weights" : { + + }, + "nd_axis" : -1, + "debug_info" : "input.207", + "top" : "input.207", + "type" : "softmax_nd", + "name" : "input.207" + }, + { + "axis_h" : 2, + "axis_w" : 0, + "bottom" : "676", + "axis_k" : 1, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_71", + "top" : "transpose_71", + "type" : "transpose", + "name" : "transpose_71" + }, + { + "bottom" : "input.207,transpose_71", + "weights" : { + + }, + "debug_info" : "attn_output.37", + "top" : "attn_output.37", + "type" : "batch_matmul", + "name" : "attn_output.37", + "channel_mode" : false + }, + { + "axis_h" : 2, + "axis_w" : 0, + "bottom" : "attn_output.37", + "axis_k" : 1, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_68", + "top" : "transpose_68", + "type" : "transpose", + "name" : "transpose_68" + }, + { + "name" : "input.209", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 3, + "type" : "reshape", + "dst_h" : 1500, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "transpose_68", + "debug_info" : "input.209", + "dst_seq" : 1, + "dst_k" : 1, + "top" : "input.209" + }, + { + "nB" : 768, + "top" : "input.211", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 317, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "input.209", + "blob_biases" : 315, + "has_tanh" : 0, + "debug_info" : "input.211", + "name" : "input.211", + "has_prelu" : 0 + }, + { + "bottom" : "input.203,input.211", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "input.213", + "top" : "input.213", + "type" : "elementwise", + "name" : "input.213", + "beta" : 0 + }, + { + "name" : "input.215_reshape", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 4, + "type" : "reshape", + "dst_h" : 1, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "input.213", + "debug_info" : "input.215_reshape", + "dst_seq" : 1, + "dst_k" : 1500, + "top" : "input.215_reshape" + }, + { + "bottom" : "input.215_reshape", + "weights" : { + + }, + "eps" : 9.9999997473787516e-06, + "debug_info" : "input.215_mvn", + "axis" : 1, + "normalization_mode" : 1, + "top" : "input.215_mvn", + "type" : "l2_normalize", + "name" : "input.215_mvn" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "input.215_scale", + "constant_blob" : 319, + "top" : "input.215_scale_constant_in_scale", + "w" : 768, + "type" : "load_constant", + "name" : "input.215_scale_constant_in_scale" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "input.215_scale", + "constant_blob" : 321, + "top" : "input.215_scale_constant_in_bias", + "w" : 768, + "type" : "load_constant", + "name" : "input.215_scale_constant_in_bias" + }, + { + "bottom" : "input.215_mvn,input.215_scale_constant_in_scale", + "alpha" : 1, + "operation" : 1, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "input.215_scale", + "top" : "input.215_scale_mul_out", + "type" : "elementwise", + "name" : "input.215_scale_mul_out", + "beta" : 0 + }, + { + "bottom" : "input.215_scale_mul_out,input.215_scale_constant_in_bias", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "input.215_scale", + "top" : "input.215_scale", + "type" : "elementwise", + "name" : "input.215_scale", + "beta" : 0 + }, + { + "name" : "input.215", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 3, + "type" : "reshape", + "dst_h" : 1500, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "input.215_scale", + "debug_info" : "input.215", + "dst_seq" : 1, + "dst_k" : 1, + "top" : "input.215" + }, + { + "nB" : 768, + "top" : "input.217", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 3072, + "blob_weights" : 325, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "input.215", + "blob_biases" : 323, + "has_tanh" : 0, + "debug_info" : "input.217", + "name" : "input.217", + "has_prelu" : 0 + }, + { + "bottom" : "input.217", + "weights" : { + + }, + "mode" : 19, + "debug_info" : "input.219", + "top" : "input.219", + "type" : "activation", + "name" : "input.219" + }, + { + "nB" : 3072, + "top" : "input.223", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 329, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "input.219", + "blob_biases" : 327, + "has_tanh" : 0, + "debug_info" : "input.223", + "name" : "input.223", + "has_prelu" : 0 + }, + { + "bottom" : "input.213,input.223", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "input.225", + "top" : "input.225", + "type" : "elementwise", + "name" : "input.225", + "beta" : 0 + }, + { + "name" : "hidden_states.61_reshape", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 4, + "type" : "reshape", + "dst_h" : 1, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "input.225", + "debug_info" : "hidden_states.61_reshape", + "dst_seq" : 1, + "dst_k" : 1500, + "top" : "hidden_states.61_reshape" + }, + { + "bottom" : "hidden_states.61_reshape", + "weights" : { + + }, + "eps" : 9.9999997473787516e-06, + "debug_info" : "hidden_states.61_mvn", + "axis" : 1, + "normalization_mode" : 1, + "top" : "hidden_states.61_mvn", + "type" : "l2_normalize", + "name" : "hidden_states.61_mvn" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "hidden_states.61_scale", + "constant_blob" : 331, + "top" : "hidden_states.61_scale_constant_in_scale", + "w" : 768, + "type" : "load_constant", + "name" : "hidden_states.61_scale_constant_in_scale" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "hidden_states.61_scale", + "constant_blob" : 333, + "top" : "hidden_states.61_scale_constant_in_bias", + "w" : 768, + "type" : "load_constant", + "name" : "hidden_states.61_scale_constant_in_bias" + }, + { + "bottom" : "hidden_states.61_mvn,hidden_states.61_scale_constant_in_scale", + "alpha" : 1, + "operation" : 1, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "hidden_states.61_scale", + "top" : "hidden_states.61_scale_mul_out", + "type" : "elementwise", + "name" : "hidden_states.61_scale_mul_out", + "beta" : 0 + }, + { + "bottom" : "hidden_states.61_scale_mul_out,hidden_states.61_scale_constant_in_bias", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "hidden_states.61_scale", + "top" : "hidden_states.61_scale", + "type" : "elementwise", + "name" : "hidden_states.61_scale", + "beta" : 0 + }, + { + "name" : "hidden_states.61", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 3, + "type" : "reshape", + "dst_h" : 1500, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "hidden_states.61_scale", + "debug_info" : "hidden_states.61", + "dst_seq" : 1, + "dst_k" : 1, + "top" : "hidden_states.61" + }, + { + "nB" : 768, + "top" : "723", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 337, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "hidden_states.61", + "blob_biases" : 335, + "has_tanh" : 0, + "debug_info" : "723", + "name" : "723", + "has_prelu" : 0 + }, + { + "bottom" : "723", + "alpha" : 0.125, + "operation" : 1, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "tensor.61", + "top" : "tensor.61", + "type" : "elementwise", + "name" : "tensor.61", + "beta" : 0 + }, + { + "name" : "727", + "weights" : { + + }, + "dst_w" : 64, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 4, + "type" : "reshape", + "dst_h" : 12, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "tensor.61", + "debug_info" : "727", + "dst_seq" : 1, + "dst_k" : 1500, + "top" : "727" + }, + { + "nB" : 768, + "top" : "tensor.63", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 341, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "hidden_states.61", + "blob_biases" : 339, + "has_tanh" : 0, + "debug_info" : "tensor.63", + "name" : "tensor.63", + "has_prelu" : 0 + }, + { + "name" : "733", + "weights" : { + + }, + "dst_w" : 64, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 4, + "type" : "reshape", + "dst_h" : 12, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "tensor.63", + "debug_info" : "733", + "dst_seq" : 1, + "dst_k" : -1, + "top" : "733" + }, + { + "nB" : 768, + "top" : "tensor.65", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 345, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "hidden_states.61", + "blob_biases" : 343, + "has_tanh" : 0, + "debug_info" : "tensor.65", + "name" : "tensor.65", + "has_prelu" : 0 + }, + { + "name" : "740", + "weights" : { + + }, + "dst_w" : 64, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 4, + "type" : "reshape", + "dst_h" : 12, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "tensor.65", + "debug_info" : "740", + "dst_seq" : 1, + "dst_k" : -1, + "top" : "740" + }, + { + "axis_h" : 0, + "axis_w" : 2, + "bottom" : "733", + "axis_k" : 1, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_65", + "top" : "transpose_65", + "type" : "transpose", + "name" : "transpose_65" + }, + { + "axis_h" : 2, + "axis_w" : 0, + "bottom" : "727", + "axis_k" : 1, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_66", + "top" : "transpose_66", + "type" : "transpose", + "name" : "transpose_66" + }, + { + "bottom" : "transpose_66,transpose_65", + "weights" : { + + }, + "debug_info" : "input.227", + "top" : "input.227", + "type" : "batch_matmul", + "name" : "input.227", + "channel_mode" : false + }, + { + "bottom" : "input.227", + "weights" : { + + }, + "nd_axis" : -1, + "debug_info" : "input.229", + "top" : "input.229", + "type" : "softmax_nd", + "name" : "input.229" + }, + { + "axis_h" : 2, + "axis_w" : 0, + "bottom" : "740", + "axis_k" : 1, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_67", + "top" : "transpose_67", + "type" : "transpose", + "name" : "transpose_67" + }, + { + "bottom" : "input.229,transpose_67", + "weights" : { + + }, + "debug_info" : "attn_output.41", + "top" : "attn_output.41", + "type" : "batch_matmul", + "name" : "attn_output.41", + "channel_mode" : false + }, + { + "axis_h" : 2, + "axis_w" : 0, + "bottom" : "attn_output.41", + "axis_k" : 1, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_64", + "top" : "transpose_64", + "type" : "transpose", + "name" : "transpose_64" + }, + { + "name" : "input.231", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 3, + "type" : "reshape", + "dst_h" : 1500, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "transpose_64", + "debug_info" : "input.231", + "dst_seq" : 1, + "dst_k" : 1, + "top" : "input.231" + }, + { + "nB" : 768, + "top" : "input.233", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 349, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "input.231", + "blob_biases" : 347, + "has_tanh" : 0, + "debug_info" : "input.233", + "name" : "input.233", + "has_prelu" : 0 + }, + { + "bottom" : "input.225,input.233", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "input.235", + "top" : "input.235", + "type" : "elementwise", + "name" : "input.235", + "beta" : 0 + }, + { + "name" : "input.237_reshape", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 4, + "type" : "reshape", + "dst_h" : 1, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "input.235", + "debug_info" : "input.237_reshape", + "dst_seq" : 1, + "dst_k" : 1500, + "top" : "input.237_reshape" + }, + { + "bottom" : "input.237_reshape", + "weights" : { + + }, + "eps" : 9.9999997473787516e-06, + "debug_info" : "input.237_mvn", + "axis" : 1, + "normalization_mode" : 1, + "top" : "input.237_mvn", + "type" : "l2_normalize", + "name" : "input.237_mvn" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "input.237_scale", + "constant_blob" : 351, + "top" : "input.237_scale_constant_in_scale", + "w" : 768, + "type" : "load_constant", + "name" : "input.237_scale_constant_in_scale" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "input.237_scale", + "constant_blob" : 353, + "top" : "input.237_scale_constant_in_bias", + "w" : 768, + "type" : "load_constant", + "name" : "input.237_scale_constant_in_bias" + }, + { + "bottom" : "input.237_mvn,input.237_scale_constant_in_scale", + "alpha" : 1, + "operation" : 1, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "input.237_scale", + "top" : "input.237_scale_mul_out", + "type" : "elementwise", + "name" : "input.237_scale_mul_out", + "beta" : 0 + }, + { + "bottom" : "input.237_scale_mul_out,input.237_scale_constant_in_bias", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "input.237_scale", + "top" : "input.237_scale", + "type" : "elementwise", + "name" : "input.237_scale", + "beta" : 0 + }, + { + "name" : "input.237", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 3, + "type" : "reshape", + "dst_h" : 1500, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "input.237_scale", + "debug_info" : "input.237", + "dst_seq" : 1, + "dst_k" : 1, + "top" : "input.237" + }, + { + "nB" : 768, + "top" : "input.239", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 3072, + "blob_weights" : 357, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "input.237", + "blob_biases" : 355, + "has_tanh" : 0, + "debug_info" : "input.239", + "name" : "input.239", + "has_prelu" : 0 + }, + { + "bottom" : "input.239", + "weights" : { + + }, + "mode" : 19, + "debug_info" : "input.241", + "top" : "input.241", + "type" : "activation", + "name" : "input.241" + }, + { + "nB" : 3072, + "top" : "input.245", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 361, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "input.241", + "blob_biases" : 359, + "has_tanh" : 0, + "debug_info" : "input.245", + "name" : "input.245", + "has_prelu" : 0 + }, + { + "bottom" : "input.235,input.245", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "input.247", + "top" : "input.247", + "type" : "elementwise", + "name" : "input.247", + "beta" : 0 + }, + { + "name" : "hidden_states.67_reshape", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 4, + "type" : "reshape", + "dst_h" : 1, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "input.247", + "debug_info" : "hidden_states.67_reshape", + "dst_seq" : 1, + "dst_k" : 1500, + "top" : "hidden_states.67_reshape" + }, + { + "bottom" : "hidden_states.67_reshape", + "weights" : { + + }, + "eps" : 9.9999997473787516e-06, + "debug_info" : "hidden_states.67_mvn", + "axis" : 1, + "normalization_mode" : 1, + "top" : "hidden_states.67_mvn", + "type" : "l2_normalize", + "name" : "hidden_states.67_mvn" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "hidden_states.67_scale", + "constant_blob" : 363, + "top" : "hidden_states.67_scale_constant_in_scale", + "w" : 768, + "type" : "load_constant", + "name" : "hidden_states.67_scale_constant_in_scale" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "hidden_states.67_scale", + "constant_blob" : 365, + "top" : "hidden_states.67_scale_constant_in_bias", + "w" : 768, + "type" : "load_constant", + "name" : "hidden_states.67_scale_constant_in_bias" + }, + { + "bottom" : "hidden_states.67_mvn,hidden_states.67_scale_constant_in_scale", + "alpha" : 1, + "operation" : 1, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "hidden_states.67_scale", + "top" : "hidden_states.67_scale_mul_out", + "type" : "elementwise", + "name" : "hidden_states.67_scale_mul_out", + "beta" : 0 + }, + { + "bottom" : "hidden_states.67_scale_mul_out,hidden_states.67_scale_constant_in_bias", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "hidden_states.67_scale", + "top" : "hidden_states.67_scale", + "type" : "elementwise", + "name" : "hidden_states.67_scale", + "beta" : 0 + }, + { + "name" : "hidden_states.67", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 3, + "type" : "reshape", + "dst_h" : 1500, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "hidden_states.67_scale", + "debug_info" : "hidden_states.67", + "dst_seq" : 1, + "dst_k" : 1, + "top" : "hidden_states.67" + }, + { + "nB" : 768, + "top" : "787", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 369, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "hidden_states.67", + "blob_biases" : 367, + "has_tanh" : 0, + "debug_info" : "787", + "name" : "787", + "has_prelu" : 0 + }, + { + "bottom" : "787", + "alpha" : 0.125, + "operation" : 1, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "tensor.67", + "top" : "tensor.67", + "type" : "elementwise", + "name" : "tensor.67", + "beta" : 0 + }, + { + "name" : "791", + "weights" : { + + }, + "dst_w" : 64, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 4, + "type" : "reshape", + "dst_h" : 12, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "tensor.67", + "debug_info" : "791", + "dst_seq" : 1, + "dst_k" : 1500, + "top" : "791" + }, + { + "nB" : 768, + "top" : "tensor.69", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 373, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "hidden_states.67", + "blob_biases" : 371, + "has_tanh" : 0, + "debug_info" : "tensor.69", + "name" : "tensor.69", + "has_prelu" : 0 + }, + { + "name" : "797", + "weights" : { + + }, + "dst_w" : 64, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 4, + "type" : "reshape", + "dst_h" : 12, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "tensor.69", + "debug_info" : "797", + "dst_seq" : 1, + "dst_k" : -1, + "top" : "797" + }, + { + "nB" : 768, + "top" : "tensor", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 377, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "hidden_states.67", + "blob_biases" : 375, + "has_tanh" : 0, + "debug_info" : "tensor", + "name" : "tensor", + "has_prelu" : 0 + }, + { + "name" : "804", + "weights" : { + + }, + "dst_w" : 64, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 4, + "type" : "reshape", + "dst_h" : 12, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "tensor", + "debug_info" : "804", + "dst_seq" : 1, + "dst_k" : -1, + "top" : "804" + }, + { + "axis_h" : 0, + "axis_w" : 2, + "bottom" : "797", + "axis_k" : 1, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_61", + "top" : "transpose_61", + "type" : "transpose", + "name" : "transpose_61" + }, + { + "axis_h" : 2, + "axis_w" : 0, + "bottom" : "791", + "axis_k" : 1, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_62", + "top" : "transpose_62", + "type" : "transpose", + "name" : "transpose_62" + }, + { + "bottom" : "transpose_62,transpose_61", + "weights" : { + + }, + "debug_info" : "input.249", + "top" : "input.249", + "type" : "batch_matmul", + "name" : "input.249", + "channel_mode" : false + }, + { + "bottom" : "input.249", + "weights" : { + + }, + "nd_axis" : -1, + "debug_info" : "input.251", + "top" : "input.251", + "type" : "softmax_nd", + "name" : "input.251" + }, + { + "axis_h" : 2, + "axis_w" : 0, + "bottom" : "804", + "axis_k" : 1, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_63", + "top" : "transpose_63", + "type" : "transpose", + "name" : "transpose_63" + }, + { + "bottom" : "input.251,transpose_63", + "weights" : { + + }, + "debug_info" : "attn_output.45", + "top" : "attn_output.45", + "type" : "batch_matmul", + "name" : "attn_output.45", + "channel_mode" : false + }, + { + "axis_h" : 2, + "axis_w" : 0, + "bottom" : "attn_output.45", + "axis_k" : 1, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_60", + "top" : "transpose_60", + "type" : "transpose", + "name" : "transpose_60" + }, + { + "name" : "input.253", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 3, + "type" : "reshape", + "dst_h" : 1500, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "transpose_60", + "debug_info" : "input.253", + "dst_seq" : 1, + "dst_k" : 1, + "top" : "input.253" + }, + { + "nB" : 768, + "top" : "input.255", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 381, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "input.253", + "blob_biases" : 379, + "has_tanh" : 0, + "debug_info" : "input.255", + "name" : "input.255", + "has_prelu" : 0 + }, + { + "bottom" : "input.247,input.255", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "input.257", + "top" : "input.257", + "type" : "elementwise", + "name" : "input.257", + "beta" : 0 + }, + { + "name" : "input.259_reshape", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 4, + "type" : "reshape", + "dst_h" : 1, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "input.257", + "debug_info" : "input.259_reshape", + "dst_seq" : 1, + "dst_k" : 1500, + "top" : "input.259_reshape" + }, + { + "bottom" : "input.259_reshape", + "weights" : { + + }, + "eps" : 9.9999997473787516e-06, + "debug_info" : "input.259_mvn", + "axis" : 1, + "normalization_mode" : 1, + "top" : "input.259_mvn", + "type" : "l2_normalize", + "name" : "input.259_mvn" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "input.259_scale", + "constant_blob" : 383, + "top" : "input.259_scale_constant_in_scale", + "w" : 768, + "type" : "load_constant", + "name" : "input.259_scale_constant_in_scale" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "input.259_scale", + "constant_blob" : 385, + "top" : "input.259_scale_constant_in_bias", + "w" : 768, + "type" : "load_constant", + "name" : "input.259_scale_constant_in_bias" + }, + { + "bottom" : "input.259_mvn,input.259_scale_constant_in_scale", + "alpha" : 1, + "operation" : 1, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "input.259_scale", + "top" : "input.259_scale_mul_out", + "type" : "elementwise", + "name" : "input.259_scale_mul_out", + "beta" : 0 + }, + { + "bottom" : "input.259_scale_mul_out,input.259_scale_constant_in_bias", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "input.259_scale", + "top" : "input.259_scale", + "type" : "elementwise", + "name" : "input.259_scale", + "beta" : 0 + }, + { + "name" : "input.259", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 3, + "type" : "reshape", + "dst_h" : 1500, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "input.259_scale", + "debug_info" : "input.259", + "dst_seq" : 1, + "dst_k" : 1, + "top" : "input.259" + }, + { + "nB" : 768, + "top" : "input.261", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 3072, + "blob_weights" : 389, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "input.259", + "blob_biases" : 387, + "has_tanh" : 0, + "debug_info" : "input.261", + "name" : "input.261", + "has_prelu" : 0 + }, + { + "bottom" : "input.261", + "weights" : { + + }, + "mode" : 19, + "debug_info" : "input.263", + "top" : "input.263", + "type" : "activation", + "name" : "input.263" + }, + { + "nB" : 3072, + "top" : "input.267", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 393, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "input.263", + "blob_biases" : 391, + "has_tanh" : 0, + "debug_info" : "input.267", + "name" : "input.267", + "has_prelu" : 0 + }, + { + "bottom" : "input.257,input.267", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "input", + "top" : "input", + "type" : "elementwise", + "name" : "input", + "beta" : 0 + }, + { + "name" : "837_reshape", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 4, + "type" : "reshape", + "dst_h" : 1, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "input", + "debug_info" : "837_reshape", + "dst_seq" : 1, + "dst_k" : 1500, + "top" : "837_reshape" + }, + { + "bottom" : "837_reshape", + "weights" : { + + }, + "eps" : 9.9999997473787516e-06, + "debug_info" : "837_mvn", + "axis" : 1, + "normalization_mode" : 1, + "top" : "837_mvn", + "type" : "l2_normalize", + "name" : "837_mvn" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "837_scale", + "constant_blob" : 395, + "top" : "837_scale_constant_in_scale", + "w" : 768, + "type" : "load_constant", + "name" : "837_scale_constant_in_scale" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "837_scale", + "constant_blob" : 397, + "top" : "837_scale_constant_in_bias", + "w" : 768, + "type" : "load_constant", + "name" : "837_scale_constant_in_bias" + }, + { + "bottom" : "837_mvn,837_scale_constant_in_scale", + "alpha" : 1, + "operation" : 1, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "837_scale", + "top" : "837_scale_mul_out", + "type" : "elementwise", + "name" : "837_scale_mul_out", + "beta" : 0 + }, + { + "bottom" : "837_scale_mul_out,837_scale_constant_in_bias", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "837_scale", + "top" : "837_scale", + "type" : "elementwise", + "name" : "837_scale", + "beta" : 0 + }, + { + "name" : "837", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 3, + "type" : "reshape", + "dst_h" : 1500, + "mode" : 0, + "attributes" : { + "is_output" : 1 + }, + "dynamic_shape" : false, + "bottom" : "837_scale", + "debug_info" : "837", + "dst_k" : 1, + "dst_seq" : 1, + "top" : "last_hidden_state" + } + ] +} \ No newline at end of file