diff --git "a/WhisperDecoder.mlmodelc/model.espresso.net" "b/WhisperDecoder.mlmodelc/model.espresso.net" new file mode 100644--- /dev/null +++ "b/WhisperDecoder.mlmodelc/model.espresso.net" @@ -0,0 +1,21907 @@ +{ + "layers" : [ + { + "name" : "input.1", + "rank_preserving_mode" : true, + "dst_w" : 128, + "version" : 1, + "dst_n" : 0, + "dst_nd_rank" : 2, + "weights" : { + + }, + "type" : "reshape", + "dst_h" : -1, + "mode" : 0, + "attributes" : { + "basic_block_head" : 1 + }, + "bottom" : "decoder_input_ids", + "debug_info" : "input.1", + "dst_k" : 0, + "dst_seq" : 0, + "top" : "input.1" + }, + { + "top" : "decoder.embed_tokens.weight", + "w" : 768, + "h" : 51865, + "name" : "decoder.embed_tokens.weight", + "nd_rank" : 2, + "type" : "load_constant", + "k" : 1, + "bottom" : "", + "debug_info" : "decoder.embed_tokens.weight", + "n" : 1, + "weights" : { + + }, + "constant_blob" : 1 + }, + { + "bottom" : "decoder.embed_tokens.weight,input.1", + "validate_indices" : true, + "allow_negative_indices" : true, + "mode" : 0, + "weights" : { + + }, + "debug_info" : "inputs_embeds", + "axis" : 0, + "top" : "inputs_embeds", + "type" : "gather_nd", + "name" : "inputs_embeds", + "batch_dims" : 0 + }, + { + "top" : "positions", + "w" : 768, + "h" : 128, + "name" : "positions", + "nd_rank" : 3, + "type" : "load_constant", + "k" : 1, + "bottom" : "", + "debug_info" : "positions", + "n" : 1, + "weights" : { + + }, + "constant_blob" : 3 + }, + { + "bottom" : "inputs_embeds,positions", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "input.3", + "top" : "input.3", + "type" : "elementwise", + "name" : "input.3", + "beta" : 0 + }, + { + "bottom" : "inputs_embeds", + "weights" : { + + }, + "nd_mode" : 1, + "axis" : -1, + "debug_info" : "78_shape", + "top" : "78_shape", + "type" : "get_shape", + "name" : "78_shape" + }, + { + "top" : "gather_1_indices_0", + "w" : 1, + "h" : 1, + "name" : "gather_1_indices_0", + "nd_rank" : 1, + "type" : "load_constant", + "k" : 1, + "bottom" : "", + "debug_info" : "gather_1_indices_0", + "n" : 1, + "weights" : { + + }, + "constant_blob" : 5 + }, + { + "bottom" : "78_shape,gather_1_indices_0", + "validate_indices" : true, + "allow_negative_indices" : true, + "mode" : 0, + "weights" : { + + }, + "debug_info" : "gather_1", + "axis" : 0, + "top" : "gather_1", + "type" : "gather_nd", + "name" : "gather_1", + "batch_dims" : 0 + }, + { + "top" : "causal_mask.1", + "w" : 129, + "h" : 128, + "name" : "causal_mask.1", + "nd_rank" : 2, + "type" : "load_constant", + "k" : 1, + "bottom" : "", + "debug_info" : "causal_mask.1", + "n" : 1, + "weights" : { + + }, + "constant_blob" : 7 + }, + { + "bottom" : "causal_mask.1", + "weights" : { + + }, + "num_upper" : 0, + "num_lower" : -1, + "nd_mode" : 1, + "debug_info" : "band_part_0", + "top" : "band_part_0", + "type" : "matrix_band_part", + "name" : "band_part_0" + }, + { + "alpha" : -1, + "bottom" : "band_part_0", + "weights" : { + + }, + "mode" : 6, + "debug_info" : "_neg_y_causal_mask.3", + "top" : "_neg_y_causal_mask.3", + "type" : "activation", + "name" : "_neg_y_causal_mask.3", + "beta" : 0 + }, + { + "bottom" : "_neg_y_causal_mask.3,causal_mask.1", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "causal_mask.3", + "top" : "causal_mask.3", + "type" : "elementwise", + "name" : "causal_mask.3", + "beta" : 0 + }, + { + "top" : "cache_position", + "w" : 128, + "h" : 1, + "name" : "cache_position", + "nd_rank" : 1, + "type" : "load_constant", + "k" : 1, + "bottom" : "", + "debug_info" : "cache_position", + "n" : 1, + "weights" : { + + }, + "constant_blob" : 9 + }, + { + "name" : "84", + "weights" : { + + }, + "dst_w" : 1, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 2, + "type" : "reshape", + "dst_h" : -1, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "cache_position", + "debug_info" : "84", + "dst_seq" : 1, + "dst_k" : 1, + "top" : "84" + }, + { + "top" : "82", + "w" : 129, + "h" : 1, + "name" : "82", + "nd_rank" : 1, + "type" : "load_constant", + "k" : 1, + "bottom" : "", + "debug_info" : "82", + "n" : 1, + "weights" : { + + }, + "constant_blob" : 11 + }, + { + "bottom" : "82,84", + "alpha" : 1, + "operation" : 105, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "85", + "top" : "85", + "type" : "elementwise", + "name" : "85", + "beta" : 0 + }, + { + "bottom" : "85", + "weights" : { + + }, + "mode" : 6, + "debug_info" : "cast_100", + "top" : "cast_100", + "type" : "activation", + "name" : "cast_100", + "beta" : 0 + }, + { + "bottom" : "causal_mask.3,cast_100", + "alpha" : 1, + "operation" : 1, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "causal_mask.5", + "nd_mode" : true, + "top" : "causal_mask.5", + "type" : "elementwise", + "name" : "causal_mask.5", + "beta" : 0 + }, + { + "size_of_axes" : 1, + "bottom" : "causal_mask.5", + "axes_0" : 0, + "weights" : { + + }, + "nd_axis" : 0, + "debug_info" : "87", + "top" : "87", + "type" : "expand_dims", + "name" : "87" + }, + { + "size_of_axes" : 1, + "bottom" : "87", + "axes_0" : 1, + "weights" : { + + }, + "nd_axis" : 0, + "debug_info" : "88", + "top" : "88", + "type" : "expand_dims", + "name" : "88" + }, + { + "top" : "25", + "w" : 1, + "h" : 1, + "name" : "25", + "nd_rank" : 1, + "type" : "load_constant", + "k" : 1, + "bottom" : "", + "debug_info" : "25", + "n" : 1, + "weights" : { + + }, + "constant_blob" : 13 + }, + { + "top" : "24", + "w" : 1, + "h" : 1, + "name" : "24", + "nd_rank" : 1, + "type" : "load_constant", + "k" : 1, + "bottom" : "", + "debug_info" : "24", + "n" : 1, + "weights" : { + + }, + "constant_blob" : 15 + }, + { + "bottom" : "gather_1,25,24,24", + "weights" : { + + }, + "nd_mode" : true, + "axis" : 0, + "debug_info" : "concat_0", + "top" : "concat_0", + "type" : "general_concat", + "name" : "concat_0" + }, + { + "top" : "shape_0", + "w" : 4, + "h" : 1, + "name" : "shape_0", + "nd_rank" : 1, + "type" : "load_constant", + "k" : 1, + "bottom" : "", + "debug_info" : "shape_0", + "n" : 1, + "weights" : { + + }, + "constant_blob" : 17 + }, + { + "bottom" : "concat_0,shape_0", + "alpha" : 1, + "operation" : 102, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "real_div_0", + "nd_mode" : true, + "top" : "real_div_0", + "type" : "elementwise", + "name" : "real_div_0", + "beta" : 0 + }, + { + "name" : "attention_mask", + "bottom" : "88,real_div_0", + "dst_n" : 1, + "dst_seq" : 1, + "weights" : { + + }, + "dst_k" : 1, + "debug_info" : "attention_mask", + "top" : "attention_mask", + "dst_h" : 1, + "dst_w" : 1, + "type" : "tile" + }, + { + "name" : "hidden_states.1_reshape", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : -1, + "dst_nd_rank" : 4, + "type" : "reshape", + "dst_h" : 1, + "mode" : 0, + "attributes" : { + "basic_block_head" : 1 + }, + "dynamic_shape" : false, + "bottom" : "input.3", + "debug_info" : "hidden_states.1_reshape", + "dst_k" : 128, + "dst_seq" : 1, + "top" : "hidden_states.1_reshape" + }, + { + "bottom" : "hidden_states.1_reshape", + "weights" : { + + }, + "eps" : 9.9999997473787516e-06, + "debug_info" : "hidden_states.1_mvn", + "axis" : 1, + "normalization_mode" : 1, + "top" : "hidden_states.1_mvn", + "type" : "l2_normalize", + "name" : "hidden_states.1_mvn" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "hidden_states.1_scale", + "constant_blob" : 19, + "top" : "hidden_states.1_scale_constant_in_scale", + "w" : 768, + "type" : "load_constant", + "name" : "hidden_states.1_scale_constant_in_scale" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "hidden_states.1_scale", + "constant_blob" : 21, + "top" : "hidden_states.1_scale_constant_in_bias", + "w" : 768, + "type" : "load_constant", + "name" : "hidden_states.1_scale_constant_in_bias" + }, + { + "bottom" : "hidden_states.1_mvn,hidden_states.1_scale_constant_in_scale", + "alpha" : 1, + "operation" : 1, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "hidden_states.1_scale", + "top" : "hidden_states.1_scale_mul_out", + "type" : "elementwise", + "name" : "hidden_states.1_scale_mul_out", + "beta" : 0 + }, + { + "bottom" : "hidden_states.1_scale_mul_out,hidden_states.1_scale_constant_in_bias", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "hidden_states.1_scale", + "top" : "hidden_states.1_scale", + "type" : "elementwise", + "name" : "hidden_states.1_scale", + "beta" : 0 + }, + { + "name" : "hidden_states.1", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 3, + "type" : "reshape", + "dst_h" : 128, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "hidden_states.1_scale", + "debug_info" : "hidden_states.1", + "dst_seq" : 1, + "dst_k" : -1, + "top" : "hidden_states.1" + }, + { + "bottom" : "hidden_states.1", + "weights" : { + + }, + "nd_mode" : 1, + "axis" : -1, + "debug_info" : "108_shape", + "top" : "108_shape", + "type" : "get_shape", + "name" : "108_shape" + }, + { + "top" : "gather_2_indices_0", + "w" : 1, + "h" : 1, + "name" : "gather_2_indices_0", + "nd_rank" : 1, + "type" : "load_constant", + "k" : 1, + "bottom" : "", + "debug_info" : "gather_2_indices_0", + "n" : 1, + "weights" : { + + }, + "constant_blob" : 23 + }, + { + "bottom" : "108_shape,gather_2_indices_0", + "validate_indices" : true, + "allow_negative_indices" : true, + "mode" : 0, + "weights" : { + + }, + "debug_info" : "gather_2", + "axis" : 0, + "top" : "gather_2", + "type" : "gather_nd", + "name" : "gather_2", + "batch_dims" : 0 + }, + { + "nB" : 768, + "top" : "112", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 27, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "hidden_states.1", + "blob_biases" : 25, + "has_tanh" : 0, + "debug_info" : "112", + "name" : "112", + "has_prelu" : 0 + }, + { + "bottom" : "112", + "alpha" : 0.125, + "operation" : 1, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "tensor.1", + "top" : "tensor.1", + "type" : "elementwise", + "name" : "tensor.1", + "beta" : 0 + }, + { + "name" : "116", + "weights" : { + + }, + "dst_w" : 64, + "version" : 1, + "dst_n" : -1, + "dst_nd_rank" : 4, + "type" : "reshape", + "dst_h" : 12, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "tensor.1", + "debug_info" : "116", + "dst_seq" : 1, + "dst_k" : 128, + "top" : "116" + }, + { + "nB" : 768, + "top" : "tensor.3", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 31, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "hidden_states.1", + "blob_biases" : 29, + "has_tanh" : 0, + "debug_info" : "tensor.3", + "name" : "tensor.3", + "has_prelu" : 0 + }, + { + "top" : "11", + "w" : 1, + "h" : 1, + "name" : "11", + "nd_rank" : 1, + "type" : "load_constant", + "k" : 1, + "bottom" : "", + "debug_info" : "11", + "n" : 1, + "weights" : { + + }, + "constant_blob" : 33 + }, + { + "top" : "10", + "w" : 1, + "h" : 1, + "name" : "10", + "nd_rank" : 1, + "type" : "load_constant", + "k" : 1, + "bottom" : "", + "debug_info" : "10", + "n" : 1, + "weights" : { + + }, + "constant_blob" : 35 + }, + { + "bottom" : "gather_2,24,11,10", + "weights" : { + + }, + "nd_mode" : true, + "axis" : 0, + "debug_info" : "concat_2", + "top" : "concat_2", + "type" : "general_concat", + "name" : "concat_2" + }, + { + "name" : "122", + "weights" : { + + }, + "dst_w" : 0, + "version" : 1, + "dst_n" : 0, + "type" : "reshape", + "dst_h" : 0, + "mode" : 0, + "bottom" : "tensor.3,concat_2", + "debug_info" : "122", + "nd_mode" : 1, + "dst_seq" : 1, + "dst_k" : 0, + "top" : "122" + }, + { + "has_prelu" : 0, + "top" : "tensor.5", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 39, + "type" : "inner_product", + "has_relu" : 0, + "attributes" : { + "basic_block_head" : 1 + }, + "bottom" : "hidden_states.1", + "debug_info" : "tensor.5", + "has_tanh" : 0, + "blob_biases" : 37, + "name" : "tensor.5", + "nB" : 768 + }, + { + "name" : "129", + "weights" : { + + }, + "dst_w" : 0, + "version" : 1, + "dst_n" : 0, + "type" : "reshape", + "dst_h" : 0, + "mode" : 0, + "bottom" : "tensor.5,concat_2", + "debug_info" : "129", + "nd_mode" : 1, + "dst_seq" : 1, + "dst_k" : 0, + "top" : "129" + }, + { + "axis_seq" : 4, + "name" : "transpose_166", + "axis_n" : 3, + "axis_h" : 2, + "type" : "transpose", + "attributes" : { + "basic_block_head" : 1 + }, + "bottom" : "122", + "axis_w" : 0, + "axis_k" : 1, + "debug_info" : "transpose_166", + "weights" : { + + }, + "top" : "transpose_166" + }, + { + "axis_h" : 0, + "axis_w" : 1, + "bottom" : "transpose_166", + "axis_k" : 2, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_164", + "top" : "transpose_164", + "type" : "transpose", + "name" : "transpose_164" + }, + { + "axis_h" : 2, + "axis_w" : 0, + "bottom" : "116", + "axis_k" : 1, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_167", + "top" : "transpose_167", + "type" : "transpose", + "name" : "transpose_167" + }, + { + "bottom" : "transpose_167,transpose_164", + "weights" : { + + }, + "debug_info" : "attn_weights.1", + "top" : "attn_weights.1", + "type" : "batch_matmul", + "name" : "attn_weights.1", + "channel_mode" : false + }, + { + "bottom" : "transpose_166", + "weights" : { + + }, + "nd_mode" : 1, + "axis" : -1, + "debug_info" : "134_shape", + "top" : "134_shape", + "type" : "get_shape", + "name" : "134_shape" + }, + { + "top" : "gather_4_indices_0", + "w" : 1, + "h" : 1, + "name" : "gather_4_indices_0", + "nd_rank" : 1, + "type" : "load_constant", + "k" : 1, + "bottom" : "", + "debug_info" : "gather_4_indices_0", + "n" : 1, + "weights" : { + + }, + "constant_blob" : 41 + }, + { + "bottom" : "134_shape,gather_4_indices_0", + "validate_indices" : true, + "allow_negative_indices" : true, + "mode" : 0, + "weights" : { + + }, + "debug_info" : "gather_4", + "axis" : 0, + "top" : "gather_4", + "type" : "gather_nd", + "name" : "gather_4", + "batch_dims" : 0 + }, + { + "top" : "concat_4_values0_0", + "w" : 1, + "h" : 1, + "name" : "concat_4_values0_0", + "nd_rank" : 1, + "type" : "load_constant", + "k" : 1, + "bottom" : "", + "debug_info" : "concat_4_values0_0", + "n" : 1, + "weights" : { + + }, + "constant_blob" : 43 + }, + { + "top" : "concat_4_values1_0", + "w" : 1, + "h" : 1, + "name" : "concat_4_values1_0", + "nd_rank" : 1, + "type" : "load_constant", + "k" : 1, + "bottom" : "", + "debug_info" : "concat_4_values1_0", + "n" : 1, + "weights" : { + + }, + "constant_blob" : 45 + }, + { + "top" : "concat_4_values2_0", + "w" : 1, + "h" : 1, + "name" : "concat_4_values2_0", + "nd_rank" : 1, + "type" : "load_constant", + "k" : 1, + "bottom" : "", + "debug_info" : "concat_4_values2_0", + "n" : 1, + "weights" : { + + }, + "constant_blob" : 47 + }, + { + "bottom" : "concat_4_values0_0,concat_4_values1_0,concat_4_values2_0,gather_4", + "weights" : { + + }, + "nd_mode" : true, + "axis" : 0, + "debug_info" : "concat_4", + "top" : "concat_4", + "type" : "general_concat", + "name" : "concat_4" + }, + { + "top" : "causal_mask.7_begin_0", + "w" : 4, + "h" : 1, + "name" : "causal_mask.7_begin_0", + "nd_rank" : 1, + "type" : "load_constant", + "k" : 1, + "bottom" : "", + "debug_info" : "causal_mask.7_begin_0", + "n" : 1, + "weights" : { + + }, + "constant_blob" : 49 + }, + { + "begin_n" : 0, + "static_sizes_k" : 0, + "end_mask_h" : true, + "end_w" : 1, + "end_h" : 1, + "begin_h" : 0, + "stride_n" : 1, + "end_seq" : 1, + "begin_mask_seq" : false, + "begin_mask_h" : false, + "squeeze_mask_h" : false, + "stride_k" : 1, + "type" : "general_slice", + "begin_mask_k" : false, + "static_sizes_w" : 0, + "begin_w" : 0, + "end_mask_k" : true, + "use_static_sizes" : false, + "stride_h" : 1, + "static_sizes_n" : 0, + "begin_mask_n" : false, + "weights" : { + + }, + "name" : "causal_mask.7", + "stride_seq" : 1, + "end_k" : 1, + "static_sizes_seq" : 0, + "top" : "causal_mask.7", + "begin_seq" : 0, + "squeeze_mask_k" : false, + "begin_k" : 0, + "end_mask_w" : true, + "end_mask_seq" : false, + "end_mask_n" : false, + "debug_info" : "causal_mask.7", + "stride_w" : 1, + "squeeze_mask_seq" : false, + "static_sizes_h" : 0, + "begin_mask_w" : false, + "bottom" : "attention_mask,causal_mask.7_begin_0,concat_4", + "end_n" : 1, + "squeeze_mask_w" : false, + "squeeze_mask_n" : false + }, + { + "name" : "input.7", + "fused_relu" : 0, + "beta" : 0, + "operation" : 0, + "type" : "elementwise", + "alpha" : 1, + "attributes" : { + "basic_block_head" : 1 + }, + "bottom" : "attn_weights.1,causal_mask.7", + "debug_info" : "input.7", + "nd_mode" : true, + "weights" : { + + }, + "top" : "input.7" + }, + { + "bottom" : "input.7", + "weights" : { + + }, + "nd_axis" : -1, + "debug_info" : "input.9", + "top" : "input.9", + "type" : "softmax_nd", + "name" : "input.9" + }, + { + "axis_h" : 2, + "axis_w" : 0, + "bottom" : "129", + "axis_k" : 1, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_165", + "top" : "transpose_165", + "type" : "transpose", + "name" : "transpose_165" + }, + { + "bottom" : "input.9,transpose_165", + "weights" : { + + }, + "debug_info" : "attn_output.1", + "top" : "attn_output.1", + "type" : "batch_matmul", + "name" : "attn_output.1", + "channel_mode" : false + }, + { + "axis_h" : 2, + "axis_w" : 0, + "bottom" : "attn_output.1", + "axis_k" : 1, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_163", + "top" : "transpose_163", + "type" : "transpose", + "name" : "transpose_163" + }, + { + "name" : "input.11", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 3, + "type" : "reshape", + "dst_h" : 128, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "transpose_163", + "debug_info" : "input.11", + "dst_seq" : 1, + "dst_k" : -1, + "top" : "input.11" + }, + { + "nB" : 768, + "top" : "input.13", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 53, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "input.11", + "blob_biases" : 51, + "has_tanh" : 0, + "debug_info" : "input.13", + "name" : "input.13", + "has_prelu" : 0 + }, + { + "bottom" : "input.3,input.13", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "input.15", + "nd_mode" : true, + "top" : "input.15", + "type" : "elementwise", + "name" : "input.15", + "beta" : 0 + }, + { + "name" : "hidden_states.5_reshape", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : -1, + "dst_nd_rank" : 4, + "type" : "reshape", + "dst_h" : 1, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "input.15", + "debug_info" : "hidden_states.5_reshape", + "dst_seq" : 1, + "dst_k" : 128, + "top" : "hidden_states.5_reshape" + }, + { + "bottom" : "hidden_states.5_reshape", + "weights" : { + + }, + "eps" : 9.9999997473787516e-06, + "debug_info" : "hidden_states.5_mvn", + "axis" : 1, + "normalization_mode" : 1, + "top" : "hidden_states.5_mvn", + "type" : "l2_normalize", + "name" : "hidden_states.5_mvn" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "hidden_states.5_scale", + "constant_blob" : 55, + "top" : "hidden_states.5_scale_constant_in_scale", + "w" : 768, + "type" : "load_constant", + "name" : "hidden_states.5_scale_constant_in_scale" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "hidden_states.5_scale", + "constant_blob" : 57, + "top" : "hidden_states.5_scale_constant_in_bias", + "w" : 768, + "type" : "load_constant", + "name" : "hidden_states.5_scale_constant_in_bias" + }, + { + "bottom" : "hidden_states.5_mvn,hidden_states.5_scale_constant_in_scale", + "alpha" : 1, + "operation" : 1, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "hidden_states.5_scale", + "top" : "hidden_states.5_scale_mul_out", + "type" : "elementwise", + "name" : "hidden_states.5_scale_mul_out", + "beta" : 0 + }, + { + "bottom" : "hidden_states.5_scale_mul_out,hidden_states.5_scale_constant_in_bias", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "hidden_states.5_scale", + "top" : "hidden_states.5_scale", + "type" : "elementwise", + "name" : "hidden_states.5_scale", + "beta" : 0 + }, + { + "name" : "hidden_states.5", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 3, + "type" : "reshape", + "dst_h" : 128, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "hidden_states.5_scale", + "debug_info" : "hidden_states.5", + "dst_seq" : 1, + "dst_k" : -1, + "top" : "hidden_states.5" + }, + { + "bottom" : "hidden_states.5", + "weights" : { + + }, + "nd_mode" : 1, + "axis" : -1, + "debug_info" : "159_shape", + "top" : "159_shape", + "type" : "get_shape", + "name" : "159_shape" + }, + { + "top" : "gather_5_indices_0", + "w" : 1, + "h" : 1, + "name" : "gather_5_indices_0", + "nd_rank" : 1, + "type" : "load_constant", + "k" : 1, + "bottom" : "", + "debug_info" : "gather_5_indices_0", + "n" : 1, + "weights" : { + + }, + "constant_blob" : 59 + }, + { + "bottom" : "159_shape,gather_5_indices_0", + "validate_indices" : true, + "allow_negative_indices" : true, + "mode" : 0, + "weights" : { + + }, + "debug_info" : "gather_5", + "axis" : 0, + "top" : "gather_5", + "type" : "gather_nd", + "name" : "gather_5", + "batch_dims" : 0 + }, + { + "nB" : 768, + "top" : "163", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 63, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "hidden_states.5", + "blob_biases" : 61, + "has_tanh" : 0, + "debug_info" : "163", + "name" : "163", + "has_prelu" : 0 + }, + { + "bottom" : "163", + "alpha" : 0.125, + "operation" : 1, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "tensor.7", + "top" : "tensor.7", + "type" : "elementwise", + "name" : "tensor.7", + "beta" : 0 + }, + { + "name" : "167", + "weights" : { + + }, + "dst_w" : 64, + "version" : 1, + "dst_n" : -1, + "dst_nd_rank" : 4, + "type" : "reshape", + "dst_h" : 12, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "tensor.7", + "debug_info" : "167", + "dst_seq" : 1, + "dst_k" : 128, + "top" : "167" + }, + { + "nB" : 768, + "top" : "tensor.9", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 67, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "encoder_hidden_states", + "blob_biases" : 65, + "has_tanh" : 0, + "debug_info" : "tensor.9", + "name" : "tensor.9", + "has_prelu" : 0 + }, + { + "bottom" : "gather_5,24,11,10", + "weights" : { + + }, + "nd_mode" : true, + "axis" : 0, + "debug_info" : "concat_7", + "top" : "concat_7", + "type" : "general_concat", + "name" : "concat_7" + }, + { + "name" : "173", + "weights" : { + + }, + "dst_w" : 0, + "version" : 1, + "dst_n" : 0, + "type" : "reshape", + "dst_h" : 0, + "mode" : 0, + "bottom" : "tensor.9,concat_7", + "debug_info" : "173", + "nd_mode" : 1, + "dst_seq" : 1, + "dst_k" : 0, + "top" : "173" + }, + { + "has_prelu" : 0, + "top" : "tensor.11", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 71, + "type" : "inner_product", + "has_relu" : 0, + "attributes" : { + "basic_block_head" : 1 + }, + "bottom" : "encoder_hidden_states", + "debug_info" : "tensor.11", + "has_tanh" : 0, + "blob_biases" : 69, + "name" : "tensor.11", + "nB" : 768 + }, + { + "name" : "180", + "weights" : { + + }, + "dst_w" : 0, + "version" : 1, + "dst_n" : 0, + "type" : "reshape", + "dst_h" : 0, + "mode" : 0, + "bottom" : "tensor.11,concat_7", + "debug_info" : "180", + "nd_mode" : 1, + "dst_seq" : 1, + "dst_k" : 0, + "top" : "180" + }, + { + "axis_seq" : 4, + "name" : "transpose_160", + "axis_n" : 3, + "axis_h" : 0, + "type" : "transpose", + "attributes" : { + "basic_block_head" : 1 + }, + "bottom" : "173", + "axis_w" : 2, + "axis_k" : 1, + "debug_info" : "transpose_160", + "weights" : { + + }, + "top" : "transpose_160" + }, + { + "axis_h" : 2, + "axis_w" : 0, + "bottom" : "167", + "axis_k" : 1, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_161", + "top" : "transpose_161", + "type" : "transpose", + "name" : "transpose_161" + }, + { + "bottom" : "transpose_161,transpose_160", + "weights" : { + + }, + "debug_info" : "input.17", + "top" : "input.17", + "type" : "batch_matmul", + "name" : "input.17", + "channel_mode" : false + }, + { + "bottom" : "input.17", + "weights" : { + + }, + "nd_axis" : -1, + "debug_info" : "input.19", + "top" : "input.19", + "type" : "softmax_nd", + "name" : "input.19" + }, + { + "axis_h" : 2, + "axis_w" : 0, + "bottom" : "180", + "axis_k" : 1, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_162", + "top" : "transpose_162", + "type" : "transpose", + "name" : "transpose_162" + }, + { + "bottom" : "input.19,transpose_162", + "weights" : { + + }, + "debug_info" : "attn_output.5", + "top" : "attn_output.5", + "type" : "batch_matmul", + "name" : "attn_output.5", + "channel_mode" : false + }, + { + "axis_h" : 2, + "axis_w" : 0, + "bottom" : "attn_output.5", + "axis_k" : 1, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_159", + "top" : "transpose_159", + "type" : "transpose", + "name" : "transpose_159" + }, + { + "name" : "input.21", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 3, + "type" : "reshape", + "dst_h" : 128, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "transpose_159", + "debug_info" : "input.21", + "dst_seq" : 1, + "dst_k" : -1, + "top" : "input.21" + }, + { + "nB" : 768, + "top" : "input.23", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 75, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "input.21", + "blob_biases" : 73, + "has_tanh" : 0, + "debug_info" : "input.23", + "name" : "input.23", + "has_prelu" : 0 + }, + { + "bottom" : "input.15,input.23", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "input.25", + "nd_mode" : true, + "top" : "input.25", + "type" : "elementwise", + "name" : "input.25", + "beta" : 0 + }, + { + "name" : "input.27_reshape", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : -1, + "dst_nd_rank" : 4, + "type" : "reshape", + "dst_h" : 1, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "input.25", + "debug_info" : "input.27_reshape", + "dst_seq" : 1, + "dst_k" : 128, + "top" : "input.27_reshape" + }, + { + "bottom" : "input.27_reshape", + "weights" : { + + }, + "eps" : 9.9999997473787516e-06, + "debug_info" : "input.27_mvn", + "axis" : 1, + "normalization_mode" : 1, + "top" : "input.27_mvn", + "type" : "l2_normalize", + "name" : "input.27_mvn" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "input.27_scale", + "constant_blob" : 77, + "top" : "input.27_scale_constant_in_scale", + "w" : 768, + "type" : "load_constant", + "name" : "input.27_scale_constant_in_scale" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "input.27_scale", + "constant_blob" : 79, + "top" : "input.27_scale_constant_in_bias", + "w" : 768, + "type" : "load_constant", + "name" : "input.27_scale_constant_in_bias" + }, + { + "bottom" : "input.27_mvn,input.27_scale_constant_in_scale", + "alpha" : 1, + "operation" : 1, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "input.27_scale", + "top" : "input.27_scale_mul_out", + "type" : "elementwise", + "name" : "input.27_scale_mul_out", + "beta" : 0 + }, + { + "bottom" : "input.27_scale_mul_out,input.27_scale_constant_in_bias", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "input.27_scale", + "top" : "input.27_scale", + "type" : "elementwise", + "name" : "input.27_scale", + "beta" : 0 + }, + { + "name" : "input.27", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 3, + "type" : "reshape", + "dst_h" : 128, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "input.27_scale", + "debug_info" : "input.27", + "dst_seq" : 1, + "dst_k" : -1, + "top" : "input.27" + }, + { + "nB" : 768, + "top" : "input.29", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 3072, + "blob_weights" : 83, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "input.27", + "blob_biases" : 81, + "has_tanh" : 0, + "debug_info" : "input.29", + "name" : "input.29", + "has_prelu" : 0 + }, + { + "bottom" : "input.29", + "weights" : { + + }, + "mode" : 19, + "debug_info" : "input.31", + "top" : "input.31", + "type" : "activation", + "name" : "input.31" + }, + { + "nB" : 3072, + "top" : "input.35", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 87, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "input.31", + "blob_biases" : 85, + "has_tanh" : 0, + "debug_info" : "input.35", + "name" : "input.35", + "has_prelu" : 0 + }, + { + "bottom" : "input.25,input.35", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "input.37", + "top" : "input.37", + "type" : "elementwise", + "name" : "input.37", + "beta" : 0 + }, + { + "name" : "hidden_states.11_reshape", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : -1, + "dst_nd_rank" : 4, + "type" : "reshape", + "dst_h" : 1, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "input.37", + "debug_info" : "hidden_states.11_reshape", + "dst_seq" : 1, + "dst_k" : 128, + "top" : "hidden_states.11_reshape" + }, + { + "bottom" : "hidden_states.11_reshape", + "weights" : { + + }, + "eps" : 9.9999997473787516e-06, + "debug_info" : "hidden_states.11_mvn", + "axis" : 1, + "normalization_mode" : 1, + "top" : "hidden_states.11_mvn", + "type" : "l2_normalize", + "name" : "hidden_states.11_mvn" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "hidden_states.11_scale", + "constant_blob" : 89, + "top" : "hidden_states.11_scale_constant_in_scale", + "w" : 768, + "type" : "load_constant", + "name" : "hidden_states.11_scale_constant_in_scale" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "hidden_states.11_scale", + "constant_blob" : 91, + "top" : "hidden_states.11_scale_constant_in_bias", + "w" : 768, + "type" : "load_constant", + "name" : "hidden_states.11_scale_constant_in_bias" + }, + { + "bottom" : "hidden_states.11_mvn,hidden_states.11_scale_constant_in_scale", + "alpha" : 1, + "operation" : 1, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "hidden_states.11_scale", + "top" : "hidden_states.11_scale_mul_out", + "type" : "elementwise", + "name" : "hidden_states.11_scale_mul_out", + "beta" : 0 + }, + { + "bottom" : "hidden_states.11_scale_mul_out,hidden_states.11_scale_constant_in_bias", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "hidden_states.11_scale", + "top" : "hidden_states.11_scale", + "type" : "elementwise", + "name" : "hidden_states.11_scale", + "beta" : 0 + }, + { + "name" : "hidden_states.11", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 3, + "type" : "reshape", + "dst_h" : 128, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "hidden_states.11_scale", + "debug_info" : "hidden_states.11", + "dst_seq" : 1, + "dst_k" : -1, + "top" : "hidden_states.11" + }, + { + "bottom" : "hidden_states.11", + "weights" : { + + }, + "nd_mode" : 1, + "axis" : -1, + "debug_info" : "225_shape", + "top" : "225_shape", + "type" : "get_shape", + "name" : "225_shape" + }, + { + "top" : "gather_7_indices_0", + "w" : 1, + "h" : 1, + "name" : "gather_7_indices_0", + "nd_rank" : 1, + "type" : "load_constant", + "k" : 1, + "bottom" : "", + "debug_info" : "gather_7_indices_0", + "n" : 1, + "weights" : { + + }, + "constant_blob" : 93 + }, + { + "bottom" : "225_shape,gather_7_indices_0", + "validate_indices" : true, + "allow_negative_indices" : true, + "mode" : 0, + "weights" : { + + }, + "debug_info" : "gather_7", + "axis" : 0, + "top" : "gather_7", + "type" : "gather_nd", + "name" : "gather_7", + "batch_dims" : 0 + }, + { + "nB" : 768, + "top" : "229", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 97, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "hidden_states.11", + "blob_biases" : 95, + "has_tanh" : 0, + "debug_info" : "229", + "name" : "229", + "has_prelu" : 0 + }, + { + "bottom" : "229", + "alpha" : 0.125, + "operation" : 1, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "tensor.13", + "top" : "tensor.13", + "type" : "elementwise", + "name" : "tensor.13", + "beta" : 0 + }, + { + "name" : "233", + "weights" : { + + }, + "dst_w" : 64, + "version" : 1, + "dst_n" : -1, + "dst_nd_rank" : 4, + "type" : "reshape", + "dst_h" : 12, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "tensor.13", + "debug_info" : "233", + "dst_seq" : 1, + "dst_k" : 128, + "top" : "233" + }, + { + "nB" : 768, + "top" : "tensor.15", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 101, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "hidden_states.11", + "blob_biases" : 99, + "has_tanh" : 0, + "debug_info" : "tensor.15", + "name" : "tensor.15", + "has_prelu" : 0 + }, + { + "bottom" : "gather_7,24,11,10", + "weights" : { + + }, + "nd_mode" : true, + "axis" : 0, + "debug_info" : "concat_11", + "top" : "concat_11", + "type" : "general_concat", + "name" : "concat_11" + }, + { + "name" : "239", + "weights" : { + + }, + "dst_w" : 0, + "version" : 1, + "dst_n" : 0, + "type" : "reshape", + "dst_h" : 0, + "mode" : 0, + "bottom" : "tensor.15,concat_11", + "debug_info" : "239", + "nd_mode" : 1, + "dst_seq" : 1, + "dst_k" : 0, + "top" : "239" + }, + { + "has_prelu" : 0, + "top" : "tensor.17", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 105, + "type" : "inner_product", + "has_relu" : 0, + "attributes" : { + "basic_block_head" : 1 + }, + "bottom" : "hidden_states.11", + "debug_info" : "tensor.17", + "has_tanh" : 0, + "blob_biases" : 103, + "name" : "tensor.17", + "nB" : 768 + }, + { + "name" : "246", + "weights" : { + + }, + "dst_w" : 0, + "version" : 1, + "dst_n" : 0, + "type" : "reshape", + "dst_h" : 0, + "mode" : 0, + "bottom" : "tensor.17,concat_11", + "debug_info" : "246", + "nd_mode" : 1, + "dst_seq" : 1, + "dst_k" : 0, + "top" : "246" + }, + { + "axis_seq" : 4, + "name" : "transpose_157", + "axis_n" : 3, + "axis_h" : 2, + "type" : "transpose", + "attributes" : { + "basic_block_head" : 1 + }, + "bottom" : "239", + "axis_w" : 0, + "axis_k" : 1, + "debug_info" : "transpose_157", + "weights" : { + + }, + "top" : "transpose_157" + }, + { + "axis_h" : 0, + "axis_w" : 1, + "bottom" : "transpose_157", + "axis_k" : 2, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_155", + "top" : "transpose_155", + "type" : "transpose", + "name" : "transpose_155" + }, + { + "axis_h" : 2, + "axis_w" : 0, + "bottom" : "233", + "axis_k" : 1, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_158", + "top" : "transpose_158", + "type" : "transpose", + "name" : "transpose_158" + }, + { + "bottom" : "transpose_158,transpose_155", + "weights" : { + + }, + "debug_info" : "attn_weights.7", + "top" : "attn_weights.7", + "type" : "batch_matmul", + "name" : "attn_weights.7", + "channel_mode" : false + }, + { + "bottom" : "transpose_157", + "weights" : { + + }, + "nd_mode" : 1, + "axis" : -1, + "debug_info" : "251_shape", + "top" : "251_shape", + "type" : "get_shape", + "name" : "251_shape" + }, + { + "top" : "gather_9_indices_0", + "w" : 1, + "h" : 1, + "name" : "gather_9_indices_0", + "nd_rank" : 1, + "type" : "load_constant", + "k" : 1, + "bottom" : "", + "debug_info" : "gather_9_indices_0", + "n" : 1, + "weights" : { + + }, + "constant_blob" : 107 + }, + { + "bottom" : "251_shape,gather_9_indices_0", + "validate_indices" : true, + "allow_negative_indices" : true, + "mode" : 0, + "weights" : { + + }, + "debug_info" : "gather_9", + "axis" : 0, + "top" : "gather_9", + "type" : "gather_nd", + "name" : "gather_9", + "batch_dims" : 0 + }, + { + "top" : "concat_13_values0_0", + "w" : 1, + "h" : 1, + "name" : "concat_13_values0_0", + "nd_rank" : 1, + "type" : "load_constant", + "k" : 1, + "bottom" : "", + "debug_info" : "concat_13_values0_0", + "n" : 1, + "weights" : { + + }, + "constant_blob" : 109 + }, + { + "top" : "concat_13_values1_0", + "w" : 1, + "h" : 1, + "name" : "concat_13_values1_0", + "nd_rank" : 1, + "type" : "load_constant", + "k" : 1, + "bottom" : "", + "debug_info" : "concat_13_values1_0", + "n" : 1, + "weights" : { + + }, + "constant_blob" : 111 + }, + { + "top" : "concat_13_values2_0", + "w" : 1, + "h" : 1, + "name" : "concat_13_values2_0", + "nd_rank" : 1, + "type" : "load_constant", + "k" : 1, + "bottom" : "", + "debug_info" : "concat_13_values2_0", + "n" : 1, + "weights" : { + + }, + "constant_blob" : 113 + }, + { + "bottom" : "concat_13_values0_0,concat_13_values1_0,concat_13_values2_0,gather_9", + "weights" : { + + }, + "nd_mode" : true, + "axis" : 0, + "debug_info" : "concat_13", + "top" : "concat_13", + "type" : "general_concat", + "name" : "concat_13" + }, + { + "top" : "causal_mask.9_begin_0", + "w" : 4, + "h" : 1, + "name" : "causal_mask.9_begin_0", + "nd_rank" : 1, + "type" : "load_constant", + "k" : 1, + "bottom" : "", + "debug_info" : "causal_mask.9_begin_0", + "n" : 1, + "weights" : { + + }, + "constant_blob" : 115 + }, + { + "begin_n" : 0, + "static_sizes_k" : 0, + "end_mask_h" : true, + "end_w" : 1, + "end_h" : 1, + "begin_h" : 0, + "stride_n" : 1, + "end_seq" : 1, + "begin_mask_seq" : false, + "begin_mask_h" : false, + "squeeze_mask_h" : false, + "stride_k" : 1, + "type" : "general_slice", + "begin_mask_k" : false, + "static_sizes_w" : 0, + "begin_w" : 0, + "end_mask_k" : true, + "use_static_sizes" : false, + "stride_h" : 1, + "static_sizes_n" : 0, + "begin_mask_n" : false, + "weights" : { + + }, + "name" : "causal_mask.9", + "stride_seq" : 1, + "end_k" : 1, + "static_sizes_seq" : 0, + "top" : "causal_mask.9", + "begin_seq" : 0, + "squeeze_mask_k" : false, + "begin_k" : 0, + "end_mask_w" : true, + "end_mask_seq" : false, + "end_mask_n" : false, + "debug_info" : "causal_mask.9", + "stride_w" : 1, + "squeeze_mask_seq" : false, + "static_sizes_h" : 0, + "begin_mask_w" : false, + "bottom" : "attention_mask,causal_mask.9_begin_0,concat_13", + "end_n" : 1, + "squeeze_mask_w" : false, + "squeeze_mask_n" : false + }, + { + "name" : "input.39", + "fused_relu" : 0, + "beta" : 0, + "operation" : 0, + "type" : "elementwise", + "alpha" : 1, + "attributes" : { + "basic_block_head" : 1 + }, + "bottom" : "attn_weights.7,causal_mask.9", + "debug_info" : "input.39", + "nd_mode" : true, + "weights" : { + + }, + "top" : "input.39" + }, + { + "bottom" : "input.39", + "weights" : { + + }, + "nd_axis" : -1, + "debug_info" : "input.41", + "top" : "input.41", + "type" : "softmax_nd", + "name" : "input.41" + }, + { + "axis_h" : 2, + "axis_w" : 0, + "bottom" : "246", + "axis_k" : 1, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_156", + "top" : "transpose_156", + "type" : "transpose", + "name" : "transpose_156" + }, + { + "bottom" : "input.41,transpose_156", + "weights" : { + + }, + "debug_info" : "attn_output.9", + "top" : "attn_output.9", + "type" : "batch_matmul", + "name" : "attn_output.9", + "channel_mode" : false + }, + { + "axis_h" : 2, + "axis_w" : 0, + "bottom" : "attn_output.9", + "axis_k" : 1, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_154", + "top" : "transpose_154", + "type" : "transpose", + "name" : "transpose_154" + }, + { + "name" : "input.43", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 3, + "type" : "reshape", + "dst_h" : 128, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "transpose_154", + "debug_info" : "input.43", + "dst_seq" : 1, + "dst_k" : -1, + "top" : "input.43" + }, + { + "nB" : 768, + "top" : "input.45", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 119, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "input.43", + "blob_biases" : 117, + "has_tanh" : 0, + "debug_info" : "input.45", + "name" : "input.45", + "has_prelu" : 0 + }, + { + "bottom" : "input.37,input.45", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "input.47", + "nd_mode" : true, + "top" : "input.47", + "type" : "elementwise", + "name" : "input.47", + "beta" : 0 + }, + { + "name" : "hidden_states.15_reshape", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : -1, + "dst_nd_rank" : 4, + "type" : "reshape", + "dst_h" : 1, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "input.47", + "debug_info" : "hidden_states.15_reshape", + "dst_seq" : 1, + "dst_k" : 128, + "top" : "hidden_states.15_reshape" + }, + { + "bottom" : "hidden_states.15_reshape", + "weights" : { + + }, + "eps" : 9.9999997473787516e-06, + "debug_info" : "hidden_states.15_mvn", + "axis" : 1, + "normalization_mode" : 1, + "top" : "hidden_states.15_mvn", + "type" : "l2_normalize", + "name" : "hidden_states.15_mvn" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "hidden_states.15_scale", + "constant_blob" : 121, + "top" : "hidden_states.15_scale_constant_in_scale", + "w" : 768, + "type" : "load_constant", + "name" : "hidden_states.15_scale_constant_in_scale" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "hidden_states.15_scale", + "constant_blob" : 123, + "top" : "hidden_states.15_scale_constant_in_bias", + "w" : 768, + "type" : "load_constant", + "name" : "hidden_states.15_scale_constant_in_bias" + }, + { + "bottom" : "hidden_states.15_mvn,hidden_states.15_scale_constant_in_scale", + "alpha" : 1, + "operation" : 1, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "hidden_states.15_scale", + "top" : "hidden_states.15_scale_mul_out", + "type" : "elementwise", + "name" : "hidden_states.15_scale_mul_out", + "beta" : 0 + }, + { + "bottom" : "hidden_states.15_scale_mul_out,hidden_states.15_scale_constant_in_bias", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "hidden_states.15_scale", + "top" : "hidden_states.15_scale", + "type" : "elementwise", + "name" : "hidden_states.15_scale", + "beta" : 0 + }, + { + "name" : "hidden_states.15", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 3, + "type" : "reshape", + "dst_h" : 128, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "hidden_states.15_scale", + "debug_info" : "hidden_states.15", + "dst_seq" : 1, + "dst_k" : -1, + "top" : "hidden_states.15" + }, + { + "bottom" : "hidden_states.15", + "weights" : { + + }, + "nd_mode" : 1, + "axis" : -1, + "debug_info" : "276_shape", + "top" : "276_shape", + "type" : "get_shape", + "name" : "276_shape" + }, + { + "top" : "gather_10_indices_0", + "w" : 1, + "h" : 1, + "name" : "gather_10_indices_0", + "nd_rank" : 1, + "type" : "load_constant", + "k" : 1, + "bottom" : "", + "debug_info" : "gather_10_indices_0", + "n" : 1, + "weights" : { + + }, + "constant_blob" : 125 + }, + { + "bottom" : "276_shape,gather_10_indices_0", + "validate_indices" : true, + "allow_negative_indices" : true, + "mode" : 0, + "weights" : { + + }, + "debug_info" : "gather_10", + "axis" : 0, + "top" : "gather_10", + "type" : "gather_nd", + "name" : "gather_10", + "batch_dims" : 0 + }, + { + "nB" : 768, + "top" : "280", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 129, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "hidden_states.15", + "blob_biases" : 127, + "has_tanh" : 0, + "debug_info" : "280", + "name" : "280", + "has_prelu" : 0 + }, + { + "bottom" : "280", + "alpha" : 0.125, + "operation" : 1, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "tensor.19", + "top" : "tensor.19", + "type" : "elementwise", + "name" : "tensor.19", + "beta" : 0 + }, + { + "name" : "284", + "weights" : { + + }, + "dst_w" : 64, + "version" : 1, + "dst_n" : -1, + "dst_nd_rank" : 4, + "type" : "reshape", + "dst_h" : 12, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "tensor.19", + "debug_info" : "284", + "dst_seq" : 1, + "dst_k" : 128, + "top" : "284" + }, + { + "nB" : 768, + "top" : "tensor.21", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 133, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "encoder_hidden_states", + "blob_biases" : 131, + "has_tanh" : 0, + "debug_info" : "tensor.21", + "name" : "tensor.21", + "has_prelu" : 0 + }, + { + "bottom" : "gather_10,24,11,10", + "weights" : { + + }, + "nd_mode" : true, + "axis" : 0, + "debug_info" : "concat_16", + "top" : "concat_16", + "type" : "general_concat", + "name" : "concat_16" + }, + { + "name" : "290", + "weights" : { + + }, + "dst_w" : 0, + "version" : 1, + "dst_n" : 0, + "type" : "reshape", + "dst_h" : 0, + "mode" : 0, + "bottom" : "tensor.21,concat_16", + "debug_info" : "290", + "nd_mode" : 1, + "dst_seq" : 1, + "dst_k" : 0, + "top" : "290" + }, + { + "has_prelu" : 0, + "top" : "tensor.23", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 137, + "type" : "inner_product", + "has_relu" : 0, + "attributes" : { + "basic_block_head" : 1 + }, + "bottom" : "encoder_hidden_states", + "debug_info" : "tensor.23", + "has_tanh" : 0, + "blob_biases" : 135, + "name" : "tensor.23", + "nB" : 768 + }, + { + "name" : "297", + "weights" : { + + }, + "dst_w" : 0, + "version" : 1, + "dst_n" : 0, + "type" : "reshape", + "dst_h" : 0, + "mode" : 0, + "bottom" : "tensor.23,concat_16", + "debug_info" : "297", + "nd_mode" : 1, + "dst_seq" : 1, + "dst_k" : 0, + "top" : "297" + }, + { + "axis_seq" : 4, + "name" : "transpose_151", + "axis_n" : 3, + "axis_h" : 0, + "type" : "transpose", + "attributes" : { + "basic_block_head" : 1 + }, + "bottom" : "290", + "axis_w" : 2, + "axis_k" : 1, + "debug_info" : "transpose_151", + "weights" : { + + }, + "top" : "transpose_151" + }, + { + "axis_h" : 2, + "axis_w" : 0, + "bottom" : "284", + "axis_k" : 1, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_152", + "top" : "transpose_152", + "type" : "transpose", + "name" : "transpose_152" + }, + { + "bottom" : "transpose_152,transpose_151", + "weights" : { + + }, + "debug_info" : "input.49", + "top" : "input.49", + "type" : "batch_matmul", + "name" : "input.49", + "channel_mode" : false + }, + { + "bottom" : "input.49", + "weights" : { + + }, + "nd_axis" : -1, + "debug_info" : "input.51", + "top" : "input.51", + "type" : "softmax_nd", + "name" : "input.51" + }, + { + "axis_h" : 2, + "axis_w" : 0, + "bottom" : "297", + "axis_k" : 1, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_153", + "top" : "transpose_153", + "type" : "transpose", + "name" : "transpose_153" + }, + { + "bottom" : "input.51,transpose_153", + "weights" : { + + }, + "debug_info" : "attn_output.13", + "top" : "attn_output.13", + "type" : "batch_matmul", + "name" : "attn_output.13", + "channel_mode" : false + }, + { + "axis_h" : 2, + "axis_w" : 0, + "bottom" : "attn_output.13", + "axis_k" : 1, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_150", + "top" : "transpose_150", + "type" : "transpose", + "name" : "transpose_150" + }, + { + "name" : "input.53", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 3, + "type" : "reshape", + "dst_h" : 128, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "transpose_150", + "debug_info" : "input.53", + "dst_seq" : 1, + "dst_k" : -1, + "top" : "input.53" + }, + { + "nB" : 768, + "top" : "input.55", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 141, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "input.53", + "blob_biases" : 139, + "has_tanh" : 0, + "debug_info" : "input.55", + "name" : "input.55", + "has_prelu" : 0 + }, + { + "bottom" : "input.47,input.55", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "input.57", + "nd_mode" : true, + "top" : "input.57", + "type" : "elementwise", + "name" : "input.57", + "beta" : 0 + }, + { + "name" : "input.59_reshape", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : -1, + "dst_nd_rank" : 4, + "type" : "reshape", + "dst_h" : 1, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "input.57", + "debug_info" : "input.59_reshape", + "dst_seq" : 1, + "dst_k" : 128, + "top" : "input.59_reshape" + }, + { + "bottom" : "input.59_reshape", + "weights" : { + + }, + "eps" : 9.9999997473787516e-06, + "debug_info" : "input.59_mvn", + "axis" : 1, + "normalization_mode" : 1, + "top" : "input.59_mvn", + "type" : "l2_normalize", + "name" : "input.59_mvn" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "input.59_scale", + "constant_blob" : 143, + "top" : "input.59_scale_constant_in_scale", + "w" : 768, + "type" : "load_constant", + "name" : "input.59_scale_constant_in_scale" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "input.59_scale", + "constant_blob" : 145, + "top" : "input.59_scale_constant_in_bias", + "w" : 768, + "type" : "load_constant", + "name" : "input.59_scale_constant_in_bias" + }, + { + "bottom" : "input.59_mvn,input.59_scale_constant_in_scale", + "alpha" : 1, + "operation" : 1, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "input.59_scale", + "top" : "input.59_scale_mul_out", + "type" : "elementwise", + "name" : "input.59_scale_mul_out", + "beta" : 0 + }, + { + "bottom" : "input.59_scale_mul_out,input.59_scale_constant_in_bias", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "input.59_scale", + "top" : "input.59_scale", + "type" : "elementwise", + "name" : "input.59_scale", + "beta" : 0 + }, + { + "name" : "input.59", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 3, + "type" : "reshape", + "dst_h" : 128, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "input.59_scale", + "debug_info" : "input.59", + "dst_seq" : 1, + "dst_k" : -1, + "top" : "input.59" + }, + { + "nB" : 768, + "top" : "input.61", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 3072, + "blob_weights" : 149, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "input.59", + "blob_biases" : 147, + "has_tanh" : 0, + "debug_info" : "input.61", + "name" : "input.61", + "has_prelu" : 0 + }, + { + "bottom" : "input.61", + "weights" : { + + }, + "mode" : 19, + "debug_info" : "input.63", + "top" : "input.63", + "type" : "activation", + "name" : "input.63" + }, + { + "nB" : 3072, + "top" : "input.67", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 153, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "input.63", + "blob_biases" : 151, + "has_tanh" : 0, + "debug_info" : "input.67", + "name" : "input.67", + "has_prelu" : 0 + }, + { + "bottom" : "input.57,input.67", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "input.69", + "top" : "input.69", + "type" : "elementwise", + "name" : "input.69", + "beta" : 0 + }, + { + "name" : "hidden_states.21_reshape", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : -1, + "dst_nd_rank" : 4, + "type" : "reshape", + "dst_h" : 1, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "input.69", + "debug_info" : "hidden_states.21_reshape", + "dst_seq" : 1, + "dst_k" : 128, + "top" : "hidden_states.21_reshape" + }, + { + "bottom" : "hidden_states.21_reshape", + "weights" : { + + }, + "eps" : 9.9999997473787516e-06, + "debug_info" : "hidden_states.21_mvn", + "axis" : 1, + "normalization_mode" : 1, + "top" : "hidden_states.21_mvn", + "type" : "l2_normalize", + "name" : "hidden_states.21_mvn" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "hidden_states.21_scale", + "constant_blob" : 155, + "top" : "hidden_states.21_scale_constant_in_scale", + "w" : 768, + "type" : "load_constant", + "name" : "hidden_states.21_scale_constant_in_scale" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "hidden_states.21_scale", + "constant_blob" : 157, + "top" : "hidden_states.21_scale_constant_in_bias", + "w" : 768, + "type" : "load_constant", + "name" : "hidden_states.21_scale_constant_in_bias" + }, + { + "bottom" : "hidden_states.21_mvn,hidden_states.21_scale_constant_in_scale", + "alpha" : 1, + "operation" : 1, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "hidden_states.21_scale", + "top" : "hidden_states.21_scale_mul_out", + "type" : "elementwise", + "name" : "hidden_states.21_scale_mul_out", + "beta" : 0 + }, + { + "bottom" : "hidden_states.21_scale_mul_out,hidden_states.21_scale_constant_in_bias", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "hidden_states.21_scale", + "top" : "hidden_states.21_scale", + "type" : "elementwise", + "name" : "hidden_states.21_scale", + "beta" : 0 + }, + { + "name" : "hidden_states.21", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 3, + "type" : "reshape", + "dst_h" : 128, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "hidden_states.21_scale", + "debug_info" : "hidden_states.21", + "dst_seq" : 1, + "dst_k" : -1, + "top" : "hidden_states.21" + }, + { + "bottom" : "hidden_states.21", + "weights" : { + + }, + "nd_mode" : 1, + "axis" : -1, + "debug_info" : "342_shape", + "top" : "342_shape", + "type" : "get_shape", + "name" : "342_shape" + }, + { + "top" : "gather_12_indices_0", + "w" : 1, + "h" : 1, + "name" : "gather_12_indices_0", + "nd_rank" : 1, + "type" : "load_constant", + "k" : 1, + "bottom" : "", + "debug_info" : "gather_12_indices_0", + "n" : 1, + "weights" : { + + }, + "constant_blob" : 159 + }, + { + "bottom" : "342_shape,gather_12_indices_0", + "validate_indices" : true, + "allow_negative_indices" : true, + "mode" : 0, + "weights" : { + + }, + "debug_info" : "gather_12", + "axis" : 0, + "top" : "gather_12", + "type" : "gather_nd", + "name" : "gather_12", + "batch_dims" : 0 + }, + { + "nB" : 768, + "top" : "346", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 163, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "hidden_states.21", + "blob_biases" : 161, + "has_tanh" : 0, + "debug_info" : "346", + "name" : "346", + "has_prelu" : 0 + }, + { + "bottom" : "346", + "alpha" : 0.125, + "operation" : 1, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "tensor.25", + "top" : "tensor.25", + "type" : "elementwise", + "name" : "tensor.25", + "beta" : 0 + }, + { + "name" : "350", + "weights" : { + + }, + "dst_w" : 64, + "version" : 1, + "dst_n" : -1, + "dst_nd_rank" : 4, + "type" : "reshape", + "dst_h" : 12, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "tensor.25", + "debug_info" : "350", + "dst_seq" : 1, + "dst_k" : 128, + "top" : "350" + }, + { + "nB" : 768, + "top" : "tensor.27", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 167, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "hidden_states.21", + "blob_biases" : 165, + "has_tanh" : 0, + "debug_info" : "tensor.27", + "name" : "tensor.27", + "has_prelu" : 0 + }, + { + "bottom" : "gather_12,24,11,10", + "weights" : { + + }, + "nd_mode" : true, + "axis" : 0, + "debug_info" : "concat_20", + "top" : "concat_20", + "type" : "general_concat", + "name" : "concat_20" + }, + { + "name" : "356", + "weights" : { + + }, + "dst_w" : 0, + "version" : 1, + "dst_n" : 0, + "type" : "reshape", + "dst_h" : 0, + "mode" : 0, + "bottom" : "tensor.27,concat_20", + "debug_info" : "356", + "nd_mode" : 1, + "dst_seq" : 1, + "dst_k" : 0, + "top" : "356" + }, + { + "has_prelu" : 0, + "top" : "tensor.29", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 171, + "type" : "inner_product", + "has_relu" : 0, + "attributes" : { + "basic_block_head" : 1 + }, + "bottom" : "hidden_states.21", + "debug_info" : "tensor.29", + "has_tanh" : 0, + "blob_biases" : 169, + "name" : "tensor.29", + "nB" : 768 + }, + { + "name" : "363", + "weights" : { + + }, + "dst_w" : 0, + "version" : 1, + "dst_n" : 0, + "type" : "reshape", + "dst_h" : 0, + "mode" : 0, + "bottom" : "tensor.29,concat_20", + "debug_info" : "363", + "nd_mode" : 1, + "dst_seq" : 1, + "dst_k" : 0, + "top" : "363" + }, + { + "axis_seq" : 4, + "name" : "transpose_148", + "axis_n" : 3, + "axis_h" : 2, + "type" : "transpose", + "attributes" : { + "basic_block_head" : 1 + }, + "bottom" : "356", + "axis_w" : 0, + "axis_k" : 1, + "debug_info" : "transpose_148", + "weights" : { + + }, + "top" : "transpose_148" + }, + { + "axis_h" : 0, + "axis_w" : 1, + "bottom" : "transpose_148", + "axis_k" : 2, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_146", + "top" : "transpose_146", + "type" : "transpose", + "name" : "transpose_146" + }, + { + "axis_h" : 2, + "axis_w" : 0, + "bottom" : "350", + "axis_k" : 1, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_149", + "top" : "transpose_149", + "type" : "transpose", + "name" : "transpose_149" + }, + { + "bottom" : "transpose_149,transpose_146", + "weights" : { + + }, + "debug_info" : "attn_weights.13", + "top" : "attn_weights.13", + "type" : "batch_matmul", + "name" : "attn_weights.13", + "channel_mode" : false + }, + { + "bottom" : "transpose_148", + "weights" : { + + }, + "nd_mode" : 1, + "axis" : -1, + "debug_info" : "368_shape", + "top" : "368_shape", + "type" : "get_shape", + "name" : "368_shape" + }, + { + "top" : "gather_14_indices_0", + "w" : 1, + "h" : 1, + "name" : "gather_14_indices_0", + "nd_rank" : 1, + "type" : "load_constant", + "k" : 1, + "bottom" : "", + "debug_info" : "gather_14_indices_0", + "n" : 1, + "weights" : { + + }, + "constant_blob" : 173 + }, + { + "bottom" : "368_shape,gather_14_indices_0", + "validate_indices" : true, + "allow_negative_indices" : true, + "mode" : 0, + "weights" : { + + }, + "debug_info" : "gather_14", + "axis" : 0, + "top" : "gather_14", + "type" : "gather_nd", + "name" : "gather_14", + "batch_dims" : 0 + }, + { + "top" : "concat_22_values0_0", + "w" : 1, + "h" : 1, + "name" : "concat_22_values0_0", + "nd_rank" : 1, + "type" : "load_constant", + "k" : 1, + "bottom" : "", + "debug_info" : "concat_22_values0_0", + "n" : 1, + "weights" : { + + }, + "constant_blob" : 175 + }, + { + "top" : "concat_22_values1_0", + "w" : 1, + "h" : 1, + "name" : "concat_22_values1_0", + "nd_rank" : 1, + "type" : "load_constant", + "k" : 1, + "bottom" : "", + "debug_info" : "concat_22_values1_0", + "n" : 1, + "weights" : { + + }, + "constant_blob" : 177 + }, + { + "top" : "concat_22_values2_0", + "w" : 1, + "h" : 1, + "name" : "concat_22_values2_0", + "nd_rank" : 1, + "type" : "load_constant", + "k" : 1, + "bottom" : "", + "debug_info" : "concat_22_values2_0", + "n" : 1, + "weights" : { + + }, + "constant_blob" : 179 + }, + { + "bottom" : "concat_22_values0_0,concat_22_values1_0,concat_22_values2_0,gather_14", + "weights" : { + + }, + "nd_mode" : true, + "axis" : 0, + "debug_info" : "concat_22", + "top" : "concat_22", + "type" : "general_concat", + "name" : "concat_22" + }, + { + "top" : "causal_mask.11_begin_0", + "w" : 4, + "h" : 1, + "name" : "causal_mask.11_begin_0", + "nd_rank" : 1, + "type" : "load_constant", + "k" : 1, + "bottom" : "", + "debug_info" : "causal_mask.11_begin_0", + "n" : 1, + "weights" : { + + }, + "constant_blob" : 181 + }, + { + "begin_n" : 0, + "static_sizes_k" : 0, + "end_mask_h" : true, + "end_w" : 1, + "end_h" : 1, + "begin_h" : 0, + "stride_n" : 1, + "end_seq" : 1, + "begin_mask_seq" : false, + "begin_mask_h" : false, + "squeeze_mask_h" : false, + "stride_k" : 1, + "type" : "general_slice", + "begin_mask_k" : false, + "static_sizes_w" : 0, + "begin_w" : 0, + "end_mask_k" : true, + "use_static_sizes" : false, + "stride_h" : 1, + "static_sizes_n" : 0, + "begin_mask_n" : false, + "weights" : { + + }, + "name" : "causal_mask.11", + "stride_seq" : 1, + "end_k" : 1, + "static_sizes_seq" : 0, + "top" : "causal_mask.11", + "begin_seq" : 0, + "squeeze_mask_k" : false, + "begin_k" : 0, + "end_mask_w" : true, + "end_mask_seq" : false, + "end_mask_n" : false, + "debug_info" : "causal_mask.11", + "stride_w" : 1, + "squeeze_mask_seq" : false, + "static_sizes_h" : 0, + "begin_mask_w" : false, + "bottom" : "attention_mask,causal_mask.11_begin_0,concat_22", + "end_n" : 1, + "squeeze_mask_w" : false, + "squeeze_mask_n" : false + }, + { + "name" : "input.71", + "fused_relu" : 0, + "beta" : 0, + "operation" : 0, + "type" : "elementwise", + "alpha" : 1, + "attributes" : { + "basic_block_head" : 1 + }, + "bottom" : "attn_weights.13,causal_mask.11", + "debug_info" : "input.71", + "nd_mode" : true, + "weights" : { + + }, + "top" : "input.71" + }, + { + "bottom" : "input.71", + "weights" : { + + }, + "nd_axis" : -1, + "debug_info" : "input.73", + "top" : "input.73", + "type" : "softmax_nd", + "name" : "input.73" + }, + { + "axis_h" : 2, + "axis_w" : 0, + "bottom" : "363", + "axis_k" : 1, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_147", + "top" : "transpose_147", + "type" : "transpose", + "name" : "transpose_147" + }, + { + "bottom" : "input.73,transpose_147", + "weights" : { + + }, + "debug_info" : "attn_output.17", + "top" : "attn_output.17", + "type" : "batch_matmul", + "name" : "attn_output.17", + "channel_mode" : false + }, + { + "axis_h" : 2, + "axis_w" : 0, + "bottom" : "attn_output.17", + "axis_k" : 1, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_145", + "top" : "transpose_145", + "type" : "transpose", + "name" : "transpose_145" + }, + { + "name" : "input.75", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 3, + "type" : "reshape", + "dst_h" : 128, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "transpose_145", + "debug_info" : "input.75", + "dst_seq" : 1, + "dst_k" : -1, + "top" : "input.75" + }, + { + "nB" : 768, + "top" : "input.77", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 185, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "input.75", + "blob_biases" : 183, + "has_tanh" : 0, + "debug_info" : "input.77", + "name" : "input.77", + "has_prelu" : 0 + }, + { + "bottom" : "input.69,input.77", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "input.79", + "nd_mode" : true, + "top" : "input.79", + "type" : "elementwise", + "name" : "input.79", + "beta" : 0 + }, + { + "name" : "hidden_states.25_reshape", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : -1, + "dst_nd_rank" : 4, + "type" : "reshape", + "dst_h" : 1, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "input.79", + "debug_info" : "hidden_states.25_reshape", + "dst_seq" : 1, + "dst_k" : 128, + "top" : "hidden_states.25_reshape" + }, + { + "bottom" : "hidden_states.25_reshape", + "weights" : { + + }, + "eps" : 9.9999997473787516e-06, + "debug_info" : "hidden_states.25_mvn", + "axis" : 1, + "normalization_mode" : 1, + "top" : "hidden_states.25_mvn", + "type" : "l2_normalize", + "name" : "hidden_states.25_mvn" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "hidden_states.25_scale", + "constant_blob" : 187, + "top" : "hidden_states.25_scale_constant_in_scale", + "w" : 768, + "type" : "load_constant", + "name" : "hidden_states.25_scale_constant_in_scale" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "hidden_states.25_scale", + "constant_blob" : 189, + "top" : "hidden_states.25_scale_constant_in_bias", + "w" : 768, + "type" : "load_constant", + "name" : "hidden_states.25_scale_constant_in_bias" + }, + { + "bottom" : "hidden_states.25_mvn,hidden_states.25_scale_constant_in_scale", + "alpha" : 1, + "operation" : 1, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "hidden_states.25_scale", + "top" : "hidden_states.25_scale_mul_out", + "type" : "elementwise", + "name" : "hidden_states.25_scale_mul_out", + "beta" : 0 + }, + { + "bottom" : "hidden_states.25_scale_mul_out,hidden_states.25_scale_constant_in_bias", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "hidden_states.25_scale", + "top" : "hidden_states.25_scale", + "type" : "elementwise", + "name" : "hidden_states.25_scale", + "beta" : 0 + }, + { + "name" : "hidden_states.25", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 3, + "type" : "reshape", + "dst_h" : 128, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "hidden_states.25_scale", + "debug_info" : "hidden_states.25", + "dst_seq" : 1, + "dst_k" : -1, + "top" : "hidden_states.25" + }, + { + "bottom" : "hidden_states.25", + "weights" : { + + }, + "nd_mode" : 1, + "axis" : -1, + "debug_info" : "393_shape", + "top" : "393_shape", + "type" : "get_shape", + "name" : "393_shape" + }, + { + "top" : "gather_15_indices_0", + "w" : 1, + "h" : 1, + "name" : "gather_15_indices_0", + "nd_rank" : 1, + "type" : "load_constant", + "k" : 1, + "bottom" : "", + "debug_info" : "gather_15_indices_0", + "n" : 1, + "weights" : { + + }, + "constant_blob" : 191 + }, + { + "bottom" : "393_shape,gather_15_indices_0", + "validate_indices" : true, + "allow_negative_indices" : true, + "mode" : 0, + "weights" : { + + }, + "debug_info" : "gather_15", + "axis" : 0, + "top" : "gather_15", + "type" : "gather_nd", + "name" : "gather_15", + "batch_dims" : 0 + }, + { + "nB" : 768, + "top" : "397", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 195, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "hidden_states.25", + "blob_biases" : 193, + "has_tanh" : 0, + "debug_info" : "397", + "name" : "397", + "has_prelu" : 0 + }, + { + "bottom" : "397", + "alpha" : 0.125, + "operation" : 1, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "tensor.31", + "top" : "tensor.31", + "type" : "elementwise", + "name" : "tensor.31", + "beta" : 0 + }, + { + "name" : "401", + "weights" : { + + }, + "dst_w" : 64, + "version" : 1, + "dst_n" : -1, + "dst_nd_rank" : 4, + "type" : "reshape", + "dst_h" : 12, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "tensor.31", + "debug_info" : "401", + "dst_seq" : 1, + "dst_k" : 128, + "top" : "401" + }, + { + "nB" : 768, + "top" : "tensor.33", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 199, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "encoder_hidden_states", + "blob_biases" : 197, + "has_tanh" : 0, + "debug_info" : "tensor.33", + "name" : "tensor.33", + "has_prelu" : 0 + }, + { + "bottom" : "gather_15,24,11,10", + "weights" : { + + }, + "nd_mode" : true, + "axis" : 0, + "debug_info" : "concat_25", + "top" : "concat_25", + "type" : "general_concat", + "name" : "concat_25" + }, + { + "name" : "407", + "weights" : { + + }, + "dst_w" : 0, + "version" : 1, + "dst_n" : 0, + "type" : "reshape", + "dst_h" : 0, + "mode" : 0, + "bottom" : "tensor.33,concat_25", + "debug_info" : "407", + "nd_mode" : 1, + "dst_seq" : 1, + "dst_k" : 0, + "top" : "407" + }, + { + "has_prelu" : 0, + "top" : "tensor.35", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 203, + "type" : "inner_product", + "has_relu" : 0, + "attributes" : { + "basic_block_head" : 1 + }, + "bottom" : "encoder_hidden_states", + "debug_info" : "tensor.35", + "has_tanh" : 0, + "blob_biases" : 201, + "name" : "tensor.35", + "nB" : 768 + }, + { + "name" : "414", + "weights" : { + + }, + "dst_w" : 0, + "version" : 1, + "dst_n" : 0, + "type" : "reshape", + "dst_h" : 0, + "mode" : 0, + "bottom" : "tensor.35,concat_25", + "debug_info" : "414", + "nd_mode" : 1, + "dst_seq" : 1, + "dst_k" : 0, + "top" : "414" + }, + { + "axis_seq" : 4, + "name" : "transpose_142", + "axis_n" : 3, + "axis_h" : 0, + "type" : "transpose", + "attributes" : { + "basic_block_head" : 1 + }, + "bottom" : "407", + "axis_w" : 2, + "axis_k" : 1, + "debug_info" : "transpose_142", + "weights" : { + + }, + "top" : "transpose_142" + }, + { + "axis_h" : 2, + "axis_w" : 0, + "bottom" : "401", + "axis_k" : 1, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_143", + "top" : "transpose_143", + "type" : "transpose", + "name" : "transpose_143" + }, + { + "bottom" : "transpose_143,transpose_142", + "weights" : { + + }, + "debug_info" : "input.81", + "top" : "input.81", + "type" : "batch_matmul", + "name" : "input.81", + "channel_mode" : false + }, + { + "bottom" : "input.81", + "weights" : { + + }, + "nd_axis" : -1, + "debug_info" : "input.83", + "top" : "input.83", + "type" : "softmax_nd", + "name" : "input.83" + }, + { + "axis_h" : 2, + "axis_w" : 0, + "bottom" : "414", + "axis_k" : 1, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_144", + "top" : "transpose_144", + "type" : "transpose", + "name" : "transpose_144" + }, + { + "bottom" : "input.83,transpose_144", + "weights" : { + + }, + "debug_info" : "attn_output.21", + "top" : "attn_output.21", + "type" : "batch_matmul", + "name" : "attn_output.21", + "channel_mode" : false + }, + { + "axis_h" : 2, + "axis_w" : 0, + "bottom" : "attn_output.21", + "axis_k" : 1, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_141", + "top" : "transpose_141", + "type" : "transpose", + "name" : "transpose_141" + }, + { + "name" : "input.85", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 3, + "type" : "reshape", + "dst_h" : 128, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "transpose_141", + "debug_info" : "input.85", + "dst_seq" : 1, + "dst_k" : -1, + "top" : "input.85" + }, + { + "nB" : 768, + "top" : "input.87", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 207, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "input.85", + "blob_biases" : 205, + "has_tanh" : 0, + "debug_info" : "input.87", + "name" : "input.87", + "has_prelu" : 0 + }, + { + "bottom" : "input.79,input.87", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "input.89", + "nd_mode" : true, + "top" : "input.89", + "type" : "elementwise", + "name" : "input.89", + "beta" : 0 + }, + { + "name" : "input.91_reshape", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : -1, + "dst_nd_rank" : 4, + "type" : "reshape", + "dst_h" : 1, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "input.89", + "debug_info" : "input.91_reshape", + "dst_seq" : 1, + "dst_k" : 128, + "top" : "input.91_reshape" + }, + { + "bottom" : "input.91_reshape", + "weights" : { + + }, + "eps" : 9.9999997473787516e-06, + "debug_info" : "input.91_mvn", + "axis" : 1, + "normalization_mode" : 1, + "top" : "input.91_mvn", + "type" : "l2_normalize", + "name" : "input.91_mvn" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "input.91_scale", + "constant_blob" : 209, + "top" : "input.91_scale_constant_in_scale", + "w" : 768, + "type" : "load_constant", + "name" : "input.91_scale_constant_in_scale" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "input.91_scale", + "constant_blob" : 211, + "top" : "input.91_scale_constant_in_bias", + "w" : 768, + "type" : "load_constant", + "name" : "input.91_scale_constant_in_bias" + }, + { + "bottom" : "input.91_mvn,input.91_scale_constant_in_scale", + "alpha" : 1, + "operation" : 1, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "input.91_scale", + "top" : "input.91_scale_mul_out", + "type" : "elementwise", + "name" : "input.91_scale_mul_out", + "beta" : 0 + }, + { + "bottom" : "input.91_scale_mul_out,input.91_scale_constant_in_bias", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "input.91_scale", + "top" : "input.91_scale", + "type" : "elementwise", + "name" : "input.91_scale", + "beta" : 0 + }, + { + "name" : "input.91", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 3, + "type" : "reshape", + "dst_h" : 128, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "input.91_scale", + "debug_info" : "input.91", + "dst_seq" : 1, + "dst_k" : -1, + "top" : "input.91" + }, + { + "nB" : 768, + "top" : "input.93", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 3072, + "blob_weights" : 215, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "input.91", + "blob_biases" : 213, + "has_tanh" : 0, + "debug_info" : "input.93", + "name" : "input.93", + "has_prelu" : 0 + }, + { + "bottom" : "input.93", + "weights" : { + + }, + "mode" : 19, + "debug_info" : "input.95", + "top" : "input.95", + "type" : "activation", + "name" : "input.95" + }, + { + "nB" : 3072, + "top" : "input.99", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 219, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "input.95", + "blob_biases" : 217, + "has_tanh" : 0, + "debug_info" : "input.99", + "name" : "input.99", + "has_prelu" : 0 + }, + { + "bottom" : "input.89,input.99", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "input.101", + "top" : "input.101", + "type" : "elementwise", + "name" : "input.101", + "beta" : 0 + }, + { + "name" : "hidden_states.31_reshape", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : -1, + "dst_nd_rank" : 4, + "type" : "reshape", + "dst_h" : 1, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "input.101", + "debug_info" : "hidden_states.31_reshape", + "dst_seq" : 1, + "dst_k" : 128, + "top" : "hidden_states.31_reshape" + }, + { + "bottom" : "hidden_states.31_reshape", + "weights" : { + + }, + "eps" : 9.9999997473787516e-06, + "debug_info" : "hidden_states.31_mvn", + "axis" : 1, + "normalization_mode" : 1, + "top" : "hidden_states.31_mvn", + "type" : "l2_normalize", + "name" : "hidden_states.31_mvn" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "hidden_states.31_scale", + "constant_blob" : 221, + "top" : "hidden_states.31_scale_constant_in_scale", + "w" : 768, + "type" : "load_constant", + "name" : "hidden_states.31_scale_constant_in_scale" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "hidden_states.31_scale", + "constant_blob" : 223, + "top" : "hidden_states.31_scale_constant_in_bias", + "w" : 768, + "type" : "load_constant", + "name" : "hidden_states.31_scale_constant_in_bias" + }, + { + "bottom" : "hidden_states.31_mvn,hidden_states.31_scale_constant_in_scale", + "alpha" : 1, + "operation" : 1, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "hidden_states.31_scale", + "top" : "hidden_states.31_scale_mul_out", + "type" : "elementwise", + "name" : "hidden_states.31_scale_mul_out", + "beta" : 0 + }, + { + "bottom" : "hidden_states.31_scale_mul_out,hidden_states.31_scale_constant_in_bias", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "hidden_states.31_scale", + "top" : "hidden_states.31_scale", + "type" : "elementwise", + "name" : "hidden_states.31_scale", + "beta" : 0 + }, + { + "name" : "hidden_states.31", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 3, + "type" : "reshape", + "dst_h" : 128, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "hidden_states.31_scale", + "debug_info" : "hidden_states.31", + "dst_seq" : 1, + "dst_k" : -1, + "top" : "hidden_states.31" + }, + { + "bottom" : "hidden_states.31", + "weights" : { + + }, + "nd_mode" : 1, + "axis" : -1, + "debug_info" : "459_shape", + "top" : "459_shape", + "type" : "get_shape", + "name" : "459_shape" + }, + { + "top" : "gather_17_indices_0", + "w" : 1, + "h" : 1, + "name" : "gather_17_indices_0", + "nd_rank" : 1, + "type" : "load_constant", + "k" : 1, + "bottom" : "", + "debug_info" : "gather_17_indices_0", + "n" : 1, + "weights" : { + + }, + "constant_blob" : 225 + }, + { + "bottom" : "459_shape,gather_17_indices_0", + "validate_indices" : true, + "allow_negative_indices" : true, + "mode" : 0, + "weights" : { + + }, + "debug_info" : "gather_17", + "axis" : 0, + "top" : "gather_17", + "type" : "gather_nd", + "name" : "gather_17", + "batch_dims" : 0 + }, + { + "nB" : 768, + "top" : "463", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 229, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "hidden_states.31", + "blob_biases" : 227, + "has_tanh" : 0, + "debug_info" : "463", + "name" : "463", + "has_prelu" : 0 + }, + { + "bottom" : "463", + "alpha" : 0.125, + "operation" : 1, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "tensor.37", + "top" : "tensor.37", + "type" : "elementwise", + "name" : "tensor.37", + "beta" : 0 + }, + { + "name" : "467", + "weights" : { + + }, + "dst_w" : 64, + "version" : 1, + "dst_n" : -1, + "dst_nd_rank" : 4, + "type" : "reshape", + "dst_h" : 12, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "tensor.37", + "debug_info" : "467", + "dst_seq" : 1, + "dst_k" : 128, + "top" : "467" + }, + { + "nB" : 768, + "top" : "tensor.39", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 233, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "hidden_states.31", + "blob_biases" : 231, + "has_tanh" : 0, + "debug_info" : "tensor.39", + "name" : "tensor.39", + "has_prelu" : 0 + }, + { + "bottom" : "gather_17,24,11,10", + "weights" : { + + }, + "nd_mode" : true, + "axis" : 0, + "debug_info" : "concat_29", + "top" : "concat_29", + "type" : "general_concat", + "name" : "concat_29" + }, + { + "name" : "473", + "weights" : { + + }, + "dst_w" : 0, + "version" : 1, + "dst_n" : 0, + "type" : "reshape", + "dst_h" : 0, + "mode" : 0, + "bottom" : "tensor.39,concat_29", + "debug_info" : "473", + "nd_mode" : 1, + "dst_seq" : 1, + "dst_k" : 0, + "top" : "473" + }, + { + "has_prelu" : 0, + "top" : "tensor.41", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 237, + "type" : "inner_product", + "has_relu" : 0, + "attributes" : { + "basic_block_head" : 1 + }, + "bottom" : "hidden_states.31", + "debug_info" : "tensor.41", + "has_tanh" : 0, + "blob_biases" : 235, + "name" : "tensor.41", + "nB" : 768 + }, + { + "name" : "480", + "weights" : { + + }, + "dst_w" : 0, + "version" : 1, + "dst_n" : 0, + "type" : "reshape", + "dst_h" : 0, + "mode" : 0, + "bottom" : "tensor.41,concat_29", + "debug_info" : "480", + "nd_mode" : 1, + "dst_seq" : 1, + "dst_k" : 0, + "top" : "480" + }, + { + "axis_seq" : 4, + "name" : "transpose_139", + "axis_n" : 3, + "axis_h" : 2, + "type" : "transpose", + "attributes" : { + "basic_block_head" : 1 + }, + "bottom" : "473", + "axis_w" : 0, + "axis_k" : 1, + "debug_info" : "transpose_139", + "weights" : { + + }, + "top" : "transpose_139" + }, + { + "axis_h" : 0, + "axis_w" : 1, + "bottom" : "transpose_139", + "axis_k" : 2, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_137", + "top" : "transpose_137", + "type" : "transpose", + "name" : "transpose_137" + }, + { + "axis_h" : 2, + "axis_w" : 0, + "bottom" : "467", + "axis_k" : 1, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_140", + "top" : "transpose_140", + "type" : "transpose", + "name" : "transpose_140" + }, + { + "bottom" : "transpose_140,transpose_137", + "weights" : { + + }, + "debug_info" : "attn_weights.19", + "top" : "attn_weights.19", + "type" : "batch_matmul", + "name" : "attn_weights.19", + "channel_mode" : false + }, + { + "bottom" : "transpose_139", + "weights" : { + + }, + "nd_mode" : 1, + "axis" : -1, + "debug_info" : "485_shape", + "top" : "485_shape", + "type" : "get_shape", + "name" : "485_shape" + }, + { + "top" : "gather_19_indices_0", + "w" : 1, + "h" : 1, + "name" : "gather_19_indices_0", + "nd_rank" : 1, + "type" : "load_constant", + "k" : 1, + "bottom" : "", + "debug_info" : "gather_19_indices_0", + "n" : 1, + "weights" : { + + }, + "constant_blob" : 239 + }, + { + "bottom" : "485_shape,gather_19_indices_0", + "validate_indices" : true, + "allow_negative_indices" : true, + "mode" : 0, + "weights" : { + + }, + "debug_info" : "gather_19", + "axis" : 0, + "top" : "gather_19", + "type" : "gather_nd", + "name" : "gather_19", + "batch_dims" : 0 + }, + { + "top" : "concat_31_values0_0", + "w" : 1, + "h" : 1, + "name" : "concat_31_values0_0", + "nd_rank" : 1, + "type" : "load_constant", + "k" : 1, + "bottom" : "", + "debug_info" : "concat_31_values0_0", + "n" : 1, + "weights" : { + + }, + "constant_blob" : 241 + }, + { + "top" : "concat_31_values1_0", + "w" : 1, + "h" : 1, + "name" : "concat_31_values1_0", + "nd_rank" : 1, + "type" : "load_constant", + "k" : 1, + "bottom" : "", + "debug_info" : "concat_31_values1_0", + "n" : 1, + "weights" : { + + }, + "constant_blob" : 243 + }, + { + "top" : "concat_31_values2_0", + "w" : 1, + "h" : 1, + "name" : "concat_31_values2_0", + "nd_rank" : 1, + "type" : "load_constant", + "k" : 1, + "bottom" : "", + "debug_info" : "concat_31_values2_0", + "n" : 1, + "weights" : { + + }, + "constant_blob" : 245 + }, + { + "bottom" : "concat_31_values0_0,concat_31_values1_0,concat_31_values2_0,gather_19", + "weights" : { + + }, + "nd_mode" : true, + "axis" : 0, + "debug_info" : "concat_31", + "top" : "concat_31", + "type" : "general_concat", + "name" : "concat_31" + }, + { + "top" : "causal_mask.13_begin_0", + "w" : 4, + "h" : 1, + "name" : "causal_mask.13_begin_0", + "nd_rank" : 1, + "type" : "load_constant", + "k" : 1, + "bottom" : "", + "debug_info" : "causal_mask.13_begin_0", + "n" : 1, + "weights" : { + + }, + "constant_blob" : 247 + }, + { + "begin_n" : 0, + "static_sizes_k" : 0, + "end_mask_h" : true, + "end_w" : 1, + "end_h" : 1, + "begin_h" : 0, + "stride_n" : 1, + "end_seq" : 1, + "begin_mask_seq" : false, + "begin_mask_h" : false, + "squeeze_mask_h" : false, + "stride_k" : 1, + "type" : "general_slice", + "begin_mask_k" : false, + "static_sizes_w" : 0, + "begin_w" : 0, + "end_mask_k" : true, + "use_static_sizes" : false, + "stride_h" : 1, + "static_sizes_n" : 0, + "begin_mask_n" : false, + "weights" : { + + }, + "name" : "causal_mask.13", + "stride_seq" : 1, + "end_k" : 1, + "static_sizes_seq" : 0, + "top" : "causal_mask.13", + "begin_seq" : 0, + "squeeze_mask_k" : false, + "begin_k" : 0, + "end_mask_w" : true, + "end_mask_seq" : false, + "end_mask_n" : false, + "debug_info" : "causal_mask.13", + "stride_w" : 1, + "squeeze_mask_seq" : false, + "static_sizes_h" : 0, + "begin_mask_w" : false, + "bottom" : "attention_mask,causal_mask.13_begin_0,concat_31", + "end_n" : 1, + "squeeze_mask_w" : false, + "squeeze_mask_n" : false + }, + { + "name" : "input.103", + "fused_relu" : 0, + "beta" : 0, + "operation" : 0, + "type" : "elementwise", + "alpha" : 1, + "attributes" : { + "basic_block_head" : 1 + }, + "bottom" : "attn_weights.19,causal_mask.13", + "debug_info" : "input.103", + "nd_mode" : true, + "weights" : { + + }, + "top" : "input.103" + }, + { + "bottom" : "input.103", + "weights" : { + + }, + "nd_axis" : -1, + "debug_info" : "input.105", + "top" : "input.105", + "type" : "softmax_nd", + "name" : "input.105" + }, + { + "axis_h" : 2, + "axis_w" : 0, + "bottom" : "480", + "axis_k" : 1, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_138", + "top" : "transpose_138", + "type" : "transpose", + "name" : "transpose_138" + }, + { + "bottom" : "input.105,transpose_138", + "weights" : { + + }, + "debug_info" : "attn_output.25", + "top" : "attn_output.25", + "type" : "batch_matmul", + "name" : "attn_output.25", + "channel_mode" : false + }, + { + "axis_h" : 2, + "axis_w" : 0, + "bottom" : "attn_output.25", + "axis_k" : 1, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_136", + "top" : "transpose_136", + "type" : "transpose", + "name" : "transpose_136" + }, + { + "name" : "input.107", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 3, + "type" : "reshape", + "dst_h" : 128, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "transpose_136", + "debug_info" : "input.107", + "dst_seq" : 1, + "dst_k" : -1, + "top" : "input.107" + }, + { + "nB" : 768, + "top" : "input.109", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 251, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "input.107", + "blob_biases" : 249, + "has_tanh" : 0, + "debug_info" : "input.109", + "name" : "input.109", + "has_prelu" : 0 + }, + { + "bottom" : "input.101,input.109", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "input.111", + "nd_mode" : true, + "top" : "input.111", + "type" : "elementwise", + "name" : "input.111", + "beta" : 0 + }, + { + "name" : "hidden_states.35_reshape", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : -1, + "dst_nd_rank" : 4, + "type" : "reshape", + "dst_h" : 1, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "input.111", + "debug_info" : "hidden_states.35_reshape", + "dst_seq" : 1, + "dst_k" : 128, + "top" : "hidden_states.35_reshape" + }, + { + "bottom" : "hidden_states.35_reshape", + "weights" : { + + }, + "eps" : 9.9999997473787516e-06, + "debug_info" : "hidden_states.35_mvn", + "axis" : 1, + "normalization_mode" : 1, + "top" : "hidden_states.35_mvn", + "type" : "l2_normalize", + "name" : "hidden_states.35_mvn" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "hidden_states.35_scale", + "constant_blob" : 253, + "top" : "hidden_states.35_scale_constant_in_scale", + "w" : 768, + "type" : "load_constant", + "name" : "hidden_states.35_scale_constant_in_scale" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "hidden_states.35_scale", + "constant_blob" : 255, + "top" : "hidden_states.35_scale_constant_in_bias", + "w" : 768, + "type" : "load_constant", + "name" : "hidden_states.35_scale_constant_in_bias" + }, + { + "bottom" : "hidden_states.35_mvn,hidden_states.35_scale_constant_in_scale", + "alpha" : 1, + "operation" : 1, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "hidden_states.35_scale", + "top" : "hidden_states.35_scale_mul_out", + "type" : "elementwise", + "name" : "hidden_states.35_scale_mul_out", + "beta" : 0 + }, + { + "bottom" : "hidden_states.35_scale_mul_out,hidden_states.35_scale_constant_in_bias", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "hidden_states.35_scale", + "top" : "hidden_states.35_scale", + "type" : "elementwise", + "name" : "hidden_states.35_scale", + "beta" : 0 + }, + { + "name" : "hidden_states.35", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 3, + "type" : "reshape", + "dst_h" : 128, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "hidden_states.35_scale", + "debug_info" : "hidden_states.35", + "dst_seq" : 1, + "dst_k" : -1, + "top" : "hidden_states.35" + }, + { + "bottom" : "hidden_states.35", + "weights" : { + + }, + "nd_mode" : 1, + "axis" : -1, + "debug_info" : "510_shape", + "top" : "510_shape", + "type" : "get_shape", + "name" : "510_shape" + }, + { + "top" : "gather_20_indices_0", + "w" : 1, + "h" : 1, + "name" : "gather_20_indices_0", + "nd_rank" : 1, + "type" : "load_constant", + "k" : 1, + "bottom" : "", + "debug_info" : "gather_20_indices_0", + "n" : 1, + "weights" : { + + }, + "constant_blob" : 257 + }, + { + "bottom" : "510_shape,gather_20_indices_0", + "validate_indices" : true, + "allow_negative_indices" : true, + "mode" : 0, + "weights" : { + + }, + "debug_info" : "gather_20", + "axis" : 0, + "top" : "gather_20", + "type" : "gather_nd", + "name" : "gather_20", + "batch_dims" : 0 + }, + { + "nB" : 768, + "top" : "514", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 261, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "hidden_states.35", + "blob_biases" : 259, + "has_tanh" : 0, + "debug_info" : "514", + "name" : "514", + "has_prelu" : 0 + }, + { + "bottom" : "514", + "alpha" : 0.125, + "operation" : 1, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "tensor.43", + "top" : "tensor.43", + "type" : "elementwise", + "name" : "tensor.43", + "beta" : 0 + }, + { + "name" : "518", + "weights" : { + + }, + "dst_w" : 64, + "version" : 1, + "dst_n" : -1, + "dst_nd_rank" : 4, + "type" : "reshape", + "dst_h" : 12, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "tensor.43", + "debug_info" : "518", + "dst_seq" : 1, + "dst_k" : 128, + "top" : "518" + }, + { + "nB" : 768, + "top" : "tensor.45", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 265, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "encoder_hidden_states", + "blob_biases" : 263, + "has_tanh" : 0, + "debug_info" : "tensor.45", + "name" : "tensor.45", + "has_prelu" : 0 + }, + { + "bottom" : "gather_20,24,11,10", + "weights" : { + + }, + "nd_mode" : true, + "axis" : 0, + "debug_info" : "concat_34", + "top" : "concat_34", + "type" : "general_concat", + "name" : "concat_34" + }, + { + "name" : "524", + "weights" : { + + }, + "dst_w" : 0, + "version" : 1, + "dst_n" : 0, + "type" : "reshape", + "dst_h" : 0, + "mode" : 0, + "bottom" : "tensor.45,concat_34", + "debug_info" : "524", + "nd_mode" : 1, + "dst_seq" : 1, + "dst_k" : 0, + "top" : "524" + }, + { + "has_prelu" : 0, + "top" : "tensor.47", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 269, + "type" : "inner_product", + "has_relu" : 0, + "attributes" : { + "basic_block_head" : 1 + }, + "bottom" : "encoder_hidden_states", + "debug_info" : "tensor.47", + "has_tanh" : 0, + "blob_biases" : 267, + "name" : "tensor.47", + "nB" : 768 + }, + { + "name" : "531", + "weights" : { + + }, + "dst_w" : 0, + "version" : 1, + "dst_n" : 0, + "type" : "reshape", + "dst_h" : 0, + "mode" : 0, + "bottom" : "tensor.47,concat_34", + "debug_info" : "531", + "nd_mode" : 1, + "dst_seq" : 1, + "dst_k" : 0, + "top" : "531" + }, + { + "axis_seq" : 4, + "name" : "transpose_133", + "axis_n" : 3, + "axis_h" : 0, + "type" : "transpose", + "attributes" : { + "basic_block_head" : 1 + }, + "bottom" : "524", + "axis_w" : 2, + "axis_k" : 1, + "debug_info" : "transpose_133", + "weights" : { + + }, + "top" : "transpose_133" + }, + { + "axis_h" : 2, + "axis_w" : 0, + "bottom" : "518", + "axis_k" : 1, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_134", + "top" : "transpose_134", + "type" : "transpose", + "name" : "transpose_134" + }, + { + "bottom" : "transpose_134,transpose_133", + "weights" : { + + }, + "debug_info" : "input.113", + "top" : "input.113", + "type" : "batch_matmul", + "name" : "input.113", + "channel_mode" : false + }, + { + "bottom" : "input.113", + "weights" : { + + }, + "nd_axis" : -1, + "debug_info" : "input.115", + "top" : "input.115", + "type" : "softmax_nd", + "name" : "input.115" + }, + { + "axis_h" : 2, + "axis_w" : 0, + "bottom" : "531", + "axis_k" : 1, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_135", + "top" : "transpose_135", + "type" : "transpose", + "name" : "transpose_135" + }, + { + "bottom" : "input.115,transpose_135", + "weights" : { + + }, + "debug_info" : "attn_output.29", + "top" : "attn_output.29", + "type" : "batch_matmul", + "name" : "attn_output.29", + "channel_mode" : false + }, + { + "axis_h" : 2, + "axis_w" : 0, + "bottom" : "attn_output.29", + "axis_k" : 1, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_132", + "top" : "transpose_132", + "type" : "transpose", + "name" : "transpose_132" + }, + { + "name" : "input.117", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 3, + "type" : "reshape", + "dst_h" : 128, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "transpose_132", + "debug_info" : "input.117", + "dst_seq" : 1, + "dst_k" : -1, + "top" : "input.117" + }, + { + "nB" : 768, + "top" : "input.119", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 273, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "input.117", + "blob_biases" : 271, + "has_tanh" : 0, + "debug_info" : "input.119", + "name" : "input.119", + "has_prelu" : 0 + }, + { + "bottom" : "input.111,input.119", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "input.121", + "nd_mode" : true, + "top" : "input.121", + "type" : "elementwise", + "name" : "input.121", + "beta" : 0 + }, + { + "name" : "input.123_reshape", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : -1, + "dst_nd_rank" : 4, + "type" : "reshape", + "dst_h" : 1, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "input.121", + "debug_info" : "input.123_reshape", + "dst_seq" : 1, + "dst_k" : 128, + "top" : "input.123_reshape" + }, + { + "bottom" : "input.123_reshape", + "weights" : { + + }, + "eps" : 9.9999997473787516e-06, + "debug_info" : "input.123_mvn", + "axis" : 1, + "normalization_mode" : 1, + "top" : "input.123_mvn", + "type" : "l2_normalize", + "name" : "input.123_mvn" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "input.123_scale", + "constant_blob" : 275, + "top" : "input.123_scale_constant_in_scale", + "w" : 768, + "type" : "load_constant", + "name" : "input.123_scale_constant_in_scale" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "input.123_scale", + "constant_blob" : 277, + "top" : "input.123_scale_constant_in_bias", + "w" : 768, + "type" : "load_constant", + "name" : "input.123_scale_constant_in_bias" + }, + { + "bottom" : "input.123_mvn,input.123_scale_constant_in_scale", + "alpha" : 1, + "operation" : 1, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "input.123_scale", + "top" : "input.123_scale_mul_out", + "type" : "elementwise", + "name" : "input.123_scale_mul_out", + "beta" : 0 + }, + { + "bottom" : "input.123_scale_mul_out,input.123_scale_constant_in_bias", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "input.123_scale", + "top" : "input.123_scale", + "type" : "elementwise", + "name" : "input.123_scale", + "beta" : 0 + }, + { + "name" : "input.123", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 3, + "type" : "reshape", + "dst_h" : 128, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "input.123_scale", + "debug_info" : "input.123", + "dst_seq" : 1, + "dst_k" : -1, + "top" : "input.123" + }, + { + "nB" : 768, + "top" : "input.125", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 3072, + "blob_weights" : 281, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "input.123", + "blob_biases" : 279, + "has_tanh" : 0, + "debug_info" : "input.125", + "name" : "input.125", + "has_prelu" : 0 + }, + { + "bottom" : "input.125", + "weights" : { + + }, + "mode" : 19, + "debug_info" : "input.127", + "top" : "input.127", + "type" : "activation", + "name" : "input.127" + }, + { + "nB" : 3072, + "top" : "input.131", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 285, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "input.127", + "blob_biases" : 283, + "has_tanh" : 0, + "debug_info" : "input.131", + "name" : "input.131", + "has_prelu" : 0 + }, + { + "bottom" : "input.121,input.131", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "input.133", + "top" : "input.133", + "type" : "elementwise", + "name" : "input.133", + "beta" : 0 + }, + { + "name" : "hidden_states.41_reshape", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : -1, + "dst_nd_rank" : 4, + "type" : "reshape", + "dst_h" : 1, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "input.133", + "debug_info" : "hidden_states.41_reshape", + "dst_seq" : 1, + "dst_k" : 128, + "top" : "hidden_states.41_reshape" + }, + { + "bottom" : "hidden_states.41_reshape", + "weights" : { + + }, + "eps" : 9.9999997473787516e-06, + "debug_info" : "hidden_states.41_mvn", + "axis" : 1, + "normalization_mode" : 1, + "top" : "hidden_states.41_mvn", + "type" : "l2_normalize", + "name" : "hidden_states.41_mvn" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "hidden_states.41_scale", + "constant_blob" : 287, + "top" : "hidden_states.41_scale_constant_in_scale", + "w" : 768, + "type" : "load_constant", + "name" : "hidden_states.41_scale_constant_in_scale" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "hidden_states.41_scale", + "constant_blob" : 289, + "top" : "hidden_states.41_scale_constant_in_bias", + "w" : 768, + "type" : "load_constant", + "name" : "hidden_states.41_scale_constant_in_bias" + }, + { + "bottom" : "hidden_states.41_mvn,hidden_states.41_scale_constant_in_scale", + "alpha" : 1, + "operation" : 1, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "hidden_states.41_scale", + "top" : "hidden_states.41_scale_mul_out", + "type" : "elementwise", + "name" : "hidden_states.41_scale_mul_out", + "beta" : 0 + }, + { + "bottom" : "hidden_states.41_scale_mul_out,hidden_states.41_scale_constant_in_bias", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "hidden_states.41_scale", + "top" : "hidden_states.41_scale", + "type" : "elementwise", + "name" : "hidden_states.41_scale", + "beta" : 0 + }, + { + "name" : "hidden_states.41", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 3, + "type" : "reshape", + "dst_h" : 128, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "hidden_states.41_scale", + "debug_info" : "hidden_states.41", + "dst_seq" : 1, + "dst_k" : -1, + "top" : "hidden_states.41" + }, + { + "bottom" : "hidden_states.41", + "weights" : { + + }, + "nd_mode" : 1, + "axis" : -1, + "debug_info" : "576_shape", + "top" : "576_shape", + "type" : "get_shape", + "name" : "576_shape" + }, + { + "top" : "gather_22_indices_0", + "w" : 1, + "h" : 1, + "name" : "gather_22_indices_0", + "nd_rank" : 1, + "type" : "load_constant", + "k" : 1, + "bottom" : "", + "debug_info" : "gather_22_indices_0", + "n" : 1, + "weights" : { + + }, + "constant_blob" : 291 + }, + { + "bottom" : "576_shape,gather_22_indices_0", + "validate_indices" : true, + "allow_negative_indices" : true, + "mode" : 0, + "weights" : { + + }, + "debug_info" : "gather_22", + "axis" : 0, + "top" : "gather_22", + "type" : "gather_nd", + "name" : "gather_22", + "batch_dims" : 0 + }, + { + "nB" : 768, + "top" : "580", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 295, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "hidden_states.41", + "blob_biases" : 293, + "has_tanh" : 0, + "debug_info" : "580", + "name" : "580", + "has_prelu" : 0 + }, + { + "bottom" : "580", + "alpha" : 0.125, + "operation" : 1, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "tensor.49", + "top" : "tensor.49", + "type" : "elementwise", + "name" : "tensor.49", + "beta" : 0 + }, + { + "name" : "584", + "weights" : { + + }, + "dst_w" : 64, + "version" : 1, + "dst_n" : -1, + "dst_nd_rank" : 4, + "type" : "reshape", + "dst_h" : 12, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "tensor.49", + "debug_info" : "584", + "dst_seq" : 1, + "dst_k" : 128, + "top" : "584" + }, + { + "nB" : 768, + "top" : "tensor.51", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 299, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "hidden_states.41", + "blob_biases" : 297, + "has_tanh" : 0, + "debug_info" : "tensor.51", + "name" : "tensor.51", + "has_prelu" : 0 + }, + { + "bottom" : "gather_22,24,11,10", + "weights" : { + + }, + "nd_mode" : true, + "axis" : 0, + "debug_info" : "concat_38", + "top" : "concat_38", + "type" : "general_concat", + "name" : "concat_38" + }, + { + "name" : "590", + "weights" : { + + }, + "dst_w" : 0, + "version" : 1, + "dst_n" : 0, + "type" : "reshape", + "dst_h" : 0, + "mode" : 0, + "bottom" : "tensor.51,concat_38", + "debug_info" : "590", + "nd_mode" : 1, + "dst_seq" : 1, + "dst_k" : 0, + "top" : "590" + }, + { + "has_prelu" : 0, + "top" : "tensor.53", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 303, + "type" : "inner_product", + "has_relu" : 0, + "attributes" : { + "basic_block_head" : 1 + }, + "bottom" : "hidden_states.41", + "debug_info" : "tensor.53", + "has_tanh" : 0, + "blob_biases" : 301, + "name" : "tensor.53", + "nB" : 768 + }, + { + "name" : "597", + "weights" : { + + }, + "dst_w" : 0, + "version" : 1, + "dst_n" : 0, + "type" : "reshape", + "dst_h" : 0, + "mode" : 0, + "bottom" : "tensor.53,concat_38", + "debug_info" : "597", + "nd_mode" : 1, + "dst_seq" : 1, + "dst_k" : 0, + "top" : "597" + }, + { + "axis_seq" : 4, + "name" : "transpose_130", + "axis_n" : 3, + "axis_h" : 2, + "type" : "transpose", + "attributes" : { + "basic_block_head" : 1 + }, + "bottom" : "590", + "axis_w" : 0, + "axis_k" : 1, + "debug_info" : "transpose_130", + "weights" : { + + }, + "top" : "transpose_130" + }, + { + "axis_h" : 0, + "axis_w" : 1, + "bottom" : "transpose_130", + "axis_k" : 2, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_128", + "top" : "transpose_128", + "type" : "transpose", + "name" : "transpose_128" + }, + { + "axis_h" : 2, + "axis_w" : 0, + "bottom" : "584", + "axis_k" : 1, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_131", + "top" : "transpose_131", + "type" : "transpose", + "name" : "transpose_131" + }, + { + "bottom" : "transpose_131,transpose_128", + "weights" : { + + }, + "debug_info" : "attn_weights.25", + "top" : "attn_weights.25", + "type" : "batch_matmul", + "name" : "attn_weights.25", + "channel_mode" : false + }, + { + "bottom" : "transpose_130", + "weights" : { + + }, + "nd_mode" : 1, + "axis" : -1, + "debug_info" : "602_shape", + "top" : "602_shape", + "type" : "get_shape", + "name" : "602_shape" + }, + { + "top" : "gather_24_indices_0", + "w" : 1, + "h" : 1, + "name" : "gather_24_indices_0", + "nd_rank" : 1, + "type" : "load_constant", + "k" : 1, + "bottom" : "", + "debug_info" : "gather_24_indices_0", + "n" : 1, + "weights" : { + + }, + "constant_blob" : 305 + }, + { + "bottom" : "602_shape,gather_24_indices_0", + "validate_indices" : true, + "allow_negative_indices" : true, + "mode" : 0, + "weights" : { + + }, + "debug_info" : "gather_24", + "axis" : 0, + "top" : "gather_24", + "type" : "gather_nd", + "name" : "gather_24", + "batch_dims" : 0 + }, + { + "top" : "concat_40_values0_0", + "w" : 1, + "h" : 1, + "name" : "concat_40_values0_0", + "nd_rank" : 1, + "type" : "load_constant", + "k" : 1, + "bottom" : "", + "debug_info" : "concat_40_values0_0", + "n" : 1, + "weights" : { + + }, + "constant_blob" : 307 + }, + { + "top" : "concat_40_values1_0", + "w" : 1, + "h" : 1, + "name" : "concat_40_values1_0", + "nd_rank" : 1, + "type" : "load_constant", + "k" : 1, + "bottom" : "", + "debug_info" : "concat_40_values1_0", + "n" : 1, + "weights" : { + + }, + "constant_blob" : 309 + }, + { + "top" : "concat_40_values2_0", + "w" : 1, + "h" : 1, + "name" : "concat_40_values2_0", + "nd_rank" : 1, + "type" : "load_constant", + "k" : 1, + "bottom" : "", + "debug_info" : "concat_40_values2_0", + "n" : 1, + "weights" : { + + }, + "constant_blob" : 311 + }, + { + "bottom" : "concat_40_values0_0,concat_40_values1_0,concat_40_values2_0,gather_24", + "weights" : { + + }, + "nd_mode" : true, + "axis" : 0, + "debug_info" : "concat_40", + "top" : "concat_40", + "type" : "general_concat", + "name" : "concat_40" + }, + { + "top" : "causal_mask.15_begin_0", + "w" : 4, + "h" : 1, + "name" : "causal_mask.15_begin_0", + "nd_rank" : 1, + "type" : "load_constant", + "k" : 1, + "bottom" : "", + "debug_info" : "causal_mask.15_begin_0", + "n" : 1, + "weights" : { + + }, + "constant_blob" : 313 + }, + { + "begin_n" : 0, + "static_sizes_k" : 0, + "end_mask_h" : true, + "end_w" : 1, + "end_h" : 1, + "begin_h" : 0, + "stride_n" : 1, + "end_seq" : 1, + "begin_mask_seq" : false, + "begin_mask_h" : false, + "squeeze_mask_h" : false, + "stride_k" : 1, + "type" : "general_slice", + "begin_mask_k" : false, + "static_sizes_w" : 0, + "begin_w" : 0, + "end_mask_k" : true, + "use_static_sizes" : false, + "stride_h" : 1, + "static_sizes_n" : 0, + "begin_mask_n" : false, + "weights" : { + + }, + "name" : "causal_mask.15", + "stride_seq" : 1, + "end_k" : 1, + "static_sizes_seq" : 0, + "top" : "causal_mask.15", + "begin_seq" : 0, + "squeeze_mask_k" : false, + "begin_k" : 0, + "end_mask_w" : true, + "end_mask_seq" : false, + "end_mask_n" : false, + "debug_info" : "causal_mask.15", + "stride_w" : 1, + "squeeze_mask_seq" : false, + "static_sizes_h" : 0, + "begin_mask_w" : false, + "bottom" : "attention_mask,causal_mask.15_begin_0,concat_40", + "end_n" : 1, + "squeeze_mask_w" : false, + "squeeze_mask_n" : false + }, + { + "name" : "input.135", + "fused_relu" : 0, + "beta" : 0, + "operation" : 0, + "type" : "elementwise", + "alpha" : 1, + "attributes" : { + "basic_block_head" : 1 + }, + "bottom" : "attn_weights.25,causal_mask.15", + "debug_info" : "input.135", + "nd_mode" : true, + "weights" : { + + }, + "top" : "input.135" + }, + { + "bottom" : "input.135", + "weights" : { + + }, + "nd_axis" : -1, + "debug_info" : "input.137", + "top" : "input.137", + "type" : "softmax_nd", + "name" : "input.137" + }, + { + "axis_h" : 2, + "axis_w" : 0, + "bottom" : "597", + "axis_k" : 1, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_129", + "top" : "transpose_129", + "type" : "transpose", + "name" : "transpose_129" + }, + { + "bottom" : "input.137,transpose_129", + "weights" : { + + }, + "debug_info" : "attn_output.33", + "top" : "attn_output.33", + "type" : "batch_matmul", + "name" : "attn_output.33", + "channel_mode" : false + }, + { + "axis_h" : 2, + "axis_w" : 0, + "bottom" : "attn_output.33", + "axis_k" : 1, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_127", + "top" : "transpose_127", + "type" : "transpose", + "name" : "transpose_127" + }, + { + "name" : "input.139", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 3, + "type" : "reshape", + "dst_h" : 128, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "transpose_127", + "debug_info" : "input.139", + "dst_seq" : 1, + "dst_k" : -1, + "top" : "input.139" + }, + { + "nB" : 768, + "top" : "input.141", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 317, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "input.139", + "blob_biases" : 315, + "has_tanh" : 0, + "debug_info" : "input.141", + "name" : "input.141", + "has_prelu" : 0 + }, + { + "bottom" : "input.133,input.141", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "input.143", + "nd_mode" : true, + "top" : "input.143", + "type" : "elementwise", + "name" : "input.143", + "beta" : 0 + }, + { + "name" : "hidden_states.45_reshape", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : -1, + "dst_nd_rank" : 4, + "type" : "reshape", + "dst_h" : 1, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "input.143", + "debug_info" : "hidden_states.45_reshape", + "dst_seq" : 1, + "dst_k" : 128, + "top" : "hidden_states.45_reshape" + }, + { + "bottom" : "hidden_states.45_reshape", + "weights" : { + + }, + "eps" : 9.9999997473787516e-06, + "debug_info" : "hidden_states.45_mvn", + "axis" : 1, + "normalization_mode" : 1, + "top" : "hidden_states.45_mvn", + "type" : "l2_normalize", + "name" : "hidden_states.45_mvn" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "hidden_states.45_scale", + "constant_blob" : 319, + "top" : "hidden_states.45_scale_constant_in_scale", + "w" : 768, + "type" : "load_constant", + "name" : "hidden_states.45_scale_constant_in_scale" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "hidden_states.45_scale", + "constant_blob" : 321, + "top" : "hidden_states.45_scale_constant_in_bias", + "w" : 768, + "type" : "load_constant", + "name" : "hidden_states.45_scale_constant_in_bias" + }, + { + "bottom" : "hidden_states.45_mvn,hidden_states.45_scale_constant_in_scale", + "alpha" : 1, + "operation" : 1, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "hidden_states.45_scale", + "top" : "hidden_states.45_scale_mul_out", + "type" : "elementwise", + "name" : "hidden_states.45_scale_mul_out", + "beta" : 0 + }, + { + "bottom" : "hidden_states.45_scale_mul_out,hidden_states.45_scale_constant_in_bias", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "hidden_states.45_scale", + "top" : "hidden_states.45_scale", + "type" : "elementwise", + "name" : "hidden_states.45_scale", + "beta" : 0 + }, + { + "name" : "hidden_states.45", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 3, + "type" : "reshape", + "dst_h" : 128, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "hidden_states.45_scale", + "debug_info" : "hidden_states.45", + "dst_seq" : 1, + "dst_k" : -1, + "top" : "hidden_states.45" + }, + { + "bottom" : "hidden_states.45", + "weights" : { + + }, + "nd_mode" : 1, + "axis" : -1, + "debug_info" : "627_shape", + "top" : "627_shape", + "type" : "get_shape", + "name" : "627_shape" + }, + { + "top" : "gather_25_indices_0", + "w" : 1, + "h" : 1, + "name" : "gather_25_indices_0", + "nd_rank" : 1, + "type" : "load_constant", + "k" : 1, + "bottom" : "", + "debug_info" : "gather_25_indices_0", + "n" : 1, + "weights" : { + + }, + "constant_blob" : 323 + }, + { + "bottom" : "627_shape,gather_25_indices_0", + "validate_indices" : true, + "allow_negative_indices" : true, + "mode" : 0, + "weights" : { + + }, + "debug_info" : "gather_25", + "axis" : 0, + "top" : "gather_25", + "type" : "gather_nd", + "name" : "gather_25", + "batch_dims" : 0 + }, + { + "nB" : 768, + "top" : "631", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 327, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "hidden_states.45", + "blob_biases" : 325, + "has_tanh" : 0, + "debug_info" : "631", + "name" : "631", + "has_prelu" : 0 + }, + { + "bottom" : "631", + "alpha" : 0.125, + "operation" : 1, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "tensor.55", + "top" : "tensor.55", + "type" : "elementwise", + "name" : "tensor.55", + "beta" : 0 + }, + { + "name" : "635", + "weights" : { + + }, + "dst_w" : 64, + "version" : 1, + "dst_n" : -1, + "dst_nd_rank" : 4, + "type" : "reshape", + "dst_h" : 12, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "tensor.55", + "debug_info" : "635", + "dst_seq" : 1, + "dst_k" : 128, + "top" : "635" + }, + { + "nB" : 768, + "top" : "tensor.57", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 331, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "encoder_hidden_states", + "blob_biases" : 329, + "has_tanh" : 0, + "debug_info" : "tensor.57", + "name" : "tensor.57", + "has_prelu" : 0 + }, + { + "bottom" : "gather_25,24,11,10", + "weights" : { + + }, + "nd_mode" : true, + "axis" : 0, + "debug_info" : "concat_43", + "top" : "concat_43", + "type" : "general_concat", + "name" : "concat_43" + }, + { + "name" : "641", + "weights" : { + + }, + "dst_w" : 0, + "version" : 1, + "dst_n" : 0, + "type" : "reshape", + "dst_h" : 0, + "mode" : 0, + "bottom" : "tensor.57,concat_43", + "debug_info" : "641", + "nd_mode" : 1, + "dst_seq" : 1, + "dst_k" : 0, + "top" : "641" + }, + { + "has_prelu" : 0, + "top" : "tensor.59", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 335, + "type" : "inner_product", + "has_relu" : 0, + "attributes" : { + "basic_block_head" : 1 + }, + "bottom" : "encoder_hidden_states", + "debug_info" : "tensor.59", + "has_tanh" : 0, + "blob_biases" : 333, + "name" : "tensor.59", + "nB" : 768 + }, + { + "name" : "648", + "weights" : { + + }, + "dst_w" : 0, + "version" : 1, + "dst_n" : 0, + "type" : "reshape", + "dst_h" : 0, + "mode" : 0, + "bottom" : "tensor.59,concat_43", + "debug_info" : "648", + "nd_mode" : 1, + "dst_seq" : 1, + "dst_k" : 0, + "top" : "648" + }, + { + "axis_seq" : 4, + "name" : "transpose_124", + "axis_n" : 3, + "axis_h" : 0, + "type" : "transpose", + "attributes" : { + "basic_block_head" : 1 + }, + "bottom" : "641", + "axis_w" : 2, + "axis_k" : 1, + "debug_info" : "transpose_124", + "weights" : { + + }, + "top" : "transpose_124" + }, + { + "axis_h" : 2, + "axis_w" : 0, + "bottom" : "635", + "axis_k" : 1, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_125", + "top" : "transpose_125", + "type" : "transpose", + "name" : "transpose_125" + }, + { + "bottom" : "transpose_125,transpose_124", + "weights" : { + + }, + "debug_info" : "input.145", + "top" : "input.145", + "type" : "batch_matmul", + "name" : "input.145", + "channel_mode" : false + }, + { + "bottom" : "input.145", + "weights" : { + + }, + "nd_axis" : -1, + "debug_info" : "input.147", + "top" : "input.147", + "type" : "softmax_nd", + "name" : "input.147" + }, + { + "axis_h" : 2, + "axis_w" : 0, + "bottom" : "648", + "axis_k" : 1, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_126", + "top" : "transpose_126", + "type" : "transpose", + "name" : "transpose_126" + }, + { + "bottom" : "input.147,transpose_126", + "weights" : { + + }, + "debug_info" : "attn_output.37", + "top" : "attn_output.37", + "type" : "batch_matmul", + "name" : "attn_output.37", + "channel_mode" : false + }, + { + "axis_h" : 2, + "axis_w" : 0, + "bottom" : "attn_output.37", + "axis_k" : 1, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_123", + "top" : "transpose_123", + "type" : "transpose", + "name" : "transpose_123" + }, + { + "name" : "input.149", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 3, + "type" : "reshape", + "dst_h" : 128, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "transpose_123", + "debug_info" : "input.149", + "dst_seq" : 1, + "dst_k" : -1, + "top" : "input.149" + }, + { + "nB" : 768, + "top" : "input.151", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 339, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "input.149", + "blob_biases" : 337, + "has_tanh" : 0, + "debug_info" : "input.151", + "name" : "input.151", + "has_prelu" : 0 + }, + { + "bottom" : "input.143,input.151", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "input.153", + "nd_mode" : true, + "top" : "input.153", + "type" : "elementwise", + "name" : "input.153", + "beta" : 0 + }, + { + "name" : "input.155_reshape", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : -1, + "dst_nd_rank" : 4, + "type" : "reshape", + "dst_h" : 1, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "input.153", + "debug_info" : "input.155_reshape", + "dst_seq" : 1, + "dst_k" : 128, + "top" : "input.155_reshape" + }, + { + "bottom" : "input.155_reshape", + "weights" : { + + }, + "eps" : 9.9999997473787516e-06, + "debug_info" : "input.155_mvn", + "axis" : 1, + "normalization_mode" : 1, + "top" : "input.155_mvn", + "type" : "l2_normalize", + "name" : "input.155_mvn" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "input.155_scale", + "constant_blob" : 341, + "top" : "input.155_scale_constant_in_scale", + "w" : 768, + "type" : "load_constant", + "name" : "input.155_scale_constant_in_scale" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "input.155_scale", + "constant_blob" : 343, + "top" : "input.155_scale_constant_in_bias", + "w" : 768, + "type" : "load_constant", + "name" : "input.155_scale_constant_in_bias" + }, + { + "bottom" : "input.155_mvn,input.155_scale_constant_in_scale", + "alpha" : 1, + "operation" : 1, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "input.155_scale", + "top" : "input.155_scale_mul_out", + "type" : "elementwise", + "name" : "input.155_scale_mul_out", + "beta" : 0 + }, + { + "bottom" : "input.155_scale_mul_out,input.155_scale_constant_in_bias", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "input.155_scale", + "top" : "input.155_scale", + "type" : "elementwise", + "name" : "input.155_scale", + "beta" : 0 + }, + { + "name" : "input.155", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 3, + "type" : "reshape", + "dst_h" : 128, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "input.155_scale", + "debug_info" : "input.155", + "dst_seq" : 1, + "dst_k" : -1, + "top" : "input.155" + }, + { + "nB" : 768, + "top" : "input.157", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 3072, + "blob_weights" : 347, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "input.155", + "blob_biases" : 345, + "has_tanh" : 0, + "debug_info" : "input.157", + "name" : "input.157", + "has_prelu" : 0 + }, + { + "bottom" : "input.157", + "weights" : { + + }, + "mode" : 19, + "debug_info" : "input.159", + "top" : "input.159", + "type" : "activation", + "name" : "input.159" + }, + { + "nB" : 3072, + "top" : "input.163", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 351, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "input.159", + "blob_biases" : 349, + "has_tanh" : 0, + "debug_info" : "input.163", + "name" : "input.163", + "has_prelu" : 0 + }, + { + "bottom" : "input.153,input.163", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "input.165", + "top" : "input.165", + "type" : "elementwise", + "name" : "input.165", + "beta" : 0 + }, + { + "name" : "hidden_states.51_reshape", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : -1, + "dst_nd_rank" : 4, + "type" : "reshape", + "dst_h" : 1, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "input.165", + "debug_info" : "hidden_states.51_reshape", + "dst_seq" : 1, + "dst_k" : 128, + "top" : "hidden_states.51_reshape" + }, + { + "bottom" : "hidden_states.51_reshape", + "weights" : { + + }, + "eps" : 9.9999997473787516e-06, + "debug_info" : "hidden_states.51_mvn", + "axis" : 1, + "normalization_mode" : 1, + "top" : "hidden_states.51_mvn", + "type" : "l2_normalize", + "name" : "hidden_states.51_mvn" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "hidden_states.51_scale", + "constant_blob" : 353, + "top" : "hidden_states.51_scale_constant_in_scale", + "w" : 768, + "type" : "load_constant", + "name" : "hidden_states.51_scale_constant_in_scale" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "hidden_states.51_scale", + "constant_blob" : 355, + "top" : "hidden_states.51_scale_constant_in_bias", + "w" : 768, + "type" : "load_constant", + "name" : "hidden_states.51_scale_constant_in_bias" + }, + { + "bottom" : "hidden_states.51_mvn,hidden_states.51_scale_constant_in_scale", + "alpha" : 1, + "operation" : 1, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "hidden_states.51_scale", + "top" : "hidden_states.51_scale_mul_out", + "type" : "elementwise", + "name" : "hidden_states.51_scale_mul_out", + "beta" : 0 + }, + { + "bottom" : "hidden_states.51_scale_mul_out,hidden_states.51_scale_constant_in_bias", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "hidden_states.51_scale", + "top" : "hidden_states.51_scale", + "type" : "elementwise", + "name" : "hidden_states.51_scale", + "beta" : 0 + }, + { + "name" : "hidden_states.51", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 3, + "type" : "reshape", + "dst_h" : 128, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "hidden_states.51_scale", + "debug_info" : "hidden_states.51", + "dst_seq" : 1, + "dst_k" : -1, + "top" : "hidden_states.51" + }, + { + "bottom" : "hidden_states.51", + "weights" : { + + }, + "nd_mode" : 1, + "axis" : -1, + "debug_info" : "693_shape", + "top" : "693_shape", + "type" : "get_shape", + "name" : "693_shape" + }, + { + "top" : "gather_27_indices_0", + "w" : 1, + "h" : 1, + "name" : "gather_27_indices_0", + "nd_rank" : 1, + "type" : "load_constant", + "k" : 1, + "bottom" : "", + "debug_info" : "gather_27_indices_0", + "n" : 1, + "weights" : { + + }, + "constant_blob" : 357 + }, + { + "bottom" : "693_shape,gather_27_indices_0", + "validate_indices" : true, + "allow_negative_indices" : true, + "mode" : 0, + "weights" : { + + }, + "debug_info" : "gather_27", + "axis" : 0, + "top" : "gather_27", + "type" : "gather_nd", + "name" : "gather_27", + "batch_dims" : 0 + }, + { + "nB" : 768, + "top" : "697", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 361, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "hidden_states.51", + "blob_biases" : 359, + "has_tanh" : 0, + "debug_info" : "697", + "name" : "697", + "has_prelu" : 0 + }, + { + "bottom" : "697", + "alpha" : 0.125, + "operation" : 1, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "tensor.61", + "top" : "tensor.61", + "type" : "elementwise", + "name" : "tensor.61", + "beta" : 0 + }, + { + "name" : "701", + "weights" : { + + }, + "dst_w" : 64, + "version" : 1, + "dst_n" : -1, + "dst_nd_rank" : 4, + "type" : "reshape", + "dst_h" : 12, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "tensor.61", + "debug_info" : "701", + "dst_seq" : 1, + "dst_k" : 128, + "top" : "701" + }, + { + "nB" : 768, + "top" : "tensor.63", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 365, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "hidden_states.51", + "blob_biases" : 363, + "has_tanh" : 0, + "debug_info" : "tensor.63", + "name" : "tensor.63", + "has_prelu" : 0 + }, + { + "bottom" : "gather_27,24,11,10", + "weights" : { + + }, + "nd_mode" : true, + "axis" : 0, + "debug_info" : "concat_47", + "top" : "concat_47", + "type" : "general_concat", + "name" : "concat_47" + }, + { + "name" : "707", + "weights" : { + + }, + "dst_w" : 0, + "version" : 1, + "dst_n" : 0, + "type" : "reshape", + "dst_h" : 0, + "mode" : 0, + "bottom" : "tensor.63,concat_47", + "debug_info" : "707", + "nd_mode" : 1, + "dst_seq" : 1, + "dst_k" : 0, + "top" : "707" + }, + { + "has_prelu" : 0, + "top" : "tensor.65", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 369, + "type" : "inner_product", + "has_relu" : 0, + "attributes" : { + "basic_block_head" : 1 + }, + "bottom" : "hidden_states.51", + "debug_info" : "tensor.65", + "has_tanh" : 0, + "blob_biases" : 367, + "name" : "tensor.65", + "nB" : 768 + }, + { + "name" : "714", + "weights" : { + + }, + "dst_w" : 0, + "version" : 1, + "dst_n" : 0, + "type" : "reshape", + "dst_h" : 0, + "mode" : 0, + "bottom" : "tensor.65,concat_47", + "debug_info" : "714", + "nd_mode" : 1, + "dst_seq" : 1, + "dst_k" : 0, + "top" : "714" + }, + { + "axis_seq" : 4, + "name" : "transpose_121", + "axis_n" : 3, + "axis_h" : 2, + "type" : "transpose", + "attributes" : { + "basic_block_head" : 1 + }, + "bottom" : "707", + "axis_w" : 0, + "axis_k" : 1, + "debug_info" : "transpose_121", + "weights" : { + + }, + "top" : "transpose_121" + }, + { + "axis_h" : 0, + "axis_w" : 1, + "bottom" : "transpose_121", + "axis_k" : 2, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_119", + "top" : "transpose_119", + "type" : "transpose", + "name" : "transpose_119" + }, + { + "axis_h" : 2, + "axis_w" : 0, + "bottom" : "701", + "axis_k" : 1, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_122", + "top" : "transpose_122", + "type" : "transpose", + "name" : "transpose_122" + }, + { + "bottom" : "transpose_122,transpose_119", + "weights" : { + + }, + "debug_info" : "attn_weights.31", + "top" : "attn_weights.31", + "type" : "batch_matmul", + "name" : "attn_weights.31", + "channel_mode" : false + }, + { + "bottom" : "transpose_121", + "weights" : { + + }, + "nd_mode" : 1, + "axis" : -1, + "debug_info" : "719_shape", + "top" : "719_shape", + "type" : "get_shape", + "name" : "719_shape" + }, + { + "top" : "gather_29_indices_0", + "w" : 1, + "h" : 1, + "name" : "gather_29_indices_0", + "nd_rank" : 1, + "type" : "load_constant", + "k" : 1, + "bottom" : "", + "debug_info" : "gather_29_indices_0", + "n" : 1, + "weights" : { + + }, + "constant_blob" : 371 + }, + { + "bottom" : "719_shape,gather_29_indices_0", + "validate_indices" : true, + "allow_negative_indices" : true, + "mode" : 0, + "weights" : { + + }, + "debug_info" : "gather_29", + "axis" : 0, + "top" : "gather_29", + "type" : "gather_nd", + "name" : "gather_29", + "batch_dims" : 0 + }, + { + "top" : "concat_49_values0_0", + "w" : 1, + "h" : 1, + "name" : "concat_49_values0_0", + "nd_rank" : 1, + "type" : "load_constant", + "k" : 1, + "bottom" : "", + "debug_info" : "concat_49_values0_0", + "n" : 1, + "weights" : { + + }, + "constant_blob" : 373 + }, + { + "top" : "concat_49_values1_0", + "w" : 1, + "h" : 1, + "name" : "concat_49_values1_0", + "nd_rank" : 1, + "type" : "load_constant", + "k" : 1, + "bottom" : "", + "debug_info" : "concat_49_values1_0", + "n" : 1, + "weights" : { + + }, + "constant_blob" : 375 + }, + { + "top" : "concat_49_values2_0", + "w" : 1, + "h" : 1, + "name" : "concat_49_values2_0", + "nd_rank" : 1, + "type" : "load_constant", + "k" : 1, + "bottom" : "", + "debug_info" : "concat_49_values2_0", + "n" : 1, + "weights" : { + + }, + "constant_blob" : 377 + }, + { + "bottom" : "concat_49_values0_0,concat_49_values1_0,concat_49_values2_0,gather_29", + "weights" : { + + }, + "nd_mode" : true, + "axis" : 0, + "debug_info" : "concat_49", + "top" : "concat_49", + "type" : "general_concat", + "name" : "concat_49" + }, + { + "top" : "causal_mask.17_begin_0", + "w" : 4, + "h" : 1, + "name" : "causal_mask.17_begin_0", + "nd_rank" : 1, + "type" : "load_constant", + "k" : 1, + "bottom" : "", + "debug_info" : "causal_mask.17_begin_0", + "n" : 1, + "weights" : { + + }, + "constant_blob" : 379 + }, + { + "begin_n" : 0, + "static_sizes_k" : 0, + "end_mask_h" : true, + "end_w" : 1, + "end_h" : 1, + "begin_h" : 0, + "stride_n" : 1, + "end_seq" : 1, + "begin_mask_seq" : false, + "begin_mask_h" : false, + "squeeze_mask_h" : false, + "stride_k" : 1, + "type" : "general_slice", + "begin_mask_k" : false, + "static_sizes_w" : 0, + "begin_w" : 0, + "end_mask_k" : true, + "use_static_sizes" : false, + "stride_h" : 1, + "static_sizes_n" : 0, + "begin_mask_n" : false, + "weights" : { + + }, + "name" : "causal_mask.17", + "stride_seq" : 1, + "end_k" : 1, + "static_sizes_seq" : 0, + "top" : "causal_mask.17", + "begin_seq" : 0, + "squeeze_mask_k" : false, + "begin_k" : 0, + "end_mask_w" : true, + "end_mask_seq" : false, + "end_mask_n" : false, + "debug_info" : "causal_mask.17", + "stride_w" : 1, + "squeeze_mask_seq" : false, + "static_sizes_h" : 0, + "begin_mask_w" : false, + "bottom" : "attention_mask,causal_mask.17_begin_0,concat_49", + "end_n" : 1, + "squeeze_mask_w" : false, + "squeeze_mask_n" : false + }, + { + "name" : "input.167", + "fused_relu" : 0, + "beta" : 0, + "operation" : 0, + "type" : "elementwise", + "alpha" : 1, + "attributes" : { + "basic_block_head" : 1 + }, + "bottom" : "attn_weights.31,causal_mask.17", + "debug_info" : "input.167", + "nd_mode" : true, + "weights" : { + + }, + "top" : "input.167" + }, + { + "bottom" : "input.167", + "weights" : { + + }, + "nd_axis" : -1, + "debug_info" : "input.169", + "top" : "input.169", + "type" : "softmax_nd", + "name" : "input.169" + }, + { + "axis_h" : 2, + "axis_w" : 0, + "bottom" : "714", + "axis_k" : 1, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_120", + "top" : "transpose_120", + "type" : "transpose", + "name" : "transpose_120" + }, + { + "bottom" : "input.169,transpose_120", + "weights" : { + + }, + "debug_info" : "attn_output.41", + "top" : "attn_output.41", + "type" : "batch_matmul", + "name" : "attn_output.41", + "channel_mode" : false + }, + { + "axis_h" : 2, + "axis_w" : 0, + "bottom" : "attn_output.41", + "axis_k" : 1, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_118", + "top" : "transpose_118", + "type" : "transpose", + "name" : "transpose_118" + }, + { + "name" : "input.171", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 3, + "type" : "reshape", + "dst_h" : 128, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "transpose_118", + "debug_info" : "input.171", + "dst_seq" : 1, + "dst_k" : -1, + "top" : "input.171" + }, + { + "nB" : 768, + "top" : "input.173", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 383, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "input.171", + "blob_biases" : 381, + "has_tanh" : 0, + "debug_info" : "input.173", + "name" : "input.173", + "has_prelu" : 0 + }, + { + "bottom" : "input.165,input.173", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "input.175", + "nd_mode" : true, + "top" : "input.175", + "type" : "elementwise", + "name" : "input.175", + "beta" : 0 + }, + { + "name" : "hidden_states.55_reshape", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : -1, + "dst_nd_rank" : 4, + "type" : "reshape", + "dst_h" : 1, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "input.175", + "debug_info" : "hidden_states.55_reshape", + "dst_seq" : 1, + "dst_k" : 128, + "top" : "hidden_states.55_reshape" + }, + { + "bottom" : "hidden_states.55_reshape", + "weights" : { + + }, + "eps" : 9.9999997473787516e-06, + "debug_info" : "hidden_states.55_mvn", + "axis" : 1, + "normalization_mode" : 1, + "top" : "hidden_states.55_mvn", + "type" : "l2_normalize", + "name" : "hidden_states.55_mvn" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "hidden_states.55_scale", + "constant_blob" : 385, + "top" : "hidden_states.55_scale_constant_in_scale", + "w" : 768, + "type" : "load_constant", + "name" : "hidden_states.55_scale_constant_in_scale" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "hidden_states.55_scale", + "constant_blob" : 387, + "top" : "hidden_states.55_scale_constant_in_bias", + "w" : 768, + "type" : "load_constant", + "name" : "hidden_states.55_scale_constant_in_bias" + }, + { + "bottom" : "hidden_states.55_mvn,hidden_states.55_scale_constant_in_scale", + "alpha" : 1, + "operation" : 1, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "hidden_states.55_scale", + "top" : "hidden_states.55_scale_mul_out", + "type" : "elementwise", + "name" : "hidden_states.55_scale_mul_out", + "beta" : 0 + }, + { + "bottom" : "hidden_states.55_scale_mul_out,hidden_states.55_scale_constant_in_bias", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "hidden_states.55_scale", + "top" : "hidden_states.55_scale", + "type" : "elementwise", + "name" : "hidden_states.55_scale", + "beta" : 0 + }, + { + "name" : "hidden_states.55", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 3, + "type" : "reshape", + "dst_h" : 128, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "hidden_states.55_scale", + "debug_info" : "hidden_states.55", + "dst_seq" : 1, + "dst_k" : -1, + "top" : "hidden_states.55" + }, + { + "bottom" : "hidden_states.55", + "weights" : { + + }, + "nd_mode" : 1, + "axis" : -1, + "debug_info" : "744_shape", + "top" : "744_shape", + "type" : "get_shape", + "name" : "744_shape" + }, + { + "top" : "gather_30_indices_0", + "w" : 1, + "h" : 1, + "name" : "gather_30_indices_0", + "nd_rank" : 1, + "type" : "load_constant", + "k" : 1, + "bottom" : "", + "debug_info" : "gather_30_indices_0", + "n" : 1, + "weights" : { + + }, + "constant_blob" : 389 + }, + { + "bottom" : "744_shape,gather_30_indices_0", + "validate_indices" : true, + "allow_negative_indices" : true, + "mode" : 0, + "weights" : { + + }, + "debug_info" : "gather_30", + "axis" : 0, + "top" : "gather_30", + "type" : "gather_nd", + "name" : "gather_30", + "batch_dims" : 0 + }, + { + "nB" : 768, + "top" : "748", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 393, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "hidden_states.55", + "blob_biases" : 391, + "has_tanh" : 0, + "debug_info" : "748", + "name" : "748", + "has_prelu" : 0 + }, + { + "bottom" : "748", + "alpha" : 0.125, + "operation" : 1, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "tensor.67", + "top" : "tensor.67", + "type" : "elementwise", + "name" : "tensor.67", + "beta" : 0 + }, + { + "name" : "752", + "weights" : { + + }, + "dst_w" : 64, + "version" : 1, + "dst_n" : -1, + "dst_nd_rank" : 4, + "type" : "reshape", + "dst_h" : 12, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "tensor.67", + "debug_info" : "752", + "dst_seq" : 1, + "dst_k" : 128, + "top" : "752" + }, + { + "nB" : 768, + "top" : "tensor.69", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 397, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "encoder_hidden_states", + "blob_biases" : 395, + "has_tanh" : 0, + "debug_info" : "tensor.69", + "name" : "tensor.69", + "has_prelu" : 0 + }, + { + "bottom" : "gather_30,24,11,10", + "weights" : { + + }, + "nd_mode" : true, + "axis" : 0, + "debug_info" : "concat_52", + "top" : "concat_52", + "type" : "general_concat", + "name" : "concat_52" + }, + { + "name" : "758", + "weights" : { + + }, + "dst_w" : 0, + "version" : 1, + "dst_n" : 0, + "type" : "reshape", + "dst_h" : 0, + "mode" : 0, + "bottom" : "tensor.69,concat_52", + "debug_info" : "758", + "nd_mode" : 1, + "dst_seq" : 1, + "dst_k" : 0, + "top" : "758" + }, + { + "has_prelu" : 0, + "top" : "tensor.71", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 401, + "type" : "inner_product", + "has_relu" : 0, + "attributes" : { + "basic_block_head" : 1 + }, + "bottom" : "encoder_hidden_states", + "debug_info" : "tensor.71", + "has_tanh" : 0, + "blob_biases" : 399, + "name" : "tensor.71", + "nB" : 768 + }, + { + "name" : "765", + "weights" : { + + }, + "dst_w" : 0, + "version" : 1, + "dst_n" : 0, + "type" : "reshape", + "dst_h" : 0, + "mode" : 0, + "bottom" : "tensor.71,concat_52", + "debug_info" : "765", + "nd_mode" : 1, + "dst_seq" : 1, + "dst_k" : 0, + "top" : "765" + }, + { + "axis_seq" : 4, + "name" : "transpose_115", + "axis_n" : 3, + "axis_h" : 0, + "type" : "transpose", + "attributes" : { + "basic_block_head" : 1 + }, + "bottom" : "758", + "axis_w" : 2, + "axis_k" : 1, + "debug_info" : "transpose_115", + "weights" : { + + }, + "top" : "transpose_115" + }, + { + "axis_h" : 2, + "axis_w" : 0, + "bottom" : "752", + "axis_k" : 1, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_116", + "top" : "transpose_116", + "type" : "transpose", + "name" : "transpose_116" + }, + { + "bottom" : "transpose_116,transpose_115", + "weights" : { + + }, + "debug_info" : "input.177", + "top" : "input.177", + "type" : "batch_matmul", + "name" : "input.177", + "channel_mode" : false + }, + { + "bottom" : "input.177", + "weights" : { + + }, + "nd_axis" : -1, + "debug_info" : "input.179", + "top" : "input.179", + "type" : "softmax_nd", + "name" : "input.179" + }, + { + "axis_h" : 2, + "axis_w" : 0, + "bottom" : "765", + "axis_k" : 1, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_117", + "top" : "transpose_117", + "type" : "transpose", + "name" : "transpose_117" + }, + { + "bottom" : "input.179,transpose_117", + "weights" : { + + }, + "debug_info" : "attn_output.45", + "top" : "attn_output.45", + "type" : "batch_matmul", + "name" : "attn_output.45", + "channel_mode" : false + }, + { + "axis_h" : 2, + "axis_w" : 0, + "bottom" : "attn_output.45", + "axis_k" : 1, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_114", + "top" : "transpose_114", + "type" : "transpose", + "name" : "transpose_114" + }, + { + "name" : "input.181", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 3, + "type" : "reshape", + "dst_h" : 128, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "transpose_114", + "debug_info" : "input.181", + "dst_seq" : 1, + "dst_k" : -1, + "top" : "input.181" + }, + { + "nB" : 768, + "top" : "input.183", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 405, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "input.181", + "blob_biases" : 403, + "has_tanh" : 0, + "debug_info" : "input.183", + "name" : "input.183", + "has_prelu" : 0 + }, + { + "bottom" : "input.175,input.183", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "input.185", + "nd_mode" : true, + "top" : "input.185", + "type" : "elementwise", + "name" : "input.185", + "beta" : 0 + }, + { + "name" : "input.187_reshape", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : -1, + "dst_nd_rank" : 4, + "type" : "reshape", + "dst_h" : 1, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "input.185", + "debug_info" : "input.187_reshape", + "dst_seq" : 1, + "dst_k" : 128, + "top" : "input.187_reshape" + }, + { + "bottom" : "input.187_reshape", + "weights" : { + + }, + "eps" : 9.9999997473787516e-06, + "debug_info" : "input.187_mvn", + "axis" : 1, + "normalization_mode" : 1, + "top" : "input.187_mvn", + "type" : "l2_normalize", + "name" : "input.187_mvn" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "input.187_scale", + "constant_blob" : 407, + "top" : "input.187_scale_constant_in_scale", + "w" : 768, + "type" : "load_constant", + "name" : "input.187_scale_constant_in_scale" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "input.187_scale", + "constant_blob" : 409, + "top" : "input.187_scale_constant_in_bias", + "w" : 768, + "type" : "load_constant", + "name" : "input.187_scale_constant_in_bias" + }, + { + "bottom" : "input.187_mvn,input.187_scale_constant_in_scale", + "alpha" : 1, + "operation" : 1, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "input.187_scale", + "top" : "input.187_scale_mul_out", + "type" : "elementwise", + "name" : "input.187_scale_mul_out", + "beta" : 0 + }, + { + "bottom" : "input.187_scale_mul_out,input.187_scale_constant_in_bias", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "input.187_scale", + "top" : "input.187_scale", + "type" : "elementwise", + "name" : "input.187_scale", + "beta" : 0 + }, + { + "name" : "input.187", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 3, + "type" : "reshape", + "dst_h" : 128, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "input.187_scale", + "debug_info" : "input.187", + "dst_seq" : 1, + "dst_k" : -1, + "top" : "input.187" + }, + { + "nB" : 768, + "top" : "input.189", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 3072, + "blob_weights" : 413, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "input.187", + "blob_biases" : 411, + "has_tanh" : 0, + "debug_info" : "input.189", + "name" : "input.189", + "has_prelu" : 0 + }, + { + "bottom" : "input.189", + "weights" : { + + }, + "mode" : 19, + "debug_info" : "input.191", + "top" : "input.191", + "type" : "activation", + "name" : "input.191" + }, + { + "nB" : 3072, + "top" : "input.195", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 417, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "input.191", + "blob_biases" : 415, + "has_tanh" : 0, + "debug_info" : "input.195", + "name" : "input.195", + "has_prelu" : 0 + }, + { + "bottom" : "input.185,input.195", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "input.197", + "top" : "input.197", + "type" : "elementwise", + "name" : "input.197", + "beta" : 0 + }, + { + "name" : "hidden_states.61_reshape", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : -1, + "dst_nd_rank" : 4, + "type" : "reshape", + "dst_h" : 1, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "input.197", + "debug_info" : "hidden_states.61_reshape", + "dst_seq" : 1, + "dst_k" : 128, + "top" : "hidden_states.61_reshape" + }, + { + "bottom" : "hidden_states.61_reshape", + "weights" : { + + }, + "eps" : 9.9999997473787516e-06, + "debug_info" : "hidden_states.61_mvn", + "axis" : 1, + "normalization_mode" : 1, + "top" : "hidden_states.61_mvn", + "type" : "l2_normalize", + "name" : "hidden_states.61_mvn" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "hidden_states.61_scale", + "constant_blob" : 419, + "top" : "hidden_states.61_scale_constant_in_scale", + "w" : 768, + "type" : "load_constant", + "name" : "hidden_states.61_scale_constant_in_scale" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "hidden_states.61_scale", + "constant_blob" : 421, + "top" : "hidden_states.61_scale_constant_in_bias", + "w" : 768, + "type" : "load_constant", + "name" : "hidden_states.61_scale_constant_in_bias" + }, + { + "bottom" : "hidden_states.61_mvn,hidden_states.61_scale_constant_in_scale", + "alpha" : 1, + "operation" : 1, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "hidden_states.61_scale", + "top" : "hidden_states.61_scale_mul_out", + "type" : "elementwise", + "name" : "hidden_states.61_scale_mul_out", + "beta" : 0 + }, + { + "bottom" : "hidden_states.61_scale_mul_out,hidden_states.61_scale_constant_in_bias", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "hidden_states.61_scale", + "top" : "hidden_states.61_scale", + "type" : "elementwise", + "name" : "hidden_states.61_scale", + "beta" : 0 + }, + { + "name" : "hidden_states.61", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 3, + "type" : "reshape", + "dst_h" : 128, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "hidden_states.61_scale", + "debug_info" : "hidden_states.61", + "dst_seq" : 1, + "dst_k" : -1, + "top" : "hidden_states.61" + }, + { + "bottom" : "hidden_states.61", + "weights" : { + + }, + "nd_mode" : 1, + "axis" : -1, + "debug_info" : "810_shape", + "top" : "810_shape", + "type" : "get_shape", + "name" : "810_shape" + }, + { + "top" : "gather_32_indices_0", + "w" : 1, + "h" : 1, + "name" : "gather_32_indices_0", + "nd_rank" : 1, + "type" : "load_constant", + "k" : 1, + "bottom" : "", + "debug_info" : "gather_32_indices_0", + "n" : 1, + "weights" : { + + }, + "constant_blob" : 423 + }, + { + "bottom" : "810_shape,gather_32_indices_0", + "validate_indices" : true, + "allow_negative_indices" : true, + "mode" : 0, + "weights" : { + + }, + "debug_info" : "gather_32", + "axis" : 0, + "top" : "gather_32", + "type" : "gather_nd", + "name" : "gather_32", + "batch_dims" : 0 + }, + { + "nB" : 768, + "top" : "814", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 427, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "hidden_states.61", + "blob_biases" : 425, + "has_tanh" : 0, + "debug_info" : "814", + "name" : "814", + "has_prelu" : 0 + }, + { + "bottom" : "814", + "alpha" : 0.125, + "operation" : 1, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "tensor.73", + "top" : "tensor.73", + "type" : "elementwise", + "name" : "tensor.73", + "beta" : 0 + }, + { + "name" : "818", + "weights" : { + + }, + "dst_w" : 64, + "version" : 1, + "dst_n" : -1, + "dst_nd_rank" : 4, + "type" : "reshape", + "dst_h" : 12, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "tensor.73", + "debug_info" : "818", + "dst_seq" : 1, + "dst_k" : 128, + "top" : "818" + }, + { + "nB" : 768, + "top" : "tensor.75", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 431, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "hidden_states.61", + "blob_biases" : 429, + "has_tanh" : 0, + "debug_info" : "tensor.75", + "name" : "tensor.75", + "has_prelu" : 0 + }, + { + "bottom" : "gather_32,24,11,10", + "weights" : { + + }, + "nd_mode" : true, + "axis" : 0, + "debug_info" : "concat_56", + "top" : "concat_56", + "type" : "general_concat", + "name" : "concat_56" + }, + { + "name" : "824", + "weights" : { + + }, + "dst_w" : 0, + "version" : 1, + "dst_n" : 0, + "type" : "reshape", + "dst_h" : 0, + "mode" : 0, + "bottom" : "tensor.75,concat_56", + "debug_info" : "824", + "nd_mode" : 1, + "dst_seq" : 1, + "dst_k" : 0, + "top" : "824" + }, + { + "has_prelu" : 0, + "top" : "tensor.77", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 435, + "type" : "inner_product", + "has_relu" : 0, + "attributes" : { + "basic_block_head" : 1 + }, + "bottom" : "hidden_states.61", + "debug_info" : "tensor.77", + "has_tanh" : 0, + "blob_biases" : 433, + "name" : "tensor.77", + "nB" : 768 + }, + { + "name" : "831", + "weights" : { + + }, + "dst_w" : 0, + "version" : 1, + "dst_n" : 0, + "type" : "reshape", + "dst_h" : 0, + "mode" : 0, + "bottom" : "tensor.77,concat_56", + "debug_info" : "831", + "nd_mode" : 1, + "dst_seq" : 1, + "dst_k" : 0, + "top" : "831" + }, + { + "axis_seq" : 4, + "name" : "transpose_112", + "axis_n" : 3, + "axis_h" : 2, + "type" : "transpose", + "attributes" : { + "basic_block_head" : 1 + }, + "bottom" : "824", + "axis_w" : 0, + "axis_k" : 1, + "debug_info" : "transpose_112", + "weights" : { + + }, + "top" : "transpose_112" + }, + { + "axis_h" : 0, + "axis_w" : 1, + "bottom" : "transpose_112", + "axis_k" : 2, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_110", + "top" : "transpose_110", + "type" : "transpose", + "name" : "transpose_110" + }, + { + "axis_h" : 2, + "axis_w" : 0, + "bottom" : "818", + "axis_k" : 1, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_113", + "top" : "transpose_113", + "type" : "transpose", + "name" : "transpose_113" + }, + { + "bottom" : "transpose_113,transpose_110", + "weights" : { + + }, + "debug_info" : "attn_weights.37", + "top" : "attn_weights.37", + "type" : "batch_matmul", + "name" : "attn_weights.37", + "channel_mode" : false + }, + { + "bottom" : "transpose_112", + "weights" : { + + }, + "nd_mode" : 1, + "axis" : -1, + "debug_info" : "836_shape", + "top" : "836_shape", + "type" : "get_shape", + "name" : "836_shape" + }, + { + "top" : "gather_34_indices_0", + "w" : 1, + "h" : 1, + "name" : "gather_34_indices_0", + "nd_rank" : 1, + "type" : "load_constant", + "k" : 1, + "bottom" : "", + "debug_info" : "gather_34_indices_0", + "n" : 1, + "weights" : { + + }, + "constant_blob" : 437 + }, + { + "bottom" : "836_shape,gather_34_indices_0", + "validate_indices" : true, + "allow_negative_indices" : true, + "mode" : 0, + "weights" : { + + }, + "debug_info" : "gather_34", + "axis" : 0, + "top" : "gather_34", + "type" : "gather_nd", + "name" : "gather_34", + "batch_dims" : 0 + }, + { + "top" : "concat_58_values0_0", + "w" : 1, + "h" : 1, + "name" : "concat_58_values0_0", + "nd_rank" : 1, + "type" : "load_constant", + "k" : 1, + "bottom" : "", + "debug_info" : "concat_58_values0_0", + "n" : 1, + "weights" : { + + }, + "constant_blob" : 439 + }, + { + "top" : "concat_58_values1_0", + "w" : 1, + "h" : 1, + "name" : "concat_58_values1_0", + "nd_rank" : 1, + "type" : "load_constant", + "k" : 1, + "bottom" : "", + "debug_info" : "concat_58_values1_0", + "n" : 1, + "weights" : { + + }, + "constant_blob" : 441 + }, + { + "top" : "concat_58_values2_0", + "w" : 1, + "h" : 1, + "name" : "concat_58_values2_0", + "nd_rank" : 1, + "type" : "load_constant", + "k" : 1, + "bottom" : "", + "debug_info" : "concat_58_values2_0", + "n" : 1, + "weights" : { + + }, + "constant_blob" : 443 + }, + { + "bottom" : "concat_58_values0_0,concat_58_values1_0,concat_58_values2_0,gather_34", + "weights" : { + + }, + "nd_mode" : true, + "axis" : 0, + "debug_info" : "concat_58", + "top" : "concat_58", + "type" : "general_concat", + "name" : "concat_58" + }, + { + "top" : "causal_mask.19_begin_0", + "w" : 4, + "h" : 1, + "name" : "causal_mask.19_begin_0", + "nd_rank" : 1, + "type" : "load_constant", + "k" : 1, + "bottom" : "", + "debug_info" : "causal_mask.19_begin_0", + "n" : 1, + "weights" : { + + }, + "constant_blob" : 445 + }, + { + "begin_n" : 0, + "static_sizes_k" : 0, + "end_mask_h" : true, + "end_w" : 1, + "end_h" : 1, + "begin_h" : 0, + "stride_n" : 1, + "end_seq" : 1, + "begin_mask_seq" : false, + "begin_mask_h" : false, + "squeeze_mask_h" : false, + "stride_k" : 1, + "type" : "general_slice", + "begin_mask_k" : false, + "static_sizes_w" : 0, + "begin_w" : 0, + "end_mask_k" : true, + "use_static_sizes" : false, + "stride_h" : 1, + "static_sizes_n" : 0, + "begin_mask_n" : false, + "weights" : { + + }, + "name" : "causal_mask.19", + "stride_seq" : 1, + "end_k" : 1, + "static_sizes_seq" : 0, + "top" : "causal_mask.19", + "begin_seq" : 0, + "squeeze_mask_k" : false, + "begin_k" : 0, + "end_mask_w" : true, + "end_mask_seq" : false, + "end_mask_n" : false, + "debug_info" : "causal_mask.19", + "stride_w" : 1, + "squeeze_mask_seq" : false, + "static_sizes_h" : 0, + "begin_mask_w" : false, + "bottom" : "attention_mask,causal_mask.19_begin_0,concat_58", + "end_n" : 1, + "squeeze_mask_w" : false, + "squeeze_mask_n" : false + }, + { + "name" : "input.199", + "fused_relu" : 0, + "beta" : 0, + "operation" : 0, + "type" : "elementwise", + "alpha" : 1, + "attributes" : { + "basic_block_head" : 1 + }, + "bottom" : "attn_weights.37,causal_mask.19", + "debug_info" : "input.199", + "nd_mode" : true, + "weights" : { + + }, + "top" : "input.199" + }, + { + "bottom" : "input.199", + "weights" : { + + }, + "nd_axis" : -1, + "debug_info" : "input.201", + "top" : "input.201", + "type" : "softmax_nd", + "name" : "input.201" + }, + { + "axis_h" : 2, + "axis_w" : 0, + "bottom" : "831", + "axis_k" : 1, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_111", + "top" : "transpose_111", + "type" : "transpose", + "name" : "transpose_111" + }, + { + "bottom" : "input.201,transpose_111", + "weights" : { + + }, + "debug_info" : "attn_output.49", + "top" : "attn_output.49", + "type" : "batch_matmul", + "name" : "attn_output.49", + "channel_mode" : false + }, + { + "axis_h" : 2, + "axis_w" : 0, + "bottom" : "attn_output.49", + "axis_k" : 1, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_109", + "top" : "transpose_109", + "type" : "transpose", + "name" : "transpose_109" + }, + { + "name" : "input.203", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 3, + "type" : "reshape", + "dst_h" : 128, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "transpose_109", + "debug_info" : "input.203", + "dst_seq" : 1, + "dst_k" : -1, + "top" : "input.203" + }, + { + "nB" : 768, + "top" : "input.205", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 449, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "input.203", + "blob_biases" : 447, + "has_tanh" : 0, + "debug_info" : "input.205", + "name" : "input.205", + "has_prelu" : 0 + }, + { + "bottom" : "input.197,input.205", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "input.207", + "nd_mode" : true, + "top" : "input.207", + "type" : "elementwise", + "name" : "input.207", + "beta" : 0 + }, + { + "name" : "hidden_states.65_reshape", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : -1, + "dst_nd_rank" : 4, + "type" : "reshape", + "dst_h" : 1, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "input.207", + "debug_info" : "hidden_states.65_reshape", + "dst_seq" : 1, + "dst_k" : 128, + "top" : "hidden_states.65_reshape" + }, + { + "bottom" : "hidden_states.65_reshape", + "weights" : { + + }, + "eps" : 9.9999997473787516e-06, + "debug_info" : "hidden_states.65_mvn", + "axis" : 1, + "normalization_mode" : 1, + "top" : "hidden_states.65_mvn", + "type" : "l2_normalize", + "name" : "hidden_states.65_mvn" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "hidden_states.65_scale", + "constant_blob" : 451, + "top" : "hidden_states.65_scale_constant_in_scale", + "w" : 768, + "type" : "load_constant", + "name" : "hidden_states.65_scale_constant_in_scale" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "hidden_states.65_scale", + "constant_blob" : 453, + "top" : "hidden_states.65_scale_constant_in_bias", + "w" : 768, + "type" : "load_constant", + "name" : "hidden_states.65_scale_constant_in_bias" + }, + { + "bottom" : "hidden_states.65_mvn,hidden_states.65_scale_constant_in_scale", + "alpha" : 1, + "operation" : 1, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "hidden_states.65_scale", + "top" : "hidden_states.65_scale_mul_out", + "type" : "elementwise", + "name" : "hidden_states.65_scale_mul_out", + "beta" : 0 + }, + { + "bottom" : "hidden_states.65_scale_mul_out,hidden_states.65_scale_constant_in_bias", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "hidden_states.65_scale", + "top" : "hidden_states.65_scale", + "type" : "elementwise", + "name" : "hidden_states.65_scale", + "beta" : 0 + }, + { + "name" : "hidden_states.65", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 3, + "type" : "reshape", + "dst_h" : 128, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "hidden_states.65_scale", + "debug_info" : "hidden_states.65", + "dst_seq" : 1, + "dst_k" : -1, + "top" : "hidden_states.65" + }, + { + "bottom" : "hidden_states.65", + "weights" : { + + }, + "nd_mode" : 1, + "axis" : -1, + "debug_info" : "861_shape", + "top" : "861_shape", + "type" : "get_shape", + "name" : "861_shape" + }, + { + "top" : "gather_35_indices_0", + "w" : 1, + "h" : 1, + "name" : "gather_35_indices_0", + "nd_rank" : 1, + "type" : "load_constant", + "k" : 1, + "bottom" : "", + "debug_info" : "gather_35_indices_0", + "n" : 1, + "weights" : { + + }, + "constant_blob" : 455 + }, + { + "bottom" : "861_shape,gather_35_indices_0", + "validate_indices" : true, + "allow_negative_indices" : true, + "mode" : 0, + "weights" : { + + }, + "debug_info" : "gather_35", + "axis" : 0, + "top" : "gather_35", + "type" : "gather_nd", + "name" : "gather_35", + "batch_dims" : 0 + }, + { + "nB" : 768, + "top" : "865", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 459, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "hidden_states.65", + "blob_biases" : 457, + "has_tanh" : 0, + "debug_info" : "865", + "name" : "865", + "has_prelu" : 0 + }, + { + "bottom" : "865", + "alpha" : 0.125, + "operation" : 1, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "tensor.79", + "top" : "tensor.79", + "type" : "elementwise", + "name" : "tensor.79", + "beta" : 0 + }, + { + "name" : "869", + "weights" : { + + }, + "dst_w" : 64, + "version" : 1, + "dst_n" : -1, + "dst_nd_rank" : 4, + "type" : "reshape", + "dst_h" : 12, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "tensor.79", + "debug_info" : "869", + "dst_seq" : 1, + "dst_k" : 128, + "top" : "869" + }, + { + "nB" : 768, + "top" : "tensor.81", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 463, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "encoder_hidden_states", + "blob_biases" : 461, + "has_tanh" : 0, + "debug_info" : "tensor.81", + "name" : "tensor.81", + "has_prelu" : 0 + }, + { + "bottom" : "gather_35,24,11,10", + "weights" : { + + }, + "nd_mode" : true, + "axis" : 0, + "debug_info" : "concat_61", + "top" : "concat_61", + "type" : "general_concat", + "name" : "concat_61" + }, + { + "name" : "875", + "weights" : { + + }, + "dst_w" : 0, + "version" : 1, + "dst_n" : 0, + "type" : "reshape", + "dst_h" : 0, + "mode" : 0, + "bottom" : "tensor.81,concat_61", + "debug_info" : "875", + "nd_mode" : 1, + "dst_seq" : 1, + "dst_k" : 0, + "top" : "875" + }, + { + "has_prelu" : 0, + "top" : "tensor.83", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 467, + "type" : "inner_product", + "has_relu" : 0, + "attributes" : { + "basic_block_head" : 1 + }, + "bottom" : "encoder_hidden_states", + "debug_info" : "tensor.83", + "has_tanh" : 0, + "blob_biases" : 465, + "name" : "tensor.83", + "nB" : 768 + }, + { + "name" : "882", + "weights" : { + + }, + "dst_w" : 0, + "version" : 1, + "dst_n" : 0, + "type" : "reshape", + "dst_h" : 0, + "mode" : 0, + "bottom" : "tensor.83,concat_61", + "debug_info" : "882", + "nd_mode" : 1, + "dst_seq" : 1, + "dst_k" : 0, + "top" : "882" + }, + { + "axis_seq" : 4, + "name" : "transpose_106", + "axis_n" : 3, + "axis_h" : 0, + "type" : "transpose", + "attributes" : { + "basic_block_head" : 1 + }, + "bottom" : "875", + "axis_w" : 2, + "axis_k" : 1, + "debug_info" : "transpose_106", + "weights" : { + + }, + "top" : "transpose_106" + }, + { + "axis_h" : 2, + "axis_w" : 0, + "bottom" : "869", + "axis_k" : 1, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_107", + "top" : "transpose_107", + "type" : "transpose", + "name" : "transpose_107" + }, + { + "bottom" : "transpose_107,transpose_106", + "weights" : { + + }, + "debug_info" : "input.209", + "top" : "input.209", + "type" : "batch_matmul", + "name" : "input.209", + "channel_mode" : false + }, + { + "bottom" : "input.209", + "weights" : { + + }, + "nd_axis" : -1, + "debug_info" : "input.211", + "top" : "input.211", + "type" : "softmax_nd", + "name" : "input.211" + }, + { + "axis_h" : 2, + "axis_w" : 0, + "bottom" : "882", + "axis_k" : 1, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_108", + "top" : "transpose_108", + "type" : "transpose", + "name" : "transpose_108" + }, + { + "bottom" : "input.211,transpose_108", + "weights" : { + + }, + "debug_info" : "attn_output.53", + "top" : "attn_output.53", + "type" : "batch_matmul", + "name" : "attn_output.53", + "channel_mode" : false + }, + { + "axis_h" : 2, + "axis_w" : 0, + "bottom" : "attn_output.53", + "axis_k" : 1, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_105", + "top" : "transpose_105", + "type" : "transpose", + "name" : "transpose_105" + }, + { + "name" : "input.213", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 3, + "type" : "reshape", + "dst_h" : 128, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "transpose_105", + "debug_info" : "input.213", + "dst_seq" : 1, + "dst_k" : -1, + "top" : "input.213" + }, + { + "nB" : 768, + "top" : "input.215", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 471, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "input.213", + "blob_biases" : 469, + "has_tanh" : 0, + "debug_info" : "input.215", + "name" : "input.215", + "has_prelu" : 0 + }, + { + "bottom" : "input.207,input.215", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "input.217", + "nd_mode" : true, + "top" : "input.217", + "type" : "elementwise", + "name" : "input.217", + "beta" : 0 + }, + { + "name" : "input.219_reshape", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : -1, + "dst_nd_rank" : 4, + "type" : "reshape", + "dst_h" : 1, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "input.217", + "debug_info" : "input.219_reshape", + "dst_seq" : 1, + "dst_k" : 128, + "top" : "input.219_reshape" + }, + { + "bottom" : "input.219_reshape", + "weights" : { + + }, + "eps" : 9.9999997473787516e-06, + "debug_info" : "input.219_mvn", + "axis" : 1, + "normalization_mode" : 1, + "top" : "input.219_mvn", + "type" : "l2_normalize", + "name" : "input.219_mvn" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "input.219_scale", + "constant_blob" : 473, + "top" : "input.219_scale_constant_in_scale", + "w" : 768, + "type" : "load_constant", + "name" : "input.219_scale_constant_in_scale" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "input.219_scale", + "constant_blob" : 475, + "top" : "input.219_scale_constant_in_bias", + "w" : 768, + "type" : "load_constant", + "name" : "input.219_scale_constant_in_bias" + }, + { + "bottom" : "input.219_mvn,input.219_scale_constant_in_scale", + "alpha" : 1, + "operation" : 1, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "input.219_scale", + "top" : "input.219_scale_mul_out", + "type" : "elementwise", + "name" : "input.219_scale_mul_out", + "beta" : 0 + }, + { + "bottom" : "input.219_scale_mul_out,input.219_scale_constant_in_bias", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "input.219_scale", + "top" : "input.219_scale", + "type" : "elementwise", + "name" : "input.219_scale", + "beta" : 0 + }, + { + "name" : "input.219", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 3, + "type" : "reshape", + "dst_h" : 128, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "input.219_scale", + "debug_info" : "input.219", + "dst_seq" : 1, + "dst_k" : -1, + "top" : "input.219" + }, + { + "nB" : 768, + "top" : "input.221", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 3072, + "blob_weights" : 479, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "input.219", + "blob_biases" : 477, + "has_tanh" : 0, + "debug_info" : "input.221", + "name" : "input.221", + "has_prelu" : 0 + }, + { + "bottom" : "input.221", + "weights" : { + + }, + "mode" : 19, + "debug_info" : "input.223", + "top" : "input.223", + "type" : "activation", + "name" : "input.223" + }, + { + "nB" : 3072, + "top" : "input.227", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 483, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "input.223", + "blob_biases" : 481, + "has_tanh" : 0, + "debug_info" : "input.227", + "name" : "input.227", + "has_prelu" : 0 + }, + { + "bottom" : "input.217,input.227", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "input.229", + "top" : "input.229", + "type" : "elementwise", + "name" : "input.229", + "beta" : 0 + }, + { + "name" : "hidden_states.71_reshape", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : -1, + "dst_nd_rank" : 4, + "type" : "reshape", + "dst_h" : 1, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "input.229", + "debug_info" : "hidden_states.71_reshape", + "dst_seq" : 1, + "dst_k" : 128, + "top" : "hidden_states.71_reshape" + }, + { + "bottom" : "hidden_states.71_reshape", + "weights" : { + + }, + "eps" : 9.9999997473787516e-06, + "debug_info" : "hidden_states.71_mvn", + "axis" : 1, + "normalization_mode" : 1, + "top" : "hidden_states.71_mvn", + "type" : "l2_normalize", + "name" : "hidden_states.71_mvn" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "hidden_states.71_scale", + "constant_blob" : 485, + "top" : "hidden_states.71_scale_constant_in_scale", + "w" : 768, + "type" : "load_constant", + "name" : "hidden_states.71_scale_constant_in_scale" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "hidden_states.71_scale", + "constant_blob" : 487, + "top" : "hidden_states.71_scale_constant_in_bias", + "w" : 768, + "type" : "load_constant", + "name" : "hidden_states.71_scale_constant_in_bias" + }, + { + "bottom" : "hidden_states.71_mvn,hidden_states.71_scale_constant_in_scale", + "alpha" : 1, + "operation" : 1, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "hidden_states.71_scale", + "top" : "hidden_states.71_scale_mul_out", + "type" : "elementwise", + "name" : "hidden_states.71_scale_mul_out", + "beta" : 0 + }, + { + "bottom" : "hidden_states.71_scale_mul_out,hidden_states.71_scale_constant_in_bias", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "hidden_states.71_scale", + "top" : "hidden_states.71_scale", + "type" : "elementwise", + "name" : "hidden_states.71_scale", + "beta" : 0 + }, + { + "name" : "hidden_states.71", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 3, + "type" : "reshape", + "dst_h" : 128, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "hidden_states.71_scale", + "debug_info" : "hidden_states.71", + "dst_seq" : 1, + "dst_k" : -1, + "top" : "hidden_states.71" + }, + { + "bottom" : "hidden_states.71", + "weights" : { + + }, + "nd_mode" : 1, + "axis" : -1, + "debug_info" : "927_shape", + "top" : "927_shape", + "type" : "get_shape", + "name" : "927_shape" + }, + { + "top" : "gather_37_indices_0", + "w" : 1, + "h" : 1, + "name" : "gather_37_indices_0", + "nd_rank" : 1, + "type" : "load_constant", + "k" : 1, + "bottom" : "", + "debug_info" : "gather_37_indices_0", + "n" : 1, + "weights" : { + + }, + "constant_blob" : 489 + }, + { + "bottom" : "927_shape,gather_37_indices_0", + "validate_indices" : true, + "allow_negative_indices" : true, + "mode" : 0, + "weights" : { + + }, + "debug_info" : "gather_37", + "axis" : 0, + "top" : "gather_37", + "type" : "gather_nd", + "name" : "gather_37", + "batch_dims" : 0 + }, + { + "nB" : 768, + "top" : "931", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 493, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "hidden_states.71", + "blob_biases" : 491, + "has_tanh" : 0, + "debug_info" : "931", + "name" : "931", + "has_prelu" : 0 + }, + { + "bottom" : "931", + "alpha" : 0.125, + "operation" : 1, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "tensor.85", + "top" : "tensor.85", + "type" : "elementwise", + "name" : "tensor.85", + "beta" : 0 + }, + { + "name" : "935", + "weights" : { + + }, + "dst_w" : 64, + "version" : 1, + "dst_n" : -1, + "dst_nd_rank" : 4, + "type" : "reshape", + "dst_h" : 12, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "tensor.85", + "debug_info" : "935", + "dst_seq" : 1, + "dst_k" : 128, + "top" : "935" + }, + { + "nB" : 768, + "top" : "tensor.87", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 497, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "hidden_states.71", + "blob_biases" : 495, + "has_tanh" : 0, + "debug_info" : "tensor.87", + "name" : "tensor.87", + "has_prelu" : 0 + }, + { + "bottom" : "gather_37,24,11,10", + "weights" : { + + }, + "nd_mode" : true, + "axis" : 0, + "debug_info" : "concat_65", + "top" : "concat_65", + "type" : "general_concat", + "name" : "concat_65" + }, + { + "name" : "941", + "weights" : { + + }, + "dst_w" : 0, + "version" : 1, + "dst_n" : 0, + "type" : "reshape", + "dst_h" : 0, + "mode" : 0, + "bottom" : "tensor.87,concat_65", + "debug_info" : "941", + "nd_mode" : 1, + "dst_seq" : 1, + "dst_k" : 0, + "top" : "941" + }, + { + "has_prelu" : 0, + "top" : "tensor.89", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 501, + "type" : "inner_product", + "has_relu" : 0, + "attributes" : { + "basic_block_head" : 1 + }, + "bottom" : "hidden_states.71", + "debug_info" : "tensor.89", + "has_tanh" : 0, + "blob_biases" : 499, + "name" : "tensor.89", + "nB" : 768 + }, + { + "name" : "948", + "weights" : { + + }, + "dst_w" : 0, + "version" : 1, + "dst_n" : 0, + "type" : "reshape", + "dst_h" : 0, + "mode" : 0, + "bottom" : "tensor.89,concat_65", + "debug_info" : "948", + "nd_mode" : 1, + "dst_seq" : 1, + "dst_k" : 0, + "top" : "948" + }, + { + "axis_seq" : 4, + "name" : "transpose_103", + "axis_n" : 3, + "axis_h" : 2, + "type" : "transpose", + "attributes" : { + "basic_block_head" : 1 + }, + "bottom" : "941", + "axis_w" : 0, + "axis_k" : 1, + "debug_info" : "transpose_103", + "weights" : { + + }, + "top" : "transpose_103" + }, + { + "axis_h" : 0, + "axis_w" : 1, + "bottom" : "transpose_103", + "axis_k" : 2, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_101", + "top" : "transpose_101", + "type" : "transpose", + "name" : "transpose_101" + }, + { + "axis_h" : 2, + "axis_w" : 0, + "bottom" : "935", + "axis_k" : 1, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_104", + "top" : "transpose_104", + "type" : "transpose", + "name" : "transpose_104" + }, + { + "bottom" : "transpose_104,transpose_101", + "weights" : { + + }, + "debug_info" : "attn_weights.43", + "top" : "attn_weights.43", + "type" : "batch_matmul", + "name" : "attn_weights.43", + "channel_mode" : false + }, + { + "bottom" : "transpose_103", + "weights" : { + + }, + "nd_mode" : 1, + "axis" : -1, + "debug_info" : "953_shape", + "top" : "953_shape", + "type" : "get_shape", + "name" : "953_shape" + }, + { + "top" : "gather_39_indices_0", + "w" : 1, + "h" : 1, + "name" : "gather_39_indices_0", + "nd_rank" : 1, + "type" : "load_constant", + "k" : 1, + "bottom" : "", + "debug_info" : "gather_39_indices_0", + "n" : 1, + "weights" : { + + }, + "constant_blob" : 503 + }, + { + "bottom" : "953_shape,gather_39_indices_0", + "validate_indices" : true, + "allow_negative_indices" : true, + "mode" : 0, + "weights" : { + + }, + "debug_info" : "gather_39", + "axis" : 0, + "top" : "gather_39", + "type" : "gather_nd", + "name" : "gather_39", + "batch_dims" : 0 + }, + { + "top" : "concat_67_values0_0", + "w" : 1, + "h" : 1, + "name" : "concat_67_values0_0", + "nd_rank" : 1, + "type" : "load_constant", + "k" : 1, + "bottom" : "", + "debug_info" : "concat_67_values0_0", + "n" : 1, + "weights" : { + + }, + "constant_blob" : 505 + }, + { + "top" : "concat_67_values1_0", + "w" : 1, + "h" : 1, + "name" : "concat_67_values1_0", + "nd_rank" : 1, + "type" : "load_constant", + "k" : 1, + "bottom" : "", + "debug_info" : "concat_67_values1_0", + "n" : 1, + "weights" : { + + }, + "constant_blob" : 507 + }, + { + "top" : "concat_67_values2_0", + "w" : 1, + "h" : 1, + "name" : "concat_67_values2_0", + "nd_rank" : 1, + "type" : "load_constant", + "k" : 1, + "bottom" : "", + "debug_info" : "concat_67_values2_0", + "n" : 1, + "weights" : { + + }, + "constant_blob" : 509 + }, + { + "bottom" : "concat_67_values0_0,concat_67_values1_0,concat_67_values2_0,gather_39", + "weights" : { + + }, + "nd_mode" : true, + "axis" : 0, + "debug_info" : "concat_67", + "top" : "concat_67", + "type" : "general_concat", + "name" : "concat_67" + }, + { + "top" : "causal_mask.21_begin_0", + "w" : 4, + "h" : 1, + "name" : "causal_mask.21_begin_0", + "nd_rank" : 1, + "type" : "load_constant", + "k" : 1, + "bottom" : "", + "debug_info" : "causal_mask.21_begin_0", + "n" : 1, + "weights" : { + + }, + "constant_blob" : 511 + }, + { + "begin_n" : 0, + "static_sizes_k" : 0, + "end_mask_h" : true, + "end_w" : 1, + "end_h" : 1, + "begin_h" : 0, + "stride_n" : 1, + "end_seq" : 1, + "begin_mask_seq" : false, + "begin_mask_h" : false, + "squeeze_mask_h" : false, + "stride_k" : 1, + "type" : "general_slice", + "begin_mask_k" : false, + "static_sizes_w" : 0, + "begin_w" : 0, + "end_mask_k" : true, + "use_static_sizes" : false, + "stride_h" : 1, + "static_sizes_n" : 0, + "begin_mask_n" : false, + "weights" : { + + }, + "name" : "causal_mask.21", + "stride_seq" : 1, + "end_k" : 1, + "static_sizes_seq" : 0, + "top" : "causal_mask.21", + "begin_seq" : 0, + "squeeze_mask_k" : false, + "begin_k" : 0, + "end_mask_w" : true, + "end_mask_seq" : false, + "end_mask_n" : false, + "debug_info" : "causal_mask.21", + "stride_w" : 1, + "squeeze_mask_seq" : false, + "static_sizes_h" : 0, + "begin_mask_w" : false, + "bottom" : "attention_mask,causal_mask.21_begin_0,concat_67", + "end_n" : 1, + "squeeze_mask_w" : false, + "squeeze_mask_n" : false + }, + { + "name" : "input.231", + "fused_relu" : 0, + "beta" : 0, + "operation" : 0, + "type" : "elementwise", + "alpha" : 1, + "attributes" : { + "basic_block_head" : 1 + }, + "bottom" : "attn_weights.43,causal_mask.21", + "debug_info" : "input.231", + "nd_mode" : true, + "weights" : { + + }, + "top" : "input.231" + }, + { + "bottom" : "input.231", + "weights" : { + + }, + "nd_axis" : -1, + "debug_info" : "input.233", + "top" : "input.233", + "type" : "softmax_nd", + "name" : "input.233" + }, + { + "axis_h" : 2, + "axis_w" : 0, + "bottom" : "948", + "axis_k" : 1, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_102", + "top" : "transpose_102", + "type" : "transpose", + "name" : "transpose_102" + }, + { + "bottom" : "input.233,transpose_102", + "weights" : { + + }, + "debug_info" : "attn_output.57", + "top" : "attn_output.57", + "type" : "batch_matmul", + "name" : "attn_output.57", + "channel_mode" : false + }, + { + "axis_h" : 2, + "axis_w" : 0, + "bottom" : "attn_output.57", + "axis_k" : 1, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_100", + "top" : "transpose_100", + "type" : "transpose", + "name" : "transpose_100" + }, + { + "name" : "input.235", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 3, + "type" : "reshape", + "dst_h" : 128, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "transpose_100", + "debug_info" : "input.235", + "dst_seq" : 1, + "dst_k" : -1, + "top" : "input.235" + }, + { + "nB" : 768, + "top" : "input.237", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 515, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "input.235", + "blob_biases" : 513, + "has_tanh" : 0, + "debug_info" : "input.237", + "name" : "input.237", + "has_prelu" : 0 + }, + { + "bottom" : "input.229,input.237", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "input.239", + "nd_mode" : true, + "top" : "input.239", + "type" : "elementwise", + "name" : "input.239", + "beta" : 0 + }, + { + "name" : "hidden_states.75_reshape", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : -1, + "dst_nd_rank" : 4, + "type" : "reshape", + "dst_h" : 1, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "input.239", + "debug_info" : "hidden_states.75_reshape", + "dst_seq" : 1, + "dst_k" : 128, + "top" : "hidden_states.75_reshape" + }, + { + "bottom" : "hidden_states.75_reshape", + "weights" : { + + }, + "eps" : 9.9999997473787516e-06, + "debug_info" : "hidden_states.75_mvn", + "axis" : 1, + "normalization_mode" : 1, + "top" : "hidden_states.75_mvn", + "type" : "l2_normalize", + "name" : "hidden_states.75_mvn" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "hidden_states.75_scale", + "constant_blob" : 517, + "top" : "hidden_states.75_scale_constant_in_scale", + "w" : 768, + "type" : "load_constant", + "name" : "hidden_states.75_scale_constant_in_scale" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "hidden_states.75_scale", + "constant_blob" : 519, + "top" : "hidden_states.75_scale_constant_in_bias", + "w" : 768, + "type" : "load_constant", + "name" : "hidden_states.75_scale_constant_in_bias" + }, + { + "bottom" : "hidden_states.75_mvn,hidden_states.75_scale_constant_in_scale", + "alpha" : 1, + "operation" : 1, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "hidden_states.75_scale", + "top" : "hidden_states.75_scale_mul_out", + "type" : "elementwise", + "name" : "hidden_states.75_scale_mul_out", + "beta" : 0 + }, + { + "bottom" : "hidden_states.75_scale_mul_out,hidden_states.75_scale_constant_in_bias", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "hidden_states.75_scale", + "top" : "hidden_states.75_scale", + "type" : "elementwise", + "name" : "hidden_states.75_scale", + "beta" : 0 + }, + { + "name" : "hidden_states.75", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 3, + "type" : "reshape", + "dst_h" : 128, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "hidden_states.75_scale", + "debug_info" : "hidden_states.75", + "dst_seq" : 1, + "dst_k" : -1, + "top" : "hidden_states.75" + }, + { + "bottom" : "hidden_states.75", + "weights" : { + + }, + "nd_mode" : 1, + "axis" : -1, + "debug_info" : "978_shape", + "top" : "978_shape", + "type" : "get_shape", + "name" : "978_shape" + }, + { + "top" : "gather_40_indices_0", + "w" : 1, + "h" : 1, + "name" : "gather_40_indices_0", + "nd_rank" : 1, + "type" : "load_constant", + "k" : 1, + "bottom" : "", + "debug_info" : "gather_40_indices_0", + "n" : 1, + "weights" : { + + }, + "constant_blob" : 521 + }, + { + "bottom" : "978_shape,gather_40_indices_0", + "validate_indices" : true, + "allow_negative_indices" : true, + "mode" : 0, + "weights" : { + + }, + "debug_info" : "gather_40", + "axis" : 0, + "top" : "gather_40", + "type" : "gather_nd", + "name" : "gather_40", + "batch_dims" : 0 + }, + { + "nB" : 768, + "top" : "982", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 525, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "hidden_states.75", + "blob_biases" : 523, + "has_tanh" : 0, + "debug_info" : "982", + "name" : "982", + "has_prelu" : 0 + }, + { + "bottom" : "982", + "alpha" : 0.125, + "operation" : 1, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "tensor.91", + "top" : "tensor.91", + "type" : "elementwise", + "name" : "tensor.91", + "beta" : 0 + }, + { + "name" : "986", + "weights" : { + + }, + "dst_w" : 64, + "version" : 1, + "dst_n" : -1, + "dst_nd_rank" : 4, + "type" : "reshape", + "dst_h" : 12, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "tensor.91", + "debug_info" : "986", + "dst_seq" : 1, + "dst_k" : 128, + "top" : "986" + }, + { + "nB" : 768, + "top" : "tensor.93", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 529, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "encoder_hidden_states", + "blob_biases" : 527, + "has_tanh" : 0, + "debug_info" : "tensor.93", + "name" : "tensor.93", + "has_prelu" : 0 + }, + { + "bottom" : "gather_40,24,11,10", + "weights" : { + + }, + "nd_mode" : true, + "axis" : 0, + "debug_info" : "concat_70", + "top" : "concat_70", + "type" : "general_concat", + "name" : "concat_70" + }, + { + "name" : "992", + "weights" : { + + }, + "dst_w" : 0, + "version" : 1, + "dst_n" : 0, + "type" : "reshape", + "dst_h" : 0, + "mode" : 0, + "bottom" : "tensor.93,concat_70", + "debug_info" : "992", + "nd_mode" : 1, + "dst_seq" : 1, + "dst_k" : 0, + "top" : "992" + }, + { + "has_prelu" : 0, + "top" : "tensor.95", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 533, + "type" : "inner_product", + "has_relu" : 0, + "attributes" : { + "basic_block_head" : 1 + }, + "bottom" : "encoder_hidden_states", + "debug_info" : "tensor.95", + "has_tanh" : 0, + "blob_biases" : 531, + "name" : "tensor.95", + "nB" : 768 + }, + { + "name" : "999", + "weights" : { + + }, + "dst_w" : 0, + "version" : 1, + "dst_n" : 0, + "type" : "reshape", + "dst_h" : 0, + "mode" : 0, + "bottom" : "tensor.95,concat_70", + "debug_info" : "999", + "nd_mode" : 1, + "dst_seq" : 1, + "dst_k" : 0, + "top" : "999" + }, + { + "axis_seq" : 4, + "name" : "transpose_97", + "axis_n" : 3, + "axis_h" : 0, + "type" : "transpose", + "attributes" : { + "basic_block_head" : 1 + }, + "bottom" : "992", + "axis_w" : 2, + "axis_k" : 1, + "debug_info" : "transpose_97", + "weights" : { + + }, + "top" : "transpose_97" + }, + { + "axis_h" : 2, + "axis_w" : 0, + "bottom" : "986", + "axis_k" : 1, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_98", + "top" : "transpose_98", + "type" : "transpose", + "name" : "transpose_98" + }, + { + "bottom" : "transpose_98,transpose_97", + "weights" : { + + }, + "debug_info" : "input.241", + "top" : "input.241", + "type" : "batch_matmul", + "name" : "input.241", + "channel_mode" : false + }, + { + "bottom" : "input.241", + "weights" : { + + }, + "nd_axis" : -1, + "debug_info" : "input.243", + "top" : "input.243", + "type" : "softmax_nd", + "name" : "input.243" + }, + { + "axis_h" : 2, + "axis_w" : 0, + "bottom" : "999", + "axis_k" : 1, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_99", + "top" : "transpose_99", + "type" : "transpose", + "name" : "transpose_99" + }, + { + "bottom" : "input.243,transpose_99", + "weights" : { + + }, + "debug_info" : "attn_output.61", + "top" : "attn_output.61", + "type" : "batch_matmul", + "name" : "attn_output.61", + "channel_mode" : false + }, + { + "axis_h" : 2, + "axis_w" : 0, + "bottom" : "attn_output.61", + "axis_k" : 1, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_96", + "top" : "transpose_96", + "type" : "transpose", + "name" : "transpose_96" + }, + { + "name" : "input.245", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 3, + "type" : "reshape", + "dst_h" : 128, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "transpose_96", + "debug_info" : "input.245", + "dst_seq" : 1, + "dst_k" : -1, + "top" : "input.245" + }, + { + "nB" : 768, + "top" : "input.247", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 537, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "input.245", + "blob_biases" : 535, + "has_tanh" : 0, + "debug_info" : "input.247", + "name" : "input.247", + "has_prelu" : 0 + }, + { + "bottom" : "input.239,input.247", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "input.249", + "nd_mode" : true, + "top" : "input.249", + "type" : "elementwise", + "name" : "input.249", + "beta" : 0 + }, + { + "name" : "input.251_reshape", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : -1, + "dst_nd_rank" : 4, + "type" : "reshape", + "dst_h" : 1, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "input.249", + "debug_info" : "input.251_reshape", + "dst_seq" : 1, + "dst_k" : 128, + "top" : "input.251_reshape" + }, + { + "bottom" : "input.251_reshape", + "weights" : { + + }, + "eps" : 9.9999997473787516e-06, + "debug_info" : "input.251_mvn", + "axis" : 1, + "normalization_mode" : 1, + "top" : "input.251_mvn", + "type" : "l2_normalize", + "name" : "input.251_mvn" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "input.251_scale", + "constant_blob" : 539, + "top" : "input.251_scale_constant_in_scale", + "w" : 768, + "type" : "load_constant", + "name" : "input.251_scale_constant_in_scale" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "input.251_scale", + "constant_blob" : 541, + "top" : "input.251_scale_constant_in_bias", + "w" : 768, + "type" : "load_constant", + "name" : "input.251_scale_constant_in_bias" + }, + { + "bottom" : "input.251_mvn,input.251_scale_constant_in_scale", + "alpha" : 1, + "operation" : 1, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "input.251_scale", + "top" : "input.251_scale_mul_out", + "type" : "elementwise", + "name" : "input.251_scale_mul_out", + "beta" : 0 + }, + { + "bottom" : "input.251_scale_mul_out,input.251_scale_constant_in_bias", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "input.251_scale", + "top" : "input.251_scale", + "type" : "elementwise", + "name" : "input.251_scale", + "beta" : 0 + }, + { + "name" : "input.251", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 3, + "type" : "reshape", + "dst_h" : 128, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "input.251_scale", + "debug_info" : "input.251", + "dst_seq" : 1, + "dst_k" : -1, + "top" : "input.251" + }, + { + "nB" : 768, + "top" : "input.253", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 3072, + "blob_weights" : 545, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "input.251", + "blob_biases" : 543, + "has_tanh" : 0, + "debug_info" : "input.253", + "name" : "input.253", + "has_prelu" : 0 + }, + { + "bottom" : "input.253", + "weights" : { + + }, + "mode" : 19, + "debug_info" : "input.255", + "top" : "input.255", + "type" : "activation", + "name" : "input.255" + }, + { + "nB" : 3072, + "top" : "input.259", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 549, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "input.255", + "blob_biases" : 547, + "has_tanh" : 0, + "debug_info" : "input.259", + "name" : "input.259", + "has_prelu" : 0 + }, + { + "bottom" : "input.249,input.259", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "input.261", + "top" : "input.261", + "type" : "elementwise", + "name" : "input.261", + "beta" : 0 + }, + { + "name" : "hidden_states.81_reshape", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : -1, + "dst_nd_rank" : 4, + "type" : "reshape", + "dst_h" : 1, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "input.261", + "debug_info" : "hidden_states.81_reshape", + "dst_seq" : 1, + "dst_k" : 128, + "top" : "hidden_states.81_reshape" + }, + { + "bottom" : "hidden_states.81_reshape", + "weights" : { + + }, + "eps" : 9.9999997473787516e-06, + "debug_info" : "hidden_states.81_mvn", + "axis" : 1, + "normalization_mode" : 1, + "top" : "hidden_states.81_mvn", + "type" : "l2_normalize", + "name" : "hidden_states.81_mvn" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "hidden_states.81_scale", + "constant_blob" : 551, + "top" : "hidden_states.81_scale_constant_in_scale", + "w" : 768, + "type" : "load_constant", + "name" : "hidden_states.81_scale_constant_in_scale" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "hidden_states.81_scale", + "constant_blob" : 553, + "top" : "hidden_states.81_scale_constant_in_bias", + "w" : 768, + "type" : "load_constant", + "name" : "hidden_states.81_scale_constant_in_bias" + }, + { + "bottom" : "hidden_states.81_mvn,hidden_states.81_scale_constant_in_scale", + "alpha" : 1, + "operation" : 1, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "hidden_states.81_scale", + "top" : "hidden_states.81_scale_mul_out", + "type" : "elementwise", + "name" : "hidden_states.81_scale_mul_out", + "beta" : 0 + }, + { + "bottom" : "hidden_states.81_scale_mul_out,hidden_states.81_scale_constant_in_bias", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "hidden_states.81_scale", + "top" : "hidden_states.81_scale", + "type" : "elementwise", + "name" : "hidden_states.81_scale", + "beta" : 0 + }, + { + "name" : "hidden_states.81", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 3, + "type" : "reshape", + "dst_h" : 128, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "hidden_states.81_scale", + "debug_info" : "hidden_states.81", + "dst_seq" : 1, + "dst_k" : -1, + "top" : "hidden_states.81" + }, + { + "bottom" : "hidden_states.81", + "weights" : { + + }, + "nd_mode" : 1, + "axis" : -1, + "debug_info" : "1044_shape", + "top" : "1044_shape", + "type" : "get_shape", + "name" : "1044_shape" + }, + { + "top" : "gather_42_indices_0", + "w" : 1, + "h" : 1, + "name" : "gather_42_indices_0", + "nd_rank" : 1, + "type" : "load_constant", + "k" : 1, + "bottom" : "", + "debug_info" : "gather_42_indices_0", + "n" : 1, + "weights" : { + + }, + "constant_blob" : 555 + }, + { + "bottom" : "1044_shape,gather_42_indices_0", + "validate_indices" : true, + "allow_negative_indices" : true, + "mode" : 0, + "weights" : { + + }, + "debug_info" : "gather_42", + "axis" : 0, + "top" : "gather_42", + "type" : "gather_nd", + "name" : "gather_42", + "batch_dims" : 0 + }, + { + "nB" : 768, + "top" : "1048", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 559, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "hidden_states.81", + "blob_biases" : 557, + "has_tanh" : 0, + "debug_info" : "1048", + "name" : "1048", + "has_prelu" : 0 + }, + { + "bottom" : "1048", + "alpha" : 0.125, + "operation" : 1, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "tensor.97", + "top" : "tensor.97", + "type" : "elementwise", + "name" : "tensor.97", + "beta" : 0 + }, + { + "name" : "1052", + "weights" : { + + }, + "dst_w" : 64, + "version" : 1, + "dst_n" : -1, + "dst_nd_rank" : 4, + "type" : "reshape", + "dst_h" : 12, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "tensor.97", + "debug_info" : "1052", + "dst_seq" : 1, + "dst_k" : 128, + "top" : "1052" + }, + { + "nB" : 768, + "top" : "tensor.99", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 563, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "hidden_states.81", + "blob_biases" : 561, + "has_tanh" : 0, + "debug_info" : "tensor.99", + "name" : "tensor.99", + "has_prelu" : 0 + }, + { + "bottom" : "gather_42,24,11,10", + "weights" : { + + }, + "nd_mode" : true, + "axis" : 0, + "debug_info" : "concat_74", + "top" : "concat_74", + "type" : "general_concat", + "name" : "concat_74" + }, + { + "name" : "1058", + "weights" : { + + }, + "dst_w" : 0, + "version" : 1, + "dst_n" : 0, + "type" : "reshape", + "dst_h" : 0, + "mode" : 0, + "bottom" : "tensor.99,concat_74", + "debug_info" : "1058", + "nd_mode" : 1, + "dst_seq" : 1, + "dst_k" : 0, + "top" : "1058" + }, + { + "has_prelu" : 0, + "top" : "tensor.101", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 567, + "type" : "inner_product", + "has_relu" : 0, + "attributes" : { + "basic_block_head" : 1 + }, + "bottom" : "hidden_states.81", + "debug_info" : "tensor.101", + "has_tanh" : 0, + "blob_biases" : 565, + "name" : "tensor.101", + "nB" : 768 + }, + { + "name" : "1065", + "weights" : { + + }, + "dst_w" : 0, + "version" : 1, + "dst_n" : 0, + "type" : "reshape", + "dst_h" : 0, + "mode" : 0, + "bottom" : "tensor.101,concat_74", + "debug_info" : "1065", + "nd_mode" : 1, + "dst_seq" : 1, + "dst_k" : 0, + "top" : "1065" + }, + { + "axis_seq" : 4, + "name" : "transpose_94", + "axis_n" : 3, + "axis_h" : 2, + "type" : "transpose", + "attributes" : { + "basic_block_head" : 1 + }, + "bottom" : "1058", + "axis_w" : 0, + "axis_k" : 1, + "debug_info" : "transpose_94", + "weights" : { + + }, + "top" : "transpose_94" + }, + { + "axis_h" : 0, + "axis_w" : 1, + "bottom" : "transpose_94", + "axis_k" : 2, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_92", + "top" : "transpose_92", + "type" : "transpose", + "name" : "transpose_92" + }, + { + "axis_h" : 2, + "axis_w" : 0, + "bottom" : "1052", + "axis_k" : 1, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_95", + "top" : "transpose_95", + "type" : "transpose", + "name" : "transpose_95" + }, + { + "bottom" : "transpose_95,transpose_92", + "weights" : { + + }, + "debug_info" : "attn_weights.49", + "top" : "attn_weights.49", + "type" : "batch_matmul", + "name" : "attn_weights.49", + "channel_mode" : false + }, + { + "bottom" : "transpose_94", + "weights" : { + + }, + "nd_mode" : 1, + "axis" : -1, + "debug_info" : "1070_shape", + "top" : "1070_shape", + "type" : "get_shape", + "name" : "1070_shape" + }, + { + "top" : "gather_44_indices_0", + "w" : 1, + "h" : 1, + "name" : "gather_44_indices_0", + "nd_rank" : 1, + "type" : "load_constant", + "k" : 1, + "bottom" : "", + "debug_info" : "gather_44_indices_0", + "n" : 1, + "weights" : { + + }, + "constant_blob" : 569 + }, + { + "bottom" : "1070_shape,gather_44_indices_0", + "validate_indices" : true, + "allow_negative_indices" : true, + "mode" : 0, + "weights" : { + + }, + "debug_info" : "gather_44", + "axis" : 0, + "top" : "gather_44", + "type" : "gather_nd", + "name" : "gather_44", + "batch_dims" : 0 + }, + { + "top" : "concat_76_values0_0", + "w" : 1, + "h" : 1, + "name" : "concat_76_values0_0", + "nd_rank" : 1, + "type" : "load_constant", + "k" : 1, + "bottom" : "", + "debug_info" : "concat_76_values0_0", + "n" : 1, + "weights" : { + + }, + "constant_blob" : 571 + }, + { + "top" : "concat_76_values1_0", + "w" : 1, + "h" : 1, + "name" : "concat_76_values1_0", + "nd_rank" : 1, + "type" : "load_constant", + "k" : 1, + "bottom" : "", + "debug_info" : "concat_76_values1_0", + "n" : 1, + "weights" : { + + }, + "constant_blob" : 573 + }, + { + "top" : "concat_76_values2_0", + "w" : 1, + "h" : 1, + "name" : "concat_76_values2_0", + "nd_rank" : 1, + "type" : "load_constant", + "k" : 1, + "bottom" : "", + "debug_info" : "concat_76_values2_0", + "n" : 1, + "weights" : { + + }, + "constant_blob" : 575 + }, + { + "bottom" : "concat_76_values0_0,concat_76_values1_0,concat_76_values2_0,gather_44", + "weights" : { + + }, + "nd_mode" : true, + "axis" : 0, + "debug_info" : "concat_76", + "top" : "concat_76", + "type" : "general_concat", + "name" : "concat_76" + }, + { + "top" : "causal_mask.23_begin_0", + "w" : 4, + "h" : 1, + "name" : "causal_mask.23_begin_0", + "nd_rank" : 1, + "type" : "load_constant", + "k" : 1, + "bottom" : "", + "debug_info" : "causal_mask.23_begin_0", + "n" : 1, + "weights" : { + + }, + "constant_blob" : 577 + }, + { + "begin_n" : 0, + "static_sizes_k" : 0, + "end_mask_h" : true, + "end_w" : 1, + "end_h" : 1, + "begin_h" : 0, + "stride_n" : 1, + "end_seq" : 1, + "begin_mask_seq" : false, + "begin_mask_h" : false, + "squeeze_mask_h" : false, + "stride_k" : 1, + "type" : "general_slice", + "begin_mask_k" : false, + "static_sizes_w" : 0, + "begin_w" : 0, + "end_mask_k" : true, + "use_static_sizes" : false, + "stride_h" : 1, + "static_sizes_n" : 0, + "begin_mask_n" : false, + "weights" : { + + }, + "name" : "causal_mask.23", + "stride_seq" : 1, + "end_k" : 1, + "static_sizes_seq" : 0, + "top" : "causal_mask.23", + "begin_seq" : 0, + "squeeze_mask_k" : false, + "begin_k" : 0, + "end_mask_w" : true, + "end_mask_seq" : false, + "end_mask_n" : false, + "debug_info" : "causal_mask.23", + "stride_w" : 1, + "squeeze_mask_seq" : false, + "static_sizes_h" : 0, + "begin_mask_w" : false, + "bottom" : "attention_mask,causal_mask.23_begin_0,concat_76", + "end_n" : 1, + "squeeze_mask_w" : false, + "squeeze_mask_n" : false + }, + { + "name" : "input.263", + "fused_relu" : 0, + "beta" : 0, + "operation" : 0, + "type" : "elementwise", + "alpha" : 1, + "attributes" : { + "basic_block_head" : 1 + }, + "bottom" : "attn_weights.49,causal_mask.23", + "debug_info" : "input.263", + "nd_mode" : true, + "weights" : { + + }, + "top" : "input.263" + }, + { + "bottom" : "input.263", + "weights" : { + + }, + "nd_axis" : -1, + "debug_info" : "input.265", + "top" : "input.265", + "type" : "softmax_nd", + "name" : "input.265" + }, + { + "axis_h" : 2, + "axis_w" : 0, + "bottom" : "1065", + "axis_k" : 1, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_93", + "top" : "transpose_93", + "type" : "transpose", + "name" : "transpose_93" + }, + { + "bottom" : "input.265,transpose_93", + "weights" : { + + }, + "debug_info" : "attn_output.65", + "top" : "attn_output.65", + "type" : "batch_matmul", + "name" : "attn_output.65", + "channel_mode" : false + }, + { + "axis_h" : 2, + "axis_w" : 0, + "bottom" : "attn_output.65", + "axis_k" : 1, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_91", + "top" : "transpose_91", + "type" : "transpose", + "name" : "transpose_91" + }, + { + "name" : "input.267", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 3, + "type" : "reshape", + "dst_h" : 128, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "transpose_91", + "debug_info" : "input.267", + "dst_seq" : 1, + "dst_k" : -1, + "top" : "input.267" + }, + { + "nB" : 768, + "top" : "input.269", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 581, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "input.267", + "blob_biases" : 579, + "has_tanh" : 0, + "debug_info" : "input.269", + "name" : "input.269", + "has_prelu" : 0 + }, + { + "bottom" : "input.261,input.269", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "input.271", + "nd_mode" : true, + "top" : "input.271", + "type" : "elementwise", + "name" : "input.271", + "beta" : 0 + }, + { + "name" : "hidden_states.85_reshape", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : -1, + "dst_nd_rank" : 4, + "type" : "reshape", + "dst_h" : 1, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "input.271", + "debug_info" : "hidden_states.85_reshape", + "dst_seq" : 1, + "dst_k" : 128, + "top" : "hidden_states.85_reshape" + }, + { + "bottom" : "hidden_states.85_reshape", + "weights" : { + + }, + "eps" : 9.9999997473787516e-06, + "debug_info" : "hidden_states.85_mvn", + "axis" : 1, + "normalization_mode" : 1, + "top" : "hidden_states.85_mvn", + "type" : "l2_normalize", + "name" : "hidden_states.85_mvn" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "hidden_states.85_scale", + "constant_blob" : 583, + "top" : "hidden_states.85_scale_constant_in_scale", + "w" : 768, + "type" : "load_constant", + "name" : "hidden_states.85_scale_constant_in_scale" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "hidden_states.85_scale", + "constant_blob" : 585, + "top" : "hidden_states.85_scale_constant_in_bias", + "w" : 768, + "type" : "load_constant", + "name" : "hidden_states.85_scale_constant_in_bias" + }, + { + "bottom" : "hidden_states.85_mvn,hidden_states.85_scale_constant_in_scale", + "alpha" : 1, + "operation" : 1, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "hidden_states.85_scale", + "top" : "hidden_states.85_scale_mul_out", + "type" : "elementwise", + "name" : "hidden_states.85_scale_mul_out", + "beta" : 0 + }, + { + "bottom" : "hidden_states.85_scale_mul_out,hidden_states.85_scale_constant_in_bias", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "hidden_states.85_scale", + "top" : "hidden_states.85_scale", + "type" : "elementwise", + "name" : "hidden_states.85_scale", + "beta" : 0 + }, + { + "name" : "hidden_states.85", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 3, + "type" : "reshape", + "dst_h" : 128, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "hidden_states.85_scale", + "debug_info" : "hidden_states.85", + "dst_seq" : 1, + "dst_k" : -1, + "top" : "hidden_states.85" + }, + { + "bottom" : "hidden_states.85", + "weights" : { + + }, + "nd_mode" : 1, + "axis" : -1, + "debug_info" : "1095_shape", + "top" : "1095_shape", + "type" : "get_shape", + "name" : "1095_shape" + }, + { + "top" : "gather_45_indices_0", + "w" : 1, + "h" : 1, + "name" : "gather_45_indices_0", + "nd_rank" : 1, + "type" : "load_constant", + "k" : 1, + "bottom" : "", + "debug_info" : "gather_45_indices_0", + "n" : 1, + "weights" : { + + }, + "constant_blob" : 587 + }, + { + "bottom" : "1095_shape,gather_45_indices_0", + "validate_indices" : true, + "allow_negative_indices" : true, + "mode" : 0, + "weights" : { + + }, + "debug_info" : "gather_45", + "axis" : 0, + "top" : "gather_45", + "type" : "gather_nd", + "name" : "gather_45", + "batch_dims" : 0 + }, + { + "nB" : 768, + "top" : "1099", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 591, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "hidden_states.85", + "blob_biases" : 589, + "has_tanh" : 0, + "debug_info" : "1099", + "name" : "1099", + "has_prelu" : 0 + }, + { + "bottom" : "1099", + "alpha" : 0.125, + "operation" : 1, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "tensor.103", + "top" : "tensor.103", + "type" : "elementwise", + "name" : "tensor.103", + "beta" : 0 + }, + { + "name" : "1103", + "weights" : { + + }, + "dst_w" : 64, + "version" : 1, + "dst_n" : -1, + "dst_nd_rank" : 4, + "type" : "reshape", + "dst_h" : 12, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "tensor.103", + "debug_info" : "1103", + "dst_seq" : 1, + "dst_k" : 128, + "top" : "1103" + }, + { + "nB" : 768, + "top" : "tensor.105", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 595, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "encoder_hidden_states", + "blob_biases" : 593, + "has_tanh" : 0, + "debug_info" : "tensor.105", + "name" : "tensor.105", + "has_prelu" : 0 + }, + { + "bottom" : "gather_45,24,11,10", + "weights" : { + + }, + "nd_mode" : true, + "axis" : 0, + "debug_info" : "concat_79", + "top" : "concat_79", + "type" : "general_concat", + "name" : "concat_79" + }, + { + "name" : "1109", + "weights" : { + + }, + "dst_w" : 0, + "version" : 1, + "dst_n" : 0, + "type" : "reshape", + "dst_h" : 0, + "mode" : 0, + "bottom" : "tensor.105,concat_79", + "debug_info" : "1109", + "nd_mode" : 1, + "dst_seq" : 1, + "dst_k" : 0, + "top" : "1109" + }, + { + "has_prelu" : 0, + "top" : "tensor.107", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 599, + "type" : "inner_product", + "has_relu" : 0, + "attributes" : { + "basic_block_head" : 1 + }, + "bottom" : "encoder_hidden_states", + "debug_info" : "tensor.107", + "has_tanh" : 0, + "blob_biases" : 597, + "name" : "tensor.107", + "nB" : 768 + }, + { + "name" : "1116", + "weights" : { + + }, + "dst_w" : 0, + "version" : 1, + "dst_n" : 0, + "type" : "reshape", + "dst_h" : 0, + "mode" : 0, + "bottom" : "tensor.107,concat_79", + "debug_info" : "1116", + "nd_mode" : 1, + "dst_seq" : 1, + "dst_k" : 0, + "top" : "1116" + }, + { + "axis_seq" : 4, + "name" : "transpose_88", + "axis_n" : 3, + "axis_h" : 0, + "type" : "transpose", + "attributes" : { + "basic_block_head" : 1 + }, + "bottom" : "1109", + "axis_w" : 2, + "axis_k" : 1, + "debug_info" : "transpose_88", + "weights" : { + + }, + "top" : "transpose_88" + }, + { + "axis_h" : 2, + "axis_w" : 0, + "bottom" : "1103", + "axis_k" : 1, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_89", + "top" : "transpose_89", + "type" : "transpose", + "name" : "transpose_89" + }, + { + "bottom" : "transpose_89,transpose_88", + "weights" : { + + }, + "debug_info" : "input.273", + "top" : "input.273", + "type" : "batch_matmul", + "name" : "input.273", + "channel_mode" : false + }, + { + "bottom" : "input.273", + "weights" : { + + }, + "nd_axis" : -1, + "debug_info" : "input.275", + "top" : "input.275", + "type" : "softmax_nd", + "name" : "input.275" + }, + { + "axis_h" : 2, + "axis_w" : 0, + "bottom" : "1116", + "axis_k" : 1, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_90", + "top" : "transpose_90", + "type" : "transpose", + "name" : "transpose_90" + }, + { + "bottom" : "input.275,transpose_90", + "weights" : { + + }, + "debug_info" : "attn_output.69", + "top" : "attn_output.69", + "type" : "batch_matmul", + "name" : "attn_output.69", + "channel_mode" : false + }, + { + "axis_h" : 2, + "axis_w" : 0, + "bottom" : "attn_output.69", + "axis_k" : 1, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_87", + "top" : "transpose_87", + "type" : "transpose", + "name" : "transpose_87" + }, + { + "name" : "input.277", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 3, + "type" : "reshape", + "dst_h" : 128, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "transpose_87", + "debug_info" : "input.277", + "dst_seq" : 1, + "dst_k" : -1, + "top" : "input.277" + }, + { + "nB" : 768, + "top" : "input.279", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 603, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "input.277", + "blob_biases" : 601, + "has_tanh" : 0, + "debug_info" : "input.279", + "name" : "input.279", + "has_prelu" : 0 + }, + { + "bottom" : "input.271,input.279", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "input.281", + "nd_mode" : true, + "top" : "input.281", + "type" : "elementwise", + "name" : "input.281", + "beta" : 0 + }, + { + "name" : "input.283_reshape", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : -1, + "dst_nd_rank" : 4, + "type" : "reshape", + "dst_h" : 1, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "input.281", + "debug_info" : "input.283_reshape", + "dst_seq" : 1, + "dst_k" : 128, + "top" : "input.283_reshape" + }, + { + "bottom" : "input.283_reshape", + "weights" : { + + }, + "eps" : 9.9999997473787516e-06, + "debug_info" : "input.283_mvn", + "axis" : 1, + "normalization_mode" : 1, + "top" : "input.283_mvn", + "type" : "l2_normalize", + "name" : "input.283_mvn" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "input.283_scale", + "constant_blob" : 605, + "top" : "input.283_scale_constant_in_scale", + "w" : 768, + "type" : "load_constant", + "name" : "input.283_scale_constant_in_scale" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "input.283_scale", + "constant_blob" : 607, + "top" : "input.283_scale_constant_in_bias", + "w" : 768, + "type" : "load_constant", + "name" : "input.283_scale_constant_in_bias" + }, + { + "bottom" : "input.283_mvn,input.283_scale_constant_in_scale", + "alpha" : 1, + "operation" : 1, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "input.283_scale", + "top" : "input.283_scale_mul_out", + "type" : "elementwise", + "name" : "input.283_scale_mul_out", + "beta" : 0 + }, + { + "bottom" : "input.283_scale_mul_out,input.283_scale_constant_in_bias", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "input.283_scale", + "top" : "input.283_scale", + "type" : "elementwise", + "name" : "input.283_scale", + "beta" : 0 + }, + { + "name" : "input.283", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 3, + "type" : "reshape", + "dst_h" : 128, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "input.283_scale", + "debug_info" : "input.283", + "dst_seq" : 1, + "dst_k" : -1, + "top" : "input.283" + }, + { + "nB" : 768, + "top" : "input.285", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 3072, + "blob_weights" : 611, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "input.283", + "blob_biases" : 609, + "has_tanh" : 0, + "debug_info" : "input.285", + "name" : "input.285", + "has_prelu" : 0 + }, + { + "bottom" : "input.285", + "weights" : { + + }, + "mode" : 19, + "debug_info" : "input.287", + "top" : "input.287", + "type" : "activation", + "name" : "input.287" + }, + { + "nB" : 3072, + "top" : "input.291", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 615, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "input.287", + "blob_biases" : 613, + "has_tanh" : 0, + "debug_info" : "input.291", + "name" : "input.291", + "has_prelu" : 0 + }, + { + "bottom" : "input.281,input.291", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "input.293", + "top" : "input.293", + "type" : "elementwise", + "name" : "input.293", + "beta" : 0 + }, + { + "name" : "hidden_states.91_reshape", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : -1, + "dst_nd_rank" : 4, + "type" : "reshape", + "dst_h" : 1, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "input.293", + "debug_info" : "hidden_states.91_reshape", + "dst_seq" : 1, + "dst_k" : 128, + "top" : "hidden_states.91_reshape" + }, + { + "bottom" : "hidden_states.91_reshape", + "weights" : { + + }, + "eps" : 9.9999997473787516e-06, + "debug_info" : "hidden_states.91_mvn", + "axis" : 1, + "normalization_mode" : 1, + "top" : "hidden_states.91_mvn", + "type" : "l2_normalize", + "name" : "hidden_states.91_mvn" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "hidden_states.91_scale", + "constant_blob" : 617, + "top" : "hidden_states.91_scale_constant_in_scale", + "w" : 768, + "type" : "load_constant", + "name" : "hidden_states.91_scale_constant_in_scale" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "hidden_states.91_scale", + "constant_blob" : 619, + "top" : "hidden_states.91_scale_constant_in_bias", + "w" : 768, + "type" : "load_constant", + "name" : "hidden_states.91_scale_constant_in_bias" + }, + { + "bottom" : "hidden_states.91_mvn,hidden_states.91_scale_constant_in_scale", + "alpha" : 1, + "operation" : 1, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "hidden_states.91_scale", + "top" : "hidden_states.91_scale_mul_out", + "type" : "elementwise", + "name" : "hidden_states.91_scale_mul_out", + "beta" : 0 + }, + { + "bottom" : "hidden_states.91_scale_mul_out,hidden_states.91_scale_constant_in_bias", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "hidden_states.91_scale", + "top" : "hidden_states.91_scale", + "type" : "elementwise", + "name" : "hidden_states.91_scale", + "beta" : 0 + }, + { + "name" : "hidden_states.91", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 3, + "type" : "reshape", + "dst_h" : 128, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "hidden_states.91_scale", + "debug_info" : "hidden_states.91", + "dst_seq" : 1, + "dst_k" : -1, + "top" : "hidden_states.91" + }, + { + "bottom" : "hidden_states.91", + "weights" : { + + }, + "nd_mode" : 1, + "axis" : -1, + "debug_info" : "1161_shape", + "top" : "1161_shape", + "type" : "get_shape", + "name" : "1161_shape" + }, + { + "top" : "gather_47_indices_0", + "w" : 1, + "h" : 1, + "name" : "gather_47_indices_0", + "nd_rank" : 1, + "type" : "load_constant", + "k" : 1, + "bottom" : "", + "debug_info" : "gather_47_indices_0", + "n" : 1, + "weights" : { + + }, + "constant_blob" : 621 + }, + { + "bottom" : "1161_shape,gather_47_indices_0", + "validate_indices" : true, + "allow_negative_indices" : true, + "mode" : 0, + "weights" : { + + }, + "debug_info" : "gather_47", + "axis" : 0, + "top" : "gather_47", + "type" : "gather_nd", + "name" : "gather_47", + "batch_dims" : 0 + }, + { + "nB" : 768, + "top" : "1165", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 625, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "hidden_states.91", + "blob_biases" : 623, + "has_tanh" : 0, + "debug_info" : "1165", + "name" : "1165", + "has_prelu" : 0 + }, + { + "bottom" : "1165", + "alpha" : 0.125, + "operation" : 1, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "tensor.109", + "top" : "tensor.109", + "type" : "elementwise", + "name" : "tensor.109", + "beta" : 0 + }, + { + "name" : "1169", + "weights" : { + + }, + "dst_w" : 64, + "version" : 1, + "dst_n" : -1, + "dst_nd_rank" : 4, + "type" : "reshape", + "dst_h" : 12, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "tensor.109", + "debug_info" : "1169", + "dst_seq" : 1, + "dst_k" : 128, + "top" : "1169" + }, + { + "nB" : 768, + "top" : "tensor.111", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 629, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "hidden_states.91", + "blob_biases" : 627, + "has_tanh" : 0, + "debug_info" : "tensor.111", + "name" : "tensor.111", + "has_prelu" : 0 + }, + { + "bottom" : "gather_47,24,11,10", + "weights" : { + + }, + "nd_mode" : true, + "axis" : 0, + "debug_info" : "concat_83", + "top" : "concat_83", + "type" : "general_concat", + "name" : "concat_83" + }, + { + "name" : "1175", + "weights" : { + + }, + "dst_w" : 0, + "version" : 1, + "dst_n" : 0, + "type" : "reshape", + "dst_h" : 0, + "mode" : 0, + "bottom" : "tensor.111,concat_83", + "debug_info" : "1175", + "nd_mode" : 1, + "dst_seq" : 1, + "dst_k" : 0, + "top" : "1175" + }, + { + "has_prelu" : 0, + "top" : "tensor.113", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 633, + "type" : "inner_product", + "has_relu" : 0, + "attributes" : { + "basic_block_head" : 1 + }, + "bottom" : "hidden_states.91", + "debug_info" : "tensor.113", + "has_tanh" : 0, + "blob_biases" : 631, + "name" : "tensor.113", + "nB" : 768 + }, + { + "name" : "1182", + "weights" : { + + }, + "dst_w" : 0, + "version" : 1, + "dst_n" : 0, + "type" : "reshape", + "dst_h" : 0, + "mode" : 0, + "bottom" : "tensor.113,concat_83", + "debug_info" : "1182", + "nd_mode" : 1, + "dst_seq" : 1, + "dst_k" : 0, + "top" : "1182" + }, + { + "axis_seq" : 4, + "name" : "transpose_85", + "axis_n" : 3, + "axis_h" : 2, + "type" : "transpose", + "attributes" : { + "basic_block_head" : 1 + }, + "bottom" : "1175", + "axis_w" : 0, + "axis_k" : 1, + "debug_info" : "transpose_85", + "weights" : { + + }, + "top" : "transpose_85" + }, + { + "axis_h" : 0, + "axis_w" : 1, + "bottom" : "transpose_85", + "axis_k" : 2, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_83", + "top" : "transpose_83", + "type" : "transpose", + "name" : "transpose_83" + }, + { + "axis_h" : 2, + "axis_w" : 0, + "bottom" : "1169", + "axis_k" : 1, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_86", + "top" : "transpose_86", + "type" : "transpose", + "name" : "transpose_86" + }, + { + "bottom" : "transpose_86,transpose_83", + "weights" : { + + }, + "debug_info" : "attn_weights.55", + "top" : "attn_weights.55", + "type" : "batch_matmul", + "name" : "attn_weights.55", + "channel_mode" : false + }, + { + "bottom" : "transpose_85", + "weights" : { + + }, + "nd_mode" : 1, + "axis" : -1, + "debug_info" : "1187_shape", + "top" : "1187_shape", + "type" : "get_shape", + "name" : "1187_shape" + }, + { + "top" : "gather_49_indices_0", + "w" : 1, + "h" : 1, + "name" : "gather_49_indices_0", + "nd_rank" : 1, + "type" : "load_constant", + "k" : 1, + "bottom" : "", + "debug_info" : "gather_49_indices_0", + "n" : 1, + "weights" : { + + }, + "constant_blob" : 635 + }, + { + "bottom" : "1187_shape,gather_49_indices_0", + "validate_indices" : true, + "allow_negative_indices" : true, + "mode" : 0, + "weights" : { + + }, + "debug_info" : "gather_49", + "axis" : 0, + "top" : "gather_49", + "type" : "gather_nd", + "name" : "gather_49", + "batch_dims" : 0 + }, + { + "top" : "concat_85_values0_0", + "w" : 1, + "h" : 1, + "name" : "concat_85_values0_0", + "nd_rank" : 1, + "type" : "load_constant", + "k" : 1, + "bottom" : "", + "debug_info" : "concat_85_values0_0", + "n" : 1, + "weights" : { + + }, + "constant_blob" : 637 + }, + { + "top" : "concat_85_values1_0", + "w" : 1, + "h" : 1, + "name" : "concat_85_values1_0", + "nd_rank" : 1, + "type" : "load_constant", + "k" : 1, + "bottom" : "", + "debug_info" : "concat_85_values1_0", + "n" : 1, + "weights" : { + + }, + "constant_blob" : 639 + }, + { + "top" : "concat_85_values2_0", + "w" : 1, + "h" : 1, + "name" : "concat_85_values2_0", + "nd_rank" : 1, + "type" : "load_constant", + "k" : 1, + "bottom" : "", + "debug_info" : "concat_85_values2_0", + "n" : 1, + "weights" : { + + }, + "constant_blob" : 641 + }, + { + "bottom" : "concat_85_values0_0,concat_85_values1_0,concat_85_values2_0,gather_49", + "weights" : { + + }, + "nd_mode" : true, + "axis" : 0, + "debug_info" : "concat_85", + "top" : "concat_85", + "type" : "general_concat", + "name" : "concat_85" + }, + { + "top" : "causal_mask.25_begin_0", + "w" : 4, + "h" : 1, + "name" : "causal_mask.25_begin_0", + "nd_rank" : 1, + "type" : "load_constant", + "k" : 1, + "bottom" : "", + "debug_info" : "causal_mask.25_begin_0", + "n" : 1, + "weights" : { + + }, + "constant_blob" : 643 + }, + { + "begin_n" : 0, + "static_sizes_k" : 0, + "end_mask_h" : true, + "end_w" : 1, + "end_h" : 1, + "begin_h" : 0, + "stride_n" : 1, + "end_seq" : 1, + "begin_mask_seq" : false, + "begin_mask_h" : false, + "squeeze_mask_h" : false, + "stride_k" : 1, + "type" : "general_slice", + "begin_mask_k" : false, + "static_sizes_w" : 0, + "begin_w" : 0, + "end_mask_k" : true, + "use_static_sizes" : false, + "stride_h" : 1, + "static_sizes_n" : 0, + "begin_mask_n" : false, + "weights" : { + + }, + "name" : "causal_mask.25", + "stride_seq" : 1, + "end_k" : 1, + "static_sizes_seq" : 0, + "top" : "causal_mask.25", + "begin_seq" : 0, + "squeeze_mask_k" : false, + "begin_k" : 0, + "end_mask_w" : true, + "end_mask_seq" : false, + "end_mask_n" : false, + "debug_info" : "causal_mask.25", + "stride_w" : 1, + "squeeze_mask_seq" : false, + "static_sizes_h" : 0, + "begin_mask_w" : false, + "bottom" : "attention_mask,causal_mask.25_begin_0,concat_85", + "end_n" : 1, + "squeeze_mask_w" : false, + "squeeze_mask_n" : false + }, + { + "name" : "input.295", + "fused_relu" : 0, + "beta" : 0, + "operation" : 0, + "type" : "elementwise", + "alpha" : 1, + "attributes" : { + "basic_block_head" : 1 + }, + "bottom" : "attn_weights.55,causal_mask.25", + "debug_info" : "input.295", + "nd_mode" : true, + "weights" : { + + }, + "top" : "input.295" + }, + { + "bottom" : "input.295", + "weights" : { + + }, + "nd_axis" : -1, + "debug_info" : "input.297", + "top" : "input.297", + "type" : "softmax_nd", + "name" : "input.297" + }, + { + "axis_h" : 2, + "axis_w" : 0, + "bottom" : "1182", + "axis_k" : 1, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_84", + "top" : "transpose_84", + "type" : "transpose", + "name" : "transpose_84" + }, + { + "bottom" : "input.297,transpose_84", + "weights" : { + + }, + "debug_info" : "attn_output.73", + "top" : "attn_output.73", + "type" : "batch_matmul", + "name" : "attn_output.73", + "channel_mode" : false + }, + { + "axis_h" : 2, + "axis_w" : 0, + "bottom" : "attn_output.73", + "axis_k" : 1, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_82", + "top" : "transpose_82", + "type" : "transpose", + "name" : "transpose_82" + }, + { + "name" : "input.299", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 3, + "type" : "reshape", + "dst_h" : 128, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "transpose_82", + "debug_info" : "input.299", + "dst_seq" : 1, + "dst_k" : -1, + "top" : "input.299" + }, + { + "nB" : 768, + "top" : "input.301", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 647, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "input.299", + "blob_biases" : 645, + "has_tanh" : 0, + "debug_info" : "input.301", + "name" : "input.301", + "has_prelu" : 0 + }, + { + "bottom" : "input.293,input.301", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "input.303", + "nd_mode" : true, + "top" : "input.303", + "type" : "elementwise", + "name" : "input.303", + "beta" : 0 + }, + { + "name" : "hidden_states.95_reshape", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : -1, + "dst_nd_rank" : 4, + "type" : "reshape", + "dst_h" : 1, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "input.303", + "debug_info" : "hidden_states.95_reshape", + "dst_seq" : 1, + "dst_k" : 128, + "top" : "hidden_states.95_reshape" + }, + { + "bottom" : "hidden_states.95_reshape", + "weights" : { + + }, + "eps" : 9.9999997473787516e-06, + "debug_info" : "hidden_states.95_mvn", + "axis" : 1, + "normalization_mode" : 1, + "top" : "hidden_states.95_mvn", + "type" : "l2_normalize", + "name" : "hidden_states.95_mvn" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "hidden_states.95_scale", + "constant_blob" : 649, + "top" : "hidden_states.95_scale_constant_in_scale", + "w" : 768, + "type" : "load_constant", + "name" : "hidden_states.95_scale_constant_in_scale" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "hidden_states.95_scale", + "constant_blob" : 651, + "top" : "hidden_states.95_scale_constant_in_bias", + "w" : 768, + "type" : "load_constant", + "name" : "hidden_states.95_scale_constant_in_bias" + }, + { + "bottom" : "hidden_states.95_mvn,hidden_states.95_scale_constant_in_scale", + "alpha" : 1, + "operation" : 1, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "hidden_states.95_scale", + "top" : "hidden_states.95_scale_mul_out", + "type" : "elementwise", + "name" : "hidden_states.95_scale_mul_out", + "beta" : 0 + }, + { + "bottom" : "hidden_states.95_scale_mul_out,hidden_states.95_scale_constant_in_bias", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "hidden_states.95_scale", + "top" : "hidden_states.95_scale", + "type" : "elementwise", + "name" : "hidden_states.95_scale", + "beta" : 0 + }, + { + "name" : "hidden_states.95", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 3, + "type" : "reshape", + "dst_h" : 128, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "hidden_states.95_scale", + "debug_info" : "hidden_states.95", + "dst_seq" : 1, + "dst_k" : -1, + "top" : "hidden_states.95" + }, + { + "bottom" : "hidden_states.95", + "weights" : { + + }, + "nd_mode" : 1, + "axis" : -1, + "debug_info" : "1212_shape", + "top" : "1212_shape", + "type" : "get_shape", + "name" : "1212_shape" + }, + { + "top" : "gather_50_indices_0", + "w" : 1, + "h" : 1, + "name" : "gather_50_indices_0", + "nd_rank" : 1, + "type" : "load_constant", + "k" : 1, + "bottom" : "", + "debug_info" : "gather_50_indices_0", + "n" : 1, + "weights" : { + + }, + "constant_blob" : 653 + }, + { + "bottom" : "1212_shape,gather_50_indices_0", + "validate_indices" : true, + "allow_negative_indices" : true, + "mode" : 0, + "weights" : { + + }, + "debug_info" : "gather_50", + "axis" : 0, + "top" : "gather_50", + "type" : "gather_nd", + "name" : "gather_50", + "batch_dims" : 0 + }, + { + "nB" : 768, + "top" : "1216", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 657, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "hidden_states.95", + "blob_biases" : 655, + "has_tanh" : 0, + "debug_info" : "1216", + "name" : "1216", + "has_prelu" : 0 + }, + { + "bottom" : "1216", + "alpha" : 0.125, + "operation" : 1, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "tensor.115", + "top" : "tensor.115", + "type" : "elementwise", + "name" : "tensor.115", + "beta" : 0 + }, + { + "name" : "1220", + "weights" : { + + }, + "dst_w" : 64, + "version" : 1, + "dst_n" : -1, + "dst_nd_rank" : 4, + "type" : "reshape", + "dst_h" : 12, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "tensor.115", + "debug_info" : "1220", + "dst_seq" : 1, + "dst_k" : 128, + "top" : "1220" + }, + { + "nB" : 768, + "top" : "tensor.117", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 661, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "encoder_hidden_states", + "blob_biases" : 659, + "has_tanh" : 0, + "debug_info" : "tensor.117", + "name" : "tensor.117", + "has_prelu" : 0 + }, + { + "bottom" : "gather_50,24,11,10", + "weights" : { + + }, + "nd_mode" : true, + "axis" : 0, + "debug_info" : "concat_88", + "top" : "concat_88", + "type" : "general_concat", + "name" : "concat_88" + }, + { + "name" : "1226", + "weights" : { + + }, + "dst_w" : 0, + "version" : 1, + "dst_n" : 0, + "type" : "reshape", + "dst_h" : 0, + "mode" : 0, + "bottom" : "tensor.117,concat_88", + "debug_info" : "1226", + "nd_mode" : 1, + "dst_seq" : 1, + "dst_k" : 0, + "top" : "1226" + }, + { + "has_prelu" : 0, + "top" : "tensor.119", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 665, + "type" : "inner_product", + "has_relu" : 0, + "attributes" : { + "basic_block_head" : 1 + }, + "bottom" : "encoder_hidden_states", + "debug_info" : "tensor.119", + "has_tanh" : 0, + "blob_biases" : 663, + "name" : "tensor.119", + "nB" : 768 + }, + { + "name" : "1233", + "weights" : { + + }, + "dst_w" : 0, + "version" : 1, + "dst_n" : 0, + "type" : "reshape", + "dst_h" : 0, + "mode" : 0, + "bottom" : "tensor.119,concat_88", + "debug_info" : "1233", + "nd_mode" : 1, + "dst_seq" : 1, + "dst_k" : 0, + "top" : "1233" + }, + { + "axis_seq" : 4, + "name" : "transpose_79", + "axis_n" : 3, + "axis_h" : 0, + "type" : "transpose", + "attributes" : { + "basic_block_head" : 1 + }, + "bottom" : "1226", + "axis_w" : 2, + "axis_k" : 1, + "debug_info" : "transpose_79", + "weights" : { + + }, + "top" : "transpose_79" + }, + { + "axis_h" : 2, + "axis_w" : 0, + "bottom" : "1220", + "axis_k" : 1, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_80", + "top" : "transpose_80", + "type" : "transpose", + "name" : "transpose_80" + }, + { + "bottom" : "transpose_80,transpose_79", + "weights" : { + + }, + "debug_info" : "input.305", + "top" : "input.305", + "type" : "batch_matmul", + "name" : "input.305", + "channel_mode" : false + }, + { + "bottom" : "input.305", + "weights" : { + + }, + "nd_axis" : -1, + "debug_info" : "input.307", + "top" : "input.307", + "type" : "softmax_nd", + "name" : "input.307" + }, + { + "axis_h" : 2, + "axis_w" : 0, + "bottom" : "1233", + "axis_k" : 1, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_81", + "top" : "transpose_81", + "type" : "transpose", + "name" : "transpose_81" + }, + { + "bottom" : "input.307,transpose_81", + "weights" : { + + }, + "debug_info" : "attn_output.77", + "top" : "attn_output.77", + "type" : "batch_matmul", + "name" : "attn_output.77", + "channel_mode" : false + }, + { + "axis_h" : 2, + "axis_w" : 0, + "bottom" : "attn_output.77", + "axis_k" : 1, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_78", + "top" : "transpose_78", + "type" : "transpose", + "name" : "transpose_78" + }, + { + "name" : "input.309", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 3, + "type" : "reshape", + "dst_h" : 128, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "transpose_78", + "debug_info" : "input.309", + "dst_seq" : 1, + "dst_k" : -1, + "top" : "input.309" + }, + { + "nB" : 768, + "top" : "input.311", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 669, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "input.309", + "blob_biases" : 667, + "has_tanh" : 0, + "debug_info" : "input.311", + "name" : "input.311", + "has_prelu" : 0 + }, + { + "bottom" : "input.303,input.311", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "input.313", + "nd_mode" : true, + "top" : "input.313", + "type" : "elementwise", + "name" : "input.313", + "beta" : 0 + }, + { + "name" : "input.315_reshape", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : -1, + "dst_nd_rank" : 4, + "type" : "reshape", + "dst_h" : 1, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "input.313", + "debug_info" : "input.315_reshape", + "dst_seq" : 1, + "dst_k" : 128, + "top" : "input.315_reshape" + }, + { + "bottom" : "input.315_reshape", + "weights" : { + + }, + "eps" : 9.9999997473787516e-06, + "debug_info" : "input.315_mvn", + "axis" : 1, + "normalization_mode" : 1, + "top" : "input.315_mvn", + "type" : "l2_normalize", + "name" : "input.315_mvn" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "input.315_scale", + "constant_blob" : 671, + "top" : "input.315_scale_constant_in_scale", + "w" : 768, + "type" : "load_constant", + "name" : "input.315_scale_constant_in_scale" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "input.315_scale", + "constant_blob" : 673, + "top" : "input.315_scale_constant_in_bias", + "w" : 768, + "type" : "load_constant", + "name" : "input.315_scale_constant_in_bias" + }, + { + "bottom" : "input.315_mvn,input.315_scale_constant_in_scale", + "alpha" : 1, + "operation" : 1, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "input.315_scale", + "top" : "input.315_scale_mul_out", + "type" : "elementwise", + "name" : "input.315_scale_mul_out", + "beta" : 0 + }, + { + "bottom" : "input.315_scale_mul_out,input.315_scale_constant_in_bias", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "input.315_scale", + "top" : "input.315_scale", + "type" : "elementwise", + "name" : "input.315_scale", + "beta" : 0 + }, + { + "name" : "input.315", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 3, + "type" : "reshape", + "dst_h" : 128, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "input.315_scale", + "debug_info" : "input.315", + "dst_seq" : 1, + "dst_k" : -1, + "top" : "input.315" + }, + { + "nB" : 768, + "top" : "input.317", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 3072, + "blob_weights" : 677, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "input.315", + "blob_biases" : 675, + "has_tanh" : 0, + "debug_info" : "input.317", + "name" : "input.317", + "has_prelu" : 0 + }, + { + "bottom" : "input.317", + "weights" : { + + }, + "mode" : 19, + "debug_info" : "input.319", + "top" : "input.319", + "type" : "activation", + "name" : "input.319" + }, + { + "nB" : 3072, + "top" : "input.323", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 681, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "input.319", + "blob_biases" : 679, + "has_tanh" : 0, + "debug_info" : "input.323", + "name" : "input.323", + "has_prelu" : 0 + }, + { + "bottom" : "input.313,input.323", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "input.325", + "top" : "input.325", + "type" : "elementwise", + "name" : "input.325", + "beta" : 0 + }, + { + "name" : "hidden_states.101_reshape", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : -1, + "dst_nd_rank" : 4, + "type" : "reshape", + "dst_h" : 1, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "input.325", + "debug_info" : "hidden_states.101_reshape", + "dst_seq" : 1, + "dst_k" : 128, + "top" : "hidden_states.101_reshape" + }, + { + "bottom" : "hidden_states.101_reshape", + "weights" : { + + }, + "eps" : 9.9999997473787516e-06, + "debug_info" : "hidden_states.101_mvn", + "axis" : 1, + "normalization_mode" : 1, + "top" : "hidden_states.101_mvn", + "type" : "l2_normalize", + "name" : "hidden_states.101_mvn" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "hidden_states.101_scale", + "constant_blob" : 683, + "top" : "hidden_states.101_scale_constant_in_scale", + "w" : 768, + "type" : "load_constant", + "name" : "hidden_states.101_scale_constant_in_scale" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "hidden_states.101_scale", + "constant_blob" : 685, + "top" : "hidden_states.101_scale_constant_in_bias", + "w" : 768, + "type" : "load_constant", + "name" : "hidden_states.101_scale_constant_in_bias" + }, + { + "bottom" : "hidden_states.101_mvn,hidden_states.101_scale_constant_in_scale", + "alpha" : 1, + "operation" : 1, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "hidden_states.101_scale", + "top" : "hidden_states.101_scale_mul_out", + "type" : "elementwise", + "name" : "hidden_states.101_scale_mul_out", + "beta" : 0 + }, + { + "bottom" : "hidden_states.101_scale_mul_out,hidden_states.101_scale_constant_in_bias", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "hidden_states.101_scale", + "top" : "hidden_states.101_scale", + "type" : "elementwise", + "name" : "hidden_states.101_scale", + "beta" : 0 + }, + { + "name" : "hidden_states.101", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 3, + "type" : "reshape", + "dst_h" : 128, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "hidden_states.101_scale", + "debug_info" : "hidden_states.101", + "dst_seq" : 1, + "dst_k" : -1, + "top" : "hidden_states.101" + }, + { + "bottom" : "hidden_states.101", + "weights" : { + + }, + "nd_mode" : 1, + "axis" : -1, + "debug_info" : "1278_shape", + "top" : "1278_shape", + "type" : "get_shape", + "name" : "1278_shape" + }, + { + "top" : "gather_52_indices_0", + "w" : 1, + "h" : 1, + "name" : "gather_52_indices_0", + "nd_rank" : 1, + "type" : "load_constant", + "k" : 1, + "bottom" : "", + "debug_info" : "gather_52_indices_0", + "n" : 1, + "weights" : { + + }, + "constant_blob" : 687 + }, + { + "bottom" : "1278_shape,gather_52_indices_0", + "validate_indices" : true, + "allow_negative_indices" : true, + "mode" : 0, + "weights" : { + + }, + "debug_info" : "gather_52", + "axis" : 0, + "top" : "gather_52", + "type" : "gather_nd", + "name" : "gather_52", + "batch_dims" : 0 + }, + { + "nB" : 768, + "top" : "1282", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 691, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "hidden_states.101", + "blob_biases" : 689, + "has_tanh" : 0, + "debug_info" : "1282", + "name" : "1282", + "has_prelu" : 0 + }, + { + "bottom" : "1282", + "alpha" : 0.125, + "operation" : 1, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "tensor.121", + "top" : "tensor.121", + "type" : "elementwise", + "name" : "tensor.121", + "beta" : 0 + }, + { + "name" : "1286", + "weights" : { + + }, + "dst_w" : 64, + "version" : 1, + "dst_n" : -1, + "dst_nd_rank" : 4, + "type" : "reshape", + "dst_h" : 12, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "tensor.121", + "debug_info" : "1286", + "dst_seq" : 1, + "dst_k" : 128, + "top" : "1286" + }, + { + "nB" : 768, + "top" : "tensor.123", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 695, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "hidden_states.101", + "blob_biases" : 693, + "has_tanh" : 0, + "debug_info" : "tensor.123", + "name" : "tensor.123", + "has_prelu" : 0 + }, + { + "bottom" : "gather_52,24,11,10", + "weights" : { + + }, + "nd_mode" : true, + "axis" : 0, + "debug_info" : "concat_92", + "top" : "concat_92", + "type" : "general_concat", + "name" : "concat_92" + }, + { + "name" : "1292", + "weights" : { + + }, + "dst_w" : 0, + "version" : 1, + "dst_n" : 0, + "type" : "reshape", + "dst_h" : 0, + "mode" : 0, + "bottom" : "tensor.123,concat_92", + "debug_info" : "1292", + "nd_mode" : 1, + "dst_seq" : 1, + "dst_k" : 0, + "top" : "1292" + }, + { + "has_prelu" : 0, + "top" : "tensor.125", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 699, + "type" : "inner_product", + "has_relu" : 0, + "attributes" : { + "basic_block_head" : 1 + }, + "bottom" : "hidden_states.101", + "debug_info" : "tensor.125", + "has_tanh" : 0, + "blob_biases" : 697, + "name" : "tensor.125", + "nB" : 768 + }, + { + "name" : "1299", + "weights" : { + + }, + "dst_w" : 0, + "version" : 1, + "dst_n" : 0, + "type" : "reshape", + "dst_h" : 0, + "mode" : 0, + "bottom" : "tensor.125,concat_92", + "debug_info" : "1299", + "nd_mode" : 1, + "dst_seq" : 1, + "dst_k" : 0, + "top" : "1299" + }, + { + "axis_seq" : 4, + "name" : "transpose_76", + "axis_n" : 3, + "axis_h" : 2, + "type" : "transpose", + "attributes" : { + "basic_block_head" : 1 + }, + "bottom" : "1292", + "axis_w" : 0, + "axis_k" : 1, + "debug_info" : "transpose_76", + "weights" : { + + }, + "top" : "transpose_76" + }, + { + "axis_h" : 0, + "axis_w" : 1, + "bottom" : "transpose_76", + "axis_k" : 2, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_74", + "top" : "transpose_74", + "type" : "transpose", + "name" : "transpose_74" + }, + { + "axis_h" : 2, + "axis_w" : 0, + "bottom" : "1286", + "axis_k" : 1, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_77", + "top" : "transpose_77", + "type" : "transpose", + "name" : "transpose_77" + }, + { + "bottom" : "transpose_77,transpose_74", + "weights" : { + + }, + "debug_info" : "attn_weights.61", + "top" : "attn_weights.61", + "type" : "batch_matmul", + "name" : "attn_weights.61", + "channel_mode" : false + }, + { + "bottom" : "transpose_76", + "weights" : { + + }, + "nd_mode" : 1, + "axis" : -1, + "debug_info" : "1304_shape", + "top" : "1304_shape", + "type" : "get_shape", + "name" : "1304_shape" + }, + { + "top" : "gather_54_indices_0", + "w" : 1, + "h" : 1, + "name" : "gather_54_indices_0", + "nd_rank" : 1, + "type" : "load_constant", + "k" : 1, + "bottom" : "", + "debug_info" : "gather_54_indices_0", + "n" : 1, + "weights" : { + + }, + "constant_blob" : 701 + }, + { + "bottom" : "1304_shape,gather_54_indices_0", + "validate_indices" : true, + "allow_negative_indices" : true, + "mode" : 0, + "weights" : { + + }, + "debug_info" : "gather_54", + "axis" : 0, + "top" : "gather_54", + "type" : "gather_nd", + "name" : "gather_54", + "batch_dims" : 0 + }, + { + "top" : "concat_94_values0_0", + "w" : 1, + "h" : 1, + "name" : "concat_94_values0_0", + "nd_rank" : 1, + "type" : "load_constant", + "k" : 1, + "bottom" : "", + "debug_info" : "concat_94_values0_0", + "n" : 1, + "weights" : { + + }, + "constant_blob" : 703 + }, + { + "top" : "concat_94_values1_0", + "w" : 1, + "h" : 1, + "name" : "concat_94_values1_0", + "nd_rank" : 1, + "type" : "load_constant", + "k" : 1, + "bottom" : "", + "debug_info" : "concat_94_values1_0", + "n" : 1, + "weights" : { + + }, + "constant_blob" : 705 + }, + { + "top" : "concat_94_values2_0", + "w" : 1, + "h" : 1, + "name" : "concat_94_values2_0", + "nd_rank" : 1, + "type" : "load_constant", + "k" : 1, + "bottom" : "", + "debug_info" : "concat_94_values2_0", + "n" : 1, + "weights" : { + + }, + "constant_blob" : 707 + }, + { + "bottom" : "concat_94_values0_0,concat_94_values1_0,concat_94_values2_0,gather_54", + "weights" : { + + }, + "nd_mode" : true, + "axis" : 0, + "debug_info" : "concat_94", + "top" : "concat_94", + "type" : "general_concat", + "name" : "concat_94" + }, + { + "top" : "causal_mask.27_begin_0", + "w" : 4, + "h" : 1, + "name" : "causal_mask.27_begin_0", + "nd_rank" : 1, + "type" : "load_constant", + "k" : 1, + "bottom" : "", + "debug_info" : "causal_mask.27_begin_0", + "n" : 1, + "weights" : { + + }, + "constant_blob" : 709 + }, + { + "begin_n" : 0, + "static_sizes_k" : 0, + "end_mask_h" : true, + "end_w" : 1, + "end_h" : 1, + "begin_h" : 0, + "stride_n" : 1, + "end_seq" : 1, + "begin_mask_seq" : false, + "begin_mask_h" : false, + "squeeze_mask_h" : false, + "stride_k" : 1, + "type" : "general_slice", + "begin_mask_k" : false, + "static_sizes_w" : 0, + "begin_w" : 0, + "end_mask_k" : true, + "use_static_sizes" : false, + "stride_h" : 1, + "static_sizes_n" : 0, + "begin_mask_n" : false, + "weights" : { + + }, + "name" : "causal_mask.27", + "stride_seq" : 1, + "end_k" : 1, + "static_sizes_seq" : 0, + "top" : "causal_mask.27", + "begin_seq" : 0, + "squeeze_mask_k" : false, + "begin_k" : 0, + "end_mask_w" : true, + "end_mask_seq" : false, + "end_mask_n" : false, + "debug_info" : "causal_mask.27", + "stride_w" : 1, + "squeeze_mask_seq" : false, + "static_sizes_h" : 0, + "begin_mask_w" : false, + "bottom" : "attention_mask,causal_mask.27_begin_0,concat_94", + "end_n" : 1, + "squeeze_mask_w" : false, + "squeeze_mask_n" : false + }, + { + "name" : "input.327", + "fused_relu" : 0, + "beta" : 0, + "operation" : 0, + "type" : "elementwise", + "alpha" : 1, + "attributes" : { + "basic_block_head" : 1 + }, + "bottom" : "attn_weights.61,causal_mask.27", + "debug_info" : "input.327", + "nd_mode" : true, + "weights" : { + + }, + "top" : "input.327" + }, + { + "bottom" : "input.327", + "weights" : { + + }, + "nd_axis" : -1, + "debug_info" : "input.329", + "top" : "input.329", + "type" : "softmax_nd", + "name" : "input.329" + }, + { + "axis_h" : 2, + "axis_w" : 0, + "bottom" : "1299", + "axis_k" : 1, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_75", + "top" : "transpose_75", + "type" : "transpose", + "name" : "transpose_75" + }, + { + "bottom" : "input.329,transpose_75", + "weights" : { + + }, + "debug_info" : "attn_output.81", + "top" : "attn_output.81", + "type" : "batch_matmul", + "name" : "attn_output.81", + "channel_mode" : false + }, + { + "axis_h" : 2, + "axis_w" : 0, + "bottom" : "attn_output.81", + "axis_k" : 1, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_73", + "top" : "transpose_73", + "type" : "transpose", + "name" : "transpose_73" + }, + { + "name" : "input.331", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 3, + "type" : "reshape", + "dst_h" : 128, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "transpose_73", + "debug_info" : "input.331", + "dst_seq" : 1, + "dst_k" : -1, + "top" : "input.331" + }, + { + "nB" : 768, + "top" : "input.333", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 713, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "input.331", + "blob_biases" : 711, + "has_tanh" : 0, + "debug_info" : "input.333", + "name" : "input.333", + "has_prelu" : 0 + }, + { + "bottom" : "input.325,input.333", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "input.335", + "nd_mode" : true, + "top" : "input.335", + "type" : "elementwise", + "name" : "input.335", + "beta" : 0 + }, + { + "name" : "hidden_states.105_reshape", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : -1, + "dst_nd_rank" : 4, + "type" : "reshape", + "dst_h" : 1, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "input.335", + "debug_info" : "hidden_states.105_reshape", + "dst_seq" : 1, + "dst_k" : 128, + "top" : "hidden_states.105_reshape" + }, + { + "bottom" : "hidden_states.105_reshape", + "weights" : { + + }, + "eps" : 9.9999997473787516e-06, + "debug_info" : "hidden_states.105_mvn", + "axis" : 1, + "normalization_mode" : 1, + "top" : "hidden_states.105_mvn", + "type" : "l2_normalize", + "name" : "hidden_states.105_mvn" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "hidden_states.105_scale", + "constant_blob" : 715, + "top" : "hidden_states.105_scale_constant_in_scale", + "w" : 768, + "type" : "load_constant", + "name" : "hidden_states.105_scale_constant_in_scale" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "hidden_states.105_scale", + "constant_blob" : 717, + "top" : "hidden_states.105_scale_constant_in_bias", + "w" : 768, + "type" : "load_constant", + "name" : "hidden_states.105_scale_constant_in_bias" + }, + { + "bottom" : "hidden_states.105_mvn,hidden_states.105_scale_constant_in_scale", + "alpha" : 1, + "operation" : 1, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "hidden_states.105_scale", + "top" : "hidden_states.105_scale_mul_out", + "type" : "elementwise", + "name" : "hidden_states.105_scale_mul_out", + "beta" : 0 + }, + { + "bottom" : "hidden_states.105_scale_mul_out,hidden_states.105_scale_constant_in_bias", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "hidden_states.105_scale", + "top" : "hidden_states.105_scale", + "type" : "elementwise", + "name" : "hidden_states.105_scale", + "beta" : 0 + }, + { + "name" : "hidden_states.105", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 3, + "type" : "reshape", + "dst_h" : 128, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "hidden_states.105_scale", + "debug_info" : "hidden_states.105", + "dst_seq" : 1, + "dst_k" : -1, + "top" : "hidden_states.105" + }, + { + "bottom" : "hidden_states.105", + "weights" : { + + }, + "nd_mode" : 1, + "axis" : -1, + "debug_info" : "1329_shape", + "top" : "1329_shape", + "type" : "get_shape", + "name" : "1329_shape" + }, + { + "top" : "gather_55_indices_0", + "w" : 1, + "h" : 1, + "name" : "gather_55_indices_0", + "nd_rank" : 1, + "type" : "load_constant", + "k" : 1, + "bottom" : "", + "debug_info" : "gather_55_indices_0", + "n" : 1, + "weights" : { + + }, + "constant_blob" : 719 + }, + { + "bottom" : "1329_shape,gather_55_indices_0", + "validate_indices" : true, + "allow_negative_indices" : true, + "mode" : 0, + "weights" : { + + }, + "debug_info" : "gather_55", + "axis" : 0, + "top" : "gather_55", + "type" : "gather_nd", + "name" : "gather_55", + "batch_dims" : 0 + }, + { + "nB" : 768, + "top" : "1333", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 723, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "hidden_states.105", + "blob_biases" : 721, + "has_tanh" : 0, + "debug_info" : "1333", + "name" : "1333", + "has_prelu" : 0 + }, + { + "bottom" : "1333", + "alpha" : 0.125, + "operation" : 1, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "tensor.127", + "top" : "tensor.127", + "type" : "elementwise", + "name" : "tensor.127", + "beta" : 0 + }, + { + "name" : "1337", + "weights" : { + + }, + "dst_w" : 64, + "version" : 1, + "dst_n" : -1, + "dst_nd_rank" : 4, + "type" : "reshape", + "dst_h" : 12, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "tensor.127", + "debug_info" : "1337", + "dst_seq" : 1, + "dst_k" : 128, + "top" : "1337" + }, + { + "nB" : 768, + "top" : "tensor.129", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 727, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "encoder_hidden_states", + "blob_biases" : 725, + "has_tanh" : 0, + "debug_info" : "tensor.129", + "name" : "tensor.129", + "has_prelu" : 0 + }, + { + "bottom" : "gather_55,24,11,10", + "weights" : { + + }, + "nd_mode" : true, + "axis" : 0, + "debug_info" : "concat_97", + "top" : "concat_97", + "type" : "general_concat", + "name" : "concat_97" + }, + { + "name" : "1343", + "weights" : { + + }, + "dst_w" : 0, + "version" : 1, + "dst_n" : 0, + "type" : "reshape", + "dst_h" : 0, + "mode" : 0, + "bottom" : "tensor.129,concat_97", + "debug_info" : "1343", + "nd_mode" : 1, + "dst_seq" : 1, + "dst_k" : 0, + "top" : "1343" + }, + { + "has_prelu" : 0, + "top" : "tensor.131", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 731, + "type" : "inner_product", + "has_relu" : 0, + "attributes" : { + "basic_block_head" : 1 + }, + "bottom" : "encoder_hidden_states", + "debug_info" : "tensor.131", + "has_tanh" : 0, + "blob_biases" : 729, + "name" : "tensor.131", + "nB" : 768 + }, + { + "name" : "1350", + "weights" : { + + }, + "dst_w" : 0, + "version" : 1, + "dst_n" : 0, + "type" : "reshape", + "dst_h" : 0, + "mode" : 0, + "bottom" : "tensor.131,concat_97", + "debug_info" : "1350", + "nd_mode" : 1, + "dst_seq" : 1, + "dst_k" : 0, + "top" : "1350" + }, + { + "axis_seq" : 4, + "name" : "transpose_70", + "axis_n" : 3, + "axis_h" : 0, + "type" : "transpose", + "attributes" : { + "basic_block_head" : 1 + }, + "bottom" : "1343", + "axis_w" : 2, + "axis_k" : 1, + "debug_info" : "transpose_70", + "weights" : { + + }, + "top" : "transpose_70" + }, + { + "axis_h" : 2, + "axis_w" : 0, + "bottom" : "1337", + "axis_k" : 1, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_71", + "top" : "transpose_71", + "type" : "transpose", + "name" : "transpose_71" + }, + { + "bottom" : "transpose_71,transpose_70", + "weights" : { + + }, + "debug_info" : "input.337", + "top" : "input.337", + "type" : "batch_matmul", + "name" : "input.337", + "channel_mode" : false + }, + { + "bottom" : "input.337", + "weights" : { + + }, + "nd_axis" : -1, + "debug_info" : "input.339", + "top" : "input.339", + "type" : "softmax_nd", + "name" : "input.339" + }, + { + "axis_h" : 2, + "axis_w" : 0, + "bottom" : "1350", + "axis_k" : 1, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_72", + "top" : "transpose_72", + "type" : "transpose", + "name" : "transpose_72" + }, + { + "bottom" : "input.339,transpose_72", + "weights" : { + + }, + "debug_info" : "attn_output.85", + "top" : "attn_output.85", + "type" : "batch_matmul", + "name" : "attn_output.85", + "channel_mode" : false + }, + { + "axis_h" : 2, + "axis_w" : 0, + "bottom" : "attn_output.85", + "axis_k" : 1, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_69", + "top" : "transpose_69", + "type" : "transpose", + "name" : "transpose_69" + }, + { + "name" : "input.341", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 3, + "type" : "reshape", + "dst_h" : 128, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "transpose_69", + "debug_info" : "input.341", + "dst_seq" : 1, + "dst_k" : -1, + "top" : "input.341" + }, + { + "nB" : 768, + "top" : "input.343", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 735, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "input.341", + "blob_biases" : 733, + "has_tanh" : 0, + "debug_info" : "input.343", + "name" : "input.343", + "has_prelu" : 0 + }, + { + "bottom" : "input.335,input.343", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "input.345", + "nd_mode" : true, + "top" : "input.345", + "type" : "elementwise", + "name" : "input.345", + "beta" : 0 + }, + { + "name" : "input.347_reshape", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : -1, + "dst_nd_rank" : 4, + "type" : "reshape", + "dst_h" : 1, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "input.345", + "debug_info" : "input.347_reshape", + "dst_seq" : 1, + "dst_k" : 128, + "top" : "input.347_reshape" + }, + { + "bottom" : "input.347_reshape", + "weights" : { + + }, + "eps" : 9.9999997473787516e-06, + "debug_info" : "input.347_mvn", + "axis" : 1, + "normalization_mode" : 1, + "top" : "input.347_mvn", + "type" : "l2_normalize", + "name" : "input.347_mvn" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "input.347_scale", + "constant_blob" : 737, + "top" : "input.347_scale_constant_in_scale", + "w" : 768, + "type" : "load_constant", + "name" : "input.347_scale_constant_in_scale" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "input.347_scale", + "constant_blob" : 739, + "top" : "input.347_scale_constant_in_bias", + "w" : 768, + "type" : "load_constant", + "name" : "input.347_scale_constant_in_bias" + }, + { + "bottom" : "input.347_mvn,input.347_scale_constant_in_scale", + "alpha" : 1, + "operation" : 1, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "input.347_scale", + "top" : "input.347_scale_mul_out", + "type" : "elementwise", + "name" : "input.347_scale_mul_out", + "beta" : 0 + }, + { + "bottom" : "input.347_scale_mul_out,input.347_scale_constant_in_bias", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "input.347_scale", + "top" : "input.347_scale", + "type" : "elementwise", + "name" : "input.347_scale", + "beta" : 0 + }, + { + "name" : "input.347", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 3, + "type" : "reshape", + "dst_h" : 128, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "input.347_scale", + "debug_info" : "input.347", + "dst_seq" : 1, + "dst_k" : -1, + "top" : "input.347" + }, + { + "nB" : 768, + "top" : "input.349", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 3072, + "blob_weights" : 743, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "input.347", + "blob_biases" : 741, + "has_tanh" : 0, + "debug_info" : "input.349", + "name" : "input.349", + "has_prelu" : 0 + }, + { + "bottom" : "input.349", + "weights" : { + + }, + "mode" : 19, + "debug_info" : "input.351", + "top" : "input.351", + "type" : "activation", + "name" : "input.351" + }, + { + "nB" : 3072, + "top" : "input.355", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 747, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "input.351", + "blob_biases" : 745, + "has_tanh" : 0, + "debug_info" : "input.355", + "name" : "input.355", + "has_prelu" : 0 + }, + { + "bottom" : "input.345,input.355", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "input.357", + "top" : "input.357", + "type" : "elementwise", + "name" : "input.357", + "beta" : 0 + }, + { + "name" : "hidden_states.111_reshape", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : -1, + "dst_nd_rank" : 4, + "type" : "reshape", + "dst_h" : 1, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "input.357", + "debug_info" : "hidden_states.111_reshape", + "dst_seq" : 1, + "dst_k" : 128, + "top" : "hidden_states.111_reshape" + }, + { + "bottom" : "hidden_states.111_reshape", + "weights" : { + + }, + "eps" : 9.9999997473787516e-06, + "debug_info" : "hidden_states.111_mvn", + "axis" : 1, + "normalization_mode" : 1, + "top" : "hidden_states.111_mvn", + "type" : "l2_normalize", + "name" : "hidden_states.111_mvn" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "hidden_states.111_scale", + "constant_blob" : 749, + "top" : "hidden_states.111_scale_constant_in_scale", + "w" : 768, + "type" : "load_constant", + "name" : "hidden_states.111_scale_constant_in_scale" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "hidden_states.111_scale", + "constant_blob" : 751, + "top" : "hidden_states.111_scale_constant_in_bias", + "w" : 768, + "type" : "load_constant", + "name" : "hidden_states.111_scale_constant_in_bias" + }, + { + "bottom" : "hidden_states.111_mvn,hidden_states.111_scale_constant_in_scale", + "alpha" : 1, + "operation" : 1, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "hidden_states.111_scale", + "top" : "hidden_states.111_scale_mul_out", + "type" : "elementwise", + "name" : "hidden_states.111_scale_mul_out", + "beta" : 0 + }, + { + "bottom" : "hidden_states.111_scale_mul_out,hidden_states.111_scale_constant_in_bias", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "hidden_states.111_scale", + "top" : "hidden_states.111_scale", + "type" : "elementwise", + "name" : "hidden_states.111_scale", + "beta" : 0 + }, + { + "name" : "hidden_states.111", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 3, + "type" : "reshape", + "dst_h" : 128, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "hidden_states.111_scale", + "debug_info" : "hidden_states.111", + "dst_seq" : 1, + "dst_k" : -1, + "top" : "hidden_states.111" + }, + { + "bottom" : "hidden_states.111", + "weights" : { + + }, + "nd_mode" : 1, + "axis" : -1, + "debug_info" : "1395_shape", + "top" : "1395_shape", + "type" : "get_shape", + "name" : "1395_shape" + }, + { + "top" : "gather_57_indices_0", + "w" : 1, + "h" : 1, + "name" : "gather_57_indices_0", + "nd_rank" : 1, + "type" : "load_constant", + "k" : 1, + "bottom" : "", + "debug_info" : "gather_57_indices_0", + "n" : 1, + "weights" : { + + }, + "constant_blob" : 753 + }, + { + "bottom" : "1395_shape,gather_57_indices_0", + "validate_indices" : true, + "allow_negative_indices" : true, + "mode" : 0, + "weights" : { + + }, + "debug_info" : "gather_57", + "axis" : 0, + "top" : "gather_57", + "type" : "gather_nd", + "name" : "gather_57", + "batch_dims" : 0 + }, + { + "nB" : 768, + "top" : "1399", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 757, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "hidden_states.111", + "blob_biases" : 755, + "has_tanh" : 0, + "debug_info" : "1399", + "name" : "1399", + "has_prelu" : 0 + }, + { + "bottom" : "1399", + "alpha" : 0.125, + "operation" : 1, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "tensor.133", + "top" : "tensor.133", + "type" : "elementwise", + "name" : "tensor.133", + "beta" : 0 + }, + { + "name" : "1403", + "weights" : { + + }, + "dst_w" : 64, + "version" : 1, + "dst_n" : -1, + "dst_nd_rank" : 4, + "type" : "reshape", + "dst_h" : 12, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "tensor.133", + "debug_info" : "1403", + "dst_seq" : 1, + "dst_k" : 128, + "top" : "1403" + }, + { + "nB" : 768, + "top" : "tensor.135", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 761, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "hidden_states.111", + "blob_biases" : 759, + "has_tanh" : 0, + "debug_info" : "tensor.135", + "name" : "tensor.135", + "has_prelu" : 0 + }, + { + "bottom" : "gather_57,24,11,10", + "weights" : { + + }, + "nd_mode" : true, + "axis" : 0, + "debug_info" : "concat_101", + "top" : "concat_101", + "type" : "general_concat", + "name" : "concat_101" + }, + { + "name" : "1409", + "weights" : { + + }, + "dst_w" : 0, + "version" : 1, + "dst_n" : 0, + "type" : "reshape", + "dst_h" : 0, + "mode" : 0, + "bottom" : "tensor.135,concat_101", + "debug_info" : "1409", + "nd_mode" : 1, + "dst_seq" : 1, + "dst_k" : 0, + "top" : "1409" + }, + { + "has_prelu" : 0, + "top" : "tensor.137", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 765, + "type" : "inner_product", + "has_relu" : 0, + "attributes" : { + "basic_block_head" : 1 + }, + "bottom" : "hidden_states.111", + "debug_info" : "tensor.137", + "has_tanh" : 0, + "blob_biases" : 763, + "name" : "tensor.137", + "nB" : 768 + }, + { + "name" : "1416", + "weights" : { + + }, + "dst_w" : 0, + "version" : 1, + "dst_n" : 0, + "type" : "reshape", + "dst_h" : 0, + "mode" : 0, + "bottom" : "tensor.137,concat_101", + "debug_info" : "1416", + "nd_mode" : 1, + "dst_seq" : 1, + "dst_k" : 0, + "top" : "1416" + }, + { + "axis_seq" : 4, + "name" : "transpose_67", + "axis_n" : 3, + "axis_h" : 2, + "type" : "transpose", + "attributes" : { + "basic_block_head" : 1 + }, + "bottom" : "1409", + "axis_w" : 0, + "axis_k" : 1, + "debug_info" : "transpose_67", + "weights" : { + + }, + "top" : "transpose_67" + }, + { + "axis_h" : 0, + "axis_w" : 1, + "bottom" : "transpose_67", + "axis_k" : 2, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_65", + "top" : "transpose_65", + "type" : "transpose", + "name" : "transpose_65" + }, + { + "axis_h" : 2, + "axis_w" : 0, + "bottom" : "1403", + "axis_k" : 1, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_68", + "top" : "transpose_68", + "type" : "transpose", + "name" : "transpose_68" + }, + { + "bottom" : "transpose_68,transpose_65", + "weights" : { + + }, + "debug_info" : "attn_weights.67", + "top" : "attn_weights.67", + "type" : "batch_matmul", + "name" : "attn_weights.67", + "channel_mode" : false + }, + { + "bottom" : "transpose_67", + "weights" : { + + }, + "nd_mode" : 1, + "axis" : -1, + "debug_info" : "1421_shape", + "top" : "1421_shape", + "type" : "get_shape", + "name" : "1421_shape" + }, + { + "top" : "gather_59_indices_0", + "w" : 1, + "h" : 1, + "name" : "gather_59_indices_0", + "nd_rank" : 1, + "type" : "load_constant", + "k" : 1, + "bottom" : "", + "debug_info" : "gather_59_indices_0", + "n" : 1, + "weights" : { + + }, + "constant_blob" : 767 + }, + { + "bottom" : "1421_shape,gather_59_indices_0", + "validate_indices" : true, + "allow_negative_indices" : true, + "mode" : 0, + "weights" : { + + }, + "debug_info" : "gather_59", + "axis" : 0, + "top" : "gather_59", + "type" : "gather_nd", + "name" : "gather_59", + "batch_dims" : 0 + }, + { + "top" : "concat_103_values0_0", + "w" : 1, + "h" : 1, + "name" : "concat_103_values0_0", + "nd_rank" : 1, + "type" : "load_constant", + "k" : 1, + "bottom" : "", + "debug_info" : "concat_103_values0_0", + "n" : 1, + "weights" : { + + }, + "constant_blob" : 769 + }, + { + "top" : "concat_103_values1_0", + "w" : 1, + "h" : 1, + "name" : "concat_103_values1_0", + "nd_rank" : 1, + "type" : "load_constant", + "k" : 1, + "bottom" : "", + "debug_info" : "concat_103_values1_0", + "n" : 1, + "weights" : { + + }, + "constant_blob" : 771 + }, + { + "top" : "concat_103_values2_0", + "w" : 1, + "h" : 1, + "name" : "concat_103_values2_0", + "nd_rank" : 1, + "type" : "load_constant", + "k" : 1, + "bottom" : "", + "debug_info" : "concat_103_values2_0", + "n" : 1, + "weights" : { + + }, + "constant_blob" : 773 + }, + { + "bottom" : "concat_103_values0_0,concat_103_values1_0,concat_103_values2_0,gather_59", + "weights" : { + + }, + "nd_mode" : true, + "axis" : 0, + "debug_info" : "concat_103", + "top" : "concat_103", + "type" : "general_concat", + "name" : "concat_103" + }, + { + "top" : "causal_mask_begin_0", + "w" : 4, + "h" : 1, + "name" : "causal_mask_begin_0", + "nd_rank" : 1, + "type" : "load_constant", + "k" : 1, + "bottom" : "", + "debug_info" : "causal_mask_begin_0", + "n" : 1, + "weights" : { + + }, + "constant_blob" : 775 + }, + { + "begin_n" : 0, + "static_sizes_k" : 0, + "end_mask_h" : true, + "end_w" : 1, + "end_h" : 1, + "begin_h" : 0, + "stride_n" : 1, + "end_seq" : 1, + "begin_mask_seq" : false, + "begin_mask_h" : false, + "squeeze_mask_h" : false, + "stride_k" : 1, + "type" : "general_slice", + "begin_mask_k" : false, + "static_sizes_w" : 0, + "begin_w" : 0, + "end_mask_k" : true, + "use_static_sizes" : false, + "stride_h" : 1, + "static_sizes_n" : 0, + "begin_mask_n" : false, + "weights" : { + + }, + "name" : "causal_mask", + "stride_seq" : 1, + "end_k" : 1, + "static_sizes_seq" : 0, + "top" : "causal_mask", + "begin_seq" : 0, + "squeeze_mask_k" : false, + "begin_k" : 0, + "end_mask_w" : true, + "end_mask_seq" : false, + "end_mask_n" : false, + "debug_info" : "causal_mask", + "stride_w" : 1, + "squeeze_mask_seq" : false, + "static_sizes_h" : 0, + "begin_mask_w" : false, + "bottom" : "attention_mask,causal_mask_begin_0,concat_103", + "end_n" : 1, + "squeeze_mask_w" : false, + "squeeze_mask_n" : false + }, + { + "name" : "input.359", + "fused_relu" : 0, + "beta" : 0, + "operation" : 0, + "type" : "elementwise", + "alpha" : 1, + "attributes" : { + "basic_block_head" : 1 + }, + "bottom" : "attn_weights.67,causal_mask", + "debug_info" : "input.359", + "nd_mode" : true, + "weights" : { + + }, + "top" : "input.359" + }, + { + "bottom" : "input.359", + "weights" : { + + }, + "nd_axis" : -1, + "debug_info" : "input.361", + "top" : "input.361", + "type" : "softmax_nd", + "name" : "input.361" + }, + { + "axis_h" : 2, + "axis_w" : 0, + "bottom" : "1416", + "axis_k" : 1, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_66", + "top" : "transpose_66", + "type" : "transpose", + "name" : "transpose_66" + }, + { + "bottom" : "input.361,transpose_66", + "weights" : { + + }, + "debug_info" : "attn_output.89", + "top" : "attn_output.89", + "type" : "batch_matmul", + "name" : "attn_output.89", + "channel_mode" : false + }, + { + "axis_h" : 2, + "axis_w" : 0, + "bottom" : "attn_output.89", + "axis_k" : 1, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_64", + "top" : "transpose_64", + "type" : "transpose", + "name" : "transpose_64" + }, + { + "name" : "input.363", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 3, + "type" : "reshape", + "dst_h" : 128, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "transpose_64", + "debug_info" : "input.363", + "dst_seq" : 1, + "dst_k" : -1, + "top" : "input.363" + }, + { + "nB" : 768, + "top" : "input.365", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 779, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "input.363", + "blob_biases" : 777, + "has_tanh" : 0, + "debug_info" : "input.365", + "name" : "input.365", + "has_prelu" : 0 + }, + { + "bottom" : "input.357,input.365", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "input.367", + "nd_mode" : true, + "top" : "input.367", + "type" : "elementwise", + "name" : "input.367", + "beta" : 0 + }, + { + "name" : "hidden_states.115_reshape", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : -1, + "dst_nd_rank" : 4, + "type" : "reshape", + "dst_h" : 1, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "input.367", + "debug_info" : "hidden_states.115_reshape", + "dst_seq" : 1, + "dst_k" : 128, + "top" : "hidden_states.115_reshape" + }, + { + "bottom" : "hidden_states.115_reshape", + "weights" : { + + }, + "eps" : 9.9999997473787516e-06, + "debug_info" : "hidden_states.115_mvn", + "axis" : 1, + "normalization_mode" : 1, + "top" : "hidden_states.115_mvn", + "type" : "l2_normalize", + "name" : "hidden_states.115_mvn" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "hidden_states.115_scale", + "constant_blob" : 781, + "top" : "hidden_states.115_scale_constant_in_scale", + "w" : 768, + "type" : "load_constant", + "name" : "hidden_states.115_scale_constant_in_scale" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "hidden_states.115_scale", + "constant_blob" : 783, + "top" : "hidden_states.115_scale_constant_in_bias", + "w" : 768, + "type" : "load_constant", + "name" : "hidden_states.115_scale_constant_in_bias" + }, + { + "bottom" : "hidden_states.115_mvn,hidden_states.115_scale_constant_in_scale", + "alpha" : 1, + "operation" : 1, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "hidden_states.115_scale", + "top" : "hidden_states.115_scale_mul_out", + "type" : "elementwise", + "name" : "hidden_states.115_scale_mul_out", + "beta" : 0 + }, + { + "bottom" : "hidden_states.115_scale_mul_out,hidden_states.115_scale_constant_in_bias", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "hidden_states.115_scale", + "top" : "hidden_states.115_scale", + "type" : "elementwise", + "name" : "hidden_states.115_scale", + "beta" : 0 + }, + { + "name" : "hidden_states.115", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 3, + "type" : "reshape", + "dst_h" : 128, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "hidden_states.115_scale", + "debug_info" : "hidden_states.115", + "dst_seq" : 1, + "dst_k" : -1, + "top" : "hidden_states.115" + }, + { + "bottom" : "hidden_states.115", + "weights" : { + + }, + "nd_mode" : 1, + "axis" : -1, + "debug_info" : "1446_shape", + "top" : "1446_shape", + "type" : "get_shape", + "name" : "1446_shape" + }, + { + "top" : "gather_60_indices_0", + "w" : 1, + "h" : 1, + "name" : "gather_60_indices_0", + "nd_rank" : 1, + "type" : "load_constant", + "k" : 1, + "bottom" : "", + "debug_info" : "gather_60_indices_0", + "n" : 1, + "weights" : { + + }, + "constant_blob" : 785 + }, + { + "bottom" : "1446_shape,gather_60_indices_0", + "validate_indices" : true, + "allow_negative_indices" : true, + "mode" : 0, + "weights" : { + + }, + "debug_info" : "gather_60", + "axis" : 0, + "top" : "gather_60", + "type" : "gather_nd", + "name" : "gather_60", + "batch_dims" : 0 + }, + { + "nB" : 768, + "top" : "1450", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 789, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "hidden_states.115", + "blob_biases" : 787, + "has_tanh" : 0, + "debug_info" : "1450", + "name" : "1450", + "has_prelu" : 0 + }, + { + "bottom" : "1450", + "alpha" : 0.125, + "operation" : 1, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "tensor.139", + "top" : "tensor.139", + "type" : "elementwise", + "name" : "tensor.139", + "beta" : 0 + }, + { + "name" : "1454", + "weights" : { + + }, + "dst_w" : 64, + "version" : 1, + "dst_n" : -1, + "dst_nd_rank" : 4, + "type" : "reshape", + "dst_h" : 12, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "tensor.139", + "debug_info" : "1454", + "dst_seq" : 1, + "dst_k" : 128, + "top" : "1454" + }, + { + "nB" : 768, + "top" : "tensor.141", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 793, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "encoder_hidden_states", + "blob_biases" : 791, + "has_tanh" : 0, + "debug_info" : "tensor.141", + "name" : "tensor.141", + "has_prelu" : 0 + }, + { + "bottom" : "gather_60,24,11,10", + "weights" : { + + }, + "nd_mode" : true, + "axis" : 0, + "debug_info" : "concat_106", + "top" : "concat_106", + "type" : "general_concat", + "name" : "concat_106" + }, + { + "name" : "1460", + "weights" : { + + }, + "dst_w" : 0, + "version" : 1, + "dst_n" : 0, + "type" : "reshape", + "dst_h" : 0, + "mode" : 0, + "bottom" : "tensor.141,concat_106", + "debug_info" : "1460", + "nd_mode" : 1, + "dst_seq" : 1, + "dst_k" : 0, + "top" : "1460" + }, + { + "has_prelu" : 0, + "top" : "tensor", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 797, + "type" : "inner_product", + "has_relu" : 0, + "attributes" : { + "basic_block_head" : 1 + }, + "bottom" : "encoder_hidden_states", + "debug_info" : "tensor", + "has_tanh" : 0, + "blob_biases" : 795, + "name" : "tensor", + "nB" : 768 + }, + { + "name" : "1467", + "weights" : { + + }, + "dst_w" : 0, + "version" : 1, + "dst_n" : 0, + "type" : "reshape", + "dst_h" : 0, + "mode" : 0, + "bottom" : "tensor,concat_106", + "debug_info" : "1467", + "nd_mode" : 1, + "dst_seq" : 1, + "dst_k" : 0, + "top" : "1467" + }, + { + "axis_seq" : 4, + "name" : "transpose_61", + "axis_n" : 3, + "axis_h" : 0, + "type" : "transpose", + "attributes" : { + "basic_block_head" : 1 + }, + "bottom" : "1460", + "axis_w" : 2, + "axis_k" : 1, + "debug_info" : "transpose_61", + "weights" : { + + }, + "top" : "transpose_61" + }, + { + "axis_h" : 2, + "axis_w" : 0, + "bottom" : "1454", + "axis_k" : 1, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_62", + "top" : "transpose_62", + "type" : "transpose", + "name" : "transpose_62" + }, + { + "bottom" : "transpose_62,transpose_61", + "weights" : { + + }, + "debug_info" : "input.369", + "top" : "input.369", + "type" : "batch_matmul", + "name" : "input.369", + "channel_mode" : false + }, + { + "bottom" : "input.369", + "weights" : { + + }, + "nd_axis" : -1, + "debug_info" : "input.371", + "top" : "input.371", + "type" : "softmax_nd", + "name" : "input.371" + }, + { + "axis_h" : 2, + "axis_w" : 0, + "bottom" : "1467", + "axis_k" : 1, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_63", + "top" : "transpose_63", + "type" : "transpose", + "name" : "transpose_63" + }, + { + "bottom" : "input.371,transpose_63", + "weights" : { + + }, + "debug_info" : "attn_output.93", + "top" : "attn_output.93", + "type" : "batch_matmul", + "name" : "attn_output.93", + "channel_mode" : false + }, + { + "axis_h" : 2, + "axis_w" : 0, + "bottom" : "attn_output.93", + "axis_k" : 1, + "axis_n" : 3, + "axis_seq" : 4, + "weights" : { + + }, + "debug_info" : "transpose_60", + "top" : "transpose_60", + "type" : "transpose", + "name" : "transpose_60" + }, + { + "name" : "input.373", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 3, + "type" : "reshape", + "dst_h" : 128, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "transpose_60", + "debug_info" : "input.373", + "dst_seq" : 1, + "dst_k" : -1, + "top" : "input.373" + }, + { + "nB" : 768, + "top" : "input.375", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 801, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "input.373", + "blob_biases" : 799, + "has_tanh" : 0, + "debug_info" : "input.375", + "name" : "input.375", + "has_prelu" : 0 + }, + { + "bottom" : "input.367,input.375", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "input.377", + "nd_mode" : true, + "top" : "input.377", + "type" : "elementwise", + "name" : "input.377", + "beta" : 0 + }, + { + "name" : "input.379_reshape", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : -1, + "dst_nd_rank" : 4, + "type" : "reshape", + "dst_h" : 1, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "input.377", + "debug_info" : "input.379_reshape", + "dst_seq" : 1, + "dst_k" : 128, + "top" : "input.379_reshape" + }, + { + "bottom" : "input.379_reshape", + "weights" : { + + }, + "eps" : 9.9999997473787516e-06, + "debug_info" : "input.379_mvn", + "axis" : 1, + "normalization_mode" : 1, + "top" : "input.379_mvn", + "type" : "l2_normalize", + "name" : "input.379_mvn" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "input.379_scale", + "constant_blob" : 803, + "top" : "input.379_scale_constant_in_scale", + "w" : 768, + "type" : "load_constant", + "name" : "input.379_scale_constant_in_scale" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "input.379_scale", + "constant_blob" : 805, + "top" : "input.379_scale_constant_in_bias", + "w" : 768, + "type" : "load_constant", + "name" : "input.379_scale_constant_in_bias" + }, + { + "bottom" : "input.379_mvn,input.379_scale_constant_in_scale", + "alpha" : 1, + "operation" : 1, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "input.379_scale", + "top" : "input.379_scale_mul_out", + "type" : "elementwise", + "name" : "input.379_scale_mul_out", + "beta" : 0 + }, + { + "bottom" : "input.379_scale_mul_out,input.379_scale_constant_in_bias", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "input.379_scale", + "top" : "input.379_scale", + "type" : "elementwise", + "name" : "input.379_scale", + "beta" : 0 + }, + { + "name" : "input.379", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 3, + "type" : "reshape", + "dst_h" : 128, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "input.379_scale", + "debug_info" : "input.379", + "dst_seq" : 1, + "dst_k" : -1, + "top" : "input.379" + }, + { + "nB" : 768, + "top" : "input.381", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 3072, + "blob_weights" : 809, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "input.379", + "blob_biases" : 807, + "has_tanh" : 0, + "debug_info" : "input.381", + "name" : "input.381", + "has_prelu" : 0 + }, + { + "bottom" : "input.381", + "weights" : { + + }, + "mode" : 19, + "debug_info" : "input.383", + "top" : "input.383", + "type" : "activation", + "name" : "input.383" + }, + { + "nB" : 3072, + "top" : "input.387", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 768, + "blob_weights" : 813, + "type" : "inner_product", + "has_relu" : 0, + "bottom" : "input.383", + "blob_biases" : 811, + "has_tanh" : 0, + "debug_info" : "input.387", + "name" : "input.387", + "has_prelu" : 0 + }, + { + "bottom" : "input.377,input.387", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "input.389", + "top" : "input.389", + "type" : "elementwise", + "name" : "input.389", + "beta" : 0 + }, + { + "name" : "input_reshape", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : -1, + "dst_nd_rank" : 4, + "type" : "reshape", + "dst_h" : 1, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "input.389", + "debug_info" : "input_reshape", + "dst_seq" : 1, + "dst_k" : 128, + "top" : "input_reshape" + }, + { + "bottom" : "input_reshape", + "weights" : { + + }, + "eps" : 9.9999997473787516e-06, + "debug_info" : "input_mvn", + "axis" : 1, + "normalization_mode" : 1, + "top" : "input_mvn", + "type" : "l2_normalize", + "name" : "input_mvn" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "input_scale", + "constant_blob" : 815, + "top" : "input_scale_constant_in_scale", + "w" : 768, + "type" : "load_constant", + "name" : "input_scale_constant_in_scale" + }, + { + "bottom" : "", + "k" : 1, + "n" : 1, + "weights" : { + + }, + "h" : 1, + "debug_info" : "input_scale", + "constant_blob" : 817, + "top" : "input_scale_constant_in_bias", + "w" : 768, + "type" : "load_constant", + "name" : "input_scale_constant_in_bias" + }, + { + "bottom" : "input_mvn,input_scale_constant_in_scale", + "alpha" : 1, + "operation" : 1, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "input_scale", + "top" : "input_scale_mul_out", + "type" : "elementwise", + "name" : "input_scale_mul_out", + "beta" : 0 + }, + { + "bottom" : "input_scale_mul_out,input_scale_constant_in_bias", + "alpha" : 1, + "operation" : 0, + "weights" : { + + }, + "fused_relu" : 0, + "debug_info" : "input_scale", + "top" : "input_scale", + "type" : "elementwise", + "name" : "input_scale", + "beta" : 0 + }, + { + "name" : "input", + "weights" : { + + }, + "dst_w" : 768, + "version" : 1, + "dst_n" : 1, + "dst_nd_rank" : 3, + "type" : "reshape", + "dst_h" : 128, + "mode" : 0, + "dynamic_shape" : false, + "bottom" : "input_scale", + "debug_info" : "input", + "dst_seq" : 1, + "dst_k" : -1, + "top" : "input" + }, + { + "has_prelu" : 0, + "top" : "logits", + "has_biases" : 1, + "weights" : { + + }, + "nC" : 51865, + "blob_weights" : 821, + "type" : "inner_product", + "has_relu" : 0, + "attributes" : { + "is_output" : 1 + }, + "bottom" : "input", + "debug_info" : "1502", + "has_tanh" : 0, + "blob_biases" : 819, + "name" : "1502", + "nB" : 768 + } + ], + "storage" : "model.espresso.weights", + "metadata_in_weights" : [ + + ], + "analyses" : { + + }, + "properties" : { + + }, + "format_version" : 300, + "shape" : { + "layer_shapes" : { + "causal_mask.3" : { + "k" : 1, + "w" : 129, + "n" : 1, + "_rank" : 2, + "h" : 128 + }, + "hidden_states.55_scale_constant_in_scale" : { + "k" : 1, + "w" : 768, + "n" : 1, + "h" : 1 + }, + "positions" : { + "k" : 1, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "input.221" : { + "k" : 11, + "w" : 3072, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "transpose_136" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "hidden_states.115_scale_constant_in_bias" : { + "k" : 1, + "w" : 768, + "n" : 1, + "h" : 1 + }, + "input.219_reshape" : { + "k" : 128, + "w" : 768, + "n" : 11, + "_rank" : 4, + "h" : 1 + }, + "hidden_states.35_scale_constant_in_bias" : { + "k" : 1, + "w" : 768, + "n" : 1, + "h" : 1 + }, + "hidden_states.95_mvn" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "input.299" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "gather_7" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "gather_22" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "attn_output.85" : { + "k" : 1500, + "w" : 768, + "n" : 1, + "_rank" : 4, + "h" : 1 + }, + "tensor" : { + "k" : 1, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 1500 + }, + "causal_mask.7" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 4, + "h" : 1 + }, + "concat_40_values1_0" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "10" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "11" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "gather_37" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "input.347" : { + "k" : 11, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "concat_22_values1_0" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "concat_47" : { + "k" : 1, + "w" : 4, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "attn_weights.31" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "input.315_scale_mul_out" : { + "k" : 128, + "w" : 768, + "n" : 11, + "_rank" : 4, + "h" : 1 + }, + "input.145" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "tensor.73" : { + "k" : 11, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "tensor.25" : { + "k" : 11, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "transpose_142" : { + "k" : 1500, + "w" : 1, + "n" : 1, + "h" : 768 + }, + "input.241" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "attn_output.73" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "hidden_states.45_scale_mul_out" : { + "k" : 1, + "w" : 768, + "n" : 1, + "h" : 1 + }, + "tensor.137" : { + "k" : 11, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "hidden_states.65_mvn" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "hidden_states.71_reshape" : { + "k" : 128, + "w" : 768, + "n" : 11, + "_rank" : 4, + "h" : 1 + }, + "hidden_states.105_scale_constant_in_bias" : { + "k" : 1, + "w" : 768, + "n" : 1, + "h" : 1 + }, + "gather_52_indices_0" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "hidden_states.55_scale_mul_out" : { + "k" : 1, + "w" : 768, + "n" : 1, + "h" : 1 + }, + "gather_34_indices_0" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "gather_42" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "concat_52" : { + "k" : 1, + "w" : 4, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "gather_1_indices_0" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "transpose_146" : { + "k" : 128, + "w" : 11, + "n" : 1, + "h" : 768 + }, + "1395_shape" : { + "k" : 1, + "w" : 3, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "attn_output.61" : { + "k" : 1500, + "w" : 768, + "n" : 1, + "_rank" : 4, + "h" : 1 + }, + "transpose_94" : { + "k" : 128, + "w" : 768, + "n" : 1, + "h" : 11 + }, + "attn_output.89" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "gather_57" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "input.367" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "transpose_101" : { + "k" : 128, + "w" : 11, + "n" : 1, + "h" : 768 + }, + "concat_67" : { + "k" : 1, + "w" : 4, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "hidden_states.75" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "input.319" : { + "k" : 11, + "w" : 3072, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "tensor.93" : { + "k" : 1, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 1500 + }, + "input.165" : { + "k" : 11, + "w" : 768, + "n" : 1, + "h" : 128 + }, + "hidden_states.5_scale" : { + "k" : 128, + "w" : 768, + "n" : 1, + "_rank" : 4, + "h" : 1 + }, + "concat_85_values0_0" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "input.117" : { + "k" : 11, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "tensor.45" : { + "k" : 1, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 1500 + }, + "hidden_states.35_mvn" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "input.123_scale_constant_in_bias" : { + "k" : 1, + "w" : 768, + "n" : 1, + "h" : 1 + }, + "input.261" : { + "k" : 11, + "w" : 768, + "n" : 1, + "h" : 128 + }, + "1282" : { + "k" : 11, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "concat_67_values0_0" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "tensor.115" : { + "k" : 768, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "attn_output.77" : { + "k" : 1500, + "w" : 768, + "n" : 1, + "_rank" : 4, + "h" : 1 + }, + "hidden_states.25" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "concat_103" : { + "k" : 1, + "w" : 4, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "input.213" : { + "k" : 11, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "hidden_states.105" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "1333" : { + "k" : 768, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "concat_49_values0_0" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "transpose_105" : { + "k" : 1, + "w" : 768, + "n" : 1, + "_rank" : 4, + "h" : 1500 + }, + "tensor.5" : { + "k" : 1, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "causal_mask.25_begin_0" : { + "k" : 1, + "w" : 4, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "hidden_states.115_scale_constant_in_scale" : { + "k" : 1, + "w" : 768, + "n" : 1, + "h" : 1 + }, + "hidden_states.81_reshape" : { + "k" : 128, + "w" : 768, + "n" : 11, + "_rank" : 4, + "h" : 1 + }, + "hidden_states.31_scale_constant_in_bias" : { + "k" : 1, + "w" : 768, + "n" : 1, + "h" : 1 + }, + "transpose_87" : { + "k" : 1, + "w" : 768, + "n" : 1, + "_rank" : 4, + "h" : 1500 + }, + "transpose_152" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "gather_14" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "hidden_states.35_scale_mul_out" : { + "k" : 1, + "w" : 768, + "n" : 1, + "h" : 1 + }, + "attn_output.65" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "input.387" : { + "k" : 11, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "transpose_72" : { + "k" : 1500, + "w" : 768, + "n" : 1, + "h" : 1 + }, + "hidden_states.51_scale_constant_in_scale" : { + "k" : 1, + "w" : 768, + "n" : 1, + "h" : 1 + }, + "gather_29" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "input.339" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "transpose_109" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "input.379_scale_mul_out" : { + "k" : 128, + "w" : 768, + "n" : 11, + "_rank" : 4, + "h" : 1 + }, + "input.185" : { + "k" : 11, + "w" : 768, + "n" : 1, + "h" : 128 + }, + "tensor.65" : { + "k" : 11, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "concat_0" : { + "k" : 1, + "w" : 4, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "hidden_states.1_scale" : { + "k" : 128, + "w" : 768, + "n" : 1, + "_rank" : 4, + "h" : 1 + }, + "input.137" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "gather_4_indices_0" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "input.281" : { + "k" : 11, + "w" : 768, + "n" : 1, + "h" : 128 + }, + "tensor.17" : { + "k" : 11, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "transpose_156" : { + "k" : 128, + "w" : 768, + "n" : 1, + "h" : 11 + }, + "gather_59_indices_0" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "1286" : { + "k" : 128, + "w" : 64, + "n" : 11, + "_rank" : 4, + "h" : 12 + }, + "input.233" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "transpose_111" : { + "k" : 128, + "w" : 768, + "n" : 1, + "h" : 11 + }, + "attn_output.53" : { + "k" : 1500, + "w" : 768, + "n" : 1, + "_rank" : 4, + "h" : 1 + }, + "gather_60_indices_0" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "1337" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "gather_1" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "input_reshape" : { + "k" : 128, + "w" : 768, + "n" : 11, + "_rank" : 4, + "h" : 1 + }, + "1052" : { + "k" : 128, + "w" : 64, + "n" : 11, + "_rank" : 4, + "h" : 12 + }, + "hidden_states.55_scale" : { + "k" : 1, + "w" : 768, + "n" : 1, + "h" : 1 + }, + "gather_42_indices_0" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "hidden_states.51" : { + "k" : 11, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "1350" : { + "k" : 1, + "w" : 768, + "n" : 1, + "h" : 1500 + }, + "causal_mask.15_begin_0" : { + "k" : 1, + "w" : 4, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "1103" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "gather_24_indices_0" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "concat_92" : { + "k" : 1, + "w" : 4, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "hidden_states.1_mvn" : { + "k" : 128, + "w" : 768, + "n" : 1, + "_rank" : 4, + "h" : 1 + }, + "input.123_scale" : { + "k" : 128, + "w" : 768, + "n" : 11, + "_rank" : 4, + "h" : 1 + }, + "gather_34" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "transpose_65" : { + "k" : 128, + "w" : 11, + "n" : 1, + "h" : 768 + }, + "attn_output.69" : { + "k" : 1500, + "w" : 768, + "n" : 1, + "_rank" : 4, + "h" : 1 + }, + "attn_output.41" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "transpose_115" : { + "k" : 1500, + "w" : 1, + "n" : 1, + "h" : 768 + }, + "gather_49" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "attn_weights.37" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "input.359" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "hidden_states.75_scale_constant_in_scale" : { + "k" : 1, + "w" : 768, + "n" : 1, + "h" : 1 + }, + "input.157" : { + "k" : 11, + "w" : 3072, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "tensor.85" : { + "k" : 11, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "1403" : { + "k" : 128, + "w" : 64, + "n" : 11, + "_rank" : 4, + "h" : 12 + }, + "transpose_162" : { + "k" : 1500, + "w" : 768, + "n" : 1, + "h" : 1 + }, + "input.251_scale_constant_in_bias" : { + "k" : 1, + "w" : 768, + "n" : 1, + "h" : 1 + }, + "tensor.37" : { + "k" : 11, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "input.109" : { + "k" : 768, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "concat_7" : { + "k" : 1, + "w" : 4, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "hidden_states.25_scale_mul_out" : { + "k" : 1, + "w" : 768, + "n" : 1, + "h" : 1 + }, + "input.253" : { + "k" : 11, + "w" : 3072, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "tensor.127" : { + "k" : 768, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "hidden_states.25_scale_constant_in_bias" : { + "k" : 1, + "w" : 768, + "n" : 1, + "h" : 1 + }, + "attn_output.57" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "hidden_states.111_scale_mul_out" : { + "k" : 128, + "w" : 768, + "n" : 11, + "_rank" : 4, + "h" : 1 + }, + "input.205" : { + "k" : 768, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "transpose_119" : { + "k" : 128, + "w" : 11, + "n" : 1, + "h" : 768 + }, + "78_shape" : { + "k" : 1, + "w" : 3, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "concat_40_values0_0" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "input.301" : { + "k" : 768, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "hidden_states.81_mvn" : { + "k" : 128, + "w" : 768, + "n" : 11, + "_rank" : 4, + "h" : 1 + }, + "gather_7_indices_0" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "hidden_states.81_scale" : { + "k" : 128, + "w" : 768, + "n" : 11, + "_rank" : 4, + "h" : 1 + }, + "concat_22_values0_0" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "gather_54" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "transpose_166" : { + "k" : 128, + "w" : 768, + "n" : 1, + "h" : 1 + }, + "hidden_states.31_scale" : { + "k" : 128, + "w" : 768, + "n" : 11, + "_rank" : 4, + "h" : 1 + }, + "transpose_121" : { + "k" : 128, + "w" : 768, + "n" : 1, + "h" : 11 + }, + "concat_16" : { + "k" : 1, + "w" : 4, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "input.379" : { + "k" : 11, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "attn_output.45" : { + "k" : 1500, + "w" : 768, + "n" : 1, + "_rank" : 4, + "h" : 1 + }, + "input.123_scale_mul_out" : { + "k" : 128, + "w" : 768, + "n" : 11, + "_rank" : 4, + "h" : 1 + }, + "concat_79" : { + "k" : 1, + "w" : 4, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "1058" : { + "k" : 11, + "w" : 768, + "n" : 1, + "h" : 128 + }, + "input.177" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "input.187_scale_mul_out" : { + "k" : 128, + "w" : 768, + "n" : 11, + "_rank" : 4, + "h" : 1 + }, + "tensor.57" : { + "k" : 1, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 1500 + }, + "1109" : { + "k" : 1, + "w" : 768, + "n" : 1, + "h" : 1500 + }, + "gather_49_indices_0" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "concat_103_values0_0" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "hidden_states.95_scale_constant_in_bias" : { + "k" : 1, + "w" : 768, + "n" : 1, + "h" : 1 + }, + "tensor.105" : { + "k" : 1, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 1500 + }, + "input.273" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "input.29" : { + "k" : 11, + "w" : 3072, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "input.59" : { + "k" : 11, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "transpose_125" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "transpose_97" : { + "k" : 1500, + "w" : 1, + "n" : 1, + "h" : 768 + }, + "input.89" : { + "k" : 11, + "w" : 768, + "n" : 1, + "h" : 128 + }, + "hidden_states.91_reshape" : { + "k" : 128, + "w" : 768, + "n" : 11, + "_rank" : 4, + "h" : 1 + }, + "attn_output.33" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "gather_50_indices_0" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "hidden_states.51_mvn" : { + "k" : 128, + "w" : 768, + "n" : 11, + "_rank" : 4, + "h" : 1 + }, + "gather_32_indices_0" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "transpose_82" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "gather_14_indices_0" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "576_shape" : { + "k" : 1, + "w" : 3, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "1409" : { + "k" : 11, + "w" : 768, + "n" : 1, + "h" : 128 + }, + "hidden_states.15_scale_mul_out" : { + "k" : 1, + "w" : 768, + "n" : 1, + "h" : 1 + }, + "transpose_129" : { + "k" : 128, + "w" : 768, + "n" : 1, + "h" : 11 + }, + "attn_output.49" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "attn_output.21" : { + "k" : 1500, + "w" : 768, + "n" : 1, + "_rank" : 4, + "h" : 1 + }, + "input.197" : { + "k" : 11, + "w" : 768, + "n" : 1, + "h" : 128 + }, + "concat_94_values2_0" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "input.149" : { + "k" : 11, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "693_shape" : { + "k" : 1, + "w" : 3, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "tensor.77" : { + "k" : 11, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "342_shape" : { + "k" : 1, + "w" : 3, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "input.293" : { + "k" : 11, + "w" : 768, + "n" : 1, + "h" : 128 + }, + "input.37" : { + "k" : 11, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "hidden_states.21_mvn" : { + "k" : 128, + "w" : 768, + "n" : 11, + "_rank" : 4, + "h" : 1 + }, + "transpose_131" : { + "k" : 12, + "w" : 64, + "n" : 11, + "_rank" : 4, + "h" : 128 + }, + "input.67" : { + "k" : 11, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "tensor.29" : { + "k" : 11, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "input.245" : { + "k" : 11, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "tensor.139" : { + "k" : 768, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "attn_output.37" : { + "k" : 1500, + "w" : 768, + "n" : 1, + "_rank" : 4, + "h" : 1 + }, + "transpose_75" : { + "k" : 128, + "w" : 768, + "n" : 1, + "h" : 11 + }, + "input.341" : { + "k" : 11, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "hidden_states.21_scale_constant_in_bias" : { + "k" : 1, + "w" : 768, + "n" : 1, + "h" : 1 + }, + "122" : { + "k" : 1, + "w" : 768, + "n" : 1, + "h" : 128 + }, + "hidden_states.101_scale" : { + "k" : 128, + "w" : 768, + "n" : 11, + "_rank" : 4, + "h" : 1 + }, + "hidden_states.71_scale_constant_in_scale" : { + "k" : 1, + "w" : 768, + "n" : 1, + "h" : 1 + }, + "inputs_embeds" : { + "k" : 1, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "transpose_60" : { + "k" : 1, + "w" : 768, + "n" : 1, + "_rank" : 4, + "h" : 1500 + }, + "input.27_scale" : { + "k" : 128, + "w" : 768, + "n" : 11, + "_rank" : 4, + "h" : 1 + }, + "concat_56" : { + "k" : 1, + "w" : 4, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "82" : { + "k" : 1, + "w" : 129, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "459_shape" : { + "k" : 1, + "w" : 3, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "transpose_135" : { + "k" : 1500, + "w" : 768, + "n" : 1, + "h" : 1 + }, + "108_shape" : { + "k" : 1, + "w" : 3, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "84" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 2, + "h" : 128 + }, + "input.219_scale_constant_in_scale" : { + "k" : 1, + "w" : 768, + "n" : 1, + "h" : 1 + }, + "attn_output.25" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "gather_57_indices_0" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "85" : { + "k" : 1, + "w" : 129, + "n" : 1, + "_rank" : 2, + "h" : 128 + }, + "input.169" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "tensor.97" : { + "k" : 11, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "input.15" : { + "k" : 1, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "gather_39_indices_0" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "87" : { + "k" : 1, + "w" : 129, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "350" : { + "k" : 128, + "w" : 64, + "n" : 11, + "_rank" : 4, + "h" : 12 + }, + "input.75" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "tensor.49" : { + "k" : 11, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "524" : { + "k" : 1, + "w" : 768, + "n" : 1, + "h" : 1500 + }, + "88" : { + "k" : 1, + "w" : 129, + "n" : 1, + "_rank" : 4, + "h" : 128 + }, + "1044_shape" : { + "k" : 1, + "w" : 3, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "input.45" : { + "k" : 768, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "gather_40_indices_0" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "hidden_states.15_reshape" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "input.265" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "transpose_139" : { + "k" : 128, + "w" : 768, + "n" : 1, + "h" : 11 + }, + "gather_47_indices_0" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "gather_2" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "hidden_states.55" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "input.217" : { + "k" : 11, + "w" : 768, + "n" : 1, + "h" : 128 + }, + "225_shape" : { + "k" : 1, + "w" : 3, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "gather_22_indices_0" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "attn_output.1" : { + "k" : 128, + "w" : 768, + "n" : 1, + "_rank" : 4, + "h" : 1 + }, + "attn_output.13" : { + "k" : 1500, + "w" : 768, + "n" : 1, + "_rank" : 4, + "h" : 1 + }, + "824" : { + "k" : 11, + "w" : 768, + "n" : 1, + "h" : 128 + }, + "hidden_states.91_scale_constant_in_bias" : { + "k" : 1, + "w" : 768, + "n" : 1, + "h" : 1 + }, + "tensor.117" : { + "k" : 1, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 1500 + }, + "129" : { + "k" : 1, + "w" : 768, + "n" : 1, + "h" : 128 + }, + "input.59_reshape" : { + "k" : 128, + "w" : 768, + "n" : 11, + "_rank" : 4, + "h" : 1 + }, + "real_div_0" : { + "k" : 1, + "w" : 4, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "concat_61" : { + "k" : 1, + "w" : 4, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "hidden_states.111_scale" : { + "k" : 128, + "w" : 768, + "n" : 11, + "_rank" : 4, + "h" : 1 + }, + "input.313" : { + "k" : 11, + "w" : 768, + "n" : 1, + "h" : 128 + }, + "229" : { + "k" : 11, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "transpose_68" : { + "k" : 12, + "w" : 64, + "n" : 11, + "_rank" : 4, + "h" : 128 + }, + "input.361" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "input.219_scale_constant_in_bias" : { + "k" : 1, + "w" : 768, + "n" : 1, + "h" : 1 + }, + "concat_13" : { + "k" : 1, + "w" : 4, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "hidden_states.95_scale_constant_in_scale" : { + "k" : 1, + "w" : 768, + "n" : 1, + "h" : 1 + }, + "1446_shape" : { + "k" : 1, + "w" : 4, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "transpose_141" : { + "k" : 1, + "w" : 768, + "n" : 1, + "_rank" : 4, + "h" : 1500 + }, + "180" : { + "k" : 1, + "w" : 768, + "n" : 1, + "h" : 1500 + }, + "concat_76" : { + "k" : 1, + "w" : 4, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "input_scale_constant_in_bias" : { + "k" : 1, + "w" : 768, + "n" : 1, + "h" : 1 + }, + "input.111" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "280" : { + "k" : 768, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "752" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "attn_output.29" : { + "k" : 1500, + "w" : 768, + "n" : 1, + "_rank" : 4, + "h" : 1 + }, + "attn_output.5" : { + "k" : 1500, + "w" : 768, + "n" : 1, + "_rank" : 4, + "h" : 1 + }, + "hidden_states.25_scale_constant_in_scale" : { + "k" : 1, + "w" : 768, + "n" : 1, + "h" : 1 + }, + "causal_mask.13_begin_0" : { + "k" : 1, + "w" : 4, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "cast_100" : { + "k" : 1, + "w" : 129, + "n" : 1, + "_rank" : 2, + "h" : 128 + }, + "356" : { + "k" : 11, + "w" : 768, + "n" : 1, + "h" : 128 + }, + "480" : { + "k" : 11, + "w" : 768, + "n" : 1, + "h" : 128 + }, + "input.189" : { + "k" : 11, + "w" : 3072, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "input.7" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "580" : { + "k" : 11, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "input.53" : { + "k" : 11, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "concat_58_values2_0" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "input.83" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "input.23" : { + "k" : 11, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "tensor.69" : { + "k" : 1, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 1500 + }, + "concat_76_values2_0" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "hidden_states.15_scale_constant_in_bias" : { + "k" : 1, + "w" : 768, + "n" : 1, + "h" : 1 + }, + "transpose_145" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "input.285" : { + "k" : 11, + "w" : 3072, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "transpose_92" : { + "k" : 128, + "w" : 11, + "n" : 1, + "h" : 768 + }, + "input.237" : { + "k" : 768, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "transpose_100" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "hidden_states.25_reshape" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "284" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "input.381" : { + "k" : 11, + "w" : 3072, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "attn_output.17" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "attn_output.9" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "gather_9" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "hidden_states.81" : { + "k" : 11, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "input.333" : { + "k" : 768, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "causal_mask.23_begin_0" : { + "k" : 1, + "w" : 4, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "584" : { + "k" : 128, + "w" : 64, + "n" : 11, + "_rank" : 4, + "h" : 12 + }, + "758" : { + "k" : 1, + "w" : 768, + "n" : 1, + "h" : 1500 + }, + "882" : { + "k" : 1, + "w" : 768, + "n" : 1, + "h" : 1500 + }, + "982" : { + "k" : 768, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "input.131" : { + "k" : 11, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "input.283_scale_mul_out" : { + "k" : 128, + "w" : 768, + "n" : 11, + "_rank" : 4, + "h" : 1 + }, + "transpose_149" : { + "k" : 12, + "w" : 64, + "n" : 11, + "_rank" : 4, + "h" : 128 + }, + "input.347_scale" : { + "k" : 128, + "w" : 768, + "n" : 11, + "_rank" : 4, + "h" : 1 + }, + "tensor.11" : { + "k" : 1, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 1500 + }, + "input.91_reshape" : { + "k" : 128, + "w" : 768, + "n" : 11, + "_rank" : 4, + "h" : 1 + }, + "transpose_104" : { + "k" : 12, + "w" : 64, + "n" : 11, + "_rank" : 4, + "h" : 128 + }, + "attn_weights.25" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "hidden_states.31" : { + "k" : 11, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "1460" : { + "k" : 1, + "w" : 768, + "n" : 1, + "h" : 1500 + }, + "input.31" : { + "k" : 11, + "w" : 3072, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "hidden_states.85_scale" : { + "k" : 1, + "w" : 768, + "n" : 1, + "h" : 1 + }, + "input.61" : { + "k" : 11, + "w" : 3072, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "input.91" : { + "k" : 11, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "1099" : { + "k" : 768, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "gather_29_indices_0" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "hidden_states.85_scale_constant_in_bias" : { + "k" : 1, + "w" : 768, + "n" : 1, + "h" : 1 + }, + "tensor.89" : { + "k" : 11, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "transpose_85" : { + "k" : 128, + "w" : 768, + "n" : 1, + "h" : 11 + }, + "gather_55_indices_0" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "hidden_states.35_scale" : { + "k" : 1, + "w" : 768, + "n" : 1, + "h" : 1 + }, + "transpose_151" : { + "k" : 1500, + "w" : 1, + "n" : 1, + "h" : 768 + }, + "gather_37_indices_0" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "gather_30_indices_0" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "986" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "input.251_reshape" : { + "k" : 128, + "w" : 768, + "n" : 11, + "_rank" : 4, + "h" : 1 + }, + "input.315_mvn" : { + "k" : 128, + "w" : 768, + "n" : 11, + "_rank" : 4, + "h" : 1 + }, + "tensor.129" : { + "k" : 1, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 1500 + }, + "transpose_70" : { + "k" : 1500, + "w" : 1, + "n" : 1, + "h" : 768 + }, + "gather_12_indices_0" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "attn_weights.61" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "input.209" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "transpose_108" : { + "k" : 1500, + "w" : 768, + "n" : 1, + "h" : 1 + }, + "input.123_reshape" : { + "k" : 128, + "w" : 768, + "n" : 11, + "_rank" : 4, + "h" : 1 + }, + "1399" : { + "k" : 11, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "hidden_states.85_mvn" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "input.305" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "485_shape" : { + "k" : 1, + "w" : 4, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "input.91_scale_constant_in_bias" : { + "k" : 1, + "w" : 768, + "n" : 1, + "h" : 1 + }, + "transpose_155" : { + "k" : 128, + "w" : 11, + "n" : 1, + "h" : 768 + }, + "decoder_input_ids" : { + "k" : 1, + "w" : 128, + "n" : 1, + "_rank" : 2, + "h" : 1 + }, + "input.151" : { + "k" : 11, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "tensor.141" : { + "k" : 1, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 1500 + }, + "1165" : { + "k" : 11, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "input_scale" : { + "k" : 128, + "w" : 768, + "n" : 11, + "_rank" : 4, + "h" : 1 + }, + "encoder_hidden_states" : { + "k" : 1, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 1500 + }, + "input.103" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "causal_mask.9_begin_0" : { + "k" : 1, + "w" : 4, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "hidden_states.5_scale_constant_in_scale" : { + "k" : 1, + "w" : 768, + "n" : 1, + "h" : 1 + }, + "tensor.31" : { + "k" : 768, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "transpose_110" : { + "k" : 128, + "w" : 11, + "n" : 1, + "h" : 768 + }, + "1216" : { + "k" : 768, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "concat_94_values1_0" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "transpose_78" : { + "k" : 1, + "w" : 768, + "n" : 1, + "_rank" : 4, + "h" : 1500 + }, + "input.315_scale_constant_in_scale" : { + "k" : 1, + "w" : 768, + "n" : 1, + "h" : 1 + }, + "input.91_scale_mul_out" : { + "k" : 128, + "w" : 768, + "n" : 11, + "_rank" : 4, + "h" : 1 + }, + "transpose_63" : { + "k" : 1500, + "w" : 768, + "n" : 1, + "h" : 1 + }, + "transpose_159" : { + "k" : 1, + "w" : 768, + "n" : 1, + "_rank" : 4, + "h" : 1500 + }, + "251_shape" : { + "k" : 1, + "w" : 4, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "tensor.107" : { + "k" : 1, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 1500 + }, + "input.277" : { + "k" : 11, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "hidden_states.91_scale_constant_in_scale" : { + "k" : 1, + "w" : 768, + "n" : 1, + "h" : 1 + }, + "hidden_states.61_scale" : { + "k" : 128, + "w" : 768, + "n" : 11, + "_rank" : 4, + "h" : 1 + }, + "tensor.7" : { + "k" : 1, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "concat_31_values2_0" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "transpose_114" : { + "k" : 1, + "w" : 768, + "n" : 1, + "_rank" : 4, + "h" : 1500 + }, + "input.229" : { + "k" : 11, + "w" : 768, + "n" : 1, + "h" : 128 + }, + "hidden_states.55_mvn" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "hidden_states.11_scale_constant_in_bias" : { + "k" : 1, + "w" : 768, + "n" : 1, + "h" : 1 + }, + "input.315_reshape" : { + "k" : 128, + "w" : 768, + "n" : 11, + "_rank" : 4, + "h" : 1 + }, + "hidden_states.11_scale" : { + "k" : 128, + "w" : 768, + "n" : 11, + "_rank" : 4, + "h" : 1 + }, + "concat_13_values2_0" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "input.373" : { + "k" : 11, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "hidden_states.21_scale_constant_in_scale" : { + "k" : 1, + "w" : 768, + "n" : 1, + "h" : 1 + }, + "input.325" : { + "k" : 11, + "w" : 768, + "n" : 1, + "h" : 128 + }, + "gather_15" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "transpose_161" : { + "k" : 12, + "w" : 64, + "n" : 1, + "_rank" : 4, + "h" : 128 + }, + "input.347_scale_constant_in_scale" : { + "k" : 1, + "w" : 768, + "n" : 1, + "h" : 1 + }, + "concat_25" : { + "k" : 1, + "w" : 4, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "input.171" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "1095_shape" : { + "k" : 1, + "w" : 4, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "concat_88" : { + "k" : 1, + "w" : 4, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "_neg_y_causal_mask.3" : { + "k" : 1, + "w" : 129, + "n" : 1, + "_rank" : 2, + "h" : 128 + }, + "input.123" : { + "k" : 11, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "368_shape" : { + "k" : 1, + "w" : 4, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "tensor.51" : { + "k" : 11, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "1169" : { + "k" : 128, + "w" : 64, + "n" : 11, + "_rank" : 4, + "h" : 12 + }, + "1182" : { + "k" : 11, + "w" : 768, + "n" : 1, + "h" : 128 + }, + "transpose_118" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "1467" : { + "k" : 1, + "w" : 768, + "n" : 1, + "h" : 1500 + }, + "logits" : { + "k" : 11, + "w" : 51865, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "concat_2" : { + "k" : 1, + "w" : 4, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "1233" : { + "k" : 1, + "w" : 768, + "n" : 1, + "h" : 1500 + }, + "gather_45_indices_0" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "gather_19_indices_0" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "1304_shape" : { + "k" : 1, + "w" : 4, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "gather_27_indices_0" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "transpose_165" : { + "k" : 128, + "w" : 768, + "n" : 1, + "h" : 1 + }, + "hidden_states.35_reshape" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "gather_20_indices_0" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "input_scale_mul_out" : { + "k" : 128, + "w" : 768, + "n" : 11, + "_rank" : 4, + "h" : 1 + }, + "hidden_states.25_mvn" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "input.297" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "transpose_120" : { + "k" : 128, + "w" : 768, + "n" : 1, + "h" : 11 + }, + "hidden_states.81_scale_constant_in_bias" : { + "k" : 1, + "w" : 768, + "n" : 1, + "h" : 1 + }, + "input.347_scale_constant_in_bias" : { + "k" : 1, + "w" : 768, + "n" : 1, + "h" : 1 + }, + "hidden_states.5_scale_mul_out" : { + "k" : 128, + "w" : 768, + "n" : 1, + "_rank" : 4, + "h" : 1 + }, + "393_shape" : { + "k" : 1, + "w" : 4, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "input.249" : { + "k" : 11, + "w" : 768, + "n" : 1, + "h" : 128 + }, + "gather_20" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "134_shape" : { + "k" : 1, + "w" : 4, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "input.345" : { + "k" : 11, + "w" : 768, + "n" : 1, + "h" : 128 + }, + "gather_35" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "hidden_states.1" : { + "k" : 1, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "input.27_mvn" : { + "k" : 128, + "w" : 768, + "n" : 11, + "_rank" : 4, + "h" : 1 + }, + "hidden_states.45_scale_constant_in_scale" : { + "k" : 1, + "w" : 768, + "n" : 1, + "h" : 1 + }, + "input.191" : { + "k" : 11, + "w" : 3072, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "concat_76_values1_0" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "cache_position" : { + "k" : 1, + "w" : 128, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "hidden_states.111" : { + "k" : 11, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "input.143" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "tensor.71" : { + "k" : 1, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 1500 + }, + "input.155_scale_constant_in_bias" : { + "k" : 1, + "w" : 768, + "n" : 1, + "h" : 1 + }, + "transpose_124" : { + "k" : 1500, + "w" : 1, + "n" : 1, + "h" : 768 + }, + "concat_58_values1_0" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "transpose_95" : { + "k" : 12, + "w" : 64, + "n" : 11, + "_rank" : 4, + "h" : 128 + }, + "tensor.23" : { + "k" : 1, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 1500 + }, + "input.9" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "transpose_80" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "input" : { + "k" : 11, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "hidden_states.105_mvn" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "hidden_states.105_scale_mul_out" : { + "k" : 1, + "w" : 768, + "n" : 1, + "h" : 1 + }, + "hidden_states.45_reshape" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "input.59_scale_constant_in_bias" : { + "k" : 1, + "w" : 768, + "n" : 1, + "h" : 1 + }, + "tensor.119" : { + "k" : 1, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 1500 + }, + "input.269" : { + "k" : 768, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "transpose_128" : { + "k" : 128, + "w" : 11, + "n" : 1, + "h" : 768 + }, + "gather_40" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "attn_weights.67" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "hidden_states.85" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "hidden_states.1_scale_constant_in_bias" : { + "k" : 1, + "w" : 768, + "n" : 1, + "h" : 1 + }, + "276_shape" : { + "k" : 1, + "w" : 4, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "gather_55" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "input.365" : { + "k" : 768, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "concat_65" : { + "k" : 1, + "w" : 4, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "transpose_88" : { + "k" : 1500, + "w" : 1, + "n" : 1, + "h" : 768 + }, + "input.317" : { + "k" : 11, + "w" : 3072, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "input.59_scale_mul_out" : { + "k" : 128, + "w" : 768, + "n" : 11, + "_rank" : 4, + "h" : 1 + }, + "tensor.91" : { + "k" : 768, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "transpose_130" : { + "k" : 128, + "w" : 768, + "n" : 1, + "h" : 11 + }, + "input.163" : { + "k" : 11, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "tensor.131" : { + "k" : 1, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 1500 + }, + "hidden_states.35" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "input.115" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "tensor.43" : { + "k" : 768, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "transpose_73" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "input.347_mvn" : { + "k" : 128, + "w" : 768, + "n" : 11, + "_rank" : 4, + "h" : 1 + }, + "input.91_scale" : { + "k" : 128, + "w" : 768, + "n" : 11, + "_rank" : 4, + "h" : 1 + }, + "input.379_scale_constant_in_scale" : { + "k" : 1, + "w" : 768, + "n" : 1, + "h" : 1 + }, + "input.251_mvn" : { + "k" : 128, + "w" : 768, + "n" : 11, + "_rank" : 4, + "h" : 1 + }, + "hidden_states.75_scale_constant_in_bias" : { + "k" : 1, + "w" : 768, + "n" : 1, + "h" : 1 + }, + "gather_35_indices_0" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "input.211" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "tensor.1" : { + "k" : 1, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "gather_17_indices_0" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "gather_10_indices_0" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "causal_mask.1" : { + "k" : 1, + "w" : 129, + "n" : 1, + "_rank" : 2, + "h" : 128 + }, + "transpose_134" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "gather_60" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "concat_70" : { + "k" : 1, + "w" : 4, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "gather_12" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "concat_22" : { + "k" : 1, + "w" : 4, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "concat_85" : { + "k" : 1, + "w" : 4, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "concat_94_values0_0" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "hidden_states.5" : { + "k" : 1, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "input.337" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "gather_27" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "hidden_states.71_mvn" : { + "k" : 128, + "w" : 768, + "n" : 11, + "_rank" : 4, + "h" : 1 + }, + "159_shape" : { + "k" : 1, + "w" : 3, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "input.183" : { + "k" : 11, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "causal_mask.5" : { + "k" : 1, + "w" : 129, + "n" : 1, + "_rank" : 2, + "h" : 128 + }, + "input.283_scale_constant_in_bias" : { + "k" : 1, + "w" : 768, + "n" : 1, + "h" : 1 + }, + "transpose_66" : { + "k" : 128, + "w" : 768, + "n" : 1, + "h" : 11 + }, + "transpose_138" : { + "k" : 128, + "w" : 768, + "n" : 1, + "h" : 11 + }, + "tensor.63" : { + "k" : 11, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "input.155_scale" : { + "k" : 128, + "w" : 768, + "n" : 11, + "_rank" : 4, + "h" : 1 + }, + "input.135" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "tensor.15" : { + "k" : 11, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "attn_weights.13" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "hidden_states.61" : { + "k" : 11, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "input.231" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "concat_31_values1_0" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "input.39" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "input.69" : { + "k" : 11, + "w" : 768, + "n" : 1, + "h" : 128 + }, + "input.91_mvn" : { + "k" : 128, + "w" : 768, + "n" : 11, + "_rank" : 4, + "h" : 1 + }, + "causal_mask.11_begin_0" : { + "k" : 1, + "w" : 4, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "input.99" : { + "k" : 11, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "transpose_140" : { + "k" : 12, + "w" : 64, + "n" : 11, + "_rank" : 4, + "h" : 128 + }, + "concat_13_values1_0" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "hidden_states.41_scale_constant_in_scale" : { + "k" : 1, + "w" : 768, + "n" : 1, + "h" : 1 + }, + "causal_mask.9" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 4, + "h" : 1 + }, + "input.251_scale" : { + "k" : 128, + "w" : 768, + "n" : 11, + "_rank" : 4, + "h" : 1 + }, + "gather_9_indices_0" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "gather_32" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "hidden_states.11" : { + "k" : 11, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "hidden_states.41_mvn" : { + "k" : 128, + "w" : 768, + "n" : 11, + "_rank" : 4, + "h" : 1 + }, + "input.357" : { + "k" : 11, + "w" : 768, + "n" : 1, + "h" : 128 + }, + "gather_47" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "810_shape" : { + "k" : 1, + "w" : 3, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "input.309" : { + "k" : 11, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "hidden_states.65_scale" : { + "k" : 1, + "w" : 768, + "n" : 1, + "h" : 1 + }, + "transpose_144" : { + "k" : 1500, + "w" : 768, + "n" : 1, + "h" : 1 + }, + "transpose_90" : { + "k" : 1500, + "w" : 768, + "n" : 1, + "h" : 1 + }, + "input.155" : { + "k" : 11, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "tensor.83" : { + "k" : 1, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 1500 + }, + "causal_mask.21_begin_0" : { + "k" : 1, + "w" : 4, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "causal_mask" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 4, + "h" : 1 + }, + "input.379_reshape" : { + "k" : 128, + "w" : 768, + "n" : 11, + "_rank" : 4, + "h" : 1 + }, + "hidden_states.15_scale" : { + "k" : 1, + "w" : 768, + "n" : 1, + "h" : 1 + }, + "tensor.35" : { + "k" : 1, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 1500 + }, + "input.107" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "gather_25_indices_0" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "input.251" : { + "k" : 11, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "401" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "input.123_scale_constant_in_scale" : { + "k" : 1, + "w" : 768, + "n" : 1, + "h" : 1 + }, + "input.47" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "input.17" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "input.203" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "hidden_states.55_reshape" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "input.77" : { + "k" : 768, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "input.347_scale_mul_out" : { + "k" : 128, + "w" : 768, + "n" : 11, + "_rank" : 4, + "h" : 1 + }, + "hidden_states.71_scale_constant_in_bias" : { + "k" : 1, + "w" : 768, + "n" : 1, + "h" : 1 + }, + "927_shape" : { + "k" : 1, + "w" : 3, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "gather_4" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "transpose_148" : { + "k" : 128, + "w" : 768, + "n" : 1, + "h" : 11 + }, + "shape_0" : { + "k" : 1, + "w" : 4, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "701" : { + "k" : 128, + "w" : 64, + "n" : 11, + "_rank" : 4, + "h" : 12 + }, + "tensor.109" : { + "k" : 11, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "hidden_states.11_mvn" : { + "k" : 128, + "w" : 768, + "n" : 11, + "_rank" : 4, + "h" : 1 + }, + "hidden_states.65_scale_constant_in_scale" : { + "k" : 1, + "w" : 768, + "n" : 1, + "h" : 1 + }, + "transpose_98" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "transpose_103" : { + "k" : 128, + "w" : 768, + "n" : 1, + "h" : 11 + }, + "gather_52" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "concat_103_values1_0" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "concat_76_values0_0" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "input.187_reshape" : { + "k" : 128, + "w" : 768, + "n" : 11, + "_rank" : 4, + "h" : 1 + }, + "input.377" : { + "k" : 11, + "w" : 768, + "n" : 1, + "h" : 128 + }, + "concat_58_values0_0" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "transpose_83" : { + "k" : 128, + "w" : 11, + "n" : 1, + "h" : 768 + }, + "transpose_150" : { + "k" : 1, + "w" : 768, + "n" : 1, + "_rank" : 4, + "h" : 1500 + }, + "gather_19" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "input.329" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "tensor.121" : { + "k" : 11, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "concat_29" : { + "k" : 1, + "w" : 4, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "input.175" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "233" : { + "k" : 128, + "w" : 64, + "n" : 11, + "_rank" : 4, + "h" : 12 + }, + "407" : { + "k" : 1, + "w" : 768, + "n" : 1, + "h" : 1500 + }, + "input.127" : { + "k" : 11, + "w" : 3072, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "531" : { + "k" : 1, + "w" : 768, + "n" : 1, + "h" : 1500 + }, + "tensor.55" : { + "k" : 768, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "transpose_107" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "631" : { + "k" : 768, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "attn_weights.1" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "input.25" : { + "k" : 11, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "input.271" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "input.55" : { + "k" : 11, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "hidden_states.91_scale" : { + "k" : 128, + "w" : 768, + "n" : 11, + "_rank" : 4, + "h" : 1 + }, + "hidden_states.41_scale" : { + "k" : 128, + "w" : 768, + "n" : 11, + "_rank" : 4, + "h" : 1 + }, + "input.85" : { + "k" : 11, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "input.223" : { + "k" : 11, + "w" : 3072, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "831" : { + "k" : 11, + "w" : 768, + "n" : 1, + "h" : 128 + }, + "707" : { + "k" : 11, + "w" : 768, + "n" : 1, + "h" : 128 + }, + "hidden_states.65_reshape" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "transpose_154" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "931" : { + "k" : 11, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "hidden_states.11_reshape" : { + "k" : 128, + "w" : 768, + "n" : 11, + "_rank" : 4, + "h" : 1 + }, + "hidden_states.91_scale_mul_out" : { + "k" : 128, + "w" : 768, + "n" : 11, + "_rank" : 4, + "h" : 1 + }, + "band_part_0" : { + "k" : 1, + "w" : 129, + "n" : 1, + "_rank" : 2, + "h" : 128 + }, + "1292" : { + "k" : 11, + "w" : 768, + "n" : 1, + "h" : 128 + }, + "transpose_76" : { + "k" : 128, + "w" : 768, + "n" : 1, + "h" : 11 + }, + "input.379_mvn" : { + "k" : 128, + "w" : 768, + "n" : 11, + "_rank" : 4, + "h" : 1 + }, + "gather_24" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "163" : { + "k" : 1, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "635" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "concat_34" : { + "k" : 1, + "w" : 4, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "1343" : { + "k" : 1, + "w" : 768, + "n" : 1, + "h" : 1500 + }, + "input.283_mvn" : { + "k" : 128, + "w" : 768, + "n" : 11, + "_rank" : 4, + "h" : 1 + }, + "concat_97" : { + "k" : 1, + "w" : 4, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "input.349" : { + "k" : 11, + "w" : 3072, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "gather_39" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "363" : { + "k" : 11, + "w" : 768, + "n" : 1, + "h" : 128 + }, + "239" : { + "k" : 11, + "w" : 768, + "n" : 1, + "h" : 128 + }, + "concat_49" : { + "k" : 1, + "w" : 4, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "hidden_states.115_mvn" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "input.195" : { + "k" : 11, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "transpose_158" : { + "k" : 12, + "w" : 64, + "n" : 11, + "_rank" : 4, + "h" : 128 + }, + "463" : { + "k" : 11, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "935" : { + "k" : 128, + "w" : 64, + "n" : 11, + "_rank" : 4, + "h" : 12 + }, + "input.219_mvn" : { + "k" : 128, + "w" : 768, + "n" : 11, + "_rank" : 4, + "h" : 1 + }, + "transpose_61" : { + "k" : 1500, + "w" : 1, + "n" : 1, + "h" : 768 + }, + "input.147" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "transpose_113" : { + "k" : 12, + "w" : 64, + "n" : 11, + "_rank" : 4, + "h" : 128 + }, + "tensor.75" : { + "k" : 11, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "290" : { + "k" : 1, + "w" : 768, + "n" : 1, + "h" : 1500 + }, + "input.123_mvn" : { + "k" : 128, + "w" : 768, + "n" : 11, + "_rank" : 4, + "h" : 1 + }, + "attn_weights.19" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "hidden_states.65_scale_constant_in_bias" : { + "k" : 1, + "w" : 768, + "n" : 1, + "h" : 1 + }, + "gather_15_indices_0" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "input.63" : { + "k" : 11, + "w" : 3072, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "input.93" : { + "k" : 11, + "w" : 3072, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "tensor.27" : { + "k" : 11, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "input.291" : { + "k" : 11, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "hidden_states.101_mvn" : { + "k" : 128, + "w" : 768, + "n" : 11, + "_rank" : 4, + "h" : 1 + }, + "input.243" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "167" : { + "k" : 128, + "w" : 64, + "n" : 1, + "_rank" : 4, + "h" : 12 + }, + "transpose_160" : { + "k" : 1500, + "w" : 1, + "n" : 1, + "h" : 768 + }, + "590" : { + "k" : 11, + "w" : 768, + "n" : 1, + "h" : 128 + }, + "transpose_69" : { + "k" : 1, + "w" : 768, + "n" : 1, + "_rank" : 4, + "h" : 1500 + }, + "467" : { + "k" : 128, + "w" : 64, + "n" : 11, + "_rank" : 4, + "h" : 12 + }, + "1048" : { + "k" : 11, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "765" : { + "k" : 1, + "w" : 768, + "n" : 1, + "h" : 1500 + }, + "input.155_scale_mul_out" : { + "k" : 128, + "w" : 768, + "n" : 11, + "_rank" : 4, + "h" : 1 + }, + "hidden_states.21_reshape" : { + "k" : 128, + "w" : 768, + "n" : 11, + "_rank" : 4, + "h" : 1 + }, + "transpose_117" : { + "k" : 1500, + "w" : 768, + "n" : 1, + "h" : 1 + }, + "865" : { + "k" : 768, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "attn_weights.7" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "input.155_scale_constant_in_scale" : { + "k" : 1, + "w" : 768, + "n" : 1, + "h" : 1 + }, + "gather_44" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "attn_weights.55" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "1278_shape" : { + "k" : 1, + "w" : 3, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "input.369" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "gather_59" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "transpose_164" : { + "k" : 128, + "w" : 1, + "n" : 1, + "h" : 768 + }, + "hidden_states.81_scale_mul_out" : { + "k" : 128, + "w" : 768, + "n" : 11, + "_rank" : 4, + "h" : 1 + }, + "causal_mask.19_begin_0" : { + "k" : 1, + "w" : 4, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "tensor.133" : { + "k" : 11, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "input.167" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "input.11" : { + "k" : 1, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "tensor.95" : { + "k" : 1, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 1500 + }, + "input.41" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "concat_31_values0_0" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "input.71" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "hidden_states.65" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "input.119" : { + "k" : 11, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "tensor.47" : { + "k" : 1, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 1500 + }, + "397" : { + "k" : 768, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "869" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "concat_13_values0_0" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "297" : { + "k" : 1, + "w" : 768, + "n" : 1, + "h" : 1500 + }, + "hidden_states.61_scale_constant_in_scale" : { + "k" : 1, + "w" : 768, + "n" : 1, + "h" : 1 + }, + "992" : { + "k" : 1, + "w" : 768, + "n" : 1, + "h" : 1500 + }, + "input.263" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "1161_shape" : { + "k" : 1, + "w" : 3, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "input.215" : { + "k" : 11, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "597" : { + "k" : 11, + "w" : 768, + "n" : 1, + "h" : 128 + }, + "953_shape" : { + "k" : 1, + "w" : 4, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "602_shape" : { + "k" : 1, + "w" : 4, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "697" : { + "k" : 11, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "tensor.9" : { + "k" : 1, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 1500 + }, + "1299" : { + "k" : 11, + "w" : 768, + "n" : 1, + "h" : 128 + }, + "hidden_states.15" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "transpose_93" : { + "k" : 128, + "w" : 768, + "n" : 1, + "h" : 11 + }, + "transpose_123" : { + "k" : 1, + "w" : 768, + "n" : 1, + "_rank" : 4, + "h" : 1500 + }, + "input.311" : { + "k" : 11, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "concat_11" : { + "k" : 1, + "w" : 4, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "causal_mask.7_begin_0" : { + "k" : 1, + "w" : 4, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "concat_74" : { + "k" : 1, + "w" : 4, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "1065" : { + "k" : 11, + "w" : 768, + "n" : 1, + "h" : 128 + }, + "1116" : { + "k" : 1, + "w" : 768, + "n" : 1, + "h" : 1500 + }, + "input.389" : { + "k" : 11, + "w" : 768, + "n" : 1, + "h" : 128 + }, + "tensor.111" : { + "k" : 11, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "hidden_states.75_mvn" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "hidden_states.111_reshape" : { + "k" : 128, + "w" : 768, + "n" : 11, + "_rank" : 4, + "h" : 1 + }, + "hidden_states.115_scale_mul_out" : { + "k" : 1, + "w" : 768, + "n" : 1, + "h" : 1 + }, + "999" : { + "k" : 1, + "w" : 768, + "n" : 1, + "h" : 1500 + }, + "input.187" : { + "k" : 11, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "transpose_127" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "tensor.67" : { + "k" : 768, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "input.139" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "concat_4" : { + "k" : 1, + "w" : 4, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "hidden_states.5_scale_constant_in_bias" : { + "k" : 1, + "w" : 768, + "n" : 1, + "h" : 1 + }, + "hidden_states.61_scale_mul_out" : { + "k" : 128, + "w" : 768, + "n" : 11, + "_rank" : 4, + "h" : 1 + }, + "input.283" : { + "k" : 11, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "tensor.19" : { + "k" : 768, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "hidden_states.75_reshape" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "hidden_states.1_scale_mul_out" : { + "k" : 128, + "w" : 768, + "n" : 1, + "_rank" : 4, + "h" : 1 + }, + "input.235" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "hidden_states.85_scale_constant_in_scale" : { + "k" : 1, + "w" : 768, + "n" : 1, + "h" : 1 + }, + "hidden_states.91" : { + "k" : 11, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "hidden_states.61_scale_constant_in_bias" : { + "k" : 1, + "w" : 768, + "n" : 1, + "h" : 1 + }, + "transpose_86" : { + "k" : 12, + "w" : 64, + "n" : 11, + "_rank" : 4, + "h" : 128 + }, + "1416" : { + "k" : 11, + "w" : 768, + "n" : 1, + "h" : 128 + }, + "hidden_states.71_scale_mul_out" : { + "k" : 128, + "w" : 768, + "n" : 11, + "_rank" : 4, + "h" : 1 + }, + "input_mvn" : { + "k" : 128, + "w" : 768, + "n" : 11, + "_rank" : 4, + "h" : 1 + }, + "gather_5" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "input.331" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "836_shape" : { + "k" : 1, + "w" : 4, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "hidden_states.15_scale_constant_in_scale" : { + "k" : 1, + "w" : 768, + "n" : 1, + "h" : 1 + }, + "concat_31" : { + "k" : 1, + "w" : 4, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "transpose_71" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "concat_94" : { + "k" : 1, + "w" : 4, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "hidden_states.105_scale" : { + "k" : 1, + "w" : 768, + "n" : 1, + "h" : 1 + }, + "hidden_states.115" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "hidden_states.41" : { + "k" : 11, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "input.59_scale" : { + "k" : 128, + "w" : 768, + "n" : 11, + "_rank" : 4, + "h" : 1 + }, + "hidden_states.45_mvn" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "hidden_states.101_scale_constant_in_scale" : { + "k" : 1, + "w" : 768, + "n" : 1, + "h" : 1 + }, + "transpose_133" : { + "k" : 1500, + "w" : 1, + "n" : 1, + "h" : 768 + }, + "input.187_scale_constant_in_scale" : { + "k" : 1, + "w" : 768, + "n" : 1, + "h" : 1 + }, + "861_shape" : { + "k" : 1, + "w" : 4, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "attention_mask" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "input.159" : { + "k" : 11, + "w" : 3072, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "hidden_states.71_scale" : { + "k" : 128, + "w" : 768, + "n" : 11, + "_rank" : 4, + "h" : 1 + }, + "tensor.87" : { + "k" : 11, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "gather_2_indices_0" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "510_shape" : { + "k" : 1, + "w" : 4, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "tensor.39" : { + "k" : 11, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "transpose_79" : { + "k" : 1500, + "w" : 1, + "n" : 1, + "h" : 768 + }, + "hidden_states.115_scale" : { + "k" : 1, + "w" : 768, + "n" : 1, + "h" : 1 + }, + "hidden_states.101_reshape" : { + "k" : 128, + "w" : 768, + "n" : 11, + "_rank" : 4, + "h" : 1 + }, + "input.255" : { + "k" : 11, + "w" : 3072, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "hidden_states.31_reshape" : { + "k" : 128, + "w" : 768, + "n" : 11, + "_rank" : 4, + "h" : 1 + }, + "input.207" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "concat_4_values0_0" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "transpose_64" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "hidden_states.5_mvn" : { + "k" : 128, + "w" : 768, + "n" : 1, + "_rank" : 4, + "h" : 1 + }, + "input.351" : { + "k" : 11, + "w" : 3072, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "hidden_states.95_scale" : { + "k" : 1, + "w" : 768, + "n" : 1, + "h" : 1 + }, + "transpose_137" : { + "k" : 128, + "w" : 11, + "n" : 1, + "h" : 768 + }, + "hidden_states.15_mvn" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "input.303" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "978_shape" : { + "k" : 1, + "w" : 4, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "hidden_states.45_scale" : { + "k" : 1, + "w" : 768, + "n" : 1, + "h" : 1 + }, + "hidden_states.51_scale_mul_out" : { + "k" : 128, + "w" : 768, + "n" : 11, + "_rank" : 4, + "h" : 1 + }, + "input.251_scale_constant_in_scale" : { + "k" : 1, + "w" : 768, + "n" : 1, + "h" : 1 + }, + "input.155_mvn" : { + "k" : 128, + "w" : 768, + "n" : 11, + "_rank" : 4, + "h" : 1 + }, + "719_shape" : { + "k" : 1, + "w" : 4, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "627_shape" : { + "k" : 1, + "w" : 4, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "input.101" : { + "k" : 11, + "w" : 768, + "n" : 1, + "h" : 128 + }, + "input.379_scale" : { + "k" : 128, + "w" : 768, + "n" : 11, + "_rank" : 4, + "h" : 1 + }, + "tensor.123" : { + "k" : 11, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "input.179" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "hidden_states.5_reshape" : { + "k" : 128, + "w" : 768, + "n" : 1, + "_rank" : 4, + "h" : 1 + }, + "hidden_states.55_scale_constant_in_bias" : { + "k" : 1, + "w" : 768, + "n" : 1, + "h" : 1 + }, + "24" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "input.379_scale_constant_in_bias" : { + "k" : 1, + "w" : 768, + "n" : 1, + "h" : 1 + }, + "tensor.59" : { + "k" : 1, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 1500 + }, + "25" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "744_shape" : { + "k" : 1, + "w" : 4, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "tensor.3" : { + "k" : 1, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "input.251_scale_mul_out" : { + "k" : 128, + "w" : 768, + "n" : 11, + "_rank" : 4, + "h" : 1 + }, + "input.275" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "input.227" : { + "k" : 11, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "concat_85_values2_0" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "hidden_states.101" : { + "k" : 11, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "input.371" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "hidden_states.41_reshape" : { + "k" : 128, + "w" : 768, + "n" : 11, + "_rank" : 4, + "h" : 1 + }, + "transpose_143" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "concat_67_values2_0" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "hidden_states.111_mvn" : { + "k" : 128, + "w" : 768, + "n" : 11, + "_rank" : 4, + "h" : 1 + }, + "gather_5_indices_0" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "input.323" : { + "k" : 11, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "1450" : { + "k" : 768, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "concat_49_values2_0" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "input.121" : { + "k" : 11, + "w" : 768, + "n" : 1, + "h" : 128 + }, + "concat_38" : { + "k" : 1, + "w" : 4, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "tensor.101" : { + "k" : 11, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "hidden_states.81_scale_constant_in_scale" : { + "k" : 1, + "w" : 768, + "n" : 1, + "h" : 1 + }, + "hidden_states.21_scale" : { + "k" : 128, + "w" : 768, + "n" : 11, + "_rank" : 4, + "h" : 1 + }, + "transpose_147" : { + "k" : 128, + "w" : 768, + "n" : 1, + "h" : 11 + }, + "input.199" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "hidden_states.1_reshape" : { + "k" : 128, + "w" : 768, + "n" : 1, + "_rank" : 4, + "h" : 1 + }, + "hidden_states.85_reshape" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "transpose_96" : { + "k" : 1, + "w" : 768, + "n" : 1, + "_rank" : 4, + "h" : 1500 + }, + "transpose_102" : { + "k" : 128, + "w" : 768, + "n" : 1, + "h" : 11 + }, + "tensor.79" : { + "k" : 768, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "1421_shape" : { + "k" : 1, + "w" : 4, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "hidden_states.41_scale_mul_out" : { + "k" : 128, + "w" : 768, + "n" : 11, + "_rank" : 4, + "h" : 1 + }, + "hidden_states.11_scale_constant_in_scale" : { + "k" : 1, + "w" : 768, + "n" : 1, + "h" : 1 + }, + "hidden_states.91_mvn" : { + "k" : 128, + "w" : 768, + "n" : 11, + "_rank" : 4, + "h" : 1 + }, + "input.295" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "transpose_81" : { + "k" : 1500, + "w" : 768, + "n" : 1, + "h" : 1 + }, + "input.247" : { + "k" : 11, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "input.343" : { + "k" : 11, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "1454" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "concat_43" : { + "k" : 1, + "w" : 4, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "attn_weights.43" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "transpose_106" : { + "k" : 1500, + "w" : 1, + "n" : 1, + "h" : 768 + }, + "input.141" : { + "k" : 768, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "1220" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "concat_58" : { + "k" : 1, + "w" : 4, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "input.1" : { + "k" : 1, + "w" : 128, + "n" : 1, + "_rank" : 2, + "h" : 1 + }, + "tensor.21" : { + "k" : 1, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 1500 + }, + "transpose_89" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "input_scale_constant_in_scale" : { + "k" : 1, + "w" : 768, + "n" : 1, + "h" : 1 + }, + "transpose_153" : { + "k" : 1500, + "w" : 768, + "n" : 1, + "h" : 1 + }, + "hidden_states.101_scale_constant_in_bias" : { + "k" : 1, + "w" : 768, + "n" : 1, + "h" : 1 + }, + "concat_4_values1_0" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "causal_mask.27_begin_0" : { + "k" : 1, + "w" : 4, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "tensor.135" : { + "k" : 11, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "hidden_states.61_mvn" : { + "k" : 128, + "w" : 768, + "n" : 11, + "_rank" : 4, + "h" : 1 + }, + "input.27_scale_constant_in_bias" : { + "k" : 1, + "w" : 768, + "n" : 1, + "h" : 1 + }, + "input.59_mvn" : { + "k" : 128, + "w" : 768, + "n" : 11, + "_rank" : 4, + "h" : 1 + }, + "tensor.99" : { + "k" : 11, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "input.283_scale_constant_in_scale" : { + "k" : 1, + "w" : 768, + "n" : 1, + "h" : 1 + }, + "transpose_74" : { + "k" : 128, + "w" : 11, + "n" : 1, + "h" : 768 + }, + "hidden_states.95_reshape" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "input.19" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "hidden_states.95" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "input.49" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "input.79" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "hidden_states.45_scale_constant_in_bias" : { + "k" : 1, + "w" : 768, + "n" : 1, + "h" : 1 + }, + "hidden_states.51_scale_constant_in_bias" : { + "k" : 1, + "w" : 768, + "n" : 1, + "h" : 1 + }, + "hidden_states.35_scale_constant_in_scale" : { + "k" : 1, + "w" : 768, + "n" : 1, + "h" : 1 + }, + "input.267" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "causal_mask.21" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 4, + "h" : 1 + }, + "input.187_scale_constant_in_bias" : { + "k" : 1, + "w" : 768, + "n" : 1, + "h" : 1 + }, + "input.59_scale_constant_in_scale" : { + "k" : 1, + "w" : 768, + "n" : 1, + "h" : 1 + }, + "1187_shape" : { + "k" : 1, + "w" : 4, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "1329_shape" : { + "k" : 1, + "w" : 4, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "concat_106" : { + "k" : 1, + "w" : 4, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "input.219" : { + "k" : 11, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "hidden_states.21_scale_mul_out" : { + "k" : 128, + "w" : 768, + "n" : 11, + "_rank" : 4, + "h" : 1 + }, + "transpose_157" : { + "k" : 128, + "w" : 768, + "n" : 1, + "h" : 11 + }, + "input.363" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "transpose_112" : { + "k" : 128, + "w" : 768, + "n" : 1, + "h" : 11 + }, + "hidden_states.45" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "input.27_scale_constant_in_scale" : { + "k" : 1, + "w" : 768, + "n" : 1, + "h" : 1 + }, + "input.315" : { + "k" : 11, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "hidden_states.111_scale_constant_in_bias" : { + "k" : 1, + "w" : 768, + "n" : 1, + "h" : 1 + }, + "hidden_states.31_scale_mul_out" : { + "k" : 128, + "w" : 768, + "n" : 11, + "_rank" : 4, + "h" : 1 + }, + "concat_40_values2_0" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "hidden_states.85_scale_mul_out" : { + "k" : 1, + "w" : 768, + "n" : 1, + "h" : 1 + }, + "input.113" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "tensor.41" : { + "k" : 11, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "causal_mask.23" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 4, + "h" : 1 + }, + "1212_shape" : { + "k" : 1, + "w" : 4, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "hidden_states.31_mvn" : { + "k" : 128, + "w" : 768, + "n" : 11, + "_rank" : 4, + "h" : 1 + }, + "concat_22_values2_0" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "tensor.113" : { + "k" : 11, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "concat_101" : { + "k" : 1, + "w" : 4, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "hidden_states.1_scale_constant_in_scale" : { + "k" : 1, + "w" : 768, + "n" : 1, + "h" : 1 + }, + "transpose_67" : { + "k" : 128, + "w" : 768, + "n" : 1, + "h" : 11 + }, + "input.219_scale_mul_out" : { + "k" : 128, + "w" : 768, + "n" : 11, + "_rank" : 4, + "h" : 1 + }, + "hidden_states.95_scale_mul_out" : { + "k" : 1, + "w" : 768, + "n" : 1, + "h" : 1 + }, + "input.283_reshape" : { + "k" : 128, + "w" : 768, + "n" : 11, + "_rank" : 4, + "h" : 1 + }, + "causal_mask.17_begin_0" : { + "k" : 1, + "w" : 4, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "input.27" : { + "k" : 11, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "transpose_116" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "input.57" : { + "k" : 11, + "w" : 768, + "n" : 1, + "h" : 128 + }, + "input.87" : { + "k" : 11, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "input.187_scale" : { + "k" : 128, + "w" : 768, + "n" : 11, + "_rank" : 4, + "h" : 1 + }, + "112" : { + "k" : 1, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "1175" : { + "k" : 11, + "w" : 768, + "n" : 1, + "h" : 128 + }, + "input.187_mvn" : { + "k" : 128, + "w" : 768, + "n" : 11, + "_rank" : 4, + "h" : 1 + }, + "input.287" : { + "k" : 11, + "w" : 3072, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "hidden_states.51_reshape" : { + "k" : 128, + "w" : 768, + "n" : 11, + "_rank" : 4, + "h" : 1 + }, + "causal_mask.25" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 4, + "h" : 1 + }, + "causal_mask.11" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 4, + "h" : 1 + }, + "1226" : { + "k" : 1, + "w" : 768, + "n" : 1, + "h" : 1500 + }, + "input.239" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "transpose_163" : { + "k" : 1, + "w" : 768, + "n" : 1, + "_rank" : 4, + "h" : 128 + }, + "gather_10" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "hidden_states.105_scale_constant_in_scale" : { + "k" : 1, + "w" : 768, + "n" : 1, + "h" : 1 + }, + "concat_20" : { + "k" : 1, + "w" : 4, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "input.27_scale_mul_out" : { + "k" : 128, + "w" : 768, + "n" : 11, + "_rank" : 4, + "h" : 1 + }, + "gather_54_indices_0" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "input.383" : { + "k" : 11, + "w" : 3072, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "hidden_states.101_scale_mul_out" : { + "k" : 128, + "w" : 768, + "n" : 11, + "_rank" : 4, + "h" : 1 + }, + "concat_83" : { + "k" : 1, + "w" : 4, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "input.335" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "gather_25" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "hidden_states.111_scale_constant_in_scale" : { + "k" : 1, + "w" : 768, + "n" : 1, + "h" : 1 + }, + "input.27_reshape" : { + "k" : 128, + "w" : 768, + "n" : 11, + "_rank" : 4, + "h" : 1 + }, + "input.181" : { + "k" : 11, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "hidden_states.115_reshape" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "input.283_scale" : { + "k" : 128, + "w" : 768, + "n" : 11, + "_rank" : 4, + "h" : 1 + }, + "116" : { + "k" : 128, + "w" : 64, + "n" : 1, + "_rank" : 4, + "h" : 12 + }, + "tensor.61" : { + "k" : 11, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "hidden_states.71" : { + "k" : 11, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "414" : { + "k" : 1, + "w" : 768, + "n" : 1, + "h" : 1500 + }, + "input.133" : { + "k" : 11, + "w" : 768, + "n" : 1, + "h" : 128 + }, + "causal_mask.27" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 4, + "h" : 1 + }, + "514" : { + "k" : 768, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "causal_mask.13" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 4, + "h" : 1 + }, + "tensor.13" : { + "k" : 11, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "hidden_states.11_scale_mul_out" : { + "k" : 128, + "w" : 768, + "n" : 11, + "_rank" : 4, + "h" : 1 + }, + "transpose_167" : { + "k" : 12, + "w" : 64, + "n" : 1, + "_rank" : 4, + "h" : 128 + }, + "input.219_scale" : { + "k" : 128, + "w" : 768, + "n" : 11, + "_rank" : 4, + "h" : 1 + }, + "transpose_91" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "714" : { + "k" : 11, + "w" : 768, + "n" : 1, + "h" : 128 + }, + "transpose_122" : { + "k" : 12, + "w" : 64, + "n" : 11, + "_rank" : 4, + "h" : 128 + }, + "input.35" : { + "k" : 11, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "causal_mask_begin_0" : { + "k" : 1, + "w" : 4, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "input.95" : { + "k" : 11, + "w" : 3072, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "814" : { + "k" : 11, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "concat_85_values1_0" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "hidden_states.21" : { + "k" : 11, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "concat_67_values1_0" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "641" : { + "k" : 1, + "w" : 768, + "n" : 1, + "h" : 1500 + }, + "hidden_states.75_scale_mul_out" : { + "k" : 1, + "w" : 768, + "n" : 1, + "h" : 1 + }, + "input.347_reshape" : { + "k" : 128, + "w" : 768, + "n" : 11, + "_rank" : 4, + "h" : 1 + }, + "input.259" : { + "k" : 11, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "causal_mask.15" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 4, + "h" : 1 + }, + "gather_30" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "concat_49_values1_0" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "518" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "concat_40" : { + "k" : 1, + "w" : 4, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "hidden_states.61_reshape" : { + "k" : 128, + "w" : 768, + "n" : 11, + "_rank" : 4, + "h" : 1 + }, + "hidden_states.41_scale_constant_in_bias" : { + "k" : 1, + "w" : 768, + "n" : 1, + "h" : 1 + }, + "decoder.embed_tokens.weight" : { + "k" : 1, + "w" : 768, + "n" : 1, + "_rank" : 2, + "h" : 51865 + }, + "1070_shape" : { + "k" : 1, + "w" : 4, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "input.315_scale" : { + "k" : 128, + "w" : 768, + "n" : 11, + "_rank" : 4, + "h" : 1 + }, + "concat_4_values2_0" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "941" : { + "k" : 11, + "w" : 768, + "n" : 1, + "h" : 128 + }, + "input.355" : { + "k" : 11, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "gather_45" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "246" : { + "k" : 11, + "w" : 768, + "n" : 1, + "h" : 128 + }, + "transpose_126" : { + "k" : 1500, + "w" : 768, + "n" : 1, + "h" : 1 + }, + "transpose_99" : { + "k" : 1500, + "w" : 768, + "n" : 1, + "h" : 1 + }, + "attn_weights.49" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "input.307" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "346" : { + "k" : 11, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "818" : { + "k" : 128, + "w" : 64, + "n" : 11, + "_rank" : 4, + "h" : 12 + }, + "input.153" : { + "k" : 11, + "w" : 768, + "n" : 1, + "h" : 128 + }, + "tensor.81" : { + "k" : 1, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 1500 + }, + "concat_103_values2_0" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "173" : { + "k" : 1, + "w" : 768, + "n" : 1, + "h" : 1500 + }, + "hidden_states.51_scale" : { + "k" : 128, + "w" : 768, + "n" : 11, + "_rank" : 4, + "h" : 1 + }, + "transpose_84" : { + "k" : 128, + "w" : 768, + "n" : 1, + "h" : 11 + }, + "input.105" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "tensor.33" : { + "k" : 1, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 1500 + }, + "causal_mask.17" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 4, + "h" : 1 + }, + "hidden_states.31_scale_constant_in_scale" : { + "k" : 1, + "w" : 768, + "n" : 1, + "h" : 1 + }, + "attn_output.93" : { + "k" : 1500, + "w" : 768, + "n" : 1, + "_rank" : 4, + "h" : 1 + }, + "input.13" : { + "k" : 1, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "input.43" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "input.155_reshape" : { + "k" : 128, + "w" : 768, + "n" : 11, + "_rank" : 4, + "h" : 1 + }, + "input.73" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "tensor.125" : { + "k" : 11, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "input.201" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "473" : { + "k" : 11, + "w" : 768, + "n" : 1, + "h" : 128 + }, + "hidden_states.105_reshape" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "648" : { + "k" : 1, + "w" : 768, + "n" : 1, + "h" : 1500 + }, + "hidden_states.75_scale" : { + "k" : 1, + "w" : 768, + "n" : 1, + "h" : 1 + }, + "input.91_scale_constant_in_scale" : { + "k" : 1, + "w" : 768, + "n" : 1, + "h" : 1 + }, + "748" : { + "k" : 768, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "input.279" : { + "k" : 11, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "gather_50" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "causal_mask.19" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 4, + "h" : 1 + }, + "hidden_states.25_scale" : { + "k" : 1, + "w" : 768, + "n" : 1, + "h" : 1 + }, + "gather_44_indices_0" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "attn_output.81" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "948" : { + "k" : 11, + "w" : 768, + "n" : 1, + "h" : 128 + }, + "transpose_132" : { + "k" : 1, + "w" : 768, + "n" : 1, + "_rank" : 4, + "h" : 1500 + }, + "input.375" : { + "k" : 11, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "input.327" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "gather_17" : { + "k" : 1, + "w" : 1, + "n" : 1, + "_rank" : 1, + "h" : 1 + }, + "transpose_77" : { + "k" : 12, + "w" : 64, + "n" : 11, + "_rank" : 4, + "h" : 128 + }, + "input.173" : { + "k" : 768, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "875" : { + "k" : 1, + "w" : 768, + "n" : 1, + "h" : 1500 + }, + "input.315_scale_constant_in_bias" : { + "k" : 1, + "w" : 768, + "n" : 1, + "h" : 1 + }, + "input.3" : { + "k" : 1, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "hidden_states.65_scale_mul_out" : { + "k" : 1, + "w" : 768, + "n" : 1, + "h" : 1 + }, + "input.125" : { + "k" : 11, + "w" : 3072, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "tensor.53" : { + "k" : 11, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "input.21" : { + "k" : 11, + "w" : 768, + "n" : 1, + "_rank" : 3, + "h" : 128 + }, + "transpose_62" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "input.51" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "tensor.103" : { + "k" : 768, + "w" : 1, + "n" : 1, + "h" : 1 + }, + "input.81" : { + "k" : 1, + "w" : 1, + "n" : 1, + "h" : 1 + } + }, + "global_shapes" : { + + } + }, + "subnetworks" : { + + } +} \ No newline at end of file